Fdrsim.R
From Organic Design wiki
fdrsim <- function(outfile = "tmp", m=1000, iiter=100, seqpi0=1, alpha = 0.05,
adaptive=FALSE, mu0=0, mu1=3, sigma=1, seed=1,Estpi0 = FALSE,
datadir = ifelse(version$os=="mingw32", "D:Data", "Data"), lambdaseq = seq(0, 0.95, by=0.05),
ncs.value="paper", use.siggenes=TRUE, use.convest = TRUE, pad=TRUE, FDR = FALSE)
{
- ----------------------------------------------------------------------------- #
- {{#security:admin}}
- m = numeric number of genes to simulate
- itter = numeric number of iterations
- sepi0 = numeric vector of True pi0 values to simulate
- alpha = numeric number [0,1] type I probability test cutoff
- adaptive = logical Finner adaptive FDR control
- use.siggenes = logical to use pi0.est function in siggenes package
- lambdaseq = numeric vector lambda range for pi0.est smoothing splines
- ncs.value = character string "paper " or "max"
- use.convest = logical to use convest function in limma package
- pad = logical Populate padded lists for speed
- FDR = logical whether to calculate fdr elements to increase speed
- ----------------------------------------------------------------------------- #
- 1) ---------------------------------- Setup --------------------------------- #
- Check and create directories for save(...) of objects
if(length(dir(datadir))==0) {
dir.create(datadir)
}
- Check and install multtest
if(!require(multtest, quietly=TRUE)) {
if(!exists("biocLite")){
cat("[ installing biocLite]\n")
source("http://www.bioconductor.org/iocLite.R")
}
cat("[ Package:Multtest not available attempting to install ]\n")
biocLite("multtest")
}
- Check and install limma
if(!require(limma, quietly=TRUE)) {
if(!exists("biocLite")){
cat("[ installing biocLite]\n")
source("http://www.bioconductor.org/iocLite.R")
}
cat("[ Package:limma not available attempting to install ]\n")
biocLite("limma")
}
- Check and install siggenes
if(!require(siggenes, quietly=TRUE)) {
if(!exists("biocLite")){
cat("[ installing biocLite]\n")
source("http://www.bioconductor.org/iocLite.R")
}
cat("[ Package:siggenes not available attempting to install ]\n")
biocLite("siggenes")
}
set.seed(seed) starttime <- proc.time()[3]
- Set up False discovery and False non discovery vectors
FD <- rep(NA, iiter) FND <- rep(NA, iiter)
- Output lists
if(Estpi0) {
FDlist <- list(m=m, alpha=alpha, parameters=list(mu0=mu0,mu1=mu1),
V=list(), T=list(), p0=list(),p0NW=list(), convest=list())
}else{
FDlist <- list(m=m, alpha=alpha, parameters=list(mu0=mu0,mu1=mu1),
V=list(), T=list())
}
- for(pi0 in seqpi0) {
- Pad out FDlist here
- if(pad) {
- toPad <- rep(NA, iiter)
- FDlist"V"as.character(pi0) <- toPad
- FDlist"T"as.character(pi0) <- toPad
- if(Estpi0) {
- FDlist"p0NW"as.character(pi0) <- toPad
- FDlist"p0"as.character(pi0) <- toPad
- }
- if(use.convest) {
- FDlist"convest"as.character(pi0) <- toPad
- }
- }
- }
- 2) ---------------------------- pi0 outer loop ------------------------------ #
- Main loop for each value of pi0, the proportion of truly Null hypotheses
for(pi0 in seqpi0)
{
# False discovery threshold
if(adaptive)
{
# Adaptive control adjustment on true pi0 (finner & Roters, 2002)
alphastar<- alpha/pi0
} else {
# Non adaptive control
alphastar <- alpha
}
m1 <- round(m * (1-pi0))
m0 <- m - m1
DEmeans <- c(rep(mu1,m1), rep(mu0,m0))
DEflag <- as.logical(DEmeans)
- 3) --------------------------- iiter inner loop ----------------------------- #
for(i in seq(iiter)){
# Names for list elements
pi0char <- as.character(round(pi0, 5))
- 4) ------------------------------ Simulation -------------------------------- #
X <- rnorm(m, DEmeans, sigma)
pvalues <- 2*(1-pnorm(abs(X),0,sigma))
- 5) ----------------------------- Saving statistics -------------------------- #
if(Estpi0){
- pi0 estimation
- Storey uses seq(0,0.95, by=0.05) for lambda, estimates at pi(0.95)
- Maximum value of lambda needs to be less than 1 estimated by (m-1)/m
if(use.convest) {# Convest estimation
FDlist"convest"pi0char[i] <- convest(pvalues, niter=50)
}
# Siggenes or first principles
if( use.siggenes ) {
require(siggenes, quietly=TRUE)
# A) weighting by 1-lambda
pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
ncs.weights=1-lambdaseq)$p0
FDlist"p0"pi0char[i] <- round(pi0hat,8)
# B) no 1-lambda weighting
pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
ncs.weights=NULL)$p0
FDlist"p0NW"pi0char[i] <- round(pi0hat,8)
} else {
pi0lambda <- tapply(lambdaseq, lambdaseq,
function(x){sum(pvalues>x)/(m*(1-x))})
# A) weighting by 1-lambda
pi0hat <- min(smooth.spline(lambdaseq, pi0lambda, w=1-lambdaseq,
df=3)$y[length(lambdaseq)],1)
FDlist"p0"pi0char[i] <- round(pi0hat,8)
# B) no 1-lambda weighting
pi0hat <- min(smooth.spline(lambdaseq, pi0lambda,
df=3)$y[length(lambdaseq)],1)
FDlist"p0NW"pi0char[i] <- round(pi0hat,8)
}
}
if(FDR) {
# Step down fdr Bengamini & Hochberg control
Porder <- order(pvalues)
# Sorted pvalues
cutoff <- pvalues[Porder] > (seq(m) * alphastar)/m
#Reordered DEflag
DEorder <- DEflag[Porder]
rejected <- DEorder[!cutoff]
accepted <- DEorder[cutoff]
FD[i] <- sum(!rejected)
FND[i] <- sum(accepted)
}
}
# Writing to FDlist every iteration of p
if(FDR) {
FDlist"V"pi0char <- FD
FDlist"T"pi0char <- FND
}
save(FDlist, file=file.path(datadir, outfile), compress=T)
} # End of main loop
- Return simulation time
endtime <- proc.time()[3] - starttime return(endtime) }



