Fdrsim.R

From Organic Design wiki

fdrsim <- function(outfile = "tmp", m=1000, iiter=100, seqpi0=1, alpha = 0.05,

                  adaptive=FALSE, mu0=0, mu1=3, sigma=1, seed=1,Estpi0 = FALSE,
                  datadir = ifelse(version$os=="mingw32", "D:Data", "Data"), lambdaseq = seq(0, 0.95, by=0.05),
                  ncs.value="paper", use.siggenes=TRUE, use.convest = TRUE, pad=TRUE, FDR = FALSE)

{

  1. ----------------------------------------------------------------------------- #
  2. {{#security:admin}}
  3. m = numeric number of genes to simulate
  4. itter = numeric number of iterations
  5. sepi0 = numeric vector of True pi0 values to simulate
  6. alpha = numeric number [0,1] type I probability test cutoff
  7. adaptive = logical Finner adaptive FDR control
  8. use.siggenes = logical to use pi0.est function in siggenes package
  9. lambdaseq = numeric vector lambda range for pi0.est smoothing splines
  10. ncs.value = character string "paper " or "max"
  11. use.convest = logical to use convest function in limma package
  12. pad = logical Populate padded lists for speed
  13. FDR = logical whether to calculate fdr elements to increase speed
  14. ----------------------------------------------------------------------------- #
  1. 1) ---------------------------------- Setup --------------------------------- #
  2. Check and create directories for save(...) of objects

if(length(dir(datadir))==0) {

 dir.create(datadir)

}

  1. Check and install multtest

if(!require(multtest, quietly=TRUE)) {

 if(!exists("biocLite")){
   cat("[ installing biocLite]\n")
   source("http://www.bioconductor.org/iocLite.R")
 }
 cat("[ Package:Multtest not available attempting to install ]\n")
 biocLite("multtest")

}

  1. Check and install limma

if(!require(limma, quietly=TRUE)) {

  if(!exists("biocLite")){
   cat("[ installing biocLite]\n")
   source("http://www.bioconductor.org/iocLite.R")
 }
  cat("[ Package:limma not available attempting to install ]\n")
  biocLite("limma")

}

  1. Check and install siggenes

if(!require(siggenes, quietly=TRUE)) {

  if(!exists("biocLite")){
   cat("[ installing biocLite]\n")
   source("http://www.bioconductor.org/iocLite.R")
 }
  cat("[ Package:siggenes not available attempting to install ]\n")
  biocLite("siggenes")

}

set.seed(seed) starttime <- proc.time()[3]

  1. Set up False discovery and False non discovery vectors

FD <- rep(NA, iiter) FND <- rep(NA, iiter)

  1. Output lists

if(Estpi0) {

 FDlist <- list(m=m, alpha=alpha, parameters=list(mu0=mu0,mu1=mu1),
                V=list(), T=list(), p0=list(),p0NW=list(), convest=list())

}else{

 FDlist <- list(m=m, alpha=alpha, parameters=list(mu0=mu0,mu1=mu1),
                V=list(), T=list())

}

  1. for(pi0 in seqpi0) {
  2. Pad out FDlist here
  3. if(pad) {
  4. toPad <- rep(NA, iiter)
  5. FDlist"V"as.character(pi0) <- toPad
  6. FDlist"T"as.character(pi0) <- toPad
  7. if(Estpi0) {
  8. FDlist"p0NW"as.character(pi0) <- toPad
  9. FDlist"p0"as.character(pi0) <- toPad
  10. }
  11. if(use.convest) {
  12. FDlist"convest"as.character(pi0) <- toPad
  13. }
  14. }
  15. }
  1. 2) ---------------------------- pi0 outer loop ------------------------------ #
  2. Main loop for each value of pi0, the proportion of truly Null hypotheses

for(pi0 in seqpi0)

 {
 # False discovery threshold
   if(adaptive)
     {
   # Adaptive control adjustment on true pi0 (finner & Roters, 2002)
       alphastar<- alpha/pi0
     } else { 
   # Non  adaptive control
       alphastar <- alpha
     }

   m1 <- round(m * (1-pi0))
   m0 <- m - m1
   DEmeans <- c(rep(mu1,m1), rep(mu0,m0))
   DEflag <- as.logical(DEmeans)

  1. 3) --------------------------- iiter inner loop ----------------------------- #
  for(i in seq(iiter)){

     # Names for list elements
     pi0char <- as.character(round(pi0, 5)) 

  1. 4) ------------------------------ Simulation -------------------------------- #
     X <- rnorm(m, DEmeans, sigma)
     pvalues <- 2*(1-pnorm(abs(X),0,sigma))

  1. 5) ----------------------------- Saving statistics -------------------------- #
     if(Estpi0){
  1. pi0 estimation
  2. Storey uses seq(0,0.95, by=0.05) for lambda, estimates at pi(0.95)
  3. Maximum value of lambda needs to be less than 1 estimated by (m-1)/m
       if(use.convest) {# Convest estimation
         FDlist"convest"pi0char[i] <- convest(pvalues, niter=50)
       }

       # Siggenes or first principles
       if( use.siggenes ) {
         require(siggenes, quietly=TRUE)
         # A) weighting by 1-lambda
         pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
                           ncs.weights=1-lambdaseq)$p0
         FDlist"p0"pi0char[i] <- round(pi0hat,8) 
         # B) no 1-lambda weighting
         pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
                           ncs.weights=NULL)$p0
         FDlist"p0NW"pi0char[i] <- round(pi0hat,8)
       } else {
         pi0lambda <- tapply(lambdaseq, lambdaseq,
                             function(x){sum(pvalues>x)/(m*(1-x))})
         # A) weighting by 1-lambda
         pi0hat <- min(smooth.spline(lambdaseq, pi0lambda, w=1-lambdaseq, 
                                     df=3)$y[length(lambdaseq)],1)
         FDlist"p0"pi0char[i] <- round(pi0hat,8) 
         # B) no 1-lambda weighting
         pi0hat <- min(smooth.spline(lambdaseq, pi0lambda,
                                     df=3)$y[length(lambdaseq)],1)
         FDlist"p0NW"pi0char[i] <- round(pi0hat,8) 
       }
     }
     if(FDR) {
     # Step down fdr Bengamini & Hochberg control
       Porder <- order(pvalues)
     # Sorted pvalues
       cutoff <- pvalues[Porder] > (seq(m) * alphastar)/m 
     #Reordered DEflag
       DEorder  <- DEflag[Porder] 
       rejected <- DEorder[!cutoff]
       accepted <- DEorder[cutoff]
       FD[i]  <- sum(!rejected)
       FND[i] <- sum(accepted)
     }
   }
  # Writing to FDlist every iteration of p
   if(FDR) {
     FDlist"V"pi0char <- FD
     FDlist"T"pi0char <- FND
   }
  save(FDlist, file=file.path(datadir, outfile), compress=T)
 } # End of main loop

  1. Return simulation time

endtime <- proc.time()[3] - starttime return(endtime) }