Difference between revisions of "Fdrsim.R"

From Organic Design wiki
(Add # {{R}}{{#security:*|Sven,Mik}})
(bug)
 
Line 5: Line 5:
 
{
 
{
 
# ----------------------------------------------------------------------------- #
 
# ----------------------------------------------------------------------------- #
# {{R}}{{#security:*|Sven,Mik}}
+
#  
 +
# [[Category:Private]]{{#security:admin}}
 
# m            = numeric number of genes to simulate
 
# m            = numeric number of genes to simulate
 
# itter        = numeric number of iterations
 
# itter        = numeric number of iterations
Line 18: Line 19:
 
# FDR          = logical whether to calculate fdr elements to increase speed
 
# FDR          = logical whether to calculate fdr elements to increase speed
 
# ----------------------------------------------------------------------------- #
 
# ----------------------------------------------------------------------------- #
 
+
 
# 1) ---------------------------------- Setup --------------------------------- #   
 
# 1) ---------------------------------- Setup --------------------------------- #   
 
# Check and create directories for save(...) of objects  
 
# Check and create directories for save(...) of objects  
Line 24: Line 25:
 
   dir.create(datadir)
 
   dir.create(datadir)
 
}
 
}
 
+
 
# Check and install multtest
 
# Check and install multtest
 
if(!require(multtest, quietly=TRUE)) {
 
if(!require(multtest, quietly=TRUE)) {
   cat("Package:Multtest not available attempting to install\n")
+
   if(!exists("biocLite")){
   install.packages("multtest")
+
    cat("[ installing biocLite]\n")
 +
    source("http://www.bioconductor.org/iocLite.R")
 +
  }
 +
  cat("[ Package:Multtest not available attempting to install ]\n")
 +
   biocLite("multtest")
 
}
 
}
 
# Check and install limma
 
# Check and install limma
 
if(!require(limma, quietly=TRUE)) {
 
if(!require(limma, quietly=TRUE)) {
   cat("Package:limma not available attempting to install\n")
+
  if(!exists("biocLite")){
  install.packages("limma")
+
    cat("[ installing biocLite]\n")
 +
    source("http://www.bioconductor.org/iocLite.R")
 +
   }
 +
  cat("[ Package:limma not available attempting to install ]\n")
 +
  biocLite("limma")
 
}
 
}
 
# Check and install siggenes
 
# Check and install siggenes
 
if(!require(siggenes, quietly=TRUE)) {
 
if(!require(siggenes, quietly=TRUE)) {
   cat("Package:siggenes not available attempting to install\n")
+
  if(!exists("biocLite")){
  install.packages("siggenes")
+
    cat("[ installing biocLite]\n")
 +
    source("http://www.bioconductor.org/iocLite.R")
 +
   }
 +
  cat("[ Package:siggenes not available attempting to install ]\n")
 +
  biocLite("siggenes")
 
}
 
}
 
+
 
set.seed(seed)
 
set.seed(seed)
 
starttime <- proc.time()[3]
 
starttime <- proc.time()[3]
 
+
 
# Set up False discovery and False non discovery vectors
 
# Set up False discovery and False non discovery vectors
 
FD <- rep(NA, iiter)
 
FD <- rep(NA, iiter)
 
FND <- rep(NA, iiter)
 
FND <- rep(NA, iiter)
 
+
 
# Output lists
 
# Output lists
 
if(Estpi0) {
 
if(Estpi0) {
Line 56: Line 69:
 
                 V=list(), T=list())
 
                 V=list(), T=list())
 
}
 
}
 
+
for(pi0 in seqpi0) {
+
#for(pi0 in seqpi0) {
 
# Pad out FDlist here
 
# Pad out FDlist here
if(pad) {
+
#if(pad) {
  toPad <- rep(NA, iiter)
+
toPad <- rep(NA, iiter)
  FDlist[["V"]][[as.character(pi0)]] <- toPad
+
FDlist[["V"]][[as.character(pi0)]] <- toPad
  FDlist[["T"]][[as.character(pi0)]] <- toPad
+
FDlist[["T"]][[as.character(pi0)]] <- toPad
  if(Estpi0) {
+
if(Estpi0) {
    FDlist[["p0NW"]][[as.character(pi0)]]    <- toPad
+
#    FDlist[["p0NW"]][[as.character(pi0)]]    <- toPad
    FDlist[["p0"]][[as.character(pi0)]]      <- toPad
+
#    FDlist[["p0"]][[as.character(pi0)]]      <- toPad
  }
+
}
  if(use.convest) {
+
if(use.convest) {
    FDlist[["convest"]][[as.character(pi0)]] <- toPad
+
#    FDlist[["convest"]][[as.character(pi0)]] <- toPad
  }
+
}
}
+
# }
}
+
#}
 
+
 
# 2) ---------------------------- pi0 outer loop ------------------------------ #
 
# 2) ---------------------------- pi0 outer loop ------------------------------ #
 
# Main loop for each value of pi0, the proportion of truly Null hypotheses  
 
# Main loop for each value of pi0, the proportion of truly Null hypotheses  
Line 86: Line 99:
 
         alphastar <- alpha
 
         alphastar <- alpha
 
       }
 
       }
 
+
 
     m1 <- round(m * (1-pi0))
 
     m1 <- round(m * (1-pi0))
 
     m0 <- m - m1
 
     m0 <- m - m1
 
     DEmeans <- c(rep(mu1,m1), rep(mu0,m0))
 
     DEmeans <- c(rep(mu1,m1), rep(mu0,m0))
 
     DEflag <- as.logical(DEmeans)
 
     DEflag <- as.logical(DEmeans)
 
+
 
# 3) --------------------------- iiter inner loop ----------------------------- #  
 
# 3) --------------------------- iiter inner loop ----------------------------- #  
 
   for(i in seq(iiter)){
 
   for(i in seq(iiter)){
   
+
 
       # Names for list elements
 
       # Names for list elements
 
       pi0char <- as.character(round(pi0, 5))  
 
       pi0char <- as.character(round(pi0, 5))  
 
+
 
# 4) ------------------------------ Simulation -------------------------------- #       
 
# 4) ------------------------------ Simulation -------------------------------- #       
 
       X <- rnorm(m, DEmeans, sigma)
 
       X <- rnorm(m, DEmeans, sigma)
 
       pvalues <- 2*(1-pnorm(abs(X),0,sigma))
 
       pvalues <- 2*(1-pnorm(abs(X),0,sigma))
 
+
 
# 5) ----------------------------- Saving statistics -------------------------- #
 
# 5) ----------------------------- Saving statistics -------------------------- #
 
       if(Estpi0){
 
       if(Estpi0){
Line 107: Line 120:
 
# Storey uses seq(0,0.95, by=0.05) for lambda, estimates at pi(0.95)
 
# Storey uses seq(0,0.95, by=0.05) for lambda, estimates at pi(0.95)
 
# Maximum value of lambda needs to be less than 1 estimated by (m-1)/m
 
# Maximum value of lambda needs to be less than 1 estimated by (m-1)/m
 
+
 
         if(use.convest) {# Convest estimation
 
         if(use.convest) {# Convest estimation
 
           FDlist[["convest"]][[pi0char]][i] <- convest(pvalues, niter=50)
 
           FDlist[["convest"]][[pi0char]][i] <- convest(pvalues, niter=50)
 
         }
 
         }
       
+
 
         # Siggenes or first principles
 
         # Siggenes or first principles
 
         if( use.siggenes ) {
 
         if( use.siggenes ) {
Line 156: Line 169:
 
   save(FDlist, file=file.path(datadir, outfile), compress=T)
 
   save(FDlist, file=file.path(datadir, outfile), compress=T)
 
   } # End of main loop
 
   } # End of main loop
 
+
 
# Return simulation time
 
# Return simulation time
 
endtime <- proc.time()[3] - starttime
 
endtime <- proc.time()[3] - starttime
 
return(endtime)
 
return(endtime)
 
}
 
}
 
#fdrsim(outfile = "tmp", m=100, iiter=1000, seqpi0=1, #seqpi0= seq(0,1, length=5),
 
# alpha = 0.05, adaptive=F, mu0=0, mu1=3, sigma=1,seed=2, Estpi0=TRUE)
 
#load(file.path("Data", "tmp"))
 
 
#source("plotfun.R")
 
#plotfun(FDlist)
 

Latest revision as of 00:02, 21 September 2009

fdrsim <- function(outfile = "tmp", m=1000, iiter=100, seqpi0=1, alpha = 0.05,

                  adaptive=FALSE, mu0=0, mu1=3, sigma=1, seed=1,Estpi0 = FALSE,
                  datadir = ifelse(version$os=="mingw32", "D:Data", "Data"), lambdaseq = seq(0, 0.95, by=0.05),
                  ncs.value="paper", use.siggenes=TRUE, use.convest = TRUE, pad=TRUE, FDR = FALSE)

{

  1. ----------------------------------------------------------------------------- #
  2. {{#security:admin}}
  3. m = numeric number of genes to simulate
  4. itter = numeric number of iterations
  5. sepi0 = numeric vector of True pi0 values to simulate
  6. alpha = numeric number [0,1] type I probability test cutoff
  7. adaptive = logical Finner adaptive FDR control
  8. use.siggenes = logical to use pi0.est function in siggenes package
  9. lambdaseq = numeric vector lambda range for pi0.est smoothing splines
  10. ncs.value = character string "paper " or "max"
  11. use.convest = logical to use convest function in limma package
  12. pad = logical Populate padded lists for speed
  13. FDR = logical whether to calculate fdr elements to increase speed
  14. ----------------------------------------------------------------------------- #
  1. 1) ---------------------------------- Setup --------------------------------- #
  2. Check and create directories for save(...) of objects

if(length(dir(datadir))==0) {

 dir.create(datadir)

}

  1. Check and install multtest

if(!require(multtest, quietly=TRUE)) {

 if(!exists("biocLite")){
   cat("[ installing biocLite]\n")
   source("http://www.bioconductor.org/iocLite.R")
 }
 cat("[ Package:Multtest not available attempting to install ]\n")
 biocLite("multtest")

}

  1. Check and install limma

if(!require(limma, quietly=TRUE)) {

  if(!exists("biocLite")){
   cat("[ installing biocLite]\n")
   source("http://www.bioconductor.org/iocLite.R")
 }
  cat("[ Package:limma not available attempting to install ]\n")
  biocLite("limma")

}

  1. Check and install siggenes

if(!require(siggenes, quietly=TRUE)) {

  if(!exists("biocLite")){
   cat("[ installing biocLite]\n")
   source("http://www.bioconductor.org/iocLite.R")
 }
  cat("[ Package:siggenes not available attempting to install ]\n")
  biocLite("siggenes")

}

set.seed(seed) starttime <- proc.time()[3]

  1. Set up False discovery and False non discovery vectors

FD <- rep(NA, iiter) FND <- rep(NA, iiter)

  1. Output lists

if(Estpi0) {

 FDlist <- list(m=m, alpha=alpha, parameters=list(mu0=mu0,mu1=mu1),
                V=list(), T=list(), p0=list(),p0NW=list(), convest=list())

}else{

 FDlist <- list(m=m, alpha=alpha, parameters=list(mu0=mu0,mu1=mu1),
                V=list(), T=list())

}

  1. for(pi0 in seqpi0) {
  2. Pad out FDlist here
  3. if(pad) {
  4. toPad <- rep(NA, iiter)
  5. FDlist"V"as.character(pi0) <- toPad
  6. FDlist"T"as.character(pi0) <- toPad
  7. if(Estpi0) {
  8. FDlist"p0NW"as.character(pi0) <- toPad
  9. FDlist"p0"as.character(pi0) <- toPad
  10. }
  11. if(use.convest) {
  12. FDlist"convest"as.character(pi0) <- toPad
  13. }
  14. }
  15. }
  1. 2) ---------------------------- pi0 outer loop ------------------------------ #
  2. Main loop for each value of pi0, the proportion of truly Null hypotheses

for(pi0 in seqpi0)

 {
 # False discovery threshold
   if(adaptive)
     {
   # Adaptive control adjustment on true pi0 (finner & Roters, 2002)
       alphastar<- alpha/pi0
     } else { 
   # Non  adaptive control
       alphastar <- alpha
     }

   m1 <- round(m * (1-pi0))
   m0 <- m - m1
   DEmeans <- c(rep(mu1,m1), rep(mu0,m0))
   DEflag <- as.logical(DEmeans)

  1. 3) --------------------------- iiter inner loop ----------------------------- #
  for(i in seq(iiter)){

     # Names for list elements
     pi0char <- as.character(round(pi0, 5)) 

  1. 4) ------------------------------ Simulation -------------------------------- #
     X <- rnorm(m, DEmeans, sigma)
     pvalues <- 2*(1-pnorm(abs(X),0,sigma))

  1. 5) ----------------------------- Saving statistics -------------------------- #
     if(Estpi0){
  1. pi0 estimation
  2. Storey uses seq(0,0.95, by=0.05) for lambda, estimates at pi(0.95)
  3. Maximum value of lambda needs to be less than 1 estimated by (m-1)/m
       if(use.convest) {# Convest estimation
         FDlist"convest"pi0char[i] <- convest(pvalues, niter=50)
       }

       # Siggenes or first principles
       if( use.siggenes ) {
         require(siggenes, quietly=TRUE)
         # A) weighting by 1-lambda
         pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
                           ncs.weights=1-lambdaseq)$p0
         FDlist"p0"pi0char[i] <- round(pi0hat,8) 
         # B) no 1-lambda weighting
         pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
                           ncs.weights=NULL)$p0
         FDlist"p0NW"pi0char[i] <- round(pi0hat,8)
       } else {
         pi0lambda <- tapply(lambdaseq, lambdaseq,
                             function(x){sum(pvalues>x)/(m*(1-x))})
         # A) weighting by 1-lambda
         pi0hat <- min(smooth.spline(lambdaseq, pi0lambda, w=1-lambdaseq, 
                                     df=3)$y[length(lambdaseq)],1)
         FDlist"p0"pi0char[i] <- round(pi0hat,8) 
         # B) no 1-lambda weighting
         pi0hat <- min(smooth.spline(lambdaseq, pi0lambda,
                                     df=3)$y[length(lambdaseq)],1)
         FDlist"p0NW"pi0char[i] <- round(pi0hat,8) 
       }
     }
     if(FDR) {
     # Step down fdr Bengamini & Hochberg control
       Porder <- order(pvalues)
     # Sorted pvalues
       cutoff <- pvalues[Porder] > (seq(m) * alphastar)/m 
     #Reordered DEflag
       DEorder  <- DEflag[Porder] 
       rejected <- DEorder[!cutoff]
       accepted <- DEorder[cutoff]
       FD[i]  <- sum(!rejected)
       FND[i] <- sum(accepted)
     }
   }
  # Writing to FDlist every iteration of p
   if(FDR) {
     FDlist"V"pi0char <- FD
     FDlist"T"pi0char <- FND
   }
  save(FDlist, file=file.path(datadir, outfile), compress=T)
 } # End of main loop

  1. Return simulation time

endtime <- proc.time()[3] - starttime return(endtime) }