Difference between revisions of "Golubsim.R"

From Organic Design wiki
m (New page: # {{R}}{{#security:*|Sven}})
 
(Function added)
Line 1: Line 1:
 
# {{R}}{{#security:*|Sven}}
 
# {{R}}{{#security:*|Sven}}
 +
nCr <- function(n,r) {
 +
  factorial(n) / (factorial(n-r) * factorial(r))
 +
}
 +
golubsim <- function(outfile = "tmp", iiter=nCr(11,6), seqpi0=1, alpha = 0.05,
 +
                    adaptive=FALSE, mu0=0, mu1=3, sigma=1, seed=1,
 +
                    datadir = "Data", lambdaseq = seq(0, 0.95, by=0.05),
 +
                    ncs.value="paper", use.siggenes=TRUE, use.convest = TRUE,
 +
                    pad=TRUE, FDR = FALSE)
 +
{
 +
# ----------------------------------------------------------------------------- #
 +
# m            = numeric number of genes to simulate
 +
# itter        = numeric number of iterations
 +
# sepi0        = numeric vector of True pi0 values to simulate
 +
# alpha        = numeric number [0,1] type I probability test cutoff
 +
# adaptive    = logical Finner adaptive FDR control
 +
# use.siggenes = logical to use pi0.est function in siggenes package
 +
# lambdaseq    = numeric vector lambda range for pi0.est smoothing splines
 +
# ncs.value    =  character string "paper  " or "max"
 +
# use.convest  = logical to use convest function in limma package
 +
# pad          = logical Populate padded lists for speed
 +
# ----------------------------------------------------------------------------- #
 +
 +
# 1) ---------------------------------- Setup --------------------------------- # 
 +
# Check and create directories for save(...) of objects
 +
if(length(dir(datadir))==0) {
 +
  create.dir(datadir)
 +
}
 +
 +
# Check and install multtest
 +
if(!require(multtest, quietly=TRUE)) {
 +
  cat("Package:Multtest not available attempting to install\n")
 +
  install.packages("multtest")
 +
}
 +
 +
data(golub)
 +
m <- nrow(golub)
 +
 +
# AML slides=11
 +
n <- 11
 +
r <-  6
 +
 +
m <- nCr(n,r)
 +
AML.samples <- list()
 +
i <- 1
 +
while(i <= m) {
 +
  value <- sort(sample(n, r, replace=FALSE))
 +
  key  <- paste(value, collapse=" ")
 +
  if(is.null(AML.samples[[key]])) {
 +
    AML.samples[[key]] <- value
 +
    i <-i+1
 +
    print(i)
 +
  }
 +
}
 +
 +
# ALL slides=27
 +
n <- 27
 +
r <-  6
 +
 +
ALL.samples <- list()
 +
i <- 1
 +
while(i <= m) {
 +
  value <- sort(sample(n, r, replace=FALSE))
 +
  key  <- paste(value, collapse=" ")
 +
  if(is.null(ALL.samples[[key]])) {
 +
    ALL.samples[[key]] <- value
 +
    i <-i+1
 +
    print(i)
 +
  }
 +
}
 +
 +
# Need to randomise the way the keys are drawn
 +
ALLdraw <- sample(m,m, replace=FALSE)
 +
AMLdraw <- sample(m,m, replace=FALSE)
 +
 +
golub.ALL <- golub[,golub.cl==0]
 +
golub.AML <- golub[,golub.cl==1]
 +
 +
golub.clSample <- rep(0:1, each=6)
 +
 +
design.Sample <- cbind("ALL"=1, "AMLvsALL"=golub.clSample)
 +
 +
 +
# Check and install limma
 +
if(!require(limma, quietly=TRUE)) {
 +
  cat("Package:limma not available attempting to install\n")
 +
  install.packages("limma")
 +
}
 +
# Check and install siggenes
 +
if(!require(siggenes, quietly=TRUE)) {
 +
  cat("Package:siggenes not available attempting to install\n")
 +
  install.packages("siggenes")
 +
}
 +
 +
set.seed(seed)
 +
starttime <- proc.time()[3]
 +
 +
# Set up False discovery and False non discovery vectors
 +
#FD <- rep(NA, iiter)
 +
#FND <- rep(NA, iiter)
 +
 +
# Output lists
 +
 +
FDlist <- list(m=m, alpha=alpha, p0=list(),p0NW=list(), convest=list())
 +
 +
 +
# 2) ---------------------------- pi0 outer loop ------------------------------ #
 +
# Main loop for each value of pi0, the proportion of truly Null hypotheses
 +
 +
#for(pi0 in seqpi0)
 +
#  {
 +
  # False discovery threshold
 +
#    if(adaptive)
 +
#      {
 +
    # Adaptive control adjustment on true pi0 (finner & Roters, 2002)
 +
#        alphastar<- alpha/pi0
 +
#      } else {
 +
    # Non  adaptive control
 +
alphastar <- alpha
 +
#      }
 +
 +
#    m1 <- round(m * (1-pi0))
 +
#    m0 <- m - m1
 +
#    DEmeans <- c(rep(mu1,m1), rep(mu0,m0))
 +
#    DEflag <- as.logical(DEmeans)
 +
 +
# 3) --------------------------- iiter inner loop ----------------------------- #
 +
for(i in seq(iiter)){
 +
   
 +
      # Names for list elements
 +
#      pi0char <- as.character(round(pi0, 5))
 +
 +
# 4) ------------------------ Permutation analysis ---------------------------- #     
 +
  x <- cbind(golub.ALL[,ALL.samples[[ALLdraw[i]]]],
 +
            golub.AML[,AML.samples[[AMLdraw[i]]]]
 +
            )
 +
 +
  fit <- lmFit(x, design.Sample)
 +
  fit <- eBayes(fit)
 +
  pvalues <- fit$p.value[,"AMLvsALL"]
 +
#  p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH")
 +
#  pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE)
 +
 +
# 5) ----------------------------- Saving statistics -------------------------- #
 +
# pi0 estimation
 +
# Storey uses seq(0,0.95, by=0.05) for lambda, estimates at pi(0.95)
 +
# Maximum value of lambda needs to be less than 1 estimated by (m-1)/m
 +
 +
  if(use.convest) {# Convest estimation
 +
    FDlist[["convest"]][i] <- convest(pvalues, niter=50)
 +
  }
 +
       
 +
        # Siggenes or first principles
 +
  if( use.siggenes ) {
 +
    require(siggenes, quietly=TRUE)
 +
          # A) weighting by 1-lambda
 +
    pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
 +
                      ncs.weights=1-lambdaseq)$p0
 +
    FDlist[["p0"]][i] <- round(pi0hat,8)
 +
          # B) no 1-lambda weighting
 +
    pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
 +
                      ncs.weights=NULL)$p0
 +
    FDlist[["p0NW"]][i] <- round(pi0hat,8)
 +
  } else {
 +
    pi0lambda <- tapply(lambdaseq, lambdaseq,
 +
                        function(x){sum(pvalues>x)/(m*(1-x))})
 +
          # A) weighting by 1-lambda
 +
    pi0hat <- min(smooth.spline(lambdaseq, pi0lambda, w=1-lambdaseq,
 +
                                df=3)$y[length(lambdaseq)],1)
 +
    FDlist[["p0"]][i] <- round(pi0hat,8)
 +
          # B) no 1-lambda weighting
 +
    pi0hat <- min(smooth.spline(lambdaseq, pi0lambda,
 +
                                df=3)$y[length(lambdaseq)],1)
 +
    FDlist[["p0NW"]][i] <- round(pi0hat,8)
 +
  }
 +
 +
#      if(FDR) {
 +
      # Step down fdr Bengamini & Hochberg control
 +
#        Porder <- order(pvalues)
 +
      # Sorted pvalues
 +
#        cutoff <- pvalues[Porder] > (seq(m) * alphastar)/m
 +
      #Reordered DEflag
 +
#        DEorder  <- DEflag[Porder]
 +
#        rejected <- DEorder[!cutoff]
 +
#        accepted <- DEorder[cutoff]
 +
#        FD[i]  <- sum(!rejected)
 +
#        FND[i] <- sum(accepted)
 +
#      }
 +
#    }
 +
  # Writing to FDlist every iteration of p
 +
#    if(FDR) {
 +
#      FDlist[["V"]][[pi0char]] <- FD
 +
#      FDlist[["T"]][[pi0char]] <- FND
 +
#    }
 +
  save(FDlist, file=file.path(datadir, outfile), compress=T)
 +
} # End of main loop
 +
 +
# Return simulation time
 +
endtime <- proc.time()[3] - starttime
 +
return(endtime)
 +
}
 +
 +
#fdrsim(outfile = "tmp", m=100, iiter=1000, seqpi0=1, #seqpi0= seq(0,1, length=5),
 +
# alpha = 0.05, adaptive=F, mu0=0, mu1=3, sigma=1,seed=2, Estpi0=TRUE)
 +
#load(file.path("Data", "tmp"))
 +
 +
#source("plotfun.R")
 +
#plotfun(FDlist)

Revision as of 02:05, 20 June 2007

Code snipits and programs written in R, S or S-PLUS{{#security:*|Sven}} nCr <- function(n,r) {

 factorial(n) / (factorial(n-r) * factorial(r))

} golubsim <- function(outfile = "tmp", iiter=nCr(11,6), seqpi0=1, alpha = 0.05,

                    adaptive=FALSE, mu0=0, mu1=3, sigma=1, seed=1,
                    datadir = "Data", lambdaseq = seq(0, 0.95, by=0.05),
                    ncs.value="paper", use.siggenes=TRUE, use.convest = TRUE,
                    pad=TRUE, FDR = FALSE)

{

  1. ----------------------------------------------------------------------------- #
  2. m = numeric number of genes to simulate
  3. itter = numeric number of iterations
  4. sepi0 = numeric vector of True pi0 values to simulate
  5. alpha = numeric number [0,1] type I probability test cutoff
  6. adaptive = logical Finner adaptive FDR control
  7. use.siggenes = logical to use pi0.est function in siggenes package
  8. lambdaseq = numeric vector lambda range for pi0.est smoothing splines
  9. ncs.value = character string "paper " or "max"
  10. use.convest = logical to use convest function in limma package
  11. pad = logical Populate padded lists for speed
  12. ----------------------------------------------------------------------------- #
  1. 1) ---------------------------------- Setup --------------------------------- #
  2. Check and create directories for save(...) of objects

if(length(dir(datadir))==0) {

 create.dir(datadir)

}

  1. Check and install multtest

if(!require(multtest, quietly=TRUE)) {

 cat("Package:Multtest not available attempting to install\n")
 install.packages("multtest")

}

data(golub) m <- nrow(golub)

  1. AML slides=11

n <- 11 r <- 6

m <- nCr(n,r) AML.samples <- list() i <- 1 while(i <= m) {

 value <- sort(sample(n, r, replace=FALSE))
 key   <- paste(value, collapse=" ")
 if(is.null(AML.sampleskey)) {
   AML.sampleskey <- value
   i <-i+1
   print(i)
 }

}

  1. ALL slides=27

n <- 27 r <- 6

ALL.samples <- list() i <- 1 while(i <= m) {

 value <- sort(sample(n, r, replace=FALSE))
 key   <- paste(value, collapse=" ")
 if(is.null(ALL.sampleskey)) {
   ALL.sampleskey <- value
   i <-i+1
   print(i)
 }

}

  1. Need to randomise the way the keys are drawn

ALLdraw <- sample(m,m, replace=FALSE) AMLdraw <- sample(m,m, replace=FALSE)

golub.ALL <- golub[,golub.cl==0] golub.AML <- golub[,golub.cl==1]

golub.clSample <- rep(0:1, each=6)

design.Sample <- cbind("ALL"=1, "AMLvsALL"=golub.clSample)


  1. Check and install limma

if(!require(limma, quietly=TRUE)) {

 cat("Package:limma not available attempting to install\n")
 install.packages("limma")

}

  1. Check and install siggenes

if(!require(siggenes, quietly=TRUE)) {

 cat("Package:siggenes not available attempting to install\n")
 install.packages("siggenes")

}

set.seed(seed) starttime <- proc.time()[3]

  1. Set up False discovery and False non discovery vectors
  2. FD <- rep(NA, iiter)
  3. FND <- rep(NA, iiter)
  1. Output lists

FDlist <- list(m=m, alpha=alpha, p0=list(),p0NW=list(), convest=list())


  1. 2) ---------------------------- pi0 outer loop ------------------------------ #
  2. Main loop for each value of pi0, the proportion of truly Null hypotheses
  1. for(pi0 in seqpi0)
  2. {
 # False discovery threshold
  1. if(adaptive)
  2. {
   # Adaptive control adjustment on true pi0 (finner & Roters, 2002)
  1. alphastar<- alpha/pi0
  2. } else {
   # Non  adaptive control

alphastar <- alpha

  1. }
  1. m1 <- round(m * (1-pi0))
  2. m0 <- m - m1
  3. DEmeans <- c(rep(mu1,m1), rep(mu0,m0))
  4. DEflag <- as.logical(DEmeans)
  1. 3) --------------------------- iiter inner loop ----------------------------- #

for(i in seq(iiter)){

     # Names for list elements
  1. pi0char <- as.character(round(pi0, 5))
  1. 4) ------------------------ Permutation analysis ---------------------------- #
 x <- cbind(golub.ALL[,ALL.samples[[ALLdraw[i]]]],
            golub.AML[,AML.samples[[AMLdraw[i]]]]
            )

 fit <- lmFit(x, design.Sample) 
 fit <- eBayes(fit)
 pvalues <- fit$p.value[,"AMLvsALL"]
  1. p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH")
  2. pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE)
  1. 5) ----------------------------- Saving statistics -------------------------- #
  2. pi0 estimation
  3. Storey uses seq(0,0.95, by=0.05) for lambda, estimates at pi(0.95)
  4. Maximum value of lambda needs to be less than 1 estimated by (m-1)/m
 if(use.convest) {# Convest estimation
   FDlist"convest"[i] <- convest(pvalues, niter=50)
 }
       
       # Siggenes or first principles
 if( use.siggenes ) {
   require(siggenes, quietly=TRUE)
         # A) weighting by 1-lambda
   pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
                     ncs.weights=1-lambdaseq)$p0
   FDlist"p0"[i] <- round(pi0hat,8) 
         # B) no 1-lambda weighting
   pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
                     ncs.weights=NULL)$p0
   FDlist"p0NW"[i] <- round(pi0hat,8)
 } else {
   pi0lambda <- tapply(lambdaseq, lambdaseq,
                       function(x){sum(pvalues>x)/(m*(1-x))})
         # A) weighting by 1-lambda
   pi0hat <- min(smooth.spline(lambdaseq, pi0lambda, w=1-lambdaseq, 
                               df=3)$y[length(lambdaseq)],1)
   FDlist"p0"[i] <- round(pi0hat,8) 
         # B) no 1-lambda weighting
   pi0hat <- min(smooth.spline(lambdaseq, pi0lambda,
                               df=3)$y[length(lambdaseq)],1)
   FDlist"p0NW"[i] <- round(pi0hat,8) 
 }
  1. if(FDR) {
     # Step down fdr Bengamini & Hochberg control
  1. Porder <- order(pvalues)
     # Sorted pvalues
  1. cutoff <- pvalues[Porder] > (seq(m) * alphastar)/m
     #Reordered DEflag
  1. DEorder <- DEflag[Porder]
  2. rejected <- DEorder[!cutoff]
  3. accepted <- DEorder[cutoff]
  4. FD[i] <- sum(!rejected)
  5. FND[i] <- sum(accepted)
  6. }
  7. }
  # Writing to FDlist every iteration of p
  1. if(FDR) {
  2. FDlist"V"pi0char <- FD
  3. FDlist"T"pi0char <- FND
  4. }
 save(FDlist, file=file.path(datadir, outfile), compress=T)

} # End of main loop

  1. Return simulation time

endtime <- proc.time()[3] - starttime return(endtime) }

  1. fdrsim(outfile = "tmp", m=100, iiter=1000, seqpi0=1, #seqpi0= seq(0,1, length=5),
  2. alpha = 0.05, adaptive=F, mu0=0, mu1=3, sigma=1,seed=2, Estpi0=TRUE)
  3. load(file.path("Data", "tmp"))
  1. source("plotfun.R")
  2. plotfun(FDlist)