Difference between revisions of "Golubsim.R"

From Organic Design wiki
(Function added)
(Bug fixes)
 
(One intermediate revision by the same user not shown)
Line 3: Line 3:
 
   factorial(n) / (factorial(n-r) * factorial(r))
 
   factorial(n) / (factorial(n-r) * factorial(r))
 
}
 
}
golubsim <- function(outfile = "tmp", iiter=nCr(11,6), seqpi0=1, alpha = 0.05,
+
golubsim <- function(outfile = "tmp", iiter=nCr(11,6), alpha = 0.05, seed=1,
                    adaptive=FALSE, mu0=0, mu1=3, sigma=1, seed=1,
 
 
                     datadir = "Data", lambdaseq = seq(0, 0.95, by=0.05),
 
                     datadir = "Data", lambdaseq = seq(0, 0.95, by=0.05),
                     ncs.value="paper", use.siggenes=TRUE, use.convest = TRUE,
+
                     ncs.value="paper", use.siggenes=TRUE, use.convest = TRUE
                     pad=TRUE, FDR = FALSE)
+
                     ) {
{
 
 
# ----------------------------------------------------------------------------- #
 
# ----------------------------------------------------------------------------- #
# m            = numeric number of genes to simulate
 
 
# itter        = numeric number of iterations
 
# itter        = numeric number of iterations
# sepi0        = numeric vector of True pi0 values to simulate
 
 
# alpha        = numeric number [0,1] type I probability test cutoff  
 
# alpha        = numeric number [0,1] type I probability test cutoff  
# adaptive    = logical Finner adaptive FDR control
 
 
# use.siggenes = logical to use pi0.est function in siggenes package
 
# use.siggenes = logical to use pi0.est function in siggenes package
 
# lambdaseq    = numeric vector lambda range for pi0.est smoothing splines  
 
# lambdaseq    = numeric vector lambda range for pi0.est smoothing splines  
 
# ncs.value    =  character string "paper  " or "max"
 
# ncs.value    =  character string "paper  " or "max"
 
# use.convest  = logical to use convest function in limma package
 
# use.convest  = logical to use convest function in limma package
# pad          = logical Populate padded lists for speed
 
 
# ----------------------------------------------------------------------------- #
 
# ----------------------------------------------------------------------------- #
  
 
# 1) ---------------------------------- Setup --------------------------------- #   
 
# 1) ---------------------------------- Setup --------------------------------- #   
 
# Check and create directories for save(...) of objects  
 
# Check and create directories for save(...) of objects  
if(length(dir(datadir))==0) {
+
  if(length(dir(datadir))==0) {
  create.dir(datadir)
+
    create.dir(datadir)
}
+
  }
  
 
# Check and install multtest
 
# Check and install multtest
if(!require(multtest, quietly=TRUE)) {
+
  if(!require(multtest, quietly=TRUE)) {
  cat("Package:Multtest not available attempting to install\n")
+
    cat("Package:Multtest not available attempting to install\n")
  install.packages("multtest")
+
    install.packages("multtest")
}
+
  }
  
data(golub)
+
  data(golub)
m <- nrow(golub)
+
  m <- nrow(golub)
  
 
# AML slides=11  
 
# AML slides=11  
n <- 11
+
  n <- 11
r <-  6
+
  r <-  6
 
   
 
   
m <- nCr(n,r)
+
 
AML.samples <- list()
+
  AML.samples <- list()
i <- 1
+
  i <- 1
while(i <= m) {
+
  while(i <= iiter) {
  value <- sort(sample(n, r, replace=FALSE))
+
    value <- sort(sample(n, r, replace=FALSE))
  key  <- paste(value, collapse=" ")
+
    key  <- paste(value, collapse=" ")
  if(is.null(AML.samples[[key]])) {
+
    if(is.null(AML.samples[[key]])) {
    AML.samples[[key]] <- value
+
      AML.samples[[key]] <- value
    i <-i+1
+
      i <-i+1
    print(i)
+
#      if(!(i %% 10)){ # print every 10th
 +
#        print(i)
 +
#      }
 +
    }
 
   }
 
   }
}
+
 
  
 
# ALL slides=27
 
# ALL slides=27
n <- 27
+
  n <- 27
r <-  6
+
  r <-  6
+
 
ALL.samples <- list()
+
  ALL.samples <- list()
i <- 1
+
  i <- 1
while(i <= m) {
+
  while(i <= iiter) {
  value <- sort(sample(n, r, replace=FALSE))
+
    value <- sort(sample(n, r, replace=FALSE))
  key  <- paste(value, collapse=" ")
+
    key  <- paste(value, collapse=" ")
  if(is.null(ALL.samples[[key]])) {
+
    if(is.null(ALL.samples[[key]])) {
    ALL.samples[[key]] <- value
+
      ALL.samples[[key]] <- value
    i <-i+1
+
      i <-i+1
    print(i)
+
#      if(!(i %% 10)){ # print every 10th
 +
#        print(i)
 +
#      }
 +
    }
 
   }
 
   }
}
+
cat("Column permutations sampled\n")
  
 
# Need to randomise the way the keys are drawn
 
# Need to randomise the way the keys are drawn
ALLdraw <- sample(m,m, replace=FALSE)
+
  ALLdraw <- sample(iiter,iiter, replace=FALSE)
AMLdraw <- sample(m,m, replace=FALSE)
+
  AMLdraw <- sample(iiter,iiter, replace=FALSE)
 
   
 
   
golub.ALL <- golub[,golub.cl==0]
+
  golub.ALL <- golub[,golub.cl==0]
golub.AML <- golub[,golub.cl==1]
+
  golub.AML <- golub[,golub.cl==1]
+
 
golub.clSample <- rep(0:1, each=6)
+
  golub.clSample <- rep(0:1, each=6)
+
 
design.Sample <- cbind("ALL"=1, "AMLvsALL"=golub.clSample)
+
  design.Sample <- cbind("ALL"=1, "AMLvsALL"=golub.clSample)
 
+
 
  
 
# Check and install limma
 
# Check and install limma
if(!require(limma, quietly=TRUE)) {
+
  if(!require(limma, quietly=TRUE)) {
  cat("Package:limma not available attempting to install\n")
+
    cat("Package:limma not available attempting to install\n")
  install.packages("limma")
+
    install.packages("limma")
}
+
  }
 
# Check and install siggenes
 
# Check and install siggenes
if(!require(siggenes, quietly=TRUE)) {
+
  if(!require(siggenes, quietly=TRUE)) {
  cat("Package:siggenes not available attempting to install\n")
+
    cat("Package:siggenes not available attempting to install\n")
  install.packages("siggenes")
+
    install.packages("siggenes")
}
+
  }
  
set.seed(seed)
+
  set.seed(seed)
starttime <- proc.time()[3]
+
  starttime <- proc.time()[3]
 
 
# Set up False discovery and False non discovery vectors
 
#FD <- rep(NA, iiter)
 
#FND <- rep(NA, iiter)
 
  
 
# Output lists
 
# Output lists
  
FDlist <- list(m=m, alpha=alpha, p0=list(),p0NW=list(), convest=list())
+
  FDlist <- list(m=m, alpha=alpha, p0=rep(NA,iiter), p0NW=rep(NA,iiter), convest=rep(NA,iiter))
 
 
 
 
# 2) ---------------------------- pi0 outer loop ------------------------------ #
 
# Main loop for each value of pi0, the proportion of truly Null hypotheses
 
 
 
#for(pi0 in seqpi0)
 
#  {
 
  # False discovery threshold
 
#    if(adaptive)
 
#      {
 
    # Adaptive control adjustment on true pi0 (finner & Roters, 2002)
 
#        alphastar<- alpha/pi0
 
#      } else {
 
    # Non  adaptive control
 
alphastar <- alpha
 
#      }
 
 
 
#    m1 <- round(m * (1-pi0))
 
#    m0 <- m - m1
 
#    DEmeans <- c(rep(mu1,m1), rep(mu0,m0))
 
#    DEflag <- as.logical(DEmeans)
 
  
# 3) --------------------------- iiter inner loop ----------------------------- #  
+
  alphastar <- alpha # Do we need this?
for(i in seq(iiter)){
 
   
 
      # Names for list elements
 
#      pi0char <- as.character(round(pi0, 5))
 
  
 +
# 3) --------------------------- iiter loop ----------------------------- #
 +
  for(i in seq(iiter)){
 
# 4) ------------------------ Permutation analysis ---------------------------- #       
 
# 4) ------------------------ Permutation analysis ---------------------------- #       
  x <- cbind(golub.ALL[,ALL.samples[[ALLdraw[i]]]],
+
    x <- cbind(golub.ALL[,ALL.samples[[ALLdraw[i]]]],
            golub.AML[,AML.samples[[AMLdraw[i]]]]
+
              golub.AML[,AML.samples[[AMLdraw[i]]]]
            )
+
              )
 
   
 
   
  fit <- lmFit(x, design.Sample)  
+
    fit <- lmFit(x, design.Sample)  
  fit <- eBayes(fit)
+
    fit <- eBayes(fit)
  pvalues <- fit$p.value[,"AMLvsALL"]
+
    pvalues <- fit$p.value[,"AMLvsALL"]
 
#  p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH")
 
#  p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH")
 
#  pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE)
 
#  pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE)
Line 146: Line 119:
 
# Storey uses seq(0,0.95, by=0.05) for lambda, estimates at pi(0.95)
 
# Storey uses seq(0,0.95, by=0.05) for lambda, estimates at pi(0.95)
 
# Maximum value of lambda needs to be less than 1 estimated by (m-1)/m
 
# Maximum value of lambda needs to be less than 1 estimated by (m-1)/m
 
+
    if(use.convest) {# Convest estimation
  if(use.convest) {# Convest estimation
+
      FDlist[["convest"]][i] <- convest(pvalues, niter=50)
    FDlist[["convest"]][i] <- convest(pvalues, niter=50)
+
    }
  }
+
  # Siggenes or first principles
       
+
    if( use.siggenes ) {
        # Siggenes or first principles
+
      require(siggenes, quietly=TRUE)
  if( use.siggenes ) {
 
    require(siggenes, quietly=TRUE)
 
 
           # A) weighting by 1-lambda
 
           # A) weighting by 1-lambda
    pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
+
      pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
                      ncs.weights=1-lambdaseq)$p0
+
                        ncs.weights=1-lambdaseq)$p0
    FDlist[["p0"]][i] <- round(pi0hat,8)  
+
      FDlist[["p0"]][i] <- round(pi0hat,8)  
 
           # B) no 1-lambda weighting
 
           # B) no 1-lambda weighting
    pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
+
      pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
                      ncs.weights=NULL)$p0
+
                        ncs.weights=NULL)$p0
    FDlist[["p0NW"]][i] <- round(pi0hat,8)
+
      FDlist[["p0NW"]][i] <- round(pi0hat,8)
  } else {
+
    } else {
    pi0lambda <- tapply(lambdaseq, lambdaseq,
+
      pi0lambda <- tapply(lambdaseq, lambdaseq,
                        function(x){sum(pvalues>x)/(m*(1-x))})
+
                          function(x){sum(pvalues>x)/(m*(1-x))})
 
           # A) weighting by 1-lambda
 
           # A) weighting by 1-lambda
    pi0hat <- min(smooth.spline(lambdaseq, pi0lambda, w=1-lambdaseq,  
+
      pi0hat <- min(smooth.spline(lambdaseq, pi0lambda, w=1-lambdaseq,  
                                df=3)$y[length(lambdaseq)],1)
+
                                  df=3)$y[length(lambdaseq)],1)
    FDlist[["p0"]][i] <- round(pi0hat,8)  
+
      FDlist[["p0"]][i] <- round(pi0hat,8)  
 
           # B) no 1-lambda weighting
 
           # B) no 1-lambda weighting
    pi0hat <- min(smooth.spline(lambdaseq, pi0lambda,
+
      pi0hat <- min(smooth.spline(lambdaseq, pi0lambda,
                                df=3)$y[length(lambdaseq)],1)
+
                                  df=3)$y[length(lambdaseq)],1)
    FDlist[["p0NW"]][i] <- round(pi0hat,8)  
+
      FDlist[["p0NW"]][i] <- round(pi0hat,8)  
  }
+
    }
 
+
    save(FDlist, file=file.path(datadir, outfile), compress=T)
#      if(FDR) {
+
  } # End of main loop
      # Step down fdr Bengamini & Hochberg control
 
#        Porder <- order(pvalues)
 
      # Sorted pvalues
 
#        cutoff <- pvalues[Porder] > (seq(m) * alphastar)/m
 
      #Reordered DEflag
 
#        DEorder  <- DEflag[Porder]
 
#        rejected <- DEorder[!cutoff]
 
#        accepted <- DEorder[cutoff]
 
#        FD[i]  <- sum(!rejected)
 
#        FND[i] <- sum(accepted)
 
#      }
 
#    }
 
  # Writing to FDlist every iteration of p
 
#    if(FDR) {
 
#      FDlist[["V"]][[pi0char]] <- FD
 
#      FDlist[["T"]][[pi0char]] <- FND
 
#    }
 
  save(FDlist, file=file.path(datadir, outfile), compress=T)
 
} # End of main loop
 
  
 
# Return simulation time
 
# Return simulation time
endtime <- proc.time()[3] - starttime
+
  endtime <- proc.time()[3] - starttime
return(endtime)
+
  return(endtime)
 
}
 
}
  
#fdrsim(outfile = "tmp", m=100, iiter=1000, seqpi0=1, #seqpi0= seq(0,1, length=5),
 
# alpha = 0.05, adaptive=F, mu0=0, mu1=3, sigma=1,seed=2, Estpi0=TRUE)
 
 
#load(file.path("Data", "tmp"))
 
#load(file.path("Data", "tmp"))
  
 
#source("plotfun.R")
 
#source("plotfun.R")
 
#plotfun(FDlist)
 
#plotfun(FDlist)

Latest revision as of 02:27, 20 June 2007

Code snipits and programs written in R, S or S-PLUS{{#security:*|Sven}} nCr <- function(n,r) {

 factorial(n) / (factorial(n-r) * factorial(r))

} golubsim <- function(outfile = "tmp", iiter=nCr(11,6), alpha = 0.05, seed=1,

                    datadir = "Data", lambdaseq = seq(0, 0.95, by=0.05),
                    ncs.value="paper", use.siggenes=TRUE, use.convest = TRUE
                    ) {
  1. ----------------------------------------------------------------------------- #
  2. itter = numeric number of iterations
  3. alpha = numeric number [0,1] type I probability test cutoff
  4. use.siggenes = logical to use pi0.est function in siggenes package
  5. lambdaseq = numeric vector lambda range for pi0.est smoothing splines
  6. ncs.value = character string "paper " or "max"
  7. use.convest = logical to use convest function in limma package
  8. ----------------------------------------------------------------------------- #
  1. 1) ---------------------------------- Setup --------------------------------- #
  2. Check and create directories for save(...) of objects
 if(length(dir(datadir))==0) {
   create.dir(datadir)
 }
  1. Check and install multtest
 if(!require(multtest, quietly=TRUE)) {
   cat("Package:Multtest not available attempting to install\n")
   install.packages("multtest")
 }
 data(golub)
 m <- nrow(golub)
  1. AML slides=11
 n <- 11
 r <-  6

 AML.samples <- list()
 i <- 1
 while(i <= iiter) {
   value <- sort(sample(n, r, replace=FALSE))
   key   <- paste(value, collapse=" ")
   if(is.null(AML.sampleskey)) {
     AML.sampleskey <- value
     i <-i+1
  1. if(!(i %% 10)){ # print every 10th
  2. print(i)
  3. }
   }
 }


  1. ALL slides=27
 n <- 27
 r <-  6
 
 ALL.samples <- list()
 i <- 1
 while(i <= iiter) {
   value <- sort(sample(n, r, replace=FALSE))
   key   <- paste(value, collapse=" ")
   if(is.null(ALL.sampleskey)) {
     ALL.sampleskey <- value
     i <-i+1
  1. if(!(i %% 10)){ # print every 10th
  2. print(i)
  3. }
   }
 }

cat("Column permutations sampled\n")

  1. Need to randomise the way the keys are drawn
 ALLdraw <- sample(iiter,iiter, replace=FALSE)
 AMLdraw <- sample(iiter,iiter, replace=FALSE)

 golub.ALL <- golub[,golub.cl==0]
 golub.AML <- golub[,golub.cl==1]
 
 golub.clSample <- rep(0:1, each=6)
 
 design.Sample <- cbind("ALL"=1, "AMLvsALL"=golub.clSample)
 
  1. Check and install limma
 if(!require(limma, quietly=TRUE)) {
   cat("Package:limma not available attempting to install\n")
   install.packages("limma")
 }
  1. Check and install siggenes
 if(!require(siggenes, quietly=TRUE)) {
   cat("Package:siggenes not available attempting to install\n")
   install.packages("siggenes")
 }
 set.seed(seed)
 starttime <- proc.time()[3]
  1. Output lists
 FDlist <- list(m=m, alpha=alpha, p0=rep(NA,iiter), p0NW=rep(NA,iiter), convest=rep(NA,iiter))
 alphastar <- alpha # Do we need this?
  1. 3) --------------------------- iiter loop ----------------------------- #
 for(i in seq(iiter)){
  1. 4) ------------------------ Permutation analysis ---------------------------- #
   x <- cbind(golub.ALL[,ALL.samples[[ALLdraw[i]]]],
              golub.AML[,AML.samples[[AMLdraw[i]]]]
              )

   fit <- lmFit(x, design.Sample) 
   fit <- eBayes(fit)
   pvalues <- fit$p.value[,"AMLvsALL"]
  1. p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH")
  2. pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE)
  1. 5) ----------------------------- Saving statistics -------------------------- #
  2. pi0 estimation
  3. Storey uses seq(0,0.95, by=0.05) for lambda, estimates at pi(0.95)
  4. Maximum value of lambda needs to be less than 1 estimated by (m-1)/m
   if(use.convest) {# Convest estimation
     FDlist"convest"[i] <- convest(pvalues, niter=50)
   }
 # Siggenes or first principles
   if( use.siggenes ) {
     require(siggenes, quietly=TRUE)
         # A) weighting by 1-lambda
     pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
                       ncs.weights=1-lambdaseq)$p0
     FDlist"p0"[i] <- round(pi0hat,8) 
         # B) no 1-lambda weighting
     pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
                       ncs.weights=NULL)$p0
     FDlist"p0NW"[i] <- round(pi0hat,8)
   } else {
     pi0lambda <- tapply(lambdaseq, lambdaseq,
                         function(x){sum(pvalues>x)/(m*(1-x))})
         # A) weighting by 1-lambda
     pi0hat <- min(smooth.spline(lambdaseq, pi0lambda, w=1-lambdaseq, 
                                 df=3)$y[length(lambdaseq)],1)
     FDlist"p0"[i] <- round(pi0hat,8) 
         # B) no 1-lambda weighting
     pi0hat <- min(smooth.spline(lambdaseq, pi0lambda,
                                 df=3)$y[length(lambdaseq)],1)
     FDlist"p0NW"[i] <- round(pi0hat,8) 
   }
   save(FDlist, file=file.path(datadir, outfile), compress=T)
 } # End of main loop
  1. Return simulation time
 endtime <- proc.time()[3] - starttime
 return(endtime)

}

  1. load(file.path("Data", "tmp"))
  1. source("plotfun.R")
  2. plotfun(FDlist)