Difference between revisions of "Golubsim.R"

Latest revision as of 02:27, 20 June 2007

Code snipits and programs written in R, S or S-PLUS{{#security:*|Sven}} nCr <- function(n,r) {

 factorial(n) / (factorial(n-r) * factorial(r))

} golubsim <- function(outfile = "tmp", iiter=nCr(11,6), alpha = 0.05, seed=1,

                    datadir = "Data", lambdaseq = seq(0, 0.95, by=0.05),
                    ncs.value="paper", use.siggenes=TRUE, use.convest = TRUE
                    ) {

----------------------------------------------------------------------------- #
itter = numeric number of iterations
alpha = numeric number [0,1] type I probability test cutoff
use.siggenes = logical to use pi0.est function in siggenes package
lambdaseq = numeric vector lambda range for pi0.est smoothing splines
ncs.value = character string "paper " or "max"
use.convest = logical to use convest function in limma package
----------------------------------------------------------------------------- #

1) ---------------------------------- Setup --------------------------------- #
Check and create directories for save(...) of objects

 if(length(dir(datadir))==0) {
   create.dir(datadir)
 }

Check and install multtest

 if(!require(multtest, quietly=TRUE)) {
   cat("Package:Multtest not available attempting to install\n")
   install.packages("multtest")
 }

 data(golub)
 m <- nrow(golub)

AML slides=11

 n <- 11
 r <-  6

 AML.samples <- list()
 i <- 1
 while(i <= iiter) {
   value <- sort(sample(n, r, replace=FALSE))
   key   <- paste(value, collapse=" ")
   if(is.null(AML.sampleskey)) {
     AML.sampleskey <- value
     i <-i+1

if(!(i %% 10)){ # print every 10th
print(i)
}

   }
 }

ALL slides=27

 n <- 27
 r <-  6
 
 ALL.samples <- list()
 i <- 1
 while(i <= iiter) {
   value <- sort(sample(n, r, replace=FALSE))
   key   <- paste(value, collapse=" ")
   if(is.null(ALL.sampleskey)) {
     ALL.sampleskey <- value
     i <-i+1

if(!(i %% 10)){ # print every 10th
print(i)
}

   }
 }

cat("Column permutations sampled\n")

Need to randomise the way the keys are drawn

 ALLdraw <- sample(iiter,iiter, replace=FALSE)
 AMLdraw <- sample(iiter,iiter, replace=FALSE)

 golub.ALL <- golub[,golub.cl==0]
 golub.AML <- golub[,golub.cl==1]
 
 golub.clSample <- rep(0:1, each=6)
 
 design.Sample <- cbind("ALL"=1, "AMLvsALL"=golub.clSample)

Check and install limma

 if(!require(limma, quietly=TRUE)) {
   cat("Package:limma not available attempting to install\n")
   install.packages("limma")
 }

Check and install siggenes

 if(!require(siggenes, quietly=TRUE)) {
   cat("Package:siggenes not available attempting to install\n")
   install.packages("siggenes")
 }

 set.seed(seed)
 starttime <- proc.time()[3]

Output lists

 FDlist <- list(m=m, alpha=alpha, p0=rep(NA,iiter), p0NW=rep(NA,iiter), convest=rep(NA,iiter))

 alphastar <- alpha # Do we need this?

3) --------------------------- iiter loop ----------------------------- #

 for(i in seq(iiter)){

4) ------------------------ Permutation analysis ---------------------------- #

   x <- cbind(golub.ALL[,ALL.samples[[ALLdraw[i]]]],
              golub.AML[,AML.samples[[AMLdraw[i]]]]
              )

   fit <- lmFit(x, design.Sample) 
   fit <- eBayes(fit)
   pvalues <- fit$p.value[,"AMLvsALL"]

p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH")
pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE)

5) ----------------------------- Saving statistics -------------------------- #
pi0 estimation
Storey uses seq(0,0.95, by=0.05) for lambda, estimates at pi(0.95)
Maximum value of lambda needs to be less than 1 estimated by (m-1)/m

   if(use.convest) {# Convest estimation
     FDlist"convest"[i] <- convest(pvalues, niter=50)
   }
 # Siggenes or first principles
   if( use.siggenes ) {
     require(siggenes, quietly=TRUE)
         # A) weighting by 1-lambda
     pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
                       ncs.weights=1-lambdaseq)$p0
     FDlist"p0"[i] <- round(pi0hat,8) 
         # B) no 1-lambda weighting
     pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
                       ncs.weights=NULL)$p0
     FDlist"p0NW"[i] <- round(pi0hat,8)
   } else {
     pi0lambda <- tapply(lambdaseq, lambdaseq,
                         function(x){sum(pvalues>x)/(m*(1-x))})
         # A) weighting by 1-lambda
     pi0hat <- min(smooth.spline(lambdaseq, pi0lambda, w=1-lambdaseq, 
                                 df=3)$y[length(lambdaseq)],1)
     FDlist"p0"[i] <- round(pi0hat,8) 
         # B) no 1-lambda weighting
     pi0hat <- min(smooth.spline(lambdaseq, pi0lambda,
                                 df=3)$y[length(lambdaseq)],1)
     FDlist"p0NW"[i] <- round(pi0hat,8) 
   }
   save(FDlist, file=file.path(datadir, outfile), compress=T)
 } # End of main loop

Return simulation time

 endtime <- proc.time()[3] - starttime
 return(endtime)

}

load(file.path("Data", "tmp"))

source("plotfun.R")
plotfun(FDlist)

Difference between revisions of "Golubsim.R"

Latest revision as of 02:27, 20 June 2007

Navigation menu

Views

Personal tools

Navigation

Search

Navigation

Blogs

Site map

Tools

@@ Line 3: / Line 3: @@
    factorial(n) / (factorial(n-r) * factorial(r))
 }
-golubsim <- function(outfile = "tmp", iiter=nCr(11,6), seqpi0=1, alpha = 0.05,
+golubsim <- function(outfile = "tmp", iiter=nCr(11,6), alpha = 0.05, seed=1,
-                     adaptive=FALSE, mu0=0, mu1=3, sigma=1, seed=1,
                       datadir = "Data", lambdaseq = seq(0, 0.95, by=0.05),
-                      ncs.value="paper", use.siggenes=TRUE, use.convest = TRUE,
+                      ncs.value="paper", use.siggenes=TRUE, use.convest = TRUE
-                      pad=TRUE, FDR = FALSE)
+                      ) {
-{
 # ----------------------------------------------------------------------------- #
-# m            = numeric number of genes to simulate
 # itter        = numeric number of iterations
-# sepi0        = numeric vector of True pi0 values to simulate
 # alpha        = numeric number [0,1] type I probability test cutoff
-# adaptive     = logical Finner adaptive FDR control
 # use.siggenes = logical to use pi0.est function in siggenes package
 # lambdaseq    = numeric vector lambda range for pi0.est smoothing splines
 # ncs.value    =  character string "paper  " or "max"
 # use.convest  = logical to use convest function in limma package
-# pad          = logical Populate padded lists for speed
 # ----------------------------------------------------------------------------- #
 # 1) ---------------------------------- Setup --------------------------------- #
 # Check and create directories for save(...) of objects
-if(length(dir(datadir))==0) {
+  if(length(dir(datadir))==0) {
-  create.dir(datadir)
+    create.dir(datadir)
-}
+  }
 # Check and install multtest
-if(!require(multtest, quietly=TRUE)) {
+  if(!require(multtest, quietly=TRUE)) {
-  cat("Package:Multtest not available attempting to install\n")
+    cat("Package:Multtest not available attempting to install\n")
-  install.packages("multtest")
+    install.packages("multtest")
-}
+  }
-data(golub)
+  data(golub)
-m <- nrow(golub)
+  m <- nrow(golub)
 # AML slides=11
-n <- 11
+  n <- 11
-r <-  6
+  r <-  6
-m <- nCr(n,r)
-AML.samples <- list()
+  AML.samples <- list()
-i <- 1
+  i <- 1
-while(i <= m) {
+  while(i <= iiter) {
-  value <- sort(sample(n, r, replace=FALSE))
+    value <- sort(sample(n, r, replace=FALSE))
-  key   <- paste(value, collapse=" ")
+    key   <- paste(value, collapse=" ")
-  if(is.null(AML.samples[[key]])) {
+    if(is.null(AML.samples[[key]])) {
-    AML.samples[[key]] <- value
+      AML.samples[[key]] <- value
-    i <-i+1
+      i <-i+1
-    print(i)
+#      if(!(i %% 10)){ # print every 10th
+#        print(i)
+#      }
+    }
    }
-}
 # ALL slides=27
-n <- 27
+  n <- 27
-r <-  6
+  r <-  6
-ALL.samples <- list()
+  ALL.samples <- list()
-i <- 1
+  i <- 1
-while(i <= m) {
+  while(i <= iiter) {
-  value <- sort(sample(n, r, replace=FALSE))
+    value <- sort(sample(n, r, replace=FALSE))
-  key   <- paste(value, collapse=" ")
+    key   <- paste(value, collapse=" ")
-  if(is.null(ALL.samples[[key]])) {
+    if(is.null(ALL.samples[[key]])) {
-    ALL.samples[[key]] <- value
+      ALL.samples[[key]] <- value
-    i <-i+1
+      i <-i+1
-    print(i)
+#      if(!(i %% 10)){ # print every 10th
+#        print(i)
+#      }
+    }
    }
-}
+cat("Column permutations sampled\n")
 # Need to randomise the way the keys are drawn
-ALLdraw <- sample(m,m, replace=FALSE)
+  ALLdraw <- sample(iiter,iiter, replace=FALSE)
-AMLdraw <- sample(m,m, replace=FALSE)
+  AMLdraw <- sample(iiter,iiter, replace=FALSE)
-golub.ALL <- golub[,golub.cl==0]
+  golub.ALL <- golub[,golub.cl==0]
-golub.AML <- golub[,golub.cl==1]
+  golub.AML <- golub[,golub.cl==1]
-golub.clSample <- rep(0:1, each=6)
+  golub.clSample <- rep(0:1, each=6)
-design.Sample <- cbind("ALL"=1, "AMLvsALL"=golub.clSample)
+  design.Sample <- cbind("ALL"=1, "AMLvsALL"=golub.clSample)
 # Check and install limma
-if(!require(limma, quietly=TRUE)) {
+  if(!require(limma, quietly=TRUE)) {
-  cat("Package:limma not available attempting to install\n")
+    cat("Package:limma not available attempting to install\n")
-  install.packages("limma")
+    install.packages("limma")
-}
+  }
 # Check and install siggenes
-if(!require(siggenes, quietly=TRUE)) {
+  if(!require(siggenes, quietly=TRUE)) {
-  cat("Package:siggenes not available attempting to install\n")
+    cat("Package:siggenes not available attempting to install\n")
-  install.packages("siggenes")
+    install.packages("siggenes")
-}
+  }
-set.seed(seed)
+  set.seed(seed)
-starttime <- proc.time()[3]
+  starttime <- proc.time()[3]
-# Set up False discovery and False non discovery vectors
-#FD <- rep(NA, iiter)
-#FND <- rep(NA, iiter)
 # Output lists
-FDlist <- list(m=m, alpha=alpha, p0=list(),p0NW=list(), convest=list())
+  FDlist <- list(m=m, alpha=alpha, p0=rep(NA,iiter), p0NW=rep(NA,iiter), convest=rep(NA,iiter))
-# 2) ---------------------------- pi0 outer loop ------------------------------ #
-# Main loop for each value of pi0, the proportion of truly Null hypotheses
-#for(pi0 in seqpi0)
-#  {
-  # False discovery threshold
-#    if(adaptive)
-#      {
-    # Adaptive control adjustment on true pi0 (finner & Roters, 2002)
-#        alphastar<- alpha/pi0
-#      } else {
-    # Non  adaptive control
-alphastar <- alpha
-#      }
-#    m1 <- round(m * (1-pi0))
-#    m0 <- m - m1
-#    DEmeans <- c(rep(mu1,m1), rep(mu0,m0))
-#    DEflag <- as.logical(DEmeans)
-# 3) --------------------------- iiter inner loop ----------------------------- #
+  alphastar <- alpha # Do we need this?
-for(i in seq(iiter)){
-      # Names for list elements
-#      pi0char <- as.character(round(pi0, 5))
+# 3) --------------------------- iiter loop ----------------------------- #
+  for(i in seq(iiter)){
 # 4) ------------------------ Permutation analysis ---------------------------- #
-  x <- cbind(golub.ALL[,ALL.samples[[ALLdraw[i]]]],
+    x <- cbind(golub.ALL[,ALL.samples[[ALLdraw[i]]]],
-             golub.AML[,AML.samples[[AMLdraw[i]]]]
+               golub.AML[,AML.samples[[AMLdraw[i]]]]
-             )
+               )
-  fit <- lmFit(x, design.Sample)
+    fit <- lmFit(x, design.Sample)
-  fit <- eBayes(fit)
+    fit <- eBayes(fit)
-  pvalues <- fit$p.value[,"AMLvsALL"]
+    pvalues <- fit$p.value[,"AMLvsALL"]
 #  p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH")
 #  pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE)
@@ Line 146: / Line 119: @@
 # Storey uses seq(0,0.95, by=0.05) for lambda, estimates at pi(0.95)
 # Maximum value of lambda needs to be less than 1 estimated by (m-1)/m
+    if(use.convest) {# Convest estimation
-  if(use.convest) {# Convest estimation
+      FDlist[["convest"]][i] <- convest(pvalues, niter=50)
-    FDlist[["convest"]][i] <- convest(pvalues, niter=50)
+    }
-  }
+  # Siggenes or first principles
+    if( use.siggenes ) {
-        # Siggenes or first principles
+      require(siggenes, quietly=TRUE)
-  if( use.siggenes ) {
-    require(siggenes, quietly=TRUE)
            # A) weighting by 1-lambda
-    pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
+      pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
-                      ncs.weights=1-lambdaseq)$p0
+                        ncs.weights=1-lambdaseq)$p0
-    FDlist[["p0"]][i] <- round(pi0hat,8)
+      FDlist[["p0"]][i] <- round(pi0hat,8)
            # B) no 1-lambda weighting
-    pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
+      pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
-                      ncs.weights=NULL)$p0
+                        ncs.weights=NULL)$p0
-    FDlist[["p0NW"]][i] <- round(pi0hat,8)
+      FDlist[["p0NW"]][i] <- round(pi0hat,8)
-  } else {
+    } else {
-    pi0lambda <- tapply(lambdaseq, lambdaseq,
+      pi0lambda <- tapply(lambdaseq, lambdaseq,
-                        function(x){sum(pvalues>x)/(m*(1-x))})
+                          function(x){sum(pvalues>x)/(m*(1-x))})
            # A) weighting by 1-lambda
-    pi0hat <- min(smooth.spline(lambdaseq, pi0lambda, w=1-lambdaseq,
+      pi0hat <- min(smooth.spline(lambdaseq, pi0lambda, w=1-lambdaseq,
-                                df=3)$y[length(lambdaseq)],1)
+                                  df=3)$y[length(lambdaseq)],1)
-    FDlist[["p0"]][i] <- round(pi0hat,8)
+      FDlist[["p0"]][i] <- round(pi0hat,8)
            # B) no 1-lambda weighting
-    pi0hat <- min(smooth.spline(lambdaseq, pi0lambda,
+      pi0hat <- min(smooth.spline(lambdaseq, pi0lambda,
-                                df=3)$y[length(lambdaseq)],1)
+                                  df=3)$y[length(lambdaseq)],1)
-    FDlist[["p0NW"]][i] <- round(pi0hat,8)
+      FDlist[["p0NW"]][i] <- round(pi0hat,8)
-  }
+    }
+    save(FDlist, file=file.path(datadir, outfile), compress=T)
-#      if(FDR) {
+  } # End of main loop
-      # Step down fdr Bengamini & Hochberg control
-#        Porder <- order(pvalues)
-      # Sorted pvalues
-#        cutoff <- pvalues[Porder] > (seq(m) * alphastar)/m
-      #Reordered DEflag
-#        DEorder  <- DEflag[Porder]
-#        rejected <- DEorder[!cutoff]
-#        accepted <- DEorder[cutoff]
-#        FD[i]  <- sum(!rejected)
-#        FND[i] <- sum(accepted)
-#      }
-#    }
-   # Writing to FDlist every iteration of p
-#    if(FDR) {
-#      FDlist[["V"]][[pi0char]] <- FD
-#      FDlist[["T"]][[pi0char]] <- FND
-#    }
-  save(FDlist, file=file.path(datadir, outfile), compress=T)
-} # End of main loop
 # Return simulation time
-endtime <- proc.time()[3] - starttime
+  endtime <- proc.time()[3] - starttime
-return(endtime)
+  return(endtime)
 }
-#fdrsim(outfile = "tmp", m=100, iiter=1000, seqpi0=1, #seqpi0= seq(0,1, length=5),
-# alpha = 0.05, adaptive=F, mu0=0, mu1=3, sigma=1,seed=2, Estpi0=TRUE)
 #load(file.path("Data", "tmp"))
 #source("plotfun.R")
 #plotfun(FDlist)