QvalueComparison.R


Code snippets and programs written in R, S or S-PLUS.

library(qvalue)
library(siggenes)
library(limma)

# ============= Generating a mixture of normally distributed data ==============

simNorm <- function(m=1000, pi0=0.8, mu0=0, mu1=3, sigma=1) {
  m1 <- round(m * (1-pi0))                # number of truly differentially expressed genes
  m0 <- m - m1                            # number of true nulls
  DEmeans <- c(rep(mu1,m1), rep(mu0,m0))  # true mean for each gene
  DEflag <- as.logical(DEmeans)           # TRUE for the DE genes
  X <- rnorm(m, DEmeans, sigma)           # one observation per gene
  pvalues <- 2*(1-pnorm(abs(X),0,sigma))  # two-sided p-values against the null mean mu0 = 0
  return(pvalues)
}
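A quick sanity check, added here and not part of the original script: with the default pi0 = 0.8, the simulated p-values should look roughly uniform with an excess of small values coming from the 20% of non-null genes.

# Hypothetical check: the histogram should show a spike near 0 on top of a uniform background
set.seed(1)
hist(simNorm(), breaks=50, main="Simulated p-values", xlab="p-value")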

# ===============================================================================
# Multtest does not allow a max of 1 (NA) going into smooth.spline

pvalues <- simNorm()
m <- 1000
lambdaseq <- seq(0, (m-1)/m, length=100) # a fraction less than 1
pi0lambda <- tapply(lambdaseq, lambdaseq, function(x){sum(pvalues>x)/(m*(1-x))})
pi0hat <- min(smooth.spline(lambdaseq, pi0lambda,
                            w=1-lambdaseq, df=3)$y[length(lambdaseq)], 1)
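Why the sequence stops just short of 1 (an illustration added here, not in the original script): at lambda = 1 the estimate #{p > lambda} / (m*(1-lambda)) is 0/0, and smooth.spline() rejects NaN input.

# At lambda = 1 the numerator and denominator are both 0, giving NaN
sum(pvalues > 1) / (m * (1 - 1))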
# ========================= First principles pi0 estimation =====================

mypi0.est <- function(pvalues, lambdaseq = seq(0, 1, length=100),
                      weight=TRUE, EstAtOne = TRUE) {
  m <- length(pvalues)
  # Storey's estimate pi0(lambda) = #{p > lambda} / (m*(1-lambda)) at each lambda
  pi0lambda <- tapply(lambdaseq, lambdaseq,
                      function(x){sum(pvalues>x)/(m*(1-x))})
  if( EstAtOne ) {
    LambdaPred <- 1                # extrapolate the spline to lambda = 1
  } else {
    LambdaPred <- max(lambdaseq)   # evaluate at the largest lambda used
  }
  if( weight ) { # a) weighting by 1-lambda
    pi0hat <- min(predict(smooth.spline(lambdaseq, pi0lambda,
                                        w=1-lambdaseq, df=3), LambdaPred)$y, 1)
    # pi0hat <- min(smooth.spline(lambdaseq, pi0lambda,
    #                             w=1-lambdaseq, df=3)$y[length(lambdaseq)], 1)
  } else {       # b) no 1-lambda weighting
    pi0hat <- min(predict(smooth.spline(lambdaseq, pi0lambda, df=3),
                          LambdaPred)$y, 1)
    # pi0hat <- min(smooth.spline(lambdaseq, pi0lambda,
    #                             df=3)$y[length(lambdaseq)], 1)
  }
  return(pi0hat)
}

# ===============================================================================

pvalues <- simNorm()

str(qvalue)
str(pi0.est)

# =============== Settings when qvalue and pi0.est are identical ================

qvalue(pvalues, lambda = seq(0, 0.95, 0.05),pi0.method="smoother")$pi0

pi0.est(pvalues, lambda = seq(0, 0.95, 0.05),ncs.value="max")$p0

mypi0.est(pvalues, seq(0,0.95, 0.05), weight=FALSE, EstAtOne=FALSE)

convest(pvalues, niter=1000, doplot=FALSE)
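A direct check of the "identical" claim above (added for illustration; it assumes the qvalue and siggenes packages are loaded):

# Should be TRUE if the qvalue smoother and pi0.est with ncs.value="max" really coincide
all.equal(qvalue(pvalues, lambda = seq(0, 0.95, 0.05), pi0.method="smoother")$pi0,
          pi0.est(pvalues, lambda = seq(0, 0.95, 0.05), ncs.value="max")$p0)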

# ===============================================================================
# ============== Settings when mypi0.est and pi0.est use weights ================

pi0.est(pvalues, lambda = seq(0, 0.95, 0.05), ncs.value="paper",
        ncs.weights = 1-seq(0, 0.95, 0.05))$p0

mypi0.est(pvalues, seq(0, 0.95, 0.05), weight=TRUE, EstAtOne=TRUE)

# ===============================================================================
# ====================== Examining single cases pi0 = 0.8 =======================

pvalues <- simNorm()
qvalue(pvalues, lambda = seq(0, 0.95, 0.05), pi0.method="smoother")$pi0
mylambda <- seq(0, 999/1000, length=100)
pi0.est(pvalues, lambda = mylambda, ncs.value="paper",
        ncs.weights = 1 - mylambda)$p0

mypi0.est(pvalues, mylambda, weight=TRUE, EstAtOne=TRUE)

# ===============================================================================
# ================= Examining n simulated cases with pi0 = 0.8 ==================

n <- 1000
results <- list()
seed <- 1
seqlambda <- seq(0, 0.95, 0.05)
unix.time(
  for(i in 1:n) {
    set.seed(seed)
    pvalues <- simNorm()
    results[["pi0.est default"]][i] <- pi0.est(pvalues, lambda = seqlambda, ncs.value="max")$p0
    results[["pi0.est lambda=1"]][i] <- pi0.est(pvalues, lambda = seqlambda, ncs.value="paper")$p0
    results[["pi0.est weights"]][i] <- pi0.est(pvalues, lambda = seqlambda, ncs.value="max",
                                               ncs.weights=1-seqlambda)$p0
    results[["pi0.est weights, lambda=1"]][i] <- pi0.est(pvalues, lambda = seqlambda, ncs.value="paper",
                                                         ncs.weights=1-seqlambda)$p0
    seed <- seed + 1
  }
)


default <- results


# An interesting property, which depends on the setting of lambda, is P(pi0hat[lambda==1] = 1),
# for biased and unbiased values of lambda. Estimating pi0(lambda) at many values
# of lambda seems to improve this slightly, but the improvement is not as large as
# that gained by using the 1-lambda weights in the spline estimation.
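One way to see this in the first batch of results (a summary added here, not part of the original script):

# Fraction of the n simulations in which each estimator returns exactly 1
sapply(default, function(x) mean(x == 1))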

results <- list()
seed <- 1
m <- 100
seqlambda <- seq(0, (m-1)/m, length=m)

unix.time(
  for(i in 1:n) {
    set.seed(seed)
    pvalues <- simNorm()
    results[["pi0.est default"]][i] <- pi0.est(pvalues, lambda = seqlambda, ncs.value="max")$p0
    results[["pi0.est lambda=1"]][i] <- pi0.est(pvalues, lambda = seqlambda, ncs.value="paper")$p0
    results[["pi0.est weights"]][i] <- pi0.est(pvalues, lambda = seqlambda, ncs.value="max",
                                               ncs.weights=1-seqlambda)$p0
    results[["pi0.est weights, lambda=1"]][i] <- pi0.est(pvalues, lambda = seqlambda, ncs.value="paper",
                                                         ncs.weights=1-seqlambda)$p0
    seed <- seed + 1
  }
)

lambda100values <- results

# ================ Histograms of n simulations at pi0 = 0.8 =====================
# Notice the large spike of estimates at pi_0 = 1 when weights are not used!
# (The size of that spike is summarised numerically after the plots below.)

par(mfrow=c(2,2))
for(i in names(default)) {
  hist(default[[i]], breaks=50, main=i, xlab="Estimate of pi_0")
  mtext(paste("95CI width:", round(quantile(default[[i]], 0.975)
                                   - quantile(default[[i]], 0.025), 3)))
}

X11()
par(mfrow=c(2,2))
for(i in names(lambda100values)) {
  hist(lambda100values[[i]], breaks=50, main=i, xlab="Estimate of pi_0 #2")
  mtext(paste("95CI width:", round(quantile(lambda100values[[i]], 0.975) -
                                   quantile(lambda100values[[i]], 0.025), 3)))
}
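To put numbers on the spike seen in the histograms (again a summary added here, using only the objects defined above):

# Proportion of estimates exactly equal to 1 for each method under each lambda grid
rbind("lambda = seq(0, 0.95, 0.05)" = sapply(default, function(x) mean(x == 1)),
      "100 lambda values"           = sapply(lambda100values, function(x) mean(x == 1)))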