Difference between revisions of "ExploreGolub.R"

From Organic Design wiki
m (# {{R}})
(limma analysis and locfdr added)
Line 1: Line 1:
 
# {{R}}
 
# {{R}}
 
library(multtest)
 
library(multtest)
 +
library(locfdr)
 +
library(NutriTools)
 +
 
data(golub)
 
data(golub)
  
nbreaks=100
+
# 1) Golub struture - Affymetrix dataset
 +
 
 +
dim(golub)
 +
# golub.cl 0= ALL (27 slides) 1=AML (11 slides)
 +
table(golub.cl)
 +
 
 +
# 2) Exploratory analysis
 +
# Clustering
 +
# Slide 21 different from others in ALL
 +
clusterPlot(t(golub[,golub.cl==0]))
 +
# Slide 2 different from others in ALL
 +
clusterPlot(t(golub[,golub.cl==1]))
 +
 
 +
library(affy)
 +
mva.pairs(golub[,golub.cl==1][,1:5], log.it=FALSE)
  
 +
 +
nbreaks <- 100
 
x <- rowMeans(golub[,golub.cl==0])
 
x <- rowMeans(golub[,golub.cl==0])
 
y <- rowMeans(golub[,golub.cl==1])
 
y <- rowMeans(golub[,golub.cl==1])
 
+
 
mdplot(cbind(x,y))
 
mdplot(cbind(x,y))
 
+
 
hist(y-x, breaks=nbreaks)
 
hist(y-x, breaks=nbreaks)
hist(2^(y-x), breaks=nbreaks)
 
 
 
# Abundance apears to be a slight mixture (hump more prevelant with background subtraction)
 
# Abundance apears to be a slight mixture (hump more prevelant with background subtraction)
 
# Mixture could be of exponential and low expressing distribution on raw scale
 
# Mixture could be of exponential and low expressing distribution on raw scale
 
hist((y+x)/2, breaks=nbreaks)
 
hist((y+x)/2, breaks=nbreaks)
 +
 +
hist(2^(y-x), breaks=nbreaks)
 +
 
# Abundance, observed exponential like decay on raw scale
 
# Abundance, observed exponential like decay on raw scale
 
hist(2^((y+x)/2), breaks=nbreaks)
 
hist(2^((y+x)/2), breaks=nbreaks)
 
qqplot(rexp(10000,1), 2^((y+x)/2))
 
qqplot(rexp(10000,1), 2^((y+x)/2))
 
abline(0,1)
 
abline(0,1)
 +
 +
# 2) Analysis
 +
design <- cbind("ALL"=1, "AMLvsALL"=golub.cl)
 +
fit <- lmFit(golub, design)
 +
fit <- eBayes(fit)
 +
p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH")
 +
pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE)
 +
 +
# Check t is moderated t-statistic
 +
topTable(fit, coef="AMLvsALL", number=1)
 +
fit$t[829,"AMLvsALL"]
 +
 +
# 3) Efrons Bayesian H0 estimation
 +
locfdr(fit$t[,"AMLvsALL"])
 +
 +
# z-transformed locfdr
 +
z <- qnorm(pt(fit$t[,"AMLvsALL"], fit$df.residual + fit$df.prior))
 +
locfdr(z)
 +
 +
 +
# Sampling of golub.cl
 +
n <- 11
 +
r <-  6
 +
 +
m <- nCr(n,r)
 +
AML.samples <- list()
 +
i <- 1
 +
while(i <= m) {
 +
  value <- sort(sample(n, r, replace=FALSE))
 +
  key  <- paste(value, collapse=" ")
 +
  if(is.null(AML.samples[[key]])) {
 +
    AML.samples[[key]] <- value
 +
    i <-i+1
 +
    print(i)
 +
  }
 +
}
 +
 +
n <- 27
 +
r <-  6
 +
 +
ALL.samples <- list()
 +
i <- 1
 +
while(i <= m) {
 +
  value <- sort(sample(n, r, replace=FALSE))
 +
  key  <- paste(value, collapse=" ")
 +
  if(is.null(ALL.samples[[key]])) {
 +
    ALL.samples[[key]] <- value
 +
    i <-i+1
 +
    print(i)
 +
  }
 +
}
 +
 +
# Need to randomise the way the keys are drawn

Revision as of 02:40, 19 June 2007

Code snipits and programs written in R, S or S-PLUS library(multtest) library(locfdr) library(NutriTools)

data(golub)

  1. 1) Golub struture - Affymetrix dataset

dim(golub)

  1. golub.cl 0= ALL (27 slides) 1=AML (11 slides)

table(golub.cl)

  1. 2) Exploratory analysis
  2. Clustering
  3. Slide 21 different from others in ALL

clusterPlot(t(golub[,golub.cl==0]))

  1. Slide 2 different from others in ALL

clusterPlot(t(golub[,golub.cl==1]))

library(affy) mva.pairs(golub[,golub.cl==1][,1:5], log.it=FALSE)


nbreaks <- 100 x <- rowMeans(golub[,golub.cl==0]) y <- rowMeans(golub[,golub.cl==1])

mdplot(cbind(x,y))

hist(y-x, breaks=nbreaks)

  1. Abundance apears to be a slight mixture (hump more prevelant with background subtraction)
  2. Mixture could be of exponential and low expressing distribution on raw scale

hist((y+x)/2, breaks=nbreaks)

hist(2^(y-x), breaks=nbreaks)

  1. Abundance, observed exponential like decay on raw scale

hist(2^((y+x)/2), breaks=nbreaks) qqplot(rexp(10000,1), 2^((y+x)/2)) abline(0,1)

  1. 2) Analysis

design <- cbind("ALL"=1, "AMLvsALL"=golub.cl) fit <- lmFit(golub, design) fit <- eBayes(fit) p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH") pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE)

  1. Check t is moderated t-statistic

topTable(fit, coef="AMLvsALL", number=1) fit$t[829,"AMLvsALL"]

  1. 3) Efrons Bayesian H0 estimation

locfdr(fit$t[,"AMLvsALL"])

  1. z-transformed locfdr

z <- qnorm(pt(fit$t[,"AMLvsALL"], fit$df.residual + fit$df.prior)) locfdr(z)


  1. Sampling of golub.cl

n <- 11 r <- 6

m <- nCr(n,r) AML.samples <- list() i <- 1 while(i <= m) {

 value <- sort(sample(n, r, replace=FALSE))
 key   <- paste(value, collapse=" ")
 if(is.null(AML.sampleskey)) {
   AML.sampleskey <- value
   i <-i+1
   print(i)
 }

}

n <- 27 r <- 6

ALL.samples <- list() i <- 1 while(i <= m) {

 value <- sort(sample(n, r, replace=FALSE))
 key   <- paste(value, collapse=" ")
 if(is.null(ALL.sampleskey)) {
   ALL.sampleskey <- value
   i <-i+1
   print(i)
 }

}

  1. Need to randomise the way the keys are drawn