Difference between revisions of "ExploreGolub.R"
m (# {{R}}) |
(limma analysis and locfdr added) |
||
Line 1: | Line 1: | ||
# {{R}} | # {{R}} | ||
library(multtest) | library(multtest) | ||
+ | library(locfdr) | ||
+ | library(NutriTools) | ||
+ | |||
data(golub) | data(golub) | ||
− | + | # 1) Golub struture - Affymetrix dataset | |
+ | |||
+ | dim(golub) | ||
+ | # golub.cl 0= ALL (27 slides) 1=AML (11 slides) | ||
+ | table(golub.cl) | ||
+ | |||
+ | # 2) Exploratory analysis | ||
+ | # Clustering | ||
+ | # Slide 21 different from others in ALL | ||
+ | clusterPlot(t(golub[,golub.cl==0])) | ||
+ | # Slide 2 different from others in ALL | ||
+ | clusterPlot(t(golub[,golub.cl==1])) | ||
+ | |||
+ | library(affy) | ||
+ | mva.pairs(golub[,golub.cl==1][,1:5], log.it=FALSE) | ||
+ | |||
+ | nbreaks <- 100 | ||
x <- rowMeans(golub[,golub.cl==0]) | x <- rowMeans(golub[,golub.cl==0]) | ||
y <- rowMeans(golub[,golub.cl==1]) | y <- rowMeans(golub[,golub.cl==1]) | ||
− | + | ||
mdplot(cbind(x,y)) | mdplot(cbind(x,y)) | ||
− | + | ||
hist(y-x, breaks=nbreaks) | hist(y-x, breaks=nbreaks) | ||
− | |||
− | |||
# Abundance apears to be a slight mixture (hump more prevelant with background subtraction) | # Abundance apears to be a slight mixture (hump more prevelant with background subtraction) | ||
# Mixture could be of exponential and low expressing distribution on raw scale | # Mixture could be of exponential and low expressing distribution on raw scale | ||
hist((y+x)/2, breaks=nbreaks) | hist((y+x)/2, breaks=nbreaks) | ||
+ | |||
+ | hist(2^(y-x), breaks=nbreaks) | ||
+ | |||
# Abundance, observed exponential like decay on raw scale | # Abundance, observed exponential like decay on raw scale | ||
hist(2^((y+x)/2), breaks=nbreaks) | hist(2^((y+x)/2), breaks=nbreaks) | ||
qqplot(rexp(10000,1), 2^((y+x)/2)) | qqplot(rexp(10000,1), 2^((y+x)/2)) | ||
abline(0,1) | abline(0,1) | ||
+ | |||
+ | # 2) Analysis | ||
+ | design <- cbind("ALL"=1, "AMLvsALL"=golub.cl) | ||
+ | fit <- lmFit(golub, design) | ||
+ | fit <- eBayes(fit) | ||
+ | p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH") | ||
+ | pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE) | ||
+ | |||
+ | # Check t is moderated t-statistic | ||
+ | topTable(fit, coef="AMLvsALL", number=1) | ||
+ | fit$t[829,"AMLvsALL"] | ||
+ | |||
+ | # 3) Efrons Bayesian H0 estimation | ||
+ | locfdr(fit$t[,"AMLvsALL"]) | ||
+ | |||
+ | # z-transformed locfdr | ||
+ | z <- qnorm(pt(fit$t[,"AMLvsALL"], fit$df.residual + fit$df.prior)) | ||
+ | locfdr(z) | ||
+ | |||
+ | |||
+ | # Sampling of golub.cl | ||
+ | n <- 11 | ||
+ | r <- 6 | ||
+ | |||
+ | m <- nCr(n,r) | ||
+ | AML.samples <- list() | ||
+ | i <- 1 | ||
+ | while(i <= m) { | ||
+ | value <- sort(sample(n, r, replace=FALSE)) | ||
+ | key <- paste(value, collapse=" ") | ||
+ | if(is.null(AML.samples[[key]])) { | ||
+ | AML.samples[[key]] <- value | ||
+ | i <-i+1 | ||
+ | print(i) | ||
+ | } | ||
+ | } | ||
+ | |||
+ | n <- 27 | ||
+ | r <- 6 | ||
+ | |||
+ | ALL.samples <- list() | ||
+ | i <- 1 | ||
+ | while(i <= m) { | ||
+ | value <- sort(sample(n, r, replace=FALSE)) | ||
+ | key <- paste(value, collapse=" ") | ||
+ | if(is.null(ALL.samples[[key]])) { | ||
+ | ALL.samples[[key]] <- value | ||
+ | i <-i+1 | ||
+ | print(i) | ||
+ | } | ||
+ | } | ||
+ | |||
+ | # Need to randomise the way the keys are drawn |
Revision as of 02:40, 19 June 2007
Code snipits and programs written in R, S or S-PLUS library(multtest) library(locfdr) library(NutriTools)
data(golub)
- 1) Golub struture - Affymetrix dataset
dim(golub)
- golub.cl 0= ALL (27 slides) 1=AML (11 slides)
table(golub.cl)
- 2) Exploratory analysis
- Clustering
- Slide 21 different from others in ALL
clusterPlot(t(golub[,golub.cl==0]))
- Slide 2 different from others in ALL
clusterPlot(t(golub[,golub.cl==1]))
library(affy) mva.pairs(golub[,golub.cl==1][,1:5], log.it=FALSE)
nbreaks <- 100
x <- rowMeans(golub[,golub.cl==0])
y <- rowMeans(golub[,golub.cl==1])
mdplot(cbind(x,y))
hist(y-x, breaks=nbreaks)
- Abundance apears to be a slight mixture (hump more prevelant with background subtraction)
- Mixture could be of exponential and low expressing distribution on raw scale
hist((y+x)/2, breaks=nbreaks)
hist(2^(y-x), breaks=nbreaks)
- Abundance, observed exponential like decay on raw scale
hist(2^((y+x)/2), breaks=nbreaks) qqplot(rexp(10000,1), 2^((y+x)/2)) abline(0,1)
- 2) Analysis
design <- cbind("ALL"=1, "AMLvsALL"=golub.cl) fit <- lmFit(golub, design) fit <- eBayes(fit) p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH") pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE)
- Check t is moderated t-statistic
topTable(fit, coef="AMLvsALL", number=1) fit$t[829,"AMLvsALL"]
- 3) Efrons Bayesian H0 estimation
locfdr(fit$t[,"AMLvsALL"])
- z-transformed locfdr
z <- qnorm(pt(fit$t[,"AMLvsALL"], fit$df.residual + fit$df.prior)) locfdr(z)
- Sampling of golub.cl
n <- 11 r <- 6
m <- nCr(n,r) AML.samples <- list() i <- 1 while(i <= m) {
value <- sort(sample(n, r, replace=FALSE)) key <- paste(value, collapse=" ") if(is.null(AML.sampleskey)) { AML.sampleskey <- value i <-i+1 print(i) }
}
n <- 27 r <- 6
ALL.samples <- list() i <- 1 while(i <= m) {
value <- sort(sample(n, r, replace=FALSE)) key <- paste(value, collapse=" ") if(is.null(ALL.sampleskey)) { ALL.sampleskey <- value i <-i+1 print(i) }
}
- Need to randomise the way the keys are drawn