Difference between revisions of "AffyVsInHouse.R"
(Added summary stats etc) |
|||
Line 57: | Line 57: | ||
MA <- normalizeWithinArrays(RG, method="loess", bc.method="none") | MA <- normalizeWithinArrays(RG, method="loess", bc.method="none") | ||
RG.norm <- RG.MA(MA) | RG.norm <- RG.MA(MA) | ||
+ | |||
+ | # summary stats | ||
+ | iqr <- function(x, qlims=c(0.25, 0.75)) { | ||
+ | IQR <- quantile(x, qlims[2]) - quantile(x, qlims[1]) | ||
+ | return(IQR) | ||
+ | } | ||
+ | |||
+ | # Use median and mad for skewed chisq distributions | ||
+ | median(esd) | ||
+ | mean(esd) | ||
+ | |||
+ | mad(esd) | ||
+ | sd(esd) | ||
+ | sqrt(var(esd)) | ||
+ | iqr(esd) | ||
+ | |||
+ | calcStats <- function(x, type="median", format = "%4.3f") { | ||
+ | xStat <- c() | ||
+ | if(type=="median") { | ||
+ | medx <- sprintf(format, median(x)) | ||
+ | xStat <- paste(expression(median(x)), "=", medx, sep=" ") | ||
+ | } else { | ||
+ | madx <- sprintf(format, mad(x)) | ||
+ | xStat <- paste(expression(mad(x)), "=", madx, sep=" ") | ||
+ | } | ||
+ | return(xStat) | ||
+ | } | ||
+ | # Test | ||
+ | calcStats(1:10, type="median") | ||
+ | median(1:10) | ||
+ | calcStats(1:10, type="mad") | ||
+ | mad(1:10) | ||
# --------------------------- Comparison plots -------------------------------- # | # --------------------------- Comparison plots -------------------------------- # | ||
Line 87: | Line 119: | ||
dev.set(2) | dev.set(2) | ||
hist(emean, main=AffyMain, xlim=c(4,16), xlab="Average signal") | hist(emean, main=AffyMain, xlim=c(4,16), xlab="Average signal") | ||
+ | text(x=15, y=1700, label=calcStats(emean, type="median"), adj=c(1,0)) | ||
+ | text(x=15, y=1550, label=calcStats(emean, type="mad"), adj=c(1,0)) | ||
+ | |||
dev.set(3) | dev.set(3) | ||
hist(Rmean, main=InHouseMain, xlim=c(4,16), xlab="Cy5 Average signal") | hist(Rmean, main=InHouseMain, xlim=c(4,16), xlab="Cy5 Average signal") | ||
+ | text(x=8, y=1700, label=calcStats(Rmean, type="median", format="%2.1f"), adj=c(1,0)) | ||
+ | text(x=8, y=1550, label=calcStats(Rmean, type="mad", format="%2.1f"), adj=c(1,0)) | ||
+ | |||
Sys.sleep(1) | Sys.sleep(1) | ||
hist(Gmean, main=InHouseMain, xlim=c(4,16), xlab="Cy3 Average signal") | hist(Gmean, main=InHouseMain, xlim=c(4,16), xlab="Cy3 Average signal") | ||
+ | text(x=8, y=1700, label=calcStats(Gmean, type="median", format="%2.1f"), adj=c(1,0)) | ||
+ | text(x=8, y=1550, label=calcStats(Gmean, type="mad", format="%2.1f"), adj=c(1,0)) | ||
# SD distributions | # SD distributions | ||
Line 99: | Line 139: | ||
dev.set(2) | dev.set(2) | ||
hist(esd, main=AffyMain, breaks = 12, xlim=c(0,1.5), xlab="Standard deviation") | hist(esd, main=AffyMain, breaks = 12, xlim=c(0,1.5), xlab="Standard deviation") | ||
+ | text(x=1, y=8000, label=calcStats(esd, type="median"), adj=c(1,0)) | ||
+ | text(x=1, y=7300, label=calcStats(esd, type="mad"), adj=c(1,0)) | ||
+ | |||
dev.set(3) | dev.set(3) | ||
− | hist(Rsd, main=InHouseMain, breaks = 100, xlim=c(0,1.5), xlab="Cy3 interspot standard deviation") | + | hist(Rsd, main=InHouseMain, breaks = 100, xlim=c(0,1.5), xlab="Cy3 interspot standard deviation") |
+ | text(x=1, y=3000, label=calcStats(Rsd, type="median"), adj=c(1,0)) | ||
+ | text(x=1, y=2700, label=calcStats(Rsd, type="mad"), adj=c(1,0)) | ||
Sys.sleep(1) | Sys.sleep(1) | ||
hist(Gsd, main=InHouseMain, breaks = 100, xlim=c(0,1.5), xlab="Cy5 interspot standard deviation") | hist(Gsd, main=InHouseMain, breaks = 100, xlim=c(0,1.5), xlab="Cy5 interspot standard deviation") | ||
− | + | text(x=1, y=3000, label=calcStats(Gsd, type="median"), adj=c(1,0)) | |
− | + | text(x=1, y=2700, label=calcStats(Gsd, type="mad"), adj=c(1,0)) | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
# CV | # CV | ||
dev.set(2) | dev.set(2) | ||
hist(esd/emean, main=AffyMain, breaks = 12, xlim=c(0,0.2), xlab="Coefficient of variation") | hist(esd/emean, main=AffyMain, breaks = 12, xlim=c(0,0.2), xlab="Coefficient of variation") | ||
+ | text(x=0.15, y=6000, label=calcStats(esd/emean, type="median"), adj=c(1,0)) | ||
+ | text(x=0.15, y=5500, label=calcStats(esd/emean, type="mad"), adj=c(1,0)) | ||
+ | |||
dev.set(3) | dev.set(3) | ||
hist(Rsd/Rmean, main=InHouseMain, breaks=80, xlim=c(0,0.2), xlab="Cy5 interspot coefficient of variation") | hist(Rsd/Rmean, main=InHouseMain, breaks=80, xlim=c(0,0.2), xlab="Cy5 interspot coefficient of variation") | ||
+ | text(x=0.15, y=3000, label=calcStats(Rsd/Rmean, type="median"), adj=c(1,0)) | ||
+ | text(x=0.15, y=2700, label=calcStats(Rsd/Rmean, type="mad"), adj=c(1,0)) | ||
Sys.sleep(1) | Sys.sleep(1) | ||
hist(Gsd/Gmean, main=InHouseMain, breaks=80, xlim=c(0,0.2), xlab="Cy3 interspot coefficient of variation") | hist(Gsd/Gmean, main=InHouseMain, breaks=80, xlim=c(0,0.2), xlab="Cy3 interspot coefficient of variation") | ||
+ | text(x=0.15, y=3000, label=calcStats(Gsd/Gmean, type="median"), adj=c(1,0)) | ||
+ | text(x=0.15, y=2700, label=calcStats(Gsd/Gmean, type="mad"), adj=c(1,0)) | ||
+ | |||
+ | # ---------------------- Empirical validation of Chisq ------------------------ # | ||
+ | df <- 10 | ||
+ | ncp <- 0 | ||
+ | empiricalSD <- rchisq(50000, df=df, ncp=ncp) | ||
+ | sqrt(var(empiricalSD)) | ||
+ | sd(empiricalSD) | ||
+ | mad(empiricalSD) | ||
+ | |||
+ | sqrt(2 * df) | ||
+ | |||
+ | hist(empiricalSD) | ||
+ | qqplot(empiricalSD, esd) |
Revision as of 07:55, 17 December 2006
# Attach the two analysis packages used throughout this script.
library(limma)
library(affy)

# Report the installed version of each package.
packageDescription("limma", field = "Version")
packageDescription("affy", field = "Version")

# Look up the affy vignette.
vignette("affy")
# -------------------------------- Affymetrix --------------------------------- #
# Choose the directory that holds the CEL files. The condition is a manual
# switch: flip it to use the first location instead.
if (FALSE) { # Change for HortResearch
  dataDir <- "/Volumes/HD2/Max Planck/Data/Affy/DayNight/Celfiles"
} else {
  dataDir <- "/Users/admin/Desktop/DayNight/Celfiles/"
  # Sys.putenv() is defunct in current R; Sys.setenv() is its replacement.
  Sys.setenv(DISPLAY = ":0")
}

# loads CEL files into an affybatch object
dset <- ReadAffy(
  filenames = file.path(dataDir, dir(dataDir, pattern = ".CEL")),
  widget = FALSE
)

# remove extra names
un <- ".CEL"
# fixed = TRUE matches ".CEL" literally, so the dot is not a regex wildcard.
sampleNames(dset) <- gsub(un, "", sampleNames(dset), fixed = TRUE)

# Obtaining indexes of sampleNames (affy slides) of interest
technicalreps <- grep("00 G048", sampleNames(dset))
techset <- dset[, technicalreps]

# Normalization
erma <- rma(techset)
# --------------------------------- In house ---------------------------------- #
library(limma)

# Location of the GenePix result files. The first branch is an empty
# placeholder, so only the else branch assigns dataDir.
if (FALSE) { # Change for HortResearch
} else {
  dataDir <- "/Users/admin/Desktop/Directories/VariabilityStudy/Data"
}
files <- dir(dataDir, pattern = "gpr")

# Examine genepix, genepix.median
RG <- read.maimages(
  files,
  path = dataDir,
  source = "genepix",
  wt.fun = wtflags(0)
)

# Visually AC3 and AC4 most similar
pairs(log2(RG$R), pch = ".")
pairs(log2(RG$G), pch = ".")

# Keep only the two arrays chosen above.
RG <- RG[, c("AC3", "AC4")]

# All spots
nrow(RG)

# Good spots
apply(RG$weights, 2, sum)

# Bad spots
nrow(RG) - apply(RG$weights, 2, sum)

# Normalization (loess, printtiploess)
MA <- normalizeWithinArrays(RG, method = "loess", bc.method = "none")
RG.norm <- RG.MA(MA)
# summary stats

# Inter-quantile range of a numeric vector.
#
# Args:
#   x:     numeric vector; handed straight to quantile(), which raises an
#          error if x contains NA.
#   qlims: numeric vector c(lower, upper) of probabilities. The default is the
#          quartiles, which gives the classical inter-quartile range.
#
# Returns:
#   A single unnamed number: the upper quantile minus the lower quantile.
iqr <- function(x, qlims = c(0.25, 0.75)) {
  # names = FALSE: otherwise the difference inherits the upper quantile's
  # label (e.g. "75%"), which is misleading on a range.
  limits <- quantile(x, probs = qlims[1:2], names = FALSE)
  limits[2] - limits[1]
}
# Use median and mad for skewed chisq distributions

# esd is otherwise first assigned in the "SD distributions" section further
# down; compute it here so the script also runs top to bottom.
esd <- apply(exprs(erma), 1, sd)

# Location: robust estimate, then the classical one for comparison.
median(esd)
mean(esd)

# Spread: robust estimate, then the classical ones for comparison.
mad(esd)
sd(esd)
sqrt(var(esd))
iqr(esd)
# Build a "<statistic>(x) = <value>" label, used to annotate the histograms.
#
# Args:
#   x:      numeric vector to summarise.
#   type:   "median" selects median(x); any other value selects mad(x).
#   format: sprintf() format applied to the statistic.
#
# Returns:
#   A length-one character string such as "median(x) = 5.500".
calcStats <- function(x, type = "median", format = "%4.3f") {
  if (type == "median") {
    statName <- "median(x)"
    statValue <- median(x)
  } else {
    statName <- "mad(x)"
    statValue <- mad(x)
  }
  paste(statName, "=", sprintf(format, statValue), sep = " ")
}

# Test
# Each label should show the same number as the raw statistic printed after it.
calcStats(1:10, type = "median")
median(1:10)
calcStats(1:10, type = "mad")
mad(1:10)
# --------------------------- Comparison plots -------------------------------- #
# Setup
size <- 8
AffyMain <- "Affymetrix technical replication"
InHouseMain <- "In house technical replication"

# Close every open graphics device, then open two X11 windows. The code below
# selects them with dev.set(2) and dev.set(3).
graphics.off()
X11(xpos = 0, ypos = 0, width = size, height = size)
X11(xpos = 600, ypos = 0, width = size, height = size)
dev.list()

# Pairs plots
pairs(log2(RG.norm$R), pch = ".")
pairs(log2(RG.norm$G), pch = ".")

# Affymetrix expression values on device 2.
dev.set(2)
plot(exprs(erma), main = AffyMain, pch = ".")

# In-house log2 R-channel values on device 3.
dev.set(3)
plot(log2(RG.norm$R), main = InHouseMain, pch = ".")
# Histograms
# Mean distributions
# Row-wise means of the Affymetrix expression values and of each log2 channel.
emean <- apply(exprs(erma), 1, mean)
Rmean <- apply(log2(RG.norm$R), 1, mean)
Gmean <- apply(log2(RG.norm$G), 1, mean)

# Affymetrix histogram, annotated with median and MAD. The argument is spelled
# out as `labels` rather than relying on partial matching of `label`.
dev.set(2)
hist(emean, main = AffyMain, xlim = c(4, 16), xlab = "Average signal")
text(
  x = 15, y = 1700, adj = c(1, 0),
  labels = calcStats(emean, type = "median")
)
text(
  x = 15, y = 1550, adj = c(1, 0),
  labels = calcStats(emean, type = "mad")
)

# In-house R channel (Cy5).
dev.set(3)
hist(Rmean, main = InHouseMain, xlim = c(4, 16), xlab = "Cy5 Average signal")
text(
  x = 8, y = 1700, adj = c(1, 0),
  labels = calcStats(Rmean, type = "median", format = "%2.1f")
)
text(
  x = 8, y = 1550, adj = c(1, 0),
  labels = calcStats(Rmean, type = "mad", format = "%2.1f")
)

# In-house G channel (Cy3), drawn on the same device after a one-second pause.
Sys.sleep(1)
hist(Gmean, main = InHouseMain, xlim = c(4, 16), xlab = "Cy3 Average signal")
text(
  x = 8, y = 1700, adj = c(1, 0),
  labels = calcStats(Gmean, type = "median", format = "%2.1f")
)
text(
  x = 8, y = 1550, adj = c(1, 0),
  labels = calcStats(Gmean, type = "mad", format = "%2.1f")
)
# SD distributions
# Row-wise standard deviations of the Affymetrix expression values and of each
# log2 channel.
esd <- apply(exprs(erma), 1, sd)
Rsd <- apply(log2(RG.norm$R), 1, sd)
Gsd <- apply(log2(RG.norm$G), 1, sd)

# Affymetrix histogram, annotated with median and MAD.
dev.set(2)
hist(
  esd,
  main = AffyMain, breaks = 12, xlim = c(0, 1.5),
  xlab = "Standard deviation"
)
text(x = 1, y = 8000, adj = c(1, 0), labels = calcStats(esd, type = "median"))
text(x = 1, y = 7300, adj = c(1, 0), labels = calcStats(esd, type = "mad"))

# In-house R channel. Labelled Cy5 to agree with the mean and CV histograms,
# which label the R channel Cy5 and the G channel Cy3.
dev.set(3)
hist(
  Rsd,
  main = InHouseMain, breaks = 100, xlim = c(0, 1.5),
  xlab = "Cy5 interspot standard deviation"
)
text(x = 1, y = 3000, adj = c(1, 0), labels = calcStats(Rsd, type = "median"))
text(x = 1, y = 2700, adj = c(1, 0), labels = calcStats(Rsd, type = "mad"))

# In-house G channel (Cy3), drawn on the same device after a one-second pause.
Sys.sleep(1)
hist(
  Gsd,
  main = InHouseMain, breaks = 100, xlim = c(0, 1.5),
  xlab = "Cy3 interspot standard deviation"
)
text(x = 1, y = 3000, adj = c(1, 0), labels = calcStats(Gsd, type = "median"))
text(x = 1, y = 2700, adj = c(1, 0), labels = calcStats(Gsd, type = "mad"))
# CV
# Coefficient of variation = standard deviation / mean, per row.

# Affymetrix histogram, annotated with median and MAD. The argument is spelled
# out as `labels` rather than relying on partial matching of `label`.
dev.set(2)
hist(
  esd / emean,
  main = AffyMain, breaks = 12, xlim = c(0, 0.2),
  xlab = "Coefficient of variation"
)
text(
  x = 0.15, y = 6000, adj = c(1, 0),
  labels = calcStats(esd / emean, type = "median")
)
text(
  x = 0.15, y = 5500, adj = c(1, 0),
  labels = calcStats(esd / emean, type = "mad")
)

# In-house R channel (Cy5).
dev.set(3)
hist(
  Rsd / Rmean,
  main = InHouseMain, breaks = 80, xlim = c(0, 0.2),
  xlab = "Cy5 interspot coefficient of variation"
)
text(
  x = 0.15, y = 3000, adj = c(1, 0),
  labels = calcStats(Rsd / Rmean, type = "median")
)
text(
  x = 0.15, y = 2700, adj = c(1, 0),
  labels = calcStats(Rsd / Rmean, type = "mad")
)

# In-house G channel (Cy3), drawn on the same device after a one-second pause.
Sys.sleep(1)
hist(
  Gsd / Gmean,
  main = InHouseMain, breaks = 80, xlim = c(0, 0.2),
  xlab = "Cy3 interspot coefficient of variation"
)
text(
  x = 0.15, y = 3000, adj = c(1, 0),
  labels = calcStats(Gsd / Gmean, type = "median")
)
text(
  x = 0.15, y = 2700, adj = c(1, 0),
  labels = calcStats(Gsd / Gmean, type = "mad")
)
# ---------------------- Empirical validation of Chisq ------------------------ #
# Simulate a large chi-squared sample and print three estimates of its spread.
df <- 10
ncp <- 0
empiricalSD <- rchisq(50000, df = df, ncp = ncp)
sqrt(var(empiricalSD))
sd(empiricalSD)
mad(empiricalSD)

# Theoretical standard deviation of a central chi-squared: sqrt(2 * df).
sqrt(2 * df)

# Shape of the simulated sample, then its quantiles against the observed
# Affymetrix standard deviations.
hist(empiricalSD)
qqplot(empiricalSD, esd)