# AffyVsInHouse.R
# Load the analysis packages: limma for the in-house arrays, affy for the
# Affymetrix CEL files.
library(limma)
library(affy)

# Print the installed version of each package.
# (`fields` is spelled out rather than relying on partial matching of `field`.)
packageDescription("limma", fields = "Version")
packageDescription("affy", fields = "Version")

# Open the affy vignette for reference.
vignette("affy")
# --------------------------------- Affymetrix -------------------------------- #
# Directory holding the CEL files; flip the condition to use the alternative
# location.
if (TRUE) { # Change for HortResearch
  dataDir <- "/Volumes/HD2/Max Planck/Data/Affy/DayNight/Celfiles"
} else {
  dataDir <- "/Users/admin/Desktop/DayNight/Celfiles/"
  # Sys.putenv() is defunct; Sys.setenv() is its replacement.
  Sys.setenv(DISPLAY = ":0")
}

# Load the CEL files into an affybatch object.
# The dot is escaped so that only names containing a literal ".CEL" match.
celFiles <- file.path(dataDir, dir(dataDir, pattern = "[.]CEL"))
dset <- ReadAffy(filenames = celFiles, widget = FALSE)

# Remove the extension from the sample names (matched literally, not as a
# regular expression).
un <- ".CEL"
sampleNames(dset) <- gsub(un, "", sampleNames(dset), fixed = TRUE)

# Obtain the indexes of the sampleNames (affy slides) of interest
technicalreps <- grep("00 G048", sampleNames(dset))
techset <- dset[, technicalreps]

# Normalization
erma <- rma(techset)
# --------------------------------- In house ---------------------------------- #
library(limma)

# Directory holding the in-house array result files; flip the condition to use
# the alternative location.
if (TRUE) { # Change for HortResearch
  # No backslash before the space: "\ " is not a valid escape inside an R
  # string and stops the script from parsing.
  dataDir <- "/Volumes/HD2/Max Planck/HortResearch/VariabilityStudy/Data"
} else {
  dataDir <- "/Users/admin/Desktop/Directories/VariabilityStudy/Data"
}

# Names of the files in dataDir that match "gpr"
files <- dir(dataDir, pattern = "gpr")
# Examine genepix, genepix.median
# Spot weights are derived from the GenePix flags via wtflags(0).
RG <- read.maimages(
  files,
  path = dataDir,
  source = "genepix",
  wt.fun = wtflags(0)
)

# Visually AC3 and AC4 most similar
pairs(log2(RG$R), pch = ".")
pairs(log2(RG$G), pch = ".")

# Keep only those two arrays for the rest of the analysis
RG <- RG[, c("AC3", "AC4")]

# All spots
nrow(RG)

# Good spots: per-array sum of the spot weights
# NOTE(review): this is a count only if the weights are 0/1 - confirm.
colSums(RG$weights)

# Bad spots: the remainder
nrow(RG) - colSums(RG$weights)
# Normalization (loess, printtiploess)
# Within-array loess without background correction, then scale between arrays.
MA <- normalizeWithinArrays(RG, method = "loess", bc.method = "none")
# Fixed typo: the function is normalizeBetweenArrays, not noralizeBetweenArrays.
MA <- normalizeBetweenArrays(MA, method = "scale")

# Convert the normalized M/A values back to R/G intensities
RG.norm <- RG.MA(MA)
# Summary statistics
# Inter-quantile range of a numeric vector.
#
# Args:
#   x:     numeric vector.
#   qlims: lower and upper probabilities, in that order; the default
#          c(0.25, 0.75) gives the usual interquartile range. Only the first
#          two elements are used.
#
# Returns:
#   A single unnamed number: the upper quantile minus the lower quantile.
iqr <- function(x, qlims = c(0.25, 0.75)) {
  # names = FALSE stops the result from inheriting the upper quantile's name
  # (e.g. "75%"), which would mislabel a difference of two quantiles.
  upper <- quantile(x, qlims[2], names = FALSE)
  lower <- quantile(x, qlims[1], names = FALSE)
  upper - lower
}
# Use median and mad for skewed chisq distributions
# Format a robust summary statistic of x as a plot annotation.
#
# Args:
#   x:      numeric vector.
#   type:   "median" reports median(x); any other value reports mad(x).
#   format: sprintf() format applied to the statistic.
#
# Returns:
#   A single string such as "median(x) = 5.500" or "mad(x) = 1.483".
calcStats <- function(x, type = "median", format = "%2.3f") {
  if (type == "median") {
    statLabel <- "median(x)"
    statValue <- median(x)
  } else {
    statLabel <- "mad(x)"
    statValue <- mad(x)
  }
  paste(statLabel, "=", sprintf(format, statValue), sep = " ")
}
# Test: print each formatted label next to the raw statistic it should show
calcStats(1:10, type = "median")
median(1:10)
calcStats(1:10, type = "mad")
mad(1:10)
# --------------------------- Comparison plots -------------------------------- #
# Setup
size <- 8
AffyMain <- "Affymetrix technical replication"
InHouseMain <- "In house technical replication"

# Close every open device, then open two X11 windows side by side.
# Below, device 2 is used for the Affymetrix plots and device 3 for the
# in-house plots.
graphics.off()
X11(xpos = 0, ypos = 0, width = size, height = size)
X11(xpos = 600, ypos = 0, width = size, height = size)
dev.list()

# Pairs plots
pairs(log2(RG.norm$R), pch = ".")
pairs(log2(RG.norm$G), pch = ".")

dev.set(2)
plot(exprs(erma), main = AffyMain, pch = ".")
dev.set(3)
plot(log2(RG.norm$R), main = InHouseMain, pch = ".")
# Histograms
# Mean distributions
# Row-wise mean signal across the replicate arrays
emean <- apply(exprs(erma), 1, mean)
Rmean <- apply(log2(RG.norm$R), 1, mean)
Gmean <- apply(log2(RG.norm$G), 1, mean)

# Cross-dye pairs: R of one array paired with G of the other
RG1mean <- apply(log2(cbind(RG.norm$R[, 1], RG.norm$G[, 2])), 1, mean)
RG2mean <- apply(log2(cbind(RG.norm$R[, 2], RG.norm$G[, 1])), 1, mean)

# Affymetrix (device 2)
dev.set(2)
hist(emean, main = AffyMain, xlim = c(4, 16), xlab = "Average signal")
text(x = 15, y = 1700, labels = calcStats(emean, type = "median"),
     adj = c(1, 0))
text(x = 15, y = 1550, labels = calcStats(emean, type = "mad"),
     adj = c(1, 0))

# In house (device 3): Cy5 channel
dev.set(3)
hist(Rmean, main = InHouseMain, xlim = c(4, 16), xlab = "Cy5 Average signal")
text(x = 8, y = 1700,
     labels = calcStats(Rmean, type = "median", format = "%2.1f"),
     adj = c(1, 0))
text(x = 8, y = 1550,
     labels = calcStats(Rmean, type = "mad", format = "%2.1f"),
     adj = c(1, 0))

# Pause briefly before the next histogram replaces this one
Sys.sleep(1)

# In house: Cy3 channel
hist(Gmean, main = InHouseMain, xlim = c(4, 16), xlab = "Cy3 Average signal")
text(x = 8, y = 1700,
     labels = calcStats(Gmean, type = "median", format = "%2.1f"),
     adj = c(1, 0))
text(x = 8, y = 1550,
     labels = calcStats(Gmean, type = "mad", format = "%2.1f"),
     adj = c(1, 0))

# Dye pair not much different in mean signal
hist(RG1mean, main = InHouseMain, xlim = c(4, 16),
     xlab = "Cy3/Cy5 Average signal")
text(x = 8, y = 1700, labels = calcStats(RG1mean, type = "median"),
     adj = c(1, 0))
text(x = 8, y = 1550, labels = calcStats(RG1mean, type = "mad"),
     adj = c(1, 0))

hist(RG2mean, main = InHouseMain, xlim = c(4, 16),
     xlab = "Cy3/Cy5 Average signal")
text(x = 8, y = 1700, labels = calcStats(RG2mean, type = "median"),
     adj = c(1, 0))
text(x = 8, y = 1550, labels = calcStats(RG2mean, type = "mad"),
     adj = c(1, 0))
# SD distributions
# Row-wise standard deviation across the replicate arrays
esd <- apply(exprs(erma), 1, sd)
Rsd <- apply(log2(RG.norm$R), 1, sd)
Gsd <- apply(log2(RG.norm$G), 1, sd)

# Cross-dye pairs: R of one array paired with G of the other
RG1sd <- apply(log2(cbind(RG.norm$R[, 1], RG.norm$G[, 2])), 1, sd)
RG2sd <- apply(log2(cbind(RG.norm$R[, 2], RG.norm$G[, 1])), 1, sd)

# Affymetrix (device 2)
dev.set(2)
hist(esd, main = AffyMain, breaks = 12, xlim = c(0, 1.5),
     xlab = "Standard deviation")
text(x = 1, y = 8000, labels = calcStats(esd, type = "median"), adj = c(1, 0))
text(x = 1, y = 7300, labels = calcStats(esd, type = "mad"), adj = c(1, 0))

# In house (device 3). The axis labels follow the convention used by the mean
# and CV histograms in this script: R is Cy5 and G is Cy3. The original labels
# here were swapped.
dev.set(3)
hist(Rsd, main = InHouseMain, breaks = 100, xlim = c(0, 1.5),
     xlab = "Cy5 interspot standard deviation")
text(x = 1, y = 3000, labels = calcStats(Rsd, type = "median"), adj = c(1, 0))
text(x = 1, y = 2700, labels = calcStats(Rsd, type = "mad"), adj = c(1, 0))

# Pause briefly before the next histogram replaces this one
Sys.sleep(1)

hist(Gsd, main = InHouseMain, breaks = 100, xlim = c(0, 1.5),
     xlab = "Cy3 interspot standard deviation")
text(x = 1, y = 3000, labels = calcStats(Gsd, type = "median"), adj = c(1, 0))
text(x = 1, y = 2700, labels = calcStats(Gsd, type = "mad"), adj = c(1, 0))

# Dye pair seems to add about another 10% noise (0.107/0.113, 0.107/0.114)
hist(RG1sd, main = InHouseMain, breaks = 100, xlim = c(0, 1.5),
     xlab = "Cy3/Cy5 interspot standard deviation")
text(x = 1, y = 3000, labels = calcStats(RG1sd, type = "median"),
     adj = c(1, 0))
text(x = 1, y = 2700, labels = calcStats(RG1sd, type = "mad"), adj = c(1, 0))

hist(RG2sd, main = InHouseMain, breaks = 100, xlim = c(0, 1.5),
     xlab = "Cy3/Cy5 interspot standard deviation")
text(x = 1, y = 3000, labels = calcStats(RG2sd, type = "median"),
     adj = c(1, 0))
text(x = 1, y = 2700, labels = calcStats(RG2sd, type = "mad"), adj = c(1, 0))
# CV: coefficient of variation, computed as standard deviation / mean
# Affymetrix (device 2)
dev.set(2)
hist(esd / emean, main = AffyMain, breaks = 12, xlim = c(0, 0.2),
     xlab = "Coefficient of variation")
text(x = 0.15, y = 6000, labels = calcStats(esd / emean, type = "median"),
     adj = c(1, 0))
text(x = 0.15, y = 5500, labels = calcStats(esd / emean, type = "mad"),
     adj = c(1, 0))

# In house (device 3): Cy5 channel
dev.set(3)
hist(Rsd / Rmean, main = InHouseMain, breaks = 80, xlim = c(0, 0.2),
     xlab = "Cy5 interspot coefficient of variation")
text(x = 0.15, y = 3000, labels = calcStats(Rsd / Rmean, type = "median"),
     adj = c(1, 0))
text(x = 0.15, y = 2700, labels = calcStats(Rsd / Rmean, type = "mad"),
     adj = c(1, 0))

# Pause briefly before the next histogram replaces this one
Sys.sleep(1)

# In house: Cy3 channel
hist(Gsd / Gmean, main = InHouseMain, breaks = 80, xlim = c(0, 0.2),
     xlab = "Cy3 interspot coefficient of variation")
text(x = 0.15, y = 3000, labels = calcStats(Gsd / Gmean, type = "median"),
     adj = c(1, 0))
text(x = 0.15, y = 2700, labels = calcStats(Gsd / Gmean, type = "mad"),
     adj = c(1, 0))

# Checking a dye swap: also about a 10% error added from dye to dye swap
# (0.009/0.01)
hist(RG1sd / RG1mean, main = InHouseMain, breaks = 80, xlim = c(0, 0.2),
     xlab = "Cy3/Cy5 interspot coefficient of variation")
text(x = 0.15, y = 3000,
     labels = calcStats(RG1sd / RG1mean, type = "median"), adj = c(1, 0))
text(x = 0.15, y = 2700,
     labels = calcStats(RG1sd / RG1mean, type = "mad"), adj = c(1, 0))

# The annotations use RG2mean so they describe the plotted data; the original
# divided RG2sd by RG1mean here (copy-paste slip).
hist(RG2sd / RG2mean, main = InHouseMain, breaks = 80, xlim = c(0, 0.2),
     xlab = "Cy3/Cy5 interspot coefficient of variation")
text(x = 0.15, y = 3000,
     labels = calcStats(RG2sd / RG2mean, type = "median"), adj = c(1, 0))
text(x = 0.15, y = 2700,
     labels = calcStats(RG2sd / RG2mean, type = "mad"), adj = c(1, 0))
# ---------------------- Empirical validation of Chisq ------------------------ #
# Draw a large sample from a chi-square distribution
df <- 10
ncp <- 0
empiricalSD <- rchisq(50000, df = df, ncp = ncp)

# Empirical spread of the sample, three ways
sqrt(var(empiricalSD))
sd(empiricalSD)
mad(empiricalSD)

# Theoretical standard deviation of a central chi-square with df degrees of
# freedom, for comparison with the values above
sqrt(2 * df)

# Sample distribution, and its quantiles against the Affymetrix SDs
hist(empiricalSD)
qqplot(empiricalSD, esd)