AffyVsInHouse.R
library(limma) library(affy)
packageDescription("limma", field="Version") packageDescription("affy", field="Version")
vignette("affy")
- -------------------------------- Affymetrix --------------------------------- #
if(0) { # Change for HortResearch
dataDir <- "/Volumes/HD2/Max Planck/Data/Affy/DayNight/Celfiles"
} else {
dataDir <- "/Users/admin/Desktop/DayNight/Celfiles/" Sys.putenv("DISPLAY"=":0")
}
dset<- ReadAffy(filenames=file.path(dataDir, dir(dataDir, pattern=".CEL")), widget = F) # loads CEL files into an affybatch object
un <- ".CEL" # remove extra names sampleNames(dset) <- gsub(un, "", sampleNames(dset))
- Obtaining indexes of sampleNames (affy slides) of interest
technicalreps <- grep("00 G048", sampleNames(dset)) techset <- dset[,technicalreps]
- Normalization
erma <- rma(techset)
- --------------------------------- In house ---------------------------------- #
library(limma)
if(0) { # Change for HortResearch
}else {
dataDir <- "/Users/admin/Desktop/Directories/VariabilityStudy/Data"
} files <- dir(dataDir, pattern="gpr")
- Examine genepix, genepix.median
RG <- read.maimages(files, path=dataDir, source="genepix", wt.fun=wtflags(0))
- Visually AC3 and AC4 most similar
pairs(log2(RG$R), pch=".") pairs(log2(RG$G), pch=".")
RG <- RG[,c("AC3","AC4")]
- All spots
nrow(RG)
- Good spots
apply(RG$weights, 2, sum)
- Bad spots
nrow(RG) - apply(RG$weights, 2, sum)
- Normalization (loess, printtiploess)
MA <- normalizeWithinArrays(RG, method="loess", bc.method="none") RG.norm <- RG.MA(MA)
- --------------------------- Comparison plots -------------------------------- #
- Setup
size <- 8 AffyMain <- "Affymetrix technical replication" InHouseMain <- "In house technical replication"
graphics.off() X11(xpos=0, ypos=0, width=size, height=size) X11(xpos=600, ypos=0, width=size, height=size) dev.list()
- Pairs plots
pairs(log2(RG.norm$R), pch=".") pairs(log2(RG.norm$G), pch=".")
dev.set(2) plot(exprs(erma), main = AffyMain, pch=".") dev.set(3) plot(log2(RG.norm$R), main = InHouseMain, pch=".")
- Histograms
- Mean distributions
emean <- apply(exprs(erma), 1, mean) Rmean <- apply(log2(RG.norm$R), 1, mean) Gmean <- apply(log2(RG.norm$G), 1, mean)
dev.set(2) hist(emean, main=AffyMain, xlim=c(4,16), xlab="Average signal") dev.set(3) hist(Rmean, main=InHouseMain, xlim=c(4,16), xlab="Cy5 Average signal") Sys.sleep(1) hist(Gmean, main=InHouseMain, xlim=c(4,16), xlab="Cy3 Average signal")
- SD distributions
esd <- apply(exprs(erma), 1, sd) Rsd <- apply(log2(RG.norm$R), 1, sd) Gsd <- apply(log2(RG.norm$G), 1, sd)
dev.set(2) hist(esd, main=AffyMain, breaks = 12, xlim=c(0,1.5), xlab="Standard deviation") dev.set(3) hist(Rsd, main=InHouseMain, breaks = 100, xlim=c(0,1.5), xlab="Cy3 interspot standard deviation") Sys.sleep(1) hist(Gsd, main=InHouseMain, breaks = 100, xlim=c(0,1.5), xlab="Cy5 interspot standard deviation")
- summary stats
iqr <- function(x, qlims=c(0.25, 0.75)) {
IQR <- quantile(x, qlims[2]) - quantile(x, qlims[1]) return(IQR)
}
mean(esd) mean(Rsd) mean(Gsd)
sqrt(mean(esd^2)) sqrt(mean(Rsd^2)) sqrt(mean(Gsd^2))
iqr(esd) iqr(Rsd) iqr(Gsd)
- CV
dev.set(2) hist(esd/emean, main=AffyMain, breaks = 12, xlim=c(0,0.2), xlab="Coefficient of variation") dev.set(3) hist(Rsd/Rmean, main=InHouseMain, breaks=80, xlim=c(0,0.2), xlab="Cy5 interspot coefficient of variation") Sys.sleep(1) hist(Gsd/Gmean, main=InHouseMain, breaks=80, xlim=c(0,0.2), xlab="Cy3 interspot coefficient of variation")