Difference between revisions of "Nunz-mik.R"
m |
|||
(One intermediate revision by the same user not shown) | |||
Line 1: | Line 1: | ||
+ | # {{R}} | ||
auxillaryDir <- "/Volumes/HD2/Data/Nutrigenomics/MultipleScans/Auxillary" | auxillaryDir <- "/Volumes/HD2/Data/Nutrigenomics/MultipleScans/Auxillary" | ||
dataDir <- "/Volumes/HD2/Data/Nutrigenomics/MultipleScans/GPR" | dataDir <- "/Volumes/HD2/Data/Nutrigenomics/MultipleScans/GPR" | ||
Line 5: | Line 6: | ||
# Load required R libraries. | # Load required R libraries. | ||
###################################################################################### | ###################################################################################### | ||
− | library(limma) | + | library(limma) |
# Read targets | # Read targets |
Latest revision as of 00:42, 6 June 2007
Code snipits and programs written in R, S or S-PLUS auxillaryDir <- "/Volumes/HD2/Data/Nutrigenomics/MultipleScans/Auxillary" dataDir <- "/Volumes/HD2/Data/Nutrigenomics/MultipleScans/GPR"
- Load required R libraries.
library(limma)
- Read targets
targets<-readTargets(path=auxillaryDir) targets$Name <- paste(targets$Filetype, targets$Scan, sep="")
- Define weight function to read in the recorded flags in each GPR file
wt.obs<-function(gpr){
flagged <- (gpr[, "Flags"]) flagged
}
files <- dir(dataDir, pattern="gpr")
- Read in GPR files
RG<-read.maimages(files, names=substr(files,20,29), source="genepix",path=dataDir,
columns=list( Rf = "F635 Median", Gf = "F532 Median", Rb = "B635 Median", Gb = "B532 Median")) # names=targets$Name,
RG$printer <- getLayout(RG$genes)
- Means...
- RG<-read.maimages(files, names=substr(files,20,29), source="genepix.median",path=dataDir)
- Define object containing weights for each GPR file, and set weights to "NULL" in
- RG variables (otherwise weights are used in normalization).
RG.weights<-RG$weights
- mwd weights were not loaded in
RG$weights=NULL
save(list=c("RG","RG.weights","targets"),file="RGw.data")
- Set zero values in foreground and background to 0.5
load("RGw.data") library(limma)
- mwd There are none
RG$R[RG$R==0]<-0.5 RG$G[RG$G==0]<-0.5 RG$Rb[RG$Rb==0]<-0.5 RG$Gb[RG$Gb==0]<-0.5
- Create objects containing background corrected intensity data, and non-background
- (RGh) corrected intensity data (RGb).
RGb<-backgroundCorrect(RG,method="none") RGh<-backgroundCorrect(RG,method="half")
- Create MA objects based on RGb and RGh, prior to normalization.
MAh<-normalizeWithinArrays(RGh,method="none") MAb<-normalizeWithinArrays(RGb,method="none")
- Perform loess normalization on MAh and MAb.
MAh.n<-normalizeWithinArrays(RGh,method='loess') MAb.n<-normalizeWithinArrays(RGb,method='loess')
save(list=c("RGb","RGh","MAh","MAb","MAh.n","MAb.n","RG.weights"),file="normw.data")
- Image plots
load('RGw.data') for(i in 1:(dim(RG$R)[2]/8)){
#mwd need to make the folders... fname<-paste("JPG/BGFG/BG_FG_",i,".jpg",sep="") jpeg(fname, height = 900, width=1200, quality=80, bg="white") par(mfrow=c(8,4)) for(j in 1:8){ ar<-(i-1)*8+j imageplot(log2(RG$Rb[,ar]), RG$printer, low="white", high="red") text(1,80,colnames(RG$R),pos=4) imageplot(log2(RG$Gb[,ar]), RG$printer, low="white", high="green") text(1,80,colnames(RG$R),pos=4) imageplot(log2(RG$R[,ar]), RG$printer, low="white", high="red") imageplot(log2(RG$G[,ar]), RG$printer, low="white", high="green") } dev.off()
}
- Create histograms of red and green background and foreground intensities
- for each array. Four arrays per page.
load('RGw.data') load('normw.data')
for(i in 1:(ncol(RG$R)/4)){
fname<-paste("JPG/Hist/BGFG_hist_",i,".jpg",sep="") jpeg(fname, height = 900, width=1200, quality=80, bg="white")
- par(mfrow=c(4,5))
par(mfrow=c(4,4)) for(j in 1:4){ ar<-(i-1)*4+j hist(log2(RG$Rb[,ar]),50,main=colnames(RG$R)[ar],xlab="log2(Rb)",col='red') hist(log2(RG$Gb[,ar]),50,main=colnames(RG$R)[ar],xlab="log2(Gb)",col='green') hist(log2(RG$R[,ar]),50,main=colnames(RG$R)[ar],xlab="log2(R)",col='red') hist(log2(RG$G[,ar]),50,main=colnames(RG$R)[ar],xlab="log2(G)",col='green')
- plot(density(log2(RG$G[,ar])),col='green',lty=1)
- lines(density(log2(RG$Rb[,ar])),col='red',lty=2)
- lines(density(log2(RG$R[,ar])),col='red',lty=1)
- lines(density(log2(RG$Gb[,ar])),col='green',lty=2)
} dev.off()
}
- Boxplots for NBC data (Pre-norm and post-norm)
- mwd minor changes (repeated later)
lo<-grep("L",targets$Scan) md<-grep("A",targets$Scan) hi<-grep("H",targets$Scan)
jpeg("JPG/Boxplots/boxplot-full-nbc-pre.jpg",height=900,width=1200,quality=80, bg="white") boxplot(MAb$M~col(MAb$M),names=targets$Name,main="Pre-normalization (NBC): all arrays",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-lo-nbc-pre.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAb$M[,lo]~col(MAb$M[,lo]),names=targets$Name[lo],main="Pre-normalization (NBC): low scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-md-nbc-pre.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAb$M[,md]~col(MAb$M[,md]),names=targets$Name[md],main="Pre-normalization (NBC): medium scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-hi-nbc-pre.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAb$M[,hi]~col(MAb$M[,hi]),names=targets$Name[hi],main="Pre-normalization (NBC): high arrays",cex.lab=0.7) dev.off()
jpeg("JPG/Boxplots/boxplot-full-nbc-post.jpg",height=900,width=1200,quality=80, bg="white") boxplot(MAb.n$M~col(MAb.n$M),names=targets$Name,main="Post-normalization (NBC): all arrays",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-lo-nbc-post.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAb.n$M[,lo]~col(MAb.n$M[,lo]),names=targets$Name[lo],main="Post-normalization (NBC): low scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-md-nbc-post.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAb.n$M[,md]~col(MAb.n$M[,md]),names=targets$Name[md],main="Post-normalization (NBC): medium scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-hi-nbc-post.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAb.n$M[,hi]~col(MAb.n$M[,hi]),names=targets$Name[hi],main="Post-normalization (NBC): high arrays",cex.axis=0.7) dev.off()
- Boxplots for HBC data (Pre-norm and post-norm)
jpeg("JPG/Boxplots/boxplot-full-hbc-pre.jpg",height=900,width=1200,quality=80, bg="white") boxplot(MAh$M~col(MAh$M),names=targets$Name,main="Pre-normalization (HBC): all arrays",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-lo-hbc-pre.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAh$M[,lo]~col(MAh$M[,lo]),names=targets$Name[lo],main="Pre-normalization (HBC): low scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-md-hbc-pre.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAh$M[,md]~col(MAh$M[,md]),names=targets$Name[md],main="Pre-normalization (HBC): medium scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-hi-hbc-pre.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAh$M[,hi]~col(MAh$M[,hi]),names=targets$Name[hi],main="Pre-normalization (HBC): high arrays",cex.lab=0.7) dev.off()
jpeg("JPG/Boxplots/boxplot-full-hbc-post.jpg",height=900,width=1200,quality=80, bg="white") boxplot(MAh.n$M~col(MAh.n$M),names=targets$Name,main="Post-normalization (HBC): all arrays",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-lo-hbc-post.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAh.n$M[,lo]~col(MAh.n$M[,lo]),names=targets$Name[lo],main="Post-normalization (HBC): low scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-md-hbc-post.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAh.n$M[,md]~col(MAh.n$M[,md]),names=targets$Name[md],main="Post-normalization (HBC): medium scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-hi-hbc-post.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAh.n$M[,hi]~col(MAh.n$M[,hi]),names=targets$Name[hi],main="Post-normalization (HBC): high arrays",cex.axis=0.7) dev.off()
- Mb.nn.hi<-normalizeBetweenArrays(MAb.n$M[,odd])
- Mb.nn.med<-normalizeBetweenArrays(MAb.n$M[,even])
- Mh.nn.hi<-normalizeBetweenArrays(MAh.n$M[,odd])
- Mh.nn.med<-normalizeBetweenArrays(MAh.n$M[,even])
- save(list=c("Mh.nn.hi","Mh.nn.med","Mb.nn.hi","Mb.nn.med"),file="norm2w.data")
- Find saturated spots and create combined data sets
load("RGw.data") load("normw.data")
- load("norm2w.data")
sat.r<-RG$R==(2^16-1) sat.g<-RG$G==(2^16-1) sat<-sat.r | sat.g
- mwd minor name changes
lo<-grep("L",targets$Scan) md<-grep("A",targets$Scan) hi<-grep("H",targets$Scan) tsat<-sat[,lo] & sat[,md] & sat[,hi]
sum(tsat) sum(apply(tsat,1,sum)>0)
MCb.n<-MAb.n MCb.n$M<-MAb.n$M[,hi] MCb.n$M[sat[,hi]]<-MAb.n$M[,md][sat[,hi]] MCb.n$M[sat[,md]]<-MAb.n$M[,lo][sat[,md]] MCb.n$A<-MAb.n$A[,hi] MCb.n$A[sat[,hi]]<-MAb.n$A[,md][sat[,hi]] MCb.n$A[sat[,md]]<-MAb.n$A[,lo][sat[,md]]
MCh.n<-MAh.n MCh.n$M<-MAh.n$M[,hi] MCh.n$M[sat[,hi]]<-MAh.n$M[,md][sat[,hi]] MCh.n$M[sat[,md]]<-MAh.n$M[,lo][sat[,md]] MCh.n$A<-MAh.n$A[,hi] MCh.n$A[sat[,hi]]<-MAh.n$A[,md][sat[,hi]] MCh.n$A[sat[,md]]<-MAh.n$A[,lo][sat[,md]]
- boxplot(MCh.n~col(MCh.n))
- boxplot(MCh.n~col(MCh.n))
- par(mfrow=c(3,4))
- for(i in 1:ncol(MCb.n$M)){
- plotMA(MCb.n,array=i)
- }
- par(mfrow=c(3,4))
- for(i in 1:ncol(MCh.n$M)){
- plotMA(MCh.n,array=i)
- }
- Linear model analysis
targ<-targets[md,] design<-matrix(c(0, 0,1, 0, 0,0,-1,0,0, 0,0,0, # C.25
0, 0,0,-1, 0,0, 0,1,0, 0,0,0, # M.I10.25 1, 0,0, 0,-1,0, 0,0,0, 0,0,0, # C.12 0,-1,0, 0, 0,0, 0,0,0, 0,1,0, # M.I0.12 0, 0,0, 0, 0,0, 0,0,1,-1,0,0, # M.I5.22 ),12,5)
colnames(design)<-c("C.25","M.I10.25","C.12","M.I0.12","M.I5.22") rownames(design) <- c("P1","P10","P11","P12","P2","P3","P4","P5","P7","P8","P9","Ref") design
- rownames(design)<-targets$Name[md]
MM<-MCb.n[,c(1:5,7:11)] design<-design[c(1:5,7:11),]
fit<-lmFit(MM,design) cont.matrix<-makeContrasts(C.25vsM.I10.25=C.25-M.I10.25, # Control versus MDR1 at 25wks
C.12vsM.I0.12=C.12-M.I0.12, # Control versus MDR1 at 12 wks C.25vsC.12=C.25-C.12, # Control at 12wks versus 25wks M.I10.25vsM.I0.12=M.I10.25-M.I0.12, # MDR1 12wks versus 25wks levels=design)
fit2<-contrasts.fit(fit,cont.matrix) fit2<-eBayes(fit2)
- ===============================================================================
- first data point
MM$M[1,]
- fit coefficients
fit$coef[1,]
- check
sum(MM$M[1,] * design[,1])/2 sum(MM$M[1,] * design[,3])/2
- Illustrates what contrasts matrix is doing
all.equal(fit$coef %*% cont.matrix, fit2$coef) fit2$coef[1,]
- check for C.25vsC.12
sum(MM$M[1,] * design[,1])/2 - sum(MM$M[1,] * design[,3])/2
- ===============================================================================
par(mfrow=c(2,2)) p.n<-c("C.25vsM.I10.25","C.12vsM.I0.12","C.25vsC.12","M.I10.25vsM.I0.12") tt<-kk<-list() for(i in 1:length(p.n)){
tti<-toptable(fit2,coef=i,adjust="BH",number=nrow(RG$R)) kki<-as.numeric(rownames(tti)[1:20])
} for(i in 1:length(p.n)){ # rewrites over normalized MA object
MA<-list() MA$M<-fit2$coef[,i] MA$A<-fit2$Amean plotMA(MA,main=p.n[i],ylim=c(-4,4)) points(fit2$Amean[kki],fit2$coef[kki,i],pch=16,col="red")
}
write.table(file="out.txt",RG$genes$Name[unlist(kk)],quote=F,row.names=F,col.names=F)
ord1<-order(as.numeric(rownames(tt1))) ord2<-order(as.numeric(rownames(tt2))) ord3<-order(as.numeric(rownames(tt3))) ord4<-order(as.numeric(rownames(tt4))) out.mat<-cbind(MCb.n$genes$ID,
MCb.n$genes$Name, MCb.n$M[,1],-MCb.n$M[,5], MCb.n$M[,3],MCb.n$M[,6],-MCb.n$M[,7], -MCb.n$M[,2],MCb.n$M[,11], -MCb.n$M[,4],MCb.n$M[,8], MCb.n$M[,9],-MCb.n$M[,10], MCb.n$M[,12],
- tt1[as.numeric(rownames(tt1)),1:3],
- tt2[as.numeric(rownames(tt2)),1:3],
- tt3[as.numeric(rownames(tt3)),1:3],
- tt4[as.numeric(rownames(tt4)),1:3])
tt1[ord1,1:3], tt2[ord2,1:3], tt3[ord3,1:3], tt4[ord4,1:3])
colnames(out.mat)<-c("ProbeID","ProbeName",
paste("Pool",c("1.C12","10.M.I0.12","11.C25","12.M.I10.25","2.C.12","3.C.25","4.C25","5.M.I10.25","7.M.I5.22","8.M.I5.22","9.M.I0.12")[c(1,5,3,6,7,2,11,4,8,9,10)],sep=""),"Ref", paste(c("log2FC","t.stat","p.value"),".",p.n[1],sep=""), paste(c("log2FC","t.stat","p.value"),".",p.n[2],sep=""), paste(c("log2FC","t.stat","p.value"),".",p.n[3],sep=""), paste(c("log2FC","t.stat","p.value"),".",p.n[4],sep=""))
- Output seems to be getting confused here
write.table(out.mat,file='results.txt',sep='\t',row.names=F,col.names=T,quote=F)