Difference between revisions of "Nunz-mik.R"

From Organic Design wiki
m
m
 
Line 1: Line 1:
 +
# {{R}}
 
auxillaryDir <- "/Volumes/HD2/Data/Nutrigenomics/MultipleScans/Auxillary"
 
auxillaryDir <- "/Volumes/HD2/Data/Nutrigenomics/MultipleScans/Auxillary"
 
dataDir      <- "/Volumes/HD2/Data/Nutrigenomics/MultipleScans/GPR"
 
dataDir      <- "/Volumes/HD2/Data/Nutrigenomics/MultipleScans/GPR"

Latest revision as of 00:42, 6 June 2007

Code snipits and programs written in R, S or S-PLUS auxillaryDir <- "/Volumes/HD2/Data/Nutrigenomics/MultipleScans/Auxillary" dataDir <- "/Volumes/HD2/Data/Nutrigenomics/MultipleScans/GPR"

  1. Load required R libraries.

library(limma)

  1. Read targets

targets<-readTargets(path=auxillaryDir) targets$Name <- paste(targets$Filetype, targets$Scan, sep="")

  1. Define weight function to read in the recorded flags in each GPR file

wt.obs<-function(gpr){

 flagged <- (gpr[, "Flags"])
 flagged

}

files <- dir(dataDir, pattern="gpr")

  1. Read in GPR files

RG<-read.maimages(files, names=substr(files,20,29), source="genepix",path=dataDir,

                  columns=list(
                    Rf = "F635 Median",
                    Gf = "F532 Median",
                    Rb = "B635 Median",
                    Gb = "B532 Median")) # names=targets$Name,

RG$printer <- getLayout(RG$genes)

  1. Means...
  2. RG<-read.maimages(files, names=substr(files,20,29), source="genepix.median",path=dataDir)
  1. Define object containing weights for each GPR file, and set weights to "NULL" in
  2. RG variables (otherwise weights are used in normalization).

RG.weights<-RG$weights

  1. mwd weights were not loaded in

RG$weights=NULL

save(list=c("RG","RG.weights","targets"),file="RGw.data")

  1. Set zero values in foreground and background to 0.5

load("RGw.data") library(limma)

  1. mwd There are none

RG$R[RG$R==0]<-0.5 RG$G[RG$G==0]<-0.5 RG$Rb[RG$Rb==0]<-0.5 RG$Gb[RG$Gb==0]<-0.5

  1. Create objects containing background corrected intensity data, and non-background
  2. (RGh) corrected intensity data (RGb).

RGb<-backgroundCorrect(RG,method="none") RGh<-backgroundCorrect(RG,method="half")

  1. Create MA objects based on RGb and RGh, prior to normalization.

MAh<-normalizeWithinArrays(RGh,method="none") MAb<-normalizeWithinArrays(RGb,method="none")

  1. Perform loess normalization on MAh and MAb.

MAh.n<-normalizeWithinArrays(RGh,method='loess') MAb.n<-normalizeWithinArrays(RGb,method='loess')

save(list=c("RGb","RGh","MAh","MAb","MAh.n","MAb.n","RG.weights"),file="normw.data")

  1. Image plots

load('RGw.data') for(i in 1:(dim(RG$R)[2]/8)){

 #mwd need to make the folders...
 fname<-paste("JPG/BGFG/BG_FG_",i,".jpg",sep="")
 jpeg(fname, height = 900, width=1200, quality=80, bg="white")
 par(mfrow=c(8,4))
 for(j in 1:8){
   ar<-(i-1)*8+j
   imageplot(log2(RG$Rb[,ar]), RG$printer, low="white", high="red")
   text(1,80,colnames(RG$R),pos=4)
   imageplot(log2(RG$Gb[,ar]), RG$printer, low="white", high="green")
   text(1,80,colnames(RG$R),pos=4)
   imageplot(log2(RG$R[,ar]), RG$printer, low="white", high="red")
   imageplot(log2(RG$G[,ar]), RG$printer, low="white", high="green")
 }
 dev.off()

}

  1. Create histograms of red and green background and foreground intensities
  2. for each array. Four arrays per page.

load('RGw.data') load('normw.data')

for(i in 1:(ncol(RG$R)/4)){

 fname<-paste("JPG/Hist/BGFG_hist_",i,".jpg",sep="")
 jpeg(fname, height = 900, width=1200, quality=80, bg="white")
  1. par(mfrow=c(4,5))
 par(mfrow=c(4,4))
 for(j in 1:4){
   ar<-(i-1)*4+j
   hist(log2(RG$Rb[,ar]),50,main=colnames(RG$R)[ar],xlab="log2(Rb)",col='red')
   hist(log2(RG$Gb[,ar]),50,main=colnames(RG$R)[ar],xlab="log2(Gb)",col='green')
   hist(log2(RG$R[,ar]),50,main=colnames(RG$R)[ar],xlab="log2(R)",col='red')
   hist(log2(RG$G[,ar]),50,main=colnames(RG$R)[ar],xlab="log2(G)",col='green')
  1. plot(density(log2(RG$G[,ar])),col='green',lty=1)
  2. lines(density(log2(RG$Rb[,ar])),col='red',lty=2)
  3. lines(density(log2(RG$R[,ar])),col='red',lty=1)
  4. lines(density(log2(RG$Gb[,ar])),col='green',lty=2)
 }
 dev.off()

}

  1. Boxplots for NBC data (Pre-norm and post-norm)
  2. mwd minor changes (repeated later)

lo<-grep("L",targets$Scan) md<-grep("A",targets$Scan) hi<-grep("H",targets$Scan)

jpeg("JPG/Boxplots/boxplot-full-nbc-pre.jpg",height=900,width=1200,quality=80, bg="white") boxplot(MAb$M~col(MAb$M),names=targets$Name,main="Pre-normalization (NBC): all arrays",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-lo-nbc-pre.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAb$M[,lo]~col(MAb$M[,lo]),names=targets$Name[lo],main="Pre-normalization (NBC): low scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-md-nbc-pre.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAb$M[,md]~col(MAb$M[,md]),names=targets$Name[md],main="Pre-normalization (NBC): medium scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-hi-nbc-pre.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAb$M[,hi]~col(MAb$M[,hi]),names=targets$Name[hi],main="Pre-normalization (NBC): high arrays",cex.lab=0.7) dev.off()

jpeg("JPG/Boxplots/boxplot-full-nbc-post.jpg",height=900,width=1200,quality=80, bg="white") boxplot(MAb.n$M~col(MAb.n$M),names=targets$Name,main="Post-normalization (NBC): all arrays",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-lo-nbc-post.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAb.n$M[,lo]~col(MAb.n$M[,lo]),names=targets$Name[lo],main="Post-normalization (NBC): low scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-md-nbc-post.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAb.n$M[,md]~col(MAb.n$M[,md]),names=targets$Name[md],main="Post-normalization (NBC): medium scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-hi-nbc-post.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAb.n$M[,hi]~col(MAb.n$M[,hi]),names=targets$Name[hi],main="Post-normalization (NBC): high arrays",cex.axis=0.7) dev.off()

  1. Boxplots for HBC data (Pre-norm and post-norm)

jpeg("JPG/Boxplots/boxplot-full-hbc-pre.jpg",height=900,width=1200,quality=80, bg="white") boxplot(MAh$M~col(MAh$M),names=targets$Name,main="Pre-normalization (HBC): all arrays",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-lo-hbc-pre.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAh$M[,lo]~col(MAh$M[,lo]),names=targets$Name[lo],main="Pre-normalization (HBC): low scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-md-hbc-pre.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAh$M[,md]~col(MAh$M[,md]),names=targets$Name[md],main="Pre-normalization (HBC): medium scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-hi-hbc-pre.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAh$M[,hi]~col(MAh$M[,hi]),names=targets$Name[hi],main="Pre-normalization (HBC): high arrays",cex.lab=0.7) dev.off()

jpeg("JPG/Boxplots/boxplot-full-hbc-post.jpg",height=900,width=1200,quality=80, bg="white") boxplot(MAh.n$M~col(MAh.n$M),names=targets$Name,main="Post-normalization (HBC): all arrays",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-lo-hbc-post.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAh.n$M[,lo]~col(MAh.n$M[,lo]),names=targets$Name[lo],main="Post-normalization (HBC): low scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-md-hbc-post.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAh.n$M[,md]~col(MAh.n$M[,md]),names=targets$Name[md],main="Post-normalization (HBC): medium scans",cex.axis=0.7) dev.off() jpeg("JPG/Boxplots/boxplot-hi-hbc-post.jpg",height=900,width=1200,quality=80,bg="white") boxplot(MAh.n$M[,hi]~col(MAh.n$M[,hi]),names=targets$Name[hi],main="Post-normalization (HBC): high arrays",cex.axis=0.7) dev.off()

  1. Mb.nn.hi<-normalizeBetweenArrays(MAb.n$M[,odd])
  2. Mb.nn.med<-normalizeBetweenArrays(MAb.n$M[,even])
  3. Mh.nn.hi<-normalizeBetweenArrays(MAh.n$M[,odd])
  4. Mh.nn.med<-normalizeBetweenArrays(MAh.n$M[,even])
  5. save(list=c("Mh.nn.hi","Mh.nn.med","Mb.nn.hi","Mb.nn.med"),file="norm2w.data")
  1. Find saturated spots and create combined data sets

load("RGw.data") load("normw.data")

  1. load("norm2w.data")

sat.r<-RG$R==(2^16-1) sat.g<-RG$G==(2^16-1) sat<-sat.r | sat.g

  1. mwd minor name changes

lo<-grep("L",targets$Scan) md<-grep("A",targets$Scan) hi<-grep("H",targets$Scan) tsat<-sat[,lo] & sat[,md] & sat[,hi]

sum(tsat) sum(apply(tsat,1,sum)>0)

MCb.n<-MAb.n MCb.n$M<-MAb.n$M[,hi] MCb.n$M[sat[,hi]]<-MAb.n$M[,md][sat[,hi]] MCb.n$M[sat[,md]]<-MAb.n$M[,lo][sat[,md]] MCb.n$A<-MAb.n$A[,hi] MCb.n$A[sat[,hi]]<-MAb.n$A[,md][sat[,hi]] MCb.n$A[sat[,md]]<-MAb.n$A[,lo][sat[,md]]

MCh.n<-MAh.n MCh.n$M<-MAh.n$M[,hi] MCh.n$M[sat[,hi]]<-MAh.n$M[,md][sat[,hi]] MCh.n$M[sat[,md]]<-MAh.n$M[,lo][sat[,md]] MCh.n$A<-MAh.n$A[,hi] MCh.n$A[sat[,hi]]<-MAh.n$A[,md][sat[,hi]] MCh.n$A[sat[,md]]<-MAh.n$A[,lo][sat[,md]]

  1. boxplot(MCh.n~col(MCh.n))
  2. boxplot(MCh.n~col(MCh.n))
  1. par(mfrow=c(3,4))
  2. for(i in 1:ncol(MCb.n$M)){
  3. plotMA(MCb.n,array=i)
  4. }
  5. par(mfrow=c(3,4))
  6. for(i in 1:ncol(MCh.n$M)){
  7. plotMA(MCh.n,array=i)
  8. }
  1. Linear model analysis

targ<-targets[md,] design<-matrix(c(0, 0,1, 0, 0,0,-1,0,0, 0,0,0, # C.25

                0, 0,0,-1, 0,0, 0,1,0, 0,0,0,  # M.I10.25
                1, 0,0, 0,-1,0, 0,0,0, 0,0,0,  # C.12
                0,-1,0, 0, 0,0, 0,0,0, 0,1,0,  # M.I0.12
                0, 0,0, 0, 0,0, 0,0,1,-1,0,0,  # M.I5.22                 
                ),12,5)

colnames(design)<-c("C.25","M.I10.25","C.12","M.I0.12","M.I5.22") rownames(design) <- c("P1","P10","P11","P12","P2","P3","P4","P5","P7","P8","P9","Ref") design

  1. rownames(design)<-targets$Name[md]

MM<-MCb.n[,c(1:5,7:11)] design<-design[c(1:5,7:11),]

fit<-lmFit(MM,design) cont.matrix<-makeContrasts(C.25vsM.I10.25=C.25-M.I10.25, # Control versus MDR1 at 25wks

                          C.12vsM.I0.12=C.12-M.I0.12,         # Control versus MDR1 at 12 wks
                          C.25vsC.12=C.25-C.12,               # Control at 12wks versus 25wks
                          M.I10.25vsM.I0.12=M.I10.25-M.I0.12, # MDR1 12wks versus 25wks
                          levels=design)

fit2<-contrasts.fit(fit,cont.matrix) fit2<-eBayes(fit2)

  1. ===============================================================================
  2. first data point

MM$M[1,]

  1. fit coefficients

fit$coef[1,]

  1. check

sum(MM$M[1,] * design[,1])/2 sum(MM$M[1,] * design[,3])/2

  1. Illustrates what contrasts matrix is doing

all.equal(fit$coef %*% cont.matrix, fit2$coef) fit2$coef[1,]

  1. check for C.25vsC.12

sum(MM$M[1,] * design[,1])/2 - sum(MM$M[1,] * design[,3])/2

  1. ===============================================================================

par(mfrow=c(2,2)) p.n<-c("C.25vsM.I10.25","C.12vsM.I0.12","C.25vsC.12","M.I10.25vsM.I0.12") tt<-kk<-list() for(i in 1:length(p.n)){

 tti<-toptable(fit2,coef=i,adjust="BH",number=nrow(RG$R))
 kki<-as.numeric(rownames(tti)[1:20])

} for(i in 1:length(p.n)){ # rewrites over normalized MA object

 MA<-list()
 MA$M<-fit2$coef[,i]
 MA$A<-fit2$Amean
 plotMA(MA,main=p.n[i],ylim=c(-4,4))
 points(fit2$Amean[kki],fit2$coef[kki,i],pch=16,col="red")
  1. points(fit2$Amean[kkabs(i-3)],fit2$coef[kkabs(i-3),i],pch=16,col="green")

}

write.table(file="out.txt",RG$genes$Name[unlist(kk)],quote=F,row.names=F,col.names=F)

ord1<-order(as.numeric(rownames(tt1))) ord2<-order(as.numeric(rownames(tt2))) ord3<-order(as.numeric(rownames(tt3))) ord4<-order(as.numeric(rownames(tt4))) out.mat<-cbind(MCb.n$genes$ID,

              MCb.n$genes$Name,
              MCb.n$M[,1],-MCb.n$M[,5],
              MCb.n$M[,3],MCb.n$M[,6],-MCb.n$M[,7],
              -MCb.n$M[,2],MCb.n$M[,11],
              -MCb.n$M[,4],MCb.n$M[,8],
              MCb.n$M[,9],-MCb.n$M[,10],
              MCb.n$M[,12],
  1. tt1[as.numeric(rownames(tt1)),1:3],
  2. tt2[as.numeric(rownames(tt2)),1:3],
  3. tt3[as.numeric(rownames(tt3)),1:3],
  4. tt4[as.numeric(rownames(tt4)),1:3])
              tt1[ord1,1:3],
              tt2[ord2,1:3],
              tt3[ord3,1:3],
              tt4[ord4,1:3])

colnames(out.mat)<-c("ProbeID","ProbeName",

                    paste("Pool",c("1.C12","10.M.I0.12","11.C25","12.M.I10.25","2.C.12","3.C.25","4.C25","5.M.I10.25","7.M.I5.22","8.M.I5.22","9.M.I0.12")[c(1,5,3,6,7,2,11,4,8,9,10)],sep=""),"Ref",
                    paste(c("log2FC","t.stat","p.value"),".",p.n[1],sep=""),
                    paste(c("log2FC","t.stat","p.value"),".",p.n[2],sep=""),
                    paste(c("log2FC","t.stat","p.value"),".",p.n[3],sep=""),
                    paste(c("log2FC","t.stat","p.value"),".",p.n[4],sep=""))
  1. Output seems to be getting confused here

write.table(out.mat,file='results.txt',sep='\t',row.names=F,col.names=T,quote=F)