GDNA analysis.R

Code snipits and programs written in R, S or S-PLUS library(limma) source("plotDensities.R") dataDir <- "/Volumes/HD2/Data/Rosaceae/GPR" files <- dir(dataDir, pattern=".gpr") printRun <- substr(files,0,2)

for( i in unique(printRun) ) {

 fname <- paste("RG.",i,sep="")
 tmp <- read.maimages(files=files[printRun==i], path=dataDir, source="genepix")

assign(paste("RG.",i,sep=""), tmp)

 assign(fname, tmp)

} save.image()

Read in mapping file

mappingfname <- "gDNACy3Cy5mapping.csv" targets <- read.csv(file.path(dataDir, mappingfname), as.is=TRUE)

targets <- targets[-(61:64),]

for( i in unique(printRun)[-2]) {

 RGname <- paste("RG.",i, sep="")
 OBJ <- get(RGname)

Grab names of mapping files

 Cy3files <- targets$gDNA.Cy3[grep(i, targets$gDNA.Cy3)]
 Cy5files <- targets$gDNA.Cy5[grep(i, targets$gDNA.Cy5)]

Check files are in loaded RG.AC object

 Cy5common <- Cy5files[ Cy5files %in% colnames(OBJ)  ]
 Cy3common <- Cy3files[ Cy3files %in% colnames(OBJ)  ]

Subset
? plotDensities
singlechannels: vector of integers indicating which individual-channels
will be selected to be plotted. Values correspond to the
columns of the matrix of 'cbind(R,G)' and range between
'1:ncol(R)' for red channels and '(
(ncol(R)+1):(ncol(R)+ncol(G)) )' for the green channels in
'object'. Defaults to all channels.

graphics.off() X11(width=6, height=6, xpos=1, ypos=1) X11(width=6, height=6, xpos=1, ypos=493) X11(width=6, height=6, xpos=450, ypos=1) X11(width=6, height=6, xpos=450, ypos=493)

 ylim <- c(0,0.3)

dev.set(2) nCy5 <- ncol(OBJ[,Cy5common]) nCy3 <- ncol(OBJ[,Cy3common])

plotDensities(OBJ[,Cy5common], singlechannels=1:nCy5, ylim=ylim) mtext(paste(RGname, "Red Genomic"),3)

dev.set(3) plotDensities(OBJ[,Cy5common], singlechannels=(nCy5+1):(2*nCy5), ylim=ylim) mtext(paste(RGname,"Green RNA"),3)

dev.set(4) plotDensities(OBJ[,Cy3common], singlechannels=(nCy3+1):(2*nCy3), ylim=ylim) mtext(paste(RGname,"Green Genomic"),3)

dev.set(5) plotDensities(OBJ[,Cy3common], singlechannels=1:nCy3, ylim=ylim) mtext(paste(RGname,"Red RNA"),3) Sys.sleep(5) }

ls(pattern="RG\\.")

identical(RG.AC$genes, RG.AF$genes) # AC same as AF print only identical(RG.AF$genes, RG.AL$genes) identical(RG.AL$genes, RG.AO$genes) identical(RG.AO$genes, RG.AQ$genes) identical(RG.AQ$genes, RG.AU$genes) identical(RG.AU$genes, RG.BC$genes) identical(RG.BC$genes, RG.BE$genes) identical(RG.BE$genes, RG.BM$genes)

Merging the print runs

tmp <- merge.RGList(RG.AC, RG.AF) tmp <- merge.RGList(tmp, RG.AL) tmp <- merge.RGList(tmp, RG.AO) tmp <- merge.RGList(tmp, RG.AQ) tmp <- merge.RGList(tmp, RG.AU) tmp <- merge.RGList(tmp, RG.BC) tmp <- merge.RGList(tmp, RG.BE) RG <- merge.RGList(tmp, RG.BM)

match(substr(colnames(RG),1,2), substr(ls(pattern="RG\\."),4,5))

Quick check that merging is working

cbind(RG.AC$R[match(RG.AC$genes$Name, tmp$genes$Name),], tmp$R[match(tmp$genes$Name, RG.AC$genes$Name),])[1:1000,c("AC1", "AC1")]

Robert Schaffers approach to normalizing gDNA/RNA dye swap pairs

----------------------------------------------------------------------------- #
-------------------- Genomic DNA Normalisation Script ---------------------- #
------ Need microarray .gpr files from Genepix or other software package ---- #
------ SlideSummary file set up as described in limma ---------------------- #
------ >= R-2.0.0 (URL http://cran.r-project.org/) ------------------------ #
-- limma package version 1.8.6 or above (URL http://www.bioconductor.org/) - #
----------------------------------------------------------------------------- #

gDNA_reference_norm<-function(Rawdata){ ### Select the gDNA channels and RNA channels into two separate files gDNA<-cbind(Rawdata$R[,targets[grep("gDNA",targets[,4],),1]], Rawdata$G[,targets[grep("gDNA",targets[,3],),1]]) RNA<-cbind(Rawdata$G[,targets[grep("RNA",targets[,3],),1]], Rawdata$R[,targets[grep("RNA",targets[,4],),1]])

### reorder so dye swaps are next to each other

gDNA<-gDNA[,match(targets[,2],dimnames(gDNA)2)] RNA<-RNA[,match(targets[,2],dimnames(RNA)2)]

### floor intensities that are less than 100 to 100

temp<-gDNA < 100 gDNA[temp] <- 100 temp<-RNA < 100 RNA[temp] <- 100

### flagged spots converted to NA (remove from analysis)

gDNA[Rawdata$weights == 0] <- NA RNA[Rawdata$weights == 0] <- NA

### global mean normalise

for (i in 1:length(gDNA[1,])){ gDNA[,i]<- 2^(log(gDNA[,i],2)- mean(log(gDNA[,i],2),na.rm=T)) }

for (i in 1:length(RNA[1,])){ RNA[,i]<- 2^(log(RNA[,i],2)- mean(log(RNA[,i],2),na.rm=T)) }

### normalise Quantiles (script within limma)

gDNAnorm<-normalizeQuantiles(gDNA) RNAnorm<-normalizeQuantiles(RNA)

### create ratios of RNA compared to gDNA (in log base 2 space)

RNAgDNArat<-RNAnorm/gDNAnorm RNAgDNAratlog<-log(RNAgDNArat,2)

### loess smooth M values

RNAgDNAratloglo<-matrix(data=NA,length(RNAgDNAratlog[,1]),length(RNAgDNAratlog[1,])) for (i in seq(1,length(RNAgDNAratlog[1,])-1,by=2)){ tempc<-matrix(data=NA,length(RNAgDNAratlog[,1]),2) tempa<-RNAgDNAratlog[,c(i,i+1)] tempb<-is.na(tempa) tempnam<-paste("a",1:length(RNAgDNAratlog[,1]),sep="") row.names(tempa)<-tempnam tempa[tempb[,1],1]<- tempa[tempb[,1],2] tempa[tempb[,2],2]<- tempa[tempb[,2],1] tempa<-na.omit(tempa) Mav <- (tempa[,1] + tempa[,2])/2 Mrat <- tempa[,1] - tempa[,2] Mrat.out <- Mrat - loess( Mrat ~ Mav,span=0.4,degree=2)$fitted Mrat.out<-Mrat.out[match(tempnam,dimnames(tempa)1)] Mav<-Mav[match(tempnam,dimnames(tempa)1)] tempc[,1] <- Mav + Mrat.out/2 tempc[,2] <- Mav - Mrat.out/2 tempc[tempb]<- NA RNAgDNAratloglo[,c(i,i+1)] <- tempc }

RNAgDNAratlo<-2^RNAgDNAratloglo

### Create Absolute RNA intensities

gDNAmedVals<-apply(gDNAnorm,1,median,na.rm=T)

AbsRNANorm<-RNAgDNAratlo*gDNAmedVals

return(AbsRNANorm) }

1. 1. Script

library(limma)

auxillaryDir <- "/Volumes/HD2/Data/MDMib7FourSlides/Auxillary" dataDir <- "/Volumes/HD2/Data/MDMib7FourSlides/GPR"

1. 1. set the directory to a working directory containing gpr files and slide summary (eg labelled microarray1)

1. 1. read in summary (script within limma)

targets <- readTargets(file.path(auxillaryDir, "targets.txt"))

1. 1. Read in datafiles (this example is with genepix, other formats available)

RG <- read.maimages(targets$filename, path=dataDir, source="genepix.median", wt.fun=wtflags(0))

1. 1. normalise

RGmatrix <- gDNA_reference_norm(RG)

mergeRG <- function(R, G, Rb, Gb) {

----------------------------------------------------------------------------- #
Function for combining elements of two similar RGLists
together for the purposes of exploratory graphical investigation
Author: Marcus Davy
----------------------------------------------------------------------------- #

 obj <- new("RGList")
 obj$R <- R
 obj$G <- G
 if(!(missing(Rb))) {
   obj$Rb <- Rb
 }
 if(!(missing(Gb))) {
   obj$Gb <- Gb
 }
 return(obj)

}

RGnorm <- mergeRG(R = RGmatrix[,c(1,3)], G = RGmatrix[,c(2,4)]) plot(log2(RGnorm$R[,1]), log2(RGnorm$G[,1])) plotMA(RGnorm)

GDNA analysis.R

Navigation menu

Views

Personal tools

Navigation

Search

Navigation

Blogs

Recent activity

Site map

Tools