NutriTools.R
Code snipits and programs written in R, S or S-PLUS checkall <- function() {
require(codetools) require(limma) for(i in ls(env=.GlobalEnv)) { cat(i, sep = "\n") checkUsage(get(i)) cat(paste(rep("-", 80), collapse="", sep=""), sep="\n") } return("done")
} clusterPlot <- function(x, rowNames = NULL, dmethod = "euclidian", hmethod="complete", ...) {
oldpar <- par(mfrow=c(2,1))
if(is.character(rowNames) & (length(rowNames) == nrow(x))) { rownames(x) <- rowNames } xDist <- dist(x, method=dmethod)
- 1) Tree dendrogram
xClust <- hclust(xDist, method=hmethod) plclust(xClust, sub="")
- 2) Image plot...
image(x = 1:nrow(x), y = 1:ncol(x), z = x[xClust$order, ], col=heat.colors(nrow(x)), ...) par(oldpar) invisible()
} corMA <- function(filter, MA, targets, plot=FALSE) {
tCols <- colnames(targets) e <- new.env() for (i in tCols) { ei <- targetsi } toSub <- eval(parse(text = filter), envir = e)
- return(MA$M[,toSub])
MAsub <- MA$M[,toSub] if(plot) { pairs(MAsub, pch=".") } return(list(targets=targets[toSub,], cor=cov2cor(var(MAsub, na.rm=TRUE))))
} createPlots <- function(RG, plotDir=getwd(),
bc.correct = c("none","subtract","minimum", "movingmin"), method.within = c("none", "loess", "printtiploess"), method.between = c("none","scale", "quantile")) {
if(exists("RG")) {
- 1) Create directories
dir.create(file.path(plotDir, "Densityplots"), recursive=TRUE) for(i in c("ImagePlots","MAplots")) { dir.create(file.path(plotDir, i), recursive=TRUE) if(i == "ImagePlots") { for(x in bc.correct) { for(y in method.within) { for(z in method.between) { for( j in c("R","G", "M","A")) { dir.create(file.path(plotDir, i, paste(x,"-",y,"-",z, sep=""),j), recursive=TRUE) if(x=="none" & y =="none" & z =="none") { for(k in c("Rb", "Gb")) { dir.create(file.path(plotDir, i, paste(x,"-",y,"-",z, sep=""),k), recursive=TRUE) } } } } } } } if(i == "MAplots") { for(x in bc.correct) { for(y in method.within) { for(z in method.between) { dir.create(file.path(plotDir, i, paste(x,"-",y,"-",z, sep="")), recursive=TRUE) } } } } }
- 2) Create plots
for(i in bc.correct) { for( j in method.within) { MA <- normalizeWithinArrays(RG, method=j, bc.method=i) for (k in method.between) { MA.Between <- normalizeBetweenArrays(MA, method=k) print(paste("[ Graphing:",i,j,k,"]", sep=" ")) # plotDensities png(file.path(plotDir, "Densityplots", paste(i,"-",j,"-",k,".png", sep=""))) plotDensities(MA.Between) dev.off() # plotMA3by2 plotMA3by2(MA.Between, path=file.path(plotDir, "MAplots", paste(i,"-",j,"-",k, sep=""))) # Imageplots imageplot3by2(MA.Between, z="M", path = file.path(plotDir,"Imageplots", paste(i,"-",j,"-",k, sep=""), "M")) imageplot3by2(MA.Between, z="A", path = file.path(plotDir,"Imageplots", paste(i,"-",j,"-",k, sep=""), "A")) imageplot3by2(RG.MA(MA.Between), z="G", path = file.path(plotDir,"Imageplots", paste(i,"-",j,"-",k, sep=""),"G"), low="black", high="green") imageplot3by2(RG.MA(MA.Between), z="R", path = file.path(plotDir,"Imageplots", paste(i,"-",j,"-",k, sep=""),"R"), low="black", high="red") if(i=="none" & j=="none" & k=="none") { imageplot3by2(RG, z="Gb", path = file.path(plotDir,"Imageplots", paste(i,"-",j,"-",k, sep=""),"Gb"), low="black", high="green") imageplot3by2(RG, z="Rb", path = file.path(plotDir,"Imageplots", paste(i,"-",j,"-",k, sep=""),"Rb"), low="black", high="red") } } } } } return("done")
} dir.exists <- function(x, curdir = ".", rootdir) {
- -------------------------------------------------------------------------------------- #
- Author : Marcus Davy
- This is a recursive program to take in a string such as:
- foo/fodda/fi, and create subdirectories foo, fodda, and fi
- from the current directory location (if they don't already exist)
- -------------------------------------------------------------------------------------- #
if( missing(rootdir)) { rootdir <- getwd() } setwd(curdir) x <- unlist(strsplit(x,"/")) if( !file.exists(x[1]) ){ dir.create(x[1]) print(paste("Creating", file.path(getwd(), x[1]))) } curdir <- x[1]
if(length(x)==1) { setwd(rootdir) } else { return(dir.exists(x[-1], curdir = curdir, rootdir = rootdir)) }
} fixLabel <- function(x, string, sep=".") {
- ----------------------------------------------------------------------------- #
- Function for fixing labels for limma package functions:
- modelMatrix and make Contrasts e.g. 14DAFB converts to DAFB.14
- Author: Marcus Davy
- ----------------------------------------------------------------------------- #
beginning <- regexpr(string, x) ending <- nchar(x)
labelstart <- substr(x, beginning, ending) labelend <- substr(x, 0, beginning - 1) newlabel <- paste(labelstart, labelend, sep=sep) return(newlabel)
} formatTopTable <- function(OBJ, ...) {
if(!is(OBJ, "MArrayLM")) { return("OBJ is not of MArrayLM class") } genelist <- topTable(OBJ, ...) # Remove the B statistic column genelist <- genelist[,-grep("^B$", colnames(genelist))] rawt <- OBJ$coef / OBJ$stdev.unscaled / OBJ$sigma colnames(genelist)[grep("^t$", colnames(genelist))] <- "Mod.t" return(cbind(genelist[,1:(grep("^Mod.t$", colnames(genelist))-1)], "Raw.t" = rawt[as.numeric(rownames(genelist))], genelist[,grep("^Mod.t$", colnames(genelist)):length(colnames(genelist))]) )
}
"gprDiagnostics" <-
function(fnames, path = ".", annotation = c("Block","Column","Row", "Name", "ID")) { skipcol <- c() dimensions <- data.frame(matrix(NA,nc=2, nr=length(fnames))) dimnames(dimensions) <- list(fnames, c("nrows", "ncols")) diagnostics <- list() fullfnames <- file.path(path, fnames) for(i in 1:length(fullfnames)){ y <- readLines(fullfnames[i], n = 100) skipcol[fnames[i]] <- grep(annotation[1], y)[1] - 1 h <- scan(fullfnamesi, quiet=TRUE, what=character(1), sep="\t", skip = skipcol[fnames[i]], quote="\"", nlines=1) names(h) <- h h[] <- rep("NULL", length(h)) h[annotation] <- NA dimensions[fnamesi,] <- dim(read.table(fullfnames[i], sep="\t", quote="\"", colClasses=h, skip=skipcol[fnames[i]], header=T)) } list(skipcol, dimensions) }
"getWikiCells" <-
function (ids, repository = "ug")
{
if (is.list(ids)) { out <- vector() temp <- lapply(ids, getQueryLink, repository = repository) for (i in seq(along = ids)) { if (temp[i] != " ") {
out[i] <- paste("
", sep = "", collapse = "")
} else out[i] <- temp[i] } } else { temp <- getQueryLink(ids, repository) blanks <- temp == " " out <- paste("[", temp, " ", ids, "]", sep = "") out[blanks] <- " " } return(out)
} "getWikiRows" <-
function (ids, repository = "ug")
{
out <- paste("|", getWikiCells(ids, repository), sep = " ") return(out)
} hr <- function(n=80) {
return(paste(rep("-", n), collapse=""))
} mergeRG <- function(R, G, Rb, Gb) {
- ----------------------------------------------------------------------------- #
- Function for combining elements of two similar RGLists
- together for the purposes of exploratory graphical investigation
- Author: Marcus Davy
- ----------------------------------------------------------------------------- #
obj <- new("RGList") obj$R <- R obj$G <- G if(!(missing(Rb))) { obj$Rb <- Rb } if(!(missing(Gb))) { obj$Gb <- Gb } return(obj)
} NApadding <-
function (OBJ, method = "weights")
{
choices = c("weights","unspotted") method <- match.arg(method, choices)
if(method == "weights") { ind <- (OBJ$weights == 0) } if(method =="unspotted") { if(is.null(OBJ$genes$ID)) { stop("Expecting genepix RGList for method 'unspotted'") } ind <- (is.na(OBJ$genes$ID) | OBJ$genes$ID=="") } if (is(OBJ, "RGList")) { OBJ$R[ind] <- NA OBJ$G[ind] <- NA OBJ$Gb[ind] <- NA OBJ$Rb[ind] <- NA } else { OBJ$M[ind] <- NA OBJ$A[ind] <- NA } return(OBJ)
} "padGPR" <-
function (RG)
{
mapGPRind <- function(RG) { N <- apply(RG$genes[, c("Block", "Column", "Row")], 2, max) ind <- N["Column"] * (RG$genes[, "Row"] - 1) + RG$genes[, "Column"] + (RG$genes[, "Block"] - 1) * (N["Row"] * N["Column"]) return(ind) } OUT <- new("RGList") browser() OUT$R <- matrix(NA, nr = prod(unlist(RG$printer)), nc = ncol(RG), dimnames=list(NULL, colnames(RG))) OUT$Rb <- matrix(NA, nr = prod(unlist(RG$printer)), nc = ncol(RG), dimnames=list(NULL, colnames(RG))) OUT$G <- matrix(NA, nr = prod(unlist(RG$printer)), nc = ncol(RG), dimnames=list(NULL, colnames(RG))) OUT$Gb <- matrix(NA, nr = prod(unlist(RG$printer)), nc = ncol(RG), dimnames=list(NULL, colnames(RG))) OUT$weights <- matrix(NA, nr = prod(unlist(RG$printer)), nc = ncol(RG), dimnames=list(NULL, colnames(RG))) OUT$genes <- data.frame(matrix(NA, nr = prod(unlist(RG$printer)), nc = ncol(RG$genes), dimnames=list(NULL, colnames(RG$genes)))) OUT$targets <- RG$targets OUT$source <- RG$source OUT$printer <- RG$printer ind <- mapGPRind(RG) OUT$R[ind, ] <- RG$R OUT$Rb[ind, ] <- RG$Rb OUT$G[ind, ] <- RG$G OUT$Gb[ind, ] <- RG$Gb OUT$weights[ind, ] <- RG$weights OUT$genes[ind, ] <- RG$genes return(OUT)
} readAgilent <- function(dataDir="/Volumes/HD2/Data/Nutrigenomics/MultipleScans/Agilent"){
require(limma)
- 1) Read in data using limma
if(exists("targets")) { RGsub <- read.maimages(targets$Filename, path = dataDir, source="agilent", name=targets$Filetype) } else { RGsub <- read.maimages(dir(dataDir, pattern=".txt"), path = dataDir, source="agilent") }
- 2) Find Agilent indices from x ,y coords
getInd <- function(x,y, nx, byrow=T) {
return( (x-1)* nx + y)
}
ind <- getInd(RGsub$genes$Row, RGsub$genes$Col, nx=215)
- 3) Construct complete RGList
RGfull <- new("RGList") RGfull$R <-RGfull$G <-RGfull$Rb <-RGfull$Gb <- matrix(NA, nc=ncol(RGsub), nr=max(ind)) RGfull$genes <- data.frame(matrix(NA, nc=7, nr=max(ind), dimnames = list(NULL, names(RGsub$genes))))
matrixNames <- dimnames(RGsub$R)
- 4) Populate complete list
RGfull$R[ind,] <- RGsub$R RGfull$Rb[ind,] <- RGsub$Rb RGfull$G[ind,] <- RGsub$G RGfull$Gb[ind,] <- RGsub$Gb RGfull$genes[ind,] <- RGsub$genes RGfull$targets <- RGsub$targets
dimnames(RGfull$R) <- dimnames(RGfull$Rb) <- dimnames(RGfull$G) <- dimnames(RGfull$Gb) <- matrixNames
- Checking the rows that needed padding
rowstopad <- as.numeric(rownames(RGsub$genes) [ !(rownames(RGsub$genes) %in% ind) ]) rowstopad
- Indexing looks ok
RGfull[rowstopad,]
RGfull$printer <- structure(list(ngrid.r=1, ngrid.c=1, nspot.r=105, nspot.c=215), class = "PrintLayout") rm(RGsub) return(RGfull)
} "readGPR" <-
function (sourceType = "median", dataDir = "/Volumes/HD2/Data/Nutrigenomics/MultipleScans/GPR", ...)
{
require(limma) print(sessionInfo()) source.type <- switch(sourceType, mean = "genepix", median = "genepix.median") print(paste("source =", source.type)) if (exists("targets")) { RG <- read.maimages(targets$Filename, path = dataDir, source = source.type, wt.fun = wtflags(0), names = targets$Filename, ...) } else { RG <- read.maimages(dir(dataDir, pattern = ".gpr"), path = dataDir, source = source.type, wt.fun = wtflags(0), ...) } RG$printer <- getLayout(RG$genes) return(RG)
}
- Temporarily modified toptable to allow unsorted output (masks limma version in Package:limma)
"toptable" <-
function (fit, coef = 1, number = 10, genelist = NULL, A = NULL, eb = NULL, adjust.method = "BH", sort.by = "B", resort.by = NULL, ...)
{
if (is.null(eb)) { fit$coefficients <- as.matrix(fit$coefficients)[, coef] fit$stdev.unscaled <- as.matrix(fit$stdev.unscaled)[, coef] eb <- ebayes(fit, ...) coef <- 1 } M <- as.matrix(fit$coefficients)[, coef] if (length(M) < number) number <- length(M) if (number < 1) return(data.frame()) if (is.null(A)) { if (sort.by == "A") stop("Cannot sort by A-values as these have not been given") } else { if (NCOL(A) > 1) A <- rowMeans(A, na.rm = TRUE) } tstat <- as.matrix(eb$t)[, coef] P.Value <- as.matrix(eb$p.value)[, coef] B <- as.matrix(eb$lods)[, coef] sort.by <- match.arg(sort.by, c("M", "A", "P", "p", "T", "t", "B", "none")) ord <- switch(sort.by, M = order(abs(M), decreasing = TRUE), A = order(A, decreasing = TRUE), P = order(P.Value, decreasing = FALSE), p = order(P.Value, decreasing = FALSE), T = order(abs(tstat), decreasing = TRUE), t = order(abs(tstat), decreasing = TRUE), B = order(B, decreasing = TRUE), none = 1:number) top <- ord[1:number] adj.P.Value <- p.adjust(P.Value, method = adjust.method) if (is.null(genelist)) tab <- data.frame(M = M[top]) else { if (is.null(dim(genelist))) tab <- data.frame(ProbeID = I(genelist[top]), M = M[top]) else tab <- data.frame(genelist[top, , drop = FALSE], M = M[top]) } if (!is.null(A)) tab <- data.frame(tab, A = A[top]) tab <- data.frame(tab, t = tstat[top], P.Value = P.Value[top], adj.P.Val = adj.P.Value[top], B = B[top]) rownames(tab) <- as.character(1:length(M))[top] if (!is.null(resort.by)) { resort.by <- match.arg(resort.by, c("M", "A", "P", "p", "T", "t", "B")) ord <- switch(resort.by, M = order(tab$M, decreasing = TRUE), A = order(tab$A, decreasing = TRUE), P = order(tab$P.Value, decreasing = FALSE), p = order(tab$P.Value, decreasing = FALSE), T = order(tab$t, decreasing = TRUE), t = order(tab$t, decreasing = TRUE), B = order(tab$lods, decreasing = TRUE)) tab <- tab[ord, ] } tab
}
"ttFilter" <-
function( filter = "abs(logFC) > 0.69 & abs(t) > 2", fit, sort.by="t", number = nrow(fit), ...) {
- ----------------------------------------------------------------------------- #
- Function for filtering out unwanted rows in a topTable output
- Author: Marcus Davy
- ----------------------------------------------------------------------------- #
tt <- topTable(fit, sort.by = sort.by, number = number, ...) tCols <- colnames(tt)
# create an enviromnent for topTable columns e <- new.env() for(i in tCols) { ei <- tti }
# Obtain logical from filter toSub <- eval(parse(text=filter), envir=e) return( tt[toSub, ] ) }
"wikitable" <-
function (genelist, filename, title, othernames, table.head, table.center = TRUE, repository = "ll")
{
nl <- "\n" outfile <- file(filename, "w") if (!(missing(title) | is.null(title))) { cat("=", title, "=", nl, file = outfile, sep = "") } cat("{| border=\"0\"", file = outfile, sep = "\n") if (!(missing(table.head) | is.null(table.head))) { headout <- paste("| ", "", table.head, "", sep="") cat("|- align=\"center\" style=\"background:#277ec2; color:white\"", headout, file = outfile, sep = "\n") } if (is.list(genelist)) { if (all.equal(rep(length(genelist1), length(genelist)), unlist(lapply(genelist, length), use.names = FALSE))) { nrows <- length(genelist1) } else { stop("The lists of genes to annotate must all be of equal length.") } } else { if(!is.null(genelist)) { # nrows from either genelist or othernames nrows <- length(genelist) } else { nrows <- length(othernames1) } } if(!is.null(repository)) { if (is.list(repository)) { rows <- "" for (i in seq(along = repository)) { rows <- paste(rows, getWikiRows(genelisti, repositoryi), sep="\n") } } else { rows <- getWikiRows(genelist, repository, sep="\n") } } else { rows <- NULL } if (!missing(othernames)) { if (is.list(othernames)) { others <- "" for (nm in othernames) { if (is.matrix(nm)) { for (i in 1:dim(nm)[2]) { others <- paste(others, "\n| ", nm[, i], sep = "") } } if (is.list(nm)) { out <- vector() for (j in seq(along = nm)) {
out[j] <- paste("
", nmj, "
", sep = "",
collapse = "") } out <- paste("\n| ", out, sep = "") others <- paste(others, out, sep = "") } if ((is.vector(nm) || is.factor(nm)) && !is.list(nm)) others <- paste(others, "\n| ", nm, sep = "") } } else { others <- NULL } if( !(is.null(rows) | is.null(others))) { if (length(rows) != length(others)) stop(paste("There are", length(rows), "rows in your genelist, but", length(others), "rows in othernames.\n This will not give", "good results!\n")) } rows <- paste(rows, others) } for (i in 1:nrows) cat("|- align=\"left\" bgcolor=\"#f2f2f2\"", rows[i], nl, file = outfile, sep = "") cat("|}", file = outfile, sep="") close(outfile)
}