Difference between revisions of "NutriTools.R"
(dump) |
m |
||
(4 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
+ | # {{R}} | ||
+ | checkall <- function() { | ||
+ | require(codetools) | ||
+ | require(limma) | ||
+ | for(i in ls(env=.GlobalEnv)) { | ||
+ | cat(i, sep = "\n") | ||
+ | checkUsage(get(i)) | ||
+ | cat(paste(rep("-", 80), collapse="", sep=""), sep="\n") | ||
+ | } | ||
+ | return("done") | ||
+ | } | ||
+ | clusterPlot <- function(x, rowNames = NULL, dmethod = "euclidian", hmethod="complete", ...) { | ||
+ | oldpar <- par(mfrow=c(2,1)) | ||
+ | |||
+ | if(is.character(rowNames) & (length(rowNames) == nrow(x))) { | ||
+ | rownames(x) <- rowNames | ||
+ | } | ||
+ | xDist <- dist(x, method=dmethod) | ||
+ | |||
+ | # 1) Tree dendrogram | ||
+ | xClust <- hclust(xDist, method=hmethod) | ||
+ | plclust(xClust, sub="") | ||
+ | |||
+ | # 2) Image plot... | ||
+ | image(x = 1:nrow(x), y = 1:ncol(x), z = x[xClust$order, ], col=heat.colors(nrow(x)), ...) | ||
+ | par(oldpar) | ||
+ | invisible() | ||
+ | } | ||
+ | corMA <- function(filter, MA, targets, plot=FALSE) { | ||
+ | tCols <- colnames(targets) | ||
+ | e <- new.env() | ||
+ | for (i in tCols) { | ||
+ | e[[i]] <- targets[[i]] | ||
+ | } | ||
+ | toSub <- eval(parse(text = filter), envir = e) | ||
+ | # return(MA$M[,toSub]) | ||
+ | MAsub <- MA$M[,toSub] | ||
+ | if(plot) { | ||
+ | pairs(MAsub, pch=".") | ||
+ | } | ||
+ | return(list(targets=targets[toSub,], cor=cov2cor(var(MAsub, na.rm=TRUE)))) | ||
+ | } | ||
createPlots <- function(RG, plotDir=getwd(), | createPlots <- function(RG, plotDir=getwd(), | ||
bc.correct = c("none","subtract","minimum", "movingmin"), | bc.correct = c("none","subtract","minimum", "movingmin"), | ||
Line 68: | Line 110: | ||
return("done") | return("done") | ||
} | } | ||
− | |||
dir.exists <- function(x, curdir = ".", rootdir) { | dir.exists <- function(x, curdir = ".", rootdir) { | ||
# -------------------------------------------------------------------------------------- # | # -------------------------------------------------------------------------------------- # | ||
Line 94: | Line 135: | ||
} | } | ||
} | } | ||
− | |||
fixLabel <- function(x, string, sep=".") { | fixLabel <- function(x, string, sep=".") { | ||
# ----------------------------------------------------------------------------- # | # ----------------------------------------------------------------------------- # | ||
Line 110: | Line 150: | ||
return(newlabel) | return(newlabel) | ||
} | } | ||
+ | formatTopTable <- function(OBJ, ...) { | ||
+ | if(!is(OBJ, "MArrayLM")) { | ||
+ | return("OBJ is not of MArrayLM class") | ||
+ | } | ||
+ | genelist <- topTable(OBJ, ...) | ||
+ | # Remove the B statistic column | ||
+ | genelist <- genelist[,-grep("^B$", colnames(genelist))] | ||
+ | rawt <- OBJ$coef / OBJ$stdev.unscaled / OBJ$sigma | ||
+ | colnames(genelist)[grep("^t$", colnames(genelist))] <- "Mod.t" | ||
+ | return(cbind(genelist[,1:(grep("^Mod.t$", colnames(genelist))-1)], | ||
+ | "Raw.t" = rawt[as.numeric(rownames(genelist))], | ||
+ | genelist[,grep("^Mod.t$", colnames(genelist)):length(colnames(genelist))]) | ||
+ | ) | ||
+ | } | ||
"gprDiagnostics" <- | "gprDiagnostics" <- | ||
Line 129: | Line 183: | ||
list(skipcol, dimensions) | list(skipcol, dimensions) | ||
} | } | ||
+ | "getWikiCells" <- | ||
+ | function (ids, repository = "ug") | ||
+ | { | ||
+ | if (is.list(ids)) { | ||
+ | out <- vector() | ||
+ | temp <- lapply(ids, getQueryLink, repository = repository) | ||
+ | for (i in seq(along = ids)) { | ||
+ | if (temp[i] != " ") { | ||
+ | out[i] <- paste("<P>[", temp[[i]], " ", | ||
+ | ids[[i]], "]</P>", sep = "", collapse = "") | ||
+ | } | ||
+ | else out[i] <- temp[i] | ||
+ | } | ||
+ | } | ||
+ | else { | ||
+ | temp <- getQueryLink(ids, repository) | ||
+ | blanks <- temp == " " | ||
+ | out <- paste("[", temp, " ", ids, "]", sep = "") | ||
+ | out[blanks] <- " " | ||
+ | } | ||
+ | return(out) | ||
+ | } | ||
+ | "getWikiRows" <- | ||
+ | function (ids, repository = "ug") | ||
+ | { | ||
+ | out <- paste("|", getWikiCells(ids, repository), sep = " ") | ||
+ | return(out) | ||
+ | } | ||
+ | hr <- function(n=80) { | ||
+ | return(paste(rep("-", n), collapse="")) | ||
+ | } | ||
+ | mergeRG <- function(R, G, Rb, Gb) { | ||
+ | # ----------------------------------------------------------------------------- # | ||
+ | # Function for combining elements of two similar RGLists | ||
+ | # together for the purposes of exploratory graphical investigation | ||
+ | # Author: Marcus Davy | ||
+ | # ----------------------------------------------------------------------------- # | ||
+ | obj <- new("RGList") | ||
+ | obj$R <- R | ||
+ | obj$G <- G | ||
+ | if(!(missing(Rb))) { | ||
+ | obj$Rb <- Rb | ||
+ | } | ||
+ | if(!(missing(Gb))) { | ||
+ | obj$Gb <- Gb | ||
+ | } | ||
+ | return(obj) | ||
+ | } | ||
+ | NApadding <- | ||
+ | function (OBJ, method = "weights") | ||
+ | { | ||
+ | choices = c("weights","unspotted") | ||
+ | method <- match.arg(method, choices) | ||
− | + | if(method == "weights") { | |
+ | ind <- (OBJ$weights == 0) | ||
+ | } | ||
+ | if(method =="unspotted") { | ||
+ | if(is.null(OBJ$genes$ID)) { | ||
+ | stop("Expecting genepix RGList for method 'unspotted'") | ||
+ | } | ||
+ | ind <- (is.na(OBJ$genes$ID) | OBJ$genes$ID=="") | ||
+ | } | ||
+ | |||
if (is(OBJ, "RGList")) { | if (is(OBJ, "RGList")) { | ||
− | OBJ$R[ | + | OBJ$R[ind] <- NA |
− | OBJ$G[ | + | OBJ$G[ind] <- NA |
− | OBJ$Gb[ | + | OBJ$Gb[ind] <- NA |
− | OBJ$Rb[ | + | OBJ$Rb[ind] <- NA |
− | } else { | + | } |
− | OBJ$M[ | + | else { |
− | OBJ$A[ | + | OBJ$M[ind] <- NA |
+ | OBJ$A[ind] <- NA | ||
} | } | ||
+ | |||
return(OBJ) | return(OBJ) | ||
} | } | ||
− | + | "padGPR" <- | |
− | padGPR <- function(RG) { | + | function (RG) |
− | mapGPRind <- function(RG){ | + | { |
− | N <- apply(RG$genes[,c("Block","Column","Row")],2, max) | + | mapGPRind <- function(RG) { |
− | ind <- N["Column"] * (RG$genes[,"Row"]-1) + RG$genes[,"Column"] + (RG$genes[,"Block"] - 1) * (N["Row"] * N["Column"]) | + | N <- apply(RG$genes[, c("Block", "Column", "Row")], 2, |
+ | max) | ||
+ | ind <- N["Column"] * (RG$genes[, "Row"] - 1) + RG$genes[, | ||
+ | "Column"] + (RG$genes[, "Block"] - 1) * (N["Row"] * | ||
+ | N["Column"]) | ||
return(ind) | return(ind) | ||
} | } | ||
− | + | OUT <- new("RGList") | |
− | OUT | + | browser() |
− | OUT$R | + | OUT$R <- matrix(NA, nr = prod(unlist(RG$printer)), nc = ncol(RG), dimnames=list(NULL, colnames(RG))) |
− | OUT$Rb | + | OUT$Rb <- matrix(NA, nr = prod(unlist(RG$printer)), nc = ncol(RG), dimnames=list(NULL, colnames(RG))) |
− | OUT$G | + | OUT$G <- matrix(NA, nr = prod(unlist(RG$printer)), nc = ncol(RG), dimnames=list(NULL, colnames(RG))) |
− | OUT$Gb | + | OUT$Gb <- matrix(NA, nr = prod(unlist(RG$printer)), nc = ncol(RG), dimnames=list(NULL, colnames(RG))) |
− | OUT$weights <- matrix(NA, nr=prod(unlist(RG$printer)), nc=ncol(RG)) | + | OUT$weights <- matrix(NA, nr = prod(unlist(RG$printer)), nc = ncol(RG), dimnames=list(NULL, colnames(RG))) |
− | OUT$genes | + | OUT$genes <- data.frame(matrix(NA, nr = prod(unlist(RG$printer)), |
− | + | nc = ncol(RG$genes), dimnames=list(NULL, colnames(RG$genes)))) | |
− | OUT$targets | + | OUT$targets <- RG$targets |
− | OUT$source | + | OUT$source <- RG$source |
− | OUT$printer | + | OUT$printer <- RG$printer |
− | ind | + | ind <- mapGPRind(RG) |
− | OUT$R[ind,] | + | OUT$R[ind, ] <- RG$R |
− | OUT$Rb[ind,] | + | OUT$Rb[ind, ] <- RG$Rb |
− | OUT$G[ind,] | + | OUT$G[ind, ] <- RG$G |
− | OUT$Gb[ind,] | + | OUT$Gb[ind, ] <- RG$Gb |
− | OUT$weights[ind,] <- RG$weights | + | OUT$weights[ind, ] <- RG$weights |
− | OUT$genes[ind,] | + | OUT$genes[ind, ] <- RG$genes |
return(OUT) | return(OUT) | ||
} | } | ||
− | + | readAgilent <- | |
− | readAgilent | ||
function(dataDir="/Volumes/HD2/Data/Nutrigenomics/MultipleScans/Agilent"){ | function(dataDir="/Volumes/HD2/Data/Nutrigenomics/MultipleScans/Agilent"){ | ||
require(limma) | require(limma) | ||
# 1) Read in data using limma | # 1) Read in data using limma | ||
− | if(exists("targets" | + | if(exists("targets")) { |
RGsub <- read.maimages(targets$Filename, path = dataDir, source="agilent", name=targets$Filetype) | RGsub <- read.maimages(targets$Filename, path = dataDir, source="agilent", name=targets$Filetype) | ||
} else { | } else { | ||
Line 215: | Line 336: | ||
return(RGfull) | return(RGfull) | ||
} | } | ||
− | |||
− | |||
"readGPR" <- | "readGPR" <- | ||
function (sourceType = "median", dataDir = "/Volumes/HD2/Data/Nutrigenomics/MultipleScans/GPR", | function (sourceType = "median", dataDir = "/Volumes/HD2/Data/Nutrigenomics/MultipleScans/GPR", | ||
Line 235: | Line 354: | ||
RG$printer <- getLayout(RG$genes) | RG$printer <- getLayout(RG$genes) | ||
return(RG) | return(RG) | ||
+ | } | ||
+ | # Temporarily modified toptable to allow unsorted output (masks limma version in Package:limma) | ||
+ | "toptable" <- | ||
+ | function (fit, coef = 1, number = 10, genelist = NULL, A = NULL, | ||
+ | eb = NULL, adjust.method = "BH", sort.by = "B", resort.by = NULL, | ||
+ | ...) | ||
+ | { | ||
+ | if (is.null(eb)) { | ||
+ | fit$coefficients <- as.matrix(fit$coefficients)[, coef] | ||
+ | fit$stdev.unscaled <- as.matrix(fit$stdev.unscaled)[, | ||
+ | coef] | ||
+ | eb <- ebayes(fit, ...) | ||
+ | coef <- 1 | ||
+ | } | ||
+ | M <- as.matrix(fit$coefficients)[, coef] | ||
+ | if (length(M) < number) | ||
+ | number <- length(M) | ||
+ | if (number < 1) | ||
+ | return(data.frame()) | ||
+ | if (is.null(A)) { | ||
+ | if (sort.by == "A") | ||
+ | stop("Cannot sort by A-values as these have not been given") | ||
+ | } | ||
+ | else { | ||
+ | if (NCOL(A) > 1) | ||
+ | A <- rowMeans(A, na.rm = TRUE) | ||
+ | } | ||
+ | tstat <- as.matrix(eb$t)[, coef] | ||
+ | P.Value <- as.matrix(eb$p.value)[, coef] | ||
+ | B <- as.matrix(eb$lods)[, coef] | ||
+ | sort.by <- match.arg(sort.by, c("M", "A", "P", "p", "T", | ||
+ | "t", "B", "none")) | ||
+ | ord <- switch(sort.by, M = order(abs(M), decreasing = TRUE), | ||
+ | A = order(A, decreasing = TRUE), P = order(P.Value, decreasing = FALSE), | ||
+ | p = order(P.Value, decreasing = FALSE), T = order(abs(tstat), | ||
+ | decreasing = TRUE), t = order(abs(tstat), decreasing = TRUE), | ||
+ | B = order(B, decreasing = TRUE), none = 1:number) | ||
+ | top <- ord[1:number] | ||
+ | adj.P.Value <- p.adjust(P.Value, method = adjust.method) | ||
+ | if (is.null(genelist)) | ||
+ | tab <- data.frame(M = M[top]) | ||
+ | else { | ||
+ | if (is.null(dim(genelist))) | ||
+ | tab <- data.frame(ProbeID = I(genelist[top]), M = M[top]) | ||
+ | else tab <- data.frame(genelist[top, , drop = FALSE], | ||
+ | M = M[top]) | ||
+ | } | ||
+ | if (!is.null(A)) | ||
+ | tab <- data.frame(tab, A = A[top]) | ||
+ | tab <- data.frame(tab, t = tstat[top], P.Value = P.Value[top], | ||
+ | adj.P.Val = adj.P.Value[top], B = B[top]) | ||
+ | rownames(tab) <- as.character(1:length(M))[top] | ||
+ | if (!is.null(resort.by)) { | ||
+ | resort.by <- match.arg(resort.by, c("M", "A", "P", "p", | ||
+ | "T", "t", "B")) | ||
+ | ord <- switch(resort.by, M = order(tab$M, decreasing = TRUE), | ||
+ | A = order(tab$A, decreasing = TRUE), P = order(tab$P.Value, | ||
+ | decreasing = FALSE), p = order(tab$P.Value, decreasing = FALSE), | ||
+ | T = order(tab$t, decreasing = TRUE), t = order(tab$t, | ||
+ | decreasing = TRUE), B = order(tab$lods, decreasing = TRUE)) | ||
+ | tab <- tab[ord, ] | ||
+ | } | ||
+ | tab | ||
} | } | ||
+ | |||
+ | "ttFilter" <- | ||
+ | function( filter = "abs(logFC) > 0.69 & abs(t) > 2", fit, sort.by="t", number = nrow(fit), ...) { | ||
+ | # ----------------------------------------------------------------------------- # | ||
+ | # Function for filtering out unwanted rows in a topTable output | ||
+ | # Author: Marcus Davy | ||
+ | # ----------------------------------------------------------------------------- # | ||
+ | tt <- topTable(fit, sort.by = sort.by, number = number, ...) | ||
+ | tCols <- colnames(tt) | ||
+ | |||
+ | # create an enviromnent for topTable columns | ||
+ | e <- new.env() | ||
+ | for(i in tCols) { | ||
+ | e[[i]] <- tt[[i]] | ||
+ | } | ||
+ | |||
+ | # Obtain logical from filter | ||
+ | toSub <- eval(parse(text=filter), envir=e) | ||
+ | return( tt[toSub, ] ) | ||
+ | } | ||
"wikitable" <- | "wikitable" <- | ||
Line 243: | Line 445: | ||
nl <- "\n" | nl <- "\n" | ||
outfile <- file(filename, "w") | outfile <- file(filename, "w") | ||
− | if (!missing(title)) | + | if (!(missing(title) | is.null(title))) { |
cat("=", title, "=", nl, file = outfile, sep = "") | cat("=", title, "=", nl, file = outfile, sep = "") | ||
+ | } | ||
cat("{| border=\"0\"", file = outfile, sep = "\n") | cat("{| border=\"0\"", file = outfile, sep = "\n") | ||
− | if (!missing(table.head)) { | + | if (!(missing(table.head) | is.null(table.head))) { |
headout <- paste("| ", "''", table.head, "''", sep="") | headout <- paste("| ", "''", table.head, "''", sep="") | ||
− | cat("|- align=\"center\" style=\"background:# | + | cat("|- align=\"center\" style=\"background:#277ec2; color:white\"", headout, file = outfile, sep = "\n") |
− | + | } | |
if (is.list(genelist)) { | if (is.list(genelist)) { | ||
if (all.equal(rep(length(genelist[[1]]), length(genelist)), | if (all.equal(rep(length(genelist[[1]]), length(genelist)), | ||
unlist(lapply(genelist, length), use.names = FALSE))) { | unlist(lapply(genelist, length), use.names = FALSE))) { | ||
nrows <- length(genelist[[1]]) | nrows <- length(genelist[[1]]) | ||
+ | } else { | ||
+ | stop("The lists of genes to annotate must all be of equal length.") | ||
+ | } | ||
+ | } else { | ||
+ | if(!is.null(genelist)) { # nrows from either genelist or othernames | ||
+ | nrows <- length(genelist) | ||
+ | } else { | ||
+ | nrows <- length(othernames[[1]]) | ||
} | } | ||
− | |||
} | } | ||
− | + | if(!is.null(repository)) { | |
− | + | if (is.list(repository)) { | |
− | + | rows <- "" | |
− | + | for (i in seq(along = repository)) { | |
− | + | rows <- paste(rows, getWikiRows(genelist[[i]], repository[[i]]), sep="\n") | |
+ | } | ||
+ | } else { | ||
+ | rows <- getWikiRows(genelist, repository, sep="\n") | ||
} | } | ||
+ | } else { | ||
+ | rows <- NULL | ||
} | } | ||
− | + | if (!missing(othernames)) { | |
− | + | if (is.list(othernames)) { | |
− | + | others <- "" | |
− | + | for (nm in othernames) { | |
− | + | if (is.matrix(nm)) { | |
− | + | for (i in 1:dim(nm)[2]) { | |
− | + | others <- paste(others, "\n| ", nm[, i], sep = "") | |
− | + | } | |
− | |||
} | } | ||
if (is.list(nm)) { | if (is.list(nm)) { | ||
Line 286: | Line 500: | ||
others <- paste(others, "\n| ", nm, sep = "") | others <- paste(others, "\n| ", nm, sep = "") | ||
} | } | ||
+ | } else { | ||
+ | others <- NULL | ||
} | } | ||
− | + | if( !(is.null(rows) | is.null(others))) { | |
− | + | if (length(rows) != length(others)) | |
− | + | stop(paste("There are", length(rows), "rows in your genelist, but", | |
− | + | length(others), "rows in othernames.\n This will not give", | |
− | + | "good results!\n")) | |
− | + | } | |
+ | rows <- paste(rows, others) | ||
+ | |||
} | } | ||
− | for (i in 1:nrows) cat("|- bgcolor=\"#f2f2f2\"", rows[i], nl, file = outfile, | + | for (i in 1:nrows) cat("|- align=\"left\" bgcolor=\"#f2f2f2\"", rows[i], nl, file = outfile, |
sep = "") | sep = "") | ||
cat("|}", file = outfile, sep="") | cat("|}", file = outfile, sep="") | ||
close(outfile) | close(outfile) | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
} | } |
Latest revision as of 00:42, 6 June 2007
Code snipits and programs written in R, S or S-PLUS checkall <- function() {
require(codetools) require(limma) for(i in ls(env=.GlobalEnv)) { cat(i, sep = "\n") checkUsage(get(i)) cat(paste(rep("-", 80), collapse="", sep=""), sep="\n") } return("done")
} clusterPlot <- function(x, rowNames = NULL, dmethod = "euclidian", hmethod="complete", ...) {
oldpar <- par(mfrow=c(2,1))
if(is.character(rowNames) & (length(rowNames) == nrow(x))) { rownames(x) <- rowNames } xDist <- dist(x, method=dmethod)
- 1) Tree dendrogram
xClust <- hclust(xDist, method=hmethod) plclust(xClust, sub="")
- 2) Image plot...
image(x = 1:nrow(x), y = 1:ncol(x), z = x[xClust$order, ], col=heat.colors(nrow(x)), ...) par(oldpar) invisible()
} corMA <- function(filter, MA, targets, plot=FALSE) {
tCols <- colnames(targets) e <- new.env() for (i in tCols) { ei <- targetsi } toSub <- eval(parse(text = filter), envir = e)
- return(MA$M[,toSub])
MAsub <- MA$M[,toSub] if(plot) { pairs(MAsub, pch=".") } return(list(targets=targets[toSub,], cor=cov2cor(var(MAsub, na.rm=TRUE))))
} createPlots <- function(RG, plotDir=getwd(),
bc.correct = c("none","subtract","minimum", "movingmin"), method.within = c("none", "loess", "printtiploess"), method.between = c("none","scale", "quantile")) {
if(exists("RG")) {
- 1) Create directories
dir.create(file.path(plotDir, "Densityplots"), recursive=TRUE) for(i in c("ImagePlots","MAplots")) { dir.create(file.path(plotDir, i), recursive=TRUE) if(i == "ImagePlots") { for(x in bc.correct) { for(y in method.within) { for(z in method.between) { for( j in c("R","G", "M","A")) { dir.create(file.path(plotDir, i, paste(x,"-",y,"-",z, sep=""),j), recursive=TRUE) if(x=="none" & y =="none" & z =="none") { for(k in c("Rb", "Gb")) { dir.create(file.path(plotDir, i, paste(x,"-",y,"-",z, sep=""),k), recursive=TRUE) } } } } } } } if(i == "MAplots") { for(x in bc.correct) { for(y in method.within) { for(z in method.between) { dir.create(file.path(plotDir, i, paste(x,"-",y,"-",z, sep="")), recursive=TRUE) } } } } }
- 2) Create plots
for(i in bc.correct) { for( j in method.within) { MA <- normalizeWithinArrays(RG, method=j, bc.method=i) for (k in method.between) { MA.Between <- normalizeBetweenArrays(MA, method=k) print(paste("[ Graphing:",i,j,k,"]", sep=" ")) # plotDensities png(file.path(plotDir, "Densityplots", paste(i,"-",j,"-",k,".png", sep=""))) plotDensities(MA.Between) dev.off() # plotMA3by2 plotMA3by2(MA.Between, path=file.path(plotDir, "MAplots", paste(i,"-",j,"-",k, sep=""))) # Imageplots imageplot3by2(MA.Between, z="M", path = file.path(plotDir,"Imageplots", paste(i,"-",j,"-",k, sep=""), "M")) imageplot3by2(MA.Between, z="A", path = file.path(plotDir,"Imageplots", paste(i,"-",j,"-",k, sep=""), "A")) imageplot3by2(RG.MA(MA.Between), z="G", path = file.path(plotDir,"Imageplots", paste(i,"-",j,"-",k, sep=""),"G"), low="black", high="green") imageplot3by2(RG.MA(MA.Between), z="R", path = file.path(plotDir,"Imageplots", paste(i,"-",j,"-",k, sep=""),"R"), low="black", high="red") if(i=="none" & j=="none" & k=="none") { imageplot3by2(RG, z="Gb", path = file.path(plotDir,"Imageplots", paste(i,"-",j,"-",k, sep=""),"Gb"), low="black", high="green") imageplot3by2(RG, z="Rb", path = file.path(plotDir,"Imageplots", paste(i,"-",j,"-",k, sep=""),"Rb"), low="black", high="red") } } } } } return("done")
} dir.exists <- function(x, curdir = ".", rootdir) {
- -------------------------------------------------------------------------------------- #
- Author : Marcus Davy
- This is a recursive program to take in a string such as:
- foo/fodda/fi, and create subdirectories foo, fodda, and fi
- from the current directory location (if they don't already exist)
- -------------------------------------------------------------------------------------- #
if( missing(rootdir)) { rootdir <- getwd() } setwd(curdir) x <- unlist(strsplit(x,"/")) if( !file.exists(x[1]) ){ dir.create(x[1]) print(paste("Creating", file.path(getwd(), x[1]))) } curdir <- x[1]
if(length(x)==1) { setwd(rootdir) } else { return(dir.exists(x[-1], curdir = curdir, rootdir = rootdir)) }
} fixLabel <- function(x, string, sep=".") {
- ----------------------------------------------------------------------------- #
- Function for fixing labels for limma package functions:
- modelMatrix and make Contrasts e.g. 14DAFB converts to DAFB.14
- Author: Marcus Davy
- ----------------------------------------------------------------------------- #
beginning <- regexpr(string, x) ending <- nchar(x)
labelstart <- substr(x, beginning, ending) labelend <- substr(x, 0, beginning - 1) newlabel <- paste(labelstart, labelend, sep=sep) return(newlabel)
} formatTopTable <- function(OBJ, ...) {
if(!is(OBJ, "MArrayLM")) { return("OBJ is not of MArrayLM class") } genelist <- topTable(OBJ, ...) # Remove the B statistic column genelist <- genelist[,-grep("^B$", colnames(genelist))] rawt <- OBJ$coef / OBJ$stdev.unscaled / OBJ$sigma colnames(genelist)[grep("^t$", colnames(genelist))] <- "Mod.t" return(cbind(genelist[,1:(grep("^Mod.t$", colnames(genelist))-1)], "Raw.t" = rawt[as.numeric(rownames(genelist))], genelist[,grep("^Mod.t$", colnames(genelist)):length(colnames(genelist))]) )
}
"gprDiagnostics" <-
function(fnames, path = ".", annotation = c("Block","Column","Row", "Name", "ID")) { skipcol <- c() dimensions <- data.frame(matrix(NA,nc=2, nr=length(fnames))) dimnames(dimensions) <- list(fnames, c("nrows", "ncols")) diagnostics <- list() fullfnames <- file.path(path, fnames) for(i in 1:length(fullfnames)){ y <- readLines(fullfnames[i], n = 100) skipcol[fnames[i]] <- grep(annotation[1], y)[1] - 1 h <- scan(fullfnamesi, quiet=TRUE, what=character(1), sep="\t", skip = skipcol[fnames[i]], quote="\"", nlines=1) names(h) <- h h[] <- rep("NULL", length(h)) h[annotation] <- NA dimensions[fnamesi,] <- dim(read.table(fullfnames[i], sep="\t", quote="\"", colClasses=h, skip=skipcol[fnames[i]], header=T)) } list(skipcol, dimensions) }
"getWikiCells" <-
function (ids, repository = "ug")
{
if (is.list(ids)) { out <- vector() temp <- lapply(ids, getQueryLink, repository = repository) for (i in seq(along = ids)) { if (temp[i] != " ") {
out[i] <- paste("
", sep = "", collapse = "")
} else out[i] <- temp[i] } } else { temp <- getQueryLink(ids, repository) blanks <- temp == " " out <- paste("[", temp, " ", ids, "]", sep = "") out[blanks] <- " " } return(out)
} "getWikiRows" <-
function (ids, repository = "ug")
{
out <- paste("|", getWikiCells(ids, repository), sep = " ") return(out)
} hr <- function(n=80) {
return(paste(rep("-", n), collapse=""))
} mergeRG <- function(R, G, Rb, Gb) {
- ----------------------------------------------------------------------------- #
- Function for combining elements of two similar RGLists
- together for the purposes of exploratory graphical investigation
- Author: Marcus Davy
- ----------------------------------------------------------------------------- #
obj <- new("RGList") obj$R <- R obj$G <- G if(!(missing(Rb))) { obj$Rb <- Rb } if(!(missing(Gb))) { obj$Gb <- Gb } return(obj)
} NApadding <-
function (OBJ, method = "weights")
{
choices = c("weights","unspotted") method <- match.arg(method, choices)
if(method == "weights") { ind <- (OBJ$weights == 0) } if(method =="unspotted") { if(is.null(OBJ$genes$ID)) { stop("Expecting genepix RGList for method 'unspotted'") } ind <- (is.na(OBJ$genes$ID) | OBJ$genes$ID=="") } if (is(OBJ, "RGList")) { OBJ$R[ind] <- NA OBJ$G[ind] <- NA OBJ$Gb[ind] <- NA OBJ$Rb[ind] <- NA } else { OBJ$M[ind] <- NA OBJ$A[ind] <- NA } return(OBJ)
} "padGPR" <-
function (RG)
{
mapGPRind <- function(RG) { N <- apply(RG$genes[, c("Block", "Column", "Row")], 2, max) ind <- N["Column"] * (RG$genes[, "Row"] - 1) + RG$genes[, "Column"] + (RG$genes[, "Block"] - 1) * (N["Row"] * N["Column"]) return(ind) } OUT <- new("RGList") browser() OUT$R <- matrix(NA, nr = prod(unlist(RG$printer)), nc = ncol(RG), dimnames=list(NULL, colnames(RG))) OUT$Rb <- matrix(NA, nr = prod(unlist(RG$printer)), nc = ncol(RG), dimnames=list(NULL, colnames(RG))) OUT$G <- matrix(NA, nr = prod(unlist(RG$printer)), nc = ncol(RG), dimnames=list(NULL, colnames(RG))) OUT$Gb <- matrix(NA, nr = prod(unlist(RG$printer)), nc = ncol(RG), dimnames=list(NULL, colnames(RG))) OUT$weights <- matrix(NA, nr = prod(unlist(RG$printer)), nc = ncol(RG), dimnames=list(NULL, colnames(RG))) OUT$genes <- data.frame(matrix(NA, nr = prod(unlist(RG$printer)), nc = ncol(RG$genes), dimnames=list(NULL, colnames(RG$genes)))) OUT$targets <- RG$targets OUT$source <- RG$source OUT$printer <- RG$printer ind <- mapGPRind(RG) OUT$R[ind, ] <- RG$R OUT$Rb[ind, ] <- RG$Rb OUT$G[ind, ] <- RG$G OUT$Gb[ind, ] <- RG$Gb OUT$weights[ind, ] <- RG$weights OUT$genes[ind, ] <- RG$genes return(OUT)
} readAgilent <- function(dataDir="/Volumes/HD2/Data/Nutrigenomics/MultipleScans/Agilent"){
require(limma)
- 1) Read in data using limma
if(exists("targets")) { RGsub <- read.maimages(targets$Filename, path = dataDir, source="agilent", name=targets$Filetype) } else { RGsub <- read.maimages(dir(dataDir, pattern=".txt"), path = dataDir, source="agilent") }
- 2) Find Agilent indices from x ,y coords
getInd <- function(x,y, nx, byrow=T) {
return( (x-1)* nx + y)
}
ind <- getInd(RGsub$genes$Row, RGsub$genes$Col, nx=215)
- 3) Construct complete RGList
RGfull <- new("RGList") RGfull$R <-RGfull$G <-RGfull$Rb <-RGfull$Gb <- matrix(NA, nc=ncol(RGsub), nr=max(ind)) RGfull$genes <- data.frame(matrix(NA, nc=7, nr=max(ind), dimnames = list(NULL, names(RGsub$genes))))
matrixNames <- dimnames(RGsub$R)
- 4) Populate complete list
RGfull$R[ind,] <- RGsub$R RGfull$Rb[ind,] <- RGsub$Rb RGfull$G[ind,] <- RGsub$G RGfull$Gb[ind,] <- RGsub$Gb RGfull$genes[ind,] <- RGsub$genes RGfull$targets <- RGsub$targets
dimnames(RGfull$R) <- dimnames(RGfull$Rb) <- dimnames(RGfull$G) <- dimnames(RGfull$Gb) <- matrixNames
- Checking the rows that needed padding
rowstopad <- as.numeric(rownames(RGsub$genes) [ !(rownames(RGsub$genes) %in% ind) ]) rowstopad
- Indexing looks ok
RGfull[rowstopad,]
RGfull$printer <- structure(list(ngrid.r=1, ngrid.c=1, nspot.r=105, nspot.c=215), class = "PrintLayout") rm(RGsub) return(RGfull)
} "readGPR" <-
function (sourceType = "median", dataDir = "/Volumes/HD2/Data/Nutrigenomics/MultipleScans/GPR", ...)
{
require(limma) print(sessionInfo()) source.type <- switch(sourceType, mean = "genepix", median = "genepix.median") print(paste("source =", source.type)) if (exists("targets")) { RG <- read.maimages(targets$Filename, path = dataDir, source = source.type, wt.fun = wtflags(0), names = targets$Filename, ...) } else { RG <- read.maimages(dir(dataDir, pattern = ".gpr"), path = dataDir, source = source.type, wt.fun = wtflags(0), ...) } RG$printer <- getLayout(RG$genes) return(RG)
}
- Temporarily modified toptable to allow unsorted output (masks limma version in Package:limma)
"toptable" <-
function (fit, coef = 1, number = 10, genelist = NULL, A = NULL, eb = NULL, adjust.method = "BH", sort.by = "B", resort.by = NULL, ...)
{
if (is.null(eb)) { fit$coefficients <- as.matrix(fit$coefficients)[, coef] fit$stdev.unscaled <- as.matrix(fit$stdev.unscaled)[, coef] eb <- ebayes(fit, ...) coef <- 1 } M <- as.matrix(fit$coefficients)[, coef] if (length(M) < number) number <- length(M) if (number < 1) return(data.frame()) if (is.null(A)) { if (sort.by == "A") stop("Cannot sort by A-values as these have not been given") } else { if (NCOL(A) > 1) A <- rowMeans(A, na.rm = TRUE) } tstat <- as.matrix(eb$t)[, coef] P.Value <- as.matrix(eb$p.value)[, coef] B <- as.matrix(eb$lods)[, coef] sort.by <- match.arg(sort.by, c("M", "A", "P", "p", "T", "t", "B", "none")) ord <- switch(sort.by, M = order(abs(M), decreasing = TRUE), A = order(A, decreasing = TRUE), P = order(P.Value, decreasing = FALSE), p = order(P.Value, decreasing = FALSE), T = order(abs(tstat), decreasing = TRUE), t = order(abs(tstat), decreasing = TRUE), B = order(B, decreasing = TRUE), none = 1:number) top <- ord[1:number] adj.P.Value <- p.adjust(P.Value, method = adjust.method) if (is.null(genelist)) tab <- data.frame(M = M[top]) else { if (is.null(dim(genelist))) tab <- data.frame(ProbeID = I(genelist[top]), M = M[top]) else tab <- data.frame(genelist[top, , drop = FALSE], M = M[top]) } if (!is.null(A)) tab <- data.frame(tab, A = A[top]) tab <- data.frame(tab, t = tstat[top], P.Value = P.Value[top], adj.P.Val = adj.P.Value[top], B = B[top]) rownames(tab) <- as.character(1:length(M))[top] if (!is.null(resort.by)) { resort.by <- match.arg(resort.by, c("M", "A", "P", "p", "T", "t", "B")) ord <- switch(resort.by, M = order(tab$M, decreasing = TRUE), A = order(tab$A, decreasing = TRUE), P = order(tab$P.Value, decreasing = FALSE), p = order(tab$P.Value, decreasing = FALSE), T = order(tab$t, decreasing = TRUE), t = order(tab$t, decreasing = TRUE), B = order(tab$lods, decreasing = TRUE)) tab <- tab[ord, ] } tab
}
"ttFilter" <-
function( filter = "abs(logFC) > 0.69 & abs(t) > 2", fit, sort.by="t", number = nrow(fit), ...) {
- ----------------------------------------------------------------------------- #
- Function for filtering out unwanted rows in a topTable output
- Author: Marcus Davy
- ----------------------------------------------------------------------------- #
tt <- topTable(fit, sort.by = sort.by, number = number, ...) tCols <- colnames(tt)
# create an enviromnent for topTable columns e <- new.env() for(i in tCols) { ei <- tti }
# Obtain logical from filter toSub <- eval(parse(text=filter), envir=e) return( tt[toSub, ] ) }
"wikitable" <-
function (genelist, filename, title, othernames, table.head, table.center = TRUE, repository = "ll")
{
nl <- "\n" outfile <- file(filename, "w") if (!(missing(title) | is.null(title))) { cat("=", title, "=", nl, file = outfile, sep = "") } cat("{| border=\"0\"", file = outfile, sep = "\n") if (!(missing(table.head) | is.null(table.head))) { headout <- paste("| ", "", table.head, "", sep="") cat("|- align=\"center\" style=\"background:#277ec2; color:white\"", headout, file = outfile, sep = "\n") } if (is.list(genelist)) { if (all.equal(rep(length(genelist1), length(genelist)), unlist(lapply(genelist, length), use.names = FALSE))) { nrows <- length(genelist1) } else { stop("The lists of genes to annotate must all be of equal length.") } } else { if(!is.null(genelist)) { # nrows from either genelist or othernames nrows <- length(genelist) } else { nrows <- length(othernames1) } } if(!is.null(repository)) { if (is.list(repository)) { rows <- "" for (i in seq(along = repository)) { rows <- paste(rows, getWikiRows(genelisti, repositoryi), sep="\n") } } else { rows <- getWikiRows(genelist, repository, sep="\n") } } else { rows <- NULL } if (!missing(othernames)) { if (is.list(othernames)) { others <- "" for (nm in othernames) { if (is.matrix(nm)) { for (i in 1:dim(nm)[2]) { others <- paste(others, "\n| ", nm[, i], sep = "") } } if (is.list(nm)) { out <- vector() for (j in seq(along = nm)) {
out[j] <- paste("
", nmj, "
", sep = "",
collapse = "") } out <- paste("\n| ", out, sep = "") others <- paste(others, out, sep = "") } if ((is.vector(nm) || is.factor(nm)) && !is.list(nm)) others <- paste(others, "\n| ", nm, sep = "") } } else { others <- NULL } if( !(is.null(rows) | is.null(others))) { if (length(rows) != length(others)) stop(paste("There are", length(rows), "rows in your genelist, but", length(others), "rows in othernames.\n This will not give", "good results!\n")) } rows <- paste(rows, others) } for (i in 1:nrows) cat("|- align=\"left\" bgcolor=\"#f2f2f2\"", rows[i], nl, file = outfile, sep = "") cat("|}", file = outfile, sep="") close(outfile)
}