Difference between revisions of "RP.R"
From Organic Design wiki
(Some comments) |
|||
Line 4: | Line 4: | ||
gene.names = NULL, plot = FALSE, rand = NULL) | gene.names = NULL, plot = FALSE, rand = NULL) | ||
{ | { | ||
+ | # Expects a matrix, warns if a vector | ||
if (is.vector(data)) { | if (is.vector(data)) { | ||
cat("Warning: There is only one gene in the data set", | cat("Warning: There is only one gene in the data set", | ||
Line 9: | Line 10: | ||
data = matrix(data, nrow = 1) | data = matrix(data, nrow = 1) | ||
} | } | ||
+ | # Looks like code taken from sam analysis | ||
+ | # Comapre cl to data to make sure compatible | ||
total.sam <- length(cl) | total.sam <- length(cl) | ||
total.sam2 <- dim(data)[2] | total.sam2 <- dim(data)[2] | ||
if (total.sam != total.sam2) | if (total.sam != total.sam2) | ||
stop("Number of classes should match the columns in the data") | stop("Number of classes should match the columns in the data") | ||
+ | # xyCall generates a list of indices based on structure of cl | ||
xy.out <- xyCall(cl) | xy.out <- xyCall(cl) | ||
x <- xy.out$x | x <- xy.out$x | ||
Line 18: | Line 22: | ||
num.gene <- dim(data)[1] | num.gene <- dim(data)[1] | ||
data <- as.matrix(data) | data <- as.matrix(data) | ||
+ | # Force dataset to be numeric | ||
mode(data) <- "numeric" | mode(data) <- "numeric" | ||
NA.genes <- NULL | NA.genes <- NULL | ||
if (any(is.na(data))) { | if (any(is.na(data))) { | ||
+ | # Could simplify here by using which(..., arr.ind=TRUE) | ||
NA.genes <- unique(ceiling(which(is.na(t(data)))/ncol(data))) | NA.genes <- unique(ceiling(which(is.na(t(data)))/ncol(data))) | ||
cat("Warning: There are", length(NA.genes), "genes with at least one missing value.", | cat("Warning: There are", length(NA.genes), "genes with at least one missing value.", |
Latest revision as of 02:30, 20 March 2007
- ----------------------- From BioC version 1.9 ---------------------- #
RP <- function (data, cl, num.perm = 100, logged = TRUE, na.rm = FALSE,
gene.names = NULL, plot = FALSE, rand = NULL)
{
# Expects a matrix, warns if a vector if (is.vector(data)) { cat("Warning: There is only one gene in the data set", "\n", "\n") data = matrix(data, nrow = 1) } # Looks like code taken from sam analysis # Comapre cl to data to make sure compatible total.sam <- length(cl) total.sam2 <- dim(data)[2] if (total.sam != total.sam2) stop("Number of classes should match the columns in the data") # xyCall generates a list of indices based on structure of cl xy.out <- xyCall(cl) x <- xy.out$x y <- xy.out$y num.gene <- dim(data)[1] data <- as.matrix(data) # Force dataset to be numeric mode(data) <- "numeric" NA.genes <- NULL if (any(is.na(data))) { # Could simplify here by using which(..., arr.ind=TRUE) NA.genes <- unique(ceiling(which(is.na(t(data)))/ncol(data))) cat("Warning: There are", length(NA.genes), "genes with at least one missing value.", "\n", "\n") if (na.rm) data[NA.genes, ] <- NaReplace(data[NA.genes, ]) if (!na.rm) cat(" This value is not used to compute rank product.", "\n", "\n") } if (!is.null(rand)) { set.seed(rand) } if (!is.null(y)) { num.class <- 2 data1 <- as.matrix(data[, x]) data2 <- as.matrix(data[, y]) data1.ave = apply(data1, 1, mean) data2.ave = apply(data2, 1, mean) if (logged) { fold.change = data1.ave - data2.ave } else { fold.change = data1.ave/data2.ave } } if (is.null(y)) { num.class <- 1 data1 <- as.matrix(data[, x]) data2 <- as.matrix(data[, y]) fold.change = apply(data1, 1, mean) } RP.ori.upin2 <- RankProd1(data1, data2, logged, num.class, rev.sorting = FALSE) rank.ori.upin2 <- rank(RP.ori.upin2$RP) RP.ori.downin2 <- RankProd1(data1, data2, logged, num.class, rev.sorting = TRUE) rank.ori.downin2 <- rank(RP.ori.downin2$RP) RP.perm.upin2 <- matrix(NA, num.gene, num.perm) RP.perm.downin2 <- matrix(NA, num.gene, num.perm) cat("Starting", num.perm, "permutations...", "\n") for (p in 1:num.perm) { temp.data <- Newdata(data1, data2, num.class) new.data1 <- temp.data$new.data1 new.data2 = temp.data$new.data2 RP.perm.upin2[, p] <- RankProd1(new.data1, new.data2, logged, num.class, rev.sorting = FALSE)$RP RP.perm.downin2[, p] <- RankProd1(new.data1, new.data2, logged, num.class, rev.sorting = TRUE)$RP } cat("Computing pfp ..", "\n") temp1 <- as.vector(cbind(RP.ori.upin2$RP, RP.perm.upin2)) temp2 <- rank(temp1)[1:num.gene] order.temp <- match(temp2, sort(temp2)) count.perm <- (sort(temp2) - c(1:num.gene))[order.temp] exp.count <- count.perm/num.perm pval.upin2 <- count.perm/(num.perm * num.gene) pfp.upin2 <- exp.count/rank.ori.upin2 if (plot) { par(mfrow = c(2, 1)) plot(rank.ori.upin2, pfp.upin2, xlab = "sorted gene rank of the original rank product", ylab = "estimated PFP") title("Identification of Up-regulated genes under class 2") } rm(temp1, temp2, order.temp, count.perm, exp.count) temp1 <- as.vector(cbind(RP.ori.downin2$RP, RP.perm.downin2)) temp2 <- rank(temp1)[1:num.gene] order.temp <- match(temp2, sort(temp2)) count.perm <- (sort(temp2) - c(1:num.gene))[order.temp] exp.count <- count.perm/num.perm pval.downin2 <- count.perm/(num.perm * num.gene) pfp.downin2 <- exp.count/rank.ori.downin2 if (plot) { plot(rank.ori.downin2, pfp.downin2, xlab = "sorted gene rank of the original rank product", ylab = "estimated PFP") title("Identification of down-regulated genes under class 2") } rm(temp1, temp2, order.temp, count.perm, exp.count) cat("Outputing the results ..", "\n") pfp = data.frame(pfp.upin2, pfp.downin2) colnames(pfp) = c("class1 < class2", "class1 > class 2") pval = data.frame(pval.upin2, pval.downin2) colnames(pval) = c("class1 < class2", "class1 > class 2") RPs = data.frame(RP.ori.upin2$RP, RP.ori.downin2$RP) colnames(RPs) = c("class1 < class2", "class1 > class 2") RPrank = data.frame(rank.ori.upin2, rank.ori.downin2) colnames(RPrank) = c("class1 < class2", "class1 > class 2") Orirank = list(RP.ori.upin2$rank.all, RP.ori.downin2$rank.all) names(Orirank) = c("class1 < class2", "class1 > class 2") fold.change = t(t(fold.change)) colnames(fold.change) = "log/unlog(class1/class2)" if (!is.null(gene.names)) { rownames(pfp) = gene.names rownames(pval) = gene.names rownames(RPs) = gene.names rownames(RPrank) = gene.names rownames(fold.change) = gene.names } list(pfp = pfp, pval = pval, RPs = RPs, RPrank = RPrank, Orirank = Orirank, AveFC = fold.change)
}