Difference between revisions of "R tutorial"

From Organic Design wiki
m (Reading / writing files)
m (Reading / writing files)
Line 157: Line 157:
 
; Reading a GPR file data section using read.table
 
; Reading a GPR file data section using read.table
 
  colClasses <- rep("NULL", 82)
 
  colClasses <- rep("NULL", 82)
  colClasses[c(1:5, 9,12, 18, 21)] <- NA
+
  colClasses[c(1:5, 9,12, 18, 21)] <- NA             <font color="red"> # Set colClasses to ignore unwanted columns</font>
 
  mydata <- read.table(file.path(dataDir,"BE34.gpr"), header=T,  sep="\t",  
 
  mydata <- read.table(file.path(dataDir,"BE34.gpr"), header=T,  sep="\t",  
 
                                   nrows=10, skip=31, colClasses=colClasses) <font color="red"># Get irist 10 lines of data after 31st row</font>
 
                                   nrows=10, skip=31, colClasses=colClasses) <font color="red"># Get irist 10 lines of data after 31st row</font>

Revision as of 03:09, 16 March 2006

Resources

Contributed guides for the beginner
More comprehensive contributed guides

Obtaining help in R

help.start()           # Browser based help documentation
help()                 # Help on a topic (note: help pages have a set format)
? ls                   # alternative help method on ls function
apropos("mean")        # Find objects by (partial) name; the pattern must be a quoted string
example(mean)          # Run an Examples Section from the Online Help
demo()                 # Demonstrations of R Functionality
demo(graphics)         # Demonstration of graphics functionality
These objects are functions; to run them you must put parentheses '()' after the function name

Useful commands in the R environment

search()              # Give the search path for R objects
searchpaths()         # Give the full search path for R objects
ls()                  # List objects
objects()             # Alternate function to list objects
data()                # Publicly available datasets
rm()                  # Remove objects from a specified environment
save.image()          # Save R objects
q()                   # Terminate an R session →  prompted to Save workspace image? [y/n/c]:

Command prompt

  • Type commands after the prompt (>) e.g.
> x <- 1:10        # assignment of 1 to 10 to an object called 'x'
> x                # Returning the x object to the screen
 [1]  1  2  3  4  5  6  7  8  9 10
  • Continuation of commands is expected after the plus symbol (+) e.g.
> x <- 1:          # partial command → parser is expecting more information
+  10
> x
 [1]  1  2  3  4  5  6  7  8  9 10
Text following a '#' is commented out

Basic (atomic) data types

  • Logical
T                 # TRUE
F                 # FALSE
  • Numeric
3.141592654       # Any number [0-9\.]
  • Character
"Putative ATPase" # Any character [A-Za-z] must be single or double quoted 
  • Missing values
NA                # Label for missing information in datasets
See also help("NA"), help("NaN")

Assignment of objects

  • objects must start with a letter [A-Z a-z]
  • "<-" The arrow assigns information to the object on the left
x <- 42                # Assignment to the left
x
x = 42                 # Equivalent assignment (not recommended)
x 
42 -> x                # Assignment to the right
x

Saving objects

getwd()                        # Returns the current directory where R is running
setwd("C:/DATA/Microarray")    # Set the working directory to another location
getwd()                        # Check the directory has changed
x <- 42
save.image()                   # Saves a snapshot of objects to file .RData
y <- x * 2                     # Make a new object called 'y'
y                              # Return value of 'y'
q()                            # quit R

Restart R by double clicking on the file .RData in C:/DATA/Microarray

x              # Returns 'x' as it was saved to .RData
y              # 'y' should not exist

Object data types

  • Create a scalar (vector of length 1)
a <- 3.14            # Assign an approximation of pi to object 'a'
length(a)            # The scalar is actually a vector of length 1
pi                   # R already has a built-in object for pi
search()             # Print the search path for all objects
find("pi")           # "pi" is located in package:base
  • Create a vector
x <- c(2,3,5,2,7,1)  # Numbers put into a vector using 'c' function concatenate
x
y <- c(10,15,12) 
y
names(y) <- c("first","second","third")    # Elements can be given names
z <- c(y,x)
z
  • Create a matrix
zmat <- cbind(x,y)   # cbind joins vectors together by column       
zmat 
What's going on in the second column? → Recycling: the shorter vector 'y' is repeated to match the length of 'x'
mat <- matrix(1:20, nrow=5, ncol=4)                   # Constructing a matrix
mat
colnames(mat) <- c("Col1","Col2", "Col3", "Col4")     # Adding column names
mat
  • Create a list
mylist <- list(1:4,c("a","b","c"), LETTERS[1:10]) 
mylist
mylist <- list("element 1" = 1:4,"second vector" = c("a","b","c"), "Capitals" = LETTERS[1:10]) 
mylist

Indexing

  • Subsetting a vector
x[c(1,2,3)]                     # Selecting the first three elements of 'x'
x[1:3]                          # Same subset using ':' sequence generation → see help(":")
y[2]                            # Selecting the second element of 'y'
y["second"]                     # Selecting the second element of 'y' (by name)
  • Subsetting a matrix
mat[,1:2]                       # Selecting the first two columns of 'mat'
mat[1:2, 2:4]                   # Selecting a subset matrix of 'mat'
  • Subsetting a list
mylist[[1]]                    # Subsetting list 'mylist' by index
mylist[["element 1"]]          # Subsetting list 'mylist' by name 'element 1'
mylist$"element 1"             # Alternate way of subsetting
mylist$Capitals[1:5]           # Selecting the first five elements of 'Capitals' in 'mylist' (case sensitive)

Plotting data

  • See help pages for basic plot functions
help(plot) 
help(par)
example(plot)
par(ask=TRUE)         # Set the printing device to prompt user before displaying next graph
example(hist)

Reading / writing files

Reading data
help(scan)
help(read.table)

Reading a GPR file header using scan
dataDir <- "C:/DATA/Microarray/GPR"                                  # Directory containing the GPR files
mydata <-  scan(file.path(dataDir, "BE34.gpr"), what="", nlines=29)  # Get first 29 rows of data
mydata
Reading a GPR file data section using read.table
colClasses <- rep("NULL", 82)                       # "NULL" makes read.table skip a column
colClasses[c(1:5, 9,12, 18, 21)] <- NA              # NA keeps these columns and lets read.table choose their type
mydata <- read.table(file.path(dataDir,"BE34.gpr"), header=TRUE,  sep="\t",
                                  nrows=10, skip=31, colClasses=colClasses) # Skip the first 31 lines, then read the header and the first 10 rows of data
mydata
Writing data
help(write)
help(write.table)
See also dump, restore, save, load

User defined functions

  • Writing functions provides a means of adding new functionality to the language. A function has the form:
myfun <- function( arglist ){ body }
File:Functions.png
  • Identity function: returns its input argument
myfun <- function(x){x}        # Creating identity function
myfun("foo")                   # Running the function
myfun()                        # Fails: no input argument provided
  • A simple function
square <- function(x){x * x}         # Square the input number
square(10)                  # Returns 10 squared
square(1:4)              # Underlying arithmetic is vectorized
  • Graphical example from user defined function
The following function generates noisy data from a sine curve and examines the
bias-variance tradeoff of a smoothing spline using two different smoothing parameters. Paste it into R:
# biasVar: illustrate the bias-variance tradeoff of a smoothing spline.
#
# Draws a 2 x 2 grid of plots. The top row shows one noisy sample from a sine
# curve with a smoothing spline fitted using df1 (left) and df2 (right)
# degrees of freedom. The bottom row refits the spline on N fresh samples for
# each df and shows, on a fixed grid of x values, the spread of the fits
# (grey) and the gap between the average fit and the true sine curve (black).
#
# Arguments:
#   df1, df2  degrees of freedom passed to smooth.spline() for the left and
#             right columns of plots
#   N         number of simulated data sets per tradeoff panel
#   seed      value passed to set.seed() so the figure is reproducible
#
# Returns NULL invisibly; called for its plotting side effect. The graphical
# parameters changed here (mfrow, mar, mgp) are restored on exit.
biasVar <- function(df1 = 4, df2 = 15, N = 100, seed = 1) {
  set.seed(seed)

  # 1) Data setup
  n_obs <- 80          # observations per simulated data set
  noise_sd <- 1 / 9    # standard deviation of the noise added to sin(x)
  n_grid <- 500        # number of x positions at which the fits are compared
  ylim <- c(-2, 2)
  xlim <- c(-3, 3)

  # par() returns the previous values; restore them so that plots made after
  # this function returns are not forced into the 2 x 2 layout.
  old_par <- par(mfrow = c(2, 2), mar = c(5, 4, 4 - 2, 2) + 0.1,
                 mgp = c(2, 0.5, 0))
  on.exit(par(old_par), add = TRUE)

  # Draw one noisy sample from the sine curve (x first, then the noise, so
  # the random number stream is consumed in a fixed order).
  simulate <- function() {
    sx <- rnorm(n_obs, 0, 1)
    sy <- sin(sx) + rnorm(n_obs, 0, noise_sd)
    list(x = sx, y = sy)
  }

  first <- simulate()
  x <- first$x
  y <- first$y
  xseq <- seq(min(x), max(x), length.out = n_grid)
  truex <- seq(min(x), max(x), length.out = n_obs)

  # 2) One panel: the first sample, the true curve (dashed) and the spline fit
  plot_fit <- function(df) {
    plot(x, y, main = paste0("df=", df), xlim = xlim, ylim = ylim)
    lines(truex, sin(truex), lty = 5)
    lines(smooth.spline(x, y, df = df), lty = 1)
  }

  # 3) One panel: refit on N fresh samples and summarise the fits on xseq
  plot_tradeoff <- function(df) {
    # type = "n" sets up the axes only; no data points are drawn
    plot(x, y, main = paste0("Bias-Variance tradeoff, df=", df), type = "n",
         xlim = xlim, ylim = ylim)
    sim <- matrix(NA_real_, nrow = n_grid, ncol = N)
    for (i in seq_len(N)) {
      draw <- simulate()
      fit <- smooth.spline(draw$x, draw$y, df = df)
      sim[, i] <- predict(fit, xseq)$y
    }
    # NOTE(review): the t quantile uses N degrees of freedom, as in the
    # original code; N - 1 may have been intended -- confirm.
    half_width <- qt(0.975, N) * sqrt(apply(sim, 1, var))
    mean_fit <- rowMeans(sim)
    # Zero-width rectangles are drawn as vertical line segments at each xseq
    rect(xseq, mean_fit - half_width, xseq, mean_fit + half_width,
         border = "grey")
    rect(xseq, sin(xseq), xseq, mean_fit, border = "black")
    lines(truex, sin(truex))
  }

  # Panel order (filled row by row): fit df1, fit df2, tradeoff df1, tradeoff df2
  plot_fit(df1)
  plot_fit(df2)
  plot_tradeoff(df1)
  plot_tradeoff(df2)

  invisible(NULL)
}
Running the function
biasVar()                # Generates data from a sine curve and plots the bias-variance tradeoff
biasVar(df1=2, df2=30)   # Let's change the smoothing parameters via the function arguments

Quitting R

rm(list=ls())         # Cleaning up: Remove Objects from a Specified Environment
q()