Difference between revisions of "R tutorial"
From Organic Design wiki
m (→Reading / writing files) |
m (→= Basic (atomic) data types) |
||
(17 intermediate revisions by 4 users not shown) | |||
Line 1: | Line 1: | ||
− | + | == Resources == | |
− | |||
*Quickest way to learn R: use the Contributed Documentation (''http://cran.stat.auckland.ac.nz/other-docs.html'') | *Quickest way to learn R: use the Contributed Documentation (''http://cran.stat.auckland.ac.nz/other-docs.html'') | ||
*Thousands of pages of documentation including short guides / reference cards | *Thousands of pages of documentation including short guides / reference cards | ||
Line 12: | Line 11: | ||
:*Using R for Data Analysis and Graphics (''http://cran.stat.auckland.ac.nz/doc/contrib/usingR-2.pdf'') | :*Using R for Data Analysis and Graphics (''http://cran.stat.auckland.ac.nz/doc/contrib/usingR-2.pdf'') | ||
:*Simple R (''http://cran.stat.auckland.ac.nz/doc/contrib/Verzani-SimpleR.pdf'') | :*Simple R (''http://cran.stat.auckland.ac.nz/doc/contrib/Verzani-SimpleR.pdf'') | ||
− | :* | + | :*S programming techniques (''http://www.stat.auckland.ac.nz/S-Workshop/Ihaka/lecture.pdf'') |
+ | |||
---- | ---- | ||
− | + | == Obtaining help in R == | |
help.start() <font color="red"># Browser based help documentation</font> | help.start() <font color="red"># Browser based help documentation</font> | ||
help() <font color="red"># Help on a topic (''note: help pages have a set format'')</font> | help() <font color="red"># Help on a topic (''note: help pages have a set format'')</font> | ||
Line 24: | Line 24: | ||
demo() <font color="red"># Demonstrations of R Functionality</font> | demo() <font color="red"># Demonstrations of R Functionality</font> | ||
demo(graphics) <font color="red"># Demonstration or graphics Functionality</font> | demo(graphics) <font color="red"># Demonstration or graphics Functionality</font> | ||
+ | RSiteSearch() <font color="red"># Searches web newslist archives and retrieves results using http</font> | ||
:<font color="blue">''There objects are functions, to run them you must put parentheses '()' after the function name''</font> | :<font color="blue">''There objects are functions, to run them you must put parentheses '()' after the function name''</font> | ||
---- | ---- | ||
− | + | == Useful commands in the R environment == | |
search() <font color="red"># Give Search Path for R Objects</font> | search() <font color="red"># Give Search Path for R Objects</font> | ||
searchpaths() <font color="red"># Give Full Search Path for R Objects</font> | searchpaths() <font color="red"># Give Full Search Path for R Objects</font> | ||
Line 38: | Line 39: | ||
---- | ---- | ||
− | + | == Command prompt == | |
* <font color="red>''Type commands after the prompt (<font color="blue">></font>) e.g. ''</font> | * <font color="red>''Type commands after the prompt (<font color="blue">></font>) e.g. ''</font> | ||
<font color="blue">></font> x <- 1:10 <font color="red"># assignment of 1 to 10 to an object called 'x'</font> | <font color="blue">></font> x <- 1:10 <font color="red"># assignment of 1 to 10 to an object called 'x'</font> | ||
Line 51: | Line 52: | ||
---- | ---- | ||
− | + | == Basic (atomic) data types == | |
*Logical | *Logical | ||
T <font color="red"># TRUE</font> | T <font color="red"># TRUE</font> | ||
Line 64: | Line 65: | ||
---- | ---- | ||
− | + | == Assignment of objects == | |
* objects must start with a letter [A-Z a-z] | * objects must start with a letter [A-Z a-z] | ||
* "<-" The arrow assigns information to the object on the left | * "<-" The arrow assigns information to the object on the left | ||
Line 75: | Line 76: | ||
---- | ---- | ||
− | + | == Saving objects == | |
getwd() <font color="red"># Returns the current directory where R is running</font> | getwd() <font color="red"># Returns the current directory where R is running</font> | ||
setwd("C:/DATA/Microarray") <font color="red"># Set the working directory to another location</font> | setwd("C:/DATA/Microarray") <font color="red"># Set the working directory to another location</font> | ||
Line 85: | Line 86: | ||
q() <font color="red"># quit R</font> | q() <font color="red"># quit R</font> | ||
''Restart R by double clicking on the file .RData in C:/DATA/Microarray'' | ''Restart R by double clicking on the file .RData in C:/DATA/Microarray'' | ||
− | x <font color="red"># Returns 'x' as it was saved to .RData</font> | + | x <font color="red"># Returns 'x' as it was saved to .RData in "C:/DATA/Microarray"</font> |
y <font color="red"># 'y' should not exist</font> | y <font color="red"># 'y' should not exist</font> | ||
---- | ---- | ||
− | + | == Object data types == | |
*Create a scalar (vector of length 1) | *Create a scalar (vector of length 1) | ||
a <- 3.14 <font color="red"># Assign pythagorus to object 'a'</font> | a <- 3.14 <font color="red"># Assign pythagorus to object 'a'</font> | ||
Line 119: | Line 120: | ||
---- | ---- | ||
− | + | == Indexing == | |
*Subsetting a vector | *Subsetting a vector | ||
Line 137: | Line 138: | ||
---- | ---- | ||
− | + | == Plotting data == | |
*See help pages for basic plot functions | *See help pages for basic plot functions | ||
help(plot) | help(plot) | ||
Line 146: | Line 147: | ||
---- | ---- | ||
− | + | == Reading / writing files == | |
;Reading data | ;Reading data | ||
help(scan) | help(scan) | ||
Line 157: | Line 158: | ||
; Reading a GPR file data section using read.table | ; Reading a GPR file data section using read.table | ||
colClasses <- rep("NULL", 82) | colClasses <- rep("NULL", 82) | ||
− | colClasses[c(1:5, 9,12, 18, 21)] <- NA | + | colClasses[c(1:5, 9,12, 18, 21)] <- NA <font color="red"># Set colClasses to ignore unwanted columns</font> |
mydata <- read.table(file.path(dataDir,"BE34.gpr"), header=T, sep="\t", | mydata <- read.table(file.path(dataDir,"BE34.gpr"), header=T, sep="\t", | ||
− | nrows= | + | nrows=20, skip=31, colClasses=colClasses) <font color="red"># Get first 20 lines of data after 31st row</font> |
mydata | mydata | ||
; Writing data | ; Writing data | ||
Line 167: | Line 168: | ||
---- | ---- | ||
− | + | == User defined functions == | |
*Writing functions provide a means of adding new functionality to the language. A function has the form: | *Writing functions provide a means of adding new functionality to the language. A function has the form: | ||
myfun <- function( arglist ){ body } | myfun <- function( arglist ){ body } | ||
− | [[Image: | + | [[Image:Functions2.png]] |
*Identity function: returns its input arguement | *Identity function: returns its input arguement | ||
Line 237: | Line 238: | ||
;Running the function: | ;Running the function: | ||
biasVar() <font color="red"># Generates data from a sine curve looking at bias variance tradeoff </font> | biasVar() <font color="red"># Generates data from a sine curve looking at bias variance tradeoff </font> | ||
− | biasVar(df1=2, df2=30) <font color="red"># | + | biasVar(df1=2, df2=30) <font color="red"># Let's change the smoothing parameters in the function arguements</font> |
---- | ---- | ||
− | + | == Quiting R == | |
rm(list=ls()) <font color="red"># Cleaning up: Remove Objects from a Specified Environment</font> | rm(list=ls()) <font color="red"># Cleaning up: Remove Objects from a Specified Environment</font> | ||
q() | q() | ||
+ | |||
+ | == Links == | ||
+ | *[[Rlang.R|Advanced tutorial]] | ||
[[Category:Sven/Rosaceae]] | [[Category:Sven/Rosaceae]] |
Latest revision as of 02:41, 23 April 2020
Contents
Resources
- Quickest way to learn R: use the Contributed Documentation (http://cran.stat.auckland.ac.nz/other-docs.html)
- Thousands of pages of documentation including short guides / reference cards
- Contributed guides for the beginner
- A Guide for the Unwilling S User (http://cran.stat.auckland.ac.nz/doc/contrib/Burns-unwilling_S.pdf)
- R for Beginners (http://cran.stat.auckland.ac.nz/doc/contrib/Paradis-rdebuts_en.pdf)
- Reference card (http://cran.stat.auckland.ac.nz/doc/contrib/refcard.pdf)
- More comprehensive contributed guides
- Using R for Data Analysis and Graphics (http://cran.stat.auckland.ac.nz/doc/contrib/usingR-2.pdf)
- Simple R (http://cran.stat.auckland.ac.nz/doc/contrib/Verzani-SimpleR.pdf)
- S programming techniques (http://www.stat.auckland.ac.nz/S-Workshop/Ihaka/lecture.pdf)
Obtaining help in R
help.start() # Browser based help documentation help() # Help on a topic (note: help pages have a set format) ? ls # alternative help method on ls function apropos(mean) # Find Objects by (Partial) Name example(mean) # Run an Examples Section from the Online Help demo() # Demonstrations of R Functionality demo(graphics) # Demonstration of graphics Functionality RSiteSearch() # Searches web newslist archives and retrieves results using http
- These objects are functions; to run them you must put parentheses '()' after the function name
Useful commands in the R environment
search() # Give Search Path for R Objects searchpaths() # Give Full Search Path for R Objects ls() # List objects objects() # alternate function to list objects data() # Publicly available datasets rm() # Remove Objects from a Specified Environment save.image() # Save R Objects q() # Terminate an R Session → prompted to Save workspace image? [y/n/c]:
Command prompt
- Type commands after the prompt (>) e.g.
> x <- 1:10 # assignment of 1 to 10 to an object called 'x' > x # Returning the x object to the screen [1] 1 2 3 4 5 6 7 8 9 10
- Continuation of commands is expected after the plus symbol (+) e.g.
> x <- 1: # partial command → parser is expecting more information + 10 > x [1] 1 2 3 4 5 6 7 8 9 10
- Text following a '#' is commented out
Basic (atomic) data types
- Logical
T # TRUE F # FALSE
- Numeric
3.141592654 # Any number [0-9\.]
- Character
"Putative ATPase" # Any character [A-Za-z] must be single or double quoted
- Missing values
NA # Label for missing information in datasets
- See also help("NA"), help("NaN")
Assignment of objects
- Object names must start with a letter [A-Z a-z]
- "<-" The arrow assigns information to the object on the left
x <- 42 # Assignment to the left x x = 42 # Equivalent assignment (not recommended) x 42 -> x # Assignment to the right x
Saving objects
getwd() # Returns the current directory where R is running setwd("C:/DATA/Microarray") # Set the working directory to another location getwd() # Check the directory has changed x <- 42 save.image() # Saves a snapshot of objects to file .RData y <- x * 2 # Make a new object called 'y' y # Return value of 'y' q() # quit R
Restart R by double clicking on the file .RData in C:/DATA/Microarray
x # Returns 'x' as it was saved to .RData in "C:/DATA/Microarray" y # 'y' should not exist
Object data types
- Create a scalar (vector of length 1)
a <- 3.14 # Assign an approximation of pi to object 'a' length(a) # The scalar is actually a vector of length 1 pi # R already has a built-in object for pi search() # Print the search path for all objects find("pi") # "pi" is located in package:base
- Create a vector
x <- c(2,3,5,2,7,1) # Numbers put into a vector using 'c' function concatenate x y <- c(10,15,12) y names(y) <- c("first","second","third") # Elements can be given names z <- c(y,x) z
- Create a matrix
zmat <- cbind(x,y) # cbind joins vectors together by column zmat
- What's going on in the second column? → number recycling
mat <- matrix(1:20, nrow=5, ncol=4) # Constructing a matrix mat colnames(mat) <- c("Col1","Col2", "Col3", "Col4") # Adding column names mat
- Create a list
mylist <- list(1:4,c("a","b","c"), LETTERS[1:10]) mylist mylist <- list("element 1" = 1:4,"second vector" = c("a","b","c"), "Capitals" = LETTERS[1:10]) mylist
Indexing
- Subsetting a vector
x[c(1,2,3)] # Selecting the first three elements of 'x' x[1:3] # Same subset using ':' sequence generation → see help(":") y[2] # Selecting the second element of 'y' y["second"] # Selecting the second element of 'y' (by name)
- Subsetting a matrix
mat[,1:2] # Selecting the first two columns of 'mat' mat[1:2, 2:4] # Selecting a subset matrix of 'mat'
- Subsetting a list
mylist[[1]] # Subsetting list 'mylist' by index mylist[["element 1"]] # Subsetting list 'mylist' by name 'element 1' mylist$"element 1" # Alternate way of subsetting mylist$Capitals[1:5] # Selecting the first five elements of 'Capitals' in 'mylist' (case sensitive)
Plotting data
- See help pages for basic plot functions
help(plot) help(par) example(plot) par(ask=TRUE) # Set the printing device to prompt user before displaying next graph example(hist)
Reading / writing files
- Reading data
help(scan) help(read.table)
- Reading a GPR file header using scan
# Folder containing the GPR files used in this example.
dataDir <- "C:/DATA/Microarray/GPR"
# Read the header: what = "" makes scan() return character tokens, and
# nlines = 29 stops reading after the first 29 lines of the file.
mydata <- scan(file.path(dataDir, "BE34.gpr"), what = "", nlines = 29)
mydata
- Reading a GPR file data section using read.table
# Start with every one of the 82 columns marked "NULL", which tells
# read.table() to skip that column entirely.
colClasses <- rep("NULL", 82)
# NA means "keep this column and let read.table() work out its type",
# so only the listed columns are read in.
colClasses[c(1:5, 9, 12, 18, 21)] <- NA
# Skip the first 31 lines, treat the next line as the column header, then
# read the first 20 rows of the tab-separated data section.
mydata <- read.table(file.path(dataDir, "BE34.gpr"), header = TRUE, sep = "\t",
                     nrows = 20, skip = 31, colClasses = colClasses)
mydata
- Writing data
help(write) help(write.table)
- See also dump, restore, save, load
User defined functions
- Writing functions provides a means of adding new functionality to the language. A function has the form:
myfun <- function( arglist ){ body }
- Identity function: returns its input argument
myfun <- function(x){x} # Creating identity function myfun("foo") # Running the function myfun() # Fails: no input argument provided
- A simple function
square <- function(x){x * x} # Square the input number square(10) # Returns 10 squared square(1:4) # Underlying arithmetic is vectorized
- Graphical example from user defined function
- The following function generates noisy data from a sine curve and examines the
bias-variance tradeoff of a smoothing spline fitted with different smoothing parameters. Paste it into R
# biasVar(): illustrate the bias-variance tradeoff of a smoothing spline.
#
# Simulates 80 noisy observations of sin(x) and draws a 2 x 2 grid of plots:
#   1. the data, the true sine curve (dashed) and a spline fit with df1
#   2. the same with df2
#   3. mean spline fit over N fresh simulations with df1, with a grey band
#      of +/- qt(0.975, N) * sd around it and the gap to sin(x) in black
#   4. the same with df2
#
# Arguments:
#   df1, df2  degrees of freedom passed to smooth.spline()
#   N         number of simulated data sets per tradeoff panel (at least 1)
#   seed      value passed to set.seed() so the figure is reproducible
#
# Returns NULL invisibly; the function is called for its plots. The graphics
# parameters it changes (mfrow, mar, mgp) are restored when it exits.
biasVar <- function(df1 = 4, df2 = 15, N = 100, seed = 1) {
  stopifnot(is.numeric(N), length(N) == 1, N >= 1)
  set.seed(seed)

  # 1) Data setup
  n_obs <- 80        # observations in each simulated data set
  n_grid <- 500      # points at which every fitted spline is evaluated
  noise_sd <- 1 / 9  # standard deviation of the noise added to sin(x)
  xlim <- c(-3, 3)
  ylim <- c(-2, 2)

  old_par <- par(mfrow = c(2, 2), mar = c(5, 4, 4 - 2, 2) + 0.1,
                 mgp = c(2, 0.5, 0))
  on.exit(par(old_par), add = TRUE)

  x <- rnorm(n_obs, 0, 1)
  y <- sin(x) + rnorm(n_obs, 0, noise_sd)
  xseq <- seq(min(x), max(x), length.out = n_grid)
  truex <- seq(min(x), max(x), length.out = n_obs)

  # Scatter plot of the data with the true curve and one spline fit.
  plot_fit <- function(df) {
    plot(x, y, main = paste0("df=", df), xlim = xlim, ylim = ylim)
    lines(truex, sin(truex), lty = 5)
    lines(smooth.spline(x, y, df = df), lty = 1)
  }

  # Refit the spline on N new simulated data sets and show the spread of
  # the fits (grey) and their average departure from sin(x) (black).
  plot_tradeoff <- function(df) {
    plot(x, y, main = paste0("Bias-Variance tradeoff, df=", df),
         type = "n", xlim = xlim, ylim = ylim)
    sim <- matrix(NA_real_, nrow = n_grid, ncol = N)
    for (i in seq_len(N)) {
      xs <- rnorm(n_obs, 0, 1)
      ys <- sin(xs) + rnorm(n_obs, 0, noise_sd)
      sim[, i] <- predict(smooth.spline(xs, ys, df = df), xseq)$y
    }
    ci <- qt(0.975, N) * apply(sim, 1, sd)
    fit_mean <- rowMeans(sim)
    # Zero-width rectangles are used to draw vertical segments at each xseq.
    rect(xseq, fit_mean - ci, xseq, fit_mean + ci, border = "grey")
    rect(xseq, sin(xseq), xseq, fit_mean, border = "black")
    lines(truex, sin(truex))
  }

  # 2) Single fits, then 3) simulations. The order matters: it fixes the
  # sequence in which random numbers are drawn after set.seed().
  plot_fit(df1)
  plot_fit(df2)
  plot_tradeoff(df1)
  plot_tradeoff(df2)

  invisible(NULL)
}
- Running the function
biasVar() # Generates data from a sine curve looking at bias variance tradeoff biasVar(df1=2, df2=30) # Let's change the smoothing parameters in the function arguments
Quitting R
rm(list=ls()) # Cleaning up: Remove Objects from a Specified Environment q()