Upload
others
View
3
Download
0
Embed Size (px)
Citation preview
R2
AshishMahabalAyBi199,Caltech
24May2011
RunningR
Youcanop=onallysavedata.
Dealingwithtables/objects(Rframe)• X=read.table(“foo”,header=TRUE)• objects()• objets(X)• names(X)• X• Name1• X$Name1• aSach(X)• Name1
Num Name1 Name2
1 1.1 3.3
2 4.4 5.4
Assignments
• x<‐c(10.4,5.6,3.1,6.4,21.7)• assign("x",c(10.4,5.6,3.1,6.4,21.7))• c(10.4,5.6,3.1,6.4,21.7)‐>x (!)
• 1/x #0.09,0.17,0.32,0.15,0.04
• y<‐c(x,0,x) #10.4,..,21.7,0,10.4,..,21.7
• v<‐rep(x)+y+1#xrepeated2.2=mes,111
• var=sum((x‐mean(x))^2)/(length(x)‐1)
Lists(anddata.frames=restricted)
• Lst<‐list(name="Fred",wife="Mary",no.children=3,child.ages=c(4,7,9))
• Alwaysnumbered
• Lst[[4]] #returns[1]479
• Lst[[4]][2] #returns7
• Lst[4][2] #returnsNull
• Lst[3] #returns$no.children[1]3
Readingfromfiles
• HousePrice<‐read.table("houses.data",header=TRUE)
• read.table(file,header=FALSE,sep="",quote="\"'",
dec=".",row.names,col.names,as.is=!stringsAsFactors,na.strings="NA",colClasses=NA,nrows=‐1,skip=0,check.names=TRUE,fill=!blank.lines.skip,strip.white=FALSE,blank.lines.skip=TRUE,
comment.char="#”,allowEscapes=FALSE,flush=FALSE,stringsAsFactors=default.stringsAsFactors(),fileEncoding="",encoding="unknown”)
read.csv,read.csv2,read.delim,read.delim2
Accessingbuilt‐indatasets
• data()• data(AirPassengers)• ?AirPassengers• new<‐edit(AirPassengers)• x<‐array(c(AirPassengers[1:144]),dim=c(12,12))• pairs(x)
• ?Togethelp• #forcomments• ‐>or<‐(or=)forassignments• ==forcomparison
• Concatenatewithc #x=c(a,b)• seqtocreatesequence #z=seq(‐30,30)• is.na(x)tolookfornotavailableness
• summary(x)• data(x)• read.table(file)• save(file)• source(file) #input
• sink(file) #output
Commonrou=nes
• length(a) #lengthofvector• max(a) #similarlymin,mean
• sort(a),orsort(a,decreasing=T)• unique(a)• duplicated(a) #returnsalogicalarray!
TomShort’sRrefcardhSp://www.rpad.org/Rpad/R‐refcard.pdf
Example
• a=rnorm(100,mean=5,sd=1)• b=rnorm(200,mean=5,sd=1)
• c=c(100*a,100*b)• length(c)• c
• d=as.integer(c)• d• mean(d);max(d);min(d);sd(d)
• unique(d)• duplicated(d)• plot(duplicated(d)) #coercion
• duplicated(unique(d))• five(d) #whatsthefive‐of‐d• ?five #nothing?Doyouknowfive?• ??five #atleastsomethingthathasfive?• fivenum(d) #ah,soitsfivenum.I’lltakethat• ?fivenum #whatisitBTW?…minimum,lower‐hinge,median,upper‐hinge,maximum…
• fivenum(x,na.rm=TRUE) #op=ons!• g=d• g[duplicated(d)]<‐NA• fivenum(d)– [1]264.0425.5483.5561.5742.0
• fivenum(g,na.rm=TRUE)– [1]264.0419.5492.0571.5742.0
• boxplot.stats(x,coef=1.5,do.conf=TRUE,do.out=TRUE)
• quan=le,range,bxp
• whichselectsindices:• a=which(d<500) #returnsindices• a=d[which(d<500)] #returnstheelements
• which.max(d) #indexofmaxelement
• match(x,y) #elementsofxiny,elseNA• merge(a,b) #usingcommoncolumns/rows
choose(n,k) #combina=onsofkfromnchoose(5,3)returns10
sample(x,size) #resamplessizeelementsfromx(withtheop=onofreplacement)
• >sample(cards,5)[1]"S9""H4""D5""C3""C9"• >sample(cards,2)[1]“S1"“S11”
>sample(cards,60,replace=TRUE)
• Conj(4+3i)• fft(x)• solve(a) #matrixinverseofa
• solve(a,b) #solvesa%*%x=bforx
GraphicsinR
Rgraphics,comparison• Basegraphics(RossIhaka,Sgraphicsdriver)– Cannotbemodified
• Gridgraphics(PaulMurrell,2000)– Lowlevel,butnosupportforsta=s=calgraphics
• La{cepackage(Sarkar,2008)– Basedongrid.Detailed.Lacksaformalmodel
• ggplot2(Hedly,2005)– Layered,basedongrammarofgraphics,lowlevel,sta=cplots,extensionseasy,flowlikethatofanalysis
• ggobi,rggobi(Cook,Swayne,Wickham2007,8)– Interac=ve,grammarofgraphics
h*p://cran.r‐project.org/web/views/Graphics.html(onlinelist)
Grammarofgraphics(Wilkinson,2005)
• Mapping(mappings)fromdata(data)toaesthe=caSributesofgeometricobjects(geoms).
• Includingsta=s=caltransforma=ons(stats)inspecificcoordinatesystems
• Face=ng(facet)toalloweasysubse{ng(synonyms:condi=oning,la{cing,trellising)
• scale,coord
HadleyWickham’sggplot2hSp://had.co.nz/ggplot2/
• install.packages(“ggplot2”)• library(ggplot2)
DiamondshSp://www.diamondse.info/
• help(diamonds)• data(diamonds)• dsmall<‐diamonds[sample(nrow(diamonds),100),]• dsmall• help(ggsave)
ggsave(filename="diamond_smooth.png",plot=last_plot(),height=3,width=5,dpi=100)
AestheAcs:color,size,shape
Caratcutcolorclarity
(categories)
qplot(carat,price,data=dsmall,shape=cut)
ShapeistheaestheAchereandacategoricalvariable
qplot(carat,price,data=dsmall,colour=color)
coloristheaestheAchereandacategoricalvariable
qplot(carat,price,data=diamonds,geom=c("smooth","point"))
AnexampleofastaAsAcalplot
qplot(log(carat),log(price),data=diamonds,geom=c("smooth","point"))
se=Falsetoturnconfidenceleveloff
qplot(carat,price,data=dsmall,geom=c("smooth","point"),span=1)
qplot(carat,price,data=dsmall,geom=c("smooth","point"),span=0.2)
Spancanbevariedbetween0(verywiggly)and1(notwiggly)
qplot(carat,x*y*z,data=diamonds)
EasytousemulAplevariables.Outliersbyvolume
qplot(carat,price,data=diamonds,alpha=I(1/500))
Transparency(scale0to1):1/1=>nottransparent
qplot(color,price/carat,data=diamonds,geom="jiSer")
qplot(color,price/carat,data=diamonds,geom="jiSer",alpha=I(1/50))
qplot(color,price/carat,data=diamonds,geom="boxplot")
qplot(carat,data=diamonds,geom="density",colour=color)
CombiningaestheAcandgeom
FuelefficiencydatahSp://fueleconomy.gov
• data(mpg)• help(mpg)
• 234rows,11vars• 1999‐2008popularcars Treehugger.com
qplot(displ,hwy,data=mpg,colour=cyl)
qplot(displ,hwy,data=mpg,colour=factor(cyl))
aesthe=cs:color;geom:point
Thegeomcouldaseasilyhavebeen“line”orevenpointandline!
qplot(displ,hwy,data=mpg,facets=.~year)
qplot(displ,hwy,data=mpg,facets=.~year)+geom_smooth()
• scaletransforma=ons• stattransforma=ons
• A�erscalingsothatlog/linmakesense
• trainscales• Supersetacrossdifferentlayers
• mapdatatoaesthe=cs• rendergeoms
Anatomyofaplot
Whatisaplot(madeof)?
• Adataset• Setofmappingsfromvariablestoaesthe=cs• Atleastonelayer,eachmadeof– Ageometricobject(geom)– Asta=s=caltransforma=on(stat)– Aposi=onadjustment– Perhapsadatasetandaesthe=cmappings
• Onescalepermapping• Acoordinatesystem• FaceAngspecifica=on
Exampleofscalecon=nuousanddiscretevariables
Thingstodowithanobject
• print()–automa=callyunlessinaloop• ggsave()–towriteoutanimage
• summary()–summaryaboutthemakeup
• save()–tosavetheobject
p<‐qplot(displ,hwy,data=mpg,colour=factor(cyl))
summary(p)
• data:manufacturer,model,displ,year,cyl,trans,drv,cty,hwy,fl,class[234x11]
• mapping:colour=factor(cyl),x=displ,y=hwy• scales:colour,x,y• face=ng:facet_grid(.~.,FALSE)‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐
• geom_point:• stat_iden=ty:• posi=on_iden=ty:(width=NULL,height=NULL)
• >#Saveplotobjecttodisk• >save(p,file="plot.rdata")• >#Loadfromdisk
• >load("plot.rdata")• >#Savepngtodisk• >ggsave("plot.png",width=5,height=5)
Addinglayers• p<‐ggplot(diamonds,aes(carat,price,colour=cut))
• p<‐p+layer(geom="point")#stat/posdefaults• layer(geom,geom_params,stat,stat_params,data,mapping,posi=on)
• p<‐ggplot(diamonds,aes(x=carat))• p<‐p+layer(
– geom="bar",
– geom_params=list(fill="steelblue"),
– stat="bin",– stat_params=list(binwidth=2))
• p
Everygeomassociatedwitha(default)stat
• geom_histogram(binwidth=2,fill="steelblue")
Andeverystatwitha(default)geom
stat:bingeom:bar
28819moviesfromIMDB
24variablesincludingLengthofmovie.Linearandlogplots
qplot(length,data=movies,geom="histogram",
binwidth=1,xlim=c(80,100))
BlazarlightcurveexampleM
ag
18.8
18.6
18.4
18.2
18.0
17.8
●
●
●●
●
●
●
●
●●●
●
●
●
●
●
●
●
●
●
●
●
●
●●●
●●
●● ●
● Type●● CRTS●● PQ
MJD (actual − 53600 )
Flu
x (x
10^ −9
)
−5
0
5
10
15
20
25
●●
●
●
●
●
● ●
●
● ●
1100 1150 1200 1250 1300 1350 1400
Type● Flux
JudyMou
Somemorelinks…
• hSp://vostat.org• RGUI(RcommanderhSp://socserv.mcmaster.ca/jfox/Misc/Rcmdr/)
• hSp://scc.stat.ucla.edu/mini‐courses• hSp://cran.r‐project.org• hSp://www.r‐project.org• hSp://en.wikipedia.org/wiki/R_(programming_language)
• GGobi(hSp://www.ggobi.org/)/rggobi
Homework
• Applyvariousgraphingtechniqueslearnttotheastronomydataset
• hSp://avyakta.caltech.edu:8080/datasets/dataset2.Rframe• Fluxra=osakaColors(u‐g,g‐r,r‐I,i‐z)andspectroscopicclass
• numumggmrrmiimzspecClass• 11.800.350.420.293• 21.220.01‐0.05‐0.061
colorsandclassifica=on
• d2=read.table(“dataset2.Rframe”,header=TRUE)• objects()• names(d2)• umg• d2$umg• aSach(d2)• umg• plot(umg,gmr)• pairs(d2)
Useggplot2methodsinsteadoftheabove