##------------------------------------------------------------------------##
## Script for Chapter 2, An R and S-PLUS Companion to Applied Regression  ##
## John Fox                                                               ##
## Sage Publications, 2002                                                ##
##------------------------------------------------------------------------##

# This part of the script covers: entering and reading data, using a
# packaged data set, writing data out, cleaning up the workspace, and the
# search path. It is a demonstration meant to be stepped through
# interactively; several statements print values, need files under D:/ or
# C:/, or wait for user input.

# preliminaries

options(width = 65)
options(digits = 5)

# Reading data

# entering data at the keyboard

x <- c(1, 2, 3, 4)
x

# `names` is also a base function; the variable is removed again under
# "Cleaning up" below.
names <- c('John', 'Georges', 'Mary')
names

v <- c(TRUE, FALSE)
v

# At the console, `cooperation <- scan()` reads values typed at the prompt
# until a blank line is entered. The values are supplied through `text =`
# here so that the file parses and also runs non-interactively.
cooperation <- scan(text = c(
  "49 64 37 52 68 54 61 79 64 29",
  "27 58 52 41 30 40 39 44 34 44"
))
cooperation

rep(5, 3)
rep(c(1, 2, 3), 2)
rep(c(1, 2, 3), c(3, 2, 1))

condition <- rep(c("public", "anonymous"), c(10, 10))
condition

sex <- rep(rep(c("male", "female"), c(5, 5)), 2)
sex

# NOTE(review): since R 4.0.0 data.frame() keeps character vectors as
# character columns by default; pass stringsAsFactors = TRUE if the factor
# columns of the 2002 text are wanted -- confirm against the R version in use.
Guyer <- data.frame(cooperation, condition, sex)
Guyer

# Interactive alternative: opens the data editor on an empty data frame.
# Whatever is entered there REPLACES the Guyer data frame built above.
Guyer <- edit(as.data.frame(NULL))

# reading data from a file into a data frame

Prestige <- read.table('D:/data/Prestige.txt', header = TRUE)
Prestige

# using count.fields to locate errors in data

Prestige <- read.table('D:/data/Prestige-bugged.txt', header = TRUE)
counts <- count.fields('D:/data/Prestige-bugged.txt')
counts
which(counts != 7)  # lines whose number of fields is not 7

# formatted input

Prestige <- read.fwf(
  'd:/data/Prestige-fixed.txt',
  row.names = 'occupation',
  col.names = c(
    'occupation', 'education', 'income', 'women', 'prestige', 'census', 'type'
  ),
  widths = c(25, 5, 5, 5, 4, 4, 4)
)

## S-PLUS alternative:
# NOTE(review): S-PLUS only. R's scan() has no `widths` argument, so this
# statement stops with an "unused argument" error under R; skip it there.
Prestige <- data.frame(scan(
  'd:/data/Prestige-fixed.txt',
  list(
    occupation = "", education = 0, income = 0, women = 0, prestige = 0,
    census = 0, type = ""
  ),
  widths = c(25, 5, 5, 5, 4, 4, 4)
))

# accessing data in an S library

library(car)
data(Duncan)
Duncan
attach(Duncan)

# outputing data

write.table(Duncan, 'c:/temp/Duncan.txt')

# Cleaning up

objects()
remove(names, v, x)

# The search path

search()
prestige
Duncan[, "prestige"]

# Both Duncan and Prestige have a `prestige` column; the most recently
# attached data frame comes first on the search path.
attach(Prestige)
search()
prestige
Duncan[, "prestige"]

detach(Prestige)
search()
prestige

mean(prestige)
mean(prestige, trim = .1)

# Deliberately shadow base mean() with a one-argument version to show how
# the search path resolves function names.
mean <- function(x) {
  warning("The mean function in the base package is shadowed")
  sum(x) / length(x)
}
mean(prestige)
# Continuation of the search-path demonstration. At this point `mean` is
# the user-defined one-argument function created just before, so the next
# call is expected to fail: that function has no `trim` argument.
mean(prestige, trim = .1)
remove(mean)
mean(prestige, trim = .1)  # base mean() is visible again

# A non-function object called `mean` does not hide the function mean().
mean <- mean(prestige)
mean
mean(prestige)
remove(mean)

detach(Duncan)

# for S4:
# NOTE(review): this follows detach(Duncan), so `prestige` must still be
# visible for the call to work -- presumably meant to be run in place of the
# mean() demonstration above; confirm.
stdev <- function(x) {
  warning("The standard stdev function is shadowed")
  sqrt(var(x))
}
stdev(prestige)
remove('stdev')

# missing data

data(Freedman)
attach(Freedman)
Freedman[1:10, ]
density
median(density)
median(density, na.rm = TRUE)

plot(density, crime)
identify(density, crime, rownames(Freedman))  # interactive point labelling

log(c(1, 10, NA, 100), base = 10)
plot(log(density, base = 10), crime)
lm(crime ~ log(density, base = 10))
abline(lm(crime ~ log(density, base = 10)), lty = 2)

# lowess() is given only the cases with both variables observed.
good <- !(is.na(density) | is.na(crime))
good
lines(lowess(log(density[good], base = 10), crime[good]))

options("na.action")

detach(Freedman)
Freedman.good <- na.omit(Freedman)
attach(Freedman.good)
Freedman.good[1:10, ]
dim(Freedman.good)

# numeric variables, factors, and ordered factors

detach(Freedman.good)
objects()
remove(good, Freedman.good)

# The free-standing vector in the workspace ...
condition
is.character(condition)

# ... versus the column of the same name inside the Guyer data frame.
remove(cooperation, condition, sex)
attach(Guyer)
condition
is.character(condition)
is.factor(condition)
summary(Guyer)

# modifying data

perc.coop <- 100 * cooperation / 120
perc.coop

# Assigning into Guyer does not update the copy already attached to the
# search path: `cooperation` shows the old values until Guyer is detached
# and attached again.
Guyer$cooperation <- 100 * cooperation / 120
Guyer
cooperation
detach(Guyer)
attach(Guyer)
cooperation

# Same point for a new column: `logit.coop` is not found until Guyer is
# re-attached.
Guyer$logit.coop <- log(cooperation / (100 - cooperation))
Guyer
logit.coop
detach(Guyer)
attach(Guyer)
logit.coop

coop.4 <- cut(cooperation, 4)
summary(coop.4)

coop.groups <- cut(
  cooperation,
  quantile(cooperation, c(0, 1/3, 2/3, 1)),
  include.lowest = TRUE,
  labels = c('low', 'med', 'high')
)
summary(coop.groups)

coop.2 <- recode(cooperation, 'lo:50=1; 50:hi=2')
coop.2

detach(Guyer)
remove(perc.coop, coop.4, coop.groups, coop.2)

data(Womenlf)
attach(Womenlf)

# Random sample of 20 row indices, used to print a manageable subset.
sample.20 <- sort(sample(nrow(Womenlf), 20))
sample.20
Womenlf[sample.20, ]

working <- recode(
  partic,
  " c('parttime','fulltime')='yes'; 'not.work'='no' "
)
working[sample.20]

working.alt <- recode(
  partic,
  " c('parttime','fulltime')='yes'; else='no' "
)
all(working == working.alt)

fulltime <- recode(
  partic,
  " 'fulltime'='yes'; 'parttime'='no'; 'not.work'=NA "
)
fulltime[sample.20]

region.4 <- recode(region, " c('Prairie','BC')='West' ")
region.4[sample.20]

detach(Womenlf)
remove(working, working.alt, fulltime, region.4, sample.20)

# Matrices, arrays, and lists

A <- matrix(1:12, 3, 4)
A
B <- matrix(c('a', 'b', 'c'), 4, 3, byrow = TRUE)
B
dim(A)
dim(B)

v <- sample(10, 10)
v
dim(v)

array.3 <- array(1:24, c(4, 3, 2))
array.3
dim(array.3)

list.1 <- list(mat.1 = A, mat.2 = B, vec = v)
list.1

# Indexing

# vectors
v
v[2]
v[c(4, 2, 6)]
v[c(4, 2, 4)]
v[-c(2, 4, 6, 8, 10)]

names(v) <- letters[1:10]
names(v)
v[c('f', 'i', 'g')]

v < 6
v[v < 6]

vv <- v
vv
vv[c(1, 3, 5)] <- c(1, 2, 3)
vv
vv[c('b', 'd', 'f', 'h', 'j')] <- 0
vv
remove(vv)

# matrices
A
A[2, 3]
A[c(1, 2), 2]
A[c(1, 2), c(2, 3)]
A[c(1, 2), ]
A[c(1, 2), 2, drop = FALSE]
A[, -c(1, 3)]  # delete columns 1 and 3
A[-1, -2]      # delete row 1 and column 2

rownames(A) <- c('one', 'two', 'three')
colnames(A) <- c('w', 'x', 'y', 'z')
A
A[c('one', 'two'), c('x', 'y')]
A[c(TRUE, FALSE, TRUE), ]

AA <- A
AA
AA[1, ] <- 0
AA
remove(AA)

# lists
list.1
list.1[c(2, 3)]
list.1[2]
list.1[[2]]
list.1["mat.1"]
list.1[["mat.1"]]
list.1$mat.1

list.1$mat.1 <- matrix(1, 2, 2)
list.1$title <- 'an arbitrary list'
list.1$mat.2 <- NULL  # assigning NULL removes the element
list.1

# data frames
attach(Guyer)
Guyer
Guyer[, 1]
Guyer[, 'cooperation']
Guyer[c(1, 2), ]
Guyer[c('1', '2'), 'cooperation']
Guyer[-(6:20), ]
Guyer[sex == 'female' & condition == 'public', ]
Guyer$cooperation
Guyer[['cooperation']]
Guyer['cooperation']
detach(Guyer)

remove(A, B, v, array.3, list.1)

#modes of S objects

x <- 1:10
mode(x)
length(x)

y <- c("one", "two", "three")
mode(y)
length(y)

complex.nos <- c(1, 0+1i, 2-3i)
mode(complex.nos)
length(complex.nos)

list.3 <- list(x, y, complex.nos)
mode(list.3)
length(list.3)

mode(Guyer)
length(Guyer)

attach(Duncan)
mod <- lm(prestige ~ income + education)
mod
mode(mod)
length(mod)
names(mod)

mode(mean)
length(mean)

A <- matrix(1:15, 3, 5)
A
rownames(A) <- c('a', 'b', 'c')
colnames(A) <- c('v', 'w', 'x', 'y', 'z')
attributes(A)

attributes(Duncan)
type
attributes(type)

class(Duncan)
class(type)
class(A)

num <- numeric(5)
num
# Type predicates and coercion. `num` is the numeric vector created by the
# statement immediately before this part of the script.
fac <- factor(c("a", "b", "c", "c", "b", "a"))
fac

is.numeric(num)
is.numeric(fac)
is.factor(fac)

char <- as.character(fac)
char
as.numeric(fac)   # coerces the factor itself
as.numeric(char)  # coerces the character labels "a", "b", "c"

B <- matrix(1:9, 3, 3)
B
is.matrix(B)
as.vector(B)

## S4 differences:

vec <- 1:10
char.vec <- letters[1:5]
mat <- matrix(1:12, 3, 4)
vec
char.vec
mat

class(vec)
mode(vec)
length(vec)

class(char.vec)
mode(char.vec)
length(char.vec)

class(mat)
mode(mat)
length(mat)

is.matrix(mat)
is.matrix(vec)
is(mat, 'matrix')

as.vector(mat)
as(mat, 'vector')