##------------------------------------------------------------------------##
## Script for Chapter 2, An R and S-PLUS Companion to Applied Regression  ##
##    John Fox                                                            ##
##    Sage Publications, 2002                                             ##
##------------------------------------------------------------------------##

# preliminaries

# Set the console line width to 65 characters and print numbers with
# 5 significant digits.
options(width = 65, digits = 5)

# Reading data

# entering data at the keyboard

# A numeric, a character, and a logical vector, each built with c().
x <- c(1, 2, 3, 4)
x
# NOTE: this variable shares its name with the base function names(); it is
# removed again in the "Cleaning up" section below.
names <- c("John", "Georges", "Mary")
names
# TRUE/FALSE are reserved words, whereas T and F are ordinary variables that
# can be reassigned, so the full spellings are used.
v <- c(TRUE, FALSE)
v


# Read the 20 cooperation scores from the lines that follow.  With no file
# argument scan() reads from the console and stops at the first blank line,
# so the blank line after the data must stay in place and nothing (not even
# a comment) may be inserted between scan() and that blank line.
# NOTE(review): this presumably only works when the lines are pasted into an
# interactive console -- confirm before running the script with source().
cooperation <- scan()
49 64 37 52 68 54
61 79 64 29
27 58 52 41 30 40 39
44 34 44

cooperation

# rep() with a single count repeats the whole vector that many times; with a
# vector of counts it repeats each element the corresponding number of times.
rep(5, times = 3)
rep(c(1, 2, 3), times = 2)
rep(c(1, 2, 3), times = c(3, 2, 1))

# Ten "public" values followed by ten "anonymous" values.
condition <- rep(c("public", "anonymous"), each = 10)
condition
# Five "male" then five "female", with that block of ten repeated twice.
sex <- rep(rep(c("male", "female"), each = 5), times = 2)
sex

# Combine the three vectors into a data frame.  stringsAsFactors = TRUE is
# spelled out because data.frame() stopped converting character vectors to
# factors by default in R 4.0.0, and the "numeric variables, factors, and
# ordered factors" section of this script checks is.factor(condition) on the
# attached data frame.
Guyer <- data.frame(cooperation, condition, sex, stringsAsFactors = TRUE)
Guyer

# Enter data through the interactive data editor instead.  This REPLACES the
# Guyer data frame created above with whatever is typed into the editor.
Guyer <- edit(as.data.frame(NULL))

# reading data from a file into a data frame

# Read a text file whose first line holds the column names.  header = TRUE is
# used rather than T, because T is an ordinary variable that can be
# reassigned.
Prestige <- read.table("D:/data/Prestige.txt", header = TRUE)
Prestige

# using count.fields to locate errors in data

# Attempt to read a second copy of the file.
# NOTE(review): judging by the file name and the check below, this copy
# presumably contains malformed lines and the read is expected to fail --
# confirm against the data file.
Prestige <- read.table("D:/data/Prestige-bugged.txt", header = TRUE)

# Count the fields on every line of the file, then report the positions whose
# count is not 7.
counts <- count.fields("D:/data/Prestige-bugged.txt")
counts
which(counts != 7)

# formatted input

# Fixed-width input: the seven field widths are given explicitly, the columns
# are named in the call, and the occupation column supplies the row names.
Prestige <- read.fwf(
    "d:/data/Prestige-fixed.txt",
    widths = c(25, 5, 5, 5, 4, 4, 4),
    col.names = c("occupation", "education", "income", "women",
        "prestige", "census", "type"),
    row.names = "occupation")
    
    ## S-PLUS alternative:
    ## Reads the same fixed-width file with scan(), using a list of templates
    ## ("" for character fields, 0 for numeric fields) to name and type the
    ## seven columns, then wraps the result in a data frame.
    ## NOTE(review): the `widths` argument is presumably specific to the
    ## S-PLUS version of scan(); R's scan() does not list such an argument --
    ## confirm before running this statement in R.
    
    Prestige <- data.frame(scan('d:/data/Prestige-fixed.txt', 
        list(occupation="", education=0, income=0, women=0, prestige=0,
            census=0, type=""),
        widths=c(25, 5, 5, 5, 4, 4, 4)))
        
        
# accessing data in an S library

# Load the car package, load and print the Duncan data set, and put its
# columns on the search path.
library("car")
data("Duncan")
Duncan
attach(Duncan)

# outputting data

write.table(Duncan, file = "c:/temp/Duncan.txt")

# Cleaning up

# List the workspace, then drop the small example vectors created in the
# "entering data at the keyboard" section.
ls()
rm(names, v, x)

# The search path

# Show the current search path.  Duncan was attached earlier in the script,
# so its columns can be referred to by bare name.
search()

# Bare name versus explicit indexing of the Duncan data frame.
prestige
Duncan[,"prestige"]

# attach() places Prestige ahead of Duncan on the search path, so the bare
# name `prestige` now resolves to the column of Prestige ...
attach(Prestige)
search()
prestige


# ... while explicit indexing still reaches the column in Duncan.
Duncan[,"prestige"]

# Once Prestige is detached, `prestige` resolves to the Duncan column again.
detach(Prestige)
search()
prestige


# The standard mean() function, first untrimmed and then with a fraction of
# 0.1 of the observations trimmed from each end.
mean(prestige)
mean(prestige, trim=.1)

# Deliberately shadows the base mean() function with a one-argument version
# that issues a warning on every call and returns sum(x) / length(x).
mean <- function(x) {
    warning("The mean function in the base package is shadowed")
    total <- sum(x)
    total / length(x)
}

# The mean() defined in the workspace is found before the base version, so
# this call issues the shadowing warning ...
mean(prestige)
# ... and this call fails, because the workspace version only accepts `x`
# and has no `trim` argument.
mean(prestige, trim=.1)
# Removing the workspace copy makes the base function visible again.
remove(mean)
mean(prestige, trim=.1)
# Assigning a non-function value to the name `mean` does not shadow the
# function: when a name is used in a call, R skips objects that are not
# functions while searching for it.
mean<-mean(prestige)
mean
mean(prestige)

# Delete the numeric `mean` and take Duncan off the search path.
remove(mean)
detach(Duncan)

    # for S4:
    
# Issues a warning stating that the standard stdev function is shadowed, then
# returns the square root of var(x).
stdev <- function(x) {
    warning("The standard stdev function is shadowed")
    variance <- var(x)
    sqrt(variance)
}
# Duncan was detached at the end of the search-path section, so the bare name
# `prestige` is no longer on the search path; index the data frame directly.
stdev(Duncan$prestige)
remove('stdev')

# missing data

# Load the Freedman data, attach it, and print its first ten rows.
data(Freedman)
attach(Freedman)
Freedman[1:10, ]


# With missing values present the plain median is NA; na.rm = TRUE removes
# them first.  (TRUE rather than T: T is an ordinary, reassignable variable.)
density
median(density)
median(density, na.rm = TRUE)

# Scatterplot of crime against density, followed by interactive labelling of
# points with their row names.
plot(density, crime)
identify(density, crime, rownames(Freedman))

# NA in the input gives NA in the corresponding position of the result.
log(c(1, 10, NA, 100), base = 10)

plot(log(density, base = 10), crime)

# Least-squares fit; the treatment of missing values is governed by the
# na.action option printed further down.
lm(crime ~ log(density, base = 10))

# Add the fitted line to the plot as a dashed line.
abline(lm(crime ~ log(density, base = 10)), lty = 2)

# Logical index of observations where neither variable is missing, used to
# pass only complete pairs to lowess().
good <- !(is.na(density) | is.na(crime))
good
lines(lowess(log(density[good], base = 10), crime[good]))

options("na.action")

# Switch to a copy of the data with every row containing an NA removed.
detach(Freedman)
Freedman.good <- na.omit(Freedman)
attach(Freedman.good)
Freedman.good[1:10, ]
dim(Freedman.good)

# numeric variables, factors, and ordered factors

# Tidy up after the missing-data examples.
detach(Freedman.good)
ls()
rm(good, Freedman.good)

# The free-standing vector that still lives in the workspace.
condition
is.character(condition)

# Drop the free-standing vectors so that the same names resolve to the
# columns of the attached data frame instead.
rm(cooperation, condition, sex)
attach(Guyer)

condition
is.character(condition)
is.factor(condition)

summary(Guyer)

# modifying data

# Rescale cooperation as 100 * value / 120.
perc.coop <- 100 * cooperation / 120
perc.coop

# Overwrite the column inside the data frame with the rescaled values.
Guyer$cooperation <- 100 * cooperation / 120
Guyer

# The attached copy is not updated when the data frame changes, so
# `cooperation` shows the old values until Guyer is detached and attached
# again.
cooperation
detach(Guyer)
attach(Guyer)
cooperation

Guyer$logit.coop <- log(cooperation / (100 - cooperation))
Guyer

# For the same reason the new column cannot be reached by bare name (the
# first reference is expected to fail) until the data frame is re-attached.
logit.coop
detach(Guyer)
attach(Guyer)
logit.coop

# Cut into four intervals.
coop.4 <- cut(cooperation, 4)
summary(coop.4)

# Cut at the 0, 1/3, 2/3 and 1 quantiles into three labelled groups.
# include.lowest = TRUE keeps the minimum value in the first interval; TRUE
# is spelled out because T is an ordinary variable that can be reassigned.
coop.groups <- cut(cooperation,
    quantile(cooperation, c(0, 1/3, 2/3, 1)),
    include.lowest = TRUE,
    labels = c("low", "med", "high"))
summary(coop.groups)

# NOTE(review): recode() is not a base R function; presumably it is supplied
# by the car package loaded earlier -- confirm.
coop.2 <- recode(cooperation, 'lo:50=1; 50:hi=2')
coop.2

detach(Guyer)
remove(perc.coop, coop.4, coop.groups, coop.2)
# Load and attach the Womenlf data, then draw a sorted random sample of 20
# row numbers to keep the printed output short.
data(Womenlf)
attach(Womenlf)
sample.20 <- sort(sample(nrow(Womenlf), size = 20))
sample.20
Womenlf[sample.20, ]

# Collapse part-time and full-time into 'yes', listing every level ...
working <- recode(
    partic,
    " c('parttime','fulltime')='yes'; 'not.work'='no' ")
working[sample.20]
# ... or using else for the remaining levels; both versions should agree.
working.alt <- recode(
    partic,
    " c('parttime','fulltime')='yes'; else='no' ")
all(working == working.alt)
# Full-time versus part-time, with those not working set to NA.
fulltime <- recode(
    partic,
    " 'fulltime'='yes'; 'parttime'='no'; 'not.work'=NA ")
fulltime[sample.20]
# Merge two regions into one; values not mentioned are left as they are.
region.4 <- recode(region, " c('Prairie','BC')='West' ")
region.4[sample.20]

detach(Womenlf)
rm(working, working.alt, fulltime, region.4, sample.20)

# Matrices, arrays, and lists

# A 3 x 4 integer matrix, filled column by column.
A <- matrix(1:12, nrow = 3, ncol = 4)
A

# A 4 x 3 character matrix filled row by row; the three values are recycled
# so every row reads a, b, c.  byrow = TRUE is used rather than T because T
# is an ordinary variable that can be reassigned.
B <- matrix(c("a", "b", "c"), nrow = 4, ncol = 3, byrow = TRUE)
B

dim(A)
dim(B)
# A random permutation of 1..10; a plain vector has no dim attribute.
v <- sample(10, 10)
v
dim(v)

# A three-way array with dimensions 4 x 3 x 2.
array.3 <- array(1:24, dim = c(4, 3, 2))
array.3
dim(array.3)

# A list can hold elements of different kinds under their own names.
list.1 <- list(mat.1 = A, mat.2 = B, vec = v)
list.1

# Indexing
# Positive indices select elements (repeats are allowed).
v
v[2]
v[c(4, 2, 6)]
v[c(4, 2, 4)]

# Negative indices drop elements.
v[-seq(2, 10, by = 2)]

# Once the elements are named they can be selected by name.
names(v) <- letters[1:10]
names(v)
v[c("f", "i", "g")]

# A logical vector selects the elements where it is TRUE.
v < 6
v[v < 6]

# Indexing on the left of an assignment replaces the selected elements.
vv <- v
vv
vv[c(1, 3, 5)] <- c(1, 2, 3)
vv
vv[c("b", "d", "f", "h", "j")] <- 0
vv
rm(vv)

# Matrices take one index per dimension; an empty index selects everything
# along that dimension.
A
A[2, 3]
A[c(1, 2), 2]
A[c(1, 2), c(2, 3)]
A[c(1, 2), ]

# drop = FALSE keeps the result a matrix even when one dimension has length 1.
# (FALSE rather than F: F is an ordinary variable that can be reassigned.)
A[c(1, 2), 2, drop = FALSE]

A[, -c(1, 3)]   # delete columns 1 and 3
A[-1, -2]   # delete row 1 and column 2
rownames(A) <- c("one", "two", "three")
colnames(A) <- c("w", "x", "y", "z")
A
# Indexing by dimension names and by a logical vector.
A[c("one", "two"), c("x", "y")]
A[c(TRUE, FALSE, TRUE), ]

# Replace a whole row in a copy, then discard the copy.
AA <- A
AA
AA[1, ] <- 0
AA
remove(AA)

# Single brackets return a list holding the selected elements ...
list.1
list.1[c(2, 3)]
list.1[2]

# ... double brackets return the element itself.
list.1[[2]]

list.1["mat.1"]
list.1[["mat.1"]]

# $name extracts the element with that name.
list.1$mat.1

# Replace an element, add a new one, and delete one by assigning NULL.
list.1$mat.1 <- matrix(1, nrow = 2, ncol = 2)
list.1$title <- 'an arbitrary list'
list.1$mat.2 <- NULL
list.1
# Data frames can be indexed like matrices ...
attach(Guyer)
Guyer

Guyer[, 1]
Guyer[, "cooperation"]
Guyer[c(1, 2), ]
Guyer[c("1", "2"), "cooperation"]
Guyer[-(6:20), ]
Guyer[sex == "female" & condition == "public", ]

# ... and like lists.
Guyer$cooperation
Guyer[["cooperation"]]
Guyer["cooperation"]


# Take Guyer off the search path and drop the objects used for the indexing
# examples.
detach(Guyer)
rm(A, B, v, array.3, list.1)

#modes of S objects

# Mode and length of a numeric vector ...
x <- 1:10
mode(x)
length(x)
# ... a character vector ...
y <- c("one", "two", "three")
mode(y)
length(y)

# ... a complex vector ...
complex.nos <- c(1, 1i, 2 - 3i)
mode(complex.nos)
length(complex.nos)

# ... and a list holding all three.
list.3 <- list(x, y, complex.nos)
mode(list.3)
length(list.3)
# Mode and length of a data frame.
mode(Guyer)
length(Guyer)
# Mode, length and component names of a fitted linear model.
attach(Duncan)
mod <- lm(prestige ~ income + education)
mod
mode(mod)
length(mod)
names(mod)

# Functions have a mode and a length too.
mode(mean)
length(mean)

# Attributes of a matrix after row and column names have been set.
A <- matrix(1:15, nrow = 3, ncol = 5)
A
rownames(A) <- c("a", "b", "c")
colnames(A) <- c("v", "w", "x", "y", "z")
attributes(A)

attributes(Duncan)

type
attributes(type)

# Classes of the data frame, of one of its attached columns, and of the
# matrix.
class(Duncan)
class(type)
class(A)

# A numeric vector of five zeros.
num <- numeric(5)
num

# A factor built from six character values.
fac <- factor(c("a", "b", "c", "c", "b", "a"))
fac

# Predicates that test what kind of object something is.
is.numeric(num)
is.numeric(fac)
is.factor(fac)

# Coercion: the factor as character strings, the factor as numbers, and the
# character strings as numbers.
char <- as.character(fac)
char
as.numeric(fac)
as.numeric(char)

# A 3 x 3 matrix and the same values as a plain vector.
B <- matrix(1:9, nrow = 3, ncol = 3)
B
is.matrix(B)
as.vector(B)

## S4 differences:

# Three small objects: an integer vector, a character vector, and a matrix.
vec <- 1:10
char.vec <- letters[1:5]
mat <- matrix(1:12, nrow = 3, ncol = 4)
vec
char.vec
mat
# Class, mode and length of each object in turn.
class(vec)
mode(vec)
length(vec)
class(char.vec)
mode(char.vec)
length(char.vec)
class(mat)
mode(mat)
length(mat)

# The is.* / as.* functions alongside the more general is() and as().
is.matrix(mat)
is.matrix(vec)
is(mat, "matrix")
as.vector(mat)
as(mat, "vector")