##------------------------------------------------------------------------## ## Script for web appendix on mixed models ## ## An R and S-PLUS Companion to Applied Regression ## ## John Fox ## ## Sage Publications, 2002 ## ##------------------------------------------------------------------------## # preliminaries set.seed(12345) # for reproducibility options(width=65) options(digits=5) # analysis of Bryk and Raudenbush data library(nlme) library(lattice) # for Trellis graphics data(MathAchieve) MathAchieve[1:10,] # first 10 students data(MathAchSchool) MathAchSchool[1:10,] # first 10 schools # data management attach(MathAchieve) mses <- tapply(SES, School, mean) # school means mses[as.character(MathAchSchool$School[1:10])] # for first 10 schools detach(MathAchieve) Bryk <- as.data.frame(MathAchieve[, c("School", "SES", "MathAch")]) names(Bryk) <- c("school", "ses", "mathach") sample20 <- sort(sample(7185, 20)) # 20 randomly sampled students Bryk[sample20, ] Bryk$meanses <- mses[as.character(Bryk$school)] Bryk$cses <- Bryk$ses - Bryk$meanses sector <- MathAchSchool$Sector names(sector) <- row.names(MathAchSchool) Bryk$sector <- sector[as.character(Bryk$school)] Bryk[sample20,] # examine 20 Catholic and 20 public schools attach(Bryk) cat <- sample(unique(school[sector=='Catholic']), 20) Cat.20 <- groupedData(mathach ~ ses | school, data=Bryk[is.element(school, cat),]) pub <- sample(unique(school[sector=='Public']), 20) Pub.20 <- groupedData(mathach ~ ses | school, data=Bryk[is.element(school, pub),]) trellis.device(color=F) xyplot(mathach ~ ses | school, data=Cat.20, main="Catholic", panel=function(x, y){ panel.xyplot(x, y) panel.loess(x, y, span=1) panel.lmline(x, y, lty=2) } ) xyplot(mathach ~ ses | school, data=Pub.20, main="Public", panel=function(x, y){ panel.xyplot(x, y) panel.loess(x, y, span=1) panel.lmline(x, y, lty=2) } ) # examine coefficients from within-schools regressions cat.list <- lmList(mathach ~ ses | school, subset = sector=='Catholic', data=Bryk) pub.list <- lmList(mathach ~ ses | school, subset = sector=='Public', data=Bryk) plot(intervals(cat.list), main='Catholic') plot(intervals(pub.list), main='Public') cat.coef <- coef(cat.list) cat.coef[1:10,] pub.coef <- coef(pub.list) pub.coef[1:10,] old <- par(mfrow=c(1,2)) boxplot(cat.coef[,1], pub.coef[,1], main='Intercepts', names=c('Catholic', 'Public')) boxplot(cat.coef[,2], pub.coef[,2], main='Slopes', names=c('Catholic', 'Public')) par(old) # fit mixed models Bryk$sector <- factor(Bryk$sector, levels=c('Public', 'Catholic')) contrasts(Bryk$sector) bryk.lme.1 <- lme(mathach ~ meanses*cses + sector*cses, random = ~ cses | school, data=Bryk) summary(bryk.lme.1) intervals(bryk.lme.1) # check for ill-conditioning bryk.lme.2 <- update(bryk.lme.1, random = ~ 1 | school) # omitting random effect of cses anova(bryk.lme.1, bryk.lme.2) bryk.lme.3 <- update(bryk.lme.1, random = ~ cses - 1 | school) # omitting random intercept anova(bryk.lme.1, bryk.lme.3) summary(bryk.lme.2) # clean up detach(Bryk) remove(list=objects()) # analysis of Blackmoor and Davis data set.seed(2468) # for reproducibility library(car) # for data only data(Blackmoor) # from car Blackmoor[1:20,] # first 20 observations Blackmoor$log.exercise <- log(Blackmoor$exercise + 5/60, 2) # S4: Blackmoor$log.exercise <- logb(Blackmoor$exercise + 5/60, 2) attach(Blackmoor) pat <- sample(unique(subject[group=='patient']), 20) Pat.20 <- groupedData(log.exercise ~ age | subject, data=Blackmoor[is.element(subject, pat),]) con <- sample(unique(subject[group=='control']), 20) Con.20 <- groupedData(log.exercise ~ age | subject, data=Blackmoor[is.element(subject, con),]) print(plot(Con.20, main='Control Subjects', xlab='Age', ylab='log2 Exercise', ylim=1.2*range(Con.20$log.exercise, Pat.20$log.exercise), layout=c(5, 4), aspect=1.0), position=c(0, 0, 1, .5), more=T) print(plot(Pat.20, main='Patients', xlab='Age', ylab='log2 Exercise', ylim=1.2*range(Con.20$log.exercise, Pat.20$log.exercise), layout=c(5, 4), aspect=1.0), position=c(0, .5, 1, 1)) pat.list <- lmList(log.exercise ~ I(age - 8) | subject, subset = group=='patient', data=Blackmoor) con.list <- lmList(log.exercise ~ I(age - 8) | subject, subset = group=='control', data=Blackmoor) pat.coef <- coef(pat.list) con.coef <- coef(con.list) old <- par(mfrow=c(1,2)) boxplot(pat.coef[,1], con.coef[,1], main='Intercepts', names=c('Patients', 'Controls')) boxplot(pat.coef[,2], con.coef[,2], main='Slopes', names=c('Patients', 'Controls')) par(old) bm.lme.1 <- lme(log.exercise ~ I(age - 8)*group, random = ~ I(age - 8) | subject, data=Blackmoor) summary(bm.lme.1) bm.lme.2 <- update(bm.lme.1, random = ~ 1 | subject) anova(bm.lme.1, bm.lme.2) bm.lme.3 <- update(bm.lme.1, random = ~ I(age - 8) - 1 | subject) anova(bm.lme.1, bm.lme.3) bm.lme.4 <- update(bm.lme.1, correlation = corCAR1(form = ~ age |subject)) summary(bm.lme.4) anova(bm.lme.1, bm.lme.4) intervals(bm.lme.4) bm.lme.5 <- update(bm.lme.4, random = ~ 1 | subject) anova(bm.lme.4, bm.lme.5) bm.lme.6 <- update(bm.lme.4, random = ~ I(age - 8) - 1 | subject) anova(bm.lme.4, bm.lme.6) pdata <- expand.grid(age=seq(8, 18, by=2), group=c('patient', 'control')) pdata$log.exercise <- predict(bm.lme.5, pdata, level=0) pdata$exercise <- (2^pdata$log.exercise) - 5/60 pdata plot(pdata$age, pdata$exercise, type='n', xlab='Age (years)', ylab='Exercise (hours/week)') points(pdata$age[1:6], pdata$exercise[1:6], type='b', pch=19, lwd=2) points(pdata$age[7:12], pdata$exercise[7:12], type='b', pch=22, lty=2, lwd=2) legend(locator(1), c('Patients', 'Controls'), pch=c(19, 22), lty=c(1,2), lwd=2) # S-PLUS: plot(pdata$age, pdata$exercise, type='n', xlab='Age (years)', ylab='Exercise (hours/week)') points(pdata$age[1:6], pdata$exercise[1:6], type='b', pch=16, lwd=2) points(pdata$age[7:12], pdata$exercise[7:12], type='b', pch=0, lty=2, lwd=2) legend(locator(1), c('Patients', 'Controls'), marks=c(16, 0), lty=c(1,2), lwd=2)