From: Subject: Data and Code for Modern Regression Techniques Date: Tue, 8 Apr 2008 14:16:59 +0100 MIME-Version: 1.0 Content-Type: text/html; charset="Windows-1252" Content-Transfer-Encoding: quoted-printable Content-Location: http://www.sussex.ac.uk/Users/danw/temp/forbook/codedata.htm X-MimeOLE: Produced By Microsoft MimeOLE V6.00.2900.3198 Data and Code for Modern Regression = Techniques

Data and Code for = Modern=20 Regression Techniques

 

This page is so you = can copy and=20 paste commands rather than having to retype them. Explanations are all = in the=20 book.  The location of the data is shown in the read.table command. If you = run the=20 whole thing at once (not sure why, but?), you should set par(ask=3DT), and change=20 back to par(ask=3DF) afterwards. This will prompt you for each new graph. = EXCEPT,=20 lattice graphs will still write over traditional graphs (at least in = R2.5.0). We=20 assume you run these in order, but if you do not, you may need to run=20 install.packages before loading some packages. Also, some packages may = write over=20 function names from other packages, and may even write over variable = names (we=20 think this only happens with mgcv in this code). This may happen with = variables=20 with names that are used elsewhere in R. So, this happened one time = running the=20 memory recognition example with the variable time (which is also the = name of a=20 function for time series analysis). Therefore, we used memrec$time and the remaining code worked. If you want the Figures = to look=20 the same as in the text you will need to the scale the graphics window=20 appropriately. These commands = load the=20 latest versions of many of the packages. Thus, if they change some of = the code=20 may stop working. Email danw@sussex.ac.uk=20 if this happens and he will try to fix the code.

 

The data can be read from the CD or = web page. In=20 the book several methods are described so that you do not have to write = out the=20 URL every time (also so that the command to read the data did not take = up=20 multiple lines). The simplest is changing RProfile.site. A function = could also=20 be written so that R automatically reads data from there. In the book we = assigned the URL name to webreg. This was so we = could still show=20 how to use the read.table command since = when you want=20 to access your own data they will not be on the web page. We just make = this=20 assignment once here so in case you are doing something different you = can just=20 skip this command.

 

#####################

 

webreg <-=20 "http://www.sussex.ac.uk//Users//danw//modreg//"

 

 

#  Chapter=20 1. Loading libraries and = some=20 univariate statistics (i.e., skewness and = histograms)

#  Example 1. Length of a chile. =

library("foreign")

chile = <-=20 read.table(paste(webreg,"chile.dat",sep=3D""), = header=3DT)

attach(chile)

names(chile)

chile$HEAT

install.packages("e1071")

library(e1071)

skewness(LENGTH)

library(boot)

lengthboot = <-=20 boot(LENGTH,function(x,i) skewness(x[i]), = R=3D1000)

boot.ci(lengthboot)

lnlength = <-=20 log(LENGTH)

skewness(lnlength)

par(mfrow=3Dc(1,3))

hist(LENGTH)

hist(log(LENGTH))

hist(log(LENGTH+2.54))

par(mfrow=3Dc(1,1))

detach(chile)

 

#  Chapter=20 2.

set.seed = =3D=20 121

x1 <-=20 rnorm(100,10,10)

x2 <-=20 rbinom(100,1,.5)

y <- x1 = + x2*40 +=20 rnorm(100,0,10)

reg1 <- = lm(y~x1)  

reg2 <- = lm(y~x2)

par(mfrow=3Dc(1,2))

plot(x1,y)

abline(reg1) =20

plot(x2,y)

abline(reg2)

par(mfrow=3Dc(1,1))

par(mfrow=3Dc(2,2))

plot(reg1)

reg3 <- = lm(y~x1+x2)

plot(reg3)

summary(reg3)

reg4 <- lm(y~x1+x2-1)

reg5 <- lm(y~x1*x2)

anova(reg4,reg5)

 

# Example 2.=20 Dissociation and suggestibility

suggest = <- c(12, 2,=20 -2, 5, 10,-13, 10, -3, 4, 9, 13, 6, 18, 12, 14, -6, 0, 5, 6, 19, -5, 8, = 5, 14,=20 -1, -2, 10, 17, 2, 10, -1, 21, 14, 4, 20, 24, 5,-12, 8, 7, 0, 2, 7, -1, = 12, 4,=20 0, 19, 8,-12)

DES=20 <-c(45.00,49.28,38.57,55.71,48.21,14.60,12.86,43.50,27.10,46.40,46.40,= =20 35.00,55.00,52.85,46.40,18.21,36.79,48.93,45.50,22.10,31.43,32.50,52.14, = 18.21,22.50,27.14,16.67,14.64,39.64,16.78,34.28,69.64,38.90,23.20,41.67, = 62.86,=20 47.00, 22.86,31.42,48.21,18.57,48.21,40.00,35.00,57.85,50.35,41.07,=20 45.00,48.00,49.28)

reg <-=20 lm(suggest~DES)

summary(reg)

plot(DES,suggest,xlab=3D"Dissociation score = (0-100)",ylab=3D"Suggestibility=20 (-25-25)")

abline(reg)

text(65,-12,"r =3D=20 .31")

dataset1 = <-=20 cbind(DES,suggest)

write.table(dataset1,"c:\\temp\\dataset1.dat")=

library("foreign")

write.foreign(as.data.frame(dataset1),"c:\\temp\\dataspss.dat","= c:\\temp\\dataspss.sps",package=3D"SPSS")

detach(dataset1)

 

# Chapter 3. =

# Example 3. Cognitive=20 dissonance.

control = <- c(=20 0,-3,3,2,-2,-1,2,3,-3,-5,2,-3,3,0,-2,-2,-2,-2,-1,2)

=

dollar1 = <-=20 c(3,1,1,3,2,3,3,2,2,2,2,2,-4,4,0,-3,4,1,1,-2)

dollar20 = <-=20 c(1,2,3,0,1,3,0,-2,2,1,0,0,-1,-2,-1,-4,-3,-1,0,0)

enjoy = <-=20 c(control,dollar1,dollar20)

group = <-=20 as.factor(rep(c("control","$1","$20"),each=3D20))

tapply(enjoy,group,mean)

tapply(enjoy,group,sd)

plot(group,enjoy,ylab=3D"Enjoyment (-5 to=20 5)",xlab=3D"Condition",cex.lab=3D1.3)

anova1 = <-=20 aov(enjoy~group)

summary(anova1)

anova2 = <-=20 lm(enjoy~group)

summary(anova2)

anova(anova2)

contrasts(group)

wcontrol = <-=20 c(1,0,0,0,1,0)

dim(wcontrol) <-=20 c(3,2)

contrasts(group) <-=20 wcontrol

contrasts(group)

summary(lm(enjoy~group))

 

# Example 4. Children's = forgetting=20 I.

lordex = <-=20 read.table(paste(webreg,"lordex.txt",sep=3D""), = header=3DT)

attach(lordex)

names(lordex)

par(mfrow=3Dc(1,2))

hist(Final,main=3D"Untransformed Final")

library(e1071)

library(boot)

skewness(Final)

skewboot = <-=20 boot(Final, function(x,i) skewness(x[i]), = R=3D1000)

boot.ci(skewboot,type=3D"bca")

newfinal = <-=20 sqrt(Final + .5)

hist(newfinal,=20 main=3Dexpression(sqrt(Final + .5)))

par(mfrow=3Dc(1,1))

skewness(newfinal)

skewboot = <-=20 boot(newfinal, function(x,i) skewness(x[i]), = R=3D1000)

boot.ci(skewboot,type=3D"bca")

hist(AGEMOS,=20 breaks=3Dc(seq(36,120,12)),xlab=3D"Age in months")

AGEYRS = <-=20 trunc(AGEMOS/12)

AGEYRSfac = <-=20 as.factor(AGEYRS)

AGE2 <- = cut(AGEYRS,=20 br=3Dc(0,6.5,10))

reg1 <- = lm(newfinal~AGEMOS)

summary(reg1)

reg2 <- = lm(newfinal~AGEYRS)

summary(reg2)

par(mfrow=3Dc(1,2))

plot(AGEMOS,newfinal,ylab=3Dexpression(sqrt(Final + = .5)),xlab=3D"Age in=20 months")

lines(sort(AGEMOS),predict(reg1)[order(AGEMOS)],col=3D"red")

lines(sort(AGEMOS),predict(reg2)[order(AGEMOS)],col=3D"blue")=20

reg3 <- = lm(newfinal=20 ~ AGEYRSfac)

summary(reg3)

tapply(newfinal,AGEYRSfac,mean)

anova(reg2,reg3)=20

t.test(newfinal~AGE2,var.equal=3DT)

reg4 <- = lm(newfinal~AGE2)

summary(reg4)

plot(AGEMOS,newfinal,ylab=3Dexpression(sqrt(Final + = .5)),xlab=3D"Age in=20 months")

lines(sort(AGEMOS),predict(reg3)[order(AGEMOS)],col=3D"red")

lines(sort(AGEMOS),predict(reg4)[order(AGEMOS)],col=3D"blue")

par(mfrow=3Dc(1,1))=20

Diff  = <- Final=20 - Initial

dreg1 = <-=20 lm(Diff~AGEMOS)

dreg2 = <-=20 lm(Diff~AGEYRS)

dreg3 = <-=20 lm(Diff~AGEYRSfac)

dreg4 = <-=20 lm(Diff~AGE2)

par(mfrow=3Dc(1,2))

plot(AGEMOS,Diff,=20 ylab=3D"Final - Initial", xlab=3D"Age in months")

lines(sort(AGEMOS),predict(dreg1)[order(AGEMOS)],col=3D"red")

lines(sort(AGEMOS),predict(dreg2)[order(AGEMOS)],col=3D"blue")

plot(AGEMOS,Diff,ylab=3D"Final - Initial", xlab=3D"Age in=20 months")

lines(sort(AGEMOS),predict(dreg3)[order(AGEMOS)],col=3D"red")

lines(sort(AGEMOS),predict(dreg4)[order(AGEMOS)],col=3D"blue")

par(mfrow=3Dc(1,1))=20

tapply(Diff,AGEYRSfac,mean)

summary(dreg1)

detach(lordex)

 

# Chapter 4

# Example 4. Children's = forgetting=20 II

lordex = <-=20 read.table(paste(webreg,"lordex.txt",sep=3D""), = header=3DT)

attach(lordex)

lm1 <-=20 lm(Final ~ Initial)

summary(lm1)

lm2 <-=20 lm(Final ~ Initial + AGEMOS)

summary(lm2)

lm3 <-=20 lm(Final ~ Initial*AGEMOS)

summary(lm3)

anova(lm1,lm2,lm3)

summary(lm2)

summary(lm3)=20

library(lattice)

AGEYRS = <-=20 trunc(AGEMOS/12)

scatmat = <-=20 xyplot(Final~Initial | as.factor(AGEYRS))

print(scatmat)

tapply(Initial,=20 AGEYRS, sd)

tapply(Final, AGEYRS,=20 sd)

sexit = <-=20 split(Initial,AGEYRS)

sfoll = <-=20 split(Final,AGEYRS)

predmod4 = <-=20 split(lm(Final~Initial*as.factor(AGEYRS))$fitted.values,AGEYRS)

plot(Initial,Final,pch=3D19,cex=3D.5,col=3D"black", = xlab=3D"Initial interview",=20 ylab=3D"Final=20 interview",xlim=3Dc(0,25),ylim=3Dc(0,10),cex.lab=3D1.3,las=3D1,font.lab=3D= 1.5)

abline(0,1,lty =3D=20 3)

for (i = in 1:6)=20 lines(sexit[[i]],predmod4[[i]], col=3D"black")

meanexit = <-=20 c(mean(sexit[[1]]),mean(sexit[[2]]), mean(sexit[[3]]), mean(sexit[[4]]), = mean(sexit[[5]]), mean(sexit[[6]]))

meanfoll<-=20 c(mean(sfoll[[1]]),mean(sfoll[[2]]), mean(sfoll[[3]]), mean(sfoll[[4]]), = mean(sfoll[[5]]), mean(sfoll[[6]]))

lines(meanexit,meanfoll,col=3D"grey",lwd=3D4,lend=3D"round")

text(12,10,"Final =3D=20 Initial",pos=3D4)

arrows(12,10,10,10,length=3D.1)

text(c(13,21,12.1,0,0,4.4),c(5.5,4.5,2.2,3.7,1.5,.5),c("9","7","= 8","6","5","4"))

points(meanexit,meanfoll,pch=3D19)

lines(c(14,16.7),=20 c(9.2,9.2),col=3D"grey",lwd=3D4)

text(16.7,9.2,"Age=20 group means", pos=3D4)

text(14,8.5,"Means=20 increase with age\nfor both interviews",pos=3D4)

text(-.3,9.5,"Intercepts higher for\nolder children\n=20 (Ancova)",pos=3D4)

text(14,1,"Older=20 children further away\nfrom dashed line (Anova).", = pos=3D4)

detach(lordex)

 

# Mediation = example

set.seed(143)

leaflet = <-=20 rep(c(0,1),each=3D50)

fairskin = <-=20 rbinom(100,1,.5)

likely = <-=20 rbinom(100,10,.20 + .2*leaflet + .2*fairskin)

plan <- = rbinom(100,7,likely/15+leaflet*.2)

summary(lm(likely~leaflet))$coef

summary(lm(plan~leaflet))$coef

summary(lm(plan~likely))$coef

summary(lm(plan~likely+leaflet))$coef

(1.92*.45)/sqrt(.37^2*.45^2 +=20 .06^2*1.92^2+.37^2*.06^2)

sobel1 = <-=20 function(a,b,sea,seb){

  = sobel <-=20 (a*b)/sqrt(sea^2*b^2+seb^2*a^2+sea^2*seb^2)

  = paste("Sobel z=20 =3D", format(sobel,nsmall=3D2,digits=3D3))}

sobel1(1.92,.45,.37,.06)

sobel1(1.920,.454,.367,.057)

#=20 source(paste(webreg,"mediator.R",sep=3D""))

# This will work once Dan returns to = Brighton=20 and updates the webpage.

# You need to run the function first=20

##=20 Simple Mediation Function with Figure of effects

mediator=20 <- function(x,y,m, ...){

  #=20 Put in experimental variable x, outcome variable y, then mediator=20 m.

  #=20 Some of this is written so that it works in a = mediator

  #=20 function for more complex problems.

  reg0=20 <- lm(y~x)

  reg1=20 <- lm(m~x)

  reg2=20 <- lm(y~m+x)

  c=20 <- summary(reg0)$coefficients[2,1]

  sc=20 <- summary(reg0)$coefficients[2,2]

  a=20 <- summary(reg1)$coefficients[2,1]

  sa=20 <- summary(reg1)$coefficients[2,2]

  b=20 <- summary(reg2)$coefficients[2,1]

  sb=20 <- summary(reg2)$coefficients[2,2]

  = cp <-=20 summary(reg2)$coefficients[3,1]

  = scp <-=20 summary(reg2)$coefficients[3,2]

  = sobel <-=20 (a*b)/sqrt(b^2*sa^2 + a^2*sb^2 + sa^2*sb^2)

  = psobel <-=20 format(2*(1-pnorm(abs(sobel))),digits=3D2,nsmall=3D2)

 =20 plot(c(0,100),c(0,110),col=3D"white",ann=3DF,tck=3D0,col.axis=3D"white")<= /FONT>

 =20 rect(c(10,10,70,70,40),c(10,50,10,50,80),c(30,30,90,90,60),c(30,70,30,70,= 100))

 =20 arrows(c(30,30,20,60),c(20,60,70,90),c(70,70,40,80),c(20,60,90,70),length= =3D.15)

 =20 text(c(20,20,80,80,50),c(20,60,20,60,90),c("X","X","Y","Y","M"),= cex=3D2)

 =20 text(30,80,paste(format(a,digits=3D2,nsmall=3D2)),pos=3D2,cex=3D1.3)

 =20 text(70,80,paste(format(b,digits=3D2,nsmall=3D2)),pos=3D4,cex=3D1.3)

 =20 text(50,20,paste(format(c,digits=3D2,nsmall=3D2)),pos=3D3,cex=3D1.3)

 =20 text(50,60,paste(format(cp,digits=3D2,nsmall=3D2)),pos=3D3,cex=3D= 1.3)

 =20 text(50,2,paste("Sobel z =3D ",format(sobel,digits=3D2,nsmall=3D2),"; p = =3D=20 ",psobel),cex=3D1.3)

 =20 print(paste("Sobel z =3D", format(sobel,digits=3D3,nsmall=3D2),", p = =3D",=20 psobel))

 =20 invisible(c(sobel,psobel))

}

mediator(leaflet,plan,likely)

 

# = Chapter=20 5.

# Example 6. Fathers'=20 ptsd.

maleptsd = <-=20 read.table(paste(webreg,"maleptsd.dat",sep=3D""), = header=3DT)

attach(maleptsd)

ptsd <- = log(PTSD +=20 1)

preds = <-=20 cbind(OVER2,OVER3,OVER5,BOND,POSIT,NEG,CONTR,SUP,CONS,AFF)<= /B>

ptsdmat = <-=20 cor(preds)

print(ptsdmat,digits=3D2)

print(sd(preds),digits=3D2)

spmat = <-=20 cor(preds,method=3D"spearman")

print(spmat,digits=3D2)

cor.test(OVER2,OVER3)

new <-=20 diag(sd(preds)) + upper.tri(spmat)*spmat +=20 lower.tri(ptsdmat)*ptsdmat

print(new,digits=3D2)

library(car)

spm(preds)allvars = <-=20 lm(ptsd~preds)

summary(allvars)

install.packages("leaps")

library(leaps)

x1 <-=20 regsubsets(preds,ptsd)

summary(x1)

par(mfrow=3Dc(2,2))

plot(x1,scale=3Dc("r2"))

title("Default for=20 leaps")

plot(x1,scale=3Dc("adjr2"))

title("Default for=20 leaps")

x2 <-=20 leaps(preds,ptsd,nbest=3D1, method=3D"r2")

x3 <-=20 leaps(preds,ptsd,nbest=3D1, method=3D"adjr2")

plot(x2$size-1,x2$r2,xlab=3D"Number of=20 predictors",ylab=3Dexpression(R^2))

lines(spline(x2$size-1,x2$r2))

plot(x3$size-1,x3$adjr2, xlab=3D"Number of=20 predictors",ylab=3Dexpression(adj.R^2))

lines(spline(x3$size-1,x3$adjr2))

par(mfrow=3Dc(1,1))

plot(x3$size-1,x3$adjr2,xlab=3D"# of predictors not including = B0",=20 ylab=3D"Adj. R-squared")

lines(spline(x3$size-1,x3$adjr2))

m2 <-=20 max(x3$adjr2)

m1 <-=20 which.max(x3$adjr2)

arrows(m1,m2-.02,m1,m2)

text(m1,m2-.023,expression(paste("max ",r[adj]^2 =3D=3D=20 0.2)))

text(m1,m2-.031,=20 paste("with",format(m1,digits=3D1),"variables"),pos=3D3)

par(mfrow=3Dc(1,1))

x3$which[which.max(x3$adjr2),]

labels(preds[1,])

summary(lm(ptsd~preds[,c(1,2,4,6,8,10)]))

library(MASS)

lm.ridge(ptsd~preds,lambda=3Dseq(0,100,by=3D1))-> = x

plot(x)

title("Ridge=20 Regression")

abline(h=3D0)

abline(v=3D50,lty=3D3)

x$coef[,50]

install.packages("lars")

library("lars")

lasso1 = <-=20 lars(preds,ptsd)

plot(lasso1)

predict(lasso1,s=3D7,mode=3D"step", type=3D"coefficient")$coefficients

lassocv = <-=20 cv.lars(preds,ptsd)=20

frac = <-=20 lassocv$fraction[which.min(lassocv$cv)]

predict(lasso1,s=3Dfrac,mode=3D"fraction", type=3D"coefficient")$coefficients

install.packages("pls")

library("pls")

pcr1 <-=20 pcr(ptsd~preds,ncomp=3D10)

plot(1:10,summary(pcr1)[1,],col=3D"blue",ylab=3D"Cumulative = variance=20 accounted for",xlab=3D"Number of=20 components",ylim=3Dc(0,100),pch=3D19,cex.lab=3D1.3)

=

lines(1:10,summary(pcr1)[1,],col=3D"blue",lwd=3D1.5)

points(1:10,summary(pcr1)[2,],col=3D"red",pch=3D19)

lines(1:10,summary(pcr1)[2,],col=3D"red",lwd=3D1.5)

text(4,85,"variance of=20 predictors",col=3D"blue",pos=3D4,cex=3D1.2)

text(5,35,"variance of=20 PTSD",col=3D"red",pos=3D4,cex=3D1.2)

pcr1$loadings

pca1 <-=20 princomp(preds)

pcrreg=20 <- lm(ptsd ~ pca1$scores[,1]+ pca1$scores[,2])

summary(pcrreg)

plsr1 <-=20 plsr(ptsd~preds,ncomp=3D10)

plot(1:10,summary(plsr1)[1,],col=3D"blue",ylab=3D"Cumulative = variance=20 accounted for",xlab=3D"Number of=20 components",ylim=3Dc(0,100),pch=3D19,cex.lab=3D1.3)

=

lines(1:10,summary(plsr1)[1,],col=3D"blue",lwd=3D1.5)

points(1:10,summary(plsr1)[2,],col=3D"red",pch=3D19)

lines(1:10,summary(plsr1)[2,],col=3D"red",lwd=3D1.5)

text(4.3,85,"variance of=20 predictors",col=3D"blue",pos=3D4,cex=3D1.2)

text(5,35,"variance of=20 PTSD",col=3D"red",pos=3D4,cex=3D1.2)

summary(plsr1)

plsr1$loadings

plsreg=20 <- lm(ptsd ~ plsr1$scores[,1])

summary(plsreg)

detach(maleptsd)

 

# Chapter = 6.

plot(0:40,dpois(0:40,1),xlab=3D"Variable",ylab=3D"",ylim=3Dc(0,.= 5),col=3D"white")

for (i in=20 c(1,2,5,10,20)) lines(spline(0:40,dpois(0:40,i)))

text(1,=20 .37, expression(lambda,"   =3D = 1"),pos=3D4)

text(2.4,=20 .26, expression(lambda,"   =3D = 2"),pos=3D4)

text(4,=20 .20, expression(lambda,"   =3D 5"), = pos=3D4)

text(6.8,=20 .14, expression(lambda,"    =3D 10"), = pos=3D4)

text(17,=20 .11, expression(lambda,"    =3D 20"), = pos=3D4)

 

# Example 7. Associations = with test=20 score.

glmexample = <-=20 read.table(paste(webreg,"glmexample.dat",sep=3D""),header=3DT)

attach(glmexample)

socreg=20 <- glm(social~test)

summary(socreg)

detreg = <-=20 glm(detent~test,binomial)

summary(detreg)

x <-=20 cbind(math,10-math)

mathreg = <-=20 glm(x~test,binomial)

summary(mathreg)

bookreg = <-=20 glm(books~test,poisson)

summary(bookreg)

par(mfrow=3Dc(2,2))

plot(test,social)

lines(test,predict(socreg,type=3D"response"))<= /P>

plot(test,detent)

lines(test,predict(detreg,type=3D"response"))<= /P>

plot(test,math/10)

lines(test,predict(mathreg,type=3D"response"))=

plot(test,books)

lines(test,predict(bookreg,type=3D"response"))=

par(mfrow=3Dc(1,1))

det2 <- = glm(detent~test+social,binomial)

anova(detreg,det2,test=3D"Chi")

det3 <- = glm(detent~test*social,binomial)

anova(det2,det3,test=3D"Chi")

socialpoly = <-=20 glm(social~poly(test,2))

plot(test,social)

lines(test,predict(socialpoly,type=3D"response"))<= /B>

detach(glmexample)

 

# Example 8. Manipulation = reasonable=20 doubt.

juryrd = <-=20 read.table(paste(webreg,"juryrd.dat",sep=3D""),header=3DT)<= /B>

attach(juryrd)

names(juryrd)

par(mfrow=3Dc(1,2))

hist(BELIEF)

library(e1071)

skewness(BELIEF)

lb <-=20 format(skewness(BELIEF) -=20 sqrt(6/length(BELIEF))*1.96,digit=3D3)

ub <-=20 format(skewness(BELIEF) +=20 sqrt(6/length(BELIEF))*1.96,digit=3D3)

print(paste("Lower=20 Bound =3D ", lb, "Upper Bound =3D ", ub))

library(boot)

beliefboot = <-=20 boot(BELIEF,function(x,i) skewness(x[i]), = R=3D1000)

boot.ci(beliefboot)

shapiro.test(BELIEF)

ks.test(BELIEF,"pnorm")

beliefsq = <-=20 BELIEF^2

hist(beliefsq)

par(mfrow=3Dc(1,1))

bboot2 = <-=20 boot(beliefsq,function(x,i) skewness(x[i]), = R=3D1000)

boot.ci(bboot2)

reg1 <- = lm(beliefsq~FORM)

summary(reg1)

t.test(beliefsq~FORM)

wbeliefsq = <-=20 split(beliefsq,FORM)

wilcox.test(wbeliefsq[[1]],wbeliefsq[[2]])

=

reg2 <- = glm(GUILTY~beliefsq, binomial)

reg3 <- = glm(GUILTY~beliefsq+FORM, binomial)

anova(reg2,reg3)=20

1-pchisq(15.351,1)

summary(reg3)

plot(BELIEF,predict(reg3,type=3D"response"),ylab=3D"Probability = of a guilty=20 verdict")

beliefs = <-=20 split(BELIEF,FORM)

predicts = <-=20 split(predict(reg3,type=3D"response"),FORM)

o1 <-=20 order(beliefs[[1]])

o2 <-=20 order(beliefs[[2]])

lines(beliefs[[1]][o1],predicts[[1]][o1])

lines(beliefs[[2]][o2],predicts[[2]][o2])

abline(h=3D.5,lty=3D3)

ld1 <-=20 sqrt(-(reg3$coef[1]/reg3$coef[2]))

ld2 <-=20 sqrt(-((reg3$coef[1]+reg3$coef[3])/reg3$coef[2]))

abline(v=3Dld1,lty=3D3)

abline(v=3Dld2,lty=3D3)

paste("Control LD50=20 =3D", format(ld1,dig=3D2), "Instruction LD50=20 =3D",format(ld2,dig=3D2))

anova(reg3,glm(GUILTY~FORM*beliefsq,binomial),test=3D"Chi")

table(GUILTY,FORM)

chisq.test(GUILTY,FORM,correct=3DF)

cells = <-=20 c(54,44,32,42)

imagine = <-=20 c(0,1,0,1)

guilty = <-=20 c(0,0,1,1)

glm(cells~imagine+guilty,poisson)

detach(juryrd)

 

 

# Chapter 7. =

set.seed=3D2007

x <-=20 (1:100)/50

y <- 4 +=20 2*x - 3*x^2 + x^3 + rnorm(100,0,.3)

par(mfrow=3Dc(1,4))

plot(x,y,=20 main=3D"0 degree", xlab=3D"", ylab=3D"", pch=3D19, = axes=3DF)

box()

abline(h=3Dmean(y),lwd=3D1.5)

for (i in=20 1:3) {

plot(x,y,=20 main=3Dpaste(i,"degree"), xlab=3D"", ylab=3D"", pch=3D19, = axes=3DF)

box()

lines(x,predict(lm(y~poly(x,i))),lwd=3D1.5)}

par(mfrow=3Dc(1,1))

anova(lm(y~1),lm(y~poly(x,1)),lm(y~poly(x,2)),lm(y~poly(x,3)))

qreg1 <-=20 lm(y[1:50]~poly(x[1:50],1))

qreg2 <-=20 lm(y[51:100]~poly(x[51:100],1))

par(mfrow=3Dc(1,2))

plot(x,y,=20 main=3D"Piecewise linear", xlab=3D"", ylab=3D"", pch=3D19, = axes=3DF)

box()

lines(x[1:50],predict(qreg1),lwd=3D1.5)

lines(x[51:100],predict(qreg2),lwd=3D1.5)

plot(x,y,=20 main=3D"Cubic spline with 1 knot", xlab=3D"", ylab=3D"", pch=3D19,=20 axes=3DF)

box()

library(splines)

lines(x,predict(lm(y~bs(x,degree=3D3,df=3D4))),lwd=3D1.5)=

par(mfrow=3Dc(1,1))

 

# Example 9. Academics'=20 salaries

years <-=20 c(1973,1975,1977,1979,1981,1983,1985,1986,1987,1988,1989,1990,1991,1992,1= 993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006)

salary=20 <-=20 c(-3.0,-8.0,-1.7,-10.2,-4.0,3.7,5.2,4.8,0.5,1.4,1.5,-0.7,  &nbs= p; =20 0.4,-0.4,0.3,0.7,0.4,-0.3,1.6,2.0,1.0,0.1,2.2,0.6,0.2,-0.5,-0.3,1.3)=20

plot(years,salary,xlab=3D"Year",ylab=3D"Salary change in real=20 terms",pch=3D20)

abline(h=3D0,lty=3D2)

salary1=20 <- lm(salary ~ bs(years,degree=3D1))

summary(salary1)

par(mfrow=3Dc(2,3))

plot(years,salary,xlab=3D"Years",ylab=3D"Salary change",pch =3D = 19)

abline(h=3D0,lty=3D2)

lines(years, fitted(salary1))

salary2=20 <- lm(salary ~ bs(years,degree=3D1,df=3D2))

plot(years,salary,xlab=3D"Years",ylab=3D"Salary change",pch =3D = 19)

abline(h=3D0,lty=3D2)

lines(years,fitted(salary2))

salary3=20 <- lm(salary ~ = bs(years,degree=3D1,knots=3D1986))

plot(years,salary,xlab=3D"Years",ylab=3D"Salary change",pch =3D = 19)

abline(h=3D0,lty=3D2)

lines(years,fitted(salary3))

anova(salary1, salary2, salary3)

salary4=20 <- lm(salary ~ bs(years,degree=3D2,df=3D2))

plot(years,salary,xlab=3D"Years",ylab=3D"Salary change",pch =3D = 19)

abline(h=3D0,lty=3D2)

lines(years,fitted(salary4))

salary5=20 <- lm(salary ~ bs(years,degree=3D2,df=3D3))

plot(years,salary,xlab=3D"Years",ylab=3D"Salary change",pch =3D = 19)

abline(h=3D0,lty=3D2)

lines(years,fitted(salary5))

salary6=20 <- lm(salary ~ = bs(years,degree=3D2,knots=3D1986))

plot(years,salary,xlab=3D"Years",ylab=3D"Salary change",pch =3D = 19)

abline(h=3D0,lty=3D2)

lines(years,fitted(salary6))

par(mfrow=3Dc(1,1))

anova(salary4,salary5,salary6)

anova(salary2,salary5)

 

# Example 10. Experience, = Gender,=20 Education, and Salary

cp85 = <-=20 read.table(paste(webreg,"cp85.dat",sep=3D""), = header=3DT)

library(splines)

attach(cp85)

names(cp85)

wage1 = <-=20 lm(LNWAGE~bs(EXPER,df=3D1,degree=3D1) + bs(EDUC,df=3D4) + FEMALE)=20

summary(wage1)=20

anova(wage1)

experq = <-=20 quantile(EXPER)

minimales = <-=20 data.frame(EXPER=3Drep(experq,each=3D21),EDUC=3Drep(0:20,5),FEMALE=3Drep(= 0,105))

minifemales <-=20 data.frame(EXPER=3Drep(experq,each=3D21),EDUC=3Drep(0:20,5),FEMALE=3Drep(= 1,105))

par(mfrow=3Dc(1,2))

plot(minimales$EDUC,predict(wage1,minimales),xlab=3D"Years in = education",=20 ylab=3D"Predicted = ln(wage)",pch=3D19,cex=3D.5,ylim=3Dc(.5,3))

lines(0:20,=20 predict(wage1,minimales[1:21,]),lwd=3D.5)

lines(0:20,=20 predict(wage1,minimales[22:42,]),lwd=3D1)

lines(0:20,=20 predict(wage1,minimales[43:63,]),lwd=3D1.5)

lines(0:20,=20 predict(wage1,minimales[64:84,]),lwd=3D2)

lines(0:20,=20 predict(wage1,minimales[85:105,]),lwd=3D2.5)

text(6,2.5,"More=20 experience")

text(15,1.5,"Less=20 experience")

title("For = males")

plot(minifemales$EDUC,predict(wage1,minifemales),xlab=3D"Years = in=20 education", ylab=3D"Predicted=20 ln(wage)",pch=3D19,cex=3D.5,ylim=3Dc(.5,3))

lines(0:20,=20 predict(wage1,minifemales[1:21,]),lwd=3D.5)

lines(0:20,=20 predict(wage1,minifemales[22:42,]),lwd=3D1)

lines(0:20,=20 predict(wage1,minifemales[43:63,]),lwd=3D1.5)

lines(0:20,=20 predict(wage1,minifemales[64:84,]),lwd=3D2)

lines(0:20,=20 predict(wage1,minifemales[85:105,]),lwd=3D2.5)

text(6,2.5,"More=20 experience")

text(15,1.2,"Less=20 experience")

title("For = females")

par(mfrow=3Dc(1,1))

install.packages("gam")

library(gam)

wage1a = <-=20 gam(LNWAGE~bs(EXPER,df=3D1,degree=3D1) + bs(EDUC,df=3D4) + FEMALE)=20

summary(wage1a)=20

par(mfrow=3Dc(1,3))

plot(wage1a, se=3DT)=20

wage2a = <-=20 gam(LNWAGE~bs(EXPER,df=3D2,degree=3D1) + bs(EDUC,df=3D4) + FEMALE)=20

anova(wage1a,wage2a,test=3D"F")

plot(wage2a,se=3DT)

summary(lm(LNWAGE ~=20 bs(EXPER, degree=3D3, df=3D4)+bs(EDUC,degree=3D3, df=3D4) +=20 FEMALE))

plot(gam(LNWAGE~bs(EXPER,degree=3D3,df=3D4)+bs(EDUC,degree=3D3,d= f=3D4)+=20 FEMALE),se=3DT)

par(mfrow=3Dc(1,1))

detach(cp85)

 

# Example 11. Detecting=20 deception.

set.seed(406)

age <-=20 runif(1000,3,23)

truth <-=20 rbinom(1000,1,.5)

cbca <-=20 round(-3*truth+2*truth*log(age)+.2*age+rbinom(1000,25,.7))<= /B>

summary(glm(truth~age,family=3Dbinomial))

library(gam)

plot(gam(truth~bs(age,df=3D4),family=3Dbinomial),se=3DT)<= /SPAN>

gam0 <-=20 gam(truth~bs(cbca, df=3D4), family=3Dbinomial)

gam1 <-=20 gam(truth ~ bs(cbca, df=3D4) + bs(age, df=3D4), family=3Dbinomial)=20

anova(gam0,gam1)

inter <- cbca*age

gam2 <-=20 gam(truth~bs(cbca,df=3D4)+bs(age,df=3D4)+bs(inter,df=3D4), = family=3Dbinomial)=20

anova(gam1,gam2)

par(mfrow=3Dc(3,3))=20

truth2 = <-=20 factor(truth,label=3Dc("false","true"))

plot(gam0, = se=3DT)=20

plot(age,cbca,pch=3D".")=20

plot(truth2,cbca,ylab=3D"cbca")

plot(gam1, = se=3DT)=20

plot(truth2,age,ylab=3D"age")

plot(gam2, = se =3D T)=20

par(mfrow=3Dc(1,1))

 

# Here is the code for = making the=20 generalized additive model for the 

# London et al. (in press) = data. Other=20 models were examined. 

attach(lordex)=20

gam1=20 <- gam(Final ~ Initial + bs(AGEMOS,df=3D4),poisson) =

plot(gam1,se=3DT,terms=3D"bs(AGEMOS,=20 df =3D 4)",1,pch=3D19,xlab=3D"Age in months",ylab=3D"Predicted = conditional=20 recall",cex.lab=3D1.5,cex.axis=3D1.3)

 

 

# Chapter = 8.

# Example 12. Getting = children to=20 exercise

exer <-=20 read.table(paste(webreg,"exercise.dat",sep=3D""),header=3DT)

attach(exer)

cond <-=20 factor(wcond, labels=3Dc("Control","Leaftet",=20 "L+quiz","L+plan"))

install.packages("nlme")

library("nlme")

model1=20 <- lm(sqw2 ~ cond)

summary(model1)

plot(cond,sqw2,xlab=3D"Conditions",ylab=3Dexpression(sqrt(exerci= se + .5)))=20

mexer <-=20 aggregate(sqw2,list(class),mean)[,2]

mcond <-=20 aggregate(wcond,list(class),mean)[,2]

model2=20 <- lm(mexer~factor(mcond))

summary(model2)

points(mcond,mexer,pch=3D19,cex=3D1.3)

model3=20 <- lm(sqw2 ~ factor(class) + cond)

summary(model3)

model4=20 <- lme(sqw2 ~ 1, random =3D = ~1|class,method=3D"ML")

model4a=20 <- lme(sqw2 ~ cond, random =3D = ~1|class,method=3D"ML")

anova(model4,model4a)

summary(model4a)

newcontrasts <- c(-3, 1, 1, 1, 0, -2, 1, 1, 0, 0, -1,=20 1)

dim(newcontrasts) <- c(4,3)

contrasts(cond) <- newcontrasts

contrasts(cond)

model4b=20 <- lme(sqw2 ~ cond, random =3D = ~1|class,method=3D"ML")

summary(model4b)

model5=20 <- lme(sqw2 ~ sqw1, random =3D = ~1|class,method=3D"ML")

model5a=20 <- lme(sqw2 ~ sqw1 + cond, random =3D=20 ~1|class,method=3D"ML")

model5b=20 <- lme(sqw2 ~ sqw1*cond, random =3D = ~1|class,method=3D"ML")

anova(model5,model5a,model5b)

par(mar=3Dc(5,5,4,2))

plot(jitter(sqw1),jitter(sqw2), = xlab=3Dexpression(sqrt(pre-exercise + .5)),=20 ylab=3Dexpression(sqrt(post-exercise +=20 .5)),pch=3Dwcond,col=3Dwcond)

legend(3,1.3,c(levels(cond)),pch=3D1:4,col=3D1:4)<= /B>

sexer1=20 <- split(sqw1,cond)

spred <-=20 split(model5b$fitted[,1],cond)

for (i in=20 1:4) lines(sexer1[[i]],spred[[i]],col=3Di)

par(mar=3Dc(5,4,4,2))

library(lattice)

xyplot(sqw2~sqw1 | class)

intervals(model5a)

varprop=20 <- function(m1,m2)

  = {v1 <-=20 sum(as.numeric(VarCorr(m1)[,1]))

    v2 <-=20 sum(as.numeric(VarCorr(m2)[,1]))

   rsq <- (v1-v2)/v1

   return(rsq)

 =20 }

model50=20 <- lme(sqw2 ~ 1, random =3D = ~1|class,method=3D"ML")

varprop(model50,model5)

varprop(model50,model5a)

varprop(model50,model5b)

detach(exer)

 

# Example 13. Response = times and memory=20 recognition accuracy

memrec = <-=20 read.table(paste(webreg,"memrec.dat",sep=3D""),header=3DT)<= /B>

attach(memrec)

par(mfrow=3Dc(2,1))

hist(time,xlab=3D"Response time (in = msec)",main=3D"Untransformed=20 variable")

library(e1071)

text(6000,850,paste("skewness =3D=20 ",format(skewness(time),digits=3D2)),pos=3D4)

lntime = <-=20 log(time)

hist(lntime,xlab=3D"ln(response time in = msec)",main=3D"Transformed=20 variable")

text(8.5,400,paste("skewness =3D=20 ",format(skewness(lntime),digits=3D2)),pos=3D4)

par(mfrow=3Dc(1,1))

install.packages("lme4")

library(lme4)

model1=20 <- lmer(saysold ~ faceold + (1|partno),=20 family=3Dbinomial,method=3D"Laplace")

model2=20 <- update(model1, .~. + facewhite)

model3=20 <- update(model2, .~. + facewhite:faceold)

model4=20 <- update(model3, .~. + lntime)

model5=20 <- update(model4, .~. + lntime:faceold)

model6=20 <- update(model5, .~. + lntime:facewhite)

model7=20 <- update(model6, .~. + = lntime:faceold:facewhite)

anova(model1,model2,model3,model4,model5,model6,model7)

model5

mod5=20 <-  -5.8387 +=20 faceold*10.7439+facewhite*-1.0359+lntime*0.6486+faceold*facewhite*0.9677+= faceold*lntime*-1.1774

predprob=20 <- exp(mod5)/(1+exp(mod5))

rightprob=20 <- predprob*faceold + (1-faceold)*(1-predprob)

par(mfrow=3Dc(1,1))

plot(memrec$time,rightprob,pch=3D20,xlab=3D"Time in = msec",ylab=3D"Probability=20 of a correct response",cex.lab=3D1.3)

stime <-=20 split(time,facewhite+2*faceold)

srightprob=20 <- split(rightprob,facewhite+2*faceold)

for (i in=20 1:4)=20 lines(sort(stime[[i]]),srightprob[[i]][order(stime[[i]])],col=3Di,lwd=3D1= .5)

text(12000,.8,"previously seen White faces",cex=3D1.3)=20

model5b=20 <- update(model5, .~. -(1|partno) + = (faceold|partno))

anova(model5,model5b)

model5b

#=20 The mgcv package overwrites stuff in gam and some other things, so if = you run=20 this

# = and then the=20 models above you may get some funny answers

install.packages("mgcv")

library(mgcv)

par(mfrow=3Dc(2,2))

ssaysold=20 <- split(saysold,facewhite+2*faceold)

slntime=20 <- split(lntime,facewhite+2*faceold)

spartno=20 <- split(partno,facewhite+2*faceold)

for (i in=20 1:4) {

   part <- spartno[[i]]

   time <- slntime[[i]]

   sold <- ssaysold[[i]]

   gammy <- gamm(sold ~=20 s(time),random=3Dlist(part=3D~1),family=3Dbinomial)

=

   plot(gammy$gam,xlab=3D"ln(time in=20 msec)")

   if (i =3D=3D 1) title("New Black = faces")

   if (i =3D=3D 2) title("New White = faces")

   if (i =3D=3D 3) title("Old Black = faces")

   if (i =3D=3D 4) title("Old White = faces")

}

par(mfrow=3Dc(1,1))

detach(memrec)

 

# Chapter 9. Robust=20 regression

# Example 14. Crime in=20 Sussex.

crime = <-=20 read.table(paste(webreg,"sussexcrime.dat",sep=3D""),header=3DT)

crime <-=20 crime[order(crime[,9]),1:10]

attach(crime)

par(mfrow=3Dc(1,3))

plot(theft, Drugs,=20 xlab=3D"Theft offences",ylab=3D"Drug = offences",pch=3D19,main=3D"Scatterplot of raw=20 data",cex.lab=3D1.3)

text(2900,300,"Regency",pos=3D3,cex=3D1.3)

plot(rank(theft),=20 rank(Drugs) ,xlim=3Dc(0,300),ylim=3Dc(0,300), xlab=3D"Rank of theft=20 offences",ylab=3D"Rank of drug offences",pch=3D19,main=3D"Scatterplot of = ranked=20 data",cex.lab=3D1.3)

text(180,270,"Regency",pos=3D3,cex=3D1.3)

arrows(230,285,=20 255,265, length=3D.07)

plot(log(theft+1),=20 log(Drugs+1), xlab=3D"Log of theft offences + 1",ylab=3D"Log of drug = offences +=20 1",pch=3D19,main=3D"Scatterplot of logged = data",cex.lab=3D1.3)

text(6.7,5.4,"Regency",pos=3D3,cex=3D1.3)

par(mfrow=3Dc(1,1))

cor.test(theft,Drugs)

cor.test(theft,Drugs,method=3D"spearman")

library(boot)

bootspear = <-=20 function(x,i)=20 cor.test(x$Drugs[i],x$theft[i],method=3D"spearman")$estimate

thevars = <-=20 as.data.frame(cbind(Drugs,theft))

spearboot = <-=20 boot(thevars,bootspear, R=3D1000)

boot.ci(spearboot)

cor.test(rank(theft),rank(Drugs))

lnreg = <-=20 lm(log(Drugs+1)~log(theft+1))

par(mfrow=3Dc(1,2))

plot(log(theft+1),=20 log(Drugs+1), xlab=3D"LN of theft offences",ylab=3D"LN of drug=20 offences",pch=3D19,main=3D"Scatterplot of logged=20 data",cex.lab=3D1.3)

lines(log(theft+1),predict(lnreg))

plot(theft, Drugs,=20 xlab=3D"Theft offences",ylab=3D"Drug = offences",pch=3D19,main=3D"Scatterplot of raw=20 data",cex.lab=3D1.3)

lines(theft,exp(predict(lnreg))-1)

abline(lm(Drugs~theft),col=3D"red")

par(mfrow=3Dc(1,1))

detach(crime)

 

# Example 15. Children's = well=20 being

children = <-=20 read.table(paste(webreg,"unicef.txt",sep=3D""),header=3DT)<= /B>

attach(children)

cor.test(risks,health)

cor.test(risks,health,method=3D"spearman")

=

source("ftp://ftp.usc.edu/pub/wilcox/Rallfunv1.v4")

source("ftp://ftp.usc.edu/pub/wilcox/Rallfunv2.v4")

scor(risks,health,xlab=3D"Risk rank",ylab=3D"Health=20 rank")

text(c(2,8,4),c(15,18,19),c("Poland","Greece","Ireland"),pos=3D4= )

detach(children)

 

# Example 16. Food and = drink=20 intake.

anscombe = <-=20 read.table(paste(webreg,"animal.dat",sep=3D""),header=3DT)<= /B>

attach(anscombe)

cbind(anscombe[1:11,],=20 anscombe[12:22,], anscombe[23:33,], = anscombe[34:44,])

meansd = <-=20 rbind(tapply(Food,Type,mean),tapply(Food,Type,sd),=20 tapply(Drink,Type,mean),tapply(Drink,Type,sd))

row.names(meansd)=20 <- c("Food mean","Food sd","Drink mean","Drink = sd")

meansd

summary(aov(Food~Type))

summary(aov(Drink~Type))

Foodg = <-=20 split(Food,Type)

Drinkg = <-=20 split(Drink,Type)

for (i in = 1:4)=20 {print(paste("Group =3D",names(Drinkg)[i])) 

   =20 print(summary(lm(Drinkg[[i]]~Foodg[[i]])))}

library(lattice)

xyplot(Drink~Food|Type)

par(mfrow=3Dc(2,2))

for (i in = 1:4)=20 {

   x <-=20 lm(Drinkg[[i]]~Foodg[[i]])

  =20 plot(Foodg[[i]],Drinkg[[i]],main=3D(paste(names(Foodg)[i])),xlab=3D"Food = intake",ylab=3D"Drink = intake",ylim=3Dc(0,15),xlim=3Dc(0,20))

  =20 abline(x)}

par(mfrow=3Dc(1,1))

library(MASS)

birds = <-=20 rlm(Drinkg[[1]]~Foodg[[1]])

summary(birds)

insect = <-=20 rlm(Drinkg[[2]]~Foodg[[2]])

summary(insect)

mammals = <-=20 rlm(Drinkg[[3]]~Foodg[[3]])

summary(mammals)

mammals2 = <-=20 lm(Drinkg[[3]] ~ poly(Foodg[[3]],2))

summary(mammals2)

reptiles = <-=20 rlm(Drinkg[[4]]~Foodg[[4]])

summary(reptiles)

detach(anscombe)

 

 

# = Chapter 10.=20 Conclusion

set.seed(47)

genoheight = <-=20 runif(100,60,80)

parentheight <-=20 genoheight + runif(100,-10,10)

kidheight = <-=20 genoheight + runif(100,-10,10)

plot(parentheight,kidheight,pch=3D20,xlab=3D"Parent's height in = inches",ylab=3D"Child's height in inches")

abline(0,1)

abline(h=3Dmean(kidheight),v=3Dmean(parentheight),lty=3D"dashed"= )

text(55,84,"Most=20 kids of short\nparents above\nthe diagonal", pos=3D4) =

text(73,57,"Most=20 kids of tall\nparents below diagonal", pos=3D4) =

arrows(58,54,58,58,length=3D.1)

text(54,53, "kids =3D=20 parents",pos=3D4)

cor.test(parentheight,kidheight)

 

 

etasq <- function(aovob){=20

  = effects=20 <- dim(aovob)[1] - 1

  = for (i in=20 1:effects){

    partialeta <-=20 format(aovob[i,2]/(aovob[i,2] + aovob[effects+1,2]), nsmall=3D2, = digits=3D2)=20

    eta <-=20 format(aovob[i,2]/sum(aovob[,2]),nsmall=3D2,digits=3D2)=20

   =20 print(paste("Partial eta sq. =3D", partialeta,", eta squared = =3D",=20  eta,"for",=20 dimnames(aovob[i,])[[1]]))} }=20