# R commands for Chapter 4
# Section 4.1
# Read the HERS data for into a data frame (needs only to be done once if the workspace is saved)
hers=read.table("http://www.uio.no/studier/emner/matnat/math/STK4900/v11/hers.txt",sep="\t",header=T,na.strings=".")
# Unadjusted regression of glucose on exercise (cf Table 4.1, page 71):
unadj.fit=lm(glucose~exercise, data=hers, subset=(diabetes==0))
summary(unadj.fit)
anova(unadj.fit)
# Adjusted regression of glucose on exercise (cf Table 4.2, page 72):
adj.fit=lm(glucose~exercise+age+drinkany+BMI, data=hers, subset=(diabetes==0))
summary(adj.fit)
anova(adj.fit)
# Section 4.3
# Regression of glucose on physical activity (cf first part of Table 4.4, page 80):
hers$physact=factor(hers$physact)
phys.fit=lm(glucose~physact, data=hers, subset=(diabetes==0))
summary(phys.fit)
anova(phys.fit)
# Estimated mean for individuals with level 2 of physical activity and comparison of levels 5 and 3 of physical activity (cf last part of Table 4.4, page 80)
# These commands use the package "contrasts"
library(contrast) # load the contrast-package
contrast(phys.fit,list(physact="2"))
contrast(phys.fit,list(physact="5"),list(physact="3"))
# Section 4.4
# Regression of LDL on BMI (cf first part of Table 4.8, page 94):
bmi.fit=lm(LDL~BMI,data=hers)
summary(bmi.fit)
anova(bmi.fit)
# Regression of LDL on BMI adjusted for other predictors (cf first last of Table 4.8, page 94):
adjbmi.fit=lm(LDL~BMI+age+nonwhite+smoking+drinkany,data=hers)
summary(adjbmi.fit)
anova(adjbmi.fit)
# Section 4.6
# Regression of LDL1 on hormone therapy (HT) and statin use with interaction (cf first part of Table 4.12, page 102):
ht.fit=lm(LDL1~HT+statins+HT:statins,data=hers)
summary(ht.fit)
anova(ht.fit)
# Estimated effect of HT among baseline statin users (cf last part of Table 4.12, page 102)
contrast(ht.fit,list(HT=1,statins=1),list(HT=0,statins=1)) # the command uses the "contrast" package
# Interaction model for BMI and statin use (cf first part of Table 4.13, page 104):
hers$cBMI=hers$BMI- mean(hers$BMI[!is.na(hers$BMI)]) # define centered predictor
stat.fit=lm(LDL~statins+cBMI+statins:cBMI+age+nonwhite+smoking+drinkany,data=hers)
summary(stat.fit)
anova(stat.fit)
# Estimated effect of HT among baseline statin users (cf last part of Table 4.13, page 104)
par1=list(statins=1,cBMI=1,age=0,nonwhite=0,smoking=0,drinkany=0)
par2=list(statins=1,cBMI=0, age=0,nonwhite=0,smoking=0,drinkany=0)
contrast(stat.fit,par1,par2) # the command uses the "contrast" package
# Interaction model for HT effects on absolute change in LDL (cf. Table 4.14, page 107):
hers$LDLch=hers$LDL1 - hers$LDL # define absolute change in LDL
hers$cLDL0=hers$LDL- mean(hers$LDL[!is.na(hers$LDL)]) # define centered LDL at baseline
LDL.fit=lm(LDLch~HT+cLDL0+HT:cLDL0,data=hers)
summary(LDL.fit)
anova(LDL.fit)
# Interaction model for HT effects on percent change in LDL (cf. Table 4.15, page 107):
hers$LDLpctch=100*(hers$LDL1 - hers$LDL)/hers$LDL # define percent change in LDL
pLDL.fit=lm(LDLpctch~HT+cLDL0+HT:cLDL0,data=hers)
summary(pLDL.fit)
anova(pLDL.fit)
# Section 4.7
# CPR (or partial residuals plot) for multiple regression of LDL and HDL on BMI (cf. Fig 4.4, page 111)
# These commands use the package "car"
library(car) # load the car-package
fit.ldl=lm(LDL~BMI+age+nonwhite+smoking+drinkany,data=hers)
fit.hdl=lm(HDL~BMI+age+nonwhite+smoking+drinkany,data=hers)
par(mfrow=c(1,2))
crPlots(fit.ldl, terms=~BMI)
crPlots(fit.hdl, terms=~BMI)
par(mfrow=c(1,1))
# CPR (or partial residuals plot) for multiple regression of HDL with quadratic term in BMI (cf. Fig 4.4, page 111)
fit.hdl2=lm(HDL~BMI+I(BMI^2)+age+nonwhite+smoking+drinkany,data=hers)
crPlots(fit.hdl2, terms=~BMI)
# Some plot of the residuals in multiple regression of LDL on BMI adjusted for other predictors (cf. Fig 4.8, page 115)
fit.ldl=lm(LDL~BMI+age+nonwhite+smoking+drinkany,data=hers)
hist(fit.ldl$res) # histogram
boxplot(fit.ldl$res) # boxplot
qqnorm(fit.ldl$res); qqline(fit.ldl$res) # normal Q-Q plot
# Some plot of the residuals in multiple regression of log(LDL) on BMI adjusted for other predictors (cf. Fig 4.8, page 115)
fit.logldl=lm(log(LDL)~BMI+age+nonwhite+smoking+drinkany,data=hers)
hist(fit.logldl$res) # histogram
boxplot(fit.logldl$res) # boxplot
qqnorm(fit.logldl$res); qqline(fit.logldl$res) # normal Q-Q plot
# Plots of residuals versus predictor and fitted values for multiple regression of HDL with quadratic term in BMI and adjusted for other predictors (cf. Fig 4.10, page 118)
fit.hdl2=lm(HDL~BMI+I(BMI^2)+age+nonwhite+smoking+drinkany,data=hers)
par(mfrow=c(1,2))
ind=(!is.na(hers$HDL))&(!is.na(hers$BMI))&(!is.na(hers$drinkany)) # needed to omit individuals with missing values that are not used when fitting the model
plot(hers$BMI[ind],fit.hdl2$res, xlab="BMI",ylab=""); abline(0,0)
plot(fit.hdl2$fit,fit.hdl2$res, xlab="Fitted value", ylab=""); abline(0,0)
par(mfrow=c(1,1))
# Plots of residuals versus predictor and fitted values for multiple regression of log(HDL) with quadratic term in BMI and adjusted for other predictors (cf. Fig 4.11, page 118)
fit.loghdl2=lm(log(HDL)~BMI+I(BMI^2)+age+nonwhite+smoking+drinkany,data=hers)
par(mfrow=c(1,2))
plot(hers$BMI[ind],fit.loghdl2$res,xlab="BMI",ylab=""); abline(0,0)
plot(fit.loghdl2$fit,fit.loghdl2$res,xlab="Fitted value", ylab=""); abline(0,0)
par(mfrow=c(1,1))
# Boxplot of standardized "dfbetas" for regression of LDL on BMI adjusted for other predictors (cf. Fig 4.14, page 124)
fit.ldl=lm(LDL~BMI+age10+nonwhite+smoking+drinkany,data=hers) # use age10=age/10 as predictor
boxplot(dfbetas(fit.ldl))