# R commands for Chapter 6
# Section 6.1
# Read the WCGS data for
into a data frame (needs only to be done once if the workspace is saved)
wcgs=read.table("http://www.uio.no/studier/emner/matnat/math/STK4900/v11/wcgs.txt",sep="\t",header=T,na.strings=".")
# Logistic
model for the relationship between chd and age (cf Table 6.2, page 163):
wcgs.age=glm(chd69~age, data=wcgs,
family=binomial)
summary(wcgs.age)
anova(wcgs.age,test="Chisq")
# Logistic
model for chd and arcus senilis (cf Table 6.4, page 165):
wcgs.arcus=glm(chd69~arcus, data=wcgs,
family=binomial)
summary(wcgs.arcus)? #gives estimated regression coefficient, not odds ratio
anova(wcgs.arcus,test="Chisq")
# The
summary-command for a logistic regression fit, does not give odds ratio (i.e.
the exponential of the estimated coefficients)
# To obtain
odds ratios with corresponding 95 % confidence intervals we may use the
function:
expcoef=function(glmobj){
regtab=summary(glmobj)$coef
expcoef=exp(regtab[,1])
lower=expcoef*exp(-1.96*regtab[,2])
upper=expcoef*exp(1.96*regtab[,2])
cbind(expcoef,lower,upper)}
# For chd and arcus senilis we then get the
odds ratio with 95% confidence interval by (cf Table
6.4, page 165):
expcoef(wcgs.arcus)
# Logistic
model for chd and age as factor (cf
Table 6.5, page 166):
wcgs.agec=glm(chd69~factor(agec), data=wcgs, family=binomial)
summary(wcgs.agec)? #gives estimated regression coefficient, not odds
ratio
expcoef(wcgs.agec)? #gives odds ratios with confidence intervals (using the function defined
above)
anova(wcgs.agec,test="Chisq")
# Section 6.2
# Multiple
logistic model for chd risk (cf
Table 6.6, page 168):
# As in the
book we omit an individual with an unusually high cholesterol value
wcgs.mult=glm(chd69~age+chol+sbp+bmi+smoke, data=wcgs,
family=binomial, subset=(chol<600))
summary(wcgs.mult)
# Multiple
logistic model with rescaled predictors (cf Table
6.7, page 170):
wcgs.resc=glm(chd69~age_10+chol_50+bmi_10+sbp_50+smoke, data=wcgs, family=binomial, subset=(chol<600))
summary(wcgs.resc)?? #gives estimated regression coefficient, not odds
ratio
expcoef(wcgs.resc)? #gives odds ratios with confidence intervals (using the function defined
above)
# Logistic
model for wcgs behavioral pattern (cf Table 6.8, page 171):
wcgs.beh=glm(chd69~age_10+chol_50+sbp_50+bmi_10+smoke+factor(behpat),
data=wcgs, family=binomial, subset=(chol<600))
summary(wcgs.beh)??
#gives estimated
regression coefficient, not
odds ratio
expcoef(wcgs.beh)????? #gives
odds ratios with confidence intervals (using the function
defined above)
#
Likelihood ratio test for the four-level wcgs
behavioral pattern (cf Table 6.9, page 172):
anova(wcgs.resc,wcgs.beh, test="Chisq")
#
Likelihood ratio test for the two-level wcgs
behavioral pattern (cf Table 6.10, page 173):
wcgs.beh2=glm(chd69~age_10+chol_50+sbp_50+bmi_10+smoke+dibpat, data=wcgs, family=binomial, subset=(chol<600))
summary(wcgs.beh2)??
#gives estimated
regression coefficient, not
odds ratio
expcoef(wcgs.beh2)????? #gives
odds ratios with confidence intervals (using the function
defined above)
anova(wcgs.beh2,wcgs.beh,
test="Chisq")
# Logistic
model for type A behavior pattern and selected predictors? (cf Table 6.11,
page 174):
wcgs.dibpat=glm(dibpat~age_10+chol_50+sbp_50+bmi_10+smoke, data=wcgs, family=binomial, subset=(chol<600))
summary(wcgs.dibpat)?? #gives estimated regression coefficient, not odds
ratio
expcoef(wcgs.dibpat)????? #gives odds ratios with confidence intervals (using the function defined
above)
# Logistic
model for interaction between arcus and age as a
categorical predictor? (cf Table 6.12, page 176):
wcgs.int=glm(chd69~bage_50+arcus+bage_50:arcus, data=wcgs, family=binomial)
summary(wcgs.int)
# Contrast
for arcus-age interaction model (cf. Table 6.14, page
177)
library(contrast)??????? # load the contrast-package
par1=list(bage_50=1,arcus=1)
par2=list(bage_50=0,arcus=1)
contr.arcus.age=contrast(wcgs.int,par1,par2)??? # #gives estimated contrast, not odds
ratio (should be compared with standard normal, not t-distribution)
contr.arcus.age
# The
contrast-command for a logistic regression fit, does not give odds ratio (i.e.
the exponential of the estimated coefficients)
# To obtain
odds ratios for a contrast with corresponding 95 % confidence intervals we may
use the function:
expcontrast=function(contrastobj){
expcontrast=exp(contrastobj$Contrast)
lower=exp(contrastobj$Contrast-1.96* contrastobj$SE)
upper=exp(contrastobj$Contrast+1.96* contrastobj$SE)
cbind(expcontrast,lower,upper)}
# For arcus-age interaction model
we then get the odds
ratio with 95% confidence interval by (cf Table 6.14,
page 177):
expcontrast(contr.arcus.age)
# Logistic
model for interaction between arcus and age as a
continuous predictor? (cf Table 6.15, page 178):
wcgs.intcont=glm(chd69~arcus+age+arcus:age, data=wcgs,
family=binomial)
summary(wcgs.intcont)
# Logistic
model for interaction between arcus and age as a
continuous predictor, continued? (cf Table 6.16, page 179):
par1=list(age=55,arcus=1)
par2=list(age=55,arcus=0)
contr.arcus.age55=contrast(wcgs.intcont,par1,par2)??? # #gives estimated contrast, not odds
ratio (should be compared with standard normal, not t-distribution)
contr.arcus.age55
expcontrast(contr.arcus.age55)? # odds ratio using function defined above
# Expanded logistic
model for chd events?
(cf Table 6.17, page 181):
# First make
a "clean" data set without missing cholesterol values and without the
individual with the unusually high cholesterol value
wcgs.clean=wcgs[!is.na(wcgs$chol)&(wcgs$chol<600), ]
wcgs.all=glm(chd69~age_10+chol_50+sbp_50+bmi_10+smoke+dibpat+bmi_10:chol_50+bmi_10:sbp_50,
data=wcgs.clean, family=binomial)
summary(wcgs.all)
#
Predictions for five individuals (id=2001 to id=2005) from the logistic
model? (cf
Table 6.18, page 181)
pred=predict(wcgs.all,type="response")
idpred=wcgs.clean$id<=2005
usepred=c("id","chd69","age","chol","sbp","bmi","smoke","dibpat")
cbind(wcgs.clean[idpred,usepred],pred[idpred])
# Section 6.4
# Pearson
residuals from the logistic model plotted according to increasing id-number (cf Figure 6.4, page 189)
idorder=order(wcgs$id)
plot(1:dim(wcgs)[1],residuals(wcgs.all,type="pearson")[idorder],xlab="Observation
number",ylab="Pearson residuals")