# R commands Chapter 3
# Section 3.1
# Read the HERS data into a data frame (needs only to be done once if the workspace is saved)
hers=read.table("http://www.uio.no/studier/emner/matnat/math/STK4900/v11/hers.txt",sep="\t",header=T,na.strings=".")
# Attach the data frame:
attach(hers)
# t-test of difference in average glucose by exercise (cf Table 3.1, page 30):
t.test(glucose~exercise,subset=(diabetes==0),var.equal=T) # two-sided test (diff != 0), which is the default
t.test(glucose~exercise,subset=(diabetes==0),var.equal=T,alternative="less") # one-sided test (diff < 0)
t.test(glucose~exercise,subset=(diabetes==0),var.equal=T,alternative="greater") # one-sided test (diff > 0)
# Summary and one-way ANOVA of systolic blood pressure (SBP) by ethnicity (raceth) (cf Table 3.2, page 32):
summary(SBP[raceth==1])
summary(SBP[raceth==2])
summary(SBP[raceth==3])
summary(aov(SBP~factor(raceth)))
# t-test of difference in average glucose by exercise allowing for unequal variances (cf Table 3.3, page 34):
t.test(glucose~exercise,subset=(diabetes==0)) # two-sided test (note that unequal variances is the default)
# Detach the hers data frame:
detach(hers)
# Section 3.3
# In Section 3.3 a 10% sample of the HERS data is used. We read this sample into a data frame:
hers.sample=read.table("http://www.uio.no/studier/emner/matnat/math/STK4900/v11/hers.sample.txt",header=T)
# Plot of systolic blood pressure versus age with fitted regression line (cf. Figure 3.1, page 37):
plot(hers.sample$age,hers.sample$sbp,xlab="Age in Years",ylab="Systolic Blood Pressure",pch=20)
abline(lm(sbp~age,data=hers.sample))
# Linear regression of systolic blood pressure versus age (cf. Table 3.4, page 40):
hers.fit=lm(sbp~age,data=hers.sample)
summary(hers.fit)
anova(hers.fit)
# Section 3.4
# Read the WCGS data into a data frame (needs only to be done once if the workspace is saved)
wcgs=read.table("http://www.uio.no/studier/emner/matnat/math/STK4900/v11/wcgs.txt",sep="\t",header=T,na.strings=".")
# Attach the data frame:
attach(wcgs)
# 2x2 contingency table for chd and arcus (cf. first part of Table 3.5, page 45):
table(arcus,chd69)
# Risk difference for chd according to arcus (cf. first line in second part of Table 3.5, page 45)
# (need to read in the 2x2 table to get it in the right format)
tab.arcus=matrix(c(102,153,839,2058),ncol=2)
# first (second) row is number of chd cases and number of non-cases for group 1 (group 2)
prop.test(tab.arcus,correct=F)
# Fisher exact test for femal partner's HIV status (cf. Table 3.6, page 49):
tab.hiv=matrix(c(3,4,2,22),nrow=2) # read data table in similar format as above
fisher.test(tab.hiv) # odds ratio estimate differs from the one in the book
# CHD by age group (cf. Table 3.7, page 50):
table(chd69,agec)
chisq.test(table(chd69,agec),correct=F)
# Twenty-year vital status by smoking behaviour (cf. Table 3.9, page 52):
tab.smoke=matrix(c(139,230,443,502),nrow=2)
prop.test(tab.smoke, correct=F)
# Twenty-year vital status by smoking behaviour stratified by age (cf. Table 3.10, page 53):
tab.smoke.age=array(c(19,13,269,327,78,52,167,147,42,165,7,28),dim=c(2,2,3))
mantelhaen.test(tab.smoke.age, correct=F)
# Detach the wcgs data frame:
detach(wcgs)
# Section 3.5
# Read the leukemia data into a data frame (needs only to be done once if the workspace is saved)
leuk=read.table("http://www.uio.no/studier/emner/matnat/math/STK4900/v11/leuk.txt",sep="\t",header=T)
# Survival curves by treatment for leukemia patients (cf. Figure 3.2, page 58):
library(survival) # we need to use the survival library
leuk.fit=survfit(Surv(time,cens)~group,data=leuk)
plot(leuk.fit, lty=1:2,mark.time=F,xlab="Weeks since randomization",ylab="Proportion in remission")
# Logrank test leukemia example (cf. Table 3.14, page 61):
survdiff(Surv(time,cens)~group,data=leuk)