# R help for exercise 5 in BSS

 

 

# QUESTION a)

 

# Load the data from the course web page into a data frame, label its four
# columns, and print the data frame for inspection.
salinity <- setNames(
  read.table("http://www.math.uio.no/avdc/kurs/STK4900/data/exer5.dat"),
  c("salt", "saltprev", "trend", "discharge")
)

salinity

 

# Check that the data correspond to those given in the exercise.

 

 

# Overview of the data: summary statistics for every variable, followed by a
# scatterplot matrix showing each pair of variables against each other.
summary(salinity)

pairs(data.matrix(salinity))

 

# Make sure that you understand what the summary measures tell you!

# What do you see from the scatter plots?

 

 

# Attach the data frame so that its columns can be referred to by bare name
# (salt, saltprev, trend, discharge); later parts of this script rely on that.
attach(salinity)

# Do linear regression with all three covariates and inspect the results.
# The data frame is passed explicitly through `data`, so the fit does not
# depend on what happens to be on the search path and the call stored in the
# model records where the variables came from.
lmfull <- lm(salt ~ saltprev + trend + discharge, data = salinity)

summary(lmfull)

 

# How important is each of the covariates? How does this agree with the scatter plots?

 

 

# QUESTION b)

 

# Compute fitted values and residuals.
# The extractor function fitted() is used instead of the abbreviation
# lmfull$fit, which only works because `$` partially matches component names.
saltfit <- fitted(lmfull)

saltres <- salt - saltfit

summary(saltres)

# You may also get the residuals directly with residuals(lmfull)
# (this is the component lmfull$residuals). Check that this is the case:
summary(residuals(lmfull))

 

#(The sum of the residuals is zero, so small differences in their means are due to rounding.)

 

 

# QUESTION c)

 

# We will make various plots of the residuals

 

# Histogram and normal Q-Q plot of the residuals (make one plot at a time).
# residuals() is used instead of the abbreviation lmfull$res, which only works
# through partial matching of `$`; the default plot titles therefore show
# "residuals(lmfull)".
hist(residuals(lmfull))

qqnorm(residuals(lmfull))

 

# What do the plots tell you?

 

 

# Residuals versus fitted values.
# fitted() and residuals() replace the abbreviations lmfull$fit and
# lmfull$res, which only work through partial matching of `$`.
plot(fitted(lmfull), residuals(lmfull), xlab = "Fitted values", ylab = "Residuals")

 

# What does the plot tell you?

 

 

# Residuals versus each of the covariates (make one plot at a time).
# The covariates are referred to by bare name, so the data frame must be
# attached. residuals() replaces the abbreviation lmfull$res, which only works
# through partial matching of `$`.
plot(saltprev, residuals(lmfull), ylab = "Residuals")

plot(trend, residuals(lmfull), ylab = "Residuals")

plot(discharge, residuals(lmfull), ylab = "Residuals")

 

# What do the plots tell you? Are there indications of deviation from linearity?

 

 

# Residuals versus observation number (we do not know if the data are recorded
# in the order they are given, which would be the case in a real study).
# With a single vector, plot() draws the values against their index.
plot(residuals(lmfull), xlab = "obs.number", ylab = "residuals")

 

# What does the plot tell you?

#Can you see any signs of autocorrelation?

 

 

# There are a number of other useful plots for the residuals.
# You get a number of plots (some of those just given and some others) by the
# command below (it may give six different plots - we show the first four of
# these).
# The plots are arranged in a 2 x 2 grid. par() returns the previous setting,
# which is saved and restored afterwards instead of assuming that the device
# was showing a single panel.
old_par <- par(mfrow = c(2, 2))

plot(lmfull, which = 1:4)

par(old_par)

 

# Try to understand what each of the plots tells you!

 

 

 

# QUESTION d)

 

# Fit a model without trend (why?).
# The data frame is passed explicitly through `data`, so the fit does not
# depend on the data frame being attached.
lmred1 <- lm(salt ~ saltprev + discharge, data = salinity)

summary(lmred1)

 

# How is this model compared with the one that also includes trend?

 

 

# Try other models yourself (e.g. with a second-order term for discharge)

# Which model would you suggest to use for predicting salinity?