## com23a of October 2018,
# in loco parentis for Celine stk 4021,
# doing my stk 4020 exam 2010 Exercise 2,
# with the 189 babies of Lemeshow and Hosmer
# (imagine that his wife took part in the Olympics, which
# is forever impressive, even though she finished last)

library("numDeriv")

yy <- scan("babies")   # weight of babies, in kg
nn <- length(yy)       # 189
plot(density(yy))
# looks reasonably normal, after all,
# apart from the tails -- this is a qq plot:
plot(qnorm((1:nn)/(nn+1)), sort(yy))

qq <- 0.10   # we wish inference for the 0.10 quantile

## Model 0, the normal:
# can do everything explicitly by math,
# but ok to let nlm do the job:
logL0 <- function(para)
{
  mu <- para[1]
  sigma <- para[2]
  #
  hei <- -log(sigma) - 0.5*(yy-mu)^2/sigma^2 - 0.5*log(2*pi)
  sum(hei)
}
minuslogL0 <- function(para) {-logL0(para)}
nils0 <- nlm(minuslogL0, c(4.444,1), hessian=T)
ML0 <- nils0$estimate
Jhat0 <- nils0$hessian
logL0max <- logL0(ML0)   # -207.9445
se0 <- sqrt(diag(solve(Jhat0)))
show0 <- cbind(ML0, se0)
print(round(show0, 4))
#         ML0    se0
# [1,]  2.9447 0.0529   mu
# [2,]  0.7271 0.0374   sigma
mu0hat <- ML0[1]
sigma0hat <- ML0[2]
kappa0hat <- mu0hat + sigma0hat*qnorm(qq)   # 2.0128,
# close to the 0.10 quantile of the data, 2.038

## then the more demanding three-parameter model:
# F = Phi((y-mu)/sigma)^exp(a)
# f = exp(a) Phi((y-mu)/sigma)^(exp(a)-1) phi((y-mu)/sigma) (1/sigma)
logL1 <- function(para)
{
  mu <- para[1]
  sigma <- para[2]
  a <- para[3]
  #
  hei0 <- -log(sigma) - 0.5*(yy-mu)^2/sigma^2 - 0.5*log(2*pi)
  hei1 <- a + (exp(a)-1)*log(pnorm((yy-mu)/sigma))
  sum(hei0) + sum(hei1)
}
logL1(c(ML0[1], ML0[2], 0))   # equals logL0max, since a = 0 recovers the normal
minuslogL1 <- function(para) {-logL1(para)}
nils1 <- nlm(minuslogL1, c(ML0[1],ML0[2],0), hessian=T)
ML1 <- nils1$estimate
Jhat1 <- nils1$hessian
logL1max <- logL1(ML1)   # -207.0165
se1 <- sqrt(diag(solve(Jhat1)))
show1 <- cbind(ML1, se1)
print(round(show1, 4))
#          ML1    se1
# [1,]  3.6051 0.3751   mu
# [2,]  0.4827 0.1509   sigma
# [3,] -1.2193 0.8528   a
# the Wald ratio for a is -1.429, not significant,
# so we expect a not too far from zero
Delta <- logL1max - logL0max
# 0.928, not quite a big enough increase,
# so Model 0 wins the AIC competition
mu1hat <- ML1[1]
sigma1hat <- ML1[2]
ahat <- ML1[3]
kappa1hat <- mu1hat + sigma1hat*qnorm(qq^(1/exp(ahat)))   # 1.9906,
# so the two kappa estimates are quite close, actually

# a bit more, to get the approximate standard deviation,
# via the delta method:
kappa1 <- function(para)
{
  mu <- para[1]
  sigma <- para[2]
  a <- para[3]
  #
  mu + sigma*qnorm(qq^(1/exp(a)))
}
kappaML <- kappa1(ML1)
cc1 <- grad(kappa1, ML1)
stdevkappa1 <- sqrt( t(cc1) %*% solve(Jhat1) %*% cc1 )
lazylow <- kappaML - 1.645*stdevkappa1
lazyup <- kappaML + 1.645*stdevkappa1
c(lazylow, kappaML, lazyup)
# 1.858, 1.990, 2.123

## plotting the two fitted densities over the histogram:
yval <- seq(0.70, 5.00, by=0.01)
#
f0val <- dnorm((yval-mu0hat)/sigma0hat)*(1/sigma0hat)
#
aux11 <- dnorm((yval-mu1hat)/sigma1hat)*(1/sigma1hat)
aux12 <- exp(ahat)*pnorm((yval-mu1hat)/sigma1hat)^(exp(ahat)-1)
f1val <- aux11*aux12
hist(yy, prob=T, ylim=c(0,0.55), main=" ",
     xlab="birthweight", ylab="density")
matlines(yval, f0val, type="l", lwd=2, col=1)
matlines(yval, f1val, type="l", lwd=2, col=2)

## ok, prior for (mu,sigma) for Model 0:
# we want the prior for sigma to have 0.10 quantile c and 0.90 quantile d,
# i.e. lambda = 1/sigma^2 should have 0.90 quantile 1/c^2 and 0.10 quantile 1/d^2:
cc <- 0.50
dd <- 2.00
1/cc^2   # 4.00
1/dd^2   # 0.25
Q <- function(ab)
{
  a <- ab[1]
  b <- ab[2]
  #
  oi1 <- pgamma(1/cc^2, a, b) - 0.90
  oi2 <- pgamma(1/dd^2, a, b) - 0.10
  oi1^2 + oi2^2
}
oi <- nlm(Q, c(1.5,1.1), hessian=T)
a0 <- oi$estimate[1]   # 1.1733
b0 <- oi$estimate[2]   # 0.6493
Q(c(a0,b0))   # essentially zero
# so with 1/sigma^2 from Gamma(a0,b0) we succeed in having
# 0.50 as the 0.10 quantile and 2.00 as the 0.90 quantile
# of the prior for sigma
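## a quick add-on check (a sketch, not part of the original solution):
# simulating from the fitted Gamma(a0,b0) prior for lambda = 1/sigma^2
# and reading off the quantiles of sigma = 1/sqrt(lambda) should confirm
# the calibration; priorsim, lambdaprior, sigmaprior are new names here:
priorsim <- 10^5
lambdaprior <- rgamma(priorsim, a0, b0)
sigmaprior <- 1/sqrt(lambdaprior)
quantile(sigmaprior, c(0.10, 0.90))
# should come out close to 0.50 and 2.00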
mu0 <- 2.500
nu0 <- 3.333

# Nils Exercise 22:
# prior is GN(a0, b0, mu0, nu0)
# posterior is GN(an, bn, mun, nun)
ybar <- mean(yy)
Q0 <- sum((yy-ybar)^2)
dn <- (1/nn + 1/nu0)^(-1)
an <- a0 + 0.5*nn
bn <- b0 + 0.5*Q0 + 0.5*dn*(ybar - mu0)^2
mun <- (nu0*mu0 + nn*ybar) / (nu0 + nn)
nun <- nu0 + nn

## simulating lots of (mu,lambda) from the posterior,
# then transforming via lambda = 1/sigma^2, sigma = 1/sqrt(lambda):
# sigma is such that 1/sigma^2 is Gamma(an,bn),
# and mu | sigma is N(mun, sigma^2/nun)
sim <- 10^5
lambdasim <- rgamma(sim, an, bn)
sigmasim <- 1/sqrt(lambdasim)
musim <- mun + (sigmasim/sqrt(nun))*rnorm(sim)
plot(musim, sigmasim)
kappa0sim <- musim + sigmasim*qnorm(qq)
hist(kappa0sim)
quantile(kappa0sim, c(0.05,0.50,0.95))
#  0.05  0.50  0.95
# 1.876 2.000 2.112

# choose to represent the model in terms of (mu,lambda,a);
# can then transform back to sigma afterwards
logL3 <- function(para)
{
  mu <- para[1]
  lambda <- para[2]
  a <- para[3]
  #
  sigma <- 1/sqrt(lambda)
  #
  hei0 <- -log(sigma) - 0.5*(yy-mu)^2/sigma^2 - 0.5*log(2*pi)
  hei1 <- a + (exp(a)-1)*log(pnorm((yy-mu)/sigma))
  sum(hei0) + sum(hei1)
}
logL3(c(mu1hat, 1/sigma1hat^2, ahat))   # matches logL1max, so correct

## mcmc for (mu,lambda,a):
ccc <- 4.00   # prior for a is uniform on [-ccc, ccc]
big <- 135    # penalty pushing the log-prior far down outside [-ccc, ccc]
logprior <- function(para)
{
  mu <- para[1]
  lambda <- para[2]
  a <- para[3]
  sigma <- 1/sqrt(lambda)
  #
  aux21 <- log(dgamma(lambda, a0, b0))
  aux22 <- -0.5*nu0*(mu-mu0)^2/sigma^2 - log(sigma/sqrt(nu0)) - 0.5*log(2*pi)
  aux23 <- log(1/(2*ccc))*(abs(a) <= ccc) - big*(abs(a) > ccc)
  #
  aux21 + aux22 + aux23
}
logprior(c(3.3,0.7,0.4))

sim <- 10^4
mcmc <- 0*(1:sim) %*% t(1:3)
mcmc[1, ] <- c(mu1hat, 1/sigma1hat^2, ahat)   # start at the ML, with lambda = 1/sigma^2
uu <- runif(sim)
ok <- 0*(1:sim)
pracc <- 0*(1:sim)
for (ss in 2:sim)
{
  old <- mcmc[ss-1, ]
  prop <- old + 0.11*se1*rnorm(3)   # random walk proposal, roughly scaled by se1
  #
  aux11 <- logprior(prop) - logprior(old)
  aux12 <- logL3(prop) - logL3(old)
  #
  oi <- exp(aux11+aux12)
  pracc[ss] <- min(c(1,oi))
  ok[ss] <- 1*(uu[ss] <= pracc[ss])
  mcmc[ss, ] <- (1-ok[ss])*old + ok[ss]*prop
}
#
plot(mcmc[ ,1:2])
plot(mcmc[ ,3])
mu1sim <- mcmc[ ,1]
lambda1sim <- mcmc[ ,2]
sigma1sim <- 1/sqrt(lambda1sim)
asim <- mcmc[ ,3]
kappa1sim <- mu1sim + sigma1sim*qnorm(qq^(1/exp(asim)))
hist(kappa1sim)
print(quantile(kappa0sim, c(0.05,0.50,0.95)))
print(quantile(kappa1sim, c(0.05,0.50,0.95)))
#  0.05  0.50  0.95
# 1.877 2.000 2.112   via Model 0
# 1.864 2.003 2.115   via Model 1 and mcmc
# 1.854 1.991 2.123   via lazy Bayes and the delta method

## last point: the posterior model probabilities
# p1 f1 / (p1 f1 + p2 f2) and p2 f2 / (p1 f1 + p2 f2),
# with f = \int L_n(\theta) \pi(\theta) d\theta for each of the two models;
# note f = \exp(lmax) \int \exp(ll - lmax) {\pi(\theta)/\pi^*(\theta)} \pi^*(\theta) d\theta

## simple BIC / Laplace-type approximations first:
logf0 <- logL0max - 0.5*length(ML0)*log(nn)
logf1 <- logL1max - 0.5*length(ML1)*log(nn)
aux0 <- logf0 + 213   # adding a common constant, to avoid underflow in exp
aux1 <- logf1 + 213
p0star <- exp(aux0) / (exp(aux0) + exp(aux1))   # 0.8446
p1star <- exp(aux1) / (exp(aux0) + exp(aux1))   # 0.1554
print(c(p0star, p1star))

# more careful Laplace approximation:
# f = exp(lmax) n^{-p/2} |\hat J/n|^{-1/2} (2\pi)^{p/2} \pi(\hat\theta),
# with the log-prior at the ML taken as 0 for (mu,sigma)
# and log(1/8) for the uniform [-4,4] prior for a:
logf0more <- logf0 - 0.5*log(det(Jhat0/nn)) + 0 + 0.5*2*log(2*pi)
logf1more <- logf1 - 0.5*log(det(Jhat1/nn)) + log(1/8) + 0.5*3*log(2*pi)
aux0 <- logf0more + 213
aux1 <- logf1more + 213
p0starmore <- exp(aux0) / (exp(aux0) + exp(aux1))
p1starmore <- exp(aux1) / (exp(aux0) + exp(aux1))
print(c(p0starmore, p1starmore))
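## a possible add-on (a sketch, not part of the original script): the marginal
# likelihoods f0 and f1 can also be estimated by plain Monte Carlo, drawing
# parameters from the priors above and averaging the likelihoods, which gives
# a rough (and for n = 189 rather noisy) check on the Laplace approximations.
# This reuses a0, b0, mu0, nu0, ccc, logL0, logL1, logL0max, logL1max from
# above; B and the *prior names are new:
B <- 10^4
lamprior <- rgamma(B, a0, b0)                      # lambda = 1/sigma^2 from the prior
sigprior <- 1/sqrt(lamprior)
muprior <- mu0 + (sigprior/sqrt(nu0))*rnorm(B)     # mu | sigma from the prior
aprior <- runif(B, -ccc, ccc)                      # a from its uniform prior
ll0 <- apply(cbind(muprior, sigprior), 1, logL0)
ll1 <- apply(cbind(muprior, sigprior, aprior), 1, logL1)
# subtract the maxima before exponentiating, to avoid underflow:
logf0mc <- logL0max + log(mean(exp(ll0 - logL0max)))
logf1mc <- logL1max + log(mean(exp(ll1 - logL1max)))
aux0 <- logf0mc + 213
aux1 <- logf1mc + 213
print(exp(aux0)/(exp(aux0) + exp(aux1)))   # Monte Carlo version of p0star
print(exp(aux1)/(exp(aux0) + exp(aux1)))   # Monte Carlo version of p1star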