#########################################################################
# COMPUTATIONAL STATISTICS
# by Geof Givens and Jennifer Hoeting
# CHAPTER 7 EXAMPLES
#########################################################################

# pacman must already be installed; then load contributed
# packages (including pacman) with pacman
pacman::p_load(pacman, rio, tidyverse, VGAM)

#Set Working Directory to Source File Location
library("rstudioapi")                                 # Load rstudioapi package

setwd(dirname(getActiveDocumentContext()$path))       # Set working directory to source file location
getwd()                                               # Check updated working directory

############################################################################
### EXAMPLE 7.2 ESTIMATING A MIXTURE PARAMETER
############################################################################
############################################################################
# y     = observed data
# n     = number of iterations 
# x_val = contains mixture parameter values
# f     = posterior density
# R     = computes Metropolis-Hastings ratio
# g     = proposal density
############################################################################
# NOTES
# In the following, the density of the prior is uniform over the 
# support for the proposal and hence not included in the computation of the
# Metropolis-Hastings ratio.
############################################################################

## Mixture data
#mix_dat = read.table(file.choose(),header=TRUE)
mix_dat = read.table(file="../Datasets/mixture.dat",sep="",header=T)
head(mix_dat)
y = mix_dat$y

## HISTOGRAM OF DATA AND PLOT OF MIXTURE DISTRIBUTION (See figure 7.1)
par(mfrow=c(1,1))
x=seq(5,14,by=.01)
d=.7*dnorm(x,7,.5) + .3*dnorm(x,10,.5)
hist(y,breaks=17,freq=FALSE,ylab="density",main="Histogram of Mixture Data \n See Fig 7.1 in Givens & Hoeting")
points(x,d,type="l")

#plot of the prior Beta distributions
p = seq(0,1, length=100)
plot(p, dbeta(p, 2,10), ylab='density', type ='l', col='red',lty=2)
lines(p, dbeta(p, 1,1), col='black') 
abline(v=0.7,lty=3,lwd=2)
legend("topright", legend=c("Beta(2,10)", "Beta(1,1)"),
       col=c("red","black"), lty=c(2,1), cex=1)

1-pbeta(.5,1,1)
1-pbeta(.5,2,10)

## INITIAL VALUES
n = 10000
x_val1 = NULL
x_val2 = NULL
set.seed(0)

## FUNCTIONS
f = function(x){prod(x*dnorm(y,7,0.5) + (1-x)*dnorm(y,10,0.5))}
R = function(xt,x){f(x)*g(xt)/(f(xt)*g(x))}

## MAIN
# BETA(1,1) PROPOSAL DENSITY
g = function(x){dbeta(x,1,1)}
x_val1[1] = rbeta(1,1,1)
for(i in 1:n){
      xt = x_val1[i]
      x = rbeta(1,1,1)
      p = min(R(xt,x),1)
      d = rbinom(1,1,p)
      x_val1[i+1] = x*d + xt*(1-d)
}

mean(x_val1[201:(n+1)])

par(mfrow=c(2,2))
plot(x_val1[201:(n+1)],ylim=c(0,1),type="l",ylab=expression(delta^(t)),xlab="t")
title("Sample path for Beta(1,1) Proposal Dist.")
hist(x_val1[201:(n+1)],breaks=20,xlab=expression(delta^(t)),
      main="Hist. for Beta(1,1) Proposal Dist.")

# BETA(2,10) PROPOSAL DENSITY
g = function(x){dbeta(x,2,10)}
x_val2[1] = rbeta(1,2,10)
for(i in 1:n){
      xt = x_val2[i]
      x = rbeta(1,2,10)
      p = min(R(xt,x),1)
      d = rbinom(1,1,p)
      x_val2[i+1] = x*d + xt*(1-d)
}

mean(x_val2[201:(n+1)])

plot(x_val2[201:(n+1)],ylim=c(0,1),type="l",ylab=expression(delta^(t)),xlab="t")
title("Sample path for Beta(2,10) Proposal Dist.")
hist(x_val2[201:(n+1)],breaks=20,xlab=expression(delta^(t)),
      main="Hist. for Beta(2,10) Proposal Dist.")


############################################################################
### EXAMPLE 7.3 ESTIMATING A MIXTURE PARAMETER (RANDOM WALK)
############################################################################
#
# x     	= observed data
# log_like 	= function to compute the log likelihood
# p        	= mixture parameter values
# u        	= logit of the mixture parameter values
# R     	= Metropolis-Hastings ratio
# num_itr 	= number of iterations for the MCMC chain
############################################################################

## NOTES
# In the following, the density of the prior distribution is uniform over the 
# support for the proposal and hence not included in the computation of
# the Metropolis-Hastings ratio. Also, since the proposal density is 
# symmetric about zero, the Metropolis-Hastings ratio can be
# reduced to just the posterior densities and Jacobian. In
# this example we leave the unreduced version of the ratio to show 
# implementation, but in subsequent implementations it may be reduced when
# possible.
############################################################################

# UNIFORM(-1,1) WALK
## INITIAL VALUES
#READ IN THE DATA
mix_dat = read.table(file="../Datasets/mixture.dat",sep="",header=T)
x = mix_dat$y

#ESTABLISH INITIAL VALUES
num_itr = 10000
u=rep(0,num_itr)
u[1]= runif(1,-1,1)
p=rep(0,num_itr)
p[1]=exp(u[1])/(1+exp(u[1]))
burn = 1:1000

set.seed(1)

## FUNCTIONS
log_like<-function(p,x) {
	  sum(log(p*dnorm(x,7,.5)+(1-p)*dnorm(x,10,.5)))
}

## MAIN
for (i in 1:(num_itr-1)) {
	u[i+1]=u[i]+runif(1,-1,1)
	p[i+1]=exp(u[i+1])/(1+exp(u[i+1]))
	R=exp(log_like(p[i+1],x)-log_like(p[i],x))*exp(u[i])/exp(u[i+1])
	if (R<1)
		 if(rbinom(1,1,R)==0)	{p[i+1]=p[i]; u[i+1]=u[i]}
}

mean(p[-burn])

par(mfrow=c(2,2))
plot(p,ylim=c(0,1),type="l",ylab=expression(delta^(t)),xlab="t")
hist(p,breaks=20,xlab=expression(delta^(t)), main="Hist. for Unif(-1,1) Walk")

# UNIFORM(-0.01,0.01) WALK
##INITIAL VALUES
p2=rep(0,num_itr)
p2[1]=exp(u[1])/(1+exp(u[1]))

## MAIN
for (i in 1:(num_itr-1)) {
	u[i+1]=u[i]+runif(1,-.01,.01)
	p2[i+1]=exp(u[i+1])/(1+exp(u[i+1]))
	R= exp(log_like(p2[i+1],x)-log_like(p2[i],x))*exp(u[i])/exp(u[i+1])
	if (R<1)
		 if(rbinom(1,1,R)==0)	{p2[i+1]=p2[i]; u[i+1]=u[i]}
}

mean(p2[-burn])

plot(p2,ylim=c(0,1),type="l",ylab=expression(delta^(t)),xlab="t")
hist(p2,breaks=20,xlab=expression(delta^(t)), main="Hist. for Unif(-0.01,0.01) Walk")

dev.off()

#Exercises:  
# 1. Recode example 7.3 using the likelihood instead of the log-likelihood. What changes in the code?  
# 2. Re-run the Uniform(-0.01,0.01) code using more iterations. What happens?  
# 3. Is either case sensitive to starting value? Why or why not?

############################################################################
### EXAMPLE 7.6 FUR SEAL PUP CAPTURE-RECAPTURE STUDY (GIBBS SAMPLING)
############################################################################
# ci    	= number captured during each census
# mi    	= number newly caught during each census
# r     	= number of unique fur seal pups observed
# I		= number of census attempts
# N     	=  estimated population size
# alpha     = computes estimated capture probabilities for each census
# num_itr  	= number of iterations
# burn  	= iterations to discard for burn-in
############################################################################
## INITIAL VALUES
ci = c(30,22,29,26,31,32,35)  
mi = c(30,8,17,7,9,8,5)
r = sum(mi)
I=7

num_itr=100000
alpha= matrix(0,num_itr,I)
N=rep(0,num_itr)
N[1]=sample(84:500,1) 
set.seed(4)
burn = 1:1000

#MAIN
for (i in 2:num_itr) {
   alpha[i,]=rbeta(I,ci+.5,N[i-1]-ci+.5)
   N[i]=rnbinom(1,r+1,1-prod(1-alpha[i,]))+r
}

# DISCARDING BURN-IN
alpha_out = alpha[-burn,]
N_out = N[-burn]

## OUTPUT
mean(N_out)      # POSTERIOR MEAN OF N

#MANUAL COMPUTATION OF AN HPD INTERVAL
#NOTE:  this is not a sophisticated method to compute an HPD interval, but
#it is adequate for a one-time computation of an HPD interval and a good learning tool.

table(N_out)
sum(N_out>=85 & N_out<=95)/length(N_out)
sum(N_out>=85 & N_out<=94)/length(N_out)
sum(N_out>=84 & N_out<=93)/length(N_out)
sum(N_out>=84 & N_out<=94)/length(N_out)

## OUTPUT PLOTS
plot((max(burn)+1):num_itr,N_out,type="l",ylim=c(79,110),
      xlab="t",ylab="N",main="Sample path for N")

boxplot(split(rowMeans(alpha_out),N_out),xlab="mean capture probability",
      ylab="N",main="Split Boxplot for Seal Pup Example",horizontal=T)

hist(N_out,freq=FALSE,xlab="N",
      main="Estimated marginal posterior probabilities for N")

#Exercise:
#1.  Write a function to compute the HPD interval for discrete data like the N in the seal example.  

############################################################################
### EXAMPLE 7.8 MIXTURE DISTRIBUTION, CONTINUED
############################################################################
#See EXAMPLE 7.2 to calculate these inputs:
#x_val1 = delta values from independence chain with Beta(1,1) proposal density
#x_val2 = delta values from independence chain with Beta(2,10) proposal density

acf(x_val1,xlab="lag",ylab="acf",
    main="delta values from independence chain with Beta(1,1) proposal density")
acf(x_val2,xlab="lag",ylab="acf",
    main="delta values from independence chain with Beta(2,10) proposal density")

############################################################################
### EXAMPLE 7.10 FUR SEAL PUP CAPTURE-RECAPTURE STUDY, CONTINUED
############################################################################
# ci    	= number captured during each census
# mi    	= number newly caught during each census
# r     	= number of unique fur seal pups observed
# I		= number of census attempts
# N_vals 	=  estimated population size
# alpha_vals = computes estimated capture probabilities for each census
# u_vals 	= contains log values of the parameters for estimating the capture probs
# num_itr  	= number of iterations
# burn  	= iterations to discard for burn-in

# f     = computes posterior density, p(theta|alpha)
# J     = computes Jacobian
# R     = computes Metropolis-Hastings ratio

############################################################################
############################################################################
## NOTES
# Since the proposal density is symmetric about zero, the 
# Metropolis-Hastings ratio is reduced to just the posterior 
# densities and Jacobian. 
############################################################################

## INITIAL VALUES
ci = c(30,22,29,26,31,32,35)  
mi = c(30,8,17,7,9,8,5)
r = sum(mi)
I = 7

set.seed(4)
num_itr=100000
burn = 1:1000

alpha_vals= matrix(0,num_itr,I)
u_vals = matrix(0,num_itr,2)
N_vals=rep(0,num_itr)
N_vals[1]=sample(84:500,1) 

## FUNCTIONS
new_log_target=function(exp.uv,p) {
  I*(lgamma(sum(exp.uv))-lgamma(exp.uv[1])-lgamma(exp.uv[2])) +
    exp.uv[1]*sum(log(p))+exp.uv[2]*sum(log(1-p))-sum(exp.uv)/1000+
    log(exp.uv[1])+log(exp.uv[2])
}

R = function(new,old,alphas) {
   exp(new_log_target(exp(new),alphas)-new_log_target(exp(old),alphas))
}


## MAIN
for (i in 2:num_itr) {
   alpha_vals[i,]=rbeta(I,ci+exp(u_vals[i-1,1]),N_vals[i-1]-ci+exp(u_vals[i-1,2]))
   N_vals[i]=rnbinom(1,r,1-prod(1-alpha_vals[i,])) + r
   u.star=u_vals[i-1,]+rnorm(2,0,.085)
   p=min(R(u.star,u_vals[i-1,],alpha_vals[i,]),1)
   d = rbinom(1,1,p)
   u_vals[i,] = u.star*d + u_vals[i-1,]*(1-d)
}

# DISCARDING BURN-IN
N_out = N_vals[-burn]
u_out = u_vals[-burn,]
alpha_out = alpha_vals[-burn,]

## OUTPUT
mean(N_out) # POSTERIOR MEAN OF N

#MANUAL COMPUTATION OF AN HPD INTERVAL
#NOTE:  this is not a sophisticated method to compute an HPD interval, but
#it is adequate for a one-time computation of an HPD interval and a good learning tool.

table(N_out)
sum(N_out>=85 & N_out<=95)/length(N_out)
sum(N_out>=85 & N_out<=94)/length(N_out)
sum(N_out>=84 & N_out<=93)/length(N_out)
sum(N_out>=84 & N_out<=94)/length(N_out)

## OUTPUT PLOTS 
#Plots like Figures 7.10
acf(alpha_out[,1],xlab="lag",ylab="acf",
    main=expression(paste("Autocorrelation Plot for ",alpha[1])))
a=(dim(u_out)[1]-999):dim(u_out)[1]
plot(u_out[a,1],u_out[a,2],type="l",xlab=expression(U[1]^{(t)}),ylab=expression(U[2]^{(t)}))

#These plots aren't in the book, but are similar to the plots in Example 7.6
plot((max(burn)+1):num_itr,N_out,type="l",
      xlab="t",ylab=expression(N^{(t)}),main="Sample path for N")

boxplot(split(rowMeans(alpha_out),N_out),xlab="Mean Capture Probability",
        ylab=expression(N^{(t)}),main="Split boxplot for seal pup example",horizontal = T)

hist(N_out,freq=FALSE,xlab="N", ylab="density",
      main="Estimated marginal posterior probabilities for N")

############################################################################
### END OF FILE

