#================================================================================#
# Stat 7650: Computational Statistics -- Spring 2025                             #
# R code examples for Markov Chain Monte Carlo                                   #
# Based in part on Givens & Hoeting (Chap 7-8) and Lange (Chap 26-27)            #
# Modified from code of R. Martin                                                #
#================================================================================#

#Set Working Directory to Source File Location
library("rstudioapi")                                 # Load rstudioapi package
setwd(dirname(getActiveDocumentContext()$path))       # Set working directory to source file location
#getwd()  

source("../RCode/AuxFunctions.r")

library(ggplot2)

# Set the path for saving plots
path.name <- "../STAT7650-Slides/figures"

############################################################################
## --- Example: Simple Random Walk
############################################################################

# Number of steps
n <- 1000

# Generate iid U_i values from Unif({-1, 1})
# use sample() function to generate our desired values
set.seed(11)
U_i <- sample(c(-1, 1), size = n, replace = TRUE)

# Initialize X_0
X_0 <- 0

# Compute X_n as the cumulative sum of U_i values, starting from X_0
X_n <- cumsum(c(X_0, U_i))

# Plot the random walk
plot(X_n, type = 'l', main = "Simple Random Walk", xlab = "step (n)", 
     ylab = expression(paste("position (",X[n],")")), col = "blue")
abline(h = 0, lty = 1,lwd=2, col = "red")  # Adds a horizontal line at X_0 = 0 for reference

############################################################################
# Example -- Cosine Model
############################################################################

# Dataset
X <- c(3.91, 4.85, 2.28, 4.06, 3.70, 4.04, 5.46, 3.53, 2.28, 1.96, 2.53,
       3.88, 2.22, 3.47, 4.82, 2.46, 2.99, 2.54, 0.52, 2.50)

# Likelihood function
lik <- function(theta) {
    valid_ind <- theta <= pi & theta >= -pi
    out <- numeric(length(theta))
    out[valid_ind] <- exp(colSums(log(1 - cos(outer(X, theta[valid_ind], "-")))))
    return(out)
}

# Proposal functions
a <- 0.5
dprop <- function(theta, theta0) dunif(theta, theta0 - a, theta0 + a)
rprop <- function(theta0) runif(1, theta0 - a, theta0 + a)

# Denominator calculation for posterior
den <- integrate(lik, -pi, pi)$value

# Posterior density function
dpost <- function(theta) lik(theta) / den

# Setup for MCMC and plotting
x_seq <- seq(-pi, pi, length.out = 150)
dpost_vals <- dpost(x_seq)
ylim <- c(0, 1.05 * max(dpost_vals))
N <- 10000
B <- 5000

# Running the Metropolis-Hastings algorithm (assuming mh function is defined elsewhere)
theta_ini <- runif(1, -pi, pi)
theta_mcmc <- mh(theta_ini, lik, dprop, rprop, N, B)

# Plotting the histogram of sampled theta values
#pdf(file=paste0(path.name, "/hist_mcmc_cosine.pdf"), width=7, height=5)
hist(theta_mcmc$x, freq = FALSE, col = "gray", border = "white", ylim = ylim, 
     xlab = expression(theta), ylab="density", main = "Posterior - Cosine Model")
lines(x_seq, dpost_vals,lwd=2,col=2)
#dev.off()

# Trace plot for MCMC samples
#pdf(file=paste0(path.name, "/trace_mcmc_cosine.pdf"), width=7, height=5)
plot(theta_mcmc$x, type = "l", col = "gray", xlab = "iteration", ylab = expression(theta),
     main=expression(paste("Trace Plot for ",theta)))
lines(1:N, cumsum(theta_mcmc$x) / (1:N),lwd=2)
#dev.off()

# The 5th and 95th percentiles of the sampled theta values
print(quantile(theta_mcmc$x, c(0.05, 0.95)))

############################################################################
# Example -- Weibull Model
############################################################################
# Weibull model parameters and data initialization
X <- c(0.56, 2.26, 1.90, 0.94, 1.40, 1.39, 1.00, 2.32, 2.08, 0.89, 1.68)
n <- length(X)  # Sample size
bb <- 2  # Prior shape parameter for kappa
cc <- 1  # Prior scale parameter for kappa

# Posterior density function
dpost<- function(theta) {
  kappa <- theta[1]
  lambda <- theta[2]
  t1 <- sum(log(X)) - cc  
  t2 <- sum(X^kappa) 
  o <- log(lambda)*(-kappa * n) + log(kappa)*(n + bb - 1) +(kappa * t1 -lambda^(-kappa) *t2 - lambda)
  return(exp(o))
}

# Proposal density function
dprop <- function(theta, theta0) exp(sum(dexp(theta, rate = 1 / theta0, log = TRUE)))

# Random proposal function
rprop <- function(theta0) rexp(2, rate = 1 / theta0)

# Running the Metropolis-Hastings algorithm
init_theta <- c(1, 1)  # Initial values for kappa and lambda
out <- mh(init_theta, dpost, dprop, rprop, 10000, 5000)

# Plotting the results
op <- par(mfrow = c(2, 2))  # Setting up plotting area
# kappa iteration plot
plot(out$x[, 1], type = "l", col = "gray", xlab = "iteration", ylab = expression(kappa))
# lambda iteration plot
plot(out$x[, 2], type = "l", col = "gray", xlab = "iteration", ylab = expression(lambda))
# Histogram for kappa
hist(out$x[, 1], freq = FALSE, col = "gray", border = "white", xlab = expression(kappa), main = "")
# Histogram for lambda
hist(out$x[, 2], freq = FALSE, col = "gray", border = "white", xlab = expression(lambda), main = "")
# Restoring previous plotting settings
par(op)

# Histogram for kappa
#pdf(file=paste0(path.name, "/marg_post_kappa.pdf"), width=7, height=5)
hist(out$x[, 1], freq = FALSE, col = "gray", border = "white", xlab = expression(kappa), main = "")
abline(v=1,lty=1,col="red",lwd=2)

#dev.off()

############################################
#Modernized Version
############################################

# Data setup is as above

# Prior distribution function
prior <- function(lambda, kappa, bb, cc) {
  exp(-(lambda + cc * kappa)) * kappa^(bb - 1)
}

# Likelihood function
like <- function(lambda, kappa, X) {
  prod((kappa/lambda) * (X/lambda)^(kappa-1) * exp(-(X/lambda)^kappa))
}

# Posterior function (unnormalized)
post <- function(lambda, kappa, X, bb, cc) {
  like(lambda, kappa, X) * prior(lambda, kappa, bb, cc)
}

# Metropolis-Hastings algorithm for sampling
mh_samp <- function(n_iter, start_val, X, bb, cc) {
  samples <- matrix(nrow = n_iter, ncol = 2)  # Store lambda and kappa
  samples[1,] <- start_val  # Initial values
  
  for (i in 2:n_iter) {
    current <- samples[i-1,]
    proposal <- rexp(2, 1/current)  # Exponential proposal distribution
    
    # Calculate acceptance ratio
    a <- post(proposal[1], proposal[2], X, bb, cc) / post(current[1], current[2], X, bb, cc)
    
    if (runif(1) < a) {
      samples[i,] <- proposal  # Accept proposal
    } else {
      samples[i,] <- current  # Reject proposal, keep current
    }
  }
  
  colnames(samples) <- c("lambda", "kappa")
  return(samples)
}

# Perform sampling
set.seed(123)  # For reproducibility

n_iter <- 11000  # Total iterations
start_val <- c(1, 1)  # Starting values for lambda and kappa
samples <- mh_samp(n_iter, start_val, X, bb, cc)

# Discard burn-in
burn_in <- 1000
samples <- samples[(burn_in+1):n_iter,]

##############
# Visualize the marginal posterior of kappa
#pdf(file=paste0(path.name, "/marg_post_kappa.pdf"), width=7, height=5)
ggplot(data.frame(kappa = samples[, "kappa"]), aes(x = kappa)) +
  geom_histogram(aes(y = after_stat(density)), binwidth = 0.05, fill = "skyblue", color = "black") +
  geom_density(col = "black", adjust = 2, lwd=1) +  # Increase adjust for smoother density
  geom_vline(aes(xintercept = mean(kappa)), color = "green", linetype = "dashed", linewidth = 1) +
  geom_vline(aes(xintercept = 1), color = "red", linetype = "solid", linewidth = 1) +
  labs(title = expression(paste("Marginal Posterior Distribution of ", kappa)), 
       x = expression(kappa), y = "density")
#dev.off()

# You might want to explore summary statistics or other visualizations
summary(samples[, "kappa"])
mean(samples[, "kappa"]>1)

##############
# Visualize the marginal posterior of lambda
#pdf(file=paste0(path.name, "/marg_post_lambda.pdf"), width=7, height=5)
ggplot(data.frame(lambda = samples[, "lambda"]), aes(x = lambda)) +
  geom_histogram(aes(y = after_stat(density)), binwidth = 0.05, fill = "skyblue", color = "black") +
  geom_density(col = "black", adjust = 2,lwd=1) +  # Increase adjust for smoother density
  geom_vline(aes(xintercept = mean(lambda)), color = "green", linetype = "dashed", linewidth = 1) +
  geom_vline(aes(xintercept = 1), color = "red", linetype = "solid", linewidth = 1) +
  labs(title = expression(paste("Marginal Posterior Distribution of ", lambda)), 
       x = expression(lambda), y = "density")
#dev.off()

# You might want to explore summary statistics or other visualizations
summary(samples[, "lambda"])
mean(samples[, "lambda"]>1)

############################################################################
# Example -- Logistic Regression 
############################################################################

# Data preparation
y <- c(rep(1, 4), rep(0, 8), 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0)
x <- c(53, 57, 58, 63, 66, 67, 67, 67, 68, 69, 70, 70, 70, 70, 72, 73, 75, 75, 76, 76, 78, 79, 81)

# Logistic regression using glm
log_reg_out <- glm(y ~ x, family = binomial(logit))
print(summary(log_reg_out))

# Extracting MLE estimates and variance
a_mle <- coef(log_reg_out)[1]
b_mle <- coef(log_reg_out)[2]
var_a_mle <- vcov(log_reg_out)[1, 1]
var_b_mle <- vcov(log_reg_out)[2, 2]
b_mme <- exp(a_mle + 0.577216) # Euler-Mascheroni constant approximation

# Posterior density function
dpost <- function(theta) {
    a <- theta[1]
    b <- theta[2]
    p <- 1 / (1 + exp(-(a + b * x))) # Logistic model for probability
    lik <- prod(dbinom(y, size = 1, prob = p)) # Likelihood
    dprior <- exp(a) * exp(-exp(a) / b_mme) / b_mme # Prior density for a
    return(lik * dprior)
}

# Proposal density function
dprop <- function(theta, theta0) {
    a <- theta[1]
    b <- theta[2]
    pr1 <- exp(a) * exp(-exp(a) / b_mme) / b_mme
    pr2 <- dnorm(b, mean = b_mle, sd = sqrt(var_b_mle))
    return(pr1 * pr2)
}

# Random proposal function
rprop <- function(theta0) {
    a <- log(rexp(1, rate = 1 / b_mme))
    b <- rnorm(1, mean = b_mle, sd = sqrt(var_b_mle))
    return(c(a, b))
}

# Metropolis-Hastings algorithm setup
N <- 20000
B <- 5000
mh_out <- mh(c(a_mle, b_mle), dpost, dprop, rprop, N, B)

Ylab = "density"

# Plotting
op <- par(mfrow = c(3, 2))
hist(mh_out$x[, 1], freq = FALSE, col = "gray", border = "white", ylab=Ylab, xlab = expression(alpha), main = "")
hist(mh_out$x[, 2], freq = FALSE, col = "gray", border = "white", ylab=Ylab,xlab = expression(beta), main = "")
plot(mh_out$x[, 1], type = "l", col = "gray", xlab = "iteration", ylab = expression(alpha))
lines(1:N, cumsum(mh_out$x[, 1]) / (1:N))
plot(mh_out$x[, 2], type = "l", col = "gray", xlab = "iteration", ylab = expression(beta))
lines(1:N, cumsum(mh_out$x[, 2]) / (1:N))
acf(mh_out$x[, 1], ylab = expression(ACF(alpha)), main = "")
acf(mh_out$x[, 2], ylab = expression(ACF(beta)), main = "")
par(op)

# Predictive probabilities for different x values
calc_p <- function(beta_vals, x_val) {
    1 / (1 + exp(-(beta_vals[, 1] + beta_vals[, 2] * x_val)))
}

# Histograms for predictive probabilities
beta_vals = mh_out$x
op <- par(mfrow = c(2, 2))
hist(calc_p(beta_vals, 80), freq = FALSE, col = "gray", border = "white", ylab=Ylab, xlab = "p(80)", main = "")
hist(calc_p(beta_vals, 65), freq = FALSE, col = "gray", border = "white", ylab=Ylab, xlab = "p(65)", main = "")
hist(calc_p(beta_vals, 45), freq = FALSE, col = "gray", border = "white", ylab=Ylab, xlab = "p(45)", main = "")
hist(calc_p(beta_vals, 31), freq = FALSE, col = "gray", border = "white", ylab=Ylab, xlab = "p(31)", main = "")
par(op)

#pdf(file=paste0(path.name, "/log_reg_p65.pdf"), width=7, height=5)
hist(calc_p(beta_vals, 65), freq=FALSE, col="gray", border="white", ylab=Ylab, xlab="p(65)", main="")
#dev.off()

#pdf(file=paste0(path.name, "/log_reg_p31.pdf"), width=7, height=5)
hist(calc_p(beta_vals, 31), freq=FALSE, col="gray", border="white", ylab=Ylab, xlab="p(31)", main="")
#dev.off()

# Calculation and printing of credible intervals
cred.int.p31 <- quantile(calc_p(mh_out$x, 31), c(0.025, 0.975))
print(cred.int.p31)

############################################################################
# Example -- Gibbs Sampler for Bivariate Normal
############################################################################

# The Gibbs sampler function for bivariate normal distribution
b_in <- 1000  # Specify the burn-in period
rBVN_gibbs <- function(n, rho, burn_in) {
    sqrt_v <- sqrt(1 - rho^2)  # Pre-compute the square root term for efficiency
    samples <- matrix(0, nrow = n + burn_in, ncol = 2)  # Initialize matrix to store samples
    
    # Gibbs sampling loop
    for (i in 2:(n + burn_in)) {
        samples[i, 1] <- rho * samples[i - 1, 2] + sqrt_v * rnorm(1)
        samples[i, 2] <- rho * samples[i, 1] + sqrt_v * rnorm(1)
    }
    
    samples[-(1:burn_in), ]  # Return samples excluding the burn-in period
}

# The Gibbs sampler
rho <- 0.8  # Correlation coefficient
Nsim <- 1000  # Number of samples to generate
bvn_vals <- rBVN_gibbs(Nsim, rho, b_in)

# The samples with added theoretical contour
plot(bvn_vals, col = "gray", xlab = "x", ylab = "y",
     main = "Gibbs Sampling for Bivariate Normal Distribution")
f <- function(x, y) {
    exp(-(x^2 + y^2 - 2 * rho * x * y) / (2 * (1 - rho^2)))
}
x <- seq(-4, 4, length.out = 100)
z <- outer(x, x, Vectorize(f))
contour(x, x, z, add = TRUE, col = "red")

# Enhanced visualization
legend("bottomright", legend = c("Sampled Points", "Theoretical Contour"), 
       col = c("gray", "red"), lty = 1, cex = 0.8)

#dev.off()

############################################################################
# Example -- Many-Normal-Means
############################################################################
# The Gibbs sampling function for the Many-Normal-Means model
mnm_gibbs <- function(X, a, b, M) {
    n <- length(X)
    burn_in <- round(0.2 * M)  # Specify the burn-in period
    theta_vals <- matrix(0, nrow = n, ncol = burn_in + M)  # Initialize matrix for theta samples
    psi_vals <- numeric(burn_in + M)  # Initialize vector for psi samples
    theta_vals[, 1] <- X
    
    # Gibbs sampling loop
    for (i in 1:(burn_in + M)) {
        psi_vals[i] <- rgamma(1, shape = a + n / 2, rate = b + sum(theta_vals[, i]^2) / 2)
        if (i < burn_in + M) {
            sig_sq <- 1 / (1 + psi_vals[i])
            theta_vals[, i + 1] <- rnorm(n, mean = X / (1 + psi_vals[i]), sd = sqrt(sig_sq))
        }
    }
    
    list(theta = theta_vals[, -(1:burn_in)], psi = psi_vals[-(1:burn_in)])  # Return samples excluding the burn-in period
}

# Simulation function for the Many-Normal-Means model using Gibbs sampling
mnm_sim <- function(reps, theta_true, M, a, b) {
    n <- length(theta_true)
    true_SS <- sum(theta_true^2)
    mse_mle <- mse_bayes <- numeric(reps)
    
    for (r in 1:reps) {
        X <- theta_true + rnorm(n)  # Generate data
        mle <- sum(X^2)  # Maximum likelihood estimate
        
        gibbs_res <- mnm_gibbs(X, a, b, M)  # Gibbs sampling
        bayes_est <- colMeans(gibbs_res$theta^2)  # Bayesian estimate using the mean of posterior samples
        mse_mle[r] <- mean((mle - true_SS)^2)
        mse_bayes[r] <- mean((sum(bayes_est) - true_SS)^2)
    }
    
    mse <- c(mle_mse = mean(mse_mle), bayes_mse = mean(mse_bayes))
    return(mse)
}

# Example usage
set.seed(123)  # Set seed for reproducibility
theta_true <- rnorm(10, mean = 0, sd = 2)  # True theta values
reps <- 100  # Number of repetitions
M <- 1000  # Number of Gibbs iterations
a <- 2  # Shape parameter for psi
b <- 2  # Rate parameter for psi

mse_res <- mnm_sim(reps, theta_true, M, a, b)
print(mse_res)

############################################################################
# Example -- Gibbs Sampler for Capture-Recapture (Example 26.3.3 in Lange)
############################################################################
# Gibbs Sampler for Capture-Recapture in Lange
cap_cnts <- c(10, 27, 17, 7, 1, 5, 6, 15, 9, 18, 16, 5, 7, 19)
recap_cnts <- c(0, 0, 0, 0, 0, 0, 2, 1, 5, 5, 4, 2, 2, 3)
unrecap <- cumsum(cap_cnts - recap_cnts)  # Cumulative count of unrecap animals
n <- length(cap_cnts)  # Number of capture occasions
tot_iter <- 10000
burn_in <- 1000
Npop_ests <- numeric(burn_in + tot_iter)  # To store population size estimates
cap_probs <- matrix(0, nrow = burn_in + tot_iter, ncol = n)  # Capture probabilities
alpha <- beta <- 1  # Beta distribution parameters
init_pop_est <- 457  # Initial population size estimate
Npop_ests[1] <- init_pop_est
cap_probs[1, ] <- 0.02  # Initial capture probabilities

for (iter in 2:(burn_in + tot_iter)) {
    # Update capture probabilities
    cap_probs[iter, ] <- rbeta(n, alpha + cap_cnts, beta + Npop_ests[iter - 1] - cap_cnts)
    # Update population size estimates
    lam <- init_pop_est * prod(1 - cap_probs[iter, ])
    Npop_ests[iter] <- unrecap[n] + rpois(1, lam)
}

# Discard burn-in period
post_Npop_ests <- Npop_ests[-(1:burn_in)]

# histogram of population size estimates
hist(post_Npop_ests, freq = FALSE, ylab=Ylab, xlab = "N", col = "gray", 
     border = "white", main = "Population Size Estimates")
lines(density(post_Npop_ests,adj=2), main="Density of Population Size Estimates", lwd=2,
     xlab="Estimated Population Size", ylab="Density", col="darkgreen")

summary(post_Npop_ests)
    
