#================================================================================#
# Stat 7650: Computational Statistics -- Spring 2025                             #
# R code examples for the Monte Carlo method                                #
# Based in part on Givens & Hoeting (Chap 6) and Lange (Chap 23)      #
# Modified from code of R. Martin                                                #
#================================================================================#

#Set Working Directory to Source File Location
library("rstudioapi")                                 # Load rstudioapi package
setwd(dirname(getActiveDocumentContext()$path))       # Set working directory to source file location
#getwd()  

# Set the path for saving plots
path_name <- "../STAT7650-Slides/figures"

library(ggplot2)

############################################################################
# Example -- Another Motivating Example
############################################################################

#plot of the integrand, f and g
# Define the integrand function
int <- function(x) cos(pi * x / 2)

# Define the g(y) function
g <- function(y) 1.5 * (1 - y^2)

# Create a sequence of values from 0 to 1
x_vals <- seq(0, 1, by = 0.01)

# Calculate values for both functions
int_vals <- int(x_vals)
g_vals <- g(x_vals)

# Create a data frame for plotting
fnc_dat <- data.frame(x = x_vals, int = int_vals, G = g_vals)

# Plotting
#pdf(file=paste0(path_name, "/cos_int.pdf"), width=7, height=5)
ggplot(fnc_dat, aes(x = x)) +
  geom_line(aes(y = int, colour = "Integrand")) +
  geom_line(aes(y = G, colour = "g(x)")) +
  geom_line(aes(y = 1, colour = "Uniform")) +
  scale_colour_manual("", values = c("Integrand" = "blue", "g(x)" = "red", "Uniform" = "magenta")) +
  labs(title = "Comparison of target, envelope, and the integrand", x = "x", y = "function value") +
  # theme_minimal() +
  theme(legend.title = element_blank(), # Hide the legend title
        text = element_text(size = 14), # Increase text size
        legend.text = element_text(size = 12)) # Increase legend text
        #legend.background = element_rect(colour = "black", size = 0.5)) # Add bounding box
#dev.off()

#plot of the weights
# Create a sequence of values from 0 to 1
y_vals <- seq(0, .99, by = 0.01)

# Calculate values for weights
w_vals <- 1/g(y_vals)

# Create a data frame for plotting
w_dat <- data.frame(y = y_vals, Weights = w_vals)

# Plotting
#pdf(file=paste0(path_name, "/cos_int_wts.pdf"), width=7, height=5)
ggplot(w_dat, aes(x = y, y = Weights)) +
  geom_line(colour = "blue") +
  labs(title = "Plot of Weights", x = "x", y = "weights") +
  #  theme_minimal() +
  theme(text = element_text(size = 14), # Increase text size
        axis.title = element_text(size = 14), # Increase axis title size
        axis.text = element_text(size = 12)) # Increase axis text size
#dev.off()

############################################################################
# Example -- Estimating the Size and Power of a Hypothesis Test
############################################################################

lam <- 2
n <- 10
N <- 10000
prob_calc <- function(x, lam, n) dpois(x, lam * n)
is_sig <- function(x, lam, n) (x / n - lam) / sqrt(lam / n) > 1.645
unif_sam <- runif(N)

# Monte Carlo simulation
phi_mc <- is_sig(qpois(unif_sam, lam * n), lam, n)
mean_mc <- mean(phi_mc)
ci_mc <- mean_mc + 1.96 * c(-1, 1) * sqrt(mean_mc * (1 - mean_mc) / N)

# Importance Sampling
lam0 <- 2.4653
samp_dat <- qpois(unif_sam, n * lam0)
phi_is <- is_sig(samp_dat, lam, n)
wts_num <- prob_calc(samp_dat, lam, n)
wts_den <- prob_calc(samp_dat, lam0, n)
wts <- wts_num / wts_den
mean_is <- sum(phi_is * wts) / sum(wts)
ci_is <- mean_is + 1.96 * c(-1, 1) * sqrt(mean_is * (1 - mean_is) / N)
N_eff <- N / (1 + var(wts))
print(N_eff)

# Summary Output
res_summ <- cbind(c(mean_mc, mean_is), c(ci_mc[1], ci_is[1]), c(ci_mc[2], ci_is[2]))
colnames(res_summ) <- c("Estimate", "CI Lower", "CI Upper")
rownames(res_summ) <- c("MC", "IS")
print(res_summ)

# Power Analysis
lam_vals <- seq(2.2, 4, by=0.1)
power_mc <- sapply(lam_vals, function(l) mean(is_sig(qpois(unif_sam, n * l), lam, n)))
power_is <- sapply(lam_vals, function(l) {
  wts <- prob_calc(samp_dat, l, n) / wts_den
  sum(phi_is * wts) / sum(wts)
})

# Plotting
plot(x=lam_vals, y=power_is, type="l", col="black", xlab=expression(lambda), lty=1, ylab="power")
lines(x=lam_vals, y=power_mc, col="red", lty=2)
legend("bottomright", inset=0.05, lty=1:2, col=c("black", "red"), legend=c("Importance Sampling", "Monte Carlo"))

############################################################################
# Example -- Small Tail Probability
############################################################################
# Function to check if a value exceeds a threshold (4.5)
is_gt_thre <- function(x, threshold = 4.5) as.numeric(x > threshold)

# Standard normal density function
norm_pdf <- function(x) dnorm(x)

# # of samples
Nsim <- 10000

# Monte Carlo estimation using the normal distribution
o_mc <- mean(is_gt_thre(rnorm(Nsim)))

# Function for the weighting scheme based on the exponential distribution
exp_wgt <- function(y, offset = 4.5) exp(-(y - offset)) / (1 - exp(-offset))

# Weighted function for importance sampling
w_is_gt_thre <- function(x, threshold = 4.5) {
  is_gt_thre(x, threshold) * norm_pdf(x) / exp_wgt(x)
}

# Importance Sampling estimation using the exponential distribution as the proposal
o_is <- mean(w_is_gt_thre(4.5 + rexp(Nsim)))

# Print results: Monte Carlo, Importance Sampling estimations, and the true value
print(cbind(MC = o_mc, IS = o_is, Truth = 1 - pnorm(4.5)))

############################################################################
# Example --  Bivariate Normal Probabilities (Rao-Blackwellization)
############################################################################
# Parameters
rho <- 0.9
Nsim <- 10000
Niter <- 5000

# Generate correlated bivariate normal samples
rBVN <- function(M, rho) {
  Z <- matrix(rnorm(2 * M), ncol = M) # Generating 2*M normal samples
  R <- chol(matrix(c(1, rho, rho, 1), nrow = 2)) # Cholesky decomposition for correlation
  XY <- t(R %*% Z)
  return(XY)
}

# Function to compare two vectors
comp_vec <- function(v) as.numeric(v[1] > v[2])

# Rao-Blackwellized function
RB_fnc <- function(y, rho) 1 - pnorm((1 - rho) * y / sqrt(1 - rho^2))

# Generate samples
XY_sam <- rBVN(Nsim, rho)

# Naive estimator
naive_est <- mean(apply(XY_sam, 1, comp_vec))

# Rao-Blackwellized estimator
Z1 <- XY_sam[, 1]
RB_est <- mean(RB_fnc(Z1, rho))

# Printing estimators
print(cbind(Naive = naive_est, RB = RB_est))

# Calculate standard deviations for the RB estimator
RB_est_vals <- replicate(Niter, mean(RB_fnc(rnorm(Nsim), rho)))

# Standard error calculations
naive_sd_est <- 0.5 / sqrt(Niter)
RB_sd_est <- sd(RB_est_vals)
print(cbind(Naive_SD = naive_sd_est, RB_SD = RB_sd_est))

naive_sd_est/RB_sd_est

############################################################################
# Example -- Student-t Percentile via Stochastic Approximation
############################################################################
set.seed(123) # Ensure reproducibility

# Constants
df <- 3
prob <- 0.8
Niter <- 10000

# Target function for stochastic approximation
tar_fnc<- function(x, z, prob, df) {
  pnorm(x, mean = 0, sd = sqrt(df / z)) - prob
}

# Initial setup
approxs <- numeric(Niter)
approxs[1] <- 1 # Initial guess
wts <- 1 / (1 + 1:Niter)^(0.75) # Decreasing wts
chi_sq_sam <- rchisq(Niter, df = df)

# Stochastic approximation algorithm
for (iter in 2:Niter) {
  approxs[iter] <- approxs[iter - 1] - 
    wts[iter] * tar_fnc(approxs[iter - 1], chi_sq_sam[iter], prob, df)
}

# Plotting the approximation convergence
#pdf(file=paste0(path_name, "/t_percentile.pdf"), width=7, height=5)
plot(approxs, type = "l", xlab = "Iteration", ylab = "Approximation",
     main = "Stochastic Approximation of Student-t Percentile")
abline(h = qt(prob, df), col = "red", lwd = 2, lty = 3)
legend("bottomright", legend = c("Approximation", "True Quantile"), 
       col = c("black", "red"), lty = c(1, 3), lwd = c(1, 2))
#dev.off()

############################################################################
# Example -- Variable Selection in Regression, via Simulated Annealing (Example 3.4 in G&H)
############################################################################
#read in the data
baseball <- read.table(file = "../Datasets/baseball.dat", header = TRUE)

head(baseball)

# Transform and prepare data
log_sal <- log(baseball$salary)
pred_mat <- as.matrix(baseball[, -1])
print(c("# of observations" = length(log_sal), 
        "# of predictors" = ncol(pred_mat),
        "# of possible models" = 2^ncol(pred_mat)))

# Define the AIC calculation function
AIC_val <- function(model_mask) {
  num_obs <- length(log_sal)
  num_pred <- sum(model_mask)
  mod_logic <- as.logical(model_mask)
  
  mod_fit <- lm(log_sal ~ pred_mat[, mod_logic])
  aic_val <- num_obs * log(mean(mod_fit$residuals^2)) + 2 * (num_pred + 2)
  
  return(aic_val)
}

# Define the proposal function for simulated annealing
new_mod <- function(curr_model, num_chang = 1) {
  ind2mod<- sample(1:length(curr_model), size = num_chang)
  new_mod <- curr_model
  new_mod[ind2mod] <- (1 + new_mod[ind2mod]) %% 2
  return(new_mod)
}

# Initial model (random start)
ini_mod <- sample(0:1, size = ncol(pred_mat), replace = TRUE)

# Perform simulated annealing for model selection #actually gr is not needed here
#R can run with its defaults w/o specifying the new_mod
SA_res <- optim(par = ini_mod, fn = AIC_val, gr = new_mod, method = "SANN")

# Display results
print(SA_res)
select_pred <- names(baseball[, -1])[as.logical(SA_res$par)]
print("Selected Predictors:")
print(select_pred)


############################################################################
# Gamma Model Example: Likelihood Ratio-Based Confidence Region
############################################################################

#Gamma Model Extension
set.seed(123)

# 1. True parameters and observed data
n <- 20
theta1_true <- 7
theta2_true <- 13
x_obs <- rgamma(n, shape = theta1_true, scale = theta2_true)

# 2. MLE under unconstrained model
loglik_gamma <- function(par, x) {
  -sum(dgamma(x, shape = par[1], scale = par[2], log = TRUE))
}
mle_fit <- optim(c(1, 1), loglik_gamma, x = x_obs, method = "L-BFGS-B", lower = c(0.01, 0.01))
theta_hat <- mle_fit$par

# 3. LRT statistic at a given theta
lrt_stat <- function(theta, x, theta_hat) {
  ll_null <- sum(dgamma(x, shape = theta[1], scale = theta[2], log = TRUE))
  ll_mle <- sum(dgamma(x, shape = theta_hat[1], scale = theta_hat[2], log = TRUE))
  -2 * (ll_null - ll_mle)
}
t_obs <- lrt_stat(c(theta1_true, theta2_true), x_obs, theta_hat)

# 4. Function to estimate p-value at a given theta
estimate_pval <- function(theta, t_obs, B = 1000) {
  sim_data <- replicate(B, {
    x_sim <- rgamma(n, shape = theta[1], scale = theta[2])
    lrt_stat(theta, x_sim, theta_hat)
  })
  mean(sim_data > t_obs)
}

# 5. Grid search over (theta1, theta2)
theta1_vals <- seq(0, 40, length.out = 100)
theta2_vals <- seq(0, 40, length.out = 100)
alpha <- 0.10
p_grid <- matrix(NA, nrow = length(theta1_vals), ncol = length(theta2_vals))

#This might take a while
for (i in seq_along(theta1_vals)) {
  for (j in seq_along(theta2_vals)) {
    theta <- c(theta1_vals[i], theta2_vals[j])
    p_grid[i, j] <- estimate_pval(theta, t_obs, B = 500)
  }
}

# 6. Plot contour where p(theta) = alpha
library(ggplot2)
library(reshape2)

df_plot <- expand.grid(theta1 = theta1_vals, theta2 = theta2_vals)
df_plot$pval <- as.vector(p_grid)

ggplot(df_plot, aes(x = theta1, y = theta2, z = pval)) +
  geom_contour(breaks = alpha, color = "blue") +
  geom_point(aes(x = theta_hat[1], y = theta_hat[2]), color = "red", size = 2) +
  labs(title = paste0("LRT p-value Contour at Level ", 100*(1 - alpha), "%"),
       x = expression(theta[1]), y = expression(theta[2])) +
  theme_minimal()

#Robbins-Monro Stochastic Approximation
# Robbins-Monro for root-finding p(theta) = alpha in 1D (fixing theta2 for simplicity)
theta1_rm <- 10  # initial guess
theta2_fixed <- 13
w_t <- function(t) (1 + t)^(-0.75)

for (t in 1:200) {
  z_sim <- rgamma(n, shape = theta1_rm, scale = theta2_fixed)
  t_sim <- lrt_stat(c(theta1_rm, theta2_fixed), z_sim, theta_hat)
  h <- as.numeric(t_sim > t_obs)
  theta1_rm <- theta1_rm - w_t(t) * (h - alpha)
}
cat("Estimated theta1 such that p(theta) ≈", alpha, ": ", theta1_rm, "\n")

# Assuming everything from the previous code is already run

# Bayesian posterior samples using a flat prior on (theta1, theta2)
library(MASS)

log_post <- function(par, x) {
  if (any(par <= 0)) return(-Inf)
  sum(dgamma(x, shape = par[1], scale = par[2], log = TRUE))
}
# Use the mode from MLE as a proposal center
posterior_samples <- matrix(NA, nrow = 5000, ncol = 2)
current <- theta_hat
accepts <- 0

for (i in 1:5000) {
  proposal <- mvrnorm(1, mu = current, Sigma = diag(c(0.5, 1)))  # adjust step size
  log_ratio <- log_post(proposal, x_obs) - log_post(current, x_obs)
  if (log(runif(1)) < log_ratio) {
    current <- proposal
    accepts <- accepts + 1
  }
  posterior_samples[i, ] <- current
}
cat("Acceptance rate:", accepts / 5000, "\n")

# Add asymptotic ellipse around MLE
library(ellipse)
library(numDeriv)
hessian <- numDeriv::hessian(function(par) loglik_gamma(par, x = x_obs), theta_hat)
cov_mle <- solve(hessian)
ellipse_pts <- data.frame(ellipse::ellipse(cov_mle, centre = theta_hat, level = 0.90))

# Plot everything
ggplot(df_plot, aes(x = theta1, y = theta2, z = pval)) +
  geom_contour(breaks = alpha, colour = "blue", linewidth = 1.2) +
  
  geom_point(data = as.data.frame(posterior_samples),
             aes(x = V1, y = V2),
             colour = "gray60", alpha = 0.2, shape = 16,
             inherit.aes = FALSE) +          # <- already fixed here
  
  geom_point(aes(x = theta_hat[1], y = theta_hat[2]),
             colour = "red", size = 2, shape = 4) +
  
  geom_path(data = ellipse_pts,
            aes(x = x, y = y),
            colour = "darkgreen", linewidth = 1,
            inherit.aes = FALSE) +           # <- add this
  
  labs(title     = "Exact vs. Bayesian vs. Asymptotic 90% Regions",
       subtitle  = "Blue: LRT Contour | Gray: Posterior Sample | Green: Normal Ellipse",
       x = expression(theta[1]), y = expression(theta[2])) +
  theme_minimal()
