## Lecture Slides 13 R Examples

#Set Working Directory to Source File Location
library("rstudioapi")  # Load rstudioapi package
setwd(dirname(getActiveDocumentContext()$path)) # Set working directory to source file location
#getwd()  

##################################################################################
# Binary Response Model
##################################################################################

# This code implements logistic regression with a Bayesian perspective,
# adapted from an example at:
# https://brunaw.com/phd/mcmc/report.pdf

# Load required library
library(mvtnorm)

# Define binary response variable:
# Y = 0: Individual has no senility
# Y = 1: Individual has senility present

# x = wais: Scores on a subset of the Wechsler Adult Intelligence Scale (WAIS)

# Response variable (y = senility): 14 with senility (1), 40 without senility (0)
senility <- c(rep(1, times = 14), rep(0, times = 40))

# Predictor variable (x): WAIS scores
wais <- c(9, 13, 6, 8, 10, 4, 14, 8, 11, 7, 9, 7, 5, 14, 13, 16, 10, 12, 11, 14, 
       15, 18, 7, 16, 9, 9, 11, 13, 15, 13, 10, 11, 6, 17, 14, 19, 9, 11, 14, 
       10, 16, 10, 16, 14, 13, 13, 9, 15, 10, 11, 12, 4, 14, 20)

# Fit logistic regression model using the logit link
log_reg_out <- glm(senility ~ wais, family = binomial(logit))
summary(log_reg_out)

# Prepare design matrix for Bayesian inference
# X includes an intercept (column of 1s) and the predictor variable (x1)
X <- cbind(1, wais)  # Automatically handles intercept addition

# Define prior distribution parameters
beta_pri_mean <- c(0, -0.35)  # Prior mean for beta (intercept, slope)
beta_pri_cov <- diag(c(100, 0.175^2))  # Prior covariance matrix

# Scale factor for the proposal covariance matrix
# Adjust `k` to control acceptance rate in MCMC
k <- 1  

# Compute proposal covariance matrix based on Fisher Information
pro_cov_mat <- k * solve(t(X) %*% diag(fitted(log_reg_out)) %*% X)
# Uncomment the following if you want to use a diagonal proposal covariance matrix:
# pro_cov_mat <- k * diag(ncol(X))

##################################################################################
# MCMC for Logistic Regression Coefficients

# Initial setup
beta_curr <- beta_pri_mean   # Initial values for beta
V <- pro_cov_mat        # Proposal covariance matrix
mu <- beta_pri_mean           # Mean of prior distribution
Sig_inv <- solve(beta_pri_cov)  # Inverse of prior covariance matrix

acs <- 0 # will be to track "acceptance rate"
burn <- 1000                  # Number of burn-in iterations
Niter <- 50000                # Total number of iterations
beta_vals <- matrix(0, nrow = Niter, ncol = length(beta_pri_mean))  # Storage for MCMC results

# MCMC sampling loop
for (i in 1:Niter) {
  # Generate candidate beta from proposal distribution
  beta_pro <- as.vector(rmvnorm(1, beta_curr, V))  # Proposed betas
  
  # Calculate Metropolis ratio
  log_r <- (
    -k * sum(log(1 + exp(X %*% beta_pro))) + sum(senility * X %*% beta_pro) -
      0.5 * t(beta_pro - mu) %*% Sig_inv %*% (beta_pro - mu)
  ) - (
    -k * sum(log(1 + exp(X %*% beta_curr))) + sum(senility * X %*% beta_curr) -
      0.5 * t(beta_curr - mu) %*% Sig_inv %*% (beta_curr - mu)
  )
  
  # Accept/reject step
  if (runif(1) < exp(log_r)) {
    beta_curr <- beta_pro  # Accept candidate
    acs <- acs + 1
  }
  
  # Save current beta values
  beta_vals[i, ] <- beta_curr
}

# Calculate acceptance rate
acc_rate <- acs / Niter
cat("Acceptance Rate:", acc_rate, "\n")

# Diagnostic plots of the original MCMC sample
par(mfrow = c(2, 2))  # 2x2 plotting grid
acf(beta_vals[, 1], main = "acf of intercept")  # Autocorrelation for intercept
acf(beta_vals[, 2], main = "acf of slope")  # Autocorrelation for slope
plot(beta_vals[, 1], type = 'l', 
     main = "trace plot of intercept", xlab = "Iteration", ylab = "value")
plot(beta_vals[, 2], type = 'l', 
     main = "trace plot of slope", xlab = "Iteration", ylab = "value")
par(mfrow = c(1, 1))  # Reset plotting window

# Thinning results (every 5th value)
thin <- 5
beta_vals_thin <- beta_vals[seq(1, Niter, by = thin), ]

# Diagnostic plots of the original MCMC sample
par(mfrow = c(2, 2))  # 2x2 plotting grid
acf(beta_vals_thin[, 1], main = "acf of intercept")  # Autocorrelation for intercept
acf(beta_vals_thin[, 2], main = "acf of slope")  # Autocorrelation for slope
plot(beta_vals_thin[, 1], type = 'l', 
     main = "trace plot of intercept", xlab = "Iteration", ylab = "value")
plot(beta_vals_thin[, 2], type = 'l', 
     main = "trace plot of slope", xlab = "Iteration", ylab = "value")
par(mfrow = c(1, 1))  # Reset plotting window

# Remove burn-in samples
beta_vals_thin_b <- beta_vals_thin[-(1:burn), ]

# Diagnostic plots
par(mfrow = c(2, 2))  # 2x2 plotting grid
acf(beta_vals_thin_b[, 1], main = "acf of intercept")  # Autocorrelation for intercept
acf(beta_vals_thin_b[, 2], main = "acf of slope")  # Autocorrelation for slope
plot(beta_vals_thin_b[, 1], type = 'l', 
     main = "trace plot of intercept", xlab = "Iteration", ylab = "value")
plot(beta_vals_thin_b[, 2], type = 'l', 
     main = "trace plot of slope", xlab = "Iteration", ylab = "value")
par(mfrow = c(1, 1))  # Reset plotting window

# Posterior summary
post_meds <- apply(beta_vals_thin_b, 2, median)  # Posterior medians
post_low <- apply(beta_vals_thin_b, 2, quantile, probs = 0.025)  # 2.5% quantile
post_up <- apply(beta_vals_thin_b, 2, quantile, probs = 0.975)  # 97.5% quantile

# Combine summary into a data frame
 names_preds <- c("wais")  # Predictor names (intercept + predictors)
beta_post_summ <- data.frame(
  `0.025 Quantile` = post_low,
  `0.5 Quantile` = post_meds,
  `0.975 Quantile` = post_up,
  row.names = c("Intercept", names_preds)
)

# Print posterior summary
print(beta_post_summ)

##################################################################################
# Bayesian Logistic Regression using rstanarm
##################################################################################

# Load required packages
library(bayesrules)    # For Bayesian methods and tools
library(rstanarm)      # For Bayesian regression models
library(bayesplot)     # For visualizing MCMC diagnostics
library(tidyverse)     # For data manipulation and visualization
library(tidybayes)     # For working with Bayesian models
library(broom.mixed)   # For generating tidy summaries of Bayesian models

# Create the dataset
wais_data <- data.frame(senility, wais)

# Fit the Bayesian logistic regression model
wais_mod <- stan_glm(
  senility ~ wais,
  data = wais_data, 
  family = binomial,                          # Logistic regression model
  prior_intercept = normal(-0.5, 0.45),       # Prior for intercept
  prior = normal(-0.35, 0.175),              # Prior for slope
  chains = 4,                                # Number of MCMC chains
  iter = 10000,                              # Total iterations per chain (post-warmup = 5000)
  prior_PD = FALSE                           # Include likelihood in posterior estimation
)

# Prior justification:
# - Intercept prior: The log-odds of senility (y = 1) for a "typical" subject are between -1.4 and 0.4.
#   Prior mean is -0.5, and prior SD is 0.45.
# - Slope prior: For a one-unit increase in WAIS score, the odds of senility change by 0.5x to 1x.
#   Prior mean is -0.35, and prior SD is 0.175.

# MCMC diagnostics
mcmc_trace(wais_mod)                    # Trace plots for convergence diagnostics
mcmc_dens_overlay(wais_mod)            # Overlay density plots for posterior distributions
mcmc_acf(wais_mod)                     # Autocorrelation plots for MCMC chains

# Note: Slight autocorrelation is acceptable, but chains otherwise show good convergence.

# Visualizing posterior plausible models
wais_data %>%
  add_epred_draws(wais_mod, ndraws = 100) %>%  # Use updated function for posterior draws
  ggplot(aes(x = wais, y = senility)) +
  geom_line(aes(y = .epred, group = .draw), alpha = 0.15) +  # Use `.epred` for expected values
  labs(
    y = "Probability of Senility",
    x = "WAIS Score",
    title = "Posterior Plausible Models"
  )

# Summary of posterior estimates (point estimates and credible intervals)
post_summ <- tidy(wais_mod, conf.int = TRUE, conf.level = 0.95)
print(post_summ)

# Comparison with frequentist maximum likelihood estimates (MLEs)
log_reg_out <- glm(senility ~ wais, family = binomial(logit))
summary(log_reg_out)

# Posterior predictions for a new observation (WAIS score = 10)
bin_pred <- posterior_predict(wais_mod, newdata = data.frame(wais = 10))

# Plot posterior predictive distribution
mcmc_hist(bin_pred) +
  labs(
    x = "Binary Outcome (Y)",
    title = "Posterior Predictive Distribution for WAIS Score = 10"
  )

# Summarize posterior predictions as a frequency table
bin_tab <- table(bin_pred)
print(bin_tab)

# Compute posterior predictive mean (probability of senility for WAIS score = 10)
post_mean <- colMeans(bin_pred)
cat("Posterior Predictive Probability of Senility (WAIS = 10):", post_mean, "\n")

# Check posterior predictive distribution
prop_senile <- function(x){mean(x == 1)}

pp_check(
  wais_mod,
  plotfun = "stat",
  stat = "prop_senile"
) + xlab("Probability of Senility")

# Note: Observed values being near the center of the posterior predictive distribution indicate good model fit.

# Classification accuracy and confusion matrix for cutoff = 0.5
classification_summary(
  model = wais_mod,
  data = wais_data,
  cutoff = 0.5
)

## Cross-Validated Classification Accuracy Using k-fold Cross-Validation

# Load required library
library(caret)

# Create stratified folds
set.seed(123)  # For reproducibility
folds <- createFolds(wais_data$senility, k = 10, list = TRUE, returnTrain = FALSE)

# Function to compute classification accuracy for each fold
comp_CV_acc <- function(train_ind, test_ind, model, data, cutoff) {
  train_data <- data[train_ind, ]
  test_data <- data[test_ind, ]
  
  # Refit the model on the training data
  fold_mod <- update(model, data = train_data)
  
  # Predict probabilities on the test data
  pred_probs <- posterior_epred(fold_mod, newdata = test_data)  # Updated function
  pred_classes <- ifelse(colMeans(pred_probs) > cutoff, 1, 0)
  
  # Compute confusion matrix
  confusion <- table(
    Predicted = factor(pred_classes, levels = c(0, 1)),
    Actual = factor(test_data$senility, levels = c(0, 1))
  )
  
  # Handle missing classes in the confusion matrix
  if (nrow(confusion) < 2 || ncol(confusion) < 2) {
    confusion <- matrix(0, nrow = 2, ncol = 2, dimnames = list(c("0", "1"), c("0", "1")))
  }
  
  # Compute accuracy
  accuracy <- sum(diag(confusion)) / sum(confusion)
  return(accuracy)
}

# Perform k-fold cross-validation
CV_acc_vals <- sapply(
  folds,
  function(test_ind) {
    train_ind <- setdiff(seq_len(nrow(wais_data)), test_ind)
    comp_CV_acc(train_ind, test_ind, wais_mod, wais_data, cutoff = 0.5)
  }
)

# Calculate mean cross-validated accuracy
CV_acc <- mean(CV_acc_vals)
cat("Cross-Validated Classification Accuracy:", CV_acc, "\n")

##################################################################################
# Bayesian Logistic Regression with Multiple Predictors
##################################################################################

# Rain example from the book

# Load required libraries
library(rstanarm)      # For Bayesian logistic regression
library(tidyverse)     # For data manipulation and visualization
library(broom.mixed)   # For tidy summaries of Bayesian models
library(bayesrules)    # For Bayesian tools
library(caret)         # For stratified cross-validation

# Load and process the data
data(weather_perth)  # Assuming `weather_perth` is preloaded
weather <- weather_perth %>% 
  select(day_of_year, raintomorrow, humidity9am, humidity3pm, raintoday)

# Prior belief for logistic regression:
# On a "typical" day, the chance of rain is 20% (0.2).
# The prior mean on the CENTERED beta_0 (intercept) is log(0.2/(1 - 0.2)) = -1.4.
# A prior SD of 0.7 implies a 95% chance the log-odds are between -2.8 and 0.
# This corresponds to odds of 0.06 to 1, or probabilities between 0.057 and 0.5.

# Fit a Bayesian logistic regression model with multiple predictors
rain_stanglm2 <- stan_glm(
  raintomorrow ~ humidity9am + humidity3pm + raintoday, 
  data = weather, 
  family = binomial,                                # Logistic regression
  prior_intercept = normal(-1.4, 0.7),             # Prior for intercept
  prior = normal(0, 2.5, autoscale = TRUE),        # Weakly informative prior for coefficients
  chains = 4,                                      # Number of MCMC chains
  iter = 10000                                     # Number of iterations (post-warmup = 5000)
)

# Summarize posterior estimates with confidence intervals
rain_stanglm2_summ <- tidy(rain_stanglm2, effects = "fixed", conf.int = TRUE, conf.level = 0.95)
print(rain_stanglm2_summ)

# Model comparison: Fit a simpler model with a single predictor
rain_stanglm1 <- stan_glm(
  raintomorrow ~ humidity9am, 
  data = weather, 
  family = binomial,
  prior_intercept = normal(-1.4, 0.7),             # Same prior for intercept
  prior = normal(0.07, 0.035),                    # Prior for slope (adjusted based on context)
  chains = 4, 
  iter = 10000, 
  prior_PD = FALSE                                # Use posterior data
)

# Compare classification accuracy using k-fold cross-validation
# The book suggests c = 0.2 as a reasonable cutoff, but feel free to explore others
set.seed(123)  # For reproducibility

# Cross-validation for rain_stanglm1
CV_acc_1 <- classification_summary_cv(
  model = rain_stanglm1, 
  data = weather, 
  cutoff = 0.2, 
  k = 10  # 10-fold cross-validation
)

# Cross-validation for rain_stanglm2
CV_acc_2 <- classification_summary_cv(
  model = rain_stanglm2, 
  data = weather, 
  cutoff = 0.2, 
  k = 10
)

# Print cross-validated classification accuracy for both models
cat("Cross-Validated Accuracy for Model 1 (Single Predictor):\n")
CV_acc_1$cv
cat("Cross-Validated Accuracy for Model 2 (Multiple Predictors):\n")
CV_acc_2$cv

# One approach to model selection:
# LOO for rain_stanglm1 (Single Predictor)
loo1 <- loo(rain_stanglm1)
cat("LOO Estimates for Model 1 (Single Predictor):\n")
print(loo1$estimates)

# LOO for rain_stanglm2 (Multiple Predictors)
loo2 <- loo(rain_stanglm2)
cat("LOO Estimates for Model 2 (Multiple Predictors):\n")
print(loo2$estimates)

# Comparing LOO-CV:
cat("\nModel Comparison using LOO:\n")
loo_comp <- loo_compare(loo1, loo2)
print(loo_comp)

# Frequentist approach: Use Bayesian Information Criterion (BIC)
# BIC does not incorporate prior information, allowing direct comparison of models

# Fit frequentist logistic regression models
rain_glm1 <- glm(
  raintomorrow ~ humidity9am, 
  data = weather, 
  family = binomial(logit)
)

rain_glm2 <- glm(
  raintomorrow ~ humidity9am + humidity3pm + raintoday, 
  data = weather, 
  family = binomial(logit)
)

# Calculate BIC for both models
BIC1 <- BIC(rain_glm1)
BIC2 <- BIC(rain_glm2)

cat("\nBIC Comparison:\n")
cat("Model 1 (Single Predictor): BIC =", BIC1, "\n")
cat("Model 2 (Multiple Predictors): BIC =", BIC2, "\n")

# Interpretation:
# Lower BIC indicates better model fit while penalizing for model complexity.
# Compare BIC values to decide which model is more appropriate.

###################################################################
# Bayesian logistic regression model with simplified predictors
# with only humidity3pm & raintoday as predictors:
rain_stanglm_simp <- stan_glm(
  raintomorrow ~ humidity3pm + raintoday, 
  data = weather, 
  family = binomial,                                # Logistic regression
  prior_intercept = normal(-1.4, 0.7),             # Prior for intercept
  prior = normal(0, 2.5, autoscale = TRUE),        # Weakly informative priors
  chains = 1,                                      # Single MCMC chain for simplicity
  iter = 10000                                     # Total iterations (post-warmup = 5000)
)

# Cross-validated classification accuracy (cutoff = 0.2, 10-fold CV)
CV_acc_simp <- classification_summary_cv(
  model = rain_stanglm_simp, 
  data = weather, 
  cutoff = 0.2, 
  k = 10
)

# Print cross-validated accuracy
cat("Cross-Validated Classification Accuracy (Simplified Model):\n")
print(CV_acc_simp$cv)

# Evaluate model using Leave-One-Out Cross-Validation (LOO)
loo_simp <- loo(rain_stanglm_simp)
cat("\nLOO Estimates for Simplified Model:\n")
print(loo_simp$estimates)

# Frequentist logistic regression model with the same predictors
rain_glm_simp <- glm(
  raintomorrow ~ humidity3pm + raintoday, 
  data = weather, 
  family = binomial(logit)
)

# Calculate BIC for the simplified model
BIC_simp <- BIC(rain_glm_simp)
cat("\nBIC for Simplified Model (Frequentist):\n", BIC_simp, "\n")

# Comparison Notes:
# - LOO: Lower `elpd_loo` values indicate a better Bayesian model fit.
# - BIC: Lower BIC indicates a better frequentist model fit while penalizing complexity.

##################################################################################
# Multiple Logistic Regression Model using Base R
##################################################################################

# Load required package
library(mvtnorm)  # For multivariate normal distributions

# Extract variables from the weather dataset
rain_tom <- weather$raintomorrow
humid9am <- weather$humidity9am
humid3pm <- weather$humidity3pm
rain_today <- weather$raintoday

# Convert variables to numeric for calculations
rain_tom <- as.numeric(rain_tom) - 1  # Convert rain_tom to 0's and 1's
rain_today <- as.numeric(rain_today)    # Ensure rain_today is numeric

# Construct the design matrix (X) with an intercept
X <- cbind(rep(1, times = length(humid9am)), humid9am, humid3pm, rain_today)

# Prior specifications
beta_pri_mean <- c(0, -0.35, -0.2, 0.5)  # Prior means for beta parameters
# Prior reflects:
# - No specific belief about beta_0 (intercept)
# - Moderate beliefs about the slopes for humid9am, humid3pm, and rain_today
beta_pri_cov <- diag(c(100, 40, 40, 40))  # Prior covariance matrix

# Scaling factor for proposal covariance matrix (can be tuned for MCMC acceptance rates)
k <- 1  

# Frequentist Logistic Regression Model (using glm)
log_reg_out <- glm(rain_tom ~ humid9am + humid3pm + rain_today, family = binomial(logit))

# Summary of the logistic regression model
cat("\nSummary of the Logistic Regression Model:\n")
summary(log_reg_out)

# Set up the proposal covariance matrix
pro_cov_mat <- k * solve(t(X) %*% diag(fitted(log_reg_out)) %*% X)
# Alternative option for proposal covariance matrix:
# pro_cov_mat <- k * diag(ncol(X))

# Initialize MCMC parameters
beta_curr <- beta_pri_mean                # Initial parameter estimates
V <- pro_cov_mat                     # Proposal covariance matrix
mu <- beta_pri_mean                        # Prior mean vector
Sig_inv <- solve(beta_pri_cov)           # Inverse of prior covariance matrix

# MCMC settings
acs <- 0                                       # Counter for accepted proposals
burn <- 1000                               # Number of burn-in iterations
Niter <- 50000                             # Total number of iterations
mcmc_res <- matrix(0, nrow = Niter, ncol = length(beta_pri_mean))  # Storage for MCMC samples

# MCMC sampling loop
for (i in 1:Niter) {
  # Generate candidate beta from proposal distribution
  beta_pro <- rmvnorm(1, beta_curr, V)
  beta_pro <- as.vector(beta_pro)
  
  # Calculate the Metropolis ratio
  log_ratio <- (
    -k * sum(log(1 + exp(X %*% beta_pro))) + sum(rain_tom * X %*% beta_pro) -
      0.5 * t(beta_pro - mu) %*% Sig_inv %*% (beta_pro - mu)
  ) - (
    -k * sum(log(1 + exp(X %*% beta_curr))) + sum(rain_tom * X %*% beta_curr) -
      0.5 * t(beta_curr - mu) %*% Sig_inv %*% (beta_curr - mu)
  )
  
  # Accept/reject step
  if (runif(1) < exp(log_ratio)) {
    beta_curr <- beta_pro  # Accept candidate
    acs <- acs + 1                # Increment acceptance counter
  }
  
  # Save the current beta values
  mcmc_res[i, ] <- beta_curr
}

# Calculate acceptance rate
acc_rate <- acs / Niter
cat("Acceptance Rate:", acc_rate, "\n")

# Thinning (every 5th value)
thin <- 5
beta_vals_thin <- mcmc_res[seq(1, Niter, by = thin), ]

# Remove burn-in samples
beta_vals_thin_b <- beta_vals_thin[-(1:burn), ]

# Diagnostic plots: Autocorrelation for each parameter
par(mfrow = c(2, 2))  # Set up a 2humid3pm plotting grid
for (i in 1:ncol(beta_vals_thin_b)) {
  acf(beta_vals_thin_b[, i], main = bquote("ACF of " ~ beta[.(i - 1)]) )
}
par(mfrow = c(1, 1))  # Reset plotting grid

# Diagnostic plots: Trace plots for each parameter
par(mfrow = c(2, 2))  # Set up a 2humid3pm plotting grid
for (i in 1:ncol(beta_vals_thin_b)) {
  plot(beta_vals_thin_b[, i], type = 'l', 
       main = bquote("Trace Plot of " ~ beta[.(i - 1)]), 
       xlab = "Iteration", ylab = "value")
}
par(mfrow = c(1, 1))  # Reset plotting grid

# Posterior summaries
post_meds <- apply(beta_vals_thin_b, 2, median)      # Posterior medians
post_low <- apply(beta_vals_thin_b, 2, quantile, probs = 0.025)  # 2.5% quantile
post_up <- apply(beta_vals_thin_b, 2, quantile, probs = 0.975)  # 97.5% quantile

# Combine posterior summaries into a tidrain_tom data frame
names_preds <- c("humid9am", "humid3pm", "rain_today")  # Predictor names
beta_post_summ <- data.frame(
  `0.025 Quantile` = post_low,
  `0.5 Quantile` = post_meds,
  `0.975 Quantile` = post_up,
  row.names = c("Intercept", names_preds)
)

# Print the posterior Summary
cat("Posterior Summary:\n")
print(beta_post_summ)

