## Lecture Slides 11 R Examples

# Load packages
library(rstan)
library(TeachingDemos) # loading TeachingDemos package, to use hpd function
library(pscl)  # loading pscl package, to use inverse gamma distribution
library(mvtnorm)   # for multivariate normal simulations

#Set Working Directory to Source File Location
library("rstudioapi")  # Load rstudioapi package
setwd(dirname(getActiveDocumentContext()$path)) # Set working directory to source file location
#getwd()  

###############################
# Bayesian Linear Regression  #
###############################

###################################################
### Example 1, Oxygen Uptake Data
###################################################

# Data from Hoff (2009)
y <- c(-0.87, -10.74, -3.27, -1.97, 7.50, -7.25, 17.05, 4.96, 10.40, 11.05, 0.26, 2.51)
# y = change in maximal oxygen uptake (positive number = improvement)
x1 <- c(0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1)              # 0 = running group, 1 = aerobics group
x2 <- c(23, 22, 22, 25, 27, 20, 31, 23, 27, 28, 22, 24)  # ages of subjects
x3 <- x1 * x2              # interaction term between group and age

# Design matrix with an intercept term
X <- cbind(1, x1, x2, x3)

# Function to calculate posterior median and credible intervals for regression parameters
t_CI_table <- function(coefs, cov_mat, level=0.95, degrees=Inf, quantiles=c(0.025, 0.50, 0.975)) {
  # Compute standard errors
  std_errors <- sqrt(diag(cov_mat))
  
  # Generate quantile-based intervals by repeating coefs to match the shape of quantile_mat
  quantile_mat <- outer(std_errors, qt(quantiles, degrees), "*")
  quantile_mat <- sweep(quantile_mat, 1, coefs, "+")
  
  # Combine mean, standard error, and quantiles into a result matrix
  result <- cbind(Mean=coefs, Std_Error=std_errors, quantile_mat)
  
  # Set column names for quantiles
  quantile_names <- paste0(quantiles * 100, "%")
  colnames(result) <- c("Mean", "Std_Error", quantile_names)
  
  return(round(result, 4))
}

### Noninformative Prior Analysis:
# Estimate regression coefficients (bhat) and residual variance (sig2hat)
XtX_inv <- solve(t(X) %*% X) # Store result to avoid recalculating
bhat <- XtX_inv %*% t(X) %*% y
residuals <- y - X %*% bhat
sig2hat <- crossprod(residuals) / (length(y) - ncol(X))

# Covariance matrix of the posterior for beta
my_Cov <- XtX_inv * as.numeric((length(y) - ncol(X)) * sig2hat / (length(y) - ncol(X) - 2))

# Posterior information about the coefficients (betas)
post_beta <- t_CI_table(bhat, my_Cov, degrees=length(y) - ncol(X))
rownames(post_beta) <- paste0("beta_", 0:(ncol(X) - 1))
print(post_beta)

# Posterior information about sigma^2 using inverse gamma parameters
alpha_post <- (length(y) - ncol(X) - 1) / 2
beta_post <- 0.5 * sig2hat * (length(y) - ncol(X))

# Posterior median and HPD for sigma^2
# Using `qgamma` with parameters converted to gamma distribution parameters if `qigamma` is unavailable
sig2_post_med <- qgamma(0.50, shape=alpha_post, rate=beta_post)
cat("Posterior median of sigma^2:", round(sig2_post_med, 3), "\n")

# Calculate HPD interval for sigma^2 using the gamma distribution
hpd_sig2 <- qgamma(c(0.025, 0.975), shape=alpha_post, rate=beta_post)
cat("HPD interval for sigma^2:", round(hpd_sig2, 3), "\n")

###################################################
### Example 2, Automobile Data
###################################################

# Load data
auto_data <- read.table("../STAT7630-Data/autoregresslarge.txt", header = TRUE)
y <- auto_data$mpg
X <- cbind(1, auto_data$displacement, auto_data$horsepower, auto_data$weight)

###### Setting up prior specification:
# 3 predictor variables, so we set up exactly 4 hypothetical "prior observations"
# Based on "expert opinion," we have the following estimates:

# Prior beliefs about mpg for specific car configurations
prior_data <- list(
  xtil = matrix(c(
    150, 100, 2000,  # Observation 1
    200, 160, 4500,  # Observation 2
    250, 140, 3000,  # Observation 3
    100, 80, 1800    # Observation 4
  ), ncol=3, byrow=TRUE),
  ytil = c(25, 10, 20, 35)
)

#That is, prior beliefs or expert opinions:  
#a car with displacement=150, horsepower=100, weight=2000 should have mpg around 25
#a car with displacement=200, horsepower=160, weight=4500 should have mpg around 10
#a car with displacement=250, horsepower=140, weight=3000 should have mpg around 20
#a car with displacement=100, horsepower=80, weight=1800 should have mpg around 35

# Include intercept in prior design matrix
xtil <- cbind(1, prior_data$xtil)

# Prior weights (D) for the prior observations, indicating confidence level
D <- diag(4, 4)  # Equal weight (4) on each prior observation

# Prior mean on the beta vector induced from prior observations
pri_mean_beta <- as.vector(solve(t(xtil) %*% xtil) %*% t(xtil) %*% prior_data$ytil)
cat("Prior mean for beta vector:", round(pri_mean_beta, 3), "\n")

### Choosing prior parameters a and b for the gamma prior on tau:
# The parameter "a" reflects the confidence level in prior knowledge about tau.
a <- 0.5  # Sets prior knowledge to be equivalent to 1 sample observation

# Prior guess for tau, based on an expert's estimate of realistic mpg range
tau_guess <- 0.027  # Estimated based on (35 - 25)^2 / 1.645^2 = 36.97, thus tau = 1/36.97
b <- a / tau_guess  # Set b such that prior mean of tau equals tau_guess
cat("Value of b based on tau guess:", round(b, 2), "\n")

##### Posterior Information for tau and beta:

# Calculate posterior mean of beta (beta_hat) using the combined information from data and prior
XtX_inv <- solve(t(X) %*% X + t(xtil) %*% solve(D) %*% xtil)  # Invert only once for efficiency
beta_hat <- XtX_inv %*% (t(X) %*% y + t(xtil) %*% solve(D) %*% prior_data$ytil)
cat("Posterior mean for beta:", round(beta_hat, 3), "\n")

# Calculate posterior for s* (estimate of residual variance):
n <- length(y)
resid_dar <- t(y - X %*% beta_hat) %*% (y - X %*% beta_hat)
resid_pri <- t(prior_data$ytil - xtil %*% beta_hat) %*% solve(D) %*% (prior_data$ytil - xtil %*% beta_hat)
sstar <- as.numeric((resid_dar + resid_pri + 2 * b) / (n + 2 * a))

### Posterior Point Estimates for tau and sigma^2:
p_mean_tau <- 1 / sstar
p_mean_sig2 <- 1 / p_mean_tau
p_med_tau <- qgamma(0.50, shape=(n + 2 * a) / 2, rate=((n + 2 * a) / 2) * sstar)
p_med_sig2 <- 1 / p_med_tau

cat("Posterior mean for sigma^2:", round(p_mean_sig2, 3), "\n",
    "Posterior median for sigma^2:", round(p_med_sig2, 3), "\n")

### 95% Marginal Interval Estimate for tau (and sigma^2):
hpd95_tau <- hpd(qgamma, shape=(n + 2 * a) / 2, rate=((n + 2 * a) / 2) * sstar)
hpd95_sig2 <- 1 / hpd95_tau

cat("95% Posterior Interval for sigma^2:", round(sort(hpd95_sig2), 3), "\n")

### Point estimates for beta, given the point estimate for tau:
### NOT QUITE THE BEST APPROACH:
#   - The code uses a single point estimate (the posterior median `p_med_tau`) for \( \tau \) 
#when calculating the posterior intervals for \( \beta \). 
#However, Bayesian inference typically involves integrating over the posterior distribution of \( \tau \),
#not just using a single point estimate.
#- By using only the posterior median of \( \tau \), the intervals for \( \beta \) do not fully 
#reflect the uncertainty in \( \tau \). 
#This can lead to underestimation of the true posterior variance of \( \beta \).
# Posterior mean, median and 95% intervals for the betas:
post_beta <- t_CI_table(beta_hat, (1/p_med_tau)*solve(t(X)%*%X + t(xtil)%*%solve(D)%*%xtil) )
# Set row names to beta_0, beta_1, beta_2, beta_3
rownames(post_beta) <- paste0("beta_", 0:3)
post_beta

### A BETTER APPROACH TO GET POINT ESTIMATES FOR BETA:
# Generate posterior samples for tau and beta with 95% credible intervals

# Number of posterior samples to generate
Nsim <- 10000

# Generate posterior samples for tau based on its gamma distribution
# This reflects the uncertainty in tau based on observed data
tau_vals <- rgamma(Nsim, shape = (n + 2 * a) / 2, rate = ((n + 2 * a) / 2) * sstar)

# Generate posterior samples for beta based on tau samples
# For each sampled tau value, we sample beta from a multivariate normal distribution
# The distribution of beta depends on tau through its covariance matrix
cov_mat <- solve(t(X) %*% X + t(xtil) %*% solve(D) %*% xtil)  # Compute once for efficiency
beta_vals <- t(sapply(tau_vals, function(tau) {
  rmvnorm(1, mean = beta_hat, sigma = cov_mat / tau)
}))

# Calculate posterior summaries for beta: median and 95% credible interval bounds
# `apply` is used to obtain column-wise medians and quantiles across the samples
post_meds <- apply(beta_vals, 2, median)  # Posterior median for each beta
post_low <- apply(beta_vals, 2, quantile, probs = 0.025)  # Lower bound of 95% credible interval
post_up <- apply(beta_vals, 2, quantile, probs = 0.975)  # Upper bound of 95% credible interval

# Summarizing posterior estimates for beta in a readable format
names_preds <- c("intercept", "displacement", "horsepower", "weight")  # Predictor names
beta_post_summary <- data.frame(
  `.025Quantile` = post_low,
  `.5Quantile` = post_meds,
  `.975Quantile` = post_up,
  row.names = names_preds
)

# Display posterior summary table for beta coefficients
print(beta_post_summary)

# Load another dataset to compare results: A smaller built-in R dataset for reference
# Load mpg (response) and key predictors from mtcars dataset for illustration
y <- mtcars$mpg
x1 <- mtcars$disp
x2 <- mtcars$hp
x3 <- mtcars$wt

# Redo analysis with both types of priors...

###################################################
### Bayesian Regression with rstanarm
###################################################

# Load necessary libraries
library(rstanarm)       # For Bayesian regression modeling
library(tidyverse)      # For data manipulation and visualization
library(bayesplot)      # For visualizing MCMC diagnostics
library(broom.mixed)    # For summarizing Bayesian model output
library(tidybayes)      # For handling Bayesian models in tidy format

# Notes on Prior Specification:
# The first value in `prior_intercept` (22 here) represents the expected mpg 
# for a "typical" observation (e.g., a car with average characteristics).
# `prior` specifies weakly informative priors for predictors.

# Define the Bayesian linear regression model with rstanarm
car_mod <- stan_glm(
  mpg ~ displacement + horsepower + weight, 
  data = auto_data,
  family = gaussian,  # Gaussian family for linear regression
  prior_intercept = normal(22, 20),  # Prior for intercept, centered at 22
  prior = normal(c(0.325, -1.563, 0.025), 40, autoscale = TRUE),  # Priors for predictors
  prior_aux = exponential(1, autoscale = TRUE),  # Prior for auxiliary (error) parameter
  chains = 4,    # Number of MCMC chains
  iter = 10000   # Number of iterations per chain
)

# MCMC Diagnostics to assess convergence and sampling quality
# Trace plot for checking mixing across chains
mcmc_trace(car_mod, size = 0.1)

# Density overlay plot to examine distribution of MCMC samples
mcmc_dens_overlay(car_mod)

# Autocorrelation plot to assess sampling independence
mcmc_acf(car_mod)

# Effective sample size ratio and R-hat statistic for convergence diagnostics
cat("Effective Sample Size Ratio:\n")
print(neff_ratio(car_mod))

cat("\nR-hat Diagnostics:\n")
print(rhat(car_mod))

# Summarize the posterior distributions of parameters
# Provides median, 90% credible intervals, and auxiliary parameters
broom_summary <- broom.mixed::tidy(
  car_mod, 
  effects = c("fixed", "aux"), 
  conf.int = TRUE, 
  conf.level = 0.90
)

cat("\nPosterior Summary with 90% Credible Intervals:\n")
print(broom_summary)

# Detailed summary of posterior distributions with specific credible intervals
summary(car_mod, digits = 4, probs = c(0.025, 0.5, 0.975))

# Classical regression model for comparison
freq_mod <- lm(mpg ~ displacement + horsepower + weight, data = auto_data)
cat("\nClassical (Frequentist) Regression Summary:\n")
summary(freq_mod)

# Posterior Prediction for a New Observation
# Define new data for prediction (car with specific displacement, horsepower, and weight)
new_obs <- data.frame(displacement = 150, horsepower = 100, weight = 2.7)

# Generate posterior predictive samples
# This provides samples from the posterior predictive distribution for the new data point
shortcut_pred <- posterior_predict(car_mod, newdata = new_obs)

# Point Prediction - Calculate the median of the predictive distribution
point_pred <- median(shortcut_pred)
cat("Median of predictive distribution for new observation:", point_pred, "\n")

# 95% Posterior Credible Interval for the Prediction
# Provides uncertainty bounds around the point prediction
cred_int <- quantile(shortcut_pred, probs = c(0.025, 0.975))
cat("95% posterior credible interval for prediction:", cred_int, "\n")

# Plot the predictive distribution
# Visualizing the distribution of predictions for the new observation
mcmc_dens(shortcut_pred) + 
  xlab("Predicted MPG for a car with displacement = 150, HP = 100, weight = 2.7") +
  ggtitle("Posterior Predictive Distribution for MPG")

###################################################
### A Bayesian Approach to Model Selection
###################################################

# Load necessary libraries
library(MASS)  # for matrix operations if needed

# Function to calculate the marginal probability of y given X using a Bayesian approach
# Inputs:
#   y  - response vector
#   X  - design matrix of predictors
#   g  - prior scaling parameter (default is length(y))
#   nu0 - prior degrees of freedom
#   s20 - prior guess for sigma^2 (default uses residual variance from linear model)
log_Py_x <- function(y, X, g = length(y), nu0 = 1, 
                  s20 = try(summary(lm(y ~ -1 + X))$sigma^2, silent = TRUE)) {
  n <- nrow(X)
  p <- ncol(X)
  
  if (p == 0) {
    Hg <- 0
    s20 <- mean(y^2)
  } else {
    Hg <- (g / (g + 1)) * X %*% solve(t(X) %*% X) %*% t(X)
  }
  
  SSRg <- t(y) %*% (diag(1, nrow = n) - Hg) %*% y
  
  -.5 * (n * log(pi) + p * log(1 + g) + (nu0 + n) * log(nu0 * s20 + SSRg) - nu0 * log(nu0 * s20)) +
    lgamma((nu0 + n) / 2) - lgamma(nu0 / 2)
}

# Oxygen uptake data example
y <- c(-0.87, -10.74, -3.27, -1.97, 7.50, -7.25, 17.05, 4.96, 10.40, 11.05, 0.26, 2.51)
x1 <- c(0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1)  # indicator for group (0 = running, 1 = aerobics)
x2 <- c(23, 22, 22, 25, 27, 20, 31, 23, 27, 28, 22, 24)  # ages of subjects
x3 <- x1 * x2  # interaction term between group and age
X <- cbind(1, x1, x2, x3)  # design matrix with intercept and predictors

### Initialize Gibbs Sampler for Bayesian Model Selection
z <- rep(1, ncol(X))  # start with all predictors in the model
log_Py_c <- log_Py_x(y, X[, z == 1, drop = FALSE])  # initial log-probability of y given X with all predictors
S <- 10000  # number of Gibbs sampler iterations
Z <- matrix(NA, nrow = S, ncol = ncol(X))  # storage matrix for sampled model indicators

### Gibbs Sampling Process for Model Selection
for (s in 1:S) {
  for (j in sample(seq_along(z))) {  # randomly iterate over predictors
    zp <- z
    zp[j] <- 1 - zp[j]  # toggle inclusion of predictor j
    lpy_p <- log_Py_x(y, X[, zp == 1, drop = FALSE])  # calculate log-marginal for proposed model
    
    # Calculate acceptance probability for the proposed model
    r <- (lpy_p - log_Py_c) * (-1)^(zp[j] == 0)
    z[j] <- rbinom(1, 1, 1 / (1 + exp(-r)))  # accept/reject step based on acceptance probability
    if (z[j] == zp[j]) log_Py_c <- lpy_p  # update log-probability if model changes
  }
  Z[s, ] <- z  # store current model indicators
}
# Considering all possible subsets:
all_z_vecs <- unique(Z, MARGIN = 1)  # unique models encountered in sampling

# Calculate the probability for each unique subset configuration
all_z_probs <- rowSums(apply(Z, 1, function(row) apply(all_z_vecs, 1, function(poss_row) all(row == poss_row)))) / S
sum(all_z_probs)

# Combine model configurations and probabilities, then sort by probability
all_res <- cbind(all_z_vecs, all_z_probs)
all_res <- all_res[order(-all_z_probs), ]
print(all_res)

# Constrained model selection: Include only models with interaction term if main effects are present
# Define constrained models manually
sub_z_vecs <- matrix(c(
  1, 0, 0, 0,
  1, 1, 0, 0,
  1, 0, 1, 0,
  1, 1, 1, 0,
  1, 1, 1, 1
), ncol = 4, byrow = TRUE)

# Calculate probabilities for each constrained model configuration
sub_z_probs <- rowSums(apply(Z, 1, 
                         function(row) apply(sub_z_vecs, 1, function(poss_row) all(row == poss_row)))) / S

sub_z_probs = sub_z_probs / sum(sub_z_probs)

# Combine constrained results and sort by probability
sub_res <- cbind(sub_z_vecs, sub_z_probs)
sub_res <- sub_res[order(-sub_z_probs), ]
print(sub_res)

# Small automobile data set analysis
# Using mtcars data for a similar analysis with different predictors
y <- mtcars$mpg
x1 <- mtcars$disp
x2 <- mtcars$hp
x3 <- mtcars$wt
X <- cbind(1, x1, x2, x3)

# Further analysis can be performed here for mtcars dataset using the same approach

###################################################
### Posterior Predictive Distribution with Regression
###################################################

# Load necessary libraries for multivariate t-distribution sampling
library(MASS)  # for rmvt function

##### Regression example:
# Using the small automobile data set built into R
# Define response variable and predictors
y <- mtcars$mpg
X <- cbind(1, mtcars$disp, mtcars$hp, mtcars$wt)  # Adding intercept directly

# Estimate regression coefficients (bhat) and residual variance (sig2hat)
XtX_inv <- solve(crossprod(X))  # Efficiently calculates (X'X)^-1
bhat <- XtX_inv %*% t(X) %*% y  # Coefficients for predictors
sig2hat <- as.numeric(crossprod(y - X %*% bhat) / (nrow(X) - ncol(X)))  # Residual variance estimate

# Set up posterior predictive sampling for observed X values
Xstar <- X
df <- nrow(X) - ncol(X)  # Degrees of freedom for the t-distribution
sc_fact <- sig2hat * df / (df - 2)  # Scale factor for predictive covariance

# Posterior predictive covariance matrix for observed X values
my_Sig <- sc_fact * (diag(nrow(Xstar)) + Xstar %*% XtX_inv %*% t(Xstar))

# Generate posterior predictive samples for observed X values
# Sampling from multivariate t-distribution centered at fitted values
post_pred_err <- rmvt(n = 500, sigma = my_Sig, df = df)
post_pred_samp <- matrix(Xstar %*% bhat, nrow = nrow(post_pred_err), 
                         ncol = ncol(post_pred_err), byrow = TRUE) + post_pred_err
# Plot observed mpg against posterior predictive samples
mat_ys <- matrix(y, nrow = nrow(post_pred_samp), ncol = ncol(post_pred_samp), byrow = TRUE)

matplot(mat_ys, post_pred_samp, pch = 'o', cex = 0.3, 
        xlab = "Observed mpg", 
        ylab = "Predicted mpg (Posterior Predictive)", 
        main = "Posterior Predictive Distribution of MPG")
abline(0, 1, col = "red", lty = 2, lwd = 2)  # Add y=x reference line

# Examine model residuals to check for lack of fit
resids <- y - X %*% bhat
resid_an <- cbind(mtcars[, c("mpg", "disp")], Residuals = resids)[order(y), ]
print(resid_an)

### Predicting mileage for a new car with specific characteristics
Xstar_new <- matrix(c(1, 150, 100, 2.7), nrow = 1)  # New car with specified features

# Posterior predictive covariance for new prediction
my_Sig_new <- sc_fact * (1 + Xstar_new %*% XtX_inv %*% t(Xstar_new))

# Generate posterior predictive samples for new data
post_pred_err_new <- rmvt(n = 500, sigma = my_Sig_new, df = df)
post_pred_samp_new <- as.numeric(Xstar_new %*% bhat) + post_pred_err_new

# Calculate 95% prediction interval and median prediction for new data
pred_int <- quantile(post_pred_samp_new, probs = c(0.025, 0.5, 0.975))
names(pred_int) <- c("2.5%", "Median", "97.5%")
print(pred_int)

############################################################
### Using 'bayesrules' Package for Posterior Predictive Analysis
### Example: Bayesian Regression on the 'mtcars' Dataset
############################################################

# Load necessary libraries
library(bayesrules)
library(ggplot2)  # for plotting with ggplot2

# Load the 'mtcars' dataset
data("mtcars")

# Fit a Bayesian linear regression model to predict mpg based on displacement, horsepower, and weight
mod_mtcars <- stan_glm(
  mpg ~ disp + hp + wt, 
  data = mtcars,
  family = gaussian,
  prior_intercept = normal(22, 20),  # Prior for intercept centered around typical mpg
  prior = normal(c(0.325, -1.563, 0.025), 40, autoscale = TRUE),  # Priors for predictors
  prior_aux = exponential(1, autoscale = TRUE),  # Prior for residual scale parameter
  chains = 4, iter = 10000  # MCMC specifications
)

# Generate posterior predictive samples for observed data in 'mtcars'
preds <- posterior_predict(mod_mtcars, newdata = mtcars)

# Visualize posterior predictive intervals for mpg as a function of weight
ppc_plot <- ppc_intervals(
  y = mtcars$mpg,  # Observed values of mpg
  yrep = preds,    # Posterior predictive samples
  x = mtcars$wt,   # Predictor variable for x-axis
  prob = 0.5, prob_outer = 0.95  # Set inner and outer credible intervals
) +
  labs( x = "Weight (wt)",
        y = "Miles per Gallon (mpg)",
        title = "Posterior Predictive Intervals for MPG by Weight")

# Display posterior predictive intervals plot
print(ppc_plot)

# Examine posterior predictive accuracy metrics
pred_summ <- prediction_summary(mod_mtcars, data = mtcars)
print(pred_summ)

# Metrics Explanation:
# - Median Absolute Error (MAE): Measures typical deviation between observed and predictive means.
# - Scaled MAE: Indicates typical deviation in terms of standard deviations from the predictive mean.
# - within_50 and within_95: Proportions of observed values within 50% and 95% posterior intervals.

#################################################
# Cross-Validation to Measure Predictive Accuracy
# Cross-validation divides data into 10 subsets, sequentially leaving out each subset for validation.

# Perform 10-fold cross-validation
set.seed(84735)
cv_res <- prediction_summary_cv(
  model = mod_mtcars, data = mtcars, k = 10
)

# Display cross-validation results for each fold
print(cv_res$folds)

# Display average cross-validation metrics across all folds
print(cv_res$cv)

################################################################################
### Model Comparison Using ELPD (loo) and BIC for Bayesian Models on mtcars Data
################################################################################

# Load necessary libraries
library(rstanarm)
library(loo)
library(bayesplot)

# Fit initial model with three predictors
mod_mtcars <- stan_glm(
  mpg ~ disp + hp + wt, 
  data = mtcars,
  family = gaussian,
  prior_intercept = normal(82.5, 20),  # Center prior intercept on typical mpg
  prior = normal(c(0.325, -1.563, 0.025), 40, autoscale = TRUE),  # Priors for predictors
  prior_aux = exponential(1, autoscale = TRUE),  # Prior for residual error scale
  chains = 4, iter = 10000
)

# Calculate ELPD for mod_mtcars
mod_elpd <- loo(mod_mtcars)
cat("ELPD Estimates for Initial Model (mpg ~ disp + hp + wt):\n")
print(mod_elpd$estimates)

#The output gives below information
#elpd_loo (Expected Log Predictive Density for LOO-CV)
#gives an overall measure of how well the model is expected to predict new data points. 
#Higher values indicate better predictive performance (less negative is better in this case since it’s negative).

#p_loo (Effective Number of Parameters):
#accounts for model complexity and flexibility. 
#If the model overfits, this number could be high, as it reflects how much each observation influences the model fit.

#looic (Leave-One-Out Information Criterion):
#calculated as \( \text{looic} = -2 \times \text{elpd\_loo} \). 
#This metric serves as an information criterion similar to AIC or DIC.
#Lower values indicate better predictive performance, with `looic = 160.06` being comparable across models.

# Define alternative model formulas to compare
mod_formulas <- list(
  mpg ~ disp + hp,
  mpg ~ disp + wt
)

# Fit models based on alternative formulas, dynamically setting priors
models <- lapply(mod_formulas, function(formula) {
  n_predictors <- length(all.vars(formula)) - 1  # Exclude the response variable
  stan_glm(
    formula, data = mtcars,
    family = gaussian,
    prior_intercept = normal(82.5, 20),
    prior = normal(rep(0, n_predictors), 40, autoscale = TRUE), 
    prior_aux = exponential(1, autoscale = TRUE),
    chains = 4, iter = 10000
  )
})

# Calculate ELPD and BIC for each model and store results, adding error handling
mod_names <- c("mpg ~ disp + hp + wt", "mpg ~ disp + hp", "mpg ~ disp + wt")

# Calculate ELPD
elpd_res <- sapply(c(list(mod_mtcars), models), function(mod) {
  elpd <- tryCatch(loo(mod)$estimates[1, 1], error = function(e) NA)  # Return NA if loo fails
})

elpd_res

# Identify the best model based on ELPD and BIC
best_mod_elpd <- mod_names[which.max(elpd_res) ]

cat("\nBest Model Based on ELPD:\n")
print(best_mod_elpd)

## Also see BIC values to compare regression models...
# Calculate BIC
# Define function to calculate BIC for a stan_glm model
calc_BIC <- function(model, data) {
  log_lik_mat <- as.matrix(log_lik(model))
  log_lik <- sum(apply(log_lik_mat, 2, mean))  # Total log-likelihood (mean over iterations)
  num_params <- length(fixef(model))  # Number of effective parameters (fixed effects)
  num_observations <- nrow(data)  # Number of observations in the dataset
  BIC_value <- -2 * log_lik + num_params * log(num_observations)  # BIC formula
  return(BIC_value)
}

# Calculate BIC for each model
BIC_res <- sapply(c(list(mod_mtcars), models), calc_BIC, data = mtcars)
print(BIC_res)

best_mod_BIC<- mod_names[which.min(BIC_res) ]

cat("\nBest Model Based on BIC:\n")
print(best_mod_BIC)

