## Lecture Slides 5 & Bayes Rules! Chapter 5 R Examples 
## Enhanced for Efficiency

########################################################
# Chapter 5 R Examples: Fraud Risk Phone Call Analysis
########################################################

# Load necessary libraries
library(bayesrules)
library(tidyverse)
library(ggplot2)
library(TeachingDemos)  # for HPD credible intervals
library(pscl)            # for inverse gamma distribution (may require installation)
library(patchwork)  # For easier plot composition

# Set the working directory to this script's location.
# The rstudioapi lookup needs a running RStudio session and a saved document,
# so it is guarded: when run via Rscript/source() outside RStudio, or from an
# unsaved buffer (empty path), the working directory is left unchanged instead
# of aborting the whole script.
library("rstudioapi")  # Load rstudioapi package
if (rstudioapi::isAvailable()) {
  script_path <- rstudioapi::getActiveDocumentContext()$path
  if (isTRUE(nzchar(script_path))) {
    setwd(dirname(script_path))
  }
}
#getwd()

# Exploring Gamma priors for λ (daily rate of fraud risk phone calls)

# Build the density plot of a Gamma(shape, rate) prior, titled
# "Gamma(<shape>,<rate>)".
titled_gamma <- function(shape, rate) {
  plot_gamma(shape = shape, rate = rate) +
    ggtitle(paste0("Gamma(", shape, ",", rate, ")"))
}

# Three candidate priors: Gamma(5, 1), Gamma(10, 2) and Gamma(15, 3)
gam_5_1 <- titled_gamma(5, 1)
gam_10_2 <- titled_gamma(10, 2)
gam_15_3 <- titled_gamma(15, 3)

# Stack the three plots vertically (patchwork's `/` operator)
gam_5_1 / gam_10_2 / gam_15_3

# or, equivalently
#(gam_5_1 + gam_10_2 + gam_15_3) +  plot_layout(ncol = 1, nrow = 3)

########################################################
# Posterior Distribution Analysis with Gamma-Poisson Model
########################################################

# Observed data: 11 calls in total across 4 days
obs_calls <- 11
days_obs <- 4

# Gamma(10, 2) prior hyperparameters, named once and reused in both calls
prior_shape <- 10
prior_rate <- 2

# Plot the Gamma-Poisson model: posterior distribution, prior, and likelihood
plot_gamma_poisson(
  shape = prior_shape,
  rate = prior_rate,
  sum_y = obs_calls,
  n = days_obs
)

# Tabulate the Gamma-Poisson model: posterior distribution and prior
post_summary <- summarize_gamma_poisson(
  shape = prior_shape,
  rate = prior_rate,
  sum_y = obs_calls,
  n = days_obs
)

# Show the summary table
print(post_summary)

########################################################
# Credible Intervals for Bayesian Models
########################################################

# Format an interval (confidence or credible) as the string "(lower,upper)".
#   interval: numeric vector; its first two elements are the interval bounds
#   r:        number of decimal places the bounds are rounded to (default 3)
# Returns a single character string; nothing is printed by this function.
print_int <- function(interval, r = 3) {
  bounds <- round(interval, digits = r)
  paste0("(", paste(bounds[1:2], collapse = ","), ")")
}

########################################################
# Fraud Risk Example: 90% Credible Intervals
########################################################

# Parameters of the Gamma distribution the intervals are computed from.
# These equal the Gamma(10, 2) prior updated with 11 calls over 4 days
# (10 + 11 = 21, 2 + 4 = 6).
alpha <- 21
beta <- 6

# 90% quantile-based (equal-tailed) credible interval for the daily mean
# number of fraud risk calls: 5% of the probability is cut from each tail
quant_int <- qgamma(c(0.05, 0.95), shape = alpha, rate = beta)
cat("90% Quantile Interval for mu:", print_int(quant_int), "\n")

# 90% Highest Posterior Density (HPD) credible interval using hpd(), which
# takes the quantile function of the distribution.
# The printed label states 90% so that it matches conf = 0.90.
hpd_int <- hpd(qgamma, shape = alpha, rate = beta, conf = 0.90)
cat("90% HPD Interval for mu:", print_int(hpd_int), "\n")

########################################################
# Coin Tossing Example: 95% Credible Intervals for Theta
########################################################

# Data for coin tossing: 2 successes in 10 trials
successes <- 2
trials <- 10

# Both intervals below are taken from a
# Beta(successes + 1, trials - successes + 1) = Beta(3, 9) distribution
# for theta (probability of success).

# 95% quantile-based (equal-tailed) credible interval for theta.
# The label names the interval type and parameter actually computed here.
quant_int_theta <- qbeta(c(0.025, 0.975), successes + 1, trials - successes + 1)
cat("95% Quantile Interval for theta:", print_int(quant_int_theta), "\n")

# 95% HPD credible interval for theta using hpd() with the Beta quantile
# function
hpd_int_theta <- hpd(qbeta, shape1 = successes + 1,
                     shape2 = trials - successes + 1, conf = 0.95)
cat("95% HPD Interval for theta:", print_int(hpd_int_theta), "\n")

########################################################
# Conjugate Priors with Normal Data (Midge Data Example)
########################################################

# Midge data (in mm)
y <- c(1.64, 1.70, 1.72, 1.74, 1.82, 1.82, 1.82, 1.90, 2.08)

# Basic statistics
n <- length(y)   # Sample size
ybar <- mean(y)  # Sample mean

# Known variance: sigma^2 = 0.01
sig2 <- 0.01

# Normal prior on mu
delta <- 1.9  # Prior mean (delta)
tau <- 0.3    # Prior standard deviation (tau)

# Grid of mu values on which both densities are evaluated for plotting
mu_seq <- seq(0.5, 3, length.out = 300)

# Prior density of mu on the grid
pri_mu_pdf <- dnorm(mu_seq, mean = delta, sd = tau)

# Posterior of mu: the mean is a precision-weighted combination of the prior
# mean and the data; the denominator is prior precision + data precision
prior_precision <- 1 / tau^2
data_precision <- n / sig2
post_mean <- (delta / tau^2 + sum(y) / sig2) /
  (prior_precision + data_precision)
post_sd <- sqrt(sig2 * tau^2 / (sig2 + n * tau^2))
post_mu_pdf <- dnorm(mu_seq, mean = post_mean, sd = post_sd)

# Plot prior (dashed blue) and posterior (solid red) on the same axes
density_df <- data.frame(
  mu = mu_seq,
  prior = pri_mu_pdf,
  posterior = post_mu_pdf
)
ggplot(density_df, aes(x = mu)) +
  geom_line(
    aes(y = prior),
    linetype = "dashed", color = "blue", linewidth = 1, alpha = 0.8
  ) +
  geom_line(aes(y = posterior), color = "red", linewidth = 1) +
  labs(
    title = "Prior and Posterior Distributions for mu (Known Variance)",
    x = expression(mu),
    y = "density"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Point estimates for mu (posterior mean and posterior median)
post_median <- qnorm(0.50, mean = post_mean, sd = post_sd)
cat("Posterior Mean:", round(post_mean, 3), "\n")
cat("Posterior Median:", round(post_median, 3), "\n")

###############################################
# Interval Estimate for mu and sigma^2 (HPD)
###############################################

# Sum of the observations (also reused later when sampling mu)
y_sum <- sum(y)

# 95% HPD credible interval for mu from its Normal posterior
hpd_mu <- hpd(qnorm, mean = post_mean, sd = post_sd, conf = 0.95)
cat("95% HPD Interval for mu:", print_int(hpd_mu), "\n")

# Plot the Normal-Normal model for mu. The sample mean is passed as the
# computed `ybar` instead of a hand-typed rounded copy, so the plot stays in
# sync with the data vector `y`.
plot_normal_normal(mean = delta, sd = tau, sigma = sqrt(sig2),
                   y_bar = ybar, n = n)

#########################################################
# Inference for sigma^2 when both Mean and Variance are Unknown
#########################################################

# Inverse-gamma prior on sigma^2
alpha_pri <- 18
beta_pri <- 0.34  # Based on guess: E[sig^2] = 0.02, var[sig^2] = 0.005^2

# Posterior hyperparameters of the inverse-gamma distribution for sigma^2.
# NOTE(review): the shape update alpha + (n - 1) / 2 looks like the marginal
# posterior under a flat prior on mu, while the mu draws further down shrink
# towards delta -- confirm against the lecture derivation.
sum_sq_dev <- sum(y^2) - n * (ybar^2)  # sum of squared deviations from ybar
alpha_post <- alpha_pri + n / 2 - 0.5
beta_post <- beta_pri + 0.5 * sum_sq_dev
c(alpha_post, beta_post)

# Point estimates for sigma^2: inverse-gamma mean beta / (alpha - 1) and median
post_mean_sig2 <- beta_post / (alpha_post - 1)
post_med_sig2 <- qigamma(0.50, alpha = alpha_post, beta = beta_post)

cat("Posterior Mean for sigma^2:", round(post_mean_sig2, 4), "\n")
cat("Posterior Median for sigma^2:", round(post_med_sig2, 4), "\n")

# Marginal 95% HPD interval for sigma^2 from the inverse-gamma quantile
# function (conf = 0.95 is hpd()'s default, spelled out here)
hpd_sig2 <- hpd(qigamma, alpha = alpha_post, beta = beta_post, conf = 0.95)
cat("95% HPD Interval for sigma^2:", print_int(hpd_sig2, 4), "\n")

# Monte Carlo draws: first sigma^2, then mu given each sigma^2 draw.
# No seed is set, so the sampled intervals vary slightly from run to run.
n_draws <- 10^6
sig2_vals <- rigamma(n = n_draws, alpha = alpha_post, beta = beta_post)

mu_vals <- rnorm(
  n = n_draws,
  mean = (sum(y) + delta) / (n + 1),
  sd = sqrt(sig2_vals / (n + 1))
)

# Empirical 95% HPD intervals computed from the draws
sig2_hpd_sampled <- emp.hpd(sig2_vals)
mu_hpd_sampled <- emp.hpd(mu_vals)
cat("95% HPD Interval for sigma^2 (Sampled):",
    print_int(sig2_hpd_sampled, 4), "\n")
cat("95% HPD Interval for mu (Sampled):", print_int(mu_hpd_sampled, 4), "\n")

#############################
# Bayesian Inference for mu
#############################

#### 1. Sampling for Posterior of sigma^2 and mu | sigma^2 ####

# Draw sigma^2 values from the posterior inverse gamma distribution
sig2_vals <- rigamma(n = 10^6, alpha = alpha_post, beta = beta_post)

# Draw mu values from its Normal distribution given each sigma^2 draw.
# The centre is a weighted average of the data sum and the prior mean delta,
# with s0 as the weight on delta.
delta <- 1.9
s0 <- 1  # Low value of s0 indicates lack of prior knowledge
mu_center <- (y_sum + delta * s0) / (n + s0)
mu_scale <- sqrt(sig2_vals / (n + s0))  # one sd per sigma^2 draw
mu_vals <- rnorm(n = 10^6, mean = mu_center, sd = mu_scale)

#### 2. Point Estimates ####
# Sample medians of the draws as point estimates for sigma^2 and mu
sig2_median <- median(sig2_vals)
mu_median <- median(mu_vals)
cat("Posterior Median for sigma^2:", round(sig2_median, 4), "\n")
cat("Posterior Median for mu:", round(mu_median, 4), "\n")

#### 3. 95% HPD Interval Estimates ####
# Empirical 95% HPD credible intervals for sigma^2 and mu from the draws
sig2_hpd <- emp.hpd(sig2_vals)
mu_hpd <- emp.hpd(mu_vals)
cat("95% HPD Interval for sigma^2:", print_int(sig2_hpd, 4), "\n")
cat("95% HPD Interval for mu:", print_int(mu_hpd, 4), "\n")

#########################################################
#### 4. Concussion Example (Hippocampal Volume Data) ####
#########################################################
# Load the data and filter for concussed subjects
data(football)
conc_subj <- football %>%
  filter(group == "fb_concuss")

# Sample mean hippocampal volume (as a plain number) and sample size
mean_vol <- conc_subj %>%
  summarize(mean_vol = mean(volume)) %>%
  pull(mean_vol)
n_subj <- nrow(conc_subj)
cat("Mean Hippocampal Volume:", mean_vol, "\n")

# Visualizing the hippocampal volume distribution to check normality assumption
ggplot(conc_subj, aes(x = volume)) +
  geom_density() +
  labs(title = "Density Estimate of Hippocampal Volumes",
       x = expression(paste("Hippocampal Volume (", cm^3, ")")),
       y = "density") +
  theme_minimal()

#### 5. Posterior Distribution and Summary ####
# Show posterior distribution of mu (hippocampal volume) along with prior and
# likelihood: Normal(6.5, 0.4) prior, known sigma = 0.5
plot_normal_normal(mean = 6.5, sd = 0.4, sigma = 0.5,
                   y_bar = mean_vol, n = n_subj)

# Summarize the prior and posterior for hippocampal volume; keep the table so
# the posterior parameters can be reused instead of being re-typed by hand
vol_summary <- summarize_normal_normal(mean = 6.5, sd = 0.4, sigma = 0.5,
                                       y_bar = mean_vol, n = n_subj)
print(vol_summary)

# 95% HPD credible interval for mu (hippocampal volume), computed from the
# posterior row of the summary rather than from rounded, hard-coded values
vol_post <- vol_summary[vol_summary$model == "posterior", ]
hpd_95_mu <- hpd(qnorm, mean = vol_post$mean, sd = vol_post$sd, conf = 0.95)
cat("95% HPD Interval for mu (Hippocampal Volume):",
    print_int(hpd_95_mu, 3), "\n")

########################################################
########################################################
# R Code for Chapter 5 Content
########################################################
########################################################

# Load necessary packages
library(bayesrules)
library(tidyverse)

# Beta(1, 2) prior plot
plot_beta(alpha = 1, beta = 2)

# Gamma(10, 2) prior for λ (fraud risk calls)
plot_gamma(shape = 10, rate = 2)

# Poisson likelihood plot for the observed daily call counts
calls_per_day <- c(6, 2, 2, 1)
plot_poisson_likelihood(y = calls_per_day, lambda_upper_bound = 10)

# Gamma-Poisson posterior plot for λ (fraud risk calls):
# 11 calls in total over 4 days
plot_gamma_poisson(
  shape = 10, rate = 2,
  sum_y = 11, n = 4
)

# Summarize the Gamma-Poisson posterior
gamma_poisson_summary <- summarize_gamma_poisson(
  shape = 10, rate = 2,
  sum_y = 11, n = 4
)
print(gamma_poisson_summary)

# Normal(6.5, 0.4) prior for μ (hippocampal volume)
plot_normal(mean = 6.5, sd = 0.4)

# Load the football data and keep only the concussion subjects
data(football)
conc_subj <- filter(football, group == "fb_concuss")

# Mean hippocampal volume for concussion subjects
# (stored as a one-row summary table, not a bare number)
mean_vol <- summarize(conc_subj, mean_vol = mean(volume))
print(mean_vol)

# Density plot of hippocampal volume for concussion subjects
ggplot(conc_subj, aes(x = volume)) +
  geom_density(fill = "lightblue") +
  labs(
    title = "Density of Hippocampal Volume for Concussion Subjects",
    x = "Hippocampal Volume",
    y = "Density"
  ) +
  theme_minimal()

# Normal likelihood plot for mean hippocampal volume (known sigma = 0.5)
plot_normal_likelihood(y = conc_subj$volume, sigma = 0.5)

# Normal-Normal posterior plot for μ (average hippocampal volume)
plot_normal_normal(
  mean = 6.5, sd = 0.4, sigma = 0.5,
  y_bar = 5.735, n = 25
)

# Summarize the Normal-Normal posterior for μ
normal_normal_summary <- summarize_normal_normal(
  mean = 6.5, sd = 0.4, sigma = 0.5,
  y_bar = 5.735, n = 25
)
print(normal_normal_summary)

