## Lecture Slides 8 & Bayes Rules! Chapter 6 R Examples 
## Enhanced for Efficiency

# Load necessary packages
library(bayesrules)
library(tibble)
library(ggplot2)
library(purrr)
library(TeachingDemos) #for HPD interval

library(tidyverse)  # For data manipulation and plotting
library(MCMCpack)   # For HPD interval calculations


# Set the working directory to the source file location.
# This only works inside RStudio with a saved document: outside RStudio
# getActiveDocumentContext() errors, and for an unsaved document the reported
# path is "" so setwd() would fail. In both cases the working directory is
# left untouched instead of aborting the script.
library("rstudioapi")  # Load rstudioapi package
local({
  if (rstudioapi::isAvailable()) {
    doc_path <- getActiveDocumentContext()$path
    if (nzchar(doc_path)) {
      setwd(dirname(doc_path))
    }
  }
})
#getwd()

# Format an interval (e.g. a confidence or credible interval) as the string
# "(lower,upper)", with both endpoints rounded to `r` decimal places.
#   interval: numeric vector; only its first two elements are used
#   r:        number of decimal places passed to round() (default 3)
# Returns the formatted interval as a character string.
print_int <- function(interval, r = 3) {
  bounds <- round(interval, digits = r)
  lower <- bounds[1]
  upper <- bounds[2]
  paste0("(", lower, ",", upper, ")")
}

####################################################
# Chapter 6 R Examples:
####################################################

# Load necessary libraries
library(tidyverse)  # For data manipulation and plotting
library(MCMCpack)   # For HPD interval calculations

# Plot the Gamma(3, 1) prior
plot_gamma(shape = 3, rate = 1)

# Steps 1-3: build a grid of 501 lambda values on [0, 15], evaluate the
# Gamma(3, 1) prior and the Poisson likelihood of the two observed counts
# (2 and 8) at each grid point, then normalise prior * likelihood so the
# discretized posterior sums to 1.
# `length.out` is written in full rather than relying on partial matching of
# `length`, matching the other grid definitions in this file.
grid_data <- tibble(lam_grid = seq(from = 0, to = 15, length.out = 501)) %>%
  mutate(prior = dgamma(lam_grid, shape = 3, rate = 1),
         likeli = dpois(2, lam_grid) * dpois(8, lam_grid),
         unnorm = likeli * prior,
         post = unnorm / sum(unnorm))

# Set seed for reproducibility (optional)
set.seed(84735)

# Step 4: Sample from the discretized posterior: draw 10000 grid rows with
# replacement, each row weighted by its posterior probability `post`.
post_sam <- grid_data %>%
  sample_n(size = 10000, weight = post, replace = TRUE)

# Histogram of the grid simulation with the Gamma(13, 3) pdf overlaid
# (dgamma's positional arguments are shape = 13, rate = 3).
ggplot(post_sam, aes(x = lam_grid)) +
  geom_histogram(aes(y = after_stat(density)), color = "white", bins = 30) +
  stat_function(fun = dgamma, args = list(13, 3), color = "blue") +
  labs(x = expression(lambda), y = "density",
       title="Histogram of posterior samples with Gamma(13,3) density") +
  lims(x = c(0, 10))

# Approximate posterior summary statistics computed from the grid samples
post_mean <- mean(post_sam$lam_grid)    # Posterior mean
post_med <- median(post_sam$lam_grid)   # Posterior median
# 95% quantile-based credible interval (2.5% and 97.5% sample quantiles)
post_quant <- quantile(post_sam$lam_grid, probs = c(0.025, 0.975))
# 95% HPD interval estimated from the samples
post_hpd <- emp.hpd(post_sam$lam_grid, conf = 0.95)

# Display the posterior summary.
# All lines use cat()'s default separator so that the label and the value are
# separated by a space consistently (the quantile line previously used
# sep = "", which glued the interval directly onto the colon).
cat("Posterior mean:", post_mean, "\n")
cat("Posterior median:", post_med, "\n")
cat("95% credible interval (quantile-based):", print_int(post_quant), "\n")
cat("95% HPD interval:", print_int(post_hpd), "\n")

####################################################
##### Example 1, Chapter 6a notes (Birth rate data)
####################################################

# Gamma(alpha_pri, beta_pri) prior parameters, used for both rows below
alpha_pri <- 2
beta_pri <- 1

# Data summaries, one row per group: total count (sum_y) and sample size (n)
data_summary <- tibble(sum_y = c(217, 66), n = c(111, 44))

# For each row, draw 10000 values from
# Gamma(shape = alpha_pri + sum_y, rate = beta_pri + n) and keep the draws in
# the list-column `theta_post_vals` (rows are processed in order).
post_vals <- mutate(
  data_summary,
  theta_post_vals = map2(
    sum_y, n,
    function(total, size) {
      rgamma(10000, shape = alpha_pri + total, rate = beta_pri + size)
    }
  )
)

# Pull the two sets of draws out of the list-column
theta1_post_vals <- post_vals[["theta_post_vals"]][[1]]
theta2_post_vals <- post_vals[["theta_post_vals"]][[2]]

# Monte Carlo estimate of P(theta1 > theta2): the share of paired draws in
# which the first exceeds the second
post_prob <- mean(theta1_post_vals > theta2_post_vals)
post_prob  # Print the result

# Draws of the ratio theta1 / theta2, formed element-wise from the two sets
# of posterior samples
theta_ratio <- theta1_post_vals / theta2_post_vals

# Density plot of the ratio draws (estimated posterior of theta1/theta2)
tibble(ratio = theta_ratio) %>%
  ggplot(aes(x = ratio)) +
  geom_density(fill = "lightblue") +
  labs(x = expression(theta[1] / theta[2]),
       y = "posterior density",
       title = expression("Posterior distribution of " ~ theta[1] / theta[2])) +
  theme_minimal()

# Sample median and the 2.5% / 97.5% sample quantiles of the ratio
med_ratio <- quantile(theta_ratio, probs = 0.5)
quant_int <- quantile(theta_ratio, probs = c(0.025, 0.975))

# 95% HPD interval estimated from the ratio draws
emp_hpd_int <- emp.hpd(theta_ratio, conf = 0.95)

# Report the median and both intervals
cat("Posterior median of theta1 / theta2:", med_ratio, "\n")
cat("95% quantile-based interval for theta1/theta2:", print_int(quant_int), "\n")
cat("95% HPD interval for theta1/theta2:", print_int(emp_hpd_int), "\n")

########################################################
########################################################
# R Code for Chapter 6 Content
########################################################
########################################################

# Load necessary packages
library(tidyverse)
library(janitor)
library(rstan)  # Required for some Bayesian methods
library(bayesplot)  # For plotting posterior diagnostics

### 6.1 Grid Approximation: Beta-Binomial Example

# Steps 1-3 in a single pipeline:
#   1. a grid of 6 equally spaced pi values on [0, 1];
#   2. the Beta(2, 2) prior density and the binomial likelihood of
#      9 successes in 10 trials at each grid value;
#   3. the unnormalized posterior (likelihood * prior) and its normalized
#      version `post`.
grid_data <- tibble(pi_grid = seq(0, 1, length.out = 6)) %>%
  mutate(
    prior = dbeta(pi_grid, 2, 2),
    likeli = dbinom(9, size = 10, prob = pi_grid),
    unnorm = likeli * prior,
    post = unnorm / sum(unnorm)
  )

# Check the normalization: sum_post should be 1
summarize(grid_data, sum_unnorm = sum(unnorm), sum_post = sum(post))

# Step 4: draw 10000 grid rows with replacement, weighted by `post`
set.seed(84735)
post_sam <- grid_data %>%
  sample_n(size = 10000, weight = post, replace = TRUE)

# Spike plot of the grid-approximated posterior: one point per grid value,
# with a vertical segment from 0 up to its posterior probability
ggplot(grid_data, aes(x = pi_grid, y = post)) +
  geom_point() +
  geom_segment(aes(x = pi_grid, xend = pi_grid, y = 0, yend = post)) +
  labs(x = expression(pi),
       y = "posterior probability",
       title = "Grid Approximation of the Posterior") +
  theme_minimal()

# Histogram of the sampled grid values with the Beta(11, 3) pdf overlaid
ggplot(post_sam, aes(x = pi_grid)) +
  geom_histogram(aes(y = after_stat(density)), color = "white", bins = 30) +
  stat_function(fun = dbeta, args = list(11, 3), color = "blue") +
  labs(x = expression(pi),
       y = "density",
       title = "Posterior histogram with true posterior density") +
  lims(x = c(0, 1)) +
  theme_minimal()

# Repeat the approximation on a finer grid.
# Steps 1-3 in a single pipeline: 101 equally spaced pi values on [0, 1],
# the Beta(2, 2) prior and the binomial likelihood (9 successes in 10 trials)
# at each value, then the normalized posterior `post`.
grid_data <- tibble(pi_grid = seq(from = 0, to = 1, length.out = 101)) %>%
  mutate(
    prior = dbeta(pi_grid, 2, 2),
    likeli = dbinom(9, size = 10, prob = pi_grid),
    unnorm = likeli * prior,
    post = unnorm / sum(unnorm)
  )

# Fix the seed so the draws below are reproducible
set.seed(84735)

# Step 4: draw 10000 grid rows with replacement, weighted by `post`
post_sam <- grid_data %>%
  sample_n(size = 10000, weight = post, replace = TRUE)

# Histogram of the sampled grid values (bin width 0.05) with the Beta(11, 3)
# pdf overlaid
ggplot(post_sam, aes(x = pi_grid)) +
  geom_histogram(aes(y = after_stat(density)), color = "white", binwidth = 0.05) +
  stat_function(fun = dbeta, args = list(11, 3), color = "blue") +
  labs(x = expression(pi),
       y = "density",
       title = "Posterior histogram with true posterior overlay") +
  lims(x = c(0, 1)) +
  theme_minimal()

#################################################
#### Gamma-Poisson Example
#################################################

# Steps 1-3 in a single pipeline:
#   1. a grid of 501 equally spaced lambda values on [0, 15];
#   2. the Gamma(3, 1) prior density and the Poisson likelihood of the two
#      observed counts (2 and 8) at each grid value;
#   3. the unnormalized posterior (likelihood * prior) and its normalized
#      version `post`.
grid_data <- tibble(lam_grid = seq(from = 0, to = 15, length.out = 501)) %>%
  mutate(
    prior = dgamma(lam_grid, shape = 3, rate = 1),
    likeli = dpois(2, lam_grid) * dpois(8, lam_grid),
    unnorm = likeli * prior,
    post = unnorm / sum(unnorm)
  )

# Fix the seed so the draws below are reproducible
set.seed(84735)

# Step 4: draw 10000 grid rows with replacement, weighted by `post`
post_sam <- grid_data %>%
  sample_n(size = 10000, weight = post, replace = TRUE)

# Histogram of the sampled grid values with the Gamma(13, 3) pdf overlaid
ggplot(post_sam, aes(x = lam_grid)) +
  geom_histogram(aes(y = after_stat(density)), color = "white", bins = 30) +
  stat_function(fun = dgamma, args = list(13, 3), color = "blue") +
  labs(x = expression(lambda),
       y = "density",
       title = "Posterior histogram with Gamma(13,3) density overlay") +
  lims(x = c(0, 10)) +
  theme_minimal()

