#================================================================================#
# Stat 7650: Computational Statistics -- Spring 2025                             #
# R code examples for simulating random variables                                #
# Based on Givens & Hoeting (Ch 6), Lange (Ch 22), Robert & Casella (Ch 2)       #
# Modified from code of R. Martin                                                #
#================================================================================#

#Set Working Directory to Source File Location
library("rstudioapi")                                 # Load rstudioapi package
setwd(dirname(getActiveDocumentContext()$path))       # Set working directory to source file location
#getwd()  

# Set the path for saving plots
path_name <- "../STAT7650-Slides/figures"

############################################################################
# Example -- Sampling from a Symmetric Triangular Distribution
############################################################################

# Plot the pdf
pdf_tri <- function(x) {
  ifelse(x < 0, 1 + x, ifelse(x <= 1, 1 - x, 0))
}

x_pdf <- seq(-1, 1, len = 1000)
y_pdf <- pdf_tri(x_pdf)

#pdf(file=paste0(path_name, "/tri_pdf.pdf"), width=7, height=5)
plot(x_pdf, y_pdf, type = 'l', main = 'pdf of Symmetric Triangular Distribution, STri(-1,1)', xlab = 'x', ylab = 'f(x)')
abline(v=0,lty=2)
#dev.off()

# Plot the CDF
cdf_tri <- function(x) {
  ifelse(x < 0, 0.5 * x^2 + x + 0.5, ifelse(x <= 1, 1 - 0.5 * (1 - x)^2, 1))
}

x_cdf <- seq(-1, 1, len = 1000)
y_cdf <- cdf_tri(x_cdf)

#pdf(file=paste0(path_name, "/tri_cdf.pdf"), width=7, height=5)
plot(x_cdf, y_cdf, type = 'l', main = 'cdf of Symmetric Triangular Distribution, STri(-1,1)', xlab = 'x', ylab = 'F(x)')
abline(v=0,h=1/2,lty=2)
#dev.off()

# Function to generate samples from a triangular distribution
rtri <- function(n, pic = FALSE) {
  # Generate uniform random numbers
  U <- runif(n)
  # Transform uniform random numbers to triangular distribution
  X0 <- 1 - sqrt(1 - U)
  X <- sample(c(-1, 1), size = n, replace = TRUE) * X0
  
  # Plot histogram and theoretical density, if requested
  if (pic) {
    # Create histogram but don't plot it yet
    h <- hist(X, breaks = 25, plot = FALSE)
    ymax <- 1.01 * max(1, h$density)
    
    # Plot histogram
    hist(X, breaks = 25, ylim = c(0, ymax), freq = FALSE, 
         col = "gray", border = "white", main = "Symmetric Triangular Distribution, STri(-1,1)",
         xlab = "x", ylab="density")
    
    # Add theoretical density line
    lines(x = c(-1, 0, 1), y = c(0, 1, 0), lwd = 2, col = "blue", type = "o", pch = 19)
    legend("topright", legend = "theoretical pdf", col = "blue", lwd = 2, pch = 19)
  }
  
  return(X)
}

# Example usage of the function
out <- rtri(n = 2000, pic = TRUE)

############################################################################
# Example -- Box-Muller Method for Normal RV Generation
############################################################################

#Box-Muller transformation for generating normal random variables
rnorm_bm <- function(n = 1, mean = 0, sd = 1) {
  # Ensure we generate an even number of samples for the Box-Muller transform
  p <- if (n %% 2 == 0) n / 2 else (n + 1) / 2
  
  # Generate uniform random samples
  U <- runif(p)
  V <- runif(p)
  
  # Apply the Box-Muller transform
  R <- sqrt(-2 * log(U))
  Theta <- 2 * pi * V
  Z1 <- R * cos(Theta)
  Z2 <- R * sin(Theta)
  
  # Concatenate the two sets of samples and adjust for mean and standard deviation
  Z <- c(Z1, Z2)
  norm_samples <- mean + sd * Z[1:n]
  
  return(norm_samples)
}

# Benchmark the enhanced Box-Muller method
system.time(out_bm <- rnorm_bm(50000))
# Benchmark R's built-in normal distribution sampling function
system.time(out_rnorm <- rnorm(50000))

############################################################################
# Example -- Sampling from a Poisson Distribution
############################################################################
# function for sampling from a Poisson distribution
rpois_pp <- function(n = 1, lam = 1) {
  # Initialize counters
  X <- numeric(n)  # Store Poisson samples
  total <- numeric(n)  # Accumulated exponential samples
  w <- rep(TRUE, n)  # Logical vector to track which samples need more addition
  
  # Loop until all samples reach or exceed lam
  while (any(w)) {
    # Generate exponential samples where needed
    new <- rexp(sum(w), rate = 1)  # rate parameter of exponential is 1
    total[w] <- total[w] + new  # Update totals
    
    # Update condition
    w <- total < lam  # Only those still under lam
    X[w] <- X[w] + 1  # Increment counts for samples still under lam
  }
  
  return(X)
}

# Benchmark the enhanced Poisson sampler
system.time(out_pp <- rpois_pp(n = 10000, lam = 1))
# Benchmark R's built-in Poisson sampler for comparison
system.time(out_rpois <- rpois(n = 10000, lam = 1))

############################################################################
# Example -- Beta sampling via the Fundamental Theorem of Simulation
############################################################################
# Parameters for the beta distribution
a <- 2.7
b <- 6.3

# The target density function
f <- function(x) dbeta(x, a, b)

# Maximum of the density function (for the acceptance-rejection method)
mod = (a - 1) / (a + b - 2)
mod

fmax <- f(mod)
fmax

# Number of samples to generate
n <- 2000

# Generate uniform samples for acceptance-rejection
set.seed(111)
Z <- runif(n)
U <- runif(n, 0, fmax)
ix <- U <= f(Z)
X <- Z[ix]

#pdf(file=paste0(path_name, "/beta_sim1.pdf"), width=7, height=5)
plot(Z, U, col=ifelse(ix, "red", "gray"), xlab="Z (uniform)", ylab="U (scaled uniform)",
     main="Acceptance-Rejection Sampling for Beta(2.7,6.3) Distribution")
curve(f, add=TRUE, col="blue", lwd=2)
legend("topright", legend=c("f(x)", "Accepted", "Rejected"), col=c("blue", "red", "gray"), lwd=2, pch=1)
#dev.off()

#pdf(file=paste0(path_name, "/beta_sim2.pdf"), width=7, height=5)
hist(X, breaks=40, ylim=c(0, 2.7), freq=FALSE, col="gray", border="white", 
     main="Histogram of Accepted Samples with Theoretical pdf", 
     xlab="sampled values",ylab="density")
curve(f, add=TRUE, col="blue", lwd=2)
#dev.off()

############################################################################
# Example -- Complicated One from Robert & Casella Book Cover
############################################################################

# Define the complex function and a proposal function
f <- function(x) exp(-x^2 / 2) * (sin(6 * x)^2 + 3 * cos(x)^2 * sin(4 * x)^2 + 1)
g <- function(x) 5 * exp(-x^2 / 2)

# Sequence for plotting
xseq <- seq(-4, 4, len = 500)

# Plot the functions
#pdf(file=paste0(path_name, "/trig_dens1.pdf"), width=7, height=5)
plot(xseq, f(xseq), type="l", ylim=c(0, 5), xlab="x", ylab="density values", main="pdf and proposal density")
lines(xseq, g(xseq), col="red", lwd=2)
legend("topright", legend=c("f(x)", "g(x)"), col=c("black", "red"), lwd=2, cex=0.8)
#dev.off()

# Sampling Method
n <- 2000
X <- numeric(n)  # Preallocate space for efficiency
i <- 1

cnt = 1 # to get the acceptance rate
while(i <= n) {
  Z <- rnorm(1)
  U <- runif(1, 0, g(Z))
  if(U <= f(Z)) {
    X[i] <- Z
    i <- i + 1
  }
  cnt <- cnt + 1
}

#total of values generated to get n=1000 samples
cnt
#Acceptance Rate
n/cnt

# Normalize the complicated function for histogram overlay
norm_const <- integrate(f, -Inf, Inf)$value
fnor <- function(x) f(x) / norm_const

# Histogram of sampled values with theoretical density overlay
#pdf(file=paste0(path_name, "/trig_dens2.pdf"), width=7, height=5)
hist(X, freq=FALSE, ylim=c(0, 0.8), breaks=50, col="gray", border="white", 
     xlab="x",ylab="density",main="Sampled Histogram with Theoretical pdf")
lines(xseq, fnor(xseq), col="blue", lwd=2)
#dev.off()

############################################################################
# Example -- Sampling from Beta Distribution via Accept-Reject
############################################################################
# Define target density function
f <- function(x) dbeta(x, 1, 3)

# Initial proposal density function based on a crude guess
h <- function(x) 2 * dexp(x, rate=4)

# Improved proposal density function based on an "optimal" M for given rate=4
hh <- function(x) 3 * exp(2) / 16 * dexp(x, rate=4)

# Calculate optimal parameters for the proposal distribution
x_opt <- 1 / 3
lam_opt <- 2 / (1 - x_opt)
M_opt <- 1.5 * (1 - x_opt)**3 * exp(2 * x_opt / (1 - x_opt))

# Optimal proposal density function
h_opt <- function(x) M_opt * dexp(x, rate=lam_opt)

# Plotting the target and proposal distributions for comparison
plot(f, xlim=c(0, 1), ylim=c(0, 5), main="Beta(1,3) Distribution and Proposal Distributions",
     ylab="density", xlab="x", type="l")
curve(h, add=TRUE, col="red", lty=2)
curve(hh, add=TRUE, col="blue", lty=3)
curve(h_opt, add=TRUE, col="green", lwd=2, lty=1)
legend("topright", legend=c("Beta(1,3)", "Crude Guess", "Improved Guess", "Optimal Proposal"),
       col=c("black", "red", "blue", "green"), lty=c(1, 2, 3, 1), cex=1)

# Accept-reject sampling function using the optimal proposal distribution
r_beta_AR <- function(n) {
  samples <- numeric(n)
  count <- 0
  while(count < n) {
    Z <- rexp(1, rate=lam_opt)
    U <- runif(1, 0, M_opt * dexp(Z, rate=lam_opt))
    if(U <= f(Z)) {
      count <- count + 1
      samples[count] <- Z
    }
  }
  return(samples)
}

# Perform sampling
set.seed(123) # For reproducibility
samples <- r_beta_AR(1000)

# Plotting the histogram of the sampled data with the theoretical curve
hist(samples, freq=FALSE, xlim=c(0, 1), col="gray", border="white", main="Histogram of Sampled Data",
     xlab="sampled values",ylab="density")
curve(f, add=TRUE, col="blue", lwd=2)

############################################################################
# Example -- Sample from von Mises Distribution with piecewise accept-reject
############################################################################

# 1) von Mises un/normalized density
dvm <- function(x, kappa = 1, norm = FALSE) {
  d <- exp(kappa * cos(x)) * (x > 0 & x < 2 * pi)
  if (norm) d <- d / (2 * pi * besselI(kappa, 0))
  return(d)
}

# 2) Majorant for kappa >= 2/pi
major <- function(x, kappa = 2) {
  if (kappa < 0) stop("kappa must be non-negative!")
  # This is only valid for kappa >= 2/pi
  g <- function(u) kappa * u * sin(u) - 1
  x_left <- uniroot(g, c(0, pi / 2))$root
  lam    <- kappa * sin(x_left)
  k      <- exp(kappa * (x_left * sin(x_left) + cos(x_left))) / (kappa * sin(x_left))
  
  maj <- k * lam * exp(-lam * ifelse(x < pi, x, 2 * pi - x))
  return(maj)
}

# 3) Piecewise random sampler
rvm <- function(n, kappa = 2) {
  if (kappa <= 0) stop("kappa must be positive!")
  
  # If kappa < 2/pi, uniform-based accept-reject
  if (kappa < 2/pi) {
    X <- numeric(n)
    nsamp <- count <- 0
    
    while (nsamp < n) {
      x_prop <- runif(1, 0, 2*pi)
      count  <- count + 1
      
      # ratio = exp(kappa*cos(x)) / e^kappa = exp(kappa*(cos(x)-1))
      ratio <- exp(kappa * (cos(x_prop) - 1))
      
      if (runif(1) <= ratio) {
        nsamp <- nsamp + 1
        X[nsamp] <- x_prop
      }
    }
    
    cat("Used uniform-based accept-reject for kappa < 2/pi.\n")
    cat("Acceptance rate =", n / count, "\n\n")
    return(X)
  }
  
  # If kappa >= 2/pi, use exponential-based accept-reject
  g <- function(x) kappa * x * sin(x) - 1
  x_left <- uniroot(g, c(0, pi / 2))$root
  lam    <- kappa * sin(x_left)
  k      <- exp(kappa * (x_left * sin(x_left) + cos(x_left))) / (kappa * sin(x_left))
  
  r <- function(x) dvm(x, kappa) / major(x, kappa)
  
  X <- numeric(n)
  nsamp <- count <- 0
  
  while (nsamp < n) {
    x_prop <- rexp(1, lam)
    count  <- count + 1
    
    # accept?
    if (runif(1) <= r(x_prop)) {
      nsamp <- nsamp + 1
      # reflect around pi half the time
      X[nsamp] <- ifelse(runif(1) <= 0.5, x_prop, 2*pi - x_prop)
    }
  }
  
  cat("Used exponential envelope for kappa >= 2/pi.\n")
  cat("Acceptance rate =", n / count, "\n\n")
  return(X)
}


# -------------------------------------------------------------------------
# 4) Example usage, kappa < .64
#    (change kappa below to see each regime in action)
# -------------------------------------------------------------------------
kappa <- 0.2
n     <- 5000
samples <- rvm(n, kappa)

# -------------------------------------------------------------------------
# 5) Plot the results
# -------------------------------------------------------------------------
# A) Plot the unnormalized target, dvm(x, kappa)
curve(dvm(x, kappa), xlim = c(0, 2*pi), lwd=2,
      main="von Mises target vs. envelope",
      ylab="(Unnormalized) Density")

# B) Overlay either the constant or the exponential envelope
if (kappa < 2/pi) {
  # Envelope is a constant e^kappa
  abline(h = exp(kappa), col="red", lty=2, lwd=2)
  legend("top", inset=0.05, 
         legend=c("von Mises", "Uniform envelope = e^kappa"), 
         col=c("black", "red"), lty=c(1,2), lwd=2)
  
} else {
  # Envelope is major(x, kappa)
  curve(major(x, kappa), add=TRUE, col="red", lty=2, lwd=2)
  legend("top", inset=0.05,
         legend=c("von Mises", "Majorant envelope"),
         col=c("black", "red"), lty=c(1,2), lwd=2)
}

# C) Plot the histogram of samples vs. the normalized pdf
hist(samples, breaks=50, freq=FALSE, col="gray", border="white",
     main="Histogram of Samples vs. Normalized PDF",
     xlab="x", ylab="density")

curve(dvm(x, kappa, norm=TRUE), add=TRUE, lwd=2)

# -------------------------------------------------------------------------
# 4) Example usage with kappa > 64
#    (change kappa below to see each regime in action)
# -------------------------------------------------------------------------
kappa <- 2
n     <- 5000
samples <- rvm(n, kappa)

# -------------------------------------------------------------------------
# 5) Plot the results
# -------------------------------------------------------------------------
# A) Plot the unnormalized target, dvm(x, kappa)
curve(dvm(x, kappa), xlim = c(0, 2*pi), lwd=2,
      main="von Mises target vs. envelope",
      ylab="(Unnormalized) Density")

# B) Overlay either the constant or the exponential envelope
if (kappa < 2/pi) {
  # Envelope is a constant e^kappa
  abline(h = exp(kappa), col="red", lty=2, lwd=2)
  legend("top", inset=0.05, 
         legend=c("von Mises", "Uniform envelope = e^kappa"), 
         col=c("black", "red"), lty=c(1,2), lwd=2)
  
} else {
  # Envelope is major(x, kappa)
  curve(major(x, kappa), add=TRUE, col="red", lty=2, lwd=2)
  legend("top", inset=0.05,
         legend=c("von Mises", "Majorant envelope"),
         col=c("black", "red"), lty=c(1,2), lwd=2)
}

# C) Plot the histogram of samples vs. the normalized pdf
hist(samples, breaks=50, freq=FALSE, col="gray", border="white",
     main="Histogram of Samples vs. Normalized PDF",
     xlab="x", ylab="density")

curve(dvm(x, kappa, norm=TRUE), add=TRUE, lwd=2)

############################################################################
# Example -- Small Shape Gamma Distribution 
############################################################################

# Parameters
nu <- 0.01  # Shape parameter
lam <- 1  # Rate parameter (scale = 1/lam for rgamma)
n <- 1000  # Number of random variables to generate

# Generating Gamma distributed random variables
X <- rgamma(n, shape = nu, rate = lam)
mean(round(X,5)==0)

# Plotting the histogram of generated values with density instead of counts
#pdf(file=paste0(path_name, "/gamma_sim1.pdf"), width=7, height=5)
hist(X, breaks = 100, freq = FALSE, xlim = c(0, 0.1), ylim = c(0, 50),
     main = expression(paste("Gamma Distribution (", nu, " = 0.01) with pdf Overlaid")),
     xlab = "values", ylab = "density",
     col = "skyblue")
curve(dgamma(x, shape = nu, rate = lam), add = TRUE, col = "darkblue", lwd = 2)
legend("topright", legend = c("Histogram", "Gamma pdf"), fill = c("skyblue", "darkblue"), 
       border = c("skyblue", "darkblue"), bty = "n")
#dev.off()

############################################################################
# Example -- Gamma Distribution Simulations via the Ratio Method
############################################################################
# function h(x) and its square root
h <- function(x) x^(nu - 1) * exp(-x)
sqrt_h <- function(x) sqrt(h(x))

# Function to simulate Gamma distributed random variables using the ratio method
rgam_ratio <- function(n, nu = 1) {
    if (nu <= 0) stop("nu must be positive")
    
    X <- numeric(n)  # Preallocate memory for efficiency
    nsamp <- 0
    count <- 0
    k_u <- ((nu - 1) / exp(1)) ** ((nu - 1) / 2)
    k_v <- ((nu + 1) / exp(1)) ** ((nu + 1) / 2)
    
    while (nsamp < n) {
        count <- count + 1
        U <- runif(1, 0, k_u)
        V <- runif(1, 0, k_v)
        x <- V / U
        if (U <= sqrt_h(x)) {
            nsamp <- nsamp + 1
            X[nsamp] <- x
        }
    }
    
    cat("Acceptance Rate:", n / count, "\n")
    return(X)
}

nu <- 7.7
gam_out <- rgam_ratio(n = 5000, nu = nu)

#pdf(file=paste0(path_name, "/gamma_sim2.pdf"), width=7, height=5)
hist(gam_out, freq = FALSE, breaks = 50, xlab = "x", ylab="density", col = "gray", border = "white", 
     main = "Gamma Simulation vs Theoretical pdf")
curve(dgamma(x, shape = nu, rate = 1), add = TRUE, col = "blue", lwd = 2)
#dev.off()

#######################################################################
#The code for the illustrative figure for S_h and the Bounding rectangle

# Calculate rectangle bounds for the ratio method
k_u <- ((nu - 1) / exp(1)) ** ((nu - 1) / 2)
k_v <- ((nu + 1) / exp(1)) ** ((nu + 1) / 2)

# Function to check if a point (u,v) is inside the set S_h
in_S_h <- function(u, v) {
    return(u > 0 & v > 0 & u <= sqrt_h(v / u))
}

# Set up a grid of u and v values
u_vals <- seq(0, k_u, len = 250)
v_vals <- seq(0, k_v, len = 250)
grid <- expand.grid(u = u_vals, v = v_vals)

# Apply the in_S_h function to all points in the grid to determine if they are inside S_h
grid$in_S_h <- with(grid, in_S_h(u, v))
u_inSh = grid$u[grid$in_S_h]
v_inSh = grid$v[grid$in_S_h]

# Plot the points that are inside S_h and the bounding rectangle
#pdf(file=paste0(path_name, "/Sh_bound_rect.pdf"), width=7, height=5)
plot(u_inSh, v_inSh, 
     xlim = c(0, max(u_vals)), ylim = c(0, max(v_vals)), 
     xlab = "u", ylab = "v", 
     main = expression("Region " * S[h] * " and The Bounding Rectangle"),
     col = rgb(0.8, 0.8, 0.8), pch = 16)
rect(0, 0, k_u, k_v, col = NA, border = "red", lwd = 2) # the bounding rectangle
#dev.off()

############################################################################
# Example -- Bayesian Posterior Sampling via SIR
############################################################################

# Data: observations
X <- c(3.91, 4.85, 2.28, 4.06, 3.70, 4.04, 5.46, 3.53, 2.28, 1.96, 2.53,
       3.88, 2.22, 3.47, 4.82, 2.46, 2.99, 2.54, 0.52, 2.50)

# Likelihood function
L <- function(x, theta) {
  dif <- outer(x, theta, "-")
  out <- apply(log(1 - cos(dif)), 2, sum)
  return(exp(out))
}

#Plot the curve of the corresponding pdf
# Define the range of x values
x_vals <- seq(0, 2*pi, by = 0.01)

# Define theta
theta <- 1

# Calculate the function values
y_vals <- (1 - cos(x_vals - theta))/(2*sin(theta))

# Plot the curve
plot(x_vals, y_vals, type = 'l', col = 'blue',
     main =  expression(paste("Curve of f(x) for ",theta, " = 1")),
     xlab = "x", ylab = "f(x)")

#Start sampling with SIR
# Sampling parameters
N <- 10000
M <- 10 * N
theta <- runif(M, -pi, pi)
w <- L(X,theta)
ww <- w / sum(w)

#pdf(file=paste0(path_name, "/bayesian_sir1.pdf"), width=7, height=5)
hist(ww, breaks=25, freq=FALSE, col="gray", border="white",
     xlab="w",ylab="density", main="Distribution of (Standardized) Weights")
#dev.off()

# SIR: Sampling theta values based on weights
theta_rs <- sample(theta, size=N, replace=TRUE, prob=ww)

#pdf(file=paste0(path_name, "/bayesian_sir2.pdf"), width=7, height=5)
hist(theta_rs, breaks=25, freq=FALSE, col="gray", border="white", ylim=c(0,2),
     xlab=expression(theta), ylab="density",main="Posterior Distribution (histogram and pdf)")
Lint <- function(theta) L(X,theta)
norm <- integrate(Lint, -pi, pi)$value
post <- function(theta) L(X,theta) / norm
curve(post, add=TRUE)
#dev.off()

