#########################################################################
# COMPUTATIONAL STATISTICS
# by Geof Givens and Jennifer Hoeting
# CHAPTER 5 EXAMPLES
#########################################################################

#Set Working Directory to Source File Location
library("rstudioapi")                                 # Load rstudioapi package
setwd(dirname(getActiveDocumentContext()$path))       # Set working directory to source file location
#getwd()   

############################################################################
### EXAMPLE 5.1 ALZHEIMER'S DISEASE 
############################################################################
# y        = observed data
# b0      = initial beta estimates
# s02     = initial variance estimate
# interval = vector of length 2 containing (min,max)
# n        = number of subintervals
# f        = target function
# riemann  = computes Riemann's rule for (n) subintervals
############################################################################

## INITIAL VALUES
# y    = read.table(file.choose(),header=T)
y <- read.table("../Datasets/alzheimers.dat",header=T)
head(y)

options(digits = 15)
b0 <- c(1.804, 0.165)
s02 <- 0.015^2
interval <- c(-0.07, 0.085)
n <- 256

f <- function(g, s2, b, id) {
    x <- matrix(c(rep(1, 5), 1:5), 5, 2)
    lambda <- exp(x %*% b + rep(g, 5))
    y_sub <- y[y$subject == id, 3]
    out <- dnorm(g, mean = 0, sd = sqrt(s2)) * (1:5) %*% (y_sub - lambda) * 
      prod(dpois(y_sub, lambda))
    return(out)
}

f <- Vectorize(f, vectorize.args = "g")

riemann <- function(interval, n, s2, b, id) {
    h <- (interval[2] - interval[1]) / n
    x <- seq(interval[1], by = h, length.out = n)
    out <- sum(h * f(x, s2, b, id))
    return(out)
}

# RIEMANN APPROXIMATION
riemann(interval,n,s02,b0,1)

# OUTPUT PLOT
x <- seq(interval[1], interval[2], length.out = 1000)
# Vectorized function call to compute values for plotting
d <- f(x, s02, b0, 1)

# Efficient plotting with base R
plot(x, d, type = "l", xlab = expression(gamma[1]),
     ylab = expression(paste("integrand in Eqn (5.7) - in units of ", 10^-4)))

############################################################################
### EXAMPLE 5.2 ALZHEIMER'S DISEASE (TRAPEZOIDAL RULE)
############################################################################
# y, b0, s02, interval, n, f = as in Example 5.1 above
# trapezoidal = computes trapezoidal rule for (n) subintervals
############################################################################
# INITIAL VALUES and FUNCTIONS are as in Example 5.1 above

trapezoidal <- function(interval, n, s2, b, id) {
    h <- (interval[2] - interval[1]) / n
    x <- seq(from = interval[1], to = interval[2], length.out = n + 1)
    f_values <- f(x, s2, b, id)
    trapezoid_sum <- sum((f_values[-1] + f_values[-length(f_values)]) * h / 2)
    return(trapezoid_sum)
}

# MAIN
T1 <- trapezoidal(interval, n, s02, b0, 1)
# OUTPUT
T1 # This gives the TRAPEZOIDAL APPROXIMATION sum

############################################################################
### EXAMPLE 5.3 ALZHEIMER'S DISEASE (SIMPSON'S RULE)
############################################################################
# y, b0, s02, interval, n, f = as in Example 5.1 above
# simpsons = computes simpson's rule for (n) subintervals
############################################################################
# INITIAL VALUES and FUNCTIONS are as in Example 5.1 above

simpsons <- function(interval, n, s2, b, id) {
    if (n %% 2 == 1) n <- n + 1  # Adjust to ensure n is even
    h <- (interval[2] - interval[1]) / n
    x <- seq(from = interval[1], to = interval[2], length.out = n + 1)
    f_values <- f(x, s2, b, id)
    # Simpson's rule calculation
    result <- h / 3 * (f_values[1] + 2 * sum(f_values[3:(n - 1)][c(FALSE, TRUE)]) +
                           4 * sum(f_values[2:n][c(TRUE, FALSE)]) + f_values[n + 1])
    return(result)
}

# MAIN
S1 <- simpsons(interval, n, s02, b0, 1)
# OUTPUT
S1  # SIMPSON'S APPROXIMATION

############################################################################
### EXAMPLE 5.4 ALZHEIMER'S DISEASE (ROMBERG INTEGRATION)
############################################################################
# y, b0, s02, interval, n, f = as in Example 5.1 above
# trapezoidal = computes trapezoidal rule for (n) subintervals
# romberg     = computes Romberg integration to (m) for (n) subintervals
############################################################################
# OTHER INITIAL VALUES and FUNCTIONS are as in Example 5.1 above
m    = 10

trapezoidal = function(interval,n,s2,b,id){
      h   = (interval[2]-interval[1])/n
      x   = interval[1] + (1:(n-1))*h
      out = h/2*f(interval[1],s2,b,id)
      out = c(out, h*f(x,s2,b,id))
      out = c(out, h/2*f(interval[2],s2,b,id))
      return(out)
}

romberg = function(interval,m,s2,b,id){
      out = matrix(0,m+1,m+1)
      out[1,1] = sum(diff(interval)/2*f(interval,s2,b,id))
      for(i in 2:(m+1)){
       out[i,1] = sum(trapezoidal(interval,2^(i-1),s2,b,id))
        for(j in 2:i){
         out[i,j] = out[i,j-1] + (out[i,j-1]-out[i-1,j-1])/((4^(j-1))-1)
        }
      }
      diag(out) = 0
      out = out[-1,]
      return(out)
}

# MAIN
That = romberg(interval,m,s02,b0,1)
# OUTPUT
That[,1:3]      # ROMBERG INTEGRATION ESTIMATES

############################################################################
### EXAMPLE 5.5 ALZHEIMER'S DISEASE (GAUSSIAN QUADRATURE)
############################################################################

#For the second edition, we prefer to direct our readers to
#code available online for Gaussian quadrature.  For example,
#the functions gauss.quad.prob() and gauss.quad() calculate
#the nodes and weights.  These functions are available within
#the R package {statmod} found via www.r-project.org

# Load the statmod package
library(statmod)

# Use gauss.quad.prob() for nodes and weights with a probability weight function
# Example for standard normal distribution
n <- 5 # Number of nodes
(nodes_wts_prob <- gauss.quad.prob(n))

# Use gauss.quad() for nodes and weights without a specific probability distribution
(nodes_wts <- gauss.quad(n))
