#================================================================================#
# Stat 7650: Computational Statistics -- Spring 2025                           #
# R code examples for Slides #1 some introductory material                                 #
# Based in part on Lange (Chapters 7-9)                                          #
# Adapted from Dr Ryan Martin's code                                            #
#================================================================================#

# Vectors
# Entries are coerced into the most general type (logical < integer < double < character)
c(FALSE, 7)         # Logical coerced to numeric
c(11.7, 'abc')      # Numeric coerced to character

# Vectorized arithmetic
x <- c(7, 10, 11)
y <- seq(5, 3, by = -1) #or 5:3
x + y               # Element-wise addition of vectors

# Subsetting entries
x <- seq(5, 25, by = 5)
x[c(2, 3)]          # Extract the 2nd and 3rd elements of x

# Implicit loops using apply family
mylist <- list(var1 = x, var2 = y)
lapply(mylist, mean)    # List output
sapply(mylist, mean)    # Simplified output

# Ensure column vectors are of the same length for matrix operations
mymat <- cbind(var1 = x[1:3], var2 = y)
apply(mymat, 2, mean, na.rm = TRUE)  # Column means

# Example with lists
M <- matrix(c(2, 5, 7, 7), nrow = 2)
f <- function(x) log(x) + x^2        # Custom function
mylist <- list(mymat = M, myfun = f)

# Apply the function to the matrix within the list
mylist$myfun(mylist$mymat)

# Accessing elements of the matrix inside the list
mylist$mymat[2, 2]   # Element at row 2, column 2
mylist$mymat[-1, ]   # Exclude the first row

################################################################################
# Graphics and Simulations: Scatterplot with margin labels and features

set.seed(123)  # Reproducibility
x <- runif(50, 0, 2)  # Random data
y <- runif(50, 0, 2)

# Base scatterplot with customization
plot(x, y, 
     xlab = "X-axis Label", ylab = "Y-axis Label", 
     main = "Main Title", sub = "Subtitle", 
     pch = 19, col = "blue")

# Add text annotation and reference lines
text(0.6, 0.6, "Text at (0.6, 0.6)", col = "red", cex = 0.8)
abline(h = 0.6, v = 0.6, lty = 2, col = "gray")

# Adding margin text along all four sides with dynamic positioning
for (s in 1:4) {
  mtext(
    text = -1:4,   # Labels for the margin
    side = s,      # Side of the plot (1 = bottom, 2 = left, 3 = top, 4 = right)
    at = 0.7,      # Fixed position on the margin
    line = -1:4    # Dynamic line offset
  )
}


# Margin text on all sides
mtext(-1:4, side = 1:4, at = 0.7, line = -1:4, col = "darkgreen", font = 2)

################################################################################
# Histogram with two normal densities overlaid
################################################################################

mean_med_hist <- function(n = 500, mu = 0, sig = 1, draw.pdf = FALSE, 
                          filename = "hist.pdf") {
  X <- rnorm(n, mu, sig)  # Generate random data
  
  if (draw.pdf) pdf(file = filename)  # Open PDF device if required
  
  h <- hist(X, plot = FALSE)  # Compute histogram for density info
  ylim <- range(0, h$density, dnorm(mu, mu, sig))  # Set y-axis limits
  
  hist(X, freq = FALSE, ylim = ylim, col = "gray", border = "white",
       main = sprintf("Histogram with Normal Densities\nn = %d, µ = %.1f, σ = %.1f", n, mu, sig),
       xlab = "X", ylab = "Density")  # Plot histogram
  
  # Overlay density curves
  curve(dnorm(x, mean(X), sig), lwd = 2, col = "red", add = TRUE)
  curve(dnorm(x, median(X), sig), lwd = 2, col = "blue", add = TRUE)
  
  # Add legend
  legend("topright", inset = 0.05, lwd = 2, col = c("red", "blue"), 
         legend = c("mean", "median"))
  
  if (draw.pdf) dev.off()  # Close PDF device
}

# Example usage
mean_med_hist()

################################################################################
# Compare estimation performance of sample mean and median
################################################################################

mean_med_comp <- function(n = 100, mu = 0, sig = 1, reps = 100, plot.title = NULL) {
  # Generate samples and compute estimates
  XX <- matrix(rnorm(n * reps, mu, sig), nrow = reps)
  est <- list(mean = rowMeans(XX), median = apply(XX, 1, median))
  
  # Create boxplot
  boxplot(est, col = "lightblue", border = "darkblue",
          main = plot.title %||% sprintf("Comparison of Sample Mean and Median\nn = %d", n),
          ylab = expression(hat(mu)))
  
  # Add reference line at the true mean
  abline(h = mu, lty = 3, col = "red")
}

# Utility function for default title handling
`%||%` <- function(a, b) if (!is.null(a)) a else b

# Example usage
mean_med_comp()

################################################################################
# Approximation of Square Root Using Newton-Raphson Method
################################################################################

my_sqrt_repeat <- function(y, tol = 1e-10, verbose = FALSE) {
  if (y < 0) stop("Square root of a negative number is undefined for real numbers.")
  if (y == 0) return(0)
  
  x <- y / 2  # Initial guess
  if (verbose) cat("Initial guess:", x, "\n")
  
  repeat {
    x_new <- (x + y / x) / 2  # Newton-Raphson update
    if (verbose) cat("Updated guess:", x_new, "\n")
    if (abs(x_new - x) < tol) break
    x <- x_new
  }
  
  x_new
}

# Example usage
cat("Verbose output for y = 16:\n")
my_sqrt_repeat(16, verbose = TRUE)

y <- 12345
result <- my_sqrt_repeat(y)
cat("\nApproximated sqrt of", y, ":", result, "\nBuilt-in sqrt of", y, ":", sqrt(y), "\n")

################################################################################
# Compute the square root of a non-negative number using a while loop
################################################################################

# Input: Non-negative number
y <- 12345

# Initial guess
x <- y / 2

# Refine the guess until precision is achieved
while (abs(x^2 - y) > 1e-10) {
  x <- (x + y / x) / 2
}

# Output results
cat("Computed square root using while loop:", x, "\n")
cat("Built-in square root function:", sqrt(y), "\n")

################################################################################
# Plot Polynomial Functions Using a For Loop
################################################################################

x <- seq(0, 1, by = 0.05)  # Define x values

# Base plot for y = x
plot(x, x, type = "l", col = 1, lwd = 2, 
     main = "Polynomial Functions", xlab = "x", ylab = "y")

# Add lines for y = x^j (j = 2 to 5)
for (j in 2:5) lines(x, x^j, col = j, lwd = 2)

# Add legend with mathematical annotations
legend("topleft", 
       legend = as.expression(lapply(1:5, function(j) bquote(y == x^.(j)))), 
       col = 1:5, lwd = 2)

################################################################################
# Plotting the PMF and CDF of a Binomial Distribution
################################################################################

# Parameters
n <- 25    # Number of trials
p <- 0.4   # Probability of success

# Set up an empty plot
plot(0, 0, type = "n", xlim = c(0, n), ylim = c(0, 1), 
     xlab = "x", ylab = "Probability", main = "Binomial PMF and CDF")

# Add the CDF as a step function
lines(0:n, pbinom(0:n, n, p), type = "s", lwd = 2, col = "blue")

# Add the PMF as vertical bars
lines(0:n, dbinom(0:n, n, p), type = "h", lwd = 2, col = "red")

# Add legend
legend("bottomright", inset = 0.05, lwd = 2, col = c("red", "blue"), 
       legend = c("PMF (dbinom)", "CDF (pbinom)"), box.lty = 0)

################################################################################
# Custom Cholesky Decomposition Function
################################################################################

my_chol <- function(A) {
  if (!is.matrix(A) || nrow(A) != ncol(A)) stop("Input must be a square matrix.")
  
  n <- nrow(A)
  L <- matrix(0, n, n)  # Initialize lower triangular matrix
  
  for (i in 1:n) {
    for (j in 1:i) {
      s <- sum(L[i, 1:(j - 1)] * L[j, 1:(j - 1)])
      if (j < i) {
        L[i, j] <- (A[i, j] - s) / L[j, j]  # Off-diagonal elements
      } else {
        diag_element <- A[i, i] - s
        if (diag_element <= 0) stop("Matrix is not positive definite.")
        L[i, j] <- sqrt(diag_element)  # Diagonal elements
      }
    }
  }
  L
}

################################################################################
# Example Usage
################################################################################

# Define a positive definite matrix
mat_A <- matrix(c(4, 12, -16, 12, 37, -43, -16, -43, 98), nrow = 3, byrow = TRUE)

# Perform Cholesky decomposition
chol_A <- my_chol(mat_A)

# Display results
cat("Input matrix (mat_A):\n")
mat_A
cat("\nCholesky factor (L):\n")
chol_A
cat("\nVerification (L %*% t(L)):\n")
chol_A %*% t(chol_A)

################################################################################
# Sweep Operation on a Matrix
################################################################################

swp <- function(A, k) {
  if (!is.matrix(A) || nrow(A) != ncol(A)) stop("Input must be a square matrix.")
  if (k < 1 || k > nrow(A)) stop("Index k must be valid and within matrix dimensions.")
  
  a <- A[k, k]
  if (a == 0) stop("The k-th diagonal element is zero. Sweep operation not possible.")
  
  A[-k, -k] <- A[-k, -k] - outer(A[-k, k], A[k, -k]) / a
  A[k, -k] <- A[k, -k] / a
  A[-k, k] <- A[-k, k] / a
  A[k, k] <- -1 / a
  
  A
}

################################################################################
# Compute Matrix Inverse Using Sweep
################################################################################

swp_inv <- function(A) {
  if (!is.matrix(A) || nrow(A) != ncol(A)) stop("Input must be a square matrix.")
  if (prod(dim(A)) == 1) return(if (A == 0) stop("Matrix is singular.") else 1 / A)
  
  n <- nrow(A)
  for (i in 1:n) A <- swp(A, i)
  
  -A
}

################################################################################
# Example and Verification
################################################################################

# Example: Sweep Operation
mat_A <- matrix(c(4, 12, -16, 12, 37, -43, -16, -43, 98), nrow = 3, byrow = TRUE)
cat("Original Matrix A:\n", mat_A, "\n")
cat("\nMatrix after sweeping along diagonal entry 2:\n")
swp(mat_A, 2)

# Example: Matrix Inverse
set.seed(123)
A <- matrix(rnorm(9), nrow = 3)
A <- A + t(A)  # Symmetric matrix

cat("\nOriginal Matrix A:\n")
A
invA_usual <- solve(A)
invA_sweep <- swp_inv(A)

cat("\nInverse using `solve`:\n")
invA_usual
cat("\nInverse using `swp_inv`:\n")
invA_sweep
cat("\nDifference between inverses:\n")
invA_usual - invA_sweep
cat("\nVerification (A %*% invA_sweep):\n") 
A %*% invA_sweep

################################################################################
## Statistical Methods in R: One-Sample t-Test and Visualization
################################################################################

# Load required library
library(ISwR)

# Load and explore the 'react' dataset
data(react)
?react

################################################################################
# Step 1: Data Exploration and Visualization
################################################################################

# Histogram
hist(react, col = "lightblue", border = "white",
     main = "Histogram of 'react' Dataset", 
     xlab = "Reaction Time", ylab = "Frequency")

# Stem-and-leaf plot
cat("Stem-and-Leaf Plot for 'react' Dataset:\n")
stem(react)

################################################################################
# Step 2: One-Sample t-Test
################################################################################

# Perform and display one-sample t-test results
t_out <- t.test(react)
cat("\nOne-Sample t-Test Results:\n")
print(t_out)

# Display t-statistic
cat("\nt-statistic from the t-Test:\n", t_out$statistic, "\n")

################################################################################
# Step 3: Normality Check Using QQ Plot
################################################################################

# QQ Plot with reference line
qqnorm(react, main = "QQ Plot of 'react' Dataset", 
       xlab = "Theoretical Quantiles", ylab = "Sample Quantiles")
abline(a = 0, b = 1, col = "red", lwd = 2)

################################################################################
# Step 4: Summary
################################################################################

# Summary of the dataset
cat("\nSummary of 'react' Dataset:\n")
print(summary(react))

################################################################################
# Simulate the Null Distribution of the t-Statistic for Cauchy Data
################################################################################

null_dist <- function(n = 10, reps = 500, alpha = 0.05) {
  df <- n - 1  # Degrees of freedom
  t_out <- replicate(reps, t.test(rt(n, df = 1))$statistic)  # Simulate t-statistics
  
  # Histogram and axis limits
  h <- hist(t_out, plot = FALSE)
  ylim <- range(0, h$density, dt(0, df = df))
  xlim <- range(range(t_out), qt(c(0.01, 0.99), df = df))
  
  # Plot histogram
  hist(t_out, breaks = 25, freq = FALSE, col = "lightblue", border = "white",
       xlim = xlim, ylim = ylim, xlab = "t", 
       main = "Sampling Distribution of t-Statistics\nfrom Cauchy Data")
  
  # Overlay theoretical t-distribution
  curve(dt(x, df = df), col = "red", lwd = 2, add = TRUE)
  
  # Empirical alpha level
  alpha_hat <- mean(abs(t_out) > qt(1 - alpha / 2, df = df))
  
  # Annotate with empirical alpha
  text(min(xlim), 0.9 * max(ylim), 
       bquote(hat(alpha) == .(round(alpha_hat, 3))), pos = 4, col = "darkblue", font = 2)
  
  return(alpha_hat)  # Return empirical alpha level
}

################################################################################
# Example Usage
################################################################################

null_dist(n = 10, reps = 500)

################################################################################
# Two-Sample t-Test Example Using the 'energy' Dataset
################################################################################

# Load the dataset and explore
data(energy)
?energy  # Display help page

################################################################################
# Visualization: Boxplot Comparison
################################################################################

boxplot(
  expend ~ stature, data = energy, 
  names = c("Lean", "Obese"), 
  col = "lightblue", 
  main = "Energy Expenditure by Stature", 
  ylab = "Energy Expenditure", xlab = "Stature"
)

################################################################################
# Two-Sample t-Tests
################################################################################

# Unequal variance t-test (default)
t_neqvar <- t.test(expend ~ stature, data = energy)
cat("Two-Sample t-Test (Unequal Variance):\n")
t_neqvar

# Equal variance t-test
t_eqvar <- t.test(expend ~ stature, data = energy, var.equal = TRUE)
cat("\nTwo-Sample t-Test (Equal Variance):\n")
t_eqvar

################################################################################
# Explanation:
# - Unequal variance test: No assumption of equal variances (Welch's t-test).
# - Equal variance test: Assumes equal variances (pooled variance test).
################################################################################

################################################################################
# Paired t-Test Example Using the 'intake' Dataset
################################################################################

# Load the dataset and display help
data(intake)
?intake

################################################################################
# Perform a Paired t-Test
################################################################################

# Direct paired t-test
paired_ttest <- t.test(intake$pre, intake$post, paired = TRUE)
cat("Paired t-Test Results:\n")
print(paired_ttest)

################################################################################
# Alternative Method: Paired t-Test via Differences
################################################################################

# Compute differences and perform one-sample t-test
diff_ttest <- t.test(intake$pre - intake$post)
cat("\nPaired t-Test via Differences:\n")
diff_ttest

################################################################################
# Explanation:
# - Method 1: Direct paired t-test using `paired = TRUE`.
# - Method 2: One-sample t-test on differences, equivalent to the paired test.
################################################################################

################################################################################
# Simple Linear Regression Example Using the 'thuesen' Dataset
################################################################################

# Load and clean the dataset
data(thuesen)
thuesen <- na.omit(thuesen[c("blood.glucose", "short.velocity")])

# Fit the linear model
lmout <- lm(short.velocity ~ blood.glucose, data = thuesen)

# Print actual and fitted values
cat("Original and Fitted Values:\n")
cbind(Actual = thuesen$short.velocity, Fitted = lmout$fitted.values)

# Scatterplot with regression line
plot(thuesen$blood.glucose, thuesen$short.velocity,
     xlab = "Blood Glucose", ylab = "Short Velocity",
     main = "Simple Linear Regression: Short Velocity vs. Blood Glucose",
     pch = 19, col = "blue")
abline(lmout, lwd = 2, col = "red")
segments(thuesen$blood.glucose, thuesen$short.velocity,
         thuesen$blood.glucose, lmout$fitted.values, col = "gray", lty = 2)

# Q-Q plot of residuals
qqnorm(lmout$residuals, main = "Q-Q Plot of Residuals",
       xlab = "Theoretical Quantiles", ylab = "Sample Quantiles",
       pch = 19, col = "blue")
qqline(lmout$residuals, col = "red", lwd = 2)

################################################################################
# Polynomial Regression via Multiple Regression
################################################################################

poly_reg <- function(y, x, deg = 5) {
  if (length(y) != length(x)) stop("Length of 'y' and 'x' must be the same.")
  
  # Create the design matrix
  X <- sapply(0:deg, function(d) x^d)
  
  # Fit the model using least squares
  beta <- solve(t(X) %*% X, t(X) %*% y)
  y.hat <- X %*% beta
  
  # Plot data and fitted polynomial
  plot(x, y, main = paste("Polynomial Regression (Degree =", deg, ")"),
       xlab = "x", ylab = "y", pch = 19, col = "blue")
  lines(x, y.hat, col = "red", lwd = 2)
  
  # Return results
  list(fitted = y.hat, coefficients = beta)
}

################################################################################
# Example: Polynomial Regression
################################################################################

# Generate data
set.seed(123)
x <- seq(0, 2 * pi, length.out = 100)
y <- sin(x) + 0.1 * rnorm(100)

# Apply the function
result <- poly_reg(y, x, deg = 5)

################################################################################
# Display Results
################################################################################

cat("Polynomial Coefficients:\n", result$coefficients, "\n")
cat("\nFirst 10 Fitted Values:\n", head(result$fitted, 10), "\n")

################################################################################
# Basic ANOVA Example Using the 'red.cell.folate' Dataset
################################################################################

# Load the dataset and explore
data(red.cell.folate)
?red.cell.folate
str(red.cell.folate)
cat("\nSummary of the 'red.cell.folate' Dataset:\n")
summary(red.cell.folate)

################################################################################
# Visualization: Boxplot of Folate Levels by Ventilation Type
################################################################################

boxplot(folate ~ ventilation, data = red.cell.folate, col = "lightblue",
        main = "Folate Levels by Ventilation Type", 
        xlab = "Ventilation Type", ylab = "Folate Levels")

################################################################################
# Perform ANOVA
################################################################################

linmod <- lm(folate ~ ventilation, data = red.cell.folate)
anova_results <- anova(linmod)
cat("\nANOVA Results:\n")
print(anova_results)

