import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import t

# Motivation for using the Student's t-distribution to
# obtain proper coverage of 95% confidence intervals.

# True mean and standard deviation of the population being sampled
mu, sigma = 1, 1

# Observations per simulated dataset, and how many datasets to simulate
n, Nsim = 5, 100

# Coverage using the Normal distribution.
# For each of Nsim simulated datasets of size n, build the interval
# xbar +/- 1.96 * s / sqrt(n) and record whether it contains the true mean mu.
xbar = np.zeros(Nsim)               # sample means
s = np.zeros(Nsim)                  # sample standard deviations
lower = np.zeros(Nsim)              # lower interval endpoints
upper = np.zeros(Nsim)              # upper interval endpoints
coverN = np.zeros(Nsim, dtype=int)  # 1 if the interval contains mu, else 0

# Set up the plot for displaying the confidence intervals
plt.figure(figsize=(8, 6))
plt.xlim(-1, 3)
plt.ylim(0, Nsim)
plt.xlabel("95% CI")
plt.ylabel("Dataset")
plt.axvline(x=mu, color='gray')  # True mean line

# Simulate data and calculate confidence intervals using the Normal distribution
for i in range(Nsim):
    Y = np.random.normal(mu, sigma, n)    # Simulate sample data
    xbar[i] = np.mean(Y)                  # Compute sample mean
    s[i] = np.std(Y, ddof=1)              # Compute sample standard deviation (with Bessel's correction)

    # Half-width of the interval; computed once and reused for both endpoints
    z_half_width = 1.96 * s[i] / np.sqrt(n)
    lower[i] = xbar[i] - z_half_width
    upper[i] = xbar[i] + z_half_width

    # Check if true mean is within the interval
    coverN[i] = (lower[i] < mu) & (mu < upper[i])

    # Plot the interval as a horizontal segment: blue if it covers mu, red if it misses
    plt.plot([lower[i], upper[i]], [i, i], color='red' if not coverN[i] else 'blue')

# Title displaying the experimental coverage using the Normal distribution
coverageN = np.mean(coverN)
plt.title(f"Experimental Coverage with Z interval is {100 * coverageN:.1f}%")
plt.tight_layout()
plt.show()

# Close the finished figure instead of calling plt.clf(): if show() has already
# disposed of the figure, clf() would fetch the "current" figure and thereby
# create a new empty one, which then shows up as a stray blank window later.
plt.close()

# Set up the plot for displaying the confidence intervals
# based on t-distribution
plt.figure(figsize=(8, 6))
plt.xlim(-1, 3)
plt.ylim(0, Nsim)
plt.xlabel("95% CI")
plt.ylabel("Dataset")
plt.axvline(x=mu, color='gray')  # True mean line

# Initialize arrays
xbar = np.zeros(Nsim)               # sample means
s = np.zeros(Nsim)                  # sample standard deviations
lower = np.zeros(Nsim)              # lower interval endpoints
upper = np.zeros(Nsim)              # upper interval endpoints
coverT = np.zeros(Nsim, dtype=int)  # 1 if the interval contains mu, else 0

# Critical value: 97.5th percentile of the t-distribution with n-1 degrees of
# freedom. It does not depend on the simulated data, so compute it once here
# rather than twice per dataset inside the loop.
t_crit = t.ppf(0.975, df=n - 1)

# Simulate data and calculate confidence intervals using the t-distribution
for i in range(Nsim):
    Y = np.random.normal(mu, sigma, n)    # Simulate sample data
    xbar[i] = np.mean(Y)                  # Compute sample mean
    s[i] = np.std(Y, ddof=1)              # Compute sample standard deviation (with Bessel's correction)

    # Confidence intervals using t-distribution
    t_half_width = t_crit * s[i] / np.sqrt(n)
    lower[i] = xbar[i] - t_half_width
    upper[i] = xbar[i] + t_half_width

    # Check if true mean lies within the t-interval
    coverT[i] = (lower[i] < mu) & (mu < upper[i])

    # Plot the interval as a horizontal segment: blue if it covers mu, red if it misses
    plt.plot([lower[i], upper[i]], [i, i], color='red' if not coverT[i] else 'blue')

# Title displaying the experimental coverage using the t-distribution
coverageT = np.mean(coverT)
plt.title(f"Experimental Coverage with t interval is {100 * coverageT:.1f}%")
plt.tight_layout()
plt.show()

# Close the finished figure instead of calling plt.clf(): if show() has already
# disposed of the figure, clf() would fetch the "current" figure and thereby
# create a new, empty one as a side effect.
plt.close()
