import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm, beta, expon, gamma, chi2

##########################################################
# Code to illustrate Chebychev's inequality applied to a Normal (i.e. Gaussian) distribution

#Chebychev's Inequality: P(|X-mu| >= t) <= sigma^2/t^2
#For X ~ N(0,1), it becomes P(|X| >= t) <= 1/t^2

# Grid of thresholds t = 1.00, 1.01, ..., 3.00 (201 points).
# linspace states the endpoint and the number of points explicitly; NumPy
# recommends it over arange for non-integer steps, where arange needs a
# padded stop value (3.01) to reach the last point.
tseq = np.linspace(1, 3, 201)

# Exact two-sided tail probability P(|X| >= t) for X ~ N(0, 1)
true = 1 - (norm.cdf(tseq) - norm.cdf(-tseq))

# Chebychev's upper bound: sigma^2 / t^2 with sigma^2 = 1
approx1 = 1 / (tseq**2)

# Mills' upper bound: 2 * phi(t) / t = sqrt(2/pi) * exp(-t^2 / 2) / t,
# where phi is the standard normal density
approx2 = (np.sqrt(2/np.pi) * np.exp(-tseq**2/2)) / tseq

# Plot the exact probability against both upper bounds
plt.plot(tseq, true, label="exact")
plt.plot(tseq, approx1, color='red', label="Chebychev")
plt.plot(tseq, approx2, color='green', label="Mill")
plt.xlabel("t")
plt.ylabel("probability")
plt.title("Normal probability: P(|X| >= t)")
plt.legend(loc="upper right")
plt.show()

plt.clf()

##########################################################
###  Distribution of order statistics
def _plot_order_statistic(k, n, nreps, hist_range=None, ylim=None):
    """Simulate and plot the k-th order statistic of n Uniform(0, 1) draws.

    Draws ``nreps`` samples of size ``n``, takes the k-th smallest value of
    each sample, and shows a density histogram of those values with the
    Beta(k, n + 1 - k) density overlaid in red.

    Parameters
    ----------
    k : int
        1-based rank of the order statistic (1 = sample minimum).
    n : int
        Size of each sample.
    nreps : int
        Number of simulated samples.
    hist_range : tuple of (float, float), optional
        Passed to ``plt.hist`` as ``range``; ``None`` uses the data range.
    ylim : tuple of (float, float), optional
        y-axis limits; ``None`` leaves matplotlib's automatic limits.

    Returns
    -------
    numpy.ndarray
        The ``nreps`` simulated values of the k-th order statistic.
    """
    # One row per replication, drawn in a single call instead of a Python loop.
    samples = np.random.uniform(0, 1, (nreps, n))
    # Sorting each row puts the k-th smallest value in column k - 1.
    order_stat = np.sort(samples, axis=1)[:, k - 1]

    plt.hist(order_stat, density=True, bins=50, alpha=0.7, label=f'X_({k})',
             range=hist_range)
    if ylim is not None:
        plt.ylim(*ylim)
    t = np.linspace(0, 1, 1000)
    plt.plot(t, beta.pdf(t, k, n + 1 - k), color='red', linewidth=2,
             label=f'Beta({k}, {n + 1 - k})')
    plt.title(f'Distribution of X_({k})')
    plt.legend()
    plt.show()

    plt.clf()
    return order_stat


n = 10
nreps = 10000

# For k=1
k = 1
y = _plot_order_statistic(k, n, nreps)

# For k=3: histogram bins span the whole unit interval and the y-axis is capped
k = 3
y = _plot_order_statistic(k, n, nreps, hist_range=(0, 1), ylim=(0, 3.1))

##########################################################
#####  Convergence in distribution
n = 100
nreps = 1000

# One row per replication, drawn in a single call instead of a Python loop.
x = np.random.uniform(0, 1, (nreps, n))
# y holds the sample maximum X_(n) of each replication;
# z holds the rescaled distance of that maximum from 1, n * (1 - X_(n)).
y = x.max(axis=1)
z = n * (1 - y)

# Distribution of X_(n); labels are derived from n so they stay correct if n changes
plt.hist(y, bins=50, density=True, alpha=0.7, label=f"X_({n})")
t = np.linspace(0, 1, 1000)
k = n
plt.plot(t, beta.pdf(t, k, n + 1 - k), color='red', linewidth=2,
         label=f"Beta({k}, {n + 1 - k})")
plt.title(f"Distribution of X_({n})")
plt.legend()
plt.show()

plt.clf()

# Limiting distribution of n(1 - X_(n)), compared with the Exponential density of scale 1
plt.hist(z, bins=50, density=True, alpha=0.7, label="n(1 - X_(n))")
t = np.linspace(0, z.max(), 10000)
plt.plot(t, expon.pdf(t, scale=1), color='red', linewidth=2, label="Exponential(1)")
plt.title("Distribution of n(1 - X_(n))")
plt.legend()
plt.show()

plt.clf()

##########################################################
######  Central Limit Theorem
# Sample means for random samples from an Exponential distribution with scale 10.
# NumPy's scale parameter is 1/lambda, so the mean is `scale` and the
# variance is `scale`**2.
n = 100
nreps = 1000
scale = 10

# One row per replication; the row means are the nreps sample means.
x = np.random.exponential(scale=scale, size=(nreps, n))
xbar = x.mean(axis=1)

# Histogram of Exponential(10) random variable
plt.hist(np.random.exponential(scale=scale, size=1000), bins=50, density=True, alpha=0.7)
plt.xlabel("x")
plt.title(f"emp pdf of a Exponential({scale}) rv")
plt.show()

plt.clf()

# Histogram for standardized sample mean: sqrt(n) * (xbar - mu) / sigma.
# mu and sigma2 are derived from scale so they cannot drift from the sampler.
mu = scale
sigma2 = scale**2
plt.hist(np.sqrt(n) * (xbar - mu) / np.sqrt(sigma2), bins=50, density=True, alpha=0.7, label="Sample Mean")
t = np.linspace(-3, 3, 1000)
plt.plot(t, norm.pdf(t, 0, 1), color='red', linewidth=2, label="Standard Normal")
plt.xlabel("standardized mean")
# The title names the distribution actually sampled (it previously said Exponential(1)).
plt.title(f"emp pdf of sample mean from Exponential({scale}) distr")
plt.legend()
plt.ylim(0, 0.45)
plt.show()

plt.clf()

##########################################################
#######  further convergence demos
n = 10000

# X: -5 * log(1 - U) with U ~ Uniform(0, 1), compared below with the
# Exponential density of scale 5.
u = np.random.uniform(0, 1, n)
x = -5 * np.log(1-u)

# Z: each value is the sum of three terms -2 * log(1 - U); one row of three
# uniforms per value, drawn in a single call instead of a Python loop.
u = np.random.uniform(0, 1, (n, 3))
z = (-2 * np.log(1 - u)).sum(axis=1)

# Y: same construction as Z with multiplier 1.5 instead of 2.
u = np.random.uniform(0, 1, (n, 3))
y = (-1.5 * np.log(1 - u)).sum(axis=1)

# Probabilities, estimated as the fraction of paired draws satisfying each inequality
p_x_greater_than_y = np.mean(x > y)
p_y_greater_than_z = np.mean(y > z)
p_x_greater_than_z = np.mean(x > z)

print(f"P(X > Y) = {p_x_greater_than_y}")
print(f"P(Y > Z) = {p_y_greater_than_z}")
print(f"P(X > Z) = {p_x_greater_than_z}")

# Sample means as estimates of the expectations
print(f"E(X) = {np.mean(x)}")
print(f"E(Y) = {np.mean(y)}")
print(f"E(Z) = {np.mean(z)}")

# Plotting Exponential(5) Distribution
plt.hist(x, bins=50, density=True, alpha=0.7, label="Sampled")
t = np.linspace(0, x.max(), 1000)
plt.plot(t, expon.pdf(t, scale=5), color='red', linewidth=2, label="Theoretical")
plt.title("Exponential(5) Distribution")
plt.legend()
plt.show()

plt.clf()

# Plotting Gamma(3,2) Distribution (also equivalent to Chi-Square(6) Distribution)
z_max = z.max()
plt.hist(z, bins=50, density=True, alpha=0.7, label="Sampled", range=(0, z_max))
t = np.linspace(0, z_max, 1000)
plt.plot(t, chi2.pdf(t, df=6), color='red', linewidth=2, label="Chi-Square(6)")
plt.plot(t, gamma.pdf(t, a=3, scale=2), color='blue', linestyle="--", linewidth=2, label="Gamma(3,2)")
plt.ylim(0, 0.14)
plt.title("Gamma(3,2) or Chi-Square(6) Distribution")
plt.legend()
plt.show()


