# S5600-6600_BirthDay.r
# R Code for the Birthday Problem.

# The Exact Probability

# Define the number of students
n<-24 

# Calculate the number of possible ways to assign birthdays to 24 students
365^n

# Calculate the exact probability that all birthdays are unique
PAc<-factorial(n)*choose(365,n)/365^n
PAc

# Compute the exact probability that at least two students share a birthday
PA<-1-PAc 
PA

######################################################
# Empirical Estimation using Monte Carlo Simulations

# Define the number of simulations
Nsim<-10000 

# Initialize a vector to record results of simulations
all.diff.bday<-vector()

# Run simulations to check if all birthdays are unique
for(i in 1:Nsim) {
    b.days<-sample(365,n,replace=TRUE)
    all.diff.bday[i]<-length(unique(b.days))==n
}

# Calculate the empirical probability that all birthdays are unique
mean(all.diff.bday)

# Calculate the empirical probability that at least two students share a birthday
1-mean(all.diff.bday) 

######################################################
# Shortened Empirical Estimation

cnt=0
for(i in 1:Nsim) {
    cnt=ifelse(length(unique(sample(1:365,n, replace=TRUE)))==n, cnt+1, cnt+0)
}
cnt/Nsim
1-cnt/Nsim

######################################################
# One-liner Empirical Estimation

PAc<-mean((apply(matrix(sample(1:365,Nsim*n,replace=TRUE),ncol=n),1,anyDuplicated)==0)*1)
PAc
1-PAc

######################################################
# Plotting the Results

# Calculate the exact probabilities for varying number of students
nseq<-2:81
PA.seq<-vector()
for (i in 1:80) {
    PAc<-factorial(nseq[i])*choose(365,nseq[i])/365^nseq[i]
    PA.seq<-c(PA.seq,1-PAc)
}

# Calculate empirical probabilities for varying number of students
PA.sim<-vector()
for (i in 1:80) {
    cnt=0
    for(j in 1:Nsim) {
        cnt=ifelse(length(unique(sample(1:365,nseq[i], replace=TRUE)))==nseq[i], cnt+1, cnt+0)
    }
    PA.sim<-c(PA.sim,1-cnt/Nsim)
}

# Plot the probabilities
plot(nseq,PA.seq,type="l",col="red",xlab="n",ylab="P(A)",lty=1,main="Probability of Birthday Sharing")
lines(nseq,PA.sim,col="blue",lty=1)
legend("bottomright", legend=c("exact", "empirical"), col=c("red", "blue"), lty=1,cex=0.8)
