#PS4DS_Chap2_RCode.R
#R Code for Chapter 2 of the Book "Probability and Statistics for Data Science"
#Norman Matloff

# -----------------------
# Example 2.1:  Rolling Dice
# -----------------------

# Estimate P(total = k) when rolling d dice, one die at a time.
#   d:     number of dice rolled in each repetition
#   k:     target total
#   nreps: number of simulated repetitions
# Returns the fraction of repetitions whose total equals k
# (NaN when nreps is 0, because no repetition was run).
# Calls a zero-argument roll(), which must be defined before this is called.
probtotk <- function(d, k, nreps) {
    count <- 0
    # seq_len() gives an empty loop for 0, whereas 1:0 would iterate twice
    for (i in seq_len(nreps)) {
        # named `total` so the local does not mask base::sum()
        total <- 0
        for (j in seq_len(d)) total <- total + roll()
        if (total == k) count <- count + 1
    }
    return(count/nreps)
}

# Simulate one roll of a six-sided die; returns a single integer from 1 to 6
roll <- function() {
    sample.int(6L, size = 1L)
}

# Example call, followed by a timed run of the same estimate with more repetitions
probtotk(d = 3, k = 8, nreps = 1000)
t1 <- system.time(probtotk(d = 3, k = 8, nreps = 100000))

# -----------------------
# First Improvement
# -----------------------

# Improved function to roll d dice and find P(total = k).
# All d dice of one repetition are drawn with a single sample() call.
#   d:     number of dice rolled in each repetition
#   k:     target total
#   nreps: number of simulated repetitions
# Returns the fraction of repetitions whose total equals k
# (NaN when nreps is 0, because no repetition was run).
probtotk <- function(d, k, nreps) {
    count <- 0
    # seq_len() gives an empty loop for nreps = 0; 1:0 would run two repetitions
    for (i in seq_len(nreps)) {
        total <- sum(sample(1:6, d, replace = TRUE))
        if (total == k) count <- count + 1
    }
    return(count/nreps)
}

# Example call, followed by a timed run of the same estimate with more repetitions
probtotk(d = 3, k = 8, nreps = 1000)
t2 <- system.time(probtotk(d = 3, k = 8, nreps = 100000))

# -----------------------
# Second Improvement
# -----------------------

# Simulate rolling nd six-sided dice; returns an integer vector of length nd
roll <- function(nd) {
    sample.int(6L, size = nd, replace = TRUE)
}

# Improved function with vectorized sum operations.
# Stores the total of each repetition, then compares all totals to k at once.
#   d:     number of dice rolled in each repetition
#   k:     target total
#   nreps: number of simulated repetitions
# Returns the fraction of repetitions whose total equals k
# (NaN when nreps is 0, because no repetition was run).
# Calls roll(nd), which must be defined before this is called.
probtotk <- function(d, k, nreps) {
    # preallocate a numeric vector; vector(length = ) would create a logical one
    sums <- numeric(nreps)
    # seq_len() gives an empty loop for nreps = 0; 1:0 would grow sums by one entry
    for (i in seq_len(nreps)) sums[i] <- sum(roll(d))
    return(mean(sums == k))
}

# Example call, followed by a timed run of the same estimate with more repetitions
probtotk(d = 3, k = 8, nreps = 1000)
t3 <- system.time(probtotk(d = 3, k = 8, nreps = 100000))

# -----------------------
# Third Improvement
# -----------------------

# Version that lets replicate() drive the repetitions instead of an explicit loop.
#   d:     number of dice rolled in each repetition
#   k:     target total
#   nreps: number of simulated repetitions
# Returns the fraction of repetitions whose total equals k.
# Calls roll(nd), which must be defined before this is called.
probtotk <- function(d, k, nreps) {
    totals <- replicate(nreps, sum(roll(d)))
    mean(totals == k)
}

# Example call, followed by a timed run of the same estimate with more repetitions
probtotk(d = 3, k = 8, nreps = 1000)
t4 <- system.time(probtotk(d = 3, k = 8, nreps = 100000))

# Print the first three timing columns for all four versions
rbind(t1, t2, t3, t4)[, seq_len(3)]

##################################
#Example 2.2: Dice Problem
##################################

# Estimate P(first die < 3 | sum of three dice > 8) by simulation.
#   nreps: number of simulated rolls of three dice
# Returns count2 / count1, i.e. among rolls with sum > 8, the fraction whose
# first die is below 3. The result is NaN when no roll had sum > 8
# (which includes nreps = 0).
dicesim <- function(nreps) {
    count1 <- 0  # rolls with sum > 8
    count2 <- 0  # rolls with sum > 8 and first die < 3

    # seq_len() gives an empty loop for nreps = 0; 1:0 would simulate two rolls
    for (i in seq_len(nreps)) {
        # TRUE is spelled out because the shorthand T is an ordinary,
        # reassignable variable
        d <- sample(1:6, 3, replace = TRUE)  # Roll three dice
        if (sum(d) > 8) {
            count1 <- count1 + 1
            if (d[1] < 3) count2 <- count2 + 1
        }
    }
    # Conditional relative frequency
    return(count2 / count1)
}

# Estimate the conditional probability from 1000 simulated rolls
dicesim(nreps = 1000)

##################################
#Section 2.3 Use of runif() for Simulating Events
##################################

# Simulate one coin toss.
# runif(1) draws a single uniform random number between 0 and 1;
# the toss counts as "heads" exactly when that number is below 0.5.
heads <- runif(1) < 0.5

# Show the outcome of the toss
heads

##################################
#Example: ALOHA Network
##################################

# Simulating ALOHA Network

# Estimates P(X1 = 2), P(X2 = 2) and P(X2 = 2 | X1 = 1) by simulation
# for the ALOHA example and prints them with cat().
#   p:     probability that a node sends in a slot
#   q:     probability that the number of active nodes grows by one before
#          slot 2 (only drawn when X1 = 1)
#   nreps: number of simulated repetitions
# X1 is used as the number of active nodes going into slot 2; X2 is the
# corresponding value computed after slot 2.
# An estimate prints as NaN when its denominator is 0 (e.g. nreps = 0, or no
# repetition with X1 = 1).
ALOHAsim <- function(p, q, nreps) {
    # Counters for the events of interest
    countx1eq2 <- 0          # repetitions with X1 = 2
    countx2eq2 <- 0          # repetitions with X2 = 2
    countx1eq1 <- 0          # repetitions with X1 = 1
    countx2eq2givx1eq1 <- 0  # repetitions with X1 = 1 and X2 = 2

    # seq_len() gives an empty loop for nreps = 0; 1:0 would run two repetitions
    for (i in seq_len(nreps)) {
        # Slot 1: each of the two nodes sends with probability p.
        # runif(2) consumes the same two random numbers as two runif(1) calls.
        numsend <- sum(runif(2) < p)
        # Exactly one sender leaves one node; otherwise both remain
        X1 <- if (numsend == 1) 1 else 2
        if (X1 == 2) countx1eq2 <- countx1eq2 + 1

        # Slot 2: start from X1 nodes; with one node, a second may appear (prob. q).
        # && short-circuits, so the q draw happens only when X1 == 1.
        numactive <- X1
        if (X1 == 1 && runif(1) < q) numactive <- numactive + 1

        if (numactive == 1) {
            # The single node sends with probability p
            X2 <- if (runif(1) < p) 0 else 1
        } else {
            # Two nodes: same rule as in slot 1
            numsend <- sum(runif(2) < p)
            X2 <- if (numsend == 1) 1 else 2
        }

        # Tally the unconditional and conditional events
        if (X2 == 2) countx2eq2 <- countx2eq2 + 1
        if (X1 == 1) {
            countx1eq1 <- countx1eq1 + 1
            if (X2 == 2) countx2eq2givx1eq1 <- countx2eq2givx1eq1 + 1
        }
    }

    # Print the results
    cat("P(X1 = 2):",countx1eq2/nreps,"\n")
    cat("P(X2 = 2):",countx2eq2/nreps,"\n")
    cat("P(X2 = 2 | X1 = 1):",countx2eq2givx1eq1/countx1eq1,"\n")
}

# Run the ALOHA simulation with p = 0.4, q = 0.8 and 1000 repetitions
ALOHAsim(p = 0.4, q = 0.8, nreps = 1000)

##################################
#Example 2.4: Bus Ridership
##################################

# In this simulation, we consider a bus with multiple stops.
# At each stop, passengers can get off or get on.
# We want to determine the probability that the bus is empty after visiting the nth stop.
# In this example, n=100.

# Simulation settings
nreps <- 10000   # number of simulated bus runs
nstops <- 100    # number of stops visited in each run
count <- 0       # runs in which the bus is empty after the last stop

# One iteration of the outer loop simulates one complete bus run
for (i in seq_len(nreps)) {
    # The bus starts out empty
    passengers <- 0

    for (j in seq_len(nstops)) {
        # Alighting: each passenger currently on board leaves with probability 0.2.
        # The guard keeps the inner loop from running when nobody is on board.
        if (passengers > 0) {
            for (k in seq_len(passengers)) {
                if (runif(1) < 0.2) {
                    passengers <- passengers - 1
                }
            }
        }

        # Boarding: 0, 1 or 2 new passengers with probabilities 0.5, 0.4 and 0.1
        newpass <- sample(0:2, 1, prob = c(0.5, 0.4, 0.1))
        passengers <- passengers + newpass
    }

    # Record the run if the bus ended up empty
    count <- count + (passengers == 0)
}

# Estimated probability that the bus is empty after the last stop
print(count/nreps)

##################################
#Example 2.5: Board Game
##################################

# In this simulation, we simulate a player's turn in a board game.
# The board is circular with 8 spaces; the code tracks positions modulo 8, so they are effectively numbered 0 through 7 (moving forward from space 7 brings you to space 0).
# The player's first roll of a 6-sided die determines the space they land on. If that is space 3, they roll again and move that many spaces forward.
# The goal is to estimate the probability that the player took the bonus roll (from landing on space 3) given that they end up on space 4.

# Simulating the Board Game
#
# Estimates P(bonus roll was taken | player ends on space 4) by simulation.
#   nreps: number of simulated turns
# Returns countbonusgiven4 / count4. The result is NaN when no simulated turn
# ends on space 4 (which includes nreps = 0).
boardsim <- function(nreps) {
    count4 <- 0            # turns that end on space 4
    countbonusgiven4 <- 0  # turns that end on space 4 after a bonus roll

    # seq_len() gives an empty loop for nreps = 0; 1:0 would simulate two turns
    for (i in seq_len(nreps)) {
        # First roll decides where the player lands
        position <- sample(1:6, 1)
        # Landing on space 3 earns a bonus roll
        bonus <- position == 3
        if (bonus) {
            # Move forward by the second roll; %% 8 wraps around the 8-space board
            position <- (position + sample(1:6, 1)) %% 8
        }
        # Tally turns ending on space 4, and those among them that used the bonus
        if (position == 4) {
            count4 <- count4 + 1
            if (bonus) countbonusgiven4 <- countbonusgiven4 + 1
        }
    }
    # Conditional relative frequency
    return(countbonusgiven4/count4)
}

# Estimate the conditional probability from 1000 simulated turns
boardsim(nreps = 1000)

##################################
#Example 2.6: Broken Rod
##################################

# In this simulation, a glass rod is dropped and breaks into random pieces.
# We want to estimate the probability that the smallest piece has a length below 0.02.

# Simulating the Broken Rod

# Function minpiece:
# Simulates one breaking of a unit-length rod into k pieces and
# returns the length of the shortest piece.
minpiece <- function(k) {
    # Both rod ends plus k - 1 uniformly drawn break points, in increasing order
    cuts <- c(0, sort(runif(k - 1)), 1)

    # Each piece length is the gap between two neighbouring cut points
    pieces <- cuts[-1] - cuts[-length(cuts)]

    min(pieces)
}

# Function bkrod:
# Breaks the rod nreps times into k pieces (via minpiece) and returns the
# fraction of those breaks whose shortest piece is shorter than q.
bkrod <- function(nreps, k, q) {
    # Shortest piece length from each simulated break
    smallest <- replicate(nreps, minpiece(k))

    # Proportion of breaks with a shortest piece below the threshold q
    mean(smallest < q)
}

# Estimate, from 1000 simulated breaks into 5 pieces, the probability
# that the shortest piece is shorter than 0.02.
bkrod(nreps = 1000, k = 5, q = 0.02)

##################################
#Example: Toss a Coin Until k Consecutive Heads
##################################

# In this simulation, we are tossing a coin until we get k heads in a row.
# The objective is to estimate the probability that more than m tosses are needed
# to achieve k consecutive heads.

# Simulating Tossing a Coin

# Function ngtm:
# Estimates the probability that more than m tosses of a fair coin are needed
# to see k consecutive heads.
#   k:     required run length of heads
#   m:     number of tosses allowed
#   nreps: number of simulated repetitions
# Returns the fraction of repetitions in which k consecutive heads did not
# occur within the first m tosses (NaN when nreps is 0).
ngtm <- function(k, m, nreps) {
    count <- 0
    # seq_len() gives an empty loop for nreps = 0; 1:0 would run two repetitions
    for (r in seq_len(nreps)) {
        # Length of the current run of heads
        consech <- 0
        # Toss at most m times; seq_len(m) performs no toss when m = 0,
        # whereas 1:0 would toss twice
        for (i in seq_len(m)) {
            # Sample 0 (for tails) or 1 (for heads)
            toss <- sample(0:1, 1)

            if (toss) {
                # Heads extends the run
                consech <- consech + 1

                # Stop this repetition as soon as the run reaches k
                if (consech == k) break
            } else {
                # Tails resets the run
                consech <- 0
            }
        }

        # The run never reached k within m tosses
        if (consech < k) count <- count + 1
    }

    # Fraction of repetitions that needed more than m tosses
    return(count/nreps)
}

# Estimate, from 1000 repetitions, the probability that more than
# 10 tosses are needed to see 8 consecutive heads.
ngtm(8, 10, 1000)
