## Lecture Slides 2 & Bayes Rules! Chapter 2 R Examples 
## Enhanced for Efficiency

########################################################
# The R Code in Chapter 2
########################################################

# Load necessary packages
library(tidyverse)
library(janitor)
library(bayesrules)

# Set the working directory to the folder containing this script.
# getActiveDocumentContext() only works inside a running RStudio session and
# reports an empty path for a document that has never been saved, so both
# situations are skipped instead of aborting the script.
library("rstudioapi")  # Load rstudioapi package
if (rstudioapi::isAvailable()) {
  local({
    script_path <- getActiveDocumentContext()$path
    if (nzchar(script_path)) {
      setwd(dirname(script_path))
    }
  })
}
#getwd()

# Load the fake_news dataset (found via the packages attached above)
data("fake_news")

# Tabulate article type, with a totals row.
# print() is explicit so the table is also shown when the script is run via
# source(), in line with the other summaries in this file.
fake_news %>%
  tabyl(type) %>%
  adorn_totals("row") %>%
  print()

# Cross-tabulate exclamation-point usage in the title (rows) against
# article type (columns), with a totals row
fake_news %>%
  tabyl(title_has_excl, type) %>%
  adorn_totals("row") %>%
  print()

# The two possible article types and their prior weights
# (`prior` is listed in the same order as the rows of `articles`)
articles <- tibble(type = c("real", "fake"))
prior <- c(0.6, 0.4)

# Draw 10000 article types with replacement, weighted by the prior.
# The seed is fixed so the draw is reproducible.
set.seed(84735)
article_sim <- sample_n(
  articles,
  size = 10000,
  weight = prior,
  replace = TRUE
)

# Bar chart: number of simulated articles of each type
article_sim %>%
  ggplot(mapping = aes(x = type)) +
  geom_bar() +
  labs(title = "Distribution of Simulated Article Types")

# Frequency table of the simulated types, with a totals row
print(
  article_sim %>%
    tabyl(type) %>%
    adorn_totals(where = "row")
)

# Simulate exclamation point usage per article.
# `data_model` holds the probability of "yes" for each article (0.2667 when
# the type is "fake", 0.0222 otherwise); `usage` is then one "no"/"yes" draw
# per article using that probability. The seed is set immediately before the
# only random step so the draws are reproducible.
set.seed(3)
article_sim <- article_sim %>%
  mutate(
    data_model = if_else(type == "fake", true = 0.2667, false = 0.0222),
    usage = map_chr(
      data_model,
      function(p) sample(c("no", "yes"), size = 1, prob = c(1 - p, p))
    )
  )

# Two-way table of exclamation usage (rows) by article type (columns),
# with totals added along both margins
print(
  article_sim %>%
    tabyl(usage, type) %>%
    adorn_totals(where = c("col", "row"))
)

# Bars filled to full height: share of "yes"/"no" usage within each type
article_sim %>%
  ggplot(mapping = aes(x = type, fill = usage)) +
  geom_bar(position = "fill") +
  labs(title = "Exclamation Point Usage by Article Type")

# Plot exclamation point usage overall.
# The x aesthetic is `usage`, so the bars count "yes" vs "no" across all
# simulated articles regardless of their type.
ggplot(article_sim, aes(x = usage)) +
  geom_bar() +
  labs(title = "Overall Exclamation Point Usage")

# Approximate the posterior probability that an article is fake:
# keep only simulated articles whose usage is "yes" and tabulate their type
print(
  article_sim %>%
    filter(usage == "yes") %>%
    tabyl(type) %>%
    adorn_totals(where = "row")
)

# Counts of real vs fake articles, one panel per usage value
article_sim %>%
  ggplot(mapping = aes(x = type)) +
  geom_bar() +
  facet_wrap(facets = ~ usage) +
  labs(title = "Real vs Fake News by Exclamation Point Usage")

# Load the pop_vs_soda dataset
data("pop_vs_soda")

# Cross-tabulate `pop` (rows) by `region` (columns) and convert the counts
# to proportions within each column
print(
  pop_vs_soda %>%
    tabyl(pop, region) %>%
    adorn_percentages(denominator = "col")
)

# Candidate win probabilities (column `pi`) and their prior weights.
# Note: `prior` is reassigned here, replacing the article prior defined
# earlier in the script.
chess <- tibble(pi = c(0.2, 0.5, 0.8))
prior <- c(0.10, 0.25, 0.65)

# Step 1: draw 10000 values of pi with replacement, weighted by the prior.
set.seed(84735)
chess_sim <- sample_n(chess, size = 10000, weight = prior, replace = TRUE)

# Step 2: for each sampled pi, draw y from a Binomial with 6 trials.
# Inside mutate(), `pi` refers to the data column, not base R's constant.
chess_sim <- chess_sim %>%
  mutate(y = rbinom(n = 10000, size = 6, prob = pi))

# Frequency table of the sampled pi values, with a totals row
print(
  chess_sim %>%
    tabyl(pi) %>%
    adorn_totals(where = "row")
)

# Proportion of simulations at each value of y, one panel per pi
chess_sim %>%
  ggplot(mapping = aes(x = y)) +
  stat_count(mapping = aes(y = after_stat(prop))) +
  facet_wrap(facets = ~ pi) +
  labs(title = "Simulated Match Outcomes by Win Probability")

# Keep only the simulations in which y equals 1
win_one <- filter(chess_sim, y == 1)

# Frequency table of pi among those simulations, with a totals row
print(
  win_one %>%
    tabyl(pi) %>%
    adorn_totals(where = "row")
)

# Bar chart of the same counts: approximate posterior of pi given y = 1
win_one %>%
  ggplot(mapping = aes(x = pi)) +
  geom_bar() +
  labs(title = "Posterior Approximation for pi Given y = 1")

