# R session
# Thinking like a computer
# One of the upshots of R is that we can ``routinize'' programs that do
# the same thing over and over. This requires a bare minimum of computer
# ``programmer'' knowledge. Really, though, I just want you to see what
# it looks like to have some ``code''
# Let's simulate the Central Limit Theorem! Remember, an infinite numer
# of sample means will be normally distributed.
# This means we want to take a lot (``infinite'') number of samples.
# If we want to do the same thing a lot of times, the answer is a
# ``for'' loop. A for loop says, ``for'' a certain number of times,
# do the same thing.
# Define the population
population <- rnorm(100000, 5, 5)
# population <- rpois(100000, 5)
# Look at the population
plot(population)
hist(population)
# Fun math time: can you guess the 68th, 95th, and 99th percentiles?
# use the quantile() function
c(mean(population) - sd(population) - sd(population) - sd(population),
mean(population) - sd(population) - sd(population),
mean(population) - sd(population),
mean(population))
quantile(population, c(0.005, 0.025, 0.16, 0.50))
# CLT: we want to sample, take a mean, and plot the means
# First: a vector to fill with the means (let's do 5,000 of 1,000 observations)
sample.means <- rep(NA, 5000)
# Syntax: for(i in start:num of times) {stuff to do}
for(i in 1:length(sample.means)) {
obs <- sample(population, 1000)
sample.means[i] <- mean(obs)
}
# Standard error of the mean?
se.mean <- sd(obs)/sqrt(5000)
sample.means
hist(sample.means)
quantile(sample.means, c(0.025, 0.16, 0.50))
c(mean(obs) - se.mean - se.mean,
mean(obs) - se.mean,
mean(obs))
# Next: if statements. If statements are logical operators: work off of
# ``Boolean'' logic (yes or no). Syntax: you write (ifelse(test), do if true,
# do if false)
male <- round(runif(1000, 0, 1))
height <- round(rnorm(1000, 64, 4)) + round(male*rnorm(1000, 6, 2))
state <- c(rep("Alaska", 0.25*length(height)),
rep("Alabama", 0.25*length(height)),
rep("Texas", 0.25*length(height)),
rep("Georgia", 0.25*length(height)))
plot(male, height)
# Example: code variables
male.height <- ifelse(male == 1, height, 0)
# Imagine a world where you have negative ``missing'' values
male.recode <- male
male.recode[c(990, 999)] <- -9
male.recode
male.recoded <- ifelse(male.recode < 0, NA, male.recode)
male.recoded
# Can you put them together? Of course!
real.tall <- rep(0, length(male))
for(i in 1:length(male)) {
real.tall[i] <- ifelse(male[i] == 1 & height[i] > mean(male.height) +
sd(male.height), 1, 0)
}
cbind(real.tall, height)
table(state, real.tall)
# Nested for loops! Sure!
# What if we wanted a variable for the average height in the state?
avg.state.height <- rep(NA, length(male))
for(i in 1:length(male)) { #For our observations
for(j in 1:length(unique(state))) {# For our states
ifelse(state[i] == unique(state)[j],
avg.state.height[i] <- mean(subset(height, state == unique(state)[j])),
avg.state.height[i] <- avg.state.height[i])
}
}
mean(subset(height, state == "Alabama"))
mean(subset(height, state == "Georgia"))
mean(subset(height, state == "Texas"))
mean(subset(height, state == "Alaska"))
table(avg.state.height)
cbind(avg.state.height, state)