## CSSS/POLS 510 - Maximum Likelihood Estimation
## TA: Ramses Llobet - UW, Department of Political Science
## Lab Week 2: Intro to RMarkdown and Overleaf
rm(list = ls())
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
print(getwd())
dir()
library(tidyverse)
#### Intro to functions and loops :--------------------------
# Function to calculate the area of a rectangle
calculate_rectangle_area <- function(length, width) {
# Perform the operation
area <- length * width
# Return the output
return(area)
}
# Call the function with specific arguments
calculate_rectangle_area(length = 5, width = 3)
n <- 5 # Set the value of n
# Loop structure from 1 to n
for (i in 1:n) {
# Loop body
result <- 1 + i
# Loop termination, in this case when n=5
print(result)
}
#### Lab Code Key:--------------------------
# Setting the seed ensures that we get the same random draw over and over again.
set.seed(20201009)
rnorm(5) # Check
# 0. Calculate the following operations by hand (... meaning by R)
# a)
sum(1:5)
# b)
prod(1:5)
#Check
1*2*3*4*5
# c)
factorial(5)
5*4*3*2*1
factorial(5) * 10^factorial(3) * exp(4)
# 1. Build a Bernoulli distribution using the sample() function, where the probability
# of "success" is 0.7. Run "?sample" if you are unsure how the function works.
# Create an imaginary person to flip the coin once for you
sample(x = c(0, 1),
size = 1,
prob = c(0.3, 0.7)
)
# 2. How do you know if it is working properly? Conduct simulation to check if the
# assigned probabilities are matched with the empirics
# Specify the number of simulations
sims <- 10000
# Specify the probability
ProbSuccess <- 0.7
# Create an empty vector as "container"
BernResult <- vector(mode = "numeric", length = sims)
# For loop
for (i in 1:sims) {
BernResult[i] <- sample(c(0, 1),
size = 1,
prob = c(1- ProbSuccess,
ProbSuccess)
)
}
mean(BernResult)
## Alternative:
## Faster way w/o loop...but we want to build the intuition
sample(c(0, 1),
size = sims,
replace = TRUE,
prob = c(0.3, 0.7)
)
## Or use rbinom (rmb: bernoulli is just a special case of binomial when N = 1)
rbinom(sims,
size = 1,
prob = 0.7)
# 3. Plot the above Bernoulli distribution
# Base graphics
hist(BernResult)
# ggplot2
BernDF <- tibble(Outcome = BernResult)
ggplot(BernDF, aes(x = Outcome)) +
geom_histogram() +
scale_x_continuous(breaks = c(0, 1), expand = c(1, 0)) +
labs(y = "Frequency", x = "x",
subtitle = "Bernoulli Distribution: Frequency")+
theme_bw()
# Adding: aes(y = stat(count / sum(count))) in geom_hist ; What does it do?
ggplot(BernDF, aes(x = Outcome)) +
geom_histogram(aes(y = stat(count/sum(count)))) +
scale_x_continuous(breaks = c(0, 1), expand = c(1, 0)) +
labs(y = "Prob", x = "x",
subtitle = "Bernoulli Distribution: Probability")+
theme_bw()
# y = stat() means...
BernResult %>%
as_tibble %>%
count(value) %>%
mutate(TotalNum = sum(n),
Prob = n/TotalNum)
# 4. Based on the above, generate a binomial distribution, with number of
# trials equal to 10, without using rbinom()
# Create an imaginary person to flip the coin ten times for you
# Let's test it outside of the loop:
sample(c(0, 1),
size = 10,
replace = TRUE,
prob = c(1- ProbSuccess,
ProbSuccess)
)
# Create number of simulations and an empty vector as container
BinoResult <- vector(mode = "numeric", length = sims)
for (i in 1:sims) {
# Create an imaginary person to flip the coin ten times for you
flips <- sample(c(0, 1),
size = 10,
replace = TRUE,
prob = c(1- ProbSuccess, ProbSuccess)
)
# Sum up the number of "success" for that person
count <- sum(flips)
# Store it into the container; repeat 10,000
BinoResult[i] <- count
}
## Alternative:
# Faster way w/o loop and sample()...but we want to build the intuition
rbinom(n = sims,
size = 10,
prob = ProbSuccess)
# 5. Plot the above binomial distribution
# Base graphics
hist(BinoResult)
# ggplot2
BinoResult %>%
as_tibble %>% # Convert to data frame
ggplot(aes(x = value)) +
geom_histogram(aes(y = stat(count/sum(count)))) +
scale_x_continuous(breaks = 1:10) +
labs(y = "Prob", x = "x",
subtitle = "Binomial Distribution")+
theme_bw()
# 6. Explore the rbinom, dbinom, pbinom functions. What do they do? Answer
# the following questions:
## a) The probability of a coin landing on head is 0.7. If you were to flip the coin
## 10 times, what is the probability of getting exactly 7 heads?
dbinom(x = 7, size = 10, prob = 0.7)
## b) What is the probability of getting 7 heads or less?
# b) Pr(7 heads or less) -> CDF/CMF
pbinom(q = 7, size = 10, prob = 0.7)
pbinom(q = 7, size = 10, prob = 0.7, lower.tail = FALSE)
1- pbinom(q = 7, size = 10, prob = 0.7) # equivalent to the previous line
## c) How do you know (b) is true?
# c) Double check
dbinom(x = 0:7, size = 10, prob = 0.7) %>%
sum()
###### extra - how to plot:---------------------------------------------
# PDF
# Create x axis in data.frame
tibble(x = seq(from = 0, to = 10, by =1)) %>%
# Create y axis
mutate(y = dbinom(x = x, size = 10, prob = 0.7)) %>%
# Plot, map data in aesthetic
ggplot(aes(x=x,y=y))+
# Specify the type of plot
geom_line()+
# Specify x axis breaks
#scale_x_continuous(breaks = 0:10)+
scale_x_continuous(breaks = 0:10)+
# Add vertical line at x = 0.7
geom_vline(xintercept = 7, color ="red", linetype = "dashed")+
# Change label on y axis
labs(y="Prob", subtitle = "PDF")+
# (Optional: specify theme )
theme_bw()
# CDF
# Create x axis in data.frame
tibble(x = seq(from = 0, to = 10, by =1)) %>%
# Create y axis
mutate(y = pbinom(q = x, size =10, prob=.7)) %>%
# Plot, map data in aesthetic
ggplot(aes(x=x,y=y))+
# Specify the type of plot
geom_line()+
# Specify x axis breaks
scale_x_continuous(breaks = 0:10)+
# Add vertical line at x = 0.7
geom_vline(xintercept = 7, color ="red", linetype = "dashed")+
# Change label on y axis
labs(y="Prob", subtitle = "CDF")+
# (Optional: specify theme )
theme_bw()
#############################################################
########################## F I N ############################
#############################################################