---
title: "Lab2CodeKey"
author: "Ramses Llobet"
date: \today
output: pdf_document
editor_options: 
  chunk_output_type: console
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

## Prerequisite
```{r, warning=F, message=FALSE}
# Good practice to remove all objects from the workspace
rm(list = ls())

# Use library() for packages you need, or source() for other R files.
library(tidyverse)

# Setting the seed ensures that we get the same random draw over and over again.
set.seed(20201009)

rnorm(5) # Check
```


## 0. Calculate the following operations by hand (... meaning by R)
a) 
$$ \sum\limits_{i=1}^{5} i = $$
b) 
$$ \prod\limits_{i=1}^{5} i = $$
c) 
$$ 5! \times 10^{3!} \times e^{4} = $$

```{r}

# a) 
sum(1:5)

# b)

prod(1:5)
#Check 
1*2*3*4*5

# c)

factorial(5)
5*4*3*2*1

factorial(5) * 10^factorial(3) * exp(4)


```



## 1. Build a Bernoulli distribution using the sample() function, where the probability of "success" is 0.7. Run "?sample" if you are unsure how the function works.
```{r}
# Create an imaginary person to flip the coin once for you 
sample(x = c(0, 1),
       size = 1,
       prob = c(0.3, 0.7)
       )


```

## 2. How do you know if it is working properly? Conduct simulation to check if the assigned probabilities are matched with the empirics 
```{r}
# Specify the number of simulations
sims <- 10000

# Specify the probability
ProbSuccess <- 0.7

# Create an empty vector as "container"
BernResult <- vector(mode = "numeric", length = sims)

# For loop
for (i in 1:sims) {
  BernResult[i] <- sample(c(0, 1), 
                           size = 1, 
                           prob = c(1- ProbSuccess,
                                    ProbSuccess)
                           )
}

mean(BernResult)

```

```{r, results=FALSE}
## Faster way w/o loop...but we want to build the intuition
 sample(c(0, 1),
        size = sims,
        replace = TRUE,
        prob = c(0.3, 0.7)
        )
## Or use rbinom (rmb: bernoulli is just a special case of binomial when N = 1)
 rbinom(sims,
        size = 1,
        prob = 0.7)

```

## 3. Plot the above Bernoulli distribution
```{r, warning=F, message=F}
# Base graphics
hist(BernResult)

# ggplot2
BernDF <- tibble(Outcome = BernResult) 


ggplot(BernDF, aes(x = Outcome)) +
  geom_histogram() + 
  scale_x_continuous(breaks = c(0, 1), expand = c(1, 0)) +
  labs(y = "Frequency", x = "x",
       subtitle = "Bernoulli Distribution: Frequency")+
  theme_bw()


# Adding: aes(y = stat(count / sum(count))) in geom_hist ; What does it do?
ggplot(BernDF, aes(x = Outcome)) +
  geom_histogram(aes(y = stat(count/sum(count)))) +   
  scale_x_continuous(breaks = c(0, 1), expand = c(1, 0)) +
  labs(y = "Prob", x = "x",
       subtitle = "Bernoulli Distribution: Probability")+
  theme_bw()


# y = stat() means...
BernResult %>% 
  as_tibble %>% 
  count(value) %>% 
  mutate(TotalNum = sum(n),
         Prob = n/TotalNum)


```

## 4. Based on the above, generate a binomial distribution, with number of trials equal to 10, without using rbinom()
```{r}
# Create an imaginary person to flip the coin ten times for you
# Let's test it outside of the loop: 
sample(c(0, 1),
       size = 10,
       replace = TRUE,
       prob = c(1- ProbSuccess,
                ProbSuccess)
       )

# Create number of simulations and an empty vector as container 
BinoResult <- vector(mode = "numeric", length = sims)

for (i in 1:sims) {
  
  # Create an imaginary person to flip the coin ten times for you
  flips <- sample(c(0, 1),
                  size = 10,
                  replace = TRUE, 
                  prob = c(1- ProbSuccess, ProbSuccess)
                  )
  
  # Sum up the number of "success" for that person
  count <- sum(flips)
  
  # Store it into the container; repeat 10,000
  BinoResult[i] <- count
  
}

# Faster way w/o loop and sample()...but we want to build the intuition
# rbinom(n = sims,
#        size = 10,
#        prob = ProbSuccess)
```

## 5. Plot the above binomial distribution
```{r}
# Base graphics
hist(BinoResult)

# ggplot2
BinoResult %>% 
  as_tibble %>%  # Convert to data frame
  ggplot(aes(x = value)) +
  geom_histogram(aes(y = stat(count/sum(count)))) +   
  scale_x_continuous(breaks = 1:10) +
  labs(y = "Prob", x = "x",
       subtitle = "Binomial Distribution")+
  theme_bw()

```


```{r, include=F}
 rbinom(n = sims,
        size = 10,
        prob = ProbSuccess) %>% 
  as_tibble %>%  # Convert to data frame
  
  ggplot(aes(x = value)) +
  geom_histogram(aes(y = stat(count/sum(count)))) + 
  scale_x_continuous(breaks = 1:10) +
  labs(y = "Prob", x = "x",
       subtitle = "Binomial Distribution")+
  theme_bw()


```

## 6. Explore the rbinom, dbinom, pbinom functions. What do they do? Answer the following questions:
  a) The probability of a coin landing on head is 0.7. If you were to flip the coin 10 times, what is the probability of getting exactly 7 heads?
  b) What is the probability of getting 7 heads or less?
  c) How do you know (b) is true?
```{r}
# a) Pr(exactly 7 heads) -> PMF
dbinom(x = 7, size = 10, prob = 0.7)
  
# b) Pr(7 heads or less) -> CMF/CDF
pbinom(q = 7, size = 10, prob = 0.7)


pbinom(q = 7, size = 10, prob = 0.7, lower.tail = FALSE)
1- pbinom(q = 7, size = 10, prob = 0.7)



# c) Double check
dbinom(x = c(0:7), size = 10, prob = 0.7) %>% 
  sum()
```


# How to plot?
```{r}

# PDF
# Create x axis in data.frame 
tibble(x = seq(from = 0, to = 10, by =1)) %>% 

# Create y axis 
  mutate(y = dbinom(x = x, size = 10, prob = 0.7)) %>% 

# Plot, map data in aesthetic 
  ggplot(aes(x=x,y=y))+

# Specify the type of plot
  geom_line()+
  
# Specify x axis breaks
  #scale_x_continuous(breaks = 0:10)+
  scale_x_continuous(breaks = 0:10)+
# Add vertical line at x = 0.7  
  geom_vline(xintercept = 7, color ="red", linetype = "dashed")+
  
# Change label on y axis
  labs(y="Prob", subtitle = "PDF")+

# (Optional: specify theme )
  theme_bw()



# CDF
# Create x axis in data.frame 
tibble(x = seq(from = 0, to = 10, by =1)) %>% 

# Create y axis 
  mutate(y = pbinom(q = x, size =10, prob=.7)) %>% 
  
# Plot, map data in aesthetic 
  ggplot(aes(x=x,y=y))+

# Specify the type of plot
  geom_line()+
  
# Specify x axis breaks
  scale_x_continuous(breaks = 0:10)+
  
# Add vertical line at x = 0.7  
  geom_vline(xintercept = 7, color ="red", linetype = "dashed")+
  
# Change label on y axis
  labs(y="Prob", subtitle = "CDF")+

# (Optional: specify theme )
  theme_bw()
```