Nominal Regression application: alligators data

In today’s lab, we will revisit the class example of alligator data. Alligators from a specific lake in Florida were studied, and the following data were collected:

food: An unordered categorical variable with three levels, where 1 = invertebrates, 2 = fish, and 3 = others.
size: The size of the alligators in meters.
female: A binary variable indicating whether the subject was female (1) or male (2).

The code for this application is adapted from Chris’ code, available on his website. To fit the multinomial model, we will use the nnet package. However, we will employ a customized function to easily extract point estimates from objects of class multinom. The data for this exercise can be accessed via Chris’ package simcf.

The research question is how alligator size and sex influences food choice.

## Custom function to extract point estimates from nnet::multinom

pe_multinom <- function(model=x){
  
  # check class "multinom"
  if ((sum(class(model) == "multinom") == 0)) {
    
    # if FALSE, stop and warning message
    warning("Please, provide as input model a 'multinom' object.")
    return(NULL)
    
  }
  # if TRUE, extract estimates
  else {
    
    # number of levels (categories)
    nlevel <- length(model$lev)
    
    # number of estimates
    ncoef <- length(model$coefnames)
    
    # remove the baselines and arrange order
    coef <- model$wts[(ncoef+2):length(model$wts)]
    coef <- coef[-((0:(nlevel-2))*(ncoef+1) + 1)]
    
    return(coef)
  }
}

# Load data (in simcf library)
data(gator, package = "simcf")

# optional: turn into tibble:
gator <- as_tibble(cbind(food,size,female))

# define formula for the model:
model <- food ~ size + female

# Estimate MNL using the nnet library
mlogit.result <- multinom(model, Hess=TRUE)

## # weights:  12 (6 variable)
## initial  value 64.818125 
## iter  10 value 48.292498
## final  value 48.291021 
## converged

# point estimates -- use the custom extractor function!
(pe <- pe_multinom(mlogit.result))

## [1]  4.8969032 -2.5259623 -0.7899437 -1.9471694  0.1337791  0.3817674

# extract standard errors
(vc <- solve(mlogit.result$Hess))       # var-cov matrix

##               2:(Intercept)       2:size      2:female 3:(Intercept)
## 2:(Intercept)     2.9115479 -1.366107554 -5.804950e-01    0.39098055
## 2:size           -1.3661076  0.718356987  1.421922e-01   -0.12153195
## 2:female         -0.5804950  0.142192243  5.072611e-01   -0.09277223
## 3:(Intercept)     0.3909805 -0.121531954 -9.277223e-02    2.34270806
## 3:size           -0.1200661  0.049421444  9.772455e-05   -0.68380811
## 3:female         -0.1132918  0.007862838  1.377394e-01   -0.73013842
##                      3:size     3:female
## 2:(Intercept) -1.200661e-01 -0.113291752
## 2:size         4.942144e-02  0.007862838
## 2:female       9.772455e-05  0.137739426
## 3:(Intercept) -6.838081e-01 -0.730138421
## 3:size         2.684452e-01  0.050762383
## 3:female       5.076238e-02  0.825099451

(se <- sqrt(diag(vc)))

## 2:(Intercept)        2:size      2:female 3:(Intercept)        3:size 
##     1.7063258     0.8475594     0.7122227     1.5305908     0.5181170 
##      3:female 
##     0.9083499

# note: nml does not require the outcome to be factor
# however, other packages like zelig do
# you can fit the outcome as factor if you want (is good practice)

# create a factor
gator <- gator |> 
  mutate(foodFct = factor(food,
                          levels=1:3,
                          labels=c("Invertebrates","Fish","Other")))

# With factor variable
modelFct <- foodFct ~ size + female
mlogit.resultFct <- multinom(modelFct, gator, Hess=TRUE)

## # weights:  12 (6 variable)
## initial  value 64.818125 
## iter  10 value 48.292498
## final  value 48.291021 
## converged

summary(mlogit.resultFct)

## Call:
## multinom(formula = modelFct, data = gator, Hess = TRUE)
## 
## Coefficients:
##       (Intercept)       size     female
## Fish     4.896903 -2.5259623 -0.7899437
## Other   -1.947169  0.1337791  0.3817674
## 
## Std. Errors:
##       (Intercept)      size    female
## Fish     1.706326 0.8475594 0.7122227
## Other    1.530591 0.5181170 0.9083499
## 
## Residual Deviance: 96.58204 
## AIC: 108.582

# extract estimates
(peFct <- pe_multinom(mlogit.resultFct))

## [1]  4.8969032 -2.5259623 -0.7899437 -1.9471694  0.1337791  0.3817674

(vcFct <- solve(mlogit.resultFct$Hess))

##                   Fish:(Intercept)    Fish:size   Fish:female Other:(Intercept)
## Fish:(Intercept)         2.9115479 -1.366107554 -5.804950e-01        0.39098055
## Fish:size               -1.3661076  0.718356987  1.421922e-01       -0.12153195
## Fish:female             -0.5804950  0.142192243  5.072611e-01       -0.09277223
## Other:(Intercept)        0.3909805 -0.121531954 -9.277223e-02        2.34270806
## Other:size              -0.1200661  0.049421444  9.772455e-05       -0.68380811
## Other:female            -0.1132918  0.007862838  1.377394e-01       -0.73013842
##                      Other:size Other:female
## Fish:(Intercept)  -1.200661e-01 -0.113291752
## Fish:size          4.942144e-02  0.007862838
## Fish:female        9.772455e-05  0.137739426
## Other:(Intercept) -6.838081e-01 -0.730138421
## Other:size         2.684452e-01  0.050762383
## Other:female       5.076238e-02  0.825099451

(seFct <- sqrt(diag(vcFct)))

##  Fish:(Intercept)         Fish:size       Fish:female Other:(Intercept) 
##         1.7063258         0.8475594         0.7122227         1.5305908 
##        Other:size      Other:female 
##         0.5181170         0.9083499

Quantities of Interest

From here, you already know that we are going to predict quantities of interest in the post-estimation stage. As usual, we will do this using simcf:

Estimate: MLE \(\hat{\beta}_{(M+1)\times(P+1)}\) and its variance \(\hat{V}(\hat{\beta}_{(M+1)\times(P+1)})\)
\(\color{red}{\rightarrow \texttt{optim(), multinom()}}\)
Simulate estimation uncertainty from a multivariate normal distribution:
Draw \(\tilde{\beta} \sim MVN \big[\hat{\beta}, \hat{V}(\hat{\beta})\big]\)
\(\color{red}{\rightarrow \texttt{MASS::mvrnorm()}}\)
Create hypothetical scenarios of your substantive interest:
Choose values of X: \(X_c\) \(\color{red}{\rightarrow \texttt{simcf::cfmake(), cfchange()} \dots}\)
Calculate expected values:
\(\tilde{\pi_c} = g(X_c, \tilde{\beta})\)
Compute EVs, First Differences or Relative Risks
EV: \(\mathbb{E}(y = j|X_{c1},\tilde{\beta})\)
\(\color{red}{\rightarrow \texttt{simcf::mlogitsimev()} \dots}\)
FD: \(\mathbb{E}(y = j|X_{c2},\tilde{\beta},) - \mathbb{E}(y = j|X_{c1},\tilde{\beta})\)
\(\color{red}{\rightarrow \texttt{simcf::mlogitsimfd()} \dots}\)
RR: \(\frac{\mathbb{E}(y = j|X_{c2},\tilde{\beta})}{\mathbb{E}(y = j|X_{c1},\tilde{\beta})}\)
\(\color{red}{\rightarrow \texttt{simcf::mlogitsimrr()} \dots}\)

Below we implement these steps. An additional step to have in consideration is that we need to create an array to store the simulated QoIs. An array can be thought of as a generalized form of matrices or as a “list” of matrices. We use arrays to store in separate matrices the simulated paramters for each category (in this case, 2).

An array can have more than two dimensions, unlike a matrix, which has exactly two dimensions (rows and columns). In the example below, simB is defined as a 3-dimensional array with dimensions (sims, 3, 2). This means you have two sims matrices, each with 3 rows and 2 columns.

Finally, in this case, we use the function simcf::cfFactorial to create a factorial combination of hypothetical scenarios between size and female variables. Notice, however, that you can manually input hypothetical scenarios as usual via simcf::cfMake and simcf::cfChange (is not necessary to use simcf::cfFactorial to create hypothetical scenarios).

# Simulate parameters from predictive distributions
sims <- 10000
simbetas <- mvrnorm(sims,pe,vc)       # draw parameters, using MASS::mvrnorm
simB <- array(NA, dim = c(sims,3,2))  # re-arrange simulates to array format
simB[,,1] <- simbetas[,1:3]           #   for MNL simulation
simB[,,2] <- simbetas[,4:6]



# Create full factorial set of counterfactuals
sizerange <- seq(1,4,by=0.1)          # range of counterfactual sizes
femalerange <- c(0,1)                 # range of counterfactual sexes
xhyp1 <- cfFactorial(size = sizerange, female = femalerange)

# Simulate expected probabilities with 68% CI
mlogit.ev1 <- mlogitsimev(xhyp1, simB, ci=0.68)




# Simulate parameters from predictive distributions
sims <- 10000
simbetasFct <- mvrnorm(sims,peFct,vcFct)       # draw parameters, using MASS::mvrnorm
simBFct <- array(NA, dim = c(sims,3,2))  # re-arrange simulates to array format
simBFct[,,1] <- simbetasFct[,1:3]           #   for MNL simulation
simBFct[,,2] <- simbetasFct[,4:6]

mlogit.ev1Fct <- mlogitsimev(xhyp1, simBFct, ci=0.68)

Sanity test

Remember that the multinomial logit model estimates point estimates, including intercepts, for each category except the baseline category. For \(M\) categories, the model estimates \((M - 1)\) categories since one category serves as the baseline for comparison.

In an estimated model, the sum of the predicted probabilities across all categories must equal 1 for any given input \(\mathbf{x}_c\). For example, in the case of the alligator data, we have three probabilities for any given counterfactual scenario \(\mathbf{x}_c\), represented as:

\[ \begin{aligned} \Pr(y = 1 | \mathbf{x}_c, \hat{\beta}_2, \hat{\beta}_3) &= \frac{1}{1 + \exp(\mathbf{x}_c \hat{\beta}_2) + \exp(\mathbf{x}_c \hat{\beta}_3)} \\ \Pr(y = 2 | \mathbf{x}_c, \hat{\beta}_2, \hat{\beta}_3) &= \frac{\exp(\mathbf{x}_c \hat{\beta}_2)}{1 + \exp(\mathbf{x}_c \hat{\beta}_2) + \exp(\mathbf{x}_c \hat{\beta}_3)} \\ \Pr(y = 3 | \mathbf{x}_c, \hat{\beta}_2, \hat{\beta}_3) &= \frac{\exp(\mathbf{x}_c \hat{\beta}_3)}{1 + \exp(\mathbf{x}_c \hat{\beta}_2) + \exp(\mathbf{x}_c \hat{\beta}_3)} \end{aligned} \]

In this context, \(y = 1, 2, 3\) represent the probabilities that an alligator primarily eats invertebrates, fish, or other food sources, respectively. Here, “invertebrates” is the baseline category. Therefore, we estimate \((M-1) \times (P+1)\) parameters, resulting in six parameters in this example (intercepts, size, and female coefficients for the two non-baseline categories). You can verify the computations by manually calculating the above probabilities for a given input, ensuring that their sum is equal to 1, as required by the model’s assumptions. This verification is demonstrated in the code provided below.

# Don't blackbox what you do!
# Let's calculate EVs

xhyp0 <- 
  tibble(intercept = 1,
         # size range interval x 2 (for each sex)
         size = rep(sizerange,2),
         # repeat twice
         female = rep(c(0,1), each = length(sizerange))) |> 
  as.matrix()

# estimates for each beta
b2 <- pe[1:3] #Invertebrates
b3 <- pe[4:6] #Other

Expxb2 <- exp(xhyp0 %*% b2)
Expxb3 <- exp(xhyp0 %*% b3)

# Create a denominator
Dinom <- 1 + Expxb2 + Expxb3

# Create a table for prob
Probs <- 
  data.frame(
    ProbInvertebrates = Expxb2 / Dinom,
    ProbOther         = Expxb3 / Dinom,
    ProbFish          = 1 / Dinom
  )

bind_cols(Probs, xhyp0)

##    ProbInvertebrates  ProbOther   ProbFish intercept size female
## 1        0.902018103 0.01373989 0.08424201         1  1.0      0
## 2        0.877112810 0.01743149 0.10545570         1  1.1      0
## 3        0.846948319 0.02196075 0.13109093         1  1.2      0
## 4        0.810971824 0.02743514 0.16159304         1  1.3      0
## 5        0.768842746 0.03393517 0.19722209         1  1.4      0
## 6        0.720555110 0.04149452 0.23795037         1  1.5      0
## 7        0.666550117 0.05008036 0.28336952         1  1.6      0
## 8        0.607783765 0.05957923 0.33263701         1  1.7      0
## 9        0.545713464 0.06979456 0.38449198         1  1.8      0
## 10       0.482183219 0.08046001 0.43735677         1  1.9      0
## 11       0.419218093 0.09126812 0.48951379         1  2.0      0
## 12       0.358772821 0.10190834 0.53931884         1  2.1      0
## 13       0.302498411 0.11210484 0.58539675         1  2.2      0
## 14       0.251583021 0.12164484 0.62677213         1  2.3      0
## 15       0.206693918 0.13039207 0.66291401         1  2.4      0
## 16       0.168012714 0.13828553 0.69370176         1  2.5      0
## 17       0.135332571 0.14532757 0.71933986         1  2.6      0
## 18       0.108180081 0.15156680 0.74025312         1  2.7      0
## 19       0.085931939 0.15708058 0.75698748         1  2.8      0
## 20       0.067909282 0.16196013 0.77013059         1  2.9      0
## 21       0.053444236 0.16629951 0.78025625         1  3.0      0
## 22       0.041920706 0.17018833 0.78789096         1  3.1      0
## 23       0.032794937 0.17370771 0.79349735         1  3.2      0
## 24       0.025602023 0.17692845 0.79746953         1  3.3      0
## 25       0.019953662 0.17991075 0.80013559         1  3.4      0
## 26       0.015531179 0.18270483 0.80176399         1  3.5      0
## 27       0.012076484 0.18535196 0.80257155         1  3.6      0
## 28       0.009382660 0.18788571 0.80273163         1  3.7      0
## 29       0.007285100 0.19033313 0.80238177         1  3.8      0
## 30       0.005653632 0.19271589 0.80163048         1  3.9      0
## 31       0.004385788 0.19505125 0.80056296         1  4.0      0
## 32       0.796855614 0.03917557 0.16396882         1  1.0      1
## 33       0.752421029 0.04826227 0.19931670         1  1.1      1
## 34       0.701897142 0.05873965 0.23936320         1  1.2      1
## 35       0.645908343 0.07052448 0.28356718         1  1.3      1
## 36       0.585605223 0.08342286 0.33097192         1  1.4      1
## 37       0.522615309 0.09713446 0.38025023         1  1.5      1
## 38       0.458890561 0.11127854 0.42983090         1  1.6      1
## 39       0.396475633 0.12543823 0.47808614         1  1.7      1
## 40       0.337252239 0.13921286 0.52353490         1  1.8      1
## 41       0.282725017 0.15226492 0.56501006         1  1.9      1
## 42       0.233897022 0.16435064 0.60175234         1  2.0      1
## 43       0.191248675 0.17533001 0.63342132         1  2.1      1
## 44       0.154801402 0.18515857 0.66004003         1  2.2      1
## 45       0.124229774 0.19386781 0.68190241         1  2.3      1
## 46       0.098985852 0.20154139 0.69947276         1  2.4      1
## 47       0.078410053 0.20829266 0.71329729         1  2.5      1
## 48       0.061816006 0.21424676 0.72393723         1  2.6      1
## 49       0.048547327 0.21952791 0.73192476         1  2.7      1
## 50       0.038010188 0.22425159 0.73773822         1  2.8      1
## 51       0.029687836 0.22852050 0.74179167         1  2.9      1
## 52       0.023143100 0.23242308 0.74443382         1  3.0      1
## 53       0.018013802 0.23603381 0.74595238         1  3.1      1
## 54       0.014004582 0.23941430 0.74658112         1  3.2      1
## 55       0.010877430 0.24261483 0.74650774         1  3.3      1
## 56       0.008442299 0.24567609 0.74588161         1  3.4      1
## 57       0.006548495 0.24863069 0.74482081         1  3.5      1
## 58       0.005077170 0.25150467 0.74341816         1  3.6      1
## 59       0.003934983 0.25431868 0.74174634         1  3.7      1
## 60       0.003048857 0.25708905 0.73986210         1  3.8      1
## 61       0.002361726 0.25982866 0.73780961         1  3.9      1
## 62       0.001829108 0.26254766 0.73562324         1  4.0      1

# Let's check if the row sums are 1s
rowSums(Probs) # Make sense!

##  [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [39] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

Visualization: `ggplot`

# Get 3 colors
cols <- brewer.pal(3,"Set1")


## ggplot

CatNames <- c("Invertebrates", "Other", "Fish")
NumHype <- length(mlogit.ev1$pe)

mlogit.ev1.tidy <- 
  mlogit.ev1 |>
  # Turns array into list 
  map(as.data.frame) |>
  # Set category names
  map(set_colnames, CatNames) |> 
  # Add counterfactual scenarios, crossing for factorial combination
  map(bind_cols, crossing(Female = femalerange, 
                          Size = sizerange)) |> 
  # as tibble
  map_df(as_tibble, .id = "Type") |> 
  # turn into longer format
  pivot_longer(CatNames[1]:CatNames[3], 
               names_to = "Category") |>
  # turn estimates and intervals into wide format
  pivot_wider(names_from = Type, 
              values_from = value) 

# change labels for scenarios of interest and visualize:

mlogit.ev1.tidy |> 
  mutate(Category = factor(Category, levels = CatNames),
         Female = factor(Female, 
                         labels = c("Male alligators", "Female alligators"))) |> 
  ggplot() +
  aes(y = pe, x = Size, ymax = upper, ymin = lower,
      color = Category, fill = Category) +
  
  # For point estimates
  geom_line(show.legend = FALSE) +
  
  # For confidence intervals
  geom_ribbon(alpha = 0.2, linetype = 0, show.legend = FALSE) +
  
  # Facet to Female and Male
  facet_grid(~ Female, labeller = label_value)+
  
  #Annotate category labels
  annotate("text", x = 1.75, y =0.95, label =CatNames[1], color=cols[1])+
  annotate("text", x = 3,    y =0.95, label =CatNames[3], color=cols[2])+
  annotate("text", x = 3,    y =0.375, label =CatNames[2], color=cols[3])+
  
  scale_color_manual(values = c(cols[1], cols[3], cols[2])) +
  scale_fill_manual(values = c(cols[1], cols[3], cols[2])) +
  
  scale_y_continuous(breaks =  seq(0, 1, by = 0.2),
                     limits = c(0,1))+
  
  geom_vline(xintercept = c(2,3), linetype = "dotted")+
  
  labs(x = "Size of alligator (meters)", 
       y = "Probability primary diet is...")+
  
  theme_caviz_hgrid+
  theme(aspect.ratio = 1, 
        axis.text.x = element_text(color = "black", size = 10),
        axis.ticks.x = element_blank())

Visualization: `tile`

#################################################################
# Create a specific comparison for first diffs and relative risks
xhyp2 <- cfMake(model, gator, nscen=1)

# Scenario 1: Large vs small (male mu + 1sd vs male mu - 1sd), holding male fixed
xhyp2 <- cfName(xhyp2, "Large vs Small Males", scen=1)
xhyp2 <- cfChange(xhyp2, "size",
                  x=mean(gator$size[gator$female==0]) + sd(gator$size[gator$female==0]),
                  xpre=mean(gator$size[gator$female==0]) - sd(gator$size[gator$female==0]),
                  scen=1)
xhyp2 <- cfChange(xhyp2, "female", x=0, xpre=0, scen=1)

# Simulate first differences with 95\% CI
mlogit.fd1 <- mlogitsimfd(xhyp2, simB, ci=0.95)

# Simulate relative risks with 95\% CI
mlogit.rr1 <- mlogitsimrr(xhyp2, simB, ci=0.95)



###########################################
# Make ropeladder plot of First Differences

# Make trace of FDs, large vs small males, all categories
traceFD <- ropeladder(x=mlogit.fd1$pe,
                      lower=mlogit.fd1$lower,
                      upper=mlogit.fd1$upper,
                      labels=c("Invertebrates",
                               "\"Other food\"",
                               "Fish"),
                      size=0.65,
                      lex=1.75,
                      lineend="square",
                      plot=1
                      )

# Make reference line trace for first diffs (at 0)
vertmarkFD <- linesTile(x=c(0,0), y=c(0,1), plot=1)

# Set tick marks for x axis
xat <- c(-0.5,-0.25,0, 0.25, 0.5)
xlab <- c("-50%", "-25%", "0%", "+25%", "+50%")

# Make plot with tile
file <- "gatorsFD"
tile(traceFD, vertmarkFD,
     xaxis=list(at=xat, labels=xlab),
     topaxis=list(add=TRUE, at=xat, labels=xlab),
     plottitle=list(labels="Large male gators (+1sd) compared to small (-1sd)"),
     xaxistitle=list(labels="difference in probability this is gator's primary diet"),
     width=list(null=4),
     height=list(xaxistitle=3, plottitle=4),
     gridlines=list(type="xt")
     )

########################################
# Make ropeladder plot of Relative Risks

# Make trace of RRs, large vs small males, all categories
traceRR <- ropeladder(x=mlogit.rr1$pe,
                      lower=mlogit.rr1$lower,
                      upper=mlogit.rr1$upper,
                      labels=c("Invertebrates",
                               "\"Other food\"",
                               "Fish"),
                      size=0.65,
                      lex=1.75,
                      lineend="square",
                      plot=1
                      )

# Make reference line trace for relative risks (at 1)
vertmarkRR <- linesTile(x=c(1,1), y=c(0,1), plot=1)

# Set tick marks for x axis
xat <- c(0.01, 0.1, 0.5, 1, 2, 5, 10 )

# Make plot with tile
file <- "gatorsRR"
tile(traceRR, vertmarkRR,
     xaxis=list(log=TRUE, at=xat, labels=paste0(xat,"x")),
     topaxis=list(add=TRUE, log=TRUE, at=xat, labels=paste0(xat,"x")),
     plottitle=list(labels="Large male gators (+1sd) compared to small (-1sd)"),
     xaxistitle=list(labels="relative likelihood this is gator's primary diet"),
     width=list(null=4),
     height=list(xaxistitle=3, plottitle=4),
     gridlines=list(type="xt")
     )

###############################################################
# Create several comparisons for first diffs and relative risks
xhyp3 <- cfMake(model, gator, nscen=5)

# Scenario 1: Large vs small (male mu + 1sd vs male mu - 1sd), holding male fixed
xhyp3 <- cfName(xhyp3, "Large Male (Small Male)", scen=1)
xhyp3 <- cfChange(xhyp3, "size",
                  x=mean(gator$size[gator$female==0]) + sd(gator$size[gator$female==0]),
                  xpre=mean(gator$size[gator$female==0]) - sd(gator$size[gator$female==0]),
                  scen=1)
xhyp3 <- cfChange(xhyp3, "female", x=0, xpre=0, scen=1)

# Scenario 2: Large vs small (female mu + 1sd vs female mu - 1sd), holding female fixed
xhyp3 <- cfName(xhyp3, "Large Female (Small Female)", scen=2)
xhyp3 <- cfChange(xhyp3, "size",
                  x=mean(gator$size[gator$female==1]) + sd(gator$size[gator$female==1]),
                  xpre=mean(gator$size[gator$female==1]) - sd(gator$size[gator$female==1]),
                  scen=2)
xhyp3 <- cfChange(xhyp3, "female", x=1, xpre=1, scen=2)

# Scenario 3: Female vs male holding size fixed at mean of all gators
xhyp3 <- cfName(xhyp3, "Average Female (Average Male)", scen=3)
xhyp3 <- cfChange(xhyp3, "size",
                  x=mean(gator$size),
                  xpre=mean(gator$size),
                  scen=3)
xhyp3 <- cfChange(xhyp3, "female", x=1, xpre=0, scen=3)

# Scenario 4: Female vs male holding size fixed at high of all gators
xhyp3 <- cfName(xhyp3, "Large Female (Large Male)", scen=4)
xhyp3 <- cfChange(xhyp3, "size",
                  x=mean(gator$size) + sd(gator$size),
                  xpre=mean(gator$size) + sd(gator$size),
                  scen=4)
xhyp3 <- cfChange(xhyp3, "female", x=1, xpre=0, scen=4)

# Scenario 5: Female vs male holding size fixed at low of all gators
xhyp3 <- cfName(xhyp3, "Small Female (Small Male)", scen=5)
xhyp3 <- cfChange(xhyp3, "size",
                  x=mean(gator$size) - sd(gator$size),
                  xpre=mean(gator$size) - sd(gator$size),
                  scen=5)
xhyp3 <- cfChange(xhyp3, "female", x=1, xpre=0, scen=5)


# Simulate first differences with 95\% CI
mlogit.fd2 <- mlogitsimfd(xhyp3, simB, ci=0.95)

# Simulate relative risks with 95\% CI
mlogit.rr2 <- mlogitsimrr(xhyp3, simB, ci=0.95)

sorted <- rev(order(mlogit.rr2$pe[,3]))

# Make trace of RRs, large vs small males, all categories
traceRR2 <- ropeladder(x=mlogit.rr2$pe[sorted,3],
                      lower=mlogit.rr2$lower[sorted,3,1],
                      upper=mlogit.rr2$upper[sorted,3,1],
                      labels=rownames(xhyp3$x)[sorted],
                      size=0.65,
                      lex=1.75,
                      lineend="square",
                      plot=1
                      )

# Make reference line trace for relative risks (at 1)
vertmarkRR2 <- linesTile(x=c(1,1), y=c(0,1), plot=1)

# Set tick marks for x axis
xat <- c(0.5, 1, 2, 5, 10 )

# Make plot with tile

tile(traceRR2, vertmarkRR2,
     xaxis=list(log=TRUE, at=xat, labels=paste0(xat,"x")),
     topaxis=list(add=TRUE, log=TRUE, at=xat, labels=paste0(xat,"x")),
     plottitle=list(labels="Gator 1 compared to (Gator 2)"),
     xaxistitle=list(labels="relative likelihood of mostly eating Fish"),
     width=list(null=4),
     height=list(xaxistitle=3, plottitle=4),
     gridlines=list(type="xt")
     )

CSSS/POLS 510 - Maximum Likelihood Estimation

Lab 8. Unordered Categories

Ramses Llobet

Nominal Regression application: alligators data

Quantities of Interest

Sanity test

Visualization: `ggplot`

Visualization: `tile`

CSSS/POLS 510 - Maximum Likelihood Estimation

Lab 8. Unordered Categories

Ramses Llobet

Nominal Regression application: alligators data

Quantities of Interest

Sanity test

Visualization: ggplot

Visualization: tile

Visualization: `ggplot`

Visualization: `tile`