# Drill 8 (March 28, 2024)

# install the mediation package if you haven't already done so
# install.packages("mediation")

library(tidyverse)
library(car)
library(mediation)

# covariate x grouping interaction

# we'll use car's built-in data set called Prestige

d <- Prestige

# the help page describes the data set, if you're curious
help("Prestige")

# small R trick: the occupation names live in the row names rather than in a
# column, so copy them into a proper variable (even though we won't use it
# here)
d <- mutate(d, occupation = rownames(d))

# let's assess whether the relationship between prestige (a continuous measure;
# a "covariate") and income (a continuous measure; the DV) differs across types
# of profession (a grouping variable) - that is, does type of profession
# interact with prestige in predicting income?


# quick check of the prestige x type interaction with an ANOVA table
aov(income ~ prestige * type, data = d) %>%
  summary()

# continuous predictors are usually easier to interpret once mean-centered;
# first, a histogram to see how prestige is distributed
hist(d$prestige)
# it runs from roughly 20 to 80, so 0 is not a meaningful value - center it

# note: the mean here is taken over every row of d, including the occupations
# whose type is NA
d <- mutate(d, prestige.c = prestige - mean(prestige))

# how is type of occupation coded by default?
d$type %>%
  contrasts()
# bc (blue collar) is the reference group

# before the more-interesting analysis, graph the data: one regression line per
# type of occupation (rows with a missing type are dropped so that NA does not
# show up as its own group)
#
# the vertical line marks average prestige, i.e. where prestige.c equals 0; it
# uses the same mean that was subtracted to create prestige.c (the mean over
# all rows of d), so the vertical gaps between the fitted lines at this mark
# are what the type coefficients in the centered model describe
d %>%
  filter(!is.na(type)) %>%
  ggplot(aes(x = prestige, y = income, color = type)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  geom_vline(xintercept = mean(d$prestige))

# more nuance: the same interaction as a regression, using centered prestige
lm(income ~ prestige.c * type, data = d) %>%
  summary()

# Coefficients:
#                     Estimate Std. Error t value Pr(>|t|)    
# (Intercept)          7192.98     660.95  10.883  < 2e-16 ***

        # predicted income for an average prestige blue collar worker

# prestige.c            160.87      43.96   3.659 0.000422 ***

        # slope of prestige predicting income for blue collar workers

# typeprof            -3570.85    1529.35  -2.335 0.021725 *  

        # how much less an average prestige professional worker makes compared
        # to an average prestige blue collar worker (look at the graph)

# typewc              -1785.74     942.46  -1.895 0.061263 .  

        # how much less an average prestige white collar worker makes compared
        # to an average prestige blue collar worker

# prestige.c:typeprof   169.24      75.03   2.256 0.026469 *  

        # how much steeper the prestige-income slope is for professional workers
        # compared to blue collar workers

# prestige.c:typewc     -83.54      78.26  -1.068 0.288535    

        # how much less steep the prestige-income slope is for white collar
        # workers compared to blue collar workers

# MEDIATION

# we'll continue to use the prestige data

# we're going to assess whether years of education (X) leads to higher income
# (Y) because of the prestige (M) associated with a career

# this may not be very defensible in a causal sense, but we can test a model to
# see whether the data are consistent with this idea

# Baron & Kenny's (1986) approach

# 1: Y ~ X, assess path c
# 2: M ~ X, assess path a
# 3: Y ~ X + M, assess paths b and c'

# Step 1 - path c: income (Y) regressed on education (X)
model1 <- lm(income ~ education, data = d)
model1 %>% summary() # check!

# Step 2 - path a: prestige (M) regressed on education (X)
model2 <- lm(prestige ~ education, data = d)
model2 %>% summary() # check!

# Step 3 - paths b and c': income (Y) regressed on education (X) and
# prestige (M) together
model3 <- lm(income ~ education + prestige, data = d)
model3 %>% summary() # check and check!

# better to assess the indirect effect (path ab) via bootstrapping

# the bootstrap resamples the data at random, so fix the seed first; otherwise
# the confidence intervals and p-values change a little on every run
set.seed(20240328)

medModel <- mediate(model.m = model2,         # M ~ X
                    model.y = model3,         # Y ~ X + M
                    treat = "education",      # X
                    mediator = "prestige",    # M
                    boot = TRUE)              # bootstrap, please

summary(medModel)

# ACME ("Average Causal Mediation Effect") -- a*b {"indirect effect"}
# ADE: "Average Direct Effect" -- c' {"direct effect"}
# Total effect -- c {ACME + ADE}
# Proportion of Total Effect Mediated - (a*b)/c 

# quick plot of the mediation results
medModel %>%
  plot()