# Drill 8 (March 28, 2024)
#
# Part 1: covariate x grouping-variable interaction (does the prestige-income
#         relationship differ across occupation types?)
# Part 2: mediation (is the education-income relationship consistent with
#         being carried through occupational prestige?)

# install the mediation package if you haven't already done so
# install.packages("mediation")

library(tidyverse)
library(car)
library(mediation)

# Covariate x grouping interaction ----

# we'll use car's built-in data set called Prestige
d <- Prestige

# learn more about the data if you'd like
?Prestige

# let's do a small R trick; the occupations are the row names rather than a
# variable - we should fix that (even though we won't use it here)
d <- d %>%
  mutate(occupation = row.names(d))

# let's assess whether the relationship between prestige (a continuous measure;
# a "covariate") and income (a continuous measure; the DV) differs across types
# of profession (a grouping variable) - that is, does type of profession
# interact with prestige in predicting income?

# checking the interaction in one line!
summary(aov(income ~ prestige * type, data = d))

# mean-centering is a good idea for most continuous predictors; let's look at a
# histogram of prestige
hist(d$prestige)

# ranges from around 20 to 80, so we'll mean-center it
# (the mean is taken over every row of d, including rows where type is NA)
d <- d %>%
  mutate(prestige.c = prestige - mean(prestige))

# let's check the default coding of type of occupation
contrasts(d$type)
# bc (blue collar) is the reference group

# before we do a more-interesting analysis, let's graph the data (I want to drop
# the NAs for the profession type)
# NOTE: the vertical line marks mean prestige among rows with a non-missing
# type, whereas prestige.c above was centered on the mean over all rows, so the
# line may sit slightly away from the point where prestige.c equals 0
d %>%
  filter(!is.na(type)) %>%
  ggplot(aes(x = prestige, y = income, color = type)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  geom_vline(xintercept = mean(d$prestige[!is.na(d$type)]))

# getting a little more nuance
summary(lm(income ~ prestige.c * type, data = d))

# Coefficients:
#                      Estimate Std. Error t value Pr(>|t|)
# (Intercept)           7192.98     660.95  10.883  < 2e-16 ***
#   predicted income for an average prestige blue collar worker
# prestige.c             160.87      43.96   3.659 0.000422 ***
#   slope of prestige predicting income for blue collar workers
# typeprof             -3570.85    1529.35  -2.335 0.021725 *
#   how much less an average prestige professional worker makes compared
#   to an average prestige blue collar worker (look at the graph)
# typewc               -1785.74     942.46  -1.895 0.061263 .
#   how much less an average prestige white collar worker makes compared
#   to an average prestige blue collar worker
# prestige.c:typeprof    169.24      75.03   2.256 0.026469 *
#   how much steeper the prestige-income slope is for professional workers
#   compared to blue collar workers
# prestige.c:typewc      -83.54      78.26  -1.068 0.288535
#   how much less steep the prestige-income slope is for white collar
#   workers compared to blue collar workers

# MEDIATION ----

# we'll continue to use the prestige data

# we're going to assess whether years of education (X) leads to higher income
# (Y) because of the prestige (M) associated with a career

# this may not be very defensible in a causal sense, but we can test a model to
# see whether the data are consistent with this idea

# Baron & Kenny's (1986) approach
# 1: Y ~ X,     assess path c
# 2: M ~ X,     assess path a
# 3: Y ~ X + M, assess paths b and c'

# Step 1
model1 <- lm(income ~ education, data = d)
summary(model1) # check!

# Step 2
model2 <- lm(prestige ~ education, data = d)
summary(model2) # check!

# Step 3
model3 <- lm(income ~ education + prestige, data = d)
summary(model3) # check and check!

# better to assess the indirect effect (path ab) via bootstrapping
# the bootstrap resamples at random, so fix the seed to make the confidence
# intervals reproducible from run to run
set.seed(20240328)

medModel <- mediate(
  model2,                # M ~ X
  model3,                # Y ~ X + M
  treat = "education",   # X
  mediator = "prestige", # M
  boot = TRUE            # bootstrap, please
)
summary(medModel)

# ACME: "Average Causal Mediation Effect" -- a*b {"indirect effect"}
# ADE: "Average Direct Effect" -- c' {"direct effect"}
# Total effect -- c {ACME + ADE}
# Proportion of Total Effect Mediated -- (a*b)/c

# simple visualization
plot(medModel)