# March 13, 2024 # install.packages("faux") library(tidyverse) library(lmSupport) library(car) library(faux) # for creating variables correlated with an existing variable # fake data set.seed(2001) treatment <- round(rnorm(n = 10, mean = 12, sd = 3), 0) control <- round(rnorm(n = 10, mean = 10, sd = 4), 0) # group means mean(treatment); mean(control) # putting the data into a data frame/tibble d <- tibble(group = c(rep("tx", 10), rep("ctrl", 10)), correct = c(treatment, control)) # adding a contrast code d <- d %>% mutate(con1 = ifelse(group == "ctrl", -1/2, 1/2)) # creating a fake covariate ("foreign language exposure") model1 <- lm(correct ~ con1, d) e <- resid(model1) FLE <- round(rnorm_pre(x = e, mu = 50, sd = 10, r = 0.2), 0) # add FLE to the tibble, mean-centered d <- d %>% mutate(FLE = FLE - mean(FLE)) # the ANCOVA model model2 <- lm(correct ~ con1 + FLE, d) # via contrast codes model3 <- lm(correct ~ groupF + FLE, d) # homogeneity of slopes # ggplot prefers factors d <- d %>% mutate(groupF = factor(group)) # this is the ANCOVA model with lines of best fit that have the same slope # we need predicted scores from the ANCOVA model in the data d <- d %>% mutate(pred = fitted(model2)) ggplot(d, aes(y = correct, x = FLE, col = group)) + geom_point() + geom_line(mapping = aes(y = pred), size = 2) + theme_bw() + geom_vline(xintercept = mean(d$FLE)) coef(model3) # allowing slopes to vary across groups ggplot(d, aes(y = correct, x = FLE, col = group)) + geom_point() + geom_smooth(method = "lm", fill = NA) + theme_bw() + geom_vline(xintercept = mean(d$FLE)) # the model that allows slopes to vary model4 <- lm(correct ~ groupF*FLE, d) coef(model4) # testing independence of group and covariate summary(lm(FLE ~ groupF, d)) # NS! good! ### # interpretation complications! d2 <- read.csv("https://whlevine.hosted.uark.edu/psyc5143/april13.csv") # actual group means d2 %>% group_by(group) %>% summarise(M = mean(Y)) # mean-centering covariates d2 <- d2 %>% mutate(Zc = Z - mean(Z), Zbadc = Zbad - mean(Zbad)) # the typical model summary(lm(Y ~ X1 + X2, d2)) # the ANCOVA summary(lm(Y ~ X1 + X2 + Zc, d2)) # getting adjusted means using the effects package library(effects) ANCOVA <- lm(Y ~ group + Zc, d2) effect("group", ANCOVA) # using the "bad" covariate # what's bad about it? it's not independent of group summary(aov(Zbad ~ group, d2)) ANCOVA2 <- lm(Y ~ group + Zbadc, d2) effect("group", ANCOVA2) # adjusted means will be far from actual means when the covariate is related to # group membership # what causes the problem? adjusted means are based on the predicted score for # each group for the overall mean of the covariate # the horizontal lines illustrate Y means for each group # the verticle lines illustrae Zbad means for each group ggplot(d2, aes(y = Y, x = Zbad, col = group)) + geom_point() + geom_hline(yintercept = mean(d2$Y[d2$group == "A1"]), color = "#F8766D") + geom_hline(yintercept = mean(d2$Y[d2$group == "A2"]), color = "#00BA38") + geom_hline(yintercept = mean(d2$Y[d2$group == "A3"]), color = "#619CFF") + geom_vline(xintercept = mean(d2$Zbad[d2$group == "A1"]), color = '#F8766D') + geom_vline(xintercept = mean(d2$Zbad[d2$group == "A2"]), color = '#00BA38') + geom_vline(xintercept = mean(d2$Zbad[d2$group == "A3"]), color = '#619CFF') # what are the group means of the covariate? aggregate(Zbad ~ group, d2, mean) # the graph generated below illustrates why this is a problem ggplot(d2, aes(y = Y, x = Zbadc, col = group)) + geom_point() + geom_abline(aes(intercept = 7.1, slope = 0.2686672), color = "#F8766D") + geom_abline(aes(intercept = 19.5, slope = 0.2686672), color = "#00BA38") + geom_abline(aes(intercept = 33.4, slope = 0.2688672), color = "#619CFF") + geom_vline(xintercept = 0) + geom_hline(yintercept = mean(d$Y[d$group == "A1"]), color = "#F8766D") + geom_hline(yintercept = mean(d$Y[d$group == "A2"]), color = "#00BA38") + geom_hline(yintercept = mean(d$Y[d$group == "A3"]), color = "#619CFF")