library(tidyverse) library(ez) library(lme4) library(lmerTest) # multi-factor RM designs # Each of the five participants in this two-factor experiment studies six lists # of 20 words each. Half of the lists are made up of concrete nouns; the other # half are made up of abstract nouns. The abstract vs concrete factor is crossed # with a study-time manipulation, so that participants study each list for # either 1, 2, or 3 minutes. MFRM <- read.csv("http://whlevine.hosted.uark.edu/psyc5143/twoFactorRM.csv") head(MFRM) str(MFRM) # factorizing things MFRM <- MFRM %>% mutate(Subject = as.factor(Subject), studytime = as.factor(studytime), wordtype = as.factor(wordtype)) # checking the coding contrasts(MFRM$studytime) # dummy coding with time 1 as the reference contrasts(MFRM$wordtype) # dummy coding with abstract as the reference # I'm going to change these to contrast codes contrasts(MFRM$studytime) <- contr.helmert(3) contrasts(MFRM$wordtype) <- c(-1/2, 1/2) # using ezANOVA ezANOVA(data = MFRM, dv = dv, wid = Subject, within = .(studytime, wordtype), detailed = T) # all the t-tests (I don't recommend this, but it's worth knowing how to do) # we need a single variable that combines word type and study time MFRM <- MFRM %>% mutate(condition = paste(wordtype, studytime, sep = '_')) # all the t-tests pairwise.t.test(x = MFRM$d, g = MFRM$condition, paired = TRUE, p.adjust.method = "bonf") # using lmer anova(lmer(dv ~ studytime*wordtype + (1 | Subject), MFRM)) # the results differ because the ANOVA and the LMM use different error terms # (denominators) in the calculation of F; which one is the "right" analysis is # unclear (it depends on assumptions about what does into scores), but the LMM # comes without the assumption of sphericity and is in much of the research # world the preferred analysis # what is the error term for the effect of word type? # for the RM ANOVA, it's the interaction of people by word type # demonstrating this # easier with wide data ... wide <- MFRM %>% pivot_wider(names_from = c(wordtype, studytime), values_from = dv, id_cols = Subject) wide <- wide %>% rowwise() %>% mutate(d = mean(c(concrete_1, concrete_2, concrete_3)) - mean(c(abstract_1, abstract_2, abstract_3))) modelC <- lm(d ~ 0, wide) modelA <- lm(d ~ 1, wide) anova(modelC) # SSE(C) = 15.889 anova(modelA) # SSE(A) = 4.133 # what goes into SSE(A), which is a piece of the error term? # it's nothing but the sum of squared deviations from the mean Md = mean(wide$d) SSE_A = sum((wide$d - Md)^2) # what does into the values of d? the person-by-condition interaction # maybe easiest to see as a graph # getting abstract and concrete means for each person wide <- wide %>% rowwise() %>% mutate(abstract = mean(c(abstract_1, abstract_2, abstract_3)), concrete = mean(c(concrete_1, concrete_2, concrete_3))) # setting up a data frame to graph g <- wide %>% pivot_longer(cols = c(abstract, concrete)) %>% select(Subject, name, value) # the graph g %>% ggplot(aes(x = name, y = value, group = Subject, color = Subject)) + geom_point() + geom_line() + xlab("condition") + ylab("memory") + theme_bw() # what is the error term for word type in the LMM? # it's the SSE of the residuals of the model e <- resid(lmer(dv ~ studytime*wordtype + (1 | Subject), MFRM)) sum((e - mean(e))^2) / 20 # if we get this far, wowee library(tidyverse) library(lme4) # loading, checking, prepping data read.csv("http://whlevine.hosted.uark.edu/psyc6343/twoclasses.csv") -> twoclasses str(twoclasses) # creating a factor version of classroom twoclasses$classroom.f <- factor(twoclasses$classroom, labels = c("class1", "class2")) # how does R represent factors with dummy variables? contrasts(twoclasses$classroom.f) # dummy-coding classroom (strictly speaking, this isn't necessary) twoclasses$classroom.d <- 0 twoclasses$classroom.d[twoclasses$classroom == 2] <- 1 ####################### # # Section 1 # examining the data # ####################### ggplot(twoclasses, aes(x = pretest.c, y = posttest, color = classroom.f)) + scale_color_brewer(palette = "Dark2") + theme_bw() + geom_point(size = 2) + geom_smooth(method = lm, se = F) + geom_vline(xintercept = 0) ########################################################### # # comparing various analytic options with multilevel data # ########################################################### ############################################ # # Section 2 # separate regressions for each classroom # ############################################ model1 <- lm(data = twoclasses[twoclasses$classroom == 1,], posttest ~ pretest.c) coef(model1) model2 <- lm(data = twoclasses[twoclasses$classroom == 2,], posttest ~ pretest.c) coef(model2) # intercepts are (essentially) DV means when predictors are mean-centered mean(twoclasses$posttest[twoclasses$classroom == 1]) # same as intercept 1 mean(twoclasses$posttest[twoclasses$classroom == 2]) # same as intercept 2 ################################# # # Section 3 # collapsing across classrooms # ################################# model3 <- lm(data = twoclasses, posttest ~ pretest.c) coef(model3) predicted3 <- c(fitted.values(model3)) residual3 <- c(resid(model3)) to.plot3 <- data.frame(predicted3, residual3, twoclasses$classroom.f) # visualizing clustering (non-independence) of residuals ggplot(to.plot3, aes(x = predicted3, y = residual3, color = twoclasses.classroom.f)) + scale_color_brewer(palette = "Dark2") + theme_bw() + geom_point(size = 2) + geom_hline(yintercept = 0) # quantifying non-independence of residuals summary(to.plot3$residual3[to.plot3$twoclasses.classroom.f == "class1"]) summary(to.plot3$residual3[to.plot3$twoclasses.classroom.f == "class2"]) # how good are predictions? summary(model3)$r.squared # R-squared summary(model3)$sigma # SE of the estimate ("average" error of prediction) ###################################### # # Section 4 # modeling classrooms as a predictor # ###################################### model4 <- lm(data = twoclasses, posttest ~ pretest.c + classroom.d) coef(model4) # other information that may be worth a look # summary(model4) # anova(model4) # a digression for you to play with on your own time # stuff you can pull out of lm()'s output (but not lme's!) # model4$coefficients # betas # model4$residuals # residuals # model4$fitted.values # predicted scores # model4$call # the regression equation # model4$model # the data that were analyzed predicted4 <- c(fitted.values(model4)) residual4 <- c(resid(model4)) to.plot4 <- data.frame(predicted4, residual4, twoclasses$classroom.f) # visualizing residuals ggplot(to.plot4, aes(x = predicted4, y = residual4, color = twoclasses.classroom.f)) + scale_color_brewer(palette = "Dark2") + theme_bw() + geom_point(size = 2) + geom_hline(yintercept = 0) # quantifying residuals summary(to.plot4$residual4[to.plot4$twoclasses.classroom.f == "class1"]) summary(to.plot4$residual4[to.plot4$twoclasses.classroom.f == "class2"]) # how good are predictions? summary(model4)$r.squared # R-squared summary(model4)$sigma # SE of the estimate ("average" error of prediction) ###################################### # # Section 5 # multilevel modeling # ###################################### model5 <- lmer(data = twoclasses, posttest ~ (1 | classroom.d) + pretest.c) summary(model5) predicted5 <- c(fitted.values(model5)) residual5 <- c(resid(model5)) to.plot5 <- data.frame(predicted5, residual5, twoclasses$classroom.f) # visualizing residuals ggplot(to.plot5, aes(x = predicted5, y = residual5, color = twoclasses.classroom.f)) + scale_color_brewer(palette = "Dark2") + theme_bw() + geom_point(size = 2) + geom_hline(yintercept = 0) # quantifying residuals summary(to.plot5$residual5[to.plot5$twoclasses.classroom.f == "class1"]) summary(to.plot5$residual5[to.plot5$twoclasses.classroom.f == "class2"]) # how good are predictions? # summary(model5)$r.squared # R-squared is not part of model summary for lmer() # there are R-squared analogs for MLM (which we'll discuss in detail later) summary(model5)$sigma # SE of the estimate ("average" error of prediction) ###################################### # # probably as far as we'll get today # ######################################