############### # # Preparation # ############### library(tidyverse) library(lme4) # loading, checking, prepping data read.csv("http://whlevine.hosted.uark.edu/psyc6343/twoclasses.csv") -> twoclasses str(twoclasses) # creating a factor version of classroom twoclasses$classroom.f <- factor(twoclasses$classroom, labels = c("class1", "class2")) # how does R represent factors with dummy variables? contrasts(twoclasses$classroom.f) # dummy-coding classroom (strictly speaking, this isn't necessary) twoclasses$classroom.d <- 0 twoclasses$classroom.d[twoclasses$classroom == 2] <- 1 ####################### # # Section 1 # examining the data # ####################### ggplot(twoclasses, aes(x = pretest.c, y = posttest, color = classroom.f)) + scale_color_brewer(palette = "Dark2") + theme_bw() + geom_point(size = 2) + geom_smooth(method = lm, se = F) + geom_vline(xintercept = 0) ########################################################### # # comparing various analytic options with multilevel data # ########################################################### ############################################ # # Section 2 # separate regressions for each classroom # ############################################ model1 <- lm(data = twoclasses[twoclasses$classroom == 1,], posttest ~ pretest.c) coef(model1) model2 <- lm(data = twoclasses[twoclasses$classroom == 2,], posttest ~ pretest.c) coef(model2) # intercepts are (essentially) DV means when predictors are mean-centered mean(twoclasses$posttest[twoclasses$classroom == 1]) # same as intercept 1 mean(twoclasses$posttest[twoclasses$classroom == 2]) # same as intercept 2 ################################# # # Section 3 # collapsing across classrooms # ################################# model3 <- lm(data = twoclasses, posttest ~ pretest.c) coef(model3) predicted3 <- c(fitted.values(model3)) residual3 <- c(resid(model3)) to.plot3 <- data.frame(predicted3, residual3, twoclasses$classroom.f) # visualizing clustering (non-independence) of residuals ggplot(to.plot3, aes(x = predicted3, y = residual3, color = twoclasses.classroom.f)) + scale_color_brewer(palette = "Dark2") + theme_bw() + geom_point(size = 2) + geom_hline(yintercept = 0) # quantifying non-independence of residuals summary(to.plot3$residual3[to.plot3$twoclasses.classroom.f == "class1"]) summary(to.plot3$residual3[to.plot3$twoclasses.classroom.f == "class2"]) # how good are predictions? summary(model3)$r.squared # R-squared summary(model3)$sigma # SE of the estimate ("average" error of prediction) ###################################### # # Section 4 # modeling classrooms as a predictor # ###################################### model4 <- lm(data = twoclasses, posttest ~ pretest.c + classroom.d) coef(model4) # other information that may be worth a look # summary(model4) # anova(model4) # a digression for you to play with on your own time # stuff you can pull out of lm()'s output (but not lme's!) # model4$coefficients # betas # model4$residuals # residuals # model4$fitted.values # predicted scores # model4$call # the regression equation # model4$model # the data that were analyzed predicted4 <- c(fitted.values(model4)) residual4 <- c(resid(model4)) to.plot4 <- data.frame(predicted4, residual4, twoclasses$classroom.f) # visualizing residuals ggplot(to.plot4, aes(x = predicted4, y = residual4, color = twoclasses.classroom.f)) + scale_color_brewer(palette = "Dark2") + theme_bw() + geom_point(size = 2) + geom_hline(yintercept = 0) # quantifying residuals summary(to.plot4$residual4[to.plot4$twoclasses.classroom.f == "class1"]) summary(to.plot4$residual4[to.plot4$twoclasses.classroom.f == "class2"]) # how good are predictions? summary(model4)$r.squared # R-squared summary(model4)$sigma # SE of the estimate ("average" error of prediction) ###################################### # # Section 5 # multilevel modeling # ###################################### model5 <- lmer(data = twoclasses, posttest ~ (1 | classroom.d) + pretest.c) summary(model5) predicted5 <- c(fitted.values(model5)) residual5 <- c(resid(model5)) to.plot5 <- data.frame(predicted5, residual5, twoclasses$classroom.f) # visualizing residuals ggplot(to.plot5, aes(x = predicted5, y = residual5, color = twoclasses.classroom.f)) + scale_color_brewer(palette = "Dark2") + theme_bw() + geom_point(size = 2) + geom_hline(yintercept = 0) # quantifying residuals summary(to.plot5$residual5[to.plot5$twoclasses.classroom.f == "class1"]) summary(to.plot5$residual5[to.plot5$twoclasses.classroom.f == "class2"]) # how good are predictions? # summary(model5)$r.squared # R-squared is not part of model summary for lmer() # there are R-squared analogs for MLM (which we'll discuss in detail later) summary(model5)$sigma # SE of the estimate ("average" error of prediction) ###################################### # # probably as far as we'll get today # ###################################### ###################################### # # Preparation for scaling up # ###################################### # loading, checking, prepping data read.csv("http://whlevine.hosted.uark.edu/psyc6343/thirtyclasses.csv") -> data str(data) # creating a class ID variable # the two parts of the class ID data$name <- "class" data$number <- as.character(data$classroom) # paste() the two parts together data$class.id <- paste(data$name, data$number) # coerce it to be a factor data$class.id <- as.factor(data$class.id) # clean-up data$name <- NULL data$number <- NULL # other prep stuff # create an empty data frame for later use slope.int <- data.frame() # grand-mean center pretest scores data$pretest.c <- data$pretest - mean(data$pretest) ############################################### # # Section 6 # scaling up # first, let's do 30 separate regressions! # ############################################### # create a data frame of slopes and intercepts from 30 regressions, one for each # class for(x in 1:30) { subset <- data[data$classroom == x,] temp <- lm(data = subset, posttest ~ pretest.c) slope <- summary(temp)$coefficients[2, 1] int <- summary(temp)$coefficients[1, 1] current.ab <- data.frame(int, slope, x) slope.int <- rbind(slope.int, current.ab) } # all the coefficients slope.int # examining the 30 slopes & intercepts #install.packages("psych") library("psych") describe(slope.int$slope) hist(slope.int$slope) plot(density(slope.int$slope)) describe(slope.int$int) hist(slope.int$int) plot(density(slope.int$int)) ############################################### # # Section 7 # modeling classroom as a predictor # ############################################### model6 <- lm(data = data, posttest ~ pretest.c + class.id) coef(model6) # crazy! ############################################### # # Section 8 # MLM # ############################################### model7 <- lmer(data = data, posttest ~ pretest.c + (1|class.id)) # fixed effects: intercept and any slopes fixef(model7) summary(model7) # digressions # random effects (Level 2 [classes] residuals) ranef(model7) # could put the random effects in a data frame if you want to play with them ranef(model7)$class.id -> randomEffects # Level 1 (students) residuals (all 822 of them) residuals(model7) # put them in a data frame for examination as.data.frame(residuals(model7)) -> subjectResiduals