library(tidyverse)

# Drill 7

# cover story/data dictionary
# a comparison of the relative effectiveness of three software packages designed
# to help students get better at math problem solving

# variables in the data file linked below
# iv  = the three packages (numeric; needs to be factorized or converted to dummy variables)
# cov = a measure of math ability
# dv  = scores on a standardized math problem-solving test

read.csv("http://whlevine.hosted.uark.edu/psyc5143/ancova2.csv") -> d
glimpse(d)

# for the sake of the drill, to experience working with R's defaults, let's make
# a factor version of the iv and use the default dummy coding
d$ivF <- as.factor(d$iv)
contrasts(d$ivF) # checking the dummy coding: group 1 is the reference group

# the model without the IV
lm(dv ~ cov, d) -> step1

# the model with the IV
lm(dv ~ cov + ivF, d) -> step2

# ANCOVA as it's conventionally reported is the F-change from step1 to step2
anova(step1, step2)

# we can also just get the F-ratio for the IV here
anova(step2)

# we can get more info from the summary of step2
summary(step2)

# centering the covariate at its mean will make the intercept more useful
# base R!
d$cov.c <- d$cov - mean(d$cov) # could use mutate, of course

lm(dv ~ cov.c + ivF, d) -> step2.c
summary(step2.c)

# adjusted means a couple of ways (this package might need to be installed)
# install.packages("effects")
library(effects)
effect(term = "ivF", mod = step2)
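# a second way, sketched by hand in base R: ask the model for each group's
# predicted dv at the mean of the covariate; these should match the effect()
# output above (newdat is just an illustrative name)
newdat <- data.frame(cov = mean(d$cov),          # hold the covariate at its mean
                     ivF = factor(levels(d$ivF))) # one row per group
predict(step2, newdata = newdat)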
# the stuff below is a little fancy, but is adapted from
# https://stackoverflow.com/questions/31903606/ggplot2-plotting-regression-lines-with-different-intercepts-but-with-same-slope

d <- cbind(d, pred = fitted(step2)) # store predicted scores in the data frame

# the cbind() command above is a bit old school; we can also do things using mutate
d <- d %>% mutate(pred_new_school = fitted(step2))

head(d) # let's see how our data frame updated

# an example of where our predicted scores come from
70.38954 + (-5.3666667*1.6777)
step2.c$coefficients[1] + ((d[1,5]) * step2.c$coefficients[2]) # d[1,5] is row 1's cov.c

tail(d)

# another example
70.38954 + (-7.5686) + (1.6333333*1.6777)
step2.c$coefficients[1] + step2.c$coefficients[4] + ((d[30,5]) * step2.c$coefficients[2])

# find the mean of the covariate and store it
Mcov <- mean(d$cov)

# create a scatterplot *assuming* equal slopes of the cov-dv relationship
ggplot(d, aes(y = dv, x = cov, col = ivF)) +  # create a scatterplot, using groups to "color" points
  geom_point() +                              # add points to the scatterplot
  geom_line(mapping = aes(y = pred)) +        # puts a line through the predicted scores for each group
  theme_bw() +                                # a graph style that I like
  scale_color_grey(start = 0.7, end = 0.3) +  # to grayscale the groups
  geom_vline(xintercept = Mcov)               # add a vertical line at the mean of the covariate

# create a scatterplot that does not assume equal slopes (to help visualize how
# much the slopes differ across groups)
ggplot(d, aes(y = dv, x = cov, col = ivF)) +  # create a scatterplot, using groups to "color" points
  geom_point() +                              # add points to the scatterplot
  geom_smooth(method = "lm", fill = NA) +     # adds a separate regression line for each group
  theme_bw() +                                # a graph style that I like
  scale_color_grey(start = 0.7, end = 0.3) +  # to grayscale the groups
  geom_vline(xintercept = Mcov)               # add a vertical line at the mean of the covariate

# testing the equal-slopes assumption
lm(dv ~ cov.c + ivF + cov.c:ivF, d) -> equalSlopesOrNo
anova(equalSlopesOrNo)   # a NON-significant interaction is a win here!
summary(equalSlopesOrNo) # the cov.c slope is for the reference group, and the
                         # cov.c:ivF2 and cov.c:ivF3 slopes are how far the other
                         # two group slopes are from it

# testing the independence of groups and the covariate
anova(lm(cov ~ ivF, d))
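# if that F looks worrisome, a quick descriptive follow-up (a sketch, base R):
# compare the covariate means across groups to see where they differ
aggregate(cov ~ ivF, data = d, FUN = mean)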