# drill 3 library(tidyverse) # using dummy variables to do regression (ANOVA; same thing!) with categorical # predictors # import data d <- read.csv("http://whlevine.hosted.uark.edu/psyc5143/reward.csv", stringsAsFactors = TRUE) # what's in there glimpse(d) d #graphically: ggplot(d, aes(condition, errors)) + geom_point(position = "jitter") # check factor status of IV is.factor(d$condition) levels(d$condition) # get some condition means and SDs d %>% group_by(condition) %>% summarise(M = mean(errors), SD = sd(errors)) # get the grand mean mean(d$errors) # note that the grand mean = the mean of the group means if and only if the # groups are the same size # let's dummy code the long, painful way d <- d %>% mutate(D1 = case_when(condition == "always" ~ 0, condition == "frequent" ~ 1, condition == "infrequent" ~ 0, condition == "never" ~ 0), D2 = case_when(condition == "always" ~ 0, condition == "frequent" ~ 0, condition == "infrequent" ~ 1, condition == "never" ~ 0), D3 = case_when(condition == "always" ~ 0, condition == "frequent" ~ 0, condition == "infrequent" ~ 0, condition == "never" ~ 1)) # what did that do? d # compare the default lm output to the dummy-coded version coef(lm(errors ~ condition, d)) coef(lm(errors ~ D1 + D2 + D3, d)) # same results, just different labels for slopes # BEWARE THAT R'S DEFAULT CODING RELIES ON ALPHABETICAL ORDER OF FACTOR # LEVELS!!! # one way to check orthogonality (which is NOT expected for dummy codes) cor(d[3:5]) # the off-diagonal correlations must be 0 for orthogonality # better to use orthogonal contrasts; execute the following contrasts # C1 = never vs all others [never = 3/4; all others = -1/4] # C2 = always vs frequent/infrequent [always = -2/3; F&I = 1/3; never = 0] # C3 = infrequent vs frequent [F = 1/2; I = -1/2; others = 0] # contrast codes d <- d %>% mutate(con1 = case_when(condition == "always" ~ -1/4, condition == "frequent" ~ -1/4, condition == "infrequent" ~ -1/4, condition == "never" ~ 3/4), con2 = case_when(condition == "always" ~ -2/3, condition == "frequent" ~ 1/3, condition == "infrequent" ~ 1/3, condition == "never" ~ 0), con3 = case_when(condition == "always" ~ 0, condition == "frequent" ~ 1/2, condition == "infrequent" ~ -1/2, condition == "never" ~ 0)) # what's in there? d # check orthogonality cor(d[6:8]) # woo hoo! # modeling coef(lm(errors ~ con1 + con2 + con3, d)) # a fuller summary summary(lm(errors ~ con1 + con2 + con3, d)) # another way to create orthogonal contrasts: Helmert coding # what's the current coding scheme? contrasts(d$condition) # dummy codes # we can overwrite these with so-called Helmert codes contr.helmert(4) -> contrasts(d$condition) # what are the contrasts? contrasts(d$condition) # the model coef(lm(errors ~ condition, d)) # finally, one other way to do coding that might be easier than using case_when; # this relies on you knowing for sure the order of the levels of your factor levels(d$condition) # the order: [1] "always" "frequent" "infrequent" "never" # creating the contrasts neverVSothers <- c(-1/4, -1/4, -1/4, 3/4) alwaysVSsometimes <- c(-2/3, 1/3, 1/3, 0) freqVSinfreq <- c( 0, 1/2, -1/2, 0) # column binding them allContrasts <- cbind(neverVSothers, alwaysVSsometimes, freqVSinfreq) # what did we just create? allContrasts # maybe not bad? # overwrite whatever coding is in there (Helmert if this script has been run in # order) with the allContrasts just created contrasts(d$condition) <- allContrasts # check; always check! contrasts(d$condition) # model conModel <- lm(errors ~ condition, d) summary(conModel) # if you care only about differences among conditions (but not between # particular conditions), the aov function does the trick anova(conModel) summary(aov(errors ~ condition, d)) summary.aov(conModel)