# Drill 6 (February 29)

# ways to analyze factorial designs

# install.packages("ez")
library(tidyverse)
library(ez)
library(lmSupport)

# read in the data and ensure that factors are, well, factors
scrambled <- read.csv("http://whlevine.hosted.uark.edu/psyc5143/scrambled.csv", stringsAsFactors = T)
is.factor(scrambled$text)
is.factor(scrambled$wpm)

# what's in there?
head(scrambled, 10)

# cell means
scrambled %>% group_by(text, wpm) %>%
	summarise(M = mean(dv),
						SD = sd(dv),
						n = length(dv))

# marginal means
scrambled %>% group_by(text) %>% summarise(M = mean(dv))
scrambled %>% group_by(wpm) %>% summarise(M = mean(dv))

# contrast codes
scrambled <- scrambled %>% 
	mutate(T = ifelse(text == "intact", 1/2, -1/2),
	       
				 R1 = ifelse(wpm == "600 wpm", -2/3, 1/3),
				 R2 = case_when(wpm == "300 wpm" ~ 1/2,
				 							 wpm == "450 wpm" ~ -1/2,
				 							 wpm == "600 wpm" ~ 0),
				 
				 TR1 = T*R1,
				 TR2 = T*R2)
# except for the text factor, these are NOT the only contrast codes that can be
# used

model.contrasts <- lm(dv ~ T + R1 + R2 + TR1 + TR2, scrambled)

# gives the summary for each parameter in slopes, SE, t, and p
summary(model.contrasts)
summary.aov(model.contrasts)

# gives the summary for each parameter in SS, MS, F, and p
anova(model.contrasts)
# note: the Sum Sq values here are the same as the SSR values that modelSummary
# produces

# various models, model comparisons, and equivalents
modelA               <- lm(dv ~ T + R1 + R2 + TR1 + TR2, scrambled)
# modelC.T             <- lm(dv ~     R1 + R2 + TR1 + TR2, scrambled)
modelC.R1            <- lm(dv ~ T +      R2 + TR1 + TR2, scrambled)
# modelC.R2            <- lm(dv ~ T + R1      + TR1 + TR2, scrambled)
# modelC.TR1           <- lm(dv ~ T + R1 + R2       + TR2, scrambled)
modelC.TR2           <- lm(dv ~ T + R1 + R2 + TR1      , scrambled)
# modelC.T_main_effect <- lm(dv ~     R1 + R2 + TR1 + TR2, scrambled)
modelC.R_main_effect <- lm(dv ~ T +         + TR1 + TR2, scrambled)
modelC.interaction   <- lm(dv ~ T + R1 + R2            , scrambled)

# before fitting the next model, check the underlying weights/numbers/lambdas
# for the groups that make up each factor
contrasts(scrambled$text)  # dummy coded with intact as the reference level
contrasts(scrambled$wpm)   # dummy coded with 300 as the reference level

model.ANOVA          <- lm(dv ~ text*wpm, scrambled)
model.ANOVA2         <- aov(dv ~ text*wpm, scrambled)

# anova(modelC.T, modelA)       # this is a test of the T effect
# summary.aov(modelA)    # focus on the T effect

anova(modelC.R1, modelA)      # tests the R1 effect
summary.aov(modelA)    # focus on the R1 effect

# anova(modelC.R2, modelA)      # tests the R2 effect
# summary.aov(modelA)    # focus on the R2 effect
# 
# anova(modelC.TR1, modelA)
# summary.aov(modelA)    # focus on the TR1 effect

anova(modelC.TR2, modelA)
summary.aov(modelA)    # focus on the TR2 effect

# anova(modelC.T_main_effect, modelA) # test the text main effect
# anova(model.ANOVA)                  # focus on the text effect

anova(modelC.R_main_effect, modelA) # test the rate main effect
anova(model.ANOVA)                  # focus on the wpm effect

anova(modelC.interaction, modelA)   # test the interaction effect
anova(model.ANOVA)                  # focus on the interaction effect

# what other options do we have?

# instead of creating T, R1, R2, etc., we can overwrite the default dummy-coding
# for factors

# reminder of the default coding for text
contrasts(scrambled$text)

# we want the values to be +1/2 and -1/2
contrasts(scrambled$text) <- c(1/2, -1/2)

# what do we have now?
contrasts(scrambled$text)

# let's do the same for the rate factor

# the default
contrasts(scrambled$wpm)

# we want what was in lines 31-35 earlier; this will take a little work, and
# what I've done below is NOT the only way to do this

R1 <- c(1/3,  1/3, -2/3) # this is the first contrast
R2 <- c(1/2, -1/2,    0) # the second one

# bind these together in two columsn
Rcontrast <- cbind(R1, R2)

# overwrite the default
contrasts(scrambled$wpm) <- Rcontrast

# check our work
contrasts(scrambled$wpm)

# now we can execute our linear model
model.again <- lm(dv ~ text * wpm, scrambled)

summary(model.again)
summary(model.contrasts) # same
anova(model.again)

# what other options do we have?

# the ez package is very handy, but it requires something like a participant ID,
# which we don't have

# let's add one!
scrambled <- scrambled %>% 
  mutate(ID = row.names(.))

ezANOVA(data = scrambled, 
        wid = ID,
        dv = dv, 
        between = .(text, wpm), 
        detailed = T)

# this is no better than the usual ANOVA in that it would require post-tests,
# but it does produce an effect-size measure labeled ges; this stands for
# 'generalized eta-squared', which is in a between-subjects design the same
# thing as partial eta-squared (pEta-sqr in modelEffectSizes)

modelEffectSizes(model.ANOVA)

# one (sledgehammer-like) way to approach post-tests to the ANOVA is to use
# Tukey's HSD; to do this, we have to ask ezANOVA to give us an 'aov' object
ezANOVA(data = scrambled, 
        wid = ID,
        dv = dv, 
        between = .(text, wpm), 
        detailed = T,
        return_aov = T) -> posttestThing

# now we can use the Tukey HSD procedure to do a LOT of post-tests (all of them
# pairwise comparisons)
TukeyHSD(posttestThing$aov, which = "text")
TukeyHSD(posttestThing$aov, which = "wpm")
TukeyHSD(posttestThing$aov, which = "text:wpm") # !!!!

# many treatments of simple effects in R (and SPSS, for the matter) suggest
# subsetting data; THIS IS A BAD IDEA because it ignores information that is
# useful

# but I'll show you anyway

# let's say you want to compare intact vs scrambled text only at 300 wpm (this
# is the simple effect of the text factor at 300 wpm, i.e., ignoring the other
# rates)

scrambled %>% filter(wpm == "300 wpm") %>% aov(dv ~ text, .) %>% summary()
# what's the problem with this? it has only 14 df in its denominator ... the
# full ANOVA has 42; lower df means a loss of power (because we've ignored
# information)

# better is to use the phia package

# install.packages("phia")
library(phia)

# this tests the simple effects of text at each level of wpm
testInteractions(model = model.ANOVA, 
								 fixed = "wpm", 
								 across = "text")

# this tests the simple effects of wpm at each level of text
testInteractions(model = model.ANOVA,
								 fixed = "text",
								 across = "wpm")

# you can also do more-specific pairwise comparisons within levels
testInteractions(model = model.ANOVA, 
								 pairwise = "wpm",
								 across = "text")