# drill 4: February 15, 2024

library(tidyverse)

# options(scipen = 99) # gets rid of scientific notation, but comes with a
# cost: very-long decimals

# post-tests, controlling FWER & FDR

# I'm gonna use a four-group design that adds one more group to the
# three-group data: a group that memorizes by generating an associated
# (paired) word

d <- read.csv("http://whlevine.hosted.uark.edu/psyc5143/fourGroups.csv",
              stringsAsFactors = TRUE) # I like this argument, but some don't

# what's in there?
d

# check the factor status of the IV
is.factor(d$group)
levels(d$group)

# check the default coding used by R
contrasts(d$group)

# get some condition means & SDs
d %>% 
  group_by(group) %>% 
  summarise(M  = mean(memory),
            SD = sd(memory))

# the aov function: instead of using lm, you can use aov
anovaModel <- aov(memory ~ group, d)
summary(anovaModel) # now the summary is the usual ANOVA table

# doing the same thing with lm ...
linearModel <- lm(memory ~ group, d)
summary(linearModel) # ... gives slopes (based on dummy codes)

# which dummy codes?
contrasts(d$group)

# if you want to use Tukey's HSD, the anovaModel object above is needed
TukeyHSD(anovaModel)
# it gives p-values that have been adjusted and can be compared directly to .05
# (or whatever alpha you're using)

# strictly speaking, using Tukey's HSD requires running the ANOVA first and
# getting a significant result

# instead, you can go directly to pairwise comparisons (skipping the ANOVA)

# using the Dunn-Bonferroni procedure
pairwise.t.test(x = d$memory, g = d$group, p.adjust.method = "bonferroni")
# these p-values can be compared to the usual alpha (.05)

# using the Benjamini-Hochberg procedure
pairwise.t.test(x = d$memory, g = d$group, p.adjust.method = "BH")
# these p-values can be compared to the usual alpha (.05) as well

# unadjusted p-values (not a good idea), often called Fisher's least
# significant difference test
pairwise.t.test(x = d$memory, g = d$group, p.adjust.method = "none")

# ... compare these to the linearModel p-values ...
summary(linearModel)
# ... and you'll see that they are the same

# the lesson: if you use lm to execute contrasts, you have to do something
# deliberate to control Type I error rates; how?

# let's put those p-values into a variable
pvals <- summary(linearModel)$coefficients[2:4, 4]

# what is up with that [2:4, 4] stuff? look at the summary table:
#
#             Estimate Std. Error t value Pr(>|t|)    
# (Intercept)    6.000      1.162   5.164 9.09e-06 ***
# groupimage     6.000      1.643   3.651 0.000822 ***
# grouppair      4.000      1.643   2.434 0.020008 *  
# grouprhyme     4.000      1.643   2.434 0.020008 *  
#
# the p-values of interest are in the 4th column of values, rows 2 through 4;
# in bracket notation in R, this is [2:4, 4]

# the p-values can then be adjusted directly using the p.adjust function
p.adjust(pvals, method = "bonferroni")
p.adjust(pvals, method = "BH")
# these can be compared directly to the usual alpha (probably .05)
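
# if you want to see what the Bonferroni adjustment is actually doing, it
# just multiplies each p-value by the number of tests (3 here) and caps the
# result at 1; a hand-rolled check, using the pvals defined above
pmin(pvals * length(pvals), 1) # should match p.adjust(pvals, "bonferroni")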
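
# the BH adjustment can be reproduced by hand, too: order the p-values,
# multiply the i-th smallest by (number of tests)/i, and then enforce
# monotonicity working down from the largest; a sketch of what
# p.adjust(method = "BH") computes under the hood
m  <- length(pvals)
o  <- order(pvals, decreasing = TRUE)                 # largest p first
bh <- pmin(1, cummin(m / (m:1) * pvals[o]))[order(o)] # adjust, then unsort
bh # should match p.adjust(pvals, method = "BH")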
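
# one method not covered in this drill: Holm's procedure (p.adjust's default,
# method = "holm") controls the FWER like Bonferroni does, but is never less
# powerful, so it's worth knowing about
p.adjust(pvals, method = "holm")
# these can also be compared directly to the usual alpha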
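
# one last aside, going back to Tukey's HSD: the TukeyHSD object has a plot
# method that displays the family-wise confidence intervals for the pairwise
# differences, which can be easier to read than the table
plot(TukeyHSD(anovaModel))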