# drill 3

library(tidyverse)

# using dummy variables to do regression (ANOVA; same thing!) with categorical
# predictors

# import data
d <- read.csv("http://whlevine.hosted.uark.edu/psyc5143/reward.csv", stringsAsFactors = TRUE)

# what's in there
glimpse(d)
d

#graphically:
ggplot(d, aes(condition, errors)) + geom_point(position = "jitter")

# check factor status of IV
is.factor(d$condition)
levels(d$condition)

# get some condition means and SDs
d %>% 
	group_by(condition) %>% 
	summarise(M = mean(errors),
						SD = sd(errors))

# get the grand mean
mean(d$errors)
# note that the grand mean = the mean of the group means if and only if the
# groups are the same size

# let's dummy code the long, painful way
d <- d %>% 
	mutate(D1 = case_when(condition == "always" ~ 0,
												condition == "frequent" ~ 1,
												condition == "infrequent" ~ 0,
												condition == "never" ~ 0),
				 D2 = case_when(condition == "always" ~ 0,
				 							 condition == "frequent" ~ 0,
				 							 condition == "infrequent" ~ 1,
				 							 condition == "never" ~ 0),
				 D3 = case_when(condition == "always" ~ 0,
				 							 condition == "frequent" ~ 0,
				 							 condition == "infrequent" ~ 0,
				 							 condition == "never" ~ 1))

# what did that do?
d


# compare the default lm output to the dummy-coded version
coef(lm(errors ~ condition, d))
coef(lm(errors ~ D1 + D2 + D3, d))
# same results, just different labels for slopes

# BEWARE THAT R'S DEFAULT CODING RELIES ON ALPHABETICAL ORDER OF FACTOR
# LEVELS!!!

# one way to check orthogonality (which is NOT expected for dummy codes)

cor(d[3:5]) # the off-diagonal correlations must be 0 for orthogonality

# better to use orthogonal contrasts; execute the following contrasts

# C1 = never vs all others   [never = 3/4; all others = -1/4]
# C2 = always vs frequent/infrequent [always = -2/3; F&I = 1/3; never = 0]
# C3 = infrequent vs frequent [F = 1/2; I = -1/2; others = 0]

# contrast codes
d <- d %>% 
	mutate(con1 = case_when(condition == "always" ~ -1/4,
												condition == "frequent" ~ -1/4,
												condition == "infrequent" ~ -1/4,
												condition == "never" ~ 3/4),
				 con2 = case_when(condition == "always" ~ -2/3,
				 							 condition == "frequent" ~ 1/3,
				 							 condition == "infrequent" ~ 1/3,
				 							 condition == "never" ~ 0),
				 con3 = case_when(condition == "always" ~ 0,
				 							 condition == "frequent" ~ 1/2,
				 							 condition == "infrequent" ~ -1/2,
				 							 condition == "never" ~ 0))

# what's in there?
d

# check orthogonality
cor(d[6:8]) # woo hoo!

# modeling
coef(lm(errors ~ con1 + con2 + con3, d))

# a fuller summary
summary(lm(errors ~ con1 + con2 + con3, d))

# another way to create orthogonal contrasts: Helmert coding

# what's the current coding scheme?
contrasts(d$condition) # dummy codes

# we can overwrite these with so-called Helmert codes
contr.helmert(4) -> contrasts(d$condition)

# what are the contrasts?
contrasts(d$condition)

# the model
coef(lm(errors ~ condition, d))

# finally, one other way to do coding that might be easier than using case_when;
# this relies on you knowing for sure the order of the levels of your factor
levels(d$condition)

# the order: [1] "always" "frequent" "infrequent" "never"

# creating the contrasts
neverVSothers     <- c(-1/4, -1/4, -1/4, 3/4)
alwaysVSsometimes <- c(-2/3,  1/3,  1/3,   0)
freqVSinfreq      <- c(   0,  1/2, -1/2,   0)

# column binding them
allContrasts <- cbind(neverVSothers, alwaysVSsometimes, freqVSinfreq)

# what did we just create?
allContrasts
# maybe not bad?

# overwrite whatever coding is in there (Helmert if this script has been run in
# order) with the allContrasts just created
contrasts(d$condition) <- allContrasts

# check; always check!
contrasts(d$condition)

# model
conModel <- lm(errors ~ condition, d)
summary(conModel)

# if you care only about differences among conditions (but not between
# particular conditions), the aov function does the trick
anova(conModel)
summary(aov(errors ~ condition, d))
summary.aov(conModel)