# April 4, 2024

library(tidyverse)

# Quiz scores, listed one strategy at a time for persons a-f: the first row
# of values is "reread", the second "preparedQs", the third "createQs".
quiz <- c(
  2, 3, 8, 6, 5, 6,
  5, 9, 10, 13, 8, 9,
  8, 6, 12, 11, 11, 12
)

# Repeated-measures data: one row per person-by-strategy score.
RM <- tibble(
  ID = factor(rep(c("a", "b", "c", "d", "e", "f"), times = 3)),
  strategy = factor(rep(c("reread", "preparedQs", "createQs"), each = 6)),
  score = quiz
)

# Print the data. Notice the layout: "long" form (one row per score) rather
# than "wide" form.
RM

# Naive model with strategy as the only predictor. It ignores the
# non-independence of scores that come from the same person, so it is
# NOT the optimal model!
quizModel <- lm(score ~ strategy, data = RM)
anova(quizModel)

# Keep this model's residuals, rounded to whole numbers, alongside the scores.
RM <- mutate(RM, e = round(residuals(quizModel)))

# Two models that differ only in whether strategy is included; both contain
# ID. The formulas are aligned so the missing term is easy to spot.
modelA <- lm(score ~ strategy + ID, data = RM)
modelC <- lm(score ~            ID, data = RM)

# quizModel2 is the very same fit as modelA, kept under a second name.
quizModel2 <- modelA

# Compare the model without strategy against the model with it.
anova(modelC, modelA)

# Residuals of the strategy + ID model, rounded to whole numbers.
RM <- mutate(RM, new_e = round(residuals(quizModel2)))

# what are we actually doing here? (this is the LOOOOONG way to get to the
# right analysis)

# the source table for the inappropriate analysis
anova(quizModel)

# finding SS for subjects
#
# Step 1: subjM   = each person's mean score.
# Step 2: M       = the mean of those person means (same value on every row).
# Step 3: devs    = each person's deviation from M;
#         sq_devs = that deviation squared.
personMeans <- RM %>%
  group_by(ID) %>%
  summarise(subjM = mean(score)) %>%
  mutate(
    M = mean(subjM),
    devs = subjM - M,
    sq_devs = devs^2
  )

# Sum the squared deviations and weight them by the number of scores behind
# each person mean. That count is taken from the data (the number of distinct
# strategies) instead of being hard-coded as 3, so it stays correct if a
# strategy is added or removed. This assumes a balanced design: every person
# is measured exactly once under every strategy.
SS_persons <- n_distinct(RM$strategy) * sum(personMeans$sq_devs)

# Coefficients of the strategy-only model: the parameters we care about ...
lm(score ~ strategy, data = RM) %>% coef()

# ... and coefficients of the ID-only model: the ones we don't care about.
lm(score ~ ID, data = RM) %>% coef()

# instead of adding in many parameters to estimate effects for persons ...
#   lm        (score ~ strategy +    ID , RM)
# ... estimate one parameter for all persons at once
#   lme4::lmer(score ~ strategy + (1|ID), RM)
lm(score ~ strategy + ID, data = RM)

# The mixed model needs the lme4 package. Check for it first so that a missing
# package produces a hint instead of an error that stops the script
# (maybe wait for drill before installing).
if (requireNamespace("lme4", quietly = TRUE)) {
  lme4::lmer(score ~ strategy + (1 | ID), data = RM)
} else {
  message(
    "Package 'lme4' is not installed; skipping the lmer() example. ",
    "Install it with install.packages(\"lme4\")."
  )
}