#############################
#
# Section 1
# read in the data
#
#############################

# Read the spiders CSV from the course web server into `spiders`, then print
# a compact overview of its structure.
spiders <- read.csv(
  file = "http://whlevine.hosted.uark.edu/psyc6343/spiders.csv"
)
str(object = spiders)

#############################
#
# Section 2
# standard regression
#
#############################

# Ordinary least-squares regression of spiders01 on grainsize
# (spiders01 is presumably coded 0/1 -- confirm against the data).
standard <- lm(spiders01 ~ grainsize, data = spiders)
summary(standard)

# examining predictions
# max is greater than 1
summary(fitted(standard))

# visualizing predictions: observed values with the fitted values overlaid
plot(spiders01 ~ grainsize, data = spiders)
lines(x = spiders$grainsize, y = fitted(standard))


#############################
#
# Section 3
# logistic regression
#
#############################

# Logistic regression of spiders01 on grainsize, fitted as a GLM with the
# binomial family.
logistic <- glm(
  spiders01 ~ grainsize,
  family = "binomial",
  data = spiders
)
summary(logistic)

#############################
#
# Section 4
# assessing the model (relative to an implied null model)
#
#############################

# Model chi-square test: compares the fitted logistic model with the
# null model whose deviance glm() stores alongside it.

# drop in deviance from the null model to the fitted model
modelChi <- logistic$null.deviance - logistic$deviance

# difference in residual degrees of freedom between the two models
chidf <- logistic$df.null - logistic$df.residual

# Ask for the upper tail directly. Computing 1 - pchisq(...) subtracts two
# nearly equal numbers and rounds to exactly 0 once the CDF is within machine
# precision of 1, whereas lower.tail = FALSE keeps very small p-values.
modelp <- pchisq(modelChi, df = chidf, lower.tail = FALSE)
modelp

#############################
#
# Section 5
# interpreting the model
#
#############################

# creating odds-ratios
exp(coef(logistic))

# visualizing predictions
# NOTE(review): xlim = c(0, 1) hides any observation whose grainsize is
# above 1 -- confirm that this is intended.
plot(spiders01 ~ grainsize, data = spiders, xlim = c(0, 1))

# lines() joins the points in the order they are supplied, so draw the fitted
# values in increasing grainsize order. If the rows are not already sorted,
# plotting them in row order would connect non-adjacent points with straight
# chords that cut across the fitted curve.
grain_order <- order(spiders$grainsize)
lines(spiders$grainsize[grain_order], fitted(logistic)[grain_order],
      col = "red")
summary(fitted(logistic))
as.data.frame(fitted(logistic))
# fit isn't very good, but all predictions are in the 0 to 1 range

# adding in the standard regression fit
lines(spiders$grainsize[grain_order], fitted(standard)[grain_order],
      col = "purple")

##########
# centering the predictor to make the y-intercept more useful:
# grainsize.c is grainsize minus its sample mean, so grainsize.c = 0
# corresponds to the mean grain size
spiders$grainsize.c <- with(spiders, grainsize - mean(grainsize))
logistic.c <- glm(
  spiders01 ~ grainsize.c,
  family = "binomial",
  data = spiders
)
summary(logistic.c)

# vertical reference line at the mean grain size, added to the plot that is
# currently open on the graphics device
abline(v = mean(spiders$grainsize))

# odds ratios
exp(coef(logistic.c))

# visualizing the rapid change in odds with increasing grain size
library(tidyverse)

# predict() on the glm is asked for the link scale (its default), i.e. the
# log-odds; exponentiating gives the predicted odds for each row
spiders <- mutate(spiders, odds = exp(predict(logistic, type = "link")))

ggplot(spiders, aes(x = grainsize, y = odds)) +
  geom_line()

# prefer probabilities to odds?
# Only the intercept can be converted this way. logistic.c was fitted on the
# mean-centered predictor, so its intercept is the log-odds at the mean grain
# size, and the inverse logit of that is the predicted probability there.
# The slope is a log odds *ratio*; pushing it through exp(b) / (1 + exp(b))
# does not give a probability, so it is not converted.
# plogis(x) computes exp(x) / (1 + exp(x)).
plogis(coef(logistic.c)["(Intercept)"])