# Linear vs. logistic regression of spider presence (spiders01, coded 0/1)
# on sand grain size (grainsize).
#
# The script reads the data from a URL, fits an ordinary linear model and a
# binomial GLM, tests the GLM against its null model, and plots/prints the
# fitted values, odds ratios, odds and probabilities.

# Loaded up front so a missing package fails before any analysis runs.
# Needed for %>%, mutate() and ggplot() in Section 5.
library(tidyverse)

#############################
#
# Section 1
# read in the data
#
#############################

spiders <- read.csv("http://whlevine.hosted.uark.edu/psyc6343/spiders.csv")
str(spiders)

# Row order that sorts the data by the predictor. lines() joins points in the
# order given, so fitted values must be drawn in increasing grainsize order or
# the curve is replaced by chords between non-adjacent points.
# NOTE(review): assumes no rows are dropped for missing values when fitting,
# so fitted values align one-to-one with the rows of spiders - confirm.
grain_order <- order(spiders$grainsize)

#############################
#
# Section 2
# standard regression
#
#############################

standard <- lm(spiders01 ~ grainsize, data = spiders)
summary(standard)

# examining predictions
summary(fitted(standard)) # max is greater than 1

# visualizing predictions
plot(spiders01 ~ grainsize, data = spiders)
lines(spiders$grainsize[grain_order], fitted(standard)[grain_order])

#############################
#
# Section 3
# logistic regression
#
#############################

logistic <- glm(spiders01 ~ grainsize, data = spiders, family = "binomial")
summary(logistic)

#############################
#
# Section 4
# assessing the model (relative to an implied null model)
#
#############################

# Likelihood-ratio test: drop in deviance from the null (intercept-only) model
# to the fitted model, with df equal to the difference in residual df.
modelChi <- logistic$null.deviance - logistic$deviance
chidf <- logistic$df.null - logistic$df.residual

# Upper-tail probability computed directly; 1 - pchisq(...) loses precision
# and can round to exactly 0 for large test statistics.
modelp <- pchisq(modelChi, chidf, lower.tail = FALSE)
modelp

#############################
#
# Section 5
# interpreting the model
#
#############################

# creating odds-ratios
exp(coef(logistic))

# visualizing predictions
plot(spiders01 ~ grainsize, data = spiders, xlim = c(0, 1))
lines(
  spiders$grainsize[grain_order],
  fitted(logistic)[grain_order],
  col = "red"
)
summary(fitted(logistic))
as.data.frame(fitted(logistic))
# fit isn't very good, but all predictions are in the 0 to 1 range

# adding in the standard regression fit
lines(
  spiders$grainsize[grain_order],
  fitted(standard)[grain_order],
  col = "purple"
)

##########

# centering the predictor to make the y-intercept more useful
spiders$grainsize.c <- spiders$grainsize - mean(spiders$grainsize)
logistic.c <- glm(
  spiders01 ~ grainsize.c,
  data = spiders,
  family = "binomial"
)
summary(logistic.c)

# mark the mean grain size (where grainsize.c == 0) on the current plot
abline(v = mean(spiders$grainsize))

# odds ratios
exp(coef(logistic.c))

# visualizing the rapid change in odds with increasing grain size
# predict() on a glm defaults to the link (log-odds) scale, so exp() gives odds
spiders <- spiders %>%
  mutate(odds = exp(predict(logistic)))

spiders %>%
  ggplot(aes(grainsize, odds)) +
  geom_line()

# prefer probabilities to odds?
# plogis(b) is exp(b) / (1 + exp(b)). Only the intercept entry is a
# probability (the predicted probability at mean grain size); the slope entry
# is an odds ratio pushed through the same formula and should not be read as
# a probability.
plogis(coef(logistic.c))