weight_challenge_results.Rmd 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419
  1. ---
  2. title: "2017 Weight Challenge"
  3. author: "Will Koehrsen"
  4. date: "January 7, 2018"
  5. output: html_document
  6. ---
  7. ```{r setup, include=FALSE}
  8. knitr::opts_chunk$set(echo = TRUE)
  9. library(googlesheets)
  10. library(tidyverse)
  11. library(slackr)
  12. library(ggthemes)
  13. library(gganimate)
  14. library(prophet)
  15. ```
  16. # Formatting and Cleaning Weights
  17. ```{r}
  18. weights <- read_csv('weight_data.csv')
  19. # Convert to date only
  20. craig_weights <- weights %>% dplyr::filter(name == 'Craig') %>% arrange(date) %>%
  21. dplyr::filter(date < as.Date('2018-01-07'))
  22. will_weights <- weights %>% dplyr::filter(name == 'Will') %>% arrange(date) %>%
  23. dplyr::filter(date < as.Date('2018-01-07'))
  24. weights <- rbind(craig_weights, will_weights)
  25. ```
  26. # Data Manipulation
  27. We want to have a column representing the weight changes. Therefore we first need
  28. to find the starting weighting for both participants.
  29. ```{r}
  30. # Baseline weights
  31. craig_start <- craig_weights$weight[1]
  32. will_start <- will_weights$weight[1]
  33. # Create a column of weight changes
  34. weight_change <- function(row) {
  35. if(row[2] == 'Craig') {
  36. return(craig_start - as.numeric(row[3]))
  37. } else {
  38. return(as.numeric(row[3]) - will_start)
  39. }
  40. }
  41. weights$change <- apply(weights, MARGIN = 1, FUN = weight_change)
  42. # Create a column of percentage weight changes
  43. pct_weight_change <- function(row) {
  44. if (row[2] == 'Craig') {
  45. return(100 * (craig_start - as.numeric(row[3])) / craig_start)
  46. } else {
  47. return(100 * (as.numeric(row[3]) - will_start) / will_start)
  48. }
  49. }
  50. weights$change_pct <- apply(weights, MARGIN = 1, FUN = pct_weight_change)
  51. ```
  52. # Plots
  53. Start off with a simple graph of the raw weights. We need two separate
  54. axes to represent the varying scales.
  55. ```{r}
  56. ggplot(weights, aes(date)) +
  57. geom_jitter(data = dplyr::filter(weights, name == 'Craig'),
  58. aes(y = weight, col = "Craig"),shape = 2, size = 1.2) +
  59. geom_line(data = dplyr::filter(weights, name == 'Craig'),
  60. aes(y = weight, col = "Craig"), lwd = 1.2) +
  61. geom_jitter(data = dplyr::filter(weights, name == 'Will'),
  62. aes(y = 1.9 * weight, col = "Will"),shape = 2, size = 1.2) +
  63. geom_line(data = dplyr::filter(weights, name == 'Will'),
  64. aes(y = 1.9 * weight, col = "Will"), lwd = 1.2) +
  65. scale_y_continuous(sec.axis = sec_axis(~.*(1/1.9), name = 'Will Weight (lbs)',
  66. breaks = seq(115, 145, 10)),
  67. breaks = seq(215, 280, 10), limits = c(215, 275)) +
  68. scale_color_manual(values = c("midnightblue", "red3"), labels = c('Craig', 'Will')) +
  69. xlab("") + ylab('Craig Weight (lbs)') +
  70. labs(color = "Name") + ggtitle("Weight Challenge") +
  71. theme(axis.text.y = element_text(color = "midnightblue"),
  72. axis.text.y.right = element_text(color = 'red3')) +
  73. theme_hc(14)
  74. ggplot(weights, aes(date)) +
  75. geom_point(data = dplyr::filter(weights, name == 'Craig'),
  76. aes(y = weight, col = "Craig"), size = 2.5) +
  77. geom_point(data = dplyr::filter(weights, name == 'Will'),
  78. aes(y = 1.82 * weight, col = "Will"),size = 2.5) +
  79. scale_y_continuous(sec.axis = sec_axis(~.*(1/1.82), name = 'Will Weight (lbs)',
  80. breaks = seq(110, 140, 5)),
  81. breaks = seq(215, 255, 5), limits = c(215, 255)) +
  82. scale_color_manual(values = c('midnightblue', 'red3'), labels = c('Craig', 'Will')) +
  83. xlab("") + ylab('Craig Weight (lbs)') +
  84. labs(color = "") + ggtitle("Full Weight Challenge Results") +
  85. theme_classic(14) +
  86. theme(axis.text.y = element_text(color = "midnightblue"),
  87. axis.text.y.right = element_text(color = 'red3'),
  88. axis.title.y = element_text(color = 'midnightblue'),
  89. axis.title.y.right = element_text(color = 'red3'),
  90. legend.position = 'bottom',
  91. plot.title = element_text(hjust = 0.5, size = 16),
  92. axis.text.x = element_text(color = 'black')) +
  93. geom_smooth(data = dplyr::filter(weights, name == 'Craig'),
  94. aes(y = weight, col = "Craig"), lwd = 1.6) +
  95. geom_smooth(data = dplyr::filter(weights, name == 'Will'),
  96. aes(y = 1.82 * weight, col = "Will"), lwd = 1.6)
  97. ```
  98. We can make plots of the changes in absolute terms to see who
  99. was winning at each stage.
  100. ```{r}
  101. # Absolute change
  102. ggplot(weights, aes(date, change, group = name, col = name)) +
  103. geom_point(size = 2) +
  104. scale_color_manual(values = c('midnightblue', 'red3'), labels = c('Craig', 'Will')) +
  105. xlab('') + ylab('Change (lbs)') + ggtitle('Absolute Weight Change') +
  106. labs(color = '') + coord_cartesian(ylim = c(0, 20)) +
  107. theme_classic(14) + theme(legend.position = 'bottom',
  108. plot.title = element_text(hjust = 0.5,
  109. size = 16),
  110. axis.text = element_text(color = 'black'),
  111. panel.grid.major.y = element_line()) +
  112. geom_smooth()
  113. # Percentage change
  114. ggplot(weights, aes(date, round(change_pct, 1), group = name, col = name)) +
  115. geom_jitter(size = 2) + geom_smooth() +
  116. scale_color_manual(values = c('midnightblue', 'red3'), labels = c('Craig', 'Will')) +
  117. xlab('') + ylab('Change %') + ggtitle('Absolute Weight Change Percentage') +
  118. labs(color = '') + coord_cartesian(ylim = c(0, 10)) +
  119. theme_classic(14) +
  120. theme(legend.position = 'bottom',
  121. plot.title = element_text(hjust = 0.5,
  122. size = 16),
  123. axis.text = element_text(color = 'black'),
  124. panel.grid.major.y = element_line())
  125. ```
  126. Quantitative results.
  127. ```{r}
  128. craig_final <- dplyr::filter(weights, name == 'Craig' & date == '2018-01-06')
  129. will_final <- dplyr::filter(weights, name == 'Will' & date == '2018-01-06')
  130. final_results <- rbind(craig_final, will_final) %>% select(name, weight, change,
  131. change_pct)
  132. knitr::kable(final_results)
  133. ```
  134. # Modeling
  135. ## Linear Models
  136. ```{r}
  137. # Days since start of competition
  138. craig_weights$days <- as.numeric(lubridate::yday(craig_weights$date) -
  139. lubridate::yday(craig_weights$date)[1])
  140. # Create a simple linear model
  141. craig_lm <- lm(weight ~ days, craig_weights)
  142. will_weights$days <- as.numeric(lubridate::yday(will_weights$date) -
  143. lubridate::yday(will_weights$date)[1])
  144. will_lm <- lm(weight ~ days, will_weights)
  145. summary(craig_lm)
  146. summary(will_lm)
  147. ```
  148. Plots of Linear Models
  149. ```{r}
  150. ggplot(weights, aes(date)) +
  151. geom_point(data = dplyr::filter(weights, name == 'Craig'),
  152. aes(y = weight, col = "Craig"), size = 2.5) +
  153. geom_point(data = dplyr::filter(weights, name == 'Will'),
  154. aes(y = 1.82 * weight, col = "Will"),size = 2.5) +
  155. scale_y_continuous(sec.axis = sec_axis(~.*(1/1.82), name = 'Will Weight (lbs)',
  156. breaks = seq(110, 140, 5)),
  157. breaks = seq(215, 255, 5), limits = c(215, 255)) +
  158. scale_color_manual(values = c('midnightblue', 'red3'), labels = c('Craig', 'Will')) +
  159. xlab("") + ylab('Craig Weight (lbs)') +
  160. labs(color = "") + ggtitle("Weight Challenge Results with Linear Model") +
  161. theme_classic(14) +
  162. theme(axis.text.y = element_text(color = "midnightblue"),
  163. axis.text.y.right = element_text(color = 'red3'),
  164. axis.title.y = element_text(color = 'midnightblue'),
  165. axis.title.y.right = element_text(color = 'red3'),
  166. legend.position = 'bottom',
  167. plot.title = element_text(hjust = 0.5, size = 16),
  168. axis.text.x = element_text(color = 'black')) +
  169. geom_smooth(data = dplyr::filter(weights, name == 'Craig'), method = 'lm',
  170. aes(y = weight, col = "Craig"), lwd = 1.6) +
  171. geom_smooth(data = dplyr::filter(weights, name == 'Will'), method = 'lm',
  172. aes(y = 1.82 * weight, col = "Will"), lwd = 1.6)
  173. ```
  174. Predictions for January 1, 2019
  175. ```{r}
  176. # Find number of days between start of competition January 1, 2019
  177. days <- as.numeric(as.Date('2019-01-01') - craig_weights$date[1])
  178. craig_prediction <- predict(craig_lm,
  179. data.frame(days = days, name = 'Craig', plot = TRUE),
  180. interval = 'predict')
  181. will_prediction <- predict(will_lm,
  182. data.frame(days = days, name = 'Will', plot = TRUE),
  183. interval = 'predict')
  184. ```
  185. ## Generalized Additive Model
  186. ```{r}
  187. craig_weights$ds <- craig_weights$date
  188. craig_weights$y <- craig_weights$weight
  189. craig_gam <- prophet::prophet(craig_weights)
  190. will_weights$ds <- will_weights$date
  191. will_weights$y <- will_weights$weight
  192. will_gam <- prophet::prophet(will_weights)
  193. craig_forecast <- prophet::make_future_dataframe(craig_gam, periods = 365)
  194. craig_forecast <- predict(craig_gam, craig_forecast)
  195. prophet::prophet_plot_components(craig_gam, craig_forecast)
  196. will_forecast <- prophet::make_future_dataframe(will_gam, periods = 365)
  197. will_forecast <- predict(will_gam, will_forecast)
  198. prophet::prophet_plot_components(will_gam, will_forecast)
  199. will_forecast$name <- 'Will'
  200. craig_forecast$name <- 'Craig'
  201. forecast <- rbind(will_forecast, craig_forecast)
  202. ```
  203. Predictions for January 1, 2018
  204. ```{r}
  205. short_days <- as.numeric(as.Date('2018-01-01') - as.Date('2017-11-01'))
  206. craig_short_gam <- prophet::prophet(df = dplyr::filter(craig_weights, date < '2017-11-01'))
  207. craig_short_forecast <- prophet::make_future_dataframe(craig_short_gam, periods = short_days)
  208. craig_short_forecast <- predict(craig_short_gam, craig_short_forecast)
  209. will_short_gam <- prophet::prophet(df = dplyr::filter(will_weights, date < '2017-11-01'))
  210. will_short_forecast <- prophet::make_future_dataframe(will_short_gam, periods = short_days)
  211. will_short_forecast <- predict(will_short_gam, will_short_forecast)
  212. prophet::prophet_plot_components(craig_short_gam, craig_short_forecast)
  213. ggplot(craig_short_forecast, aes(x = ds, y = trend)) +
  214. geom_ribbon(data = dplyr::filter(craig_short_forecast, ds >= '2017-11-01'),
  215. aes(ymin = trend_lower, ymax = trend_upper),
  216. color = 'black', lwd = 1, fill = 'gainsboro') +
  217. geom_line(color = 'red3', lwd = 1.8) + theme_classic(14) +
  218. xlab('Date') + ylab('Weight (lbs)') +
  219. ggtitle('Craig GAM Prediction for Jan 1, 2018') +
  220. theme(axis.text = element_text(color = 'black'),
  221. plot.title = element_text(hjust = 0.5, size = 18))
  222. ggplot(will_short_forecast, aes(x = ds, y = trend)) +
  223. geom_ribbon(data = dplyr::filter(will_short_forecast, ds >= '2017-11-01'),
  224. aes(ymin = trend_lower, ymax = trend_upper),
  225. color = 'black', lwd = 1, fill = 'gainsboro') +
  226. geom_line(color = 'red3', lwd = 1.8) + theme_classic(14) +
  227. xlab('Date') + ylab('Weight (lbs)') +
  228. ggtitle('Will GAM Prediction for Jan 1, 2018') +
  229. theme(axis.text = element_text(color = 'black'),
  230. plot.title = element_text(hjust = 0.5, size = 18))
  231. ```
  232. Predictions for Jan 1, 2019
  233. ```{r}
  234. ggplot(craig_forecast, aes(x = ds, y = trend)) +
  235. geom_ribbon(data = dplyr::filter(craig_forecast, ds >= '2018-01-06'),
  236. aes(ymin = trend_lower, ymax = trend_upper),
  237. color = 'black', lwd = 1, fill = 'gainsboro') +
  238. geom_line(color = 'red3', lwd = 1.8) + theme_classic(14) +
  239. xlab('Date') + ylab('Weight (lbs)') +
  240. ggtitle('Craig GAM Prediction for Jan 1, 2019') +
  241. theme(axis.text = element_text(color = 'black'),
  242. plot.title = element_text(hjust = 0.5, size = 18))
  243. ggplot(will_forecast, aes(x = ds, y = trend)) +
  244. geom_ribbon(data = dplyr::filter(will_forecast, ds >= '2018-01-06'),
  245. aes(ymin = trend_lower, ymax = trend_upper),
  246. color = 'black', lwd = 1, fill = 'gainsboro') +
  247. geom_line(color = 'red3', lwd = 1.8) + theme_classic(14) +
  248. xlab('Date') + ylab('Weight (lbs)') +
  249. ggtitle('Will GAM Prediction for Jan 1, 2019') +
  250. theme(axis.text = element_text(color = 'black'),
  251. plot.title = element_text(hjust = 0.5, size = 18))
  252. ```
  253. Generalized Additive Model Graphs
  254. ```{r}
  255. ggplot(forecast, aes(x = as.Date(ds))) +
  256. geom_line(data = dplyr::filter(forecast, ds < '2018-01-07' & name == 'Craig'),
  257. aes(y = trend, col = "Craig"), size = 1.4) +
  258. geom_line(data = dplyr::filter(forecast, ds < '2018-01-07' & name == 'Will'),
  259. aes(y = 1.82 * trend, col = "Will"),size = 1.4) +
  260. scale_y_continuous(sec.axis = sec_axis(~.*(1/1.82), name = 'Will Weight (lbs)',
  261. breaks = seq(110, 140, 5)),
  262. breaks = seq(215, 255, 5), limits = c(215, 255)) +
  263. scale_color_manual(values = c('midnightblue', 'red3'), labels = c('Craig', 'Will')) +
  264. xlab("") + ylab('Craig Weight (lbs)') +
  265. labs(color = "") + ggtitle("Overall Trends") +
  266. theme_classic(14) +
  267. theme(axis.text.y = element_text(color = "midnightblue"),
  268. axis.text.y.right = element_text(color = 'red3'),
  269. axis.title.y = element_text(color = 'midnightblue'),
  270. axis.title.y.right = element_text(color = 'red3'),
  271. legend.position = 'bottom',
  272. plot.title = element_text(hjust = 0.5, size = 16),
  273. axis.text.x = element_text(color = 'black'))
  274. craig_weekly_forecast <- dplyr::filter(forecast, name == 'Craig')[1:7, c('name', 'weekly')]
  275. will_weekly_forecast <- dplyr::filter(forecast, name == 'Will')[1:7, c('name', 'weekly')]
  276. weekly_forecast <- rbind(craig_weekly_forecast, will_weekly_forecast)
  277. weekly_forecast$day <- factor(rep(c('Sun', 'Mon', 'Tues', 'Wed', 'Thur', 'Fri', 'Sat'), 2),
  278. levels = c('Mon', 'Tues', 'Wed', 'Thur', 'Fri', 'Sat', 'Sun'))
  279. ggplot(weekly_forecast, aes(x = day, y = weekly, color = name)) +
  280. geom_line(aes(group = name), size = 1.4) + geom_point(size = 2) +
  281. scale_color_manual(values = c('midnightblue', 'red3'), labels = c('Craig', 'Will')) +
  282. xlab("") + ylab('Change (lbs)') +
  283. labs(color = "") + ggtitle("Weekly Weight Trends") +
  284. theme_classic(14) +
  285. theme(axis.text = element_text(color = "black"),
  286. axis.title = element_text(color = 'black'),
  287. legend.position = 'bottom',
  288. plot.title = element_text(hjust = 0.5, size = 16)) +
  289. scale_y_continuous(lim = c(-1.0, 1.0), breaks = seq(-1.0, 1.0, 0.25))
  290. ```
  291. Contrast models with reality
  292. ```{r}
  293. ggplot(craig_short_forecast, aes(x = ds, y = trend)) +
  294. geom_ribbon(data = dplyr::filter(craig_short_forecast, ds >= '2017-11-01'),
  295. aes(ymin = trend_lower, ymax = trend_upper),
  296. color = 'black', lwd = 1, fill = 'gainsboro') +
  297. geom_line(aes(color = 'predicted'), lwd = 1.8) + theme_classic(14) +
  298. xlab('Date') + ylab('Weight (lbs)') +
  299. ggtitle('Craig Predicted vs Actual') +
  300. theme(axis.text = element_text(color = 'black'),
  301. plot.title = element_text(hjust = 0.5, size = 18)) +
  302. geom_point(data = dplyr::filter(weights, name == 'Craig' & date <= '2018-01-01'),
  303. aes(x = as.POSIXct(date), y = weight,
  304. col = 'actual'), size = 2) + labs(color = '') +
  305. scale_color_manual(values = c('red3', 'darkgreen'))
  306. ggplot(will_short_forecast, aes(x = ds, y = trend)) +
  307. geom_ribbon(data = dplyr::filter(will_short_forecast, ds >= '2017-11-01'),
  308. aes(ymin = trend_lower, ymax = trend_upper),
  309. color = 'black', lwd = 1, fill = 'gainsboro') +
  310. geom_line(aes(color = 'predicted'), lwd = 1.8) + theme_classic(14) +
  311. xlab('Date') + ylab('Weight (lbs)') +
  312. ggtitle('Will Predicted vs Actual') +
  313. theme(axis.text = element_text(color = 'black'),
  314. plot.title = element_text(hjust = 0.5, size = 18)) +
  315. geom_point(data = dplyr::filter(weights, name == 'Will' & date <= '2018-01-01'),
  316. aes(x = as.POSIXct(date), y = weight,
  317. col = 'actual'), size = 2) + labs(color = '') +
  318. scale_color_manual(values = c('red3', 'darkgreen'))
  319. ```
  320. # Animations
  321. One of the most rewarding parts of a data analysis is animations!
  322. ```{r}
  323. library(gganimate)
  324. weights$week <- lubridate::week(weights$date)
  325. weights[which(lubridate::year(weights$date) == '2018'), ]$week <- 53
  326. craig_trend <- dplyr::filter(craig_forecast, ds <= '2018-01-06')$trend
  327. will_trend <- dplyr::filter(will_forecast, ds <= '2018-01-06')$trend
  328. craig_trend_pct <- sapply(craig_trend, function(x) {return( abs(100 * (x - craig_start) / craig_start))})
  329. will_trend_pct <- sapply(will_trend, function(x) {return ( 100 * (x - will_start) / will_start)})
  330. weights$trend <- c(craig_trend, will_trend)
  331. weights$trend_pct_change <- c((craig_trend_pct), will_trend_pct)
  332. p <- ggplot(weights, aes(date, round(change_pct, 1), group = name, col = name,
  333. frame = week, cumulative = TRUE)) +
  334. geom_jitter(size = 2) +
  335. geom_line(aes(date, y = trend_pct_change), lwd = 2) +
  336. scale_color_manual(values = c('midnightblue', 'red3'), labels = c('Craig', 'Will')) +
  337. xlab('') + ylab('Change %') + ggtitle('Absolute Weight Change Percentage') +
  338. labs(color = '') + coord_cartesian(ylim = c(0, 10)) +
  339. theme_classic(14) +
  340. theme(legend.position = 'bottom',
  341. plot.title = element_text(hjust = 0.5,
  342. size = 20),
  343. axis.text = element_text(color = 'black',
  344. size = 16),
  345. axis.title = element_text(color = 'black', size = 18),
  346. legend.text = element_text(size = 16))
  347. gganimate(p, filename = 'animated_weight_change.gif', saver = 'gif',
  348. interval = 0.4, title_frame = FALSE, fig.height = 4)
  349. ```