# .Rhistory (14 KB) - interactive R session history.
# The "N." prefix on each line below appears to be the file viewer's line number, not R code.
  # Session 1 - light-intensity / temperature visualisation, first pass.
  # NOTE(review): history line 3 calls read_csv() before tidyverse is attached in
  # the visible history, and the load sequence is repeated immediately afterwards;
  # presumably that first call failed - confirm.
  1. library(gganimate)
  2. library(ggthemes)
  3. df <- read_csv('data_vis_challenge.csv')
  4. library(tidyverse)
  5. library(gganimate)
  6. library(ggthemes)
  7. df <- read_csv('data_vis_challenge.csv')
  # Reshape wide to long: every column except `Light Intensity` and Temperature
  # becomes a (species, rate) pair; then shorten the two id column names.
  8. df <- gather(df, key = 'species', value = 'rate', -`Light Intensity`, -Temperature)
  9. df <- dplyr::rename(df, intensity = `Light Intensity`, temp = Temperature)
  # Scatter plot coloured by species.
  # NOTE(review): y (and size) are mapped to intensity while the y axis is
  # labelled 'Rate' - looks like a mis-mapping or mislabel; confirm.
  10. ggplot(data = df, aes(x = temp, y = intensity, color = species, size = intensity)) + geom_point() +
  11. xlab('Temperature') + ylab('Rate')
  # Rate against intensity per species, with temp passed as a `frame` aesthetic
  # (presumably for gganimate - TODO confirm the installed version honours it).
  12. ggplot(df, aes(x = intensity, y = rate, color = species, frame = temp)) +
  13. geom_point() + theme_classic(12)
  # Session restart: reattach the packages and reread the CSV.
  14. library(tidyverse)
  15. library(gganimate)
  16. library(ggthemes)
  17. df <- read_csv('data_vis_challenge.csv')
  # Move to the Desktop folder and restore a saved workspace from there.
  # NOTE(review): the plots below use species/rate/intensity/temp, which the raw
  # read on history line 17 does not create; presumably df is replaced by a copy
  # stored in this .RData - confirm.
  18. setwd("C:/Users/Will Koehrsen/Desktop")
  19. load("C:/Users/Will Koehrsen/Desktop/.RData")
  # Rate against intensity, coloured by species, with temp passed as `frame`.
  20. ggplot(data = df, aes(x = intensity, y = rate, color = species, frame = temp)) +
  21. geom_point() + theme_classic(12)
  # Row counts per intensity level and per temperature.
  22. table(df$intensity)
  23. table(df$temp)
  # Horizontal bar chart of rate per species, faceted by temp and intensity.
  # Attempt 1 passes the facet variables as two positional arguments.
  24. ggplot(data = df, aes(x = species, y = rate)) + geom_bar(fill = 'navy') +
  25. facet_grid(temp, intensity) + coord_flip()
  # Attempt 2 switches to the formula form temp ~ intensity.
  26. ggplot(data = df, aes(x = species, y = rate)) + geom_bar(fill = 'navy') +
  27. facet_grid(temp ~ intensity) + coord_flip()
  # Attempt 3 adds stat = 'identity' so bar heights come from the y values.
  28. ggplot(data = df, aes(x = species, y = rate)) + geom_bar(stat = 'identity', fill = 'navy') +
  29. facet_grid(temp ~ intensity) + coord_flip()
  # Facets transposed (intensity rows, temp columns); the remaining runs differ
  # only in the theme tried: hc, stata, economist, hc again, then classic.
  30. ggplot(data = df, aes(x = species, y = rate)) + geom_bar(stat = 'identity', fill = 'navy') +
  31. facet_grid(intensity ~ temp) + coord_flip() + theme_hc(12)
  32. ggplot(data = df, aes(x = species, y = rate)) + geom_bar(stat = 'identity', fill = 'navy') +
  33. facet_grid(intensity ~ temp) + coord_flip() + theme_stata(12)
  34. ggplot(data = df, aes(x = species, y = rate)) + geom_bar(stat = 'identity', fill = 'navy') +
  35. facet_grid(intensity ~ temp) + coord_flip() + theme_economist(12)
  36. ggplot(data = df, aes(x = species, y = rate)) + geom_bar(stat = 'identity', fill = 'navy') +
  37. facet_grid(intensity ~ temp) + coord_flip() + theme_hc(12)
  38. ggplot(data = df, aes(x = species, y = rate)) + geom_bar(stat = 'identity', fill = 'navy') +
  39. facet_grid(intensity ~ temp) + coord_flip() + theme_classic(12)
  # Session 2 - prepare daily temperature data for the random-forest example.
  40. setwd("~/Data-Analysis/random_forest_explained")
  41. library(tidyverse)
  42. library(lubridate)
  43. setwd("~/Data-Analysis/random_forest_explained")
  44. df <- read_csv('1169857.csv')
  # The first filter spells the station without the comma; the file is then
  # reread, the NAME values are tabulated, and the filter is redone with
  # ", WA US".
  45. df <- dplyr::filter(df, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT WA US')
  46. df <- read_csv('1169857.csv')
  47. table(df$NAME)
  48. df <- dplyr::filter(df, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
  # Same filter again, this time storing the result as temps.
  49. temps <- dplyr::filter(df, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
  # Derive month, day of month and labelled weekday from DATE.
  50. temps <- mutate(temps, month = lubridate::month(DATE),
  51. day = lubridate::day(DATE), week = lubridate::wday(DATE, label = TRUE))
  # temp_1 / temp_2: TMAX from one and two rows earlier. The leading 45 and 44
  # are hard-coded fillers for the rows that have no earlier reading.
  52. temps$temp_1 <- c(45, temps$TMAX[1:{nrow(temps) - 1}])
  53. temps$temp_2 <- c(45, 44, temps$TMAX[1:{nrow(temps) - 2}])
  54. View(temps)
  # Load the averages table and inspect it.
  55. averages <- read_csv('averages.csv')
  56. View(averages)
  # Rerun of the preparation, now reading raw_temps.csv.
  57. df <- read_csv('raw_temps.csv')
  58. temps <- dplyr::filter(df, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
  59. temps <- mutate(temps, month = lubridate::month(DATE),
  60. day = lubridate::day(DATE), week = lubridate::wday(DATE, label = TRUE))
  61. temps$temp_1 <- c(45, temps$TMAX[1:{nrow(temps) - 1}])
  62. temps$temp_2 <- c(45, 44, temps$TMAX[1:{nrow(temps) - 2}])
  # Averages: month and day are cut from characters 5-6 and 7-8 of DATE.
  63. averages <- read_csv('averages.csv')
  64. averages$month <- as.numeric(substr(averages$DATE, 5, 6))
  65. averages$day <- as.numeric(substr(averages$DATE, 7, 8))
  # Keep every temps row (all.x = TRUE) and attach DLY-TMAX-NORMAL for its
  # month/day.
  66. temps <- merge(temps, averages[, c('month', 'day', 'DLY-TMAX-NORMAL')], by = c('month', 'day'),
  67. all.x = TRUE)
  68. View(temps)
  # Same steps once more; the only addition is the final sort by DATE.
  69. df <- read_csv('raw_temps.csv')
  70. temps <- dplyr::filter(df, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
  71. temps <- mutate(temps, month = lubridate::month(DATE),
  72. day = lubridate::day(DATE), week = lubridate::wday(DATE, label = TRUE))
  73. temps$temp_1 <- c(45, temps$TMAX[1:{nrow(temps) - 1}])
  74. temps$temp_2 <- c(45, 44, temps$TMAX[1:{nrow(temps) - 2}])
  75. averages <- read_csv('averages.csv')
  76. averages$month <- as.numeric(substr(averages$DATE, 5, 6))
  77. averages$day <- as.numeric(substr(averages$DATE, 7, 8))
  78. temps <- merge(temps, averages[, c('month', 'day', 'DLY-TMAX-NORMAL')], by = c('month', 'day'),
  79. all.x = TRUE)
  80. temps <- arrange(temps, DATE)
  # Session restart. The read on history line 83 is repeated after moving into
  # the data/ subfolder; presumably the first attempt did not find the file.
  81. library(tidyverse)
  82. library(lubridate)
  83. df <- read_csv('raw_temps.csv')
  84. setwd("~/Data-Analysis/random_forest_explained/data")
  85. df <- read_csv('raw_temps.csv')
  86. temps <- dplyr::filter(df, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
  87. temps <- mutate(temps, month = lubridate::month(DATE),
  88. day = lubridate::day(DATE), week = lubridate::wday(DATE, label = TRUE))
  # NOTE(review): no leading fill value here, so the vector is one element
  # shorter than temps; the runs below pad with NA instead.
  89. temps$temp_1 <- c(temps$TMAX[1:{nrow(temps) - 1}])
  # Commented rerun: the fillers 45/44 are replaced by NA and the averages now
  # come from hist_averages.csv.
  90. # Read in the data as a dataframe
  91. df <- read_csv('raw_temps.csv')
  92. # Make sure all readings are from same station
  93. temps <- dplyr::filter(df, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
  94. # Create month, day, and week columns
  95. temps <- mutate(temps, month = lubridate::month(DATE),
  96. day = lubridate::day(DATE), week = lubridate::wday(DATE, label = TRUE))
  97. # Create the past max temperature columns
  98. temps$temp_1 <- c(NA, temps$TMAX[1:{nrow(temps) - 1}])
  99. temps$temp_2 <- c(NA, NA, temps$TMAX[1:{nrow(temps) - 2}])
  100. # Read in the averages as a dataframe
  101. averages <- read_csv('hist_averages.csv')
  102. # Create columns for the month and day
  103. averages$month <- as.numeric(substr(averages$DATE, 5, 6))
  104. averages$day <- as.numeric(substr(averages$DATE, 7, 8))
  105. # Join the averages to the temperature measurements
  106. temps <- merge(temps, averages[, c('month', 'day', 'DLY-TMAX-NORMAL')],
  107. by = c('month', 'day'), all.x = TRUE)
  108. View(temps)
  # NOTE(review): temps already holds the merged averages from history lines
  # 106-107; merging a second time would bring in the DLY-TMAX-NORMAL column
  # again - confirm which of the two results was used.
  109. temps <- merge(temps, averages[, c('month', 'day', 'DLY-TMAX-NORMAL')],
  110. by = c('month', 'day'), all.x = TRUE) %>% arrange(DATE)
  # Full commented rerun: read, filter, calendar columns, NA-padded lags, and
  # the averages join sorted by DATE in the same statement.
  111. # Read in the data as a dataframe
  112. df <- read_csv('raw_temps.csv')
  113. # Make sure all readings are from same station
  114. temps <- dplyr::filter(df, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
  115. # Create month, day, and week columns
  116. temps <- mutate(temps, month = lubridate::month(DATE),
  117. day = lubridate::day(DATE), week = lubridate::wday(DATE, label = TRUE))
  118. # Create the past max temperature columns
  119. temps$temp_1 <- c(NA, temps$TMAX[1:{nrow(temps) - 1}])
  120. temps$temp_2 <- c(NA, NA, temps$TMAX[1:{nrow(temps) - 2}])
  121. # Read in the averages as a dataframe
  122. averages <- read_csv('hist_averages.csv')
  123. # Create columns for the month and day
  124. averages$month <- as.numeric(substr(averages$DATE, 5, 6))
  125. averages$day <- as.numeric(substr(averages$DATE, 7, 8))
  126. # Join the averages to the temperature measurements
  127. temps <- merge(temps, averages[, c('month', 'day', 'DLY-TMAX-NORMAL')],
  128. by = c('month', 'day'), all.x = TRUE) %>% arrange(DATE)
  # Compact rerun; temps is now sorted by DATE before the lags are built, so
  # the row shifts follow date order.
  129. df <- read_csv('raw_temps.csv')
  130. temps <- dplyr::filter(df, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
  131. temps <- mutate(temps, month = lubridate::month(DATE),
  132. day = lubridate::day(DATE), week = lubridate::wday(DATE, label = TRUE)) %>%
  133. arrange(DATE)
  134. temps$temp_1 <- c(NA, temps$TMAX[1:{nrow(temps) - 1}])
  135. temps$temp_2 <- c(NA, NA, temps$TMAX[1:{nrow(temps) - 2}])
  136. averages <- read_csv('hist_averages.csv')
  137. averages$month <- as.numeric(substr(averages$DATE, 5, 6))
  138. averages$day <- as.numeric(substr(averages$DATE, 7, 8))
  139. temps <- merge(temps, averages[, c('month', 'day', 'DLY-TMAX-NORMAL')],
  140. by = c('month', 'day'), all.x = TRUE) %>% arrange(DATE)
  # Column selection. On history line 141 the unquoted DLY-TMAX-NORMAL parses as
  # a subtraction of three names; line 142 repeats the call with backticks.
  141. temps <- dplyr::select(temps, month, day, AWND, PRCP, TMAX, week, temp_1, temp_2, DLY-TMAX-NORMAL)
  142. temps <- dplyr::select(temps, month, day, AWND, PRCP, TMAX, week, temp_1, temp_2, `DLY-TMAX-NORMAL`)
  # History line 143 reads into df; from line 144 on the file is read straight
  # into temps and the same pipeline is rerun on it.
  143. df <- read_csv('raw_temps.csv')
  144. temps <- read_csv('raw_temps.csv')
  145. temps <- dplyr::filter(temps, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
  146. temps <- mutate(temps, month = lubridate::month(DATE),
  147. day = lubridate::day(DATE), week = lubridate::wday(DATE, label = TRUE)) %>%
  148. arrange(DATE)
  149. temps$temp_1 <- c(NA, temps$TMAX[1:{nrow(temps) - 1}])
  150. temps$temp_2 <- c(NA, NA, temps$TMAX[1:{nrow(temps) - 2}])
  151. averages <- read_csv('hist_averages.csv')
  152. averages$month <- as.numeric(substr(averages$DATE, 5, 6))
  153. averages$day <- as.numeric(substr(averages$DATE, 7, 8))
  154. temps <- merge(temps, averages[, c('month', 'day', 'DLY-TMAX-NORMAL')],
  155. by = c('month', 'day'), all.x = TRUE) %>% arrange(DATE)
  # Session restart: same pipeline with a year column added to the calendar
  # fields.
  156. library(tidyverse)
  157. library(lubridate)
  158. temps <- read_csv('raw_temps.csv')
  159. temps <- dplyr::filter(temps, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
  160. temps <- mutate(temps, year = lubridate::year(DATE), month = lubridate::month(DATE),
  161. day = lubridate::day(DATE), week = lubridate::wday(DATE, label = TRUE)) %>%
  162. arrange(DATE)
  163. temps$temp_1 <- c(NA, temps$TMAX[1:{nrow(temps) - 1}])
  164. temps$temp_2 <- c(NA, NA, temps$TMAX[1:{nrow(temps) - 2}])
  165. averages <- read_csv('hist_averages.csv')
  166. averages$month <- as.numeric(substr(averages$DATE, 5, 6))
  167. averages$day <- as.numeric(substr(averages$DATE, 7, 8))
  # NOTE(review): the trailing mutate(year = ) has no value on its right-hand
  # side - confirm whether this statement ran as recorded.
  168. temps <- merge(temps, averages[, c('month', 'day', 'DLY-TMAX-NORMAL')],
  169. by = c('month', 'day'), all.x = TRUE) %>% arrange(DATE) %>% mutate(year = )
  # Three tries at the column order: the first lists TMAX twice, the second
  # keeps it only at the end, the third puts temp_2 ahead of temp_1.
  170. temps <- dplyr::select(temps, year, month, day, week, AWND, PRCP, TMAX, temp_1, temp_2, `DLY-TMAX-NORMAL`, TMAX)
  171. temps <- dplyr::select(temps, year, month, day, week, AWND, PRCP, temp_1, temp_2, `DLY-TMAX-NORMAL`, TMAX)
  172. temps <- dplyr::select(temps, year, month, day, week, AWND, PRCP, temp_2, temp_1, `DLY-TMAX-NORMAL`, TMAX)
  # Positional rename: the ten names must line up with the select() order above.
  173. names(temps) <- c('year', 'month', 'day', 'weekday', 'ws', 'prcp', 'temp_2',
  174. 'temp_1', 'average', 'actual')
  # "friend" guess: one uniform draw per row between average - 20 and
  # average + 20. Two console trials (the second adds round()), then the rounded
  # version is stored. No seed is set in the visible history.
  175. sapply(temps$average, function(x) (runif(1, min = x - 20, max = x + 20)))
  176. sapply(temps$average, function(x) round(runif(1, min = x - 20, max = x + 20)))
  177. temps$friend <- sapply(temps$average, function(x) round(runif(1, min = x - 20, max = x + 20)))
  178. View(temps)
  # Drop the first two rows.
  179. temps <- temps[-c(1,2), ]
  # Scatter of ws against row index.
  180. ggplot(temps, aes(x = seq(1, nrow(temps)), y = ws)) + geom_point()
  # Attempt to plot each listed column against row index.
  # NOTE: inside a for loop a ggplot object is not auto-printed, and
  # `y = column` maps the loop variable's string rather than the column it names.
  181. for (column in c('ws', 'prcp', 'temp_2', 'temp_1', 'average', 'actual', 'friend')) {
  182. ggplot(temps, aes(x = seq(1, nrow(temps)), y = column)) + geom_point()
  183. }
  # print() added, but outside the loop and still with y = column.
  184. print(ggplot(temps, aes(x = seq(1, nrow(temps)), y = column)) + geom_point())
  # Final form: print() inside the loop, column looked up with temps[[column]].
  185. for (column in c('ws', 'prcp', 'temp_2', 'temp_1', 'average', 'actual', 'friend')) {
  186. print(ggplot(temps, aes(x = seq(1, nrow(temps)), y = temps[[column]])) + geom_point())
  187. }
  # Keep only rows without missing values, summarise, export, and count rows
  # per year.
  188. temps <- temps[complete.cases(temps), ]
  189. summary(temps)
  190. write_csv(temps, 'temps_extended.csv')
  191. table(temps$year)
  # Session restart: partial run (read, filter, calendar columns, TMAX lags)
  # before the full rerun below.
  192. library(tidyverse)
  193. library(lubridate)
  194. temps <- read_csv('raw_temps.csv')
  195. temps <- dplyr::filter(temps, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
  196. temps <- mutate(temps, year = lubridate::year(DATE),
  197. month = lubridate::month(DATE),
  198. day = lubridate::day(DATE),
  199. week = lubridate::wday(DATE, label = TRUE)) %>%
  200. arrange(DATE)
  201. temps$temp_1 <- c(NA, temps$TMAX[1:{nrow(temps) - 1}])
  202. temps$temp_2 <- c(NA, NA, temps$TMAX[1:{nrow(temps) - 2}])
  # Full rerun. New in this version: AWND, PRCP and SNWD are each shifted down
  # one row (NA in the first row), so every row holds the previous row's values,
  # and SNWD is carried into the final table as snwd.
  203. library(tidyverse)
  204. library(lubridate)
  205. # Read in the data as a dataframe
  206. temps <- read_csv('raw_temps.csv')
  207. # Make sure all readings are from same station
  208. temps <- dplyr::filter(temps, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
  209. # Create month, day, and week columns
  210. temps <- mutate(temps, year = lubridate::year(DATE),
  211. month = lubridate::month(DATE),
  212. day = lubridate::day(DATE),
  213. week = lubridate::wday(DATE, label = TRUE)) %>%
  214. arrange(DATE)
  215. # Create the past max temperature columns
  216. temps$temp_1 <- c(NA, temps$TMAX[1:{nrow(temps) - 1}])
  217. temps$temp_2 <- c(NA, NA, temps$TMAX[1:{nrow(temps) - 2}])
  218. # Shift the average wind speed, precipitation, and snow depth
  219. temps$AWND <- c(NA, temps$AWND[1:{nrow(temps) - 1}])
  220. temps$PRCP <- c(NA, temps$PRCP[1:{nrow(temps) - 1}])
  221. temps$SNWD <- c(NA, temps$SNWD[1:{nrow(temps) - 1}])
  # Averages: month and day are cut from characters 5-6 and 7-8 of DATE, then
  # joined onto temps (all rows of temps kept) and re-sorted by DATE.
  222. averages <- read_csv('hist_averages.csv')
  223. averages$month <- as.numeric(substr(averages$DATE, 5, 6))
  224. averages$day <- as.numeric(substr(averages$DATE, 7, 8))
  225. temps <- merge(temps, averages[, c('month', 'day', 'DLY-TMAX-NORMAL')],
  226. by = c('month', 'day'), all.x = TRUE) %>% arrange(DATE)
  # Select the model columns, then rename them by position; the eleven names
  # must stay in step with the select() order.
  227. temps <- dplyr::select(temps, year, month, day, week, AWND, PRCP, SNWD,
  228. temp_2, temp_1, `DLY-TMAX-NORMAL`, TMAX)
  229. names(temps) <- c('year', 'month', 'day', 'weekday', 'ws', 'prcp', 'snwd',
  230. 'temp_2', 'temp_1', 'average', 'actual')
  # "friend" guess: rounded uniform draw between average - 20 and average + 20.
  231. temps$friend <- sapply(temps$average, function(x)
  232. round(runif(1, min = x - 20, max = x + 20)))
  # Drop the first two rows, then any row that still has a missing value.
  233. temps <- temps[-c(1,2), ]
  234. temps <- temps[complete.cases(temps), ]
  235. summary(temps)
  236. View(temps)
  # RF temperature modeling ----
  #
  # Prepares the feature table for the random-forest temperature model and
  # writes it to 'temps_extended.csv'.
  #
  # Reads (from the current working directory):
  #   raw_temps.csv      - uses NAME, DATE, TMAX, AWND, PRCP, SNWD
  #   hist_averages.csv  - uses DATE and `DLY-TMAX-NORMAL`
  #
  # Output columns: year, month, day, weekday, ws_1, prcp_1, snwd_1,
  # temp_2, temp_1, average, actual, friend.
  library(tidyverse)
  library(lubridate)

  # Read the readings, keep a single station, add the calendar columns, and
  # sort by DATE so the row shifts below follow date order
  temps <- read_csv('raw_temps.csv') %>%
    dplyr::filter(NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US') %>%
    mutate(year = lubridate::year(DATE),
           month = lubridate::month(DATE),
           day = lubridate::day(DATE),
           week = lubridate::wday(DATE, label = TRUE)) %>%
    arrange(DATE)

  # Shift readings down so each row carries earlier rows' values: max
  # temperature from one and two rows back, and wind speed, precipitation and
  # snow depth from one row back. dplyr::lag() pads with NA and, unlike
  # 1:{nrow(temps) - 1} indexing, stays correct when the filtered table has
  # fewer than two rows.
  # NOTE(review): the shift is by row position, which assumes one row per
  # consecutive day - confirm raw_temps.csv has no date gaps.
  temps <- temps %>%
    mutate(temp_1 = dplyr::lag(TMAX, 1),
           temp_2 = dplyr::lag(TMAX, 2),
           AWND = dplyr::lag(AWND, 1),
           PRCP = dplyr::lag(PRCP, 1),
           SNWD = dplyr::lag(SNWD, 1))

  # Read in the averages; month and day are cut from fixed positions of DATE
  # (characters 5-6 and 7-8)
  averages <- read_csv('hist_averages.csv')
  averages$month <- as.numeric(substr(averages$DATE, 5, 6))
  averages$day <- as.numeric(substr(averages$DATE, 7, 8))

  # Join the averages to the temperature measurements. left_join() keeps every
  # row of temps in its existing (date) order, so no re-sort is needed.
  temps <- left_join(temps,
                     averages[, c('month', 'day', 'DLY-TMAX-NORMAL')],
                     by = c('month', 'day'))

  # Select, order and rename the relevant columns in one step; renaming by name
  # (not by position) means a change to the selection cannot mislabel a column
  temps <- dplyr::select(temps,
                         year, month, day,
                         weekday = week,
                         ws_1 = AWND,
                         prcp_1 = PRCP,
                         snwd_1 = SNWD,
                         temp_2, temp_1,
                         average = `DLY-TMAX-NORMAL`,
                         actual = TMAX)

  # Friend predictions: one uniform draw per row between average - 20 and
  # average + 20, rounded to a whole number. runif() recycles min/max across
  # the rows, so no per-row sapply() is needed.
  # NOTE: no seed is set, so this column differs from run to run.
  temps$friend <- round(runif(nrow(temps),
                              min = temps$average - 20,
                              max = temps$average + 20))

  # Remove rows with missing values. This also drops the first two rows, whose
  # lagged temperatures are NA, so they need no separate removal.
  temps <- temps[complete.cases(temps), ]

  # Summary of data
  summary(temps)

  # Write to csv file
  write_csv(temps, 'temps_extended.csv')