# munge_temps.R
  1. # RF temperature modeling
  2. #
  3. # Read in data
  4. library(tidyverse)
  5. library(lubridate)
  6. # Read in the data as a dataframe
  7. temps <- read_csv('raw_temps.csv')
  8. # Make sure all readings are from same station
  9. temps <- dplyr::filter(temps, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
  10. # Create month, day, and week columns
  11. temps <- mutate(temps, year = lubridate::year(DATE),
  12. month = lubridate::month(DATE),
  13. day = lubridate::day(DATE),
  14. week = lubridate::wday(DATE, label = TRUE)) %>%
  15. arrange(DATE)
  16. # Create the past max temperature columns
  17. temps$temp_1 <- c(NA, temps$TMAX[1:{nrow(temps) - 1}])
  18. temps$temp_2 <- c(NA, NA, temps$TMAX[1:{nrow(temps) - 2}])
  19. # Shift the average wind speed, precipitation, and snow depth
  20. temps$AWND <- c(NA, temps$AWND[1:{nrow(temps) - 1}])
  21. temps$PRCP <- c(NA, temps$PRCP[1:{nrow(temps) - 1}])
  22. temps$SNWD <- c(NA, temps$SNWD[1:{nrow(temps) - 1}])
  23. # Read in the averages as a dataframe
  24. averages <- read_csv('hist_averages.csv')
  25. # Create columns for the month and day
  26. averages$month <- as.numeric(substr(averages$DATE, 5, 6))
  27. averages$day <- as.numeric(substr(averages$DATE, 7, 8))
  28. # Join the averages to the temperature measurements
  29. temps <- merge(temps, averages[, c('month', 'day', 'DLY-TMAX-NORMAL')],
  30. by = c('month', 'day'), all.x = TRUE) %>% arrange(DATE)
  31. # Select and order relevant columns
  32. temps <- dplyr::select(temps, year, month, day, week, AWND, PRCP, SNWD,
  33. temp_2, temp_1, `DLY-TMAX-NORMAL`, TMAX)
  34. # Rename columns
  35. names(temps) <- c('year', 'month', 'day', 'weekday', 'ws_1', 'prcp_1', 'snwd_1',
  36. 'temp_2', 'temp_1', 'average', 'actual')
  37. # Friend predictions
  38. temps$friend <- sapply(temps$average, function(x)
  39. round(runif(1, min = x - 20, max = x + 20)))
  40. # Remove first two rows
  41. temps <- temps[-c(1,2), ]
  42. # Remove na
  43. temps <- temps[complete.cases(temps), ]
  44. # Summary of data
  45. summary(temps)
  46. # Write to csv file
  47. write_csv(temps, 'temps_extended.csv')