# Exploratory analysis script, reconstructed from an interactive R history.
#
# The history had been collapsed onto a handful of physical lines, so several
# statements shared a line and every `#` comment swallowed the code after it.
# This version puts one statement per line and keeps each distinct step once,
# in its final corrected form. Exact repeats and attempts that errored (for
# example facet_grid(temp, intensity), the unquoted DLY-TMAX-NORMAL column and
# mutate(year = )) are dropped.
#
# Part 1 plots the data-visualisation challenge data.
# Part 2 builds the lagged temperature feature table and writes it to
# temps_extended.csv.

library(tidyverse)
library(lubridate)
library(gganimate)
library(ggthemes)

# Input/output directories. These replace the setwd() calls of the original
# sessions so the script no longer changes the caller's working directory.
# NOTE(review): the history does not show which directory held
# data_vis_challenge.csv (it was read before any setwd()) -- confirm.
vis_dir <- "."
temps_dir <- path.expand("~/Data-Analysis/random_forest_explained/data")

# Part 1: data visualisation challenge ----

df <- read_csv(file.path(vis_dir, "data_vis_challenge.csv"))

# Long format: every column other than Light Intensity and Temperature becomes
# a (species, rate) pair.
df <- gather(
  df,
  key = "species", value = "rate",
  -`Light Intensity`, -Temperature
)
df <- dplyr::rename(df, intensity = `Light Intensity`, temp = Temperature)

# Rate against temperature. The history mapped y to intensity while labelling
# the axis "Rate"; y now uses rate so the plot matches its label.
# NOTE(review): confirm that rate was the intended y variable.
print(
  ggplot(df, aes(x = temp, y = rate, color = species, size = intensity)) +
    geom_point() +
    xlab("Temperature") +
    ylab("Rate")
)

# Rate against light intensity with temp mapped to the `frame` aesthetic.
# NOTE(review): the history only ever printed this as a plain ggplot; no
# animation call appears in it.
print(
  ggplot(df, aes(x = intensity, y = rate, color = species, frame = temp)) +
    geom_point() +
    theme_classic(12)
)

# Counts of the distinct intensity and temperature levels.
print(table(df$intensity))
print(table(df$temp))

# Horizontal bars of rate per species, faceted by the two conditions.
# stat = "identity" is required because y is supplied explicitly.
species_bars <- ggplot(df, aes(x = species, y = rate)) +
  geom_bar(stat = "identity", fill = "navy") +
  coord_flip()

# First layout tried: temperature in rows, intensity in columns.
print(species_bars + facet_grid(temp ~ intensity))

# Layout kept afterwards (intensity in rows), shown with each theme tried.
rate_grid <- species_bars + facet_grid(intensity ~ temp)
print(rate_grid + theme_hc(12))
print(rate_grid + theme_stata(12))
print(rate_grid + theme_economist(12))
print(rate_grid + theme_classic(12))

# Part 2: temperature features for the random forest example ----

# Read in the raw readings and list the station names present.
df <- read_csv(file.path(temps_dir, "raw_temps.csv"))
print(table(df$NAME))

# Make sure all readings are from the same station.
station <- "SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US"
temps <- dplyr::filter(df, NAME == station)

# Create year, month, day, and weekday columns, then sort by date so that the
# lags below refer to the preceding rows in date order.
temps <- temps %>%
  mutate(
    year = lubridate::year(DATE),
    month = lubridate::month(DATE),
    day = lubridate::day(DATE),
    week = lubridate::wday(DATE, label = TRUE)
  ) %>%
  arrange(DATE)

# Past max temperatures (one and two rows back), and wind speed,
# precipitation and snow depth shifted one row back. dplyr::lag() pads with
# NA and, unlike c(NA, x[1:(n - 1)]), also works when there are fewer than
# two rows.
temps <- temps %>%
  mutate(
    temp_1 = dplyr::lag(TMAX, 1),
    temp_2 = dplyr::lag(TMAX, 2),
    AWND = dplyr::lag(AWND, 1),
    PRCP = dplyr::lag(PRCP, 1),
    SNWD = dplyr::lag(SNWD, 1)
  )

# Read in the historical averages and derive month/day join keys.
# NOTE(review): the substr() positions assume DATE looks like YYYYMMDD --
# confirm against hist_averages.csv.
averages <- read_csv(file.path(temps_dir, "hist_averages.csv"))
averages$month <- as.numeric(substr(averages$DATE, 5, 6))
averages$day <- as.numeric(substr(averages$DATE, 7, 8))

# Join the averages to the temperature measurements, keeping every reading
# (all.x = TRUE), and restore date order afterwards.
temps <- merge(
  temps,
  averages[, c("month", "day", "DLY-TMAX-NORMAL")],
  by = c("month", "day"),
  all.x = TRUE
) %>%
  arrange(DATE)

# Select, order, and rename the relevant columns in one step.
temps <- dplyr::select(
  temps,
  year, month, day,
  weekday = week,
  ws_1 = AWND,
  prcp_1 = PRCP,
  snwd_1 = SNWD,
  temp_2, temp_1,
  average = `DLY-TMAX-NORMAL`,
  actual = TMAX
)

# "Friend" prediction: a uniform draw within 20 of the average, rounded.
# runif() is vectorised over min and max, so a single call replaces the
# element-wise sapply().
# NOTE(review): no seed is set anywhere, so this column differs between runs.
temps$friend <- round(
  runif(nrow(temps), min = temps$average - 20, max = temps$average + 20)
)

# Remove the first two rows, which have no two-row lag.
temps <- temps[-c(1, 2), ]

# Diagnostic scatter of each numeric column against row index. The loop
# variable is a string, so the column is looked up with .data[[column]].
# NOTE(review): in the history this loop belonged to an intermediate version
# whose columns were named ws and prcp; the names below are the final ones.
plot_columns <- c(
  "ws_1", "prcp_1", "temp_2", "temp_1", "average", "actual", "friend"
)
for (column in plot_columns) {
  print(
    ggplot(temps, aes(x = seq_len(nrow(temps)), y = .data[[column]])) +
      geom_point() +
      ylab(column)
  )
}

# Remove rows with any missing value.
temps <- temps[complete.cases(temps), ]

# Summary of the data.
print(summary(temps))

# Spreadsheet-style inspection only makes sense in an interactive session.
if (interactive()) {
  View(averages)
  View(temps)
}

# Write to csv file.
write_csv(temps, file.path(temps_dir, "temps_extended.csv"))

# Number of rows kept per year.
print(table(temps$year))