|
@@ -0,0 +1,283 @@
|
|
|
+library(gganimate)
|
|
|
+library(ggthemes)
|
|
|
+df <- read_csv('data_vis_challenge.csv')
|
|
|
+library(tidyverse)
|
|
|
+library(gganimate)
|
|
|
+library(ggthemes)
|
|
|
+df <- read_csv('data_vis_challenge.csv')
|
|
|
+df <- gather(df, key = 'species', value = 'rate', -`Light Intensity`, -Temperature)
|
|
|
+df <- dplyr::rename(df, intensity = `Light Intensity`, temp = Temperature)
|
|
|
+ggplot(data = df, aes(x = temp, y = intensity, color = species, size = intensity)) + geom_point() +
|
|
|
+xlab('Temperature') + ylab('Rate')
|
|
|
+ggplot(df, aes(x = intensity, y = rate, color = species, frame = temp)) +
|
|
|
+geom_point() + theme_classic(12)
|
|
|
+library(tidyverse)
|
|
|
+library(gganimate)
|
|
|
+library(ggthemes)
|
|
|
+df <- read_csv('data_vis_challenge.csv')
|
|
|
+setwd("C:/Users/Will Koehrsen/Desktop")
|
|
|
+load("C:/Users/Will Koehrsen/Desktop/.RData")
|
|
|
+ggplot(data = df, aes(x = intensity, y = rate, color = species, frame = temp)) +
|
|
|
+geom_point() + theme_classic(12)
|
|
|
+table(df$intensity)
|
|
|
+table(df$temp)
|
|
|
+ggplot(data = df, aes(x = species, y = rate)) + geom_bar(fill = 'navy') +
|
|
|
+facet_grid(temp, intensity) + coord_flip()
|
|
|
+ggplot(data = df, aes(x = species, y = rate)) + geom_bar(fill = 'navy') +
|
|
|
+facet_grid(temp ~ intensity) + coord_flip()
|
|
|
+ggplot(data = df, aes(x = species, y = rate)) + geom_bar(stat = 'identity', fill = 'navy') +
|
|
|
+facet_grid(temp ~ intensity) + coord_flip()
|
|
|
+ggplot(data = df, aes(x = species, y = rate)) + geom_bar(stat = 'identity', fill = 'navy') +
|
|
|
+facet_grid(intensity ~ temp) + coord_flip() + theme_hc(12)
|
|
|
+ggplot(data = df, aes(x = species, y = rate)) + geom_bar(stat = 'identity', fill = 'navy') +
|
|
|
+facet_grid(intensity ~ temp) + coord_flip() + theme_stata(12)
|
|
|
+ggplot(data = df, aes(x = species, y = rate)) + geom_bar(stat = 'identity', fill = 'navy') +
|
|
|
+facet_grid(intensity ~ temp) + coord_flip() + theme_economist(12)
|
|
|
+ggplot(data = df, aes(x = species, y = rate)) + geom_bar(stat = 'identity', fill = 'navy') +
|
|
|
+facet_grid(intensity ~ temp) + coord_flip() + theme_hc(12)
|
|
|
+ggplot(data = df, aes(x = species, y = rate)) + geom_bar(stat = 'identity', fill = 'navy') +
|
|
|
+facet_grid(intensity ~ temp) + coord_flip() + theme_classic(12)
|
|
|
+setwd("~/Data-Analysis/random_forest_explained")
|
|
|
+library(tidyverse)
|
|
|
+library(lubridate)
|
|
|
+setwd("~/Data-Analysis/random_forest_explained")
|
|
|
+df <- read_csv('1169857.csv')
|
|
|
+df <- dplyr::filter(df, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT WA US')
|
|
|
+df <- read_csv('1169857.csv')
|
|
|
+table(df$NAME)
|
|
|
+df <- dplyr::filter(df, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
|
|
|
+temps <- dplyr::filter(df, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
|
|
|
+temps <- mutate(temps, month = lubridate::month(DATE),
|
|
|
+day = lubridate::day(DATE), week = lubridate::wday(DATE, label = TRUE))
|
|
|
+temps$temp_1 <- c(45, temps$TMAX[1:{nrow(temps) - 1}])
|
|
|
+temps$temp_2 <- c(45, 44, temps$TMAX[1:{nrow(temps) - 2}])
|
|
|
+View(temps)
|
|
|
+averages <- read_csv('averages.csv')
|
|
|
+View(averages)
|
|
|
+df <- read_csv('raw_temps.csv')
|
|
|
+temps <- dplyr::filter(df, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
|
|
|
+temps <- mutate(temps, month = lubridate::month(DATE),
|
|
|
+day = lubridate::day(DATE), week = lubridate::wday(DATE, label = TRUE))
|
|
|
+temps$temp_1 <- c(45, temps$TMAX[1:{nrow(temps) - 1}])
|
|
|
+temps$temp_2 <- c(45, 44, temps$TMAX[1:{nrow(temps) - 2}])
|
|
|
+averages <- read_csv('averages.csv')
|
|
|
+averages$month <- as.numeric(substr(averages$DATE, 5, 6))
|
|
|
+averages$day <- as.numeric(substr(averages$DATE, 7, 8))
|
|
|
+temps <- merge(temps, averages[, c('month', 'day', 'DLY-TMAX-NORMAL')], by = c('month', 'day'),
|
|
|
+all.x = TRUE)
|
|
|
+View(temps)
|
|
|
+df <- read_csv('raw_temps.csv')
|
|
|
+temps <- dplyr::filter(df, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
|
|
|
+temps <- mutate(temps, month = lubridate::month(DATE),
|
|
|
+day = lubridate::day(DATE), week = lubridate::wday(DATE, label = TRUE))
|
|
|
+temps$temp_1 <- c(45, temps$TMAX[1:{nrow(temps) - 1}])
|
|
|
+temps$temp_2 <- c(45, 44, temps$TMAX[1:{nrow(temps) - 2}])
|
|
|
+averages <- read_csv('averages.csv')
|
|
|
+averages$month <- as.numeric(substr(averages$DATE, 5, 6))
|
|
|
+averages$day <- as.numeric(substr(averages$DATE, 7, 8))
|
|
|
+temps <- merge(temps, averages[, c('month', 'day', 'DLY-TMAX-NORMAL')], by = c('month', 'day'),
|
|
|
+all.x = TRUE)
|
|
|
+temps <- arrange(temps, DATE)
|
|
|
+library(tidyverse)
|
|
|
+library(lubridate)
|
|
|
+df <- read_csv('raw_temps.csv')
|
|
|
+setwd("~/Data-Analysis/random_forest_explained/data")
|
|
|
+df <- read_csv('raw_temps.csv')
|
|
|
+temps <- dplyr::filter(df, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
|
|
|
+temps <- mutate(temps, month = lubridate::month(DATE),
|
|
|
+day = lubridate::day(DATE), week = lubridate::wday(DATE, label = TRUE))
|
|
|
+temps$temp_1 <- c(temps$TMAX[1:{nrow(temps) - 1}])
|
|
|
+# Read in the data as a dataframe
|
|
|
+df <- read_csv('raw_temps.csv')
|
|
|
+# Make sure all readings are from same station
|
|
|
+temps <- dplyr::filter(df, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
|
|
|
+# Create month, day, and week columns
|
|
|
+temps <- mutate(temps, month = lubridate::month(DATE),
|
|
|
+day = lubridate::day(DATE), week = lubridate::wday(DATE, label = TRUE))
|
|
|
+# Create the past max temperature columns
|
|
|
+temps$temp_1 <- c(NA, temps$TMAX[1:{nrow(temps) - 1}])
|
|
|
+temps$temp_2 <- c(NA, NA, temps$TMAX[1:{nrow(temps) - 2}])
|
|
|
+# Read in the averages as a dataframe
|
|
|
+averages <- read_csv('hist_averages.csv')
|
|
|
+# Create columns for the month and day
|
|
|
+averages$month <- as.numeric(substr(averages$DATE, 5, 6))
|
|
|
+averages$day <- as.numeric(substr(averages$DATE, 7, 8))
|
|
|
+# Join the averages to the temperature measurements
|
|
|
+temps <- merge(temps, averages[, c('month', 'day', 'DLY-TMAX-NORMAL')],
|
|
|
+by = c('month', 'day'), all.x = TRUE)
|
|
|
+View(temps)
|
|
|
+temps <- merge(temps, averages[, c('month', 'day', 'DLY-TMAX-NORMAL')],
|
|
|
+by = c('month', 'day'), all.x = TRUE) %>% arrange(DATE)
|
|
|
+# Read in the data as a dataframe
|
|
|
+df <- read_csv('raw_temps.csv')
|
|
|
+# Make sure all readings are from same station
|
|
|
+temps <- dplyr::filter(df, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
|
|
|
+# Create month, day, and week columns
|
|
|
+temps <- mutate(temps, month = lubridate::month(DATE),
|
|
|
+day = lubridate::day(DATE), week = lubridate::wday(DATE, label = TRUE))
|
|
|
+# Create the past max temperature columns
|
|
|
+temps$temp_1 <- c(NA, temps$TMAX[1:{nrow(temps) - 1}])
|
|
|
+temps$temp_2 <- c(NA, NA, temps$TMAX[1:{nrow(temps) - 2}])
|
|
|
+# Read in the averages as a dataframe
|
|
|
+averages <- read_csv('hist_averages.csv')
|
|
|
+# Create columns for the month and day
|
|
|
+averages$month <- as.numeric(substr(averages$DATE, 5, 6))
|
|
|
+averages$day <- as.numeric(substr(averages$DATE, 7, 8))
|
|
|
+# Join the averages to the temperature measurements
|
|
|
+temps <- merge(temps, averages[, c('month', 'day', 'DLY-TMAX-NORMAL')],
|
|
|
+by = c('month', 'day'), all.x = TRUE) %>% arrange(DATE)
|
|
|
+df <- read_csv('raw_temps.csv')
|
|
|
+temps <- dplyr::filter(df, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
|
|
|
+temps <- mutate(temps, month = lubridate::month(DATE),
|
|
|
+day = lubridate::day(DATE), week = lubridate::wday(DATE, label = TRUE)) %>%
|
|
|
+arrange(DATE)
|
|
|
+temps$temp_1 <- c(NA, temps$TMAX[1:{nrow(temps) - 1}])
|
|
|
+temps$temp_2 <- c(NA, NA, temps$TMAX[1:{nrow(temps) - 2}])
|
|
|
+averages <- read_csv('hist_averages.csv')
|
|
|
+averages$month <- as.numeric(substr(averages$DATE, 5, 6))
|
|
|
+averages$day <- as.numeric(substr(averages$DATE, 7, 8))
|
|
|
+temps <- merge(temps, averages[, c('month', 'day', 'DLY-TMAX-NORMAL')],
|
|
|
+by = c('month', 'day'), all.x = TRUE) %>% arrange(DATE)
|
|
|
+temps <- dplyr::select(temps, month, day, AWND, PRCP, TMAX, week, temp_1, temp_2, DLY-TMAX-NORMAL)
|
|
|
+temps <- dplyr::select(temps, month, day, AWND, PRCP, TMAX, week, temp_1, temp_2, `DLY-TMAX-NORMAL`)
|
|
|
+df <- read_csv('raw_temps.csv')
|
|
|
+temps <- read_csv('raw_temps.csv')
|
|
|
+temps <- dplyr::filter(temps, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
|
|
|
+temps <- mutate(temps, month = lubridate::month(DATE),
|
|
|
+day = lubridate::day(DATE), week = lubridate::wday(DATE, label = TRUE)) %>%
|
|
|
+arrange(DATE)
|
|
|
+temps$temp_1 <- c(NA, temps$TMAX[1:{nrow(temps) - 1}])
|
|
|
+temps$temp_2 <- c(NA, NA, temps$TMAX[1:{nrow(temps) - 2}])
|
|
|
+averages <- read_csv('hist_averages.csv')
|
|
|
+averages$month <- as.numeric(substr(averages$DATE, 5, 6))
|
|
|
+averages$day <- as.numeric(substr(averages$DATE, 7, 8))
|
|
|
+temps <- merge(temps, averages[, c('month', 'day', 'DLY-TMAX-NORMAL')],
|
|
|
+by = c('month', 'day'), all.x = TRUE) %>% arrange(DATE)
|
|
|
+library(tidyverse)
|
|
|
+library(lubridate)
|
|
|
+temps <- read_csv('raw_temps.csv')
|
|
|
+temps <- dplyr::filter(temps, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
|
|
|
+temps <- mutate(temps, year = lubridate::year(DATE), month = lubridate::month(DATE),
|
|
|
+day = lubridate::day(DATE), week = lubridate::wday(DATE, label = TRUE)) %>%
|
|
|
+arrange(DATE)
|
|
|
+temps$temp_1 <- c(NA, temps$TMAX[1:{nrow(temps) - 1}])
|
|
|
+temps$temp_2 <- c(NA, NA, temps$TMAX[1:{nrow(temps) - 2}])
|
|
|
+averages <- read_csv('hist_averages.csv')
|
|
|
+averages$month <- as.numeric(substr(averages$DATE, 5, 6))
|
|
|
+averages$day <- as.numeric(substr(averages$DATE, 7, 8))
|
|
|
+temps <- merge(temps, averages[, c('month', 'day', 'DLY-TMAX-NORMAL')],
|
|
|
+by = c('month', 'day'), all.x = TRUE) %>% arrange(DATE) %>% mutate(year = )
|
|
|
+temps <- dplyr::select(temps, year, month, day, week, AWND, PRCP, TMAX, temp_1, temp_2, `DLY-TMAX-NORMAL`, TMAX)
|
|
|
+temps <- dplyr::select(temps, year, month, day, week, AWND, PRCP, temp_1, temp_2, `DLY-TMAX-NORMAL`, TMAX)
|
|
|
+temps <- dplyr::select(temps, year, month, day, week, AWND, PRCP, temp_2, temp_1, `DLY-TMAX-NORMAL`, TMAX)
|
|
|
+names(temps) <- c('year', 'month', 'day', 'weekday', 'ws', 'prcp', 'temp_2',
|
|
|
+'temp_1', 'average', 'actual')
|
|
|
+sapply(temps$average, function(x) (runif(1, min = x - 20, max = x + 20)))
|
|
|
+sapply(temps$average, function(x) round(runif(1, min = x - 20, max = x + 20)))
|
|
|
+temps$friend <- sapply(temps$average, function(x) round(runif(1, min = x - 20, max = x + 20)))
|
|
|
+View(temps)
|
|
|
+temps <- temps[-c(1,2), ]
|
|
|
+ggplot(temps, aes(x = seq(1, nrow(temps)), y = ws)) + geom_point()
|
|
|
+for (column in c('ws', 'prcp', 'temp_2', 'temp_1', 'average', 'actual', 'friend')) {
|
|
|
+ggplot(temps, aes(x = seq(1, nrow(temps)), y = column)) + geom_point()
|
|
|
+}
|
|
|
+print(ggplot(temps, aes(x = seq(1, nrow(temps)), y = column)) + geom_point())
|
|
|
+for (column in c('ws', 'prcp', 'temp_2', 'temp_1', 'average', 'actual', 'friend')) {
|
|
|
+print(ggplot(temps, aes(x = seq(1, nrow(temps)), y = temps[[column]])) + geom_point())
|
|
|
+}
|
|
|
+temps <- temps[complete.cases(temps), ]
|
|
|
+summary(temps)
|
|
|
+write_csv(temps, 'temps_extended.csv')
|
|
|
+table(temps$year)
|
|
|
+library(tidyverse)
|
|
|
+library(lubridate)
|
|
|
+temps <- read_csv('raw_temps.csv')
|
|
|
+temps <- dplyr::filter(temps, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
|
|
|
+temps <- mutate(temps, year = lubridate::year(DATE),
|
|
|
+month = lubridate::month(DATE),
|
|
|
+day = lubridate::day(DATE),
|
|
|
+week = lubridate::wday(DATE, label = TRUE)) %>%
|
|
|
+arrange(DATE)
|
|
|
+temps$temp_1 <- c(NA, temps$TMAX[1:{nrow(temps) - 1}])
|
|
|
+temps$temp_2 <- c(NA, NA, temps$TMAX[1:{nrow(temps) - 2}])
|
|
|
+library(tidyverse)
|
|
|
+library(lubridate)
|
|
|
+# Read in the data as a dataframe
|
|
|
+temps <- read_csv('raw_temps.csv')
|
|
|
+# Make sure all readings are from same station
|
|
|
+temps <- dplyr::filter(temps, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
|
|
|
+# Create month, day, and week columns
|
|
|
+temps <- mutate(temps, year = lubridate::year(DATE),
|
|
|
+month = lubridate::month(DATE),
|
|
|
+day = lubridate::day(DATE),
|
|
|
+week = lubridate::wday(DATE, label = TRUE)) %>%
|
|
|
+arrange(DATE)
|
|
|
+# Create the past max temperature columns
|
|
|
+temps$temp_1 <- c(NA, temps$TMAX[1:{nrow(temps) - 1}])
|
|
|
+temps$temp_2 <- c(NA, NA, temps$TMAX[1:{nrow(temps) - 2}])
|
|
|
+# Shift the average wind speed, precipitation, and snow depth
|
|
|
+temps$AWND <- c(NA, temps$AWND[1:{nrow(temps) - 1}])
|
|
|
+temps$PRCP <- c(NA, temps$PRCP[1:{nrow(temps) - 1}])
|
|
|
+temps$SNWD <- c(NA, temps$SNWD[1:{nrow(temps) - 1}])
|
|
|
+averages <- read_csv('hist_averages.csv')
|
|
|
+averages$month <- as.numeric(substr(averages$DATE, 5, 6))
|
|
|
+averages$day <- as.numeric(substr(averages$DATE, 7, 8))
|
|
|
+temps <- merge(temps, averages[, c('month', 'day', 'DLY-TMAX-NORMAL')],
|
|
|
+by = c('month', 'day'), all.x = TRUE) %>% arrange(DATE)
|
|
|
+temps <- dplyr::select(temps, year, month, day, week, AWND, PRCP, SNWD,
|
|
|
+temp_2, temp_1, `DLY-TMAX-NORMAL`, TMAX)
|
|
|
+names(temps) <- c('year', 'month', 'day', 'weekday', 'ws', 'prcp', 'snwd',
|
|
|
+'temp_2', 'temp_1', 'average', 'actual')
|
|
|
+temps$friend <- sapply(temps$average, function(x)
|
|
|
+round(runif(1, min = x - 20, max = x + 20)))
|
|
|
+temps <- temps[-c(1,2), ]
|
|
|
+temps <- temps[complete.cases(temps), ]
|
|
|
+summary(temps)
|
|
|
+View(temps)
|
|
|
+# RF temperature modeling
|
|
|
+#
|
|
|
+# Read in data
|
|
|
+library(tidyverse)
|
|
|
+library(lubridate)
|
|
|
+# Read in the data as a dataframe
|
|
|
+temps <- read_csv('raw_temps.csv')
|
|
|
+# Make sure all readings are from same station
|
|
|
+temps <- dplyr::filter(temps, NAME == 'SEATTLE TACOMA INTERNATIONAL AIRPORT, WA US')
|
|
|
+# Create month, day, and week columns
|
|
|
+temps <- mutate(temps, year = lubridate::year(DATE),
|
|
|
+month = lubridate::month(DATE),
|
|
|
+day = lubridate::day(DATE),
|
|
|
+week = lubridate::wday(DATE, label = TRUE)) %>%
|
|
|
+arrange(DATE)
|
|
|
+# Create the past max temperature columns
|
|
|
+temps$temp_1 <- c(NA, temps$TMAX[1:{nrow(temps) - 1}])
|
|
|
+temps$temp_2 <- c(NA, NA, temps$TMAX[1:{nrow(temps) - 2}])
|
|
|
+# Shift the average wind speed, precipitation, and snow depth
|
|
|
+temps$AWND <- c(NA, temps$AWND[1:{nrow(temps) - 1}])
|
|
|
+temps$PRCP <- c(NA, temps$PRCP[1:{nrow(temps) - 1}])
|
|
|
+temps$SNWD <- c(NA, temps$SNWD[1:{nrow(temps) - 1}])
|
|
|
+# Read in the averages as a dataframe
|
|
|
+averages <- read_csv('hist_averages.csv')
|
|
|
+# Create columns for the month and day
|
|
|
+averages$month <- as.numeric(substr(averages$DATE, 5, 6))
|
|
|
+averages$day <- as.numeric(substr(averages$DATE, 7, 8))
|
|
|
+# Join the averages to the temperature measurements
|
|
|
+temps <- merge(temps, averages[, c('month', 'day', 'DLY-TMAX-NORMAL')],
|
|
|
+by = c('month', 'day'), all.x = TRUE) %>% arrange(DATE)
|
|
|
+# Select and order relevant columns
|
|
|
+temps <- dplyr::select(temps, year, month, day, week, AWND, PRCP, SNWD,
|
|
|
+temp_2, temp_1, `DLY-TMAX-NORMAL`, TMAX)
|
|
|
+# Rename columns
|
|
|
+names(temps) <- c('year', 'month', 'day', 'weekday', 'ws_1', 'prcp_1', 'snwd_1',
|
|
|
+'temp_2', 'temp_1', 'average', 'actual')
|
|
|
+# Friend predictions
|
|
|
+temps$friend <- sapply(temps$average, function(x)
|
|
|
+round(runif(1, min = x - 20, max = x + 20)))
|
|
|
+# Remove first two rows
|
|
|
+temps <- temps[-c(1,2), ]
|
|
|
+# Remove na
|
|
|
+temps <- temps[complete.cases(temps), ]
|
|
|
+# Summary of data
|
|
|
+summary(temps)
|
|
|
+# Write to csv file
|
|
|
+write_csv(temps, 'temps_extended.csv')
|