Forráskód Böngészése

Finish extra regressors in R

Ben Letham 8 éve
szülő
commit
57c97f2e5e

+ 2 - 0
R/NAMESPACE

@@ -2,10 +2,12 @@
 
 S3method(plot,prophet)
 S3method(predict,prophet)
+export(add_regressor)
 export(add_seasonality)
 export(cross_validation)
 export(fit.prophet)
 export(make_future_dataframe)
+export(plot_forecast_component)
 export(predictive_samples)
 export(prophet)
 export(prophet_plot_components)

+ 242 - 264
R/R/prophet.R

@@ -9,7 +9,7 @@
 globalVariables(c(
   "ds", "y", "cap", ".",
   "component", "dow", "doy", "holiday", "holidays", "holidays_lower", "holidays_upper", "ix",
-  "lower", "n", "stat", "trend", "row_number", "extra_regressors",
+  "lower", "n", "stat", "trend", "row_number", "extra_regressors", "col",
   "trend_lower", "trend_upper", "upper", "value", "weekly", "weekly_lower", "weekly_upper",
   "x", "yearly", "yearly_lower", "yearly_upper", "yhat", "yhat_lower", "yhat_upper"))
 
@@ -80,7 +80,6 @@ prophet <- function(df = NULL,
                     weekly.seasonality = 'auto',
                     daily.seasonality = 'auto',
                     holidays = NULL,
-                    extra_regressors = NULL,  #new
                     seasonality.prior.scale = 10,
                     holidays.prior.scale = 10,
                     changepoint.prior.scale = 0.05,
@@ -104,7 +103,6 @@ prophet <- function(df = NULL,
     weekly.seasonality = weekly.seasonality,
     daily.seasonality = daily.seasonality,
     holidays = holidays,
-    extra_regressors = extra_regressors,
     seasonality.prior.scale = seasonality.prior.scale,
     changepoint.prior.scale = changepoint.prior.scale,
     holidays.prior.scale = holidays.prior.scale,
@@ -117,6 +115,7 @@ prophet <- function(df = NULL,
     t.scale = NULL,
     changepoints.t = NULL,
     seasonalities = list(),
+    extra_regressors = list(),
     stan.fit = NULL,
     params = list(),
     history = NULL,
@@ -132,48 +131,6 @@ prophet <- function(df = NULL,
   return(m)
 }
 
-#' Validates the name of a seasonality, holiday, or regressor
-#'
-#' @param m Prophet object.
-#' @param name string
-#' @param check_holidays bool check if name already used for holiday
-#' @param check_seasonalities bool check if name already used for seasonality
-#' @param check_regressors  bool check if name already used for regressor
-#'
-validate_column_name <- function(m, name, check_holidays = TRUE,
-                                 check_seasonalities = TRUE, check_regressors = TRUE) {
-
-  if (grepl("_delim_", name)) {
-    stop('Holiday name cannot contain "_delim_"')
-  }
-
-  reserved_names = c('trend', 'seasonal', 'seasonalities', 'daily', 'weekly', 'yearly',
-                     'holidays', 'zeros', 'extra_regressors', 'yhat')
-
-  rn_l = paste(reserved_names,"_lower",sep="")
-  rn_u = paste(reserved_names,"_upper",sep="")
-  reserved_names = c(reserved_names, rn_l, rn_u, c("ds","y"));
-
-  if(name %in% reserved_names){
-    error_message = paste("Name ", name, " is reserved.");
-    stop(error_message)
-  }
-
-  if(check_holidays & !is.null(m$holidays) & (name %in% unique(m$holidays$holiday))){
-    error_message = paste("Name ", name, " already used for a holiday.");
-    stop(error_message)
-  }
-  #m$yearly.seasonality
-  if(check_seasonalities & (name %in% m$seasonalities[[name]])){
-    error_message = paste("Name ", name, " already used for a seasonality.");
-    stop(error_message)
-  }
-  if(check_regressors & (name %in% m$extra_regressors[[name]])){
-    error_message = paste("Name ", name, " already used for an added regressor.");
-    stop(error_message)
-  }
-}
-
 #' Validates the inputs to Prophet.
 #'
 #' @param m Prophet object.
@@ -205,11 +162,50 @@ validate_inputs <- function(m) {
       }
     }
     for (h in unique(m$holidays$holiday)) {
-      validate_column_name(m,h, check_holidays=FALSE)
+      validate_column_name(m, h, check_holidays = FALSE)
     }
   }
 }
 
+#' Validates the name of a seasonality, holiday, or regressor.
+#'
+#' @param m Prophet object.
+#' @param name string
+#' @param check_holidays bool check if name already used for holiday
+#' @param check_seasonalities bool check if name already used for seasonality
+#' @param check_regressors bool check if name already used for regressor
+#'
+#' @keywords internal
+validate_column_name <- function(
+  m, name, check_holidays = TRUE, check_seasonalities = TRUE,
+  check_regressors = TRUE
+) {
+  if (grepl("_delim_", name)) {
+    stop('Holiday name cannot contain "_delim_"')
+  }
+  reserved_names = c(
+    'trend', 'seasonal', 'seasonalities', 'daily', 'weekly', 'yearly',
+    'holidays', 'zeros', 'extra_regressors', 'yhat'
+  )
+  rn_l = paste(reserved_names,"_lower",sep="")
+  rn_u = paste(reserved_names,"_upper",sep="")
+  reserved_names = c(reserved_names, rn_l, rn_u, c("ds","y"))
+  if(name %in% reserved_names){
+    stop("Name ", name, " is reserved.")
+  }
+  if(check_holidays & !is.null(m$holidays) &
+     (name %in% unique(m$holidays$holiday))){
+    stop("Name ", name, " already used for a holiday.")
+  }
+  if(check_seasonalities & (!is.null(m$seasonalities[[name]]))){
+    stop("Name ", name, " already used for a seasonality.")
+  }
+  if(check_regressors & (!is.null(m$seasonalities[[name]]))){
+    stop("Name ", name, " already used for an added regressor.")
+  }
+}
+
+
 #' Load compiled Stan model
 #'
 #' @param model String 'linear' or 'logistic' to specify a linear or logistic
@@ -279,6 +275,7 @@ set_date <- function(ds = NULL, tz = "GMT") {
   } else {
     ds <- as.POSIXct(ds, format = "%Y-%m-%d %H:%M:%S", tz = tz)
   }
+  attr(ds, "tzone") <- tz
   return(ds)
 }
 
@@ -304,8 +301,8 @@ time_diff <- function(ds1, ds2, units = "days") {
 #' and predicting.
 #'
 #' @param m Prophet object.
-#' @param df Data frame with columns ds, y, and cap if logistic growth.Any
-#'           specified additional regressors must also be present.
+#' @param df Data frame with columns ds, y, and cap if logistic growth. Any
+#'  specified additional regressors must also be present.
 #' @param initialize_scales Boolean set scaling factors in m from df.
 #'
 #' @return list with items 'df' and 'm'.
@@ -315,14 +312,19 @@ setup_dataframe <- function(m, df, initialize_scales = FALSE) {
   if (exists('y', where=df)) {
     df$y <- as.numeric(df$y)
   }
+  if (any(is.infinite(df$y))) {
+    stop("Found infinity in column y.")
+  }
   df$ds <- set_date(df$ds)
   if (anyNA(df$ds)) {
     stop(paste('Unable to parse date format in column ds. Convert to date ',
                'format. Either %Y-%m-%d or %Y-%m-%d %H:%M:%S'))
   }
-
-  #names(m$extra_regressors)
-
+  for (name in names(m$extra_regressors)) {
+    if (!(name %in% colnames(df))) {
+      stop('Regressor "', name, '" missing from dataframe')
+    }
+  }
 
   df <- df %>%
     dplyr::arrange(ds)
@@ -334,6 +336,26 @@ setup_dataframe <- function(m, df, initialize_scales = FALSE) {
     }
     m$start <- min(df$ds)
     m$t.scale <- time_diff(max(df$ds), m$start, "secs")
+    for (name in names(m$extra_regressors)) {
+      standardize <- m$extra_regressors[[name]]$standardize
+      if (standardize == 'auto') {
+        if (all(sort(unique(df[[name]])) == c(0, 1))) {
+          # Don't standardize binary variables
+          standardize <- FALSE
+        } else {
+          standardize <- TRUE
+        }
+      }
+      if (standardize) {
+        mu <- mean(df[[name]])
+        std <- stats::sd(df[[name]])
+        if (std == 0) {
+          std <- mu
+        }
+        m$extra_regressors[[name]]$mu <- mu
+        m$extra_regressors[[name]]$std <- std
+      }
+    }
   }
 
   df$t <- time_diff(df$ds, m$start, "secs") / m$t.scale
@@ -348,6 +370,15 @@ setup_dataframe <- function(m, df, initialize_scales = FALSE) {
     df <- df %>%
       dplyr::mutate(cap_scaled = cap / m$y.scale)
   }
+
+  for (name in names(m$extra_regressors)) {
+    df[[name]] <- as.numeric(df[[name]])
+    props <- m$extra_regressors[[name]]
+    df[[name]] <- (df[[name]] - props$mu) / props$std
+    if (anyNA(df[[name]])) {
+      stop('Found NaN in column ', name)
+    }
+  }
   return(list("m" = m, "df" = df))
 }
 
@@ -384,9 +415,7 @@ set_changepoints <- function(m) {
               m$n.changepoints)
     }
     if (m$n.changepoints > 0) {
-      # Place potential changepoints evenly through the first 80 pcnt of
-      # the history.
-      cp.indexes <- round(seq.int(1, floor(nrow(m$history) * .8),
+      cp.indexes <- round(seq.int(1, hist.size,
                                   length.out = (m$n.changepoints + 1))[-1])
       m$changepoints <- m$history$ds[cp.indexes]
     } else {
@@ -463,9 +492,8 @@ make_seasonality_features <- function(dates, period, series.order, prefix) {
 #' @importFrom dplyr "%>%"
 #' @keywords internal
 make_holiday_features <- function(m, dates) {
-  scale.ratio <- m$holidays.prior.scale / m$seasonality.prior.scale
   # Strip dates to be just days, for joining on holidays
-  dates <- set_date(format(dates))
+  dates <- set_date(format(dates, "%Y-%m-%d"))
   wide <- m$holidays %>%
     dplyr::mutate(ds = set_date(ds)) %>%
     dplyr::group_by(holiday, ds) %>%
@@ -481,7 +509,7 @@ make_holiday_features <- function(m, dates) {
         .$holiday, '_delim_', ifelse(offsets < 0, '-', '+'), abs(offsets), sep = '')
       dplyr::data_frame(ds = .$ds + offsets * 24 * 3600, holiday = names)
     }) %>%
-    dplyr::mutate(x = scale.ratio) %>%
+    dplyr::mutate(x = 1.) %>%
     tidyr::spread(holiday, x, fill = 0)
 
   holiday.mat <- data.frame(ds = dates) %>%
@@ -492,41 +520,43 @@ make_holiday_features <- function(m, dates) {
   return(holiday.mat)
 }
 
-#'Add an additional regressor to be used for fitting and predicting.
-#'
-#'The dataframe passed to `fit` and `predict` will have a column with the
-#'specified name to be used as a regressor. When standardize='auto', the
-#'regressor will be standardized unless it is binary. The regression
-#'coefficient is given a prior with the specified scale parameter.
-#'Decreasing the prior scale will add additional regularization. If no
-#'prior scale is provided, self.holidays_prior_scale will be used.
-#'
-#' @param m
-#' @param  name string name of the regressor
-#' @param  prior_scale optional float scale for the normal prior. If not
-#'                    provided, self.holidays_prior_scale will be used.
-#' @param  standardize optional, specify whether this regressor will be
-#'                     standardized prior to fitting. Can be 'auto' (standardize if not
-#'                     binary), True, or False.
+#' Add an additional regressor to be used for fitting and predicting.
+#'
+#' The dataframe passed to `fit` and `predict` will have a column with the
+#' specified name to be used as a regressor. When standardize='auto', the
+#' regressor will be standardized unless it is binary. The regression
+#' coefficient is given a prior with the specified scale parameter.
+#' Decreasing the prior scale will add additional regularization. If no
+#' prior scale is provided, holidays.prior.scale will be used.
+#'
+#' @param m Prophet object.
+#' @param name String name of the regressor
+#' @param prior.scale Float scale for the normal prior. If not provided,
+#'  holidays.prior.scale will be used.
+#' @param standardize Bool, specify whether this regressor will be standardized
+#'  prior to fitting. Can be 'auto' (standardize if not binary), True, or
+#'  False.
+#'
 #' @return  The prophet model with the regressor added.
+#'
 #' @export
-add_regressor <- function(m, prior_scale=0.0, standardize='auto'){
-  if(!is.null(m$history)){
+add_regressor <- function(m, name, prior.scale = NULL, standardize = 'auto'){
+  if (!is.null(m$history)) {
     stop('Regressors must be added prior to model fitting.')
   }
-  validate_column_name(m,check_regressors=FALSE);
-  if(prior_scale == 0){
-    prior_scale = m$holidays.prior.scale
+  validate_column_name(m, name, check_regressors = FALSE)
+  if (is.null(prior.scale)) {
+    prior.scale <- m$holidays.prior.scale
   }
-
-  if(prior_scale < 0){
-    stop("prior_scale is less than 0");
+  if(prior.scale <= 0) {
+    stop("Prior scale must be > 0")
   }
-  m$extra_regressors = list(name = list(prior_scale = prior_scale,
-                                        standardize=standardize,
-                                        mu=0,
-                                        std=1.0))
-
+  m$extra_regressors[[name]] <- list(
+    prior.scale = prior.scale,
+    standardize = standardize,
+    mu = 0,
+    std = 1.0
+  )
   return(m)
 }
 
@@ -547,12 +577,12 @@ add_regressor <- function(m, prior_scale=0.0, standardize='auto'){
 #' @importFrom dplyr "%>%"
 #' @export
 add_seasonality <- function(m, name, period, fourier.order) {
-  if (!is.null(m$holidays)) {
+  if (!is.null(m$history)) {
     stop("Seasonality must be added prior to model fitting.")
   }
-
   if (!(name %in% c('daily', 'weekly', 'yearly'))) {
-    validate_column_name(name,check_seasonalities=FALSE)
+    # Allow overriding built-in seasonalities
+    validate_column_name(m, name, check_seasonalities = FALSE)
   }
   m$seasonalities[[name]] <- c(period, fourier.order)
   return(m)
@@ -562,45 +592,49 @@ add_seasonality <- function(m, name, period, fourier.order) {
 #' Includes seasonality features, holiday features, and added regressors.
 #'
 #' @param m Prophet object.
-#' @param df Dataframe with dates for computing seasonality features.
+#' @param df Dataframe with dates for computing seasonality features and any
+#'  added regressors.
 #'
-#' @return Dataframe with regressor features,
-#'         list of prior scales for each colum of the features and any added regressors
+#' @return List with items
+#'  seasonal.features: Dataframe with regressor features,
+#'  prior.scales: Array of prior scales for each colum of the features
+#'    dataframe.
 #'
 #' @keywords internal
 make_all_seasonality_features <- function(m, df) {
-  #seasonal.features <- data.frame(zeros = rep(0, nrow(df)))
-  seasonal.features <- c();
-  prior_scales <- c();
+  seasonal.features <- data.frame(row.names = 1:nrow(df))
+  prior.scales <- c()
 
+  # Seasonality features
   for (name in names(m$seasonalities)) {
     period <- m$seasonalities[[name]][1]
     series.order <- m$seasonalities[[name]][2]
-    features = make_seasonality_features(df$ds, period, series.order, name);
-    if(is.null(seasonal.features)){
-      seasonal.features <- features;
-    }
-    seasonal.features <- cbind(seasonal.features, features) #test append 와 문제가 없는지 확인
-    prior_scales = c(prior_scales, m$seasonality.prior.scale * dim(features)[2]);
+    features <- make_seasonality_features(df$ds, period, series.order, name)
+    seasonal.features <- cbind(seasonal.features, features)
+    prior.scales <- c(prior.scales,
+                      m$seasonality.prior.scale * rep(1, ncol(features)))
   }
-  if(!is.null(m$holidays)) {
-    features = make_holiday_features(m, df$ds);
-    seasonal.features <- cbind(seasonal.features, features) #test
-    prior_scales <- c(prior_scales, m$holiday_prior_scale * dim(features)[2]);
+
+  # Holiday features
+  if (!is.null(m$holidays)) {
+    features <- make_holiday_features(m, df$ds)
+    seasonal.features <- cbind(seasonal.features, features)
+    prior.scales <- c(prior.scales,
+                      m$holidays.prior.scale * rep(1, ncol(features)))
   }
 
   # Additional regressors
-  for(name in names(m$extra_regressors)){
-    seasonal.features = cbind(seasonal.features, df[name]); #test
-    prior_scales = cbind(prior_scales, m$extra_regressors[[name]][[prior_scale]])
+  for (name in names(m$extra_regressors)) {
+    seasonal.features <- cbind(seasonal.features, df[[name]])
+    prior.scales <- cbind(prior.scales, m$extra_regressors[[name]]$prior.scale)
   }
 
-  if(length(df) == 0){
-    seasonal.features =cbind(seasonal.features,data.frame(zeros = rep(0, nrow(df))));
-    prior_scales = c(prior_scales,0.1)
+  if (ncol(seasonal.features) == 0) {
+    seasonal.features <- data.frame(zeros = rep(0, nrow(df)))
+    prior.scales <- c(1.)
   }
-
-  return(list(seasonal.features=seasonal.features, prior_scales=prior_scales))
+  return(list(seasonal.features = seasonal.features,
+              prior.scales = prior.scales))
 }
 
 #' Get number of Fourier components for built-in seasonalities.
@@ -792,9 +826,6 @@ fit.prophet <- function(m, df, ...) {
   }
   history <- df %>%
     dplyr::filter(!is.na(y))
-  if (any(is.infinite(history$y))) {
-    stop("Found infinity in column y.")
-  }
   m$history.dates <- sort(set_date(df$ds))
 
   out <- setup_dataframe(m, history, initialize_scales = TRUE)
@@ -802,8 +833,9 @@ fit.prophet <- function(m, df, ...) {
   m <- out$m
   m$history <- history
   m <- set_auto_seasonalities(m)
-  seasonal.features <- make_all_seasonality_features(m, history)[[1]]
-  prior_scales <- make_all_seasonality_features(m, history)[[2]]
+  out2 <- make_all_seasonality_features(m, history)
+  seasonal.features <- out2$seasonal.features
+  prior.scales <- out2$prior.scales
 
   m <- set_changepoints(m)
   A <- get_changepoint_matrix(m)
@@ -818,7 +850,7 @@ fit.prophet <- function(m, df, ...) {
     A = A,
     t_change = array(m$changepoints.t),
     X = as.matrix(seasonal.features),
-    sigma = prior_scales,
+    sigmas = array(prior.scales),
     tau = m$changepoint.prior.scale
   )
 
@@ -920,10 +952,19 @@ predict.prophet <- function(object, df = NULL, ...) {
   }
 
   df$trend <- predict_trend(object, df)
+  seasonal.components <- predict_seasonal_components(object, df)
+  intervals <- predict_uncertainty(object, df)
 
+  # Drop columns except ds, cap, and trend
+  if ('cap' %in% colnames(df)) {
+    cols <- c('ds', 'cap', 'trend')
+  } else {
+    cols <- c('ds', 'trend')
+  }
+  df <- df[cols]
   df <- df %>%
-    dplyr::bind_cols(predict_uncertainty(object, df)) %>%
-    dplyr::bind_cols(predict_seasonal_components(object, df))
+    dplyr::bind_cols(seasonal.components) %>%
+    dplyr::bind_cols(intervals)
   df$yhat <- df$trend + df$seasonal
   return(df)
 }
@@ -1010,7 +1051,7 @@ predict_trend <- function(model, df) {
   return(trend * model$y.scale)
 }
 
-#' Predict seasonality broken down into components.
+#' Predict seasonality components, holidays, and added regressors.
 #'
 #' @param m Prophet object.
 #' @param df Prediction dataframe.
@@ -1019,32 +1060,31 @@ predict_trend <- function(model, df) {
 #'
 #' @keywords internal
 predict_seasonal_components <- function(m, df) {
-  seasonal.features <- make_all_seasonality_features(m, df)[[1]]
+  seasonal.features <- make_all_seasonality_features(m, df)$seasonal.features
   lower.p <- (1 - m$interval.width)/2
   upper.p <- (1 + m$interval.width)/2
 
-  # Broken down into components
   components <- dplyr::data_frame(component = colnames(seasonal.features)) %>%
     dplyr::mutate(col = 1:n()) %>%
     tidyr::separate(component, c('component', 'part'), sep = "_delim_",
                     extra = "merge", fill = "right") %>%
-    dplyr::filter(component != 'zeros')
-
-  #components <-
-  components <- rbind(components[,c(1,3)], data.frame("component"=rep("seasonal"),
-                                                      "col"=c(1:dim(seasonal.features)[2])));
-
-  components <- add_group_component(m,components, 'seasonalities', names(m$seasonalities));
-
+    dplyr::select(col, component)
+  # Add total for all regression components
+  components <- rbind(
+    components,
+    data.frame(col = 1:ncol(seasonal.features), component = 'seasonal'))
+  # Add totals for seasonality, holiday, and extra regressors
+  components <- add_group_component(
+    components, 'seasonalities', names(m$seasonalities))
   if(!is.null(m$holidays)){
-    components <- add_group_component(m,components, 'holidays', unique(m$holidays$holiday));
+    components <- add_group_component(
+      components, 'holidays', unique(m$holidays$holiday))
   }
+  components <- add_group_component(
+    components, 'extra_regressors', names(m$extra_regressors))
+  # Remove the placeholder
+  components <- dplyr::filter(components, component != 'zeros')
 
-  components <- add_group_component(m,components, 'extra_regressors', names(m$extra_regressors));
-  # I am stuck on here: I am little confused that do I need to set
-  #                     components as list or dataframe  ??
-  #
-  #if (nrow(components) > 0) {
   component.predictions <- components %>%
     dplyr::group_by(component) %>% dplyr::do({
       comp <- (as.matrix(seasonal.features[, .$col])
@@ -1062,18 +1102,12 @@ predict_seasonal_components <- function(m, df) {
     tidyr::spread(component, value) %>%
     dplyr::select(-ix)
 
-  component.predictions$seasonal <- rowSums(
-    component.predictions[unique(components$component)])
-  #  } else {
-  #    component.predictions <- data.frame(seasonal = rep(0, nrow(df)))
-  #  }
   return(component.predictions)
 }
 
 #' Adds a component with given name that contains all of the components
 #' in group.
 #'
-#' @param m Prophet object.
 #' @param components Dataframe with components.
 #' @param name Name of new group component.
 #' @param group  List of components that form the group.
@@ -1081,14 +1115,15 @@ predict_seasonal_components <- function(m, df) {
 #' @return Dataframe with components.
 #'
 #' @keywords internal
-add_group_component <- function(m, components, name, group) {
-
-  loc = (components$component %in% group);
-  new_comp = components[loc,];
-  new_comp$component = name;
-  components= rbind(components, new_comp);
-  return(components);
+add_group_component <- function(components, name, group) {
+  new_comp <- components[(components$component %in% group), ]
+  if (nrow(new_comp) > 0) {
+    new_comp$component <- name
+    components <- rbind(components, new_comp)
+  }
+  return(components)
 }
+
 #' Prophet posterior predictive samples.
 #'
 #' @param m Prophet object.
@@ -1103,7 +1138,7 @@ sample_posterior_predictive <- function(m, df) {
   samp.per.iter <- max(1, ceiling(m$uncertainty.samples / n.iterations))
   nsamp <- n.iterations * samp.per.iter  # The actual number of samples
 
-  seasonal.features <- make_all_seasonality_features(m, df)[[1]]
+  seasonal.features <- make_all_seasonality_features(m, df)$seasonal.features
   sim.values <- list("trend" = matrix(, nrow = nrow(df), ncol = nsamp),
                      "seasonal" = matrix(, nrow = nrow(df), ncol = nsamp),
                      "yhat" = matrix(, nrow = nrow(df), ncol = nsamp))
@@ -1139,7 +1174,7 @@ predictive_samples <- function(m, df) {
   return(sim.values)
 }
 
-#' Prophet uncertainty intervals.
+#' Prophet uncertainty intervals for yhat and trend
 #'
 #' @param m Prophet object.
 #' @param df Prediction dataframe.
@@ -1157,12 +1192,10 @@ predict_uncertainty <- function(m, df) {
     t(apply(t(sim.values$yhat), 2, stats::quantile, c(lower.p, upper.p),
             na.rm = TRUE)),
     t(apply(t(sim.values$trend), 2, stats::quantile, c(lower.p, upper.p),
-            na.rm = TRUE)),
-    t(apply(t(sim.values$seasonal), 2, stats::quantile, c(lower.p, upper.p),
             na.rm = TRUE))
   ) %>% dplyr::as_data_frame()
 
-  colnames(intervals) <- paste(rep(c('yhat', 'trend', 'seasonal'), each=2),
+  colnames(intervals) <- paste(rep(c('yhat', 'trend'), each=2),
                                c('lower', 'upper'), sep = "_")
   return(intervals)
 }
@@ -1369,29 +1402,36 @@ plot.prophet <- function(x, fcst, uncertainty = TRUE, plot_cap = TRUE,
 #' @importFrom dplyr "%>%"
 prophet_plot_components <- function(
   m, fcst, uncertainty = TRUE, plot_cap = TRUE, weekly_start = 0,
-  yearly_start = 0) {
-  df <- df_for_plotting(m, fcst)
+  yearly_start = 0
+) {
   # Plot the trend
-  panels <- list(plot_trend(df, uncertainty, plot_cap))
+  panels <- list(plot_forecast_component(fcst, 'trend', uncertainty, plot_cap))
   # Plot holiday components, if present.
-  if (!is.null(m$holidays)) {
-    panels[[length(panels) + 1]] <- plot_holidays(m, df, uncertainty)
+  if (!is.null(m$holidays) & ('holidays' %in% colnames(fcst))) {
+    panels[[length(panels) + 1]] <- plot_forecast_component(
+      fcst, 'holidays', uncertainty, FALSE)
   }
   # Plot weekly seasonality, if present
-  if ("weekly" %in% colnames(df)) {
+  if ("weekly" %in% colnames(fcst)) {
     panels[[length(panels) + 1]] <- plot_weekly(m, uncertainty, weekly_start)
   }
   # Plot yearly seasonality, if present
-  if ("yearly" %in% colnames(df)) {
+  if ("yearly" %in% colnames(fcst)) {
     panels[[length(panels) + 1]] <- plot_yearly(m, uncertainty, yearly_start)
   }
   # Plot other seasonalities
   for (name in names(m$seasonalities)) {
     if (!(name %in% c('weekly', 'yearly')) &&
-        (name %in% colnames(df))) {
+        (name %in% colnames(fcst))) {
       panels[[length(panels) + 1]] <- plot_seasonality(m, name, uncertainty)
     }
   }
+  # Plot extra regressors
+  if ((length(m$extra_regressors) > 0)
+      & ('extra_regressors' %in% colnames(fcst))) {
+    panels[[length(panels) + 1]] <- plot_forecast_component(
+      fcst, 'extra_regressors', uncertainty, FALSE)
+  }
 
   # Make the plot.
   grid::grid.newpage()
@@ -1404,67 +1444,56 @@ prophet_plot_components <- function(
   return(invisible(panels))
 }
 
-#' Plot the prophet trend.
+#' Plot a particular component of the forecast.
 #'
-#' @param df Forecast dataframe for plotting.
+#' @param fcst Dataframe output of `predict`.
+#' @param name String name of the component to plot (column of fcst).
 #' @param uncertainty Boolean to plot uncertainty intervals.
 #' @param plot_cap Boolean indicating if the capacity should be shown in the
 #'  figure, if available.
 #'
 #' @return A ggplot2 plot.
 #'
-#' @keywords internal
-plot_trend <- function(df, uncertainty = TRUE, plot_cap = TRUE) {
-  df.t <- df[!is.na(df$trend),]
-  gg.trend <- ggplot2::ggplot(df.t, ggplot2::aes(x = ds, y = trend)) +
+#' @export
+plot_forecast_component <- function(
+  fcst, name, uncertainty = TRUE, plot_cap = FALSE
+) {
+  gg.comp <- ggplot2::ggplot(
+      fcst, ggplot2::aes_string(x = 'ds', y = name, group = 1)) +
     ggplot2::geom_line(color = "#0072B2", na.rm = TRUE)
-  if (exists('cap', where = df.t) && plot_cap) {
-    gg.trend <- gg.trend + ggplot2::geom_line(ggplot2::aes(y = cap),
-                                              linetype = 'dashed',
-                                              na.rm = TRUE)
+  if (exists('cap', where = fcst) && plot_cap) {
+    gg.comp <- gg.comp + ggplot2::geom_line(
+      ggplot2::aes(y = cap), linetype = 'dashed', na.rm = TRUE)
   }
   if (uncertainty) {
-    gg.trend <- gg.trend +
-      ggplot2::geom_ribbon(ggplot2::aes(ymin = trend_lower,
-                                        ymax = trend_upper),
-                           alpha = 0.2,
-                           fill = "#0072B2",
-                           na.rm = TRUE)
+    gg.comp <- gg.comp +
+      ggplot2::geom_ribbon(
+        ggplot2::aes_string(
+          ymin = paste0(name, '_lower'), ymax = paste0(name, '_upper')
+        ),
+        alpha = 0.2,
+        fill = "#0072B2",
+        na.rm = TRUE)
   }
-  return(gg.trend)
+  return(gg.comp)
 }
 
-#' Plot the holidays component of the forecast.
+#' Prepare dataframe for plotting seasonal components.
 #'
-#' @param m Prophet model
-#' @param df Forecast dataframe for plotting.
-#' @param uncertainty Boolean to plot uncertainty intervals.
+#' @param m Prophet object.
+#' @param ds Array of dates for column ds.
 #'
-#' @return A ggplot2 plot.
+#' @return A dataframe with seasonal components on ds.
 #'
 #' @keywords internal
-plot_holidays <- function(m, df, uncertainty = TRUE) {
-  holiday.comps <- unique(m$holidays$holiday) %>% as.character()
-  df.s <- data.frame(ds = df$ds,
-                     holidays = rowSums(df[, holiday.comps, drop = FALSE]),
-                     holidays_lower = rowSums(df[, paste0(holiday.comps,
-                                                          "_lower"), drop = FALSE]),
-                     holidays_upper = rowSums(df[, paste0(holiday.comps,
-                                                          "_upper"), drop = FALSE]))
-  df.s <- df.s[!is.na(df.s$holidays),]
-  # NOTE the above CI calculation is incorrect if holidays overlap in time.
-  # Since it is just for the visualization we will not worry about it now.
-  gg.holidays <- ggplot2::ggplot(df.s, ggplot2::aes(x = ds, y = holidays)) +
-    ggplot2::geom_line(color = "#0072B2", na.rm = TRUE)
-  if (uncertainty) {
-    gg.holidays <- gg.holidays +
-      ggplot2::geom_ribbon(ggplot2::aes(ymin = holidays_lower,
-                                        ymax = holidays_upper),
-                           alpha = 0.2,
-                           fill = "#0072B2",
-                           na.rm = TRUE)
+seasonality_plot_df <- function(m, ds) {
+  df_list <- list(ds = ds, cap = 1)
+  for (name in names(m$extra_regressors)) {
+    df_list[[name]] <- 0
   }
-  return(gg.holidays)
+  df <- as.data.frame(df_list)
+  df <- setup_dataframe(m, df)$df
+  return(df)
 }
 
 #' Plot the weekly component of the forecast.
@@ -1480,12 +1509,9 @@ plot_holidays <- function(m, df, uncertainty = TRUE) {
 #' @keywords internal
 plot_weekly <- function(m, uncertainty = TRUE, weekly_start = 0) {
   # Compute weekly seasonality for a Sun-Sat sequence of dates.
-  df.w <- data.frame(
-    ds=seq(set_date('2017-01-01'), by='d', length.out=7) +
-      weekly_start, cap=1.)
-  df.w <- setup_dataframe(m, df.w)$df
+  days <- seq(set_date('2017-01-01'), by='d', length.out=7) + weekly_start
+  df.w <- seasonality_plot_df(m, days)
   seas <- predict_seasonal_components(m, df.w)
-  print(seas)
   seas$dow <- factor(weekdays(df.w$ds), levels=weekdays(df.w$ds))
 
   gg.weekly <- ggplot2::ggplot(seas, ggplot2::aes(x = dow, y = weekly,
@@ -1516,10 +1542,8 @@ plot_weekly <- function(m, uncertainty = TRUE, weekly_start = 0) {
 #' @keywords internal
 plot_yearly <- function(m, uncertainty = TRUE, yearly_start = 0) {
   # Compute yearly seasonality for a Jan 1 - Dec 31 sequence of dates.
-  df.y <- data.frame(
-    ds=seq(set_date('2017-01-01'), by='d', length.out=365) +
-      yearly_start, cap=1.)
-  df.y <- setup_dataframe(m, df.y)$df
+  days <- seq(set_date('2017-01-01'), by='d', length.out=365) + yearly_start
+  df.y <- seasonality_plot_df(m, days)
   seas <- predict_seasonal_components(m, df.y)
   seas$ds <- df.y$ds
 
@@ -1554,9 +1578,8 @@ plot_seasonality <- function(m, name, uncertainty = TRUE) {
   period <- m$seasonalities[[name]][1]
   end <- start + period * 24 * 3600
   plot.points <- 200
-  df.y <- data.frame(
-    ds=seq(from=start, to=end, length.out=plot.points), cap=1.)
-  df.y <- setup_dataframe(m, df.y)$df
+  days <- seq(from=start, to=end, length.out=plot.points)
+  df.y <- seasonality_plot_df(days)
   seas <- predict_seasonal_components(m, df.y)
   seas$ds <- df.y$ds
   gg.s <- ggplot2::ggplot(
@@ -1621,51 +1644,6 @@ prophet_copy <- function(m, cutoff = NULL) {
     uncertainty.samples = m$uncertainty.samples,
     fit = FALSE,
   ))
-
-#' Sample from the posterior predictive distribution.
-#'
-#' @param m Prophet model object.
-#' @param name String name of the seasonality.
-#' @param uncertainty Boolean to plot uncertainty intervals.
-#'
-#' @return A ggplot2 plot.
-#'
-#' @keywords internal
-plot_seasonality <- function(m, name, uncertainty = TRUE) {
-  # Compute seasonality from Jan 1 through a single period.
-  start <- set_date('2017-01-01')
-  period <- m$seasonalities[[name]][1]
-  end <- start + period * 24 * 3600
-  plot.points <- 200
-  df.y <- data.frame(
-    ds=seq(from=start, to=end, length.out=plot.points), cap=1.)
-  df.y <- setup_dataframe(m, df.y)$df
-  seas <- predict_seasonal_components(m, df.y)
-  seas$ds <- df.y$ds
-  gg.s <- ggplot2::ggplot(
-    seas, ggplot2::aes_string(x = 'ds', y = name, group = 1)) +
-    ggplot2::geom_line(color = "#0072B2", na.rm = TRUE)
-  if (period <= 2) {
-    fmt.str <- '%T'
-  }
-  else if (period < 14) {
-    fmt.str <- '%m/%d %R'
-  } else {
-    fmt.str <- '%m/%d'
-  }
-  gg.s <- gg.s +
-    ggplot2::scale_x_datetime(labels = scales::date_format(fmt.str))
-  if (uncertainty) {
-    gg.s <- gg.s +
-      ggplot2::geom_ribbon(
-        ggplot2::aes_string(
-          ymin = paste0(name, '_lower'), ymax = paste0(name, '_upper')
-        ),
-        alpha = 0.2,
-        fill = "#0072B2",
-        na.rm = TRUE)
-  }
-  return(gg.s)
 }
 
 # fb-block 3

+ 24 - 0
R/man/add_group_component.Rd

@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/prophet.R
+\name{add_group_component}
+\alias{add_group_component}
+\title{Adds a component with given name that contains all of the components
+in group.}
+\usage{
+add_group_component(components, name, group)
+}
+\arguments{
+\item{components}{Dataframe with components.}
+
+\item{name}{Name of new group component.}
+
+\item{group}{List of components that form the group.}
+}
+\value{
+Dataframe with components.
+}
+\description{
+Adds a component with given name that contains all of the components
+in group.
+}
+\keyword{internal}

+ 31 - 0
R/man/add_regressor.Rd

@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/prophet.R
+\name{add_regressor}
+\alias{add_regressor}
+\title{Add an additional regressor to be used for fitting and predicting.}
+\usage{
+add_regressor(m, name, prior.scale = NULL, standardize = "auto")
+}
+\arguments{
+\item{m}{Prophet object.}
+
+\item{name}{String name of the regressor}
+
+\item{prior.scale}{Float scale for the normal prior. If not provided,
+holidays.prior.scale will be used.}
+
+\item{standardize}{Bool, specify whether this regressor will be standardized
+prior to fitting. Can be 'auto' (standardize if not binary), True, or
+False.}
+}
+\value{
+The prophet model with the regressor added.
+}
+\description{
+The dataframe passed to `fit` and `predict` will have a column with the
+specified name to be used as a regressor. When standardize='auto', the
+regressor will be standardized unless it is binary. The regression
+coefficient is given a prior with the specified scale parameter.
+Decreasing the prior scale will add additional regularization. If no
+prior scale is provided, holidays.prior.scale will be used.
+}

+ 9 - 3
R/man/make_all_seasonality_features.Rd

@@ -2,19 +2,25 @@
 % Please edit documentation in R/prophet.R
 \name{make_all_seasonality_features}
 \alias{make_all_seasonality_features}
-\title{Dataframe with seasonality features.}
+\title{Dataframe with seasonality features.
+Includes seasonality features, holiday features, and added regressors.}
 \usage{
 make_all_seasonality_features(m, df)
 }
 \arguments{
 \item{m}{Prophet object.}
 
-\item{df}{Dataframe with dates for computing seasonality features.}
+\item{df}{Dataframe with dates for computing seasonality features and any
+added regressors.}
 }
 \value{
-Dataframe with seasonality.
+List with items
+ seasonal.features: Dataframe with regressor features,
+ prior.scales: Array of prior scales for each colum of the features
+   dataframe.
 }
 \description{
 Dataframe with seasonality features.
+Includes seasonality features, holiday features, and added regressors.
 }
 \keyword{internal}

+ 18 - 0
R/man/parse_seasonality_args.Rd

@@ -2,9 +2,12 @@
 % Please edit documentation in R/prophet.R
 \name{parse_seasonality_args}
 \alias{parse_seasonality_args}
+\alias{parse_seasonality_args}
 \title{Get number of Fourier components for built-in seasonalities.}
 \usage{
 parse_seasonality_args(m, name, arg, auto.disable, default.order)
+
+parse_seasonality_args(m, name, arg, auto.disable, default.order)
 }
 \arguments{
 \item{m}{Prophet object.}
@@ -17,11 +20,26 @@ provided.}
 \item{auto.disable}{Bool if seasonality should be disabled when 'auto'.}
 
 \item{default.order}{Int default Fourier order.}
+
+\item{m}{Prophet object.}
+
+\item{name}{String name of the seasonality component.}
+
+\item{arg}{'auto', TRUE, FALSE, or number of Fourier components as
+provided.}
+
+\item{auto.disable}{Bool if seasonality should be disabled when 'auto'.}
+
+\item{default.order}{Int default Fourier order.}
 }
 \value{
 Number of Fourier components, or 0 for disabled.
+
+Number of Fourier components, or 0 for disabled.
 }
 \description{
 Get number of Fourier components for built-in seasonalities.
+
+Get number of Fourier components for built-in seasonalities.
 }
 \keyword{internal}

+ 2 - 2
R/man/plot.prophet.Rd

@@ -4,8 +4,8 @@
 \alias{plot.prophet}
 \title{Plot the prophet forecast.}
 \usage{
-plot.prophet(x, fcst, uncertainty = TRUE, plot_cap = TRUE, xlabel = "ds",
-  ylabel = "y", ...)
+\method{plot}{prophet}(x, fcst, uncertainty = TRUE, plot_cap = TRUE,
+  xlabel = "ds", ylabel = "y", ...)
 }
 \arguments{
 \item{x}{Prophet object.}

+ 24 - 0
R/man/plot_forecast_component.Rd

@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/prophet.R
+\name{plot_forecast_component}
+\alias{plot_forecast_component}
+\title{Plot a particular component of the forecast.}
+\usage{
+plot_forecast_component(fcst, name, uncertainty = TRUE, plot_cap = FALSE)
+}
+\arguments{
+\item{fcst}{Dataframe output of `predict`.}
+
+\item{name}{String name of the component to plot (column of fcst).}
+
+\item{uncertainty}{Boolean to plot uncertainty intervals.}
+
+\item{plot_cap}{Boolean indicating if the capacity should be shown in the
+figure, if available.}
+}
+\value{
+A ggplot2 plot.
+}
+\description{
+Plot a particular component of the forecast.
+}

+ 0 - 22
R/man/plot_holidays.Rd

@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/prophet.R
-\name{plot_holidays}
-\alias{plot_holidays}
-\title{Plot the holidays component of the forecast.}
-\usage{
-plot_holidays(m, df, uncertainty = TRUE)
-}
-\arguments{
-\item{m}{Prophet model}
-
-\item{df}{Forecast dataframe for plotting.}
-
-\item{uncertainty}{Boolean to plot uncertainty intervals.}
-}
-\value{
-A ggplot2 plot.
-}
-\description{
-Plot the holidays component of the forecast.
-}
-\keyword{internal}

+ 0 - 23
R/man/plot_trend.Rd

@@ -1,23 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/prophet.R
-\name{plot_trend}
-\alias{plot_trend}
-\title{Plot the prophet trend.}
-\usage{
-plot_trend(df, uncertainty = TRUE, plot_cap = TRUE)
-}
-\arguments{
-\item{df}{Forecast dataframe for plotting.}
-
-\item{uncertainty}{Boolean to plot uncertainty intervals.}
-
-\item{plot_cap}{Boolean indicating if the capacity should be shown in the
-figure, if available.}
-}
-\value{
-A ggplot2 plot.
-}
-\description{
-Plot the prophet trend.
-}
-\keyword{internal}

+ 2 - 2
R/man/predict_seasonal_components.Rd

@@ -2,7 +2,7 @@
 % Please edit documentation in R/prophet.R
 \name{predict_seasonal_components}
 \alias{predict_seasonal_components}
-\title{Predict seasonality broken down into components.}
+\title{Predict seasonality components, holidays, and added regressors.}
 \usage{
 predict_seasonal_components(m, df)
 }
@@ -15,6 +15,6 @@ predict_seasonal_components(m, df)
 Dataframe with seasonal components.
 }
 \description{
-Predict seasonality broken down into components.
+Predict seasonality components, holidays, and added regressors.
 }
 \keyword{internal}

+ 2 - 2
R/man/predict_uncertainty.Rd

@@ -2,7 +2,7 @@
 % Please edit documentation in R/prophet.R
 \name{predict_uncertainty}
 \alias{predict_uncertainty}
-\title{Prophet uncertainty intervals.}
+\title{Prophet uncertainty intervals for yhat and trend}
 \usage{
 predict_uncertainty(m, df)
 }
@@ -15,6 +15,6 @@ predict_uncertainty(m, df)
 Dataframe with uncertainty intervals.
 }
 \description{
-Prophet uncertainty intervals.
+Prophet uncertainty intervals for yhat and trend
 }
 \keyword{internal}

+ 2 - 1
R/man/predictive_samples.Rd

@@ -14,7 +14,8 @@ predictive_samples(m, df)
 }
 \value{
 A list with items "trend", "seasonal", and "yhat" containing
- posterior predictive samples for that component.
+ posterior predictive samples for that component. "seasonal" is the sum
+ of seasonalities, holidays, and added regressors.
 }
 \description{
 Sample from the posterior predictive distribution.

+ 20 - 0
R/man/seasonality_plot_df.Rd

@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/prophet.R
+\name{seasonality_plot_df}
+\alias{seasonality_plot_df}
+\title{Prepare dataframe for plotting seasonal components.}
+\usage{
+seasonality_plot_df(m, ds)
+}
+\arguments{
+\item{m}{Prophet object.}
+
+\item{ds}{Array of dates for column ds.}
+}
+\value{
+A dataframe with seasonal components on ds.
+}
+\description{
+Prepare dataframe for plotting seasonal components.
+}
+\keyword{internal}

+ 2 - 1
R/man/setup_dataframe.Rd

@@ -9,7 +9,8 @@ setup_dataframe(m, df, initialize_scales = FALSE)
 \arguments{
 \item{m}{Prophet object.}
 
-\item{df}{Data frame with columns ds, y, and cap if logistic growth.}
+\item{df}{Data frame with columns ds, y, and cap if logistic growth. Any
+specified additional regressors must also be present.}
 
 \item{initialize_scales}{Boolean set scaling factors in m from df.}
 }

+ 24 - 0
R/man/validate_column_name.Rd

@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/prophet.R
+\name{validate_column_name}
+\alias{validate_column_name}
+\title{Validates the name of a seasonality, holiday, or regressor.}
+\usage{
+validate_column_name(m, name, check_holidays = TRUE,
+  check_seasonalities = TRUE, check_regressors = TRUE)
+}
+\arguments{
+\item{m}{Prophet object.}
+
+\item{name}{string}
+
+\item{check_holidays}{bool check if name already used for holiday}
+
+\item{check_seasonalities}{bool check if name already used for seasonality}
+
+\item{check_regressors}{bool check if name already used for regressor}
+}
+\description{
+Validates the name of a seasonality, holiday, or regressor.
+}
+\keyword{internal}