8 years ago · 2f9b20b2d3
--- a/R/R/diagnostics.R
+++ b/R/R/diagnostics.R
@@ -27,19 +27,21 @@ generate_cutoffs <- function(df, horizon, k, period) {
 
				   }
			
 
				   tzone <- attr(cutoff, "tzone")  # Timezone is wiped by putting in array
			
 
				   result <- c(cutoff)
			
 
				-  for (i in 2:k) {
			
 
				-    cutoff <- cutoff - period
			
 
				-    # If data does not exist in data range (cutoff, cutoff + horizon]
			
 
				-    if (!any((df$ds > cutoff) & (df$ds <= cutoff + horizon))) {
			
 
				-      # Next cutoff point is 'closest date before cutoff in data - horizon'
			
 
				-      closest.date <- max(df$ds[df$ds <= cutoff])
			
 
				-      cutoff <- closest.date - horizon
			
 
				-    }
			
 
				-    if (cutoff < min(df$ds)) {
			
 
				-      warning('Not enough data for requested number of cutoffs! Using ', i)
			
 
				-      break
			
 
				+  if (k > 1) {
			
 
				+    for (i in 2:k) {
			
 
				+      cutoff <- cutoff - period
			
 
				+      # If data does not exist in data range (cutoff, cutoff + horizon]
			
 
				+      if (!any((df$ds > cutoff) & (df$ds <= cutoff + horizon))) {
			
 
				+        # Next cutoff point is 'closest date before cutoff in data - horizon'
			
 
				+        closest.date <- max(df$ds[df$ds <= cutoff])
			
 
				+        cutoff <- closest.date - horizon
			
 
				+      }
			
 
				+      if (cutoff < min(df$ds)) {
			
 
				+        warning('Not enough data for requested number of cutoffs! Using ', i)
			
 
				+        break
			
 
				+      }
			
 
				+      result <- c(result, cutoff)
			
 
				     }
			
 
				-    result <- c(result, cutoff)
			
 
				   }
			
 
				   # Reset timezones
			
 
				   attr(result, "tzone") <- tzone
			
@@ -47,8 +49,9 @@ generate_cutoffs <- function(df, horizon, k, period) {
 
				 }
			
 
				 
			
 
				 #' Simulated historical forecasts.
			
 
				-#' Make forecasts from k historical cutoff dates, and compare forecast values
			
 
				-#' to actual values.
			
 
				+#'
			
 
				+#' Make forecasts from k historical cutoff points, working backwards from
			
 
				+#' (end - horizon) with a spacing of period between each cutoff.
			
 
				 #'
			
 
				 #' @param model Fitted Prophet model.
			
 
				 #' @param horizon Integer size of the horizon
			
@@ -99,25 +102,31 @@ simulated_historical_forecasts <- function(model, horizon, units, k,
 
				 }
			
 
				 
			
 
				 #' Cross-validation for time series.
			
 
				-#' Computes forecast error with cutoffs at the specified period. When the
			
 
				-#' period is the time interval of the data, is the procedure described in
			
 
				-#' https://robjhyndman.com/hyndsight/tscv/. Beginning from end-horizon, makes
			
 
				-#' a cutoff every "period" amount of time, going back to "initial".
			
 
				+#'
			
 
				+#' Computes forecasts from historical cutoff points. Beginning from initial,
			
 
				+#' makes cutoffs with a spacing of period up to (end - horizon).
			
 
				+#'
			
 
				+#' When period is equal to the time interval of the data, this is the
			
 
				+#' technique described in https://robjhyndman.com/hyndsight/tscv/ .
			
 
				 #'
			
 
				 #' @param model Fitted Prophet model.
			
 
				 #' @param horizon Integer size of the horizon
			
 
				 #' @param units String unit of the horizon, e.g., "days", "secs".
			
 
				 #' @param period Integer amount of time between cutoff dates. Same units as
			
 
				-#'  horizon.
			
 
				+#'  horizon. If not provided, 0.5 * horizon is used.
			
 
				 #' @param initial Integer size of the first training period. If not provided,
			
 
				 #'  3 * horizon is used. Same units as horizon.
			
 
				 #'
			
 
				 #' @return A dataframe with the forecast, actual value, and cutoff date.
			
 
				 #'
			
 
				 #' @export
			
 
				-cross_validation <- function(model, horizon, units, period, initial = NULL) {
			
 
				+cross_validation <- function(
			
 
				+    model, horizon, units, period = NULL, initial = NULL) {
			
 
				   te <- max(model$history$ds)
			
 
				   ts <- min(model$history$ds)
			
 
				+  if (is.null(period)) {
			
 
				+    period <- 0.5 * horizon
			
 
				+  }
			
 
				   if (is.null(initial)) {
			
 
				     initial <- 3 * horizon
			
 
				   }
			
@@ -129,7 +138,7 @@ cross_validation <- function(model, horizon, units, period, initial = NULL) {
 
				     as.double(period.dt, units = 'secs')
			
 
				   )
			
 
				   if (k < 1) {
			
 
				-    stop('Not enough data for specified horizon and initial.')
			
 
				+    stop('Not enough data for specified horizon, period, and initial.')
			
 
				   }
			
 
				   return(simulated_historical_forecasts(model, horizon, units, k, period))
			
 
				 }
			
--- a/R/man/cross_validation.Rd
+++ b/R/man/cross_validation.Rd
@@ -2,13 +2,9 @@
 
				 % Please edit documentation in R/diagnostics.R
			
 
				 \name{cross_validation}
			
 
				 \alias{cross_validation}
			
 
				-\title{Cross-validation for time series.
			
 
				-Computes forecast error with cutoffs at the specified period. When the
			
 
				-period is the time interval of the data, is the procedure described in
			
 
				-https://robjhyndman.com/hyndsight/tscv/. Beginning from end-horizon, makes
			
 
				-a cutoff every "period" amount of time, going back to "initial".}
			
 
				+\title{Cross-validation for time series.}
			
 
				 \usage{
			
 
				-cross_validation(model, horizon, units, period, initial = NULL)
			
 
				+cross_validation(model, horizon, units, period = NULL, initial = NULL)
			
 
				 }
			
 
				 \arguments{
			
 
				 \item{model}{Fitted Prophet model.}
			
@@ -18,7 +14,7 @@ cross_validation(model, horizon, units, period, initial = NULL)
 
				 \item{units}{String unit of the horizon, e.g., "days", "secs".}
			
 
				 
			
 
				 \item{period}{Integer amount of time between cutoff dates. Same units as
			
 
				-horizon.}
			
 
				+horizon. If not provided, 0.5 * horizon is used.}
			
 
				 
			
 
				 \item{initial}{Integer size of the first training period. If not provided,
			
 
				 3 * horizon is used. Same units as horizon.}
			
@@ -27,9 +23,10 @@ horizon.}
 
				 A dataframe with the forecast, actual value, and cutoff date.
			
 
				 }
			
 
				 \description{
			
 
				-Cross-validation for time series.
			
 
				-Computes forecast error with cutoffs at the specified period. When the
			
 
				-period is the time interval of the data, is the procedure described in
			
 
				-https://robjhyndman.com/hyndsight/tscv/. Beginning from end-horizon, makes
			
 
				-a cutoff every "period" amount of time, going back to "initial".
			
 
				+Computes forecasts from historical cutoff points. Beginning from initial,
			
 
				+makes cutoffs with a spacing of period up to (end - horizon).
			
 
				+}
			
 
				+\details{
			
 
				+When period is equal to the time interval of the data, this is the
			
 
				+technique described in https://robjhyndman.com/hyndsight/tscv/ .
			
 
				 }
			
--- a/R/man/parse_seasonality_args.Rd
+++ b/R/man/parse_seasonality_args.Rd
@@ -2,12 +2,9 @@
 
				 % Please edit documentation in R/prophet.R
			
 
				 \name{parse_seasonality_args}
			
 
				 \alias{parse_seasonality_args}
			
 
				-\alias{parse_seasonality_args}
			
 
				 \title{Get number of Fourier components for built-in seasonalities.}
			
 
				 \usage{
			
 
				 parse_seasonality_args(m, name, arg, auto.disable, default.order)
			
 
				-
			
 
				-parse_seasonality_args(m, name, arg, auto.disable, default.order)
			
 
				 }
			
 
				 \arguments{
			
 
				 \item{m}{Prophet object.}
			
@@ -20,26 +17,11 @@ provided.}
 
				 \item{auto.disable}{Bool if seasonality should be disabled when 'auto'.}
			
 
				 
			
 
				 \item{default.order}{Int default Fourier order.}
			
 
				-
			
 
				-\item{m}{Prophet object.}
			
 
				-
			
 
				-\item{name}{String name of the seasonality component.}
			
 
				-
			
 
				-\item{arg}{'auto', TRUE, FALSE, or number of Fourier components as
			
 
				-provided.}
			
 
				-
			
 
				-\item{auto.disable}{Bool if seasonality should be disabled when 'auto'.}
			
 
				-
			
 
				-\item{default.order}{Int default Fourier order.}
			
 
				 }
			
 
				 \value{
			
 
				 Number of Fourier components, or 0 for disabled.
			
 
				-
			
 
				-Number of Fourier components, or 0 for disabled.
			
 
				 }
			
 
				 \description{
			
 
				 Get number of Fourier components for built-in seasonalities.
			
 
				-
			
 
				-Get number of Fourier components for built-in seasonalities.
			
 
				 }
			
 
				 \keyword{internal}
			
--- a/R/man/simulated_historical_forecasts.Rd
+++ b/R/man/simulated_historical_forecasts.Rd
@@ -2,9 +2,7 @@
 
				 % Please edit documentation in R/diagnostics.R
			
 
				 \name{simulated_historical_forecasts}
			
 
				 \alias{simulated_historical_forecasts}
			
 
				-\title{Simulated historical forecasts.
			
 
				-Make forecasts from k historical cutoff dates, and compare forecast values
			
 
				-to actual values.}
			
 
				+\title{Simulated historical forecasts.}
			
 
				 \usage{
			
 
				 simulated_historical_forecasts(model, horizon, units, k, period = NULL)
			
 
				 }
			
@@ -24,7 +22,6 @@ horizon. If not provided, will use 0.5 * horizon.}
 
				 A dataframe with the forecast, actual value, and cutoff date.
			
 
				 }
			
 
				 \description{
			
 
				-Simulated historical forecasts.
			
 
				-Make forecasts from k historical cutoff dates, and compare forecast values
			
 
				-to actual values.
			
 
				+Make forecasts from k historical cutoff points, working backwards from
			
 
				+(end - horizon) with a spacing of period between each cutoff.
			
 
				 }
			
--- a/notebooks/diagnostics.ipynb
+++ b/notebooks/diagnostics.ipynb
--- a/python/fbprophet/diagnostics.py
+++ b/python/fbprophet/diagnostics.py
@@ -46,11 +46,13 @@ def _cutoffs(df, horizon, k, period):
 
				         cutoff -= period
			
 
				         # If data does not exist in data range (cutoff, cutoff + horizon]
			
 
				         if not (((df['ds'] > cutoff) & (df['ds'] <= cutoff + horizon)).any()):
			
 
				-            # Next cutoff point is 'closest date before cutoff in data - horizon'
			
 
				+            # Next cutoff point is 'last date before cutoff in data - horizon'
			
 
				             closest_date = df[df['ds'] <= cutoff].max()['ds']
			
 
				             cutoff = closest_date - horizon
			
 
				         if cutoff < df['ds'].min():
			
 
				-            logger.warning('Not enough data for requested number of cutoffs! Using {}.'.format(i))
			
 
				+            logger.warning(
			
 
				+                'Not enough data for requested number of cutoffs! '
			
 
				+                'Using {}.'.format(i))
			
 
				             break
			
 
				         result.append(cutoff)
			
 
				 
			
@@ -60,20 +62,20 @@ def _cutoffs(df, horizon, k, period):
 
				 
			
 
				 def simulated_historical_forecasts(model, horizon, k, period=None):
			
 
				     """Simulated Historical Forecasts.
			
 
				-        If you would like to know it in detail, read the original paper
			
 
				-        https://facebookincubator.github.io/prophet/static/prophet_paper_20170113.pdf
			
 
				+
			
 
				+    Make forecasts from k historical cutoff points, working backwards from
			
 
				+    (end - horizon) with a spacing of period between each cutoff.
			
 
				 
			
 
				     Parameters
			
 
				     ----------
			
 
				     model: Prophet class object.
			
 
				         Fitted Prophet model
			
 
				-    horizon: string which has pd.Timedelta compatible style.
			
 
				-        Forecast horizon ('5 days', '3 hours', '10 seconds' etc)
			
 
				-    k: Int number.
			
 
				-        The number of forecasts point.
			
 
				-    period: string which has pd.Timedelta compatible style or None, default None.
			
 
				-        Simulated Forecast will be done at every this period.
			
 
				-        0.5 * horizon is used when it is None.
			
 
				+    horizon: string with pd.Timedelta compatible style, e.g., '5 days',
			
 
				+        '3 hours', '10 seconds'.
			
 
				+    k: Int number of forecast points.
			
 
				+    period: Optional string with pd.Timedelta compatible style. Simulated
			
 
				+        forecast will be done once every period. If not provided,
			
 
				+        0.5 * horizon is used.
			
 
				 
			
 
				     Returns
			
 
				     -------
			
@@ -108,21 +110,24 @@ def simulated_historical_forecasts(model, horizon, k, period=None):
 
				     return reduce(lambda x, y: x.append(y), predicts).reset_index(drop=True)
			
 
				 
			
 
				 
			
 
				-def cross_validation(model, horizon, period, initial=None):
			
 
				-    """Cross-Validation for time-series.
			
 
				-        This function is the same with Time series cross-validation described in https://robjhyndman.com/hyndsight/tscv/
			
 
				-        when the value of period is equal to the time interval of data.
			
 
				+def cross_validation(model, horizon, period=None, initial=None):
			
 
				+    """Cross-Validation for time series.
			
 
				+
			
 
				+    Computes forecasts from historical cutoff points. Beginning from initial,
			
 
				+    makes cutoffs with a spacing of period up to (end - horizon).
			
 
				+
			
 
				+    When period is equal to the time interval of the data, this is the
			
 
				+    technique described in https://robjhyndman.com/hyndsight/tscv/ .
			
 
				 
			
 
				     Parameters
			
 
				     ----------
			
 
				     model: Prophet class object. Fitted Prophet model
			
 
				-    horizon: string which has pd.Timedelta compatible style.
			
 
				-        Forecast horizon ('5 days', '3 hours', '10 seconds' etc)
			
 
				-    period: string which has pd.Timedelta compatible style.
			
 
				-        Simulated Forecast will be done at every this period.
			
 
				-    initial: string which has pd.Timedelta compatible style or None, default None.
			
 
				-        First training period.
			
 
				-        3 * horizon is used when it is None.
			
 
				+    horizon: string with pd.Timedelta compatible style, e.g., '5 days',
			
 
				+        '3 hours', '10 seconds'.
			
 
				+    period: string with pd.Timedelta compatible style. Simulated forecast will
			
 
				+        be done once every period. If not provided, 0.5 * horizon is used.
			
 
				+    initial: string with pd.Timedelta compatible style. The first training
			
 
				+        period will begin here. If not provided, 3 * horizon is used.
			
 
				 
			
 
				     Returns
			
 
				     -------
			
@@ -131,9 +136,10 @@ def cross_validation(model, horizon, period, initial=None):
 
				     te = model.history['ds'].max()
			
 
				     ts = model.history['ds'].min()
			
 
				     horizon = pd.Timedelta(horizon)
			
 
				-    period = pd.Timedelta(period)
			
 
				+    period = 0.5 * horizon if period is None else pd.Timedelta(period)
			
 
				     initial = 3 * horizon if initial is None else pd.Timedelta(initial)
			
 
				     k = int(np.ceil(((te - horizon) - (ts + initial)) / period))
			
 
				     if k < 1:
			
 
				-        raise ValueError('Not enough data for specified horizon and initial.')
			
 
				+        raise ValueError(
			
 
				+            'Not enough data for specified horizon, period, and initial.')
			
 
				     return simulated_historical_forecasts(model, horizon, k, period)