浏览代码

Documentation for cross validation

bl 8 年之前
父节点
当前提交
2f9b20b2d3

+ 30 - 21
R/R/diagnostics.R

@@ -27,19 +27,21 @@ generate_cutoffs <- function(df, horizon, k, period) {
   }
   tzone <- attr(cutoff, "tzone")  # Timezone is wiped by putting in array
   result <- c(cutoff)
-  for (i in 2:k) {
-    cutoff <- cutoff - period
-    # If data does not exist in data range (cutoff, cutoff + horizon]
-    if (!any((df$ds > cutoff) & (df$ds <= cutoff + horizon))) {
-      # Next cutoff point is 'closest date before cutoff in data - horizon'
-      closest.date <- max(df$ds[df$ds <= cutoff])
-      cutoff <- closest.date - horizon
-    }
-    if (cutoff < min(df$ds)) {
-      warning('Not enough data for requested number of cutoffs! Using ', i)
-      break
+  if (k > 1) {
+    for (i in 2:k) {
+      cutoff <- cutoff - period
+      # If data does not exist in data range (cutoff, cutoff + horizon]
+      if (!any((df$ds > cutoff) & (df$ds <= cutoff + horizon))) {
+        # Next cutoff point is 'closest date before cutoff in data - horizon'
+        closest.date <- max(df$ds[df$ds <= cutoff])
+        cutoff <- closest.date - horizon
+      }
+      if (cutoff < min(df$ds)) {
+        warning('Not enough data for requested number of cutoffs! Using ', i)
+        break
+      }
+      result <- c(result, cutoff)
     }
-    result <- c(result, cutoff)
   }
   # Reset timezones
   attr(result, "tzone") <- tzone
@@ -47,8 +49,9 @@ generate_cutoffs <- function(df, horizon, k, period) {
 }
 
 #' Simulated historical forecasts.
-#' Make forecasts from k historical cutoff dates, and compare forecast values
-#' to actual values.
+#'
+#' Make forecasts from k historical cutoff points, working backwards from
+#' (end - horizon) with a spacing of period between each cutoff.
 #'
 #' @param model Fitted Prophet model.
 #' @param horizon Integer size of the horizon
@@ -99,25 +102,31 @@ simulated_historical_forecasts <- function(model, horizon, units, k,
 }
 
 #' Cross-validation for time series.
-#' Computes forecast error with cutoffs at the specified period. When the
-#' period is the time interval of the data, is the procedure described in
-#' https://robjhyndman.com/hyndsight/tscv/. Beginning from end-horizon, makes
-#' a cutoff every "period" amount of time, going back to "initial".
+#'
+#' Computes forecasts from historical cutoff points. Beginning from initial,
+#' makes cutoffs with a spacing of period up to (end - horizon).
+#'
+#' When period is equal to the time interval of the data, this is the
+#' technique described in https://robjhyndman.com/hyndsight/tscv/ .
 #'
 #' @param model Fitted Prophet model.
 #' @param horizon Integer size of the horizon
 #' @param units String unit of the horizon, e.g., "days", "secs".
 #' @param period Integer amount of time between cutoff dates. Same units as
-#'  horizon.
+#'  horizon. If not provided, 0.5 * horizon is used.
 #' @param initial Integer size of the first training period. If not provided,
 #'  3 * horizon is used. Same units as horizon.
 #'
 #' @return A dataframe with the forecast, actual value, and cutoff date.
 #'
 #' @export
-cross_validation <- function(model, horizon, units, period, initial = NULL) {
+cross_validation <- function(
+    model, horizon, units, period = NULL, initial = NULL) {
   te <- max(model$history$ds)
   ts <- min(model$history$ds)
+  if (is.null(period)) {
+    period <- 0.5 * horizon
+  }
   if (is.null(initial)) {
     initial <- 3 * horizon
   }
@@ -129,7 +138,7 @@ cross_validation <- function(model, horizon, units, period, initial = NULL) {
     as.double(period.dt, units = 'secs')
   )
   if (k < 1) {
-    stop('Not enough data for specified horizon and initial.')
+    stop('Not enough data for specified horizon, period, and initial.')
   }
   return(simulated_historical_forecasts(model, horizon, units, k, period))
 }

+ 9 - 12
R/man/cross_validation.Rd

@@ -2,13 +2,9 @@
 % Please edit documentation in R/diagnostics.R
 \name{cross_validation}
 \alias{cross_validation}
-\title{Cross-validation for time series.
-Computes forecast error with cutoffs at the specified period. When the
-period is the time interval of the data, is the procedure described in
-https://robjhyndman.com/hyndsight/tscv/. Beginning from end-horizon, makes
-a cutoff every "period" amount of time, going back to "initial".}
+\title{Cross-validation for time series.}
 \usage{
-cross_validation(model, horizon, units, period, initial = NULL)
+cross_validation(model, horizon, units, period = NULL, initial = NULL)
 }
 \arguments{
 \item{model}{Fitted Prophet model.}
@@ -18,7 +14,7 @@ cross_validation(model, horizon, units, period, initial = NULL)
 \item{units}{String unit of the horizon, e.g., "days", "secs".}
 
 \item{period}{Integer amount of time between cutoff dates. Same units as
-horizon.}
+horizon. If not provided, 0.5 * horizon is used.}
 
 \item{initial}{Integer size of the first training period. If not provided,
 3 * horizon is used. Same units as horizon.}
@@ -27,9 +23,10 @@ horizon.}
 A dataframe with the forecast, actual value, and cutoff date.
 }
 \description{
-Cross-validation for time series.
-Computes forecast error with cutoffs at the specified period. When the
-period is the time interval of the data, is the procedure described in
-https://robjhyndman.com/hyndsight/tscv/. Beginning from end-horizon, makes
-a cutoff every "period" amount of time, going back to "initial".
+Computes forecasts from historical cutoff points. Beginning from initial,
+makes cutoffs with a spacing of period up to (end - horizon).
+}
+\details{
+When period is equal to the time interval of the data, this is the
+technique described in https://robjhyndman.com/hyndsight/tscv/ .
 }

+ 0 - 18
R/man/parse_seasonality_args.Rd

@@ -2,12 +2,9 @@
 % Please edit documentation in R/prophet.R
 \name{parse_seasonality_args}
 \alias{parse_seasonality_args}
-\alias{parse_seasonality_args}
 \title{Get number of Fourier components for built-in seasonalities.}
 \usage{
 parse_seasonality_args(m, name, arg, auto.disable, default.order)
-
-parse_seasonality_args(m, name, arg, auto.disable, default.order)
 }
 \arguments{
 \item{m}{Prophet object.}
@@ -20,26 +17,11 @@ provided.}
 \item{auto.disable}{Bool if seasonality should be disabled when 'auto'.}
 
 \item{default.order}{Int default Fourier order.}
-
-\item{m}{Prophet object.}
-
-\item{name}{String name of the seasonality component.}
-
-\item{arg}{'auto', TRUE, FALSE, or number of Fourier components as
-provided.}
-
-\item{auto.disable}{Bool if seasonality should be disabled when 'auto'.}
-
-\item{default.order}{Int default Fourier order.}
 }
 \value{
 Number of Fourier components, or 0 for disabled.
-
-Number of Fourier components, or 0 for disabled.
 }
 \description{
 Get number of Fourier components for built-in seasonalities.
-
-Get number of Fourier components for built-in seasonalities.
 }
 \keyword{internal}

+ 3 - 6
R/man/simulated_historical_forecasts.Rd

@@ -2,9 +2,7 @@
 % Please edit documentation in R/diagnostics.R
 \name{simulated_historical_forecasts}
 \alias{simulated_historical_forecasts}
-\title{Simulated historical forecasts.
-Make forecasts from k historical cutoff dates, and compare forecast values
-to actual values.}
+\title{Simulated historical forecasts.}
 \usage{
 simulated_historical_forecasts(model, horizon, units, k, period = NULL)
 }
@@ -24,7 +22,6 @@ horizon. If not provided, will use 0.5 * horizon.}
 A dataframe with the forecast, actual value, and cutoff date.
 }
 \description{
-Simulated historical forecasts.
-Make forecasts from k historical cutoff dates, and compare forecast values
-to actual values.
+Make forecasts from k historical cutoff points, working backwards from
+(end - horizon) with a spacing of period between each cutoff.
 }

文件差异内容过多而无法显示
+ 260 - 0
notebooks/diagnostics.ipynb


+ 30 - 24
python/fbprophet/diagnostics.py

@@ -46,11 +46,13 @@ def _cutoffs(df, horizon, k, period):
         cutoff -= period
         # If data does not exist in data range (cutoff, cutoff + horizon]
         if not (((df['ds'] > cutoff) & (df['ds'] <= cutoff + horizon)).any()):
-            # Next cutoff point is 'closest date before cutoff in data - horizon'
+            # Next cutoff point is 'last date before cutoff in data - horizon'
             closest_date = df[df['ds'] <= cutoff].max()['ds']
             cutoff = closest_date - horizon
         if cutoff < df['ds'].min():
-            logger.warning('Not enough data for requested number of cutoffs! Using {}.'.format(i))
+            logger.warning(
+                'Not enough data for requested number of cutoffs! '
+                'Using {}.'.format(i))
             break
         result.append(cutoff)
 
@@ -60,20 +62,20 @@ def _cutoffs(df, horizon, k, period):
 
 def simulated_historical_forecasts(model, horizon, k, period=None):
     """Simulated Historical Forecasts.
-        If you would like to know it in detail, read the original paper
-        https://facebookincubator.github.io/prophet/static/prophet_paper_20170113.pdf
+
+    Make forecasts from k historical cutoff points, working backwards from
+    (end - horizon) with a spacing of period between each cutoff.
 
     Parameters
     ----------
     model: Prophet class object.
         Fitted Prophet model
-    horizon: string which has pd.Timedelta compatible style.
-        Forecast horizon ('5 days', '3 hours', '10 seconds' etc)
-    k: Int number.
-        The number of forecasts point.
-    period: string which has pd.Timedelta compatible style or None, default None.
-        Simulated Forecast will be done at every this period.
-        0.5 * horizon is used when it is None.
+    horizon: string with pd.Timedelta compatible style, e.g., '5 days',
+        '3 hours', '10 seconds'.
+    k: Int number of forecast points (cutoffs).
+    period: Optional string with pd.Timedelta compatible style. Spacing
+        between successive cutoff points. If not provided,
+        0.5 * horizon is used.
 
     Returns
     -------
@@ -108,21 +110,24 @@ def simulated_historical_forecasts(model, horizon, k, period=None):
     return reduce(lambda x, y: x.append(y), predicts).reset_index(drop=True)
 
 
-def cross_validation(model, horizon, period, initial=None):
-    """Cross-Validation for time-series.
-        This function is the same with Time series cross-validation described in https://robjhyndman.com/hyndsight/tscv/
-        when the value of period is equal to the time interval of data.
+def cross_validation(model, horizon, period=None, initial=None):
+    """Cross-Validation for time series.
+
+    Computes forecasts from historical cutoff points. Beginning from initial,
+    makes cutoffs with a spacing of period up to (end - horizon).
+
+    When period is equal to the time interval of the data, this is the
+    technique described in https://robjhyndman.com/hyndsight/tscv/ .
 
     Parameters
     ----------
     model: Prophet class object. Fitted Prophet model
-    horizon: string which has pd.Timedelta compatible style.
-        Forecast horizon ('5 days', '3 hours', '10 seconds' etc)
-    period: string which has pd.Timedelta compatible style.
-        Simulated Forecast will be done at every this period.
-    initial: string which has pd.Timedelta compatible style or None, default None.
-        First training period.
-        3 * horizon is used when it is None.
+    horizon: string with pd.Timedelta compatible style, e.g., '5 days',
+        '3 hours', '10 seconds'.
+    period: Optional string with pd.Timedelta compatible style. Spacing
+        between cutoff points. If not provided, 0.5 * horizon is used.
+    initial: Optional string with pd.Timedelta compatible style. Size of the
+        first training period. If not provided, 3 * horizon is used.
 
     Returns
     -------
@@ -131,9 +136,10 @@ def cross_validation(model, horizon, period, initial=None):
     te = model.history['ds'].max()
     ts = model.history['ds'].min()
     horizon = pd.Timedelta(horizon)
-    period = pd.Timedelta(period)
+    period = 0.5 * horizon if period is None else pd.Timedelta(period)
     initial = 3 * horizon if initial is None else pd.Timedelta(initial)
     k = int(np.ceil(((te - horizon) - (ts + initial)) / period))
     if k < 1:
-        raise ValueError('Not enough data for specified horizon and initial.')
+        raise ValueError(
+            'Not enough data for specified horizon, period, and initial.')
     return simulated_historical_forecasts(model, horizon, k, period)