vor 8 Jahren · 218283f157
--- a/R/NAMESPACE
+++ b/R/NAMESPACE
@@ -15,6 +15,5 @@ export(plot_forecast_component)
 
				 export(predictive_samples)
			
 
				 export(prophet)
			
 
				 export(prophet_plot_components)
			
 
				-export(simulated_historical_forecasts)
			
 
				 import(Rcpp)
			
 
				 importFrom(dplyr,"%>%")
			
--- a/R/R/diagnostics.R
+++ b/R/R/diagnostics.R
@@ -11,68 +11,81 @@ globalVariables(c(
 
				 
			
 
				 #' Generate cutoff dates
			
 
				 #'
			
 
				-#' @param df Dataframe with historical data
			
 
				-#' @param horizon timediff forecast horizon
			
 
				-#' @param k integer number of forecast points
			
 
				+#' @param df Dataframe with historical data.
			
 
				+#' @param horizon timediff forecast horizon.
			
 
				+#' @param initial timediff initial window.
			
 
				 #' @param period timediff Simulated forecasts are done with this period.
			
 
				 #'
			
 
				-#' @return Array of datetimes
			
 
				+#' @return Array of datetimes.
			
 
				 #'
			
 
				 #' @keywords internal
			
 
				-generate_cutoffs <- function(df, horizon, k, period) {
			
 
				+generate_cutoffs <- function(df, horizon, initial, period) {
			
 
				   # Last cutoff is (latest date in data) - (horizon).
			
 
				   cutoff <- max(df$ds) - horizon
			
 
				-  if (cutoff < min(df$ds)) {
			
 
				-    stop('Less data than horizon.')
			
 
				-  }
			
 
				   tzone <- attr(cutoff, "tzone")  # Timezone is wiped by putting in array
			
 
				-  result <- cutoff
			
 
				-  if (k > 1) {
			
 
				-    for (i in 2:k) {
			
 
				-      cutoff <- cutoff - period
			
 
				-      # If data does not exist in data range (cutoff, cutoff + horizon]
			
 
				-      if (!any((df$ds > cutoff) & (df$ds <= cutoff + horizon))) {
			
 
				+  result <- c(cutoff)
			
 
				+  while (result[length(result)] >= min(df$ds) + initial) {
			
 
				+    cutoff <- cutoff - period
			
 
				+    # If data does not exist in data range (cutoff, cutoff + horizon]
			
 
				+    if (!any((df$ds > cutoff) & (df$ds <= cutoff + horizon))) {
			
 
				         # Next cutoff point is 'closest date before cutoff in data - horizon'
			
 
				         closest.date <- max(df$ds[df$ds <= cutoff])
			
 
				         cutoff <- closest.date - horizon
			
 
				-      }
			
 
				-      if (cutoff < min(df$ds)) {
			
 
				-        warning('Not enough data for requested number of cutoffs! Using ', i)
			
 
				-        break
			
 
				-      }
			
 
				-      result <- c(result, cutoff)
			
 
				     }
			
 
				+    result <- c(result, cutoff)
			
 
				+  }
			
 
				+  result <- head(result, -1)
			
 
				+  if (length(result) == 0) {
			
 
				+    stop(paste(
			
 
				+      'Less data than horizon after initial window.',
			
 
				+      'Make horizon or initial shorter.'
			
 
				+    ))
			
 
				   }
			
 
				   # Reset timezones
			
 
				   attr(result, "tzone") <- tzone
			
 
				+  message(paste(
			
 
				+    'Making', length(result), 'forecasts with cutoffs between',
			
 
				+    result[length(result)], 'and', result[1]
			
 
				+  ))
			
 
				   return(rev(result))
			
 
				 }
			
 
				 
			
 
				-#' Simulated historical forecasts.
			
 
				+#' Cross-validation for time series.
			
 
				+#'
			
 
				+#' Computes forecasts from historical cutoff points. Beginning from
			
 
				+#' (end - horizon), works backwards making cutoffs with a spacing of period
			
 
				+#' until initial is reached.
			
 
				 #'
			
 
				-#' Make forecasts from k historical cutoff points, working backwards from
			
 
				-#' (end - horizon) with a spacing of period between each cutoff.
			
 
				+#' When period is equal to the time interval of the data, this is the
			
 
				+#' technique described in https://robjhyndman.com/hyndsight/tscv/ .
			
 
				 #'
			
 
				 #' @param model Fitted Prophet model.
			
 
				 #' @param horizon Integer size of the horizon
			
 
				 #' @param units String unit of the horizon, e.g., "days", "secs".
			
 
				-#' @param k integer number of forecast points
			
 
				 #' @param period Integer amount of time between cutoff dates. Same units as
			
 
				-#'  horizon. If not provided, will use 0.5 * horizon.
			
 
				+#'  horizon. If not provided, 0.5 * horizon is used.
			
 
				+#' @param initial Integer size of the first training period. If not provided,
			
 
				+#'  3 * horizon is used. Same units as horizon.
			
 
				 #'
			
 
				 #' @return A dataframe with the forecast, actual value, and cutoff date.
			
 
				 #'
			
 
				 #' @export
			
 
				-simulated_historical_forecasts <- function(model, horizon, units, k,
			
 
				-                                           period = NULL) {
			
 
				+cross_validation <- function(
			
 
				+    model, horizon, units, period = NULL, initial = NULL) {
			
 
				   df <- model$history
			
 
				-  horizon <- as.difftime(horizon, units = units)
			
 
				+  te <- max(df$ds)
			
 
				+  ts <- min(df$ds)
			
 
				   if (is.null(period)) {
			
 
				-    period <- horizon / 2
			
 
				-  } else {
			
 
				-    period <- as.difftime(period, units = units)
			
 
				+    period <- 0.5 * horizon
			
 
				+  }
			
 
				+  if (is.null(initial)) {
			
 
				+    initial <- 3 * horizon
			
 
				   }
			
 
				-  cutoffs <- generate_cutoffs(df, horizon, k, period)
			
 
				+  horizon.dt <- as.difftime(horizon, units = units)
			
 
				+  initial.dt <- as.difftime(initial, units = units)
			
 
				+  period.dt <- as.difftime(period, units = units)
			
 
				+
			
 
				+  cutoffs <- generate_cutoffs(df, horizon.dt, initial.dt, period.dt)
			
 
				   predicts <- data.frame()
			
 
				   for (i in seq_along(cutoffs)) {
			
 
				     cutoff <- cutoffs[i]
			
@@ -80,9 +93,12 @@ simulated_historical_forecasts <- function(model, horizon, units, k,
 
				     m <- prophet_copy(model, cutoff)
			
 
				     # Train model
			
 
				     history.c <- dplyr::filter(df, ds <= cutoff)
			
 
				+    if (nrow(history.c) < 2) {
			
 
				+      stop('Less than two datapoints before cutoff. Increase initial window.')
			
 
				+    }
			
 
				     m <- fit.prophet(m, history.c)
			
 
				     # Calculate yhat
			
 
				-    df.predict <- dplyr::filter(df, ds > cutoff, ds <= cutoff + horizon)
			
 
				+    df.predict <- dplyr::filter(df, ds > cutoff, ds <= cutoff + horizon.dt)
			
 
				     # Get the columns for the future dataframe
			
 
				     columns <- 'ds'
			
 
				     if (m$growth == 'logistic') {
			
@@ -92,7 +108,7 @@ simulated_historical_forecasts <- function(model, horizon, units, k,
 
				       }
			
 
				     }
			
 
				     columns <- c(columns, names(m$extra_regressors))
			
 
				-    future <- df[columns]
			
 
				+    future <- df.predict[columns]
			
 
				     yhat <- stats::predict(m, future)
			
 
				     # Merge yhat, y, and cutoff.
			
 
				     df.c <- dplyr::inner_join(df.predict, yhat, by = "ds")
			
@@ -103,48 +119,6 @@ simulated_historical_forecasts <- function(model, horizon, units, k,
 
				   return(predicts)
			
 
				 }
			
 
				 
			
 
				-#' Cross-validation for time series.
			
 
				-#'
			
 
				-#' Computes forecasts from historical cutoff points. Beginning from initial,
			
 
				-#' makes cutoffs with a spacing of period up to (end - horizon).
			
 
				-#'
			
 
				-#' When period is equal to the time interval of the data, this is the
			
 
				-#' technique described in https://robjhyndman.com/hyndsight/tscv/ .
			
 
				-#'
			
 
				-#' @param model Fitted Prophet model.
			
 
				-#' @param horizon Integer size of the horizon
			
 
				-#' @param units String unit of the horizon, e.g., "days", "secs".
			
 
				-#' @param period Integer amount of time between cutoff dates. Same units as
			
 
				-#'  horizon. If not provided, 0.5 * horizon is used.
			
 
				-#' @param initial Integer size of the first training period. If not provided,
			
 
				-#'  3 * horizon is used. Same units as horizon.
			
 
				-#'
			
 
				-#' @return A dataframe with the forecast, actual value, and cutoff date.
			
 
				-#'
			
 
				-#' @export
			
 
				-cross_validation <- function(
			
 
				-    model, horizon, units, period = NULL, initial = NULL) {
			
 
				-  te <- max(model$history$ds)
			
 
				-  ts <- min(model$history$ds)
			
 
				-  if (is.null(period)) {
			
 
				-    period <- 0.5 * horizon
			
 
				-  }
			
 
				-  if (is.null(initial)) {
			
 
				-    initial <- 3 * horizon
			
 
				-  }
			
 
				-  horizon.dt <- as.difftime(horizon, units = units)
			
 
				-  initial.dt <- as.difftime(initial, units = units)
			
 
				-  period.dt <- as.difftime(period, units = units)
			
 
				-  k <- ceiling(
			
 
				-    as.double((te - horizon.dt) - (ts + initial.dt), units='secs') /
			
 
				-    as.double(period.dt, units = 'secs')
			
 
				-  )
			
 
				-  if (k < 1) {
			
 
				-    stop('Not enough data for specified horizon, period, and initial.')
			
 
				-  }
			
 
				-  return(simulated_historical_forecasts(model, horizon, units, k, period))
			
 
				-}
			
 
				-
			
 
				 #' Copy Prophet object.
			
 
				 #'
			
 
				 #' @param m Prophet model object.
			
--- a/R/man/cross_validation.Rd
+++ b/R/man/cross_validation.Rd
@@ -23,8 +23,9 @@ horizon. If not provided, 0.5 * horizon is used.}
 
				 A dataframe with the forecast, actual value, and cutoff date.
			
 
				 }
			
 
				 \description{
			
 
				-Computes forecasts from historical cutoff points. Beginning from initial,
			
 
				-makes cutoffs with a spacing of period up to (end - horizon).
			
 
				+Computes forecasts from historical cutoff points. Beginning from
			
 
				+(end - horizon), works backwards making cutoffs with a spacing of period
			
 
				+until initial is reached.
			
 
				 }
			
 
				 \details{
			
 
				 When period is equal to the time interval of the data, this is the
			
--- a/R/man/dyplot.prophet.Rd
+++ b/R/man/dyplot.prophet.Rd
@@ -24,8 +24,9 @@ Plot the prophet forecast.
 
				 }
			
 
				 \examples{
			
 
				 \dontrun{
			
 
				-history <- data.frame(ds = seq(as.Date('2015-01-01'), as.Date('2016-01-01'), by = 'd'),
			
 
				-                      y = sin(1:366/200) + rnorm(366)/10)
			
 
				+history <- data.frame(
			
 
				+ ds = seq(as.Date('2015-01-01'), as.Date('2016-01-01'), by = 'd'),
			
 
				+ y = sin(1:366/200) + rnorm(366)/10)
			
 
				 m <- prophet(history)
			
 
				 future <- make_future_dataframe(m, periods = 365)
			
 
				 forecast <- predict(m, future)
			
--- a/R/man/generate_cutoffs.Rd
+++ b/R/man/generate_cutoffs.Rd
@@ -4,14 +4,14 @@
 
				 \alias{generate_cutoffs}
			
 
				 \title{Generate cutoff dates}
			
 
				 \usage{
			
 
				-generate_cutoffs(df, horizon, k, period)
			
 
				+generate_cutoffs(df, horizon, initial, period)
			
 
				 }
			
 
				 \arguments{
			
 
				 \item{df}{Dataframe with historical data}
			
 
				 
			
 
				 \item{horizon}{timediff forecast horizon}
			
 
				 
			
 
				-\item{k}{integer number of forecast points}
			
 
				+\item{initial}{timediff initial window}
			
 
				 
			
 
				 \item{period}{timediff Simulated forecasts are done with this period.}
			
 
				 }
			
--- a/R/man/prophet.Rd
+++ b/R/man/prophet.Rd
@@ -5,12 +5,13 @@
 
				 \title{Prophet forecaster.}
			
 
				 \usage{
			
 
				 prophet(df = NULL, growth = "linear", changepoints = NULL,
			
 
				-  n.changepoints = 25, yearly.seasonality = "auto",
			
 
				-  weekly.seasonality = "auto", daily.seasonality = "auto",
			
 
				-  holidays = NULL, seasonality.mode = "additive",
			
 
				-  seasonality.prior.scale = 10, holidays.prior.scale = 10,
			
 
				-  changepoint.prior.scale = 0.05, mcmc.samples = 0, interval.width = 0.8,
			
 
				-  uncertainty.samples = 1000, fit = TRUE, ...)
			
 
				+  n.changepoints = 25, changepoint.range = 0.8,
			
 
				+  yearly.seasonality = "auto", weekly.seasonality = "auto",
			
 
				+  daily.seasonality = "auto", holidays = NULL,
			
 
				+  seasonality.mode = "additive", seasonality.prior.scale = 10,
			
 
				+  holidays.prior.scale = 10, changepoint.prior.scale = 0.05,
			
 
				+  mcmc.samples = 0, interval.width = 0.8, uncertainty.samples = 1000,
			
 
				+  fit = TRUE, ...)
			
 
				 }
			
 
				 \arguments{
			
 
				 \item{df}{(optional) Dataframe containing the history. Must have columns ds
			
@@ -29,7 +30,11 @@ automatically.}
 
				 \item{n.changepoints}{Number of potential changepoints to include. Not used
			
 
				 if input `changepoints` is supplied. If `changepoints` is not supplied,
			
 
				 then n.changepoints potential changepoints are selected uniformly from the
			
 
				-first 80 percent of df$ds.}
			
 
				+first `changepoint.range` proportion of df$ds.}
			
 
				+
			
 
				+\item{changepoint.range}{Proportion of history in which trend changepoints
			
 
				+will be estimated. Defaults to 0.8 for the first 80%. Not used if
			
 
				+`changepoints` is specified.}
			
 
				 
			
 
				 \item{yearly.seasonality}{Fit yearly seasonality. Can be 'auto', TRUE,
			
 
				 FALSE, or a number of Fourier terms to generate.}
			
--- a/R/man/simulated_historical_forecasts.Rd
+++ b/R/man/simulated_historical_forecasts.Rd
@@ -1,27 +0,0 @@
 
				-% Generated by roxygen2: do not edit by hand
			
 
				-% Please edit documentation in R/diagnostics.R
			
 
				-\name{simulated_historical_forecasts}
			
 
				-\alias{simulated_historical_forecasts}
			
 
				-\title{Simulated historical forecasts.}
			
 
				-\usage{
			
 
				-simulated_historical_forecasts(model, horizon, units, k, period = NULL)
			
 
				-}
			
 
				-\arguments{
			
 
				-\item{model}{Fitted Prophet model.}
			
 
				-
			
 
				-\item{horizon}{Integer size of the horizon}
			
 
				-
			
 
				-\item{units}{String unit of the horizon, e.g., "days", "secs".}
			
 
				-
			
 
				-\item{k}{integer number of forecast points}
			
 
				-
			
 
				-\item{period}{Integer amount of time between cutoff dates. Same units as
			
 
				-horizon. If not provided, will use 0.5 * horizon.}
			
 
				-}
			
 
				-\value{
			
 
				-A dataframe with the forecast, actual value, and cutoff date.
			
 
				-}
			
 
				-\description{
			
 
				-Make forecasts from k historical cutoff points, working backwards from
			
 
				-(end - horizon) with a spacing of period between each cutoff.
			
 
				-}
			
--- a/R/tests/testthat/test_diagnostics.R
+++ b/R/tests/testthat/test_diagnostics.R
@@ -8,47 +8,50 @@ DATA_all <- read.csv('data.csv')
 
				 DATA_all$ds <- as.Date(DATA_all$ds)
			
 
				 DATA <- head(DATA_all, 100)
			
 
				 
			
 
				-test_that("simulated_historical_forecasts", {
			
 
				+test_that("cross_validation", {
			
 
				   skip_if_not(Sys.getenv('R_ARCH') != '/i386')
			
 
				   m <- prophet(DATA)
			
 
				-  k <- 2
			
 
				-  for (p in c(1, 10)) {
			
 
				-    for (h in c(1, 3)) {
			
 
				-      df.shf <- simulated_historical_forecasts(
			
 
				-        m, horizon = h, units = 'days', k = k, period = p)
			
 
				-      # All cutoff dates should be less than ds dates
			
 
				-      expect_true(all(df.shf$cutoff < df.shf$ds))
			
 
				-      # The unique size of output cutoff should be equal to 'k'
			
 
				-      expect_equal(length(unique(df.shf$cutoff)), k)
			
 
				-      expect_equal(max(df.shf$ds - df.shf$cutoff),
			
 
				-                   as.difftime(h, units = 'days'))
			
 
				-      dc <- diff(df.shf$cutoff)
			
 
				-      dc <- min(dc[dc > 0])
			
 
				-      expect_true(dc >= as.difftime(p, units = 'days'))
			
 
				-      # Each y in df_shf and DATA with same ds should be equal
			
 
				-      df.merged <- dplyr::left_join(df.shf, m$history, by="ds")
			
 
				-      expect_equal(sum((df.merged$y.x - df.merged$y.y) ** 2), 0)
			
 
				-    }
			
 
				-  }
			
 
				+  # Calculate the number of cutoff points
			
 
				+  te <- max(DATA$ds)
			
 
				+  ts <- min(DATA$ds)
			
 
				+  horizon <- as.difftime(4, units = "days")
			
 
				+  period <- as.difftime(10, units = "days")
			
 
				+  initial <- as.difftime(115, units = "days")
			
 
				+  df.cv <- cross_validation(
			
 
				+    m, horizon = 4, units = "days", period = 10, initial = 115)
			
 
				+  expect_equal(length(unique(df.cv$cutoff)), 3)
			
 
				+  expect_equal(max(df.cv$ds - df.cv$cutoff), horizon)
			
 
				+  expect_true(min(df.cv$cutoff) >= ts + initial)
			
 
				+  dc <- diff(df.cv$cutoff)
			
 
				+  dc <- min(dc[dc > 0])
			
 
				+  expect_true(dc >= period)
			
 
				+  expect_true(all(df.cv$cutoff < df.cv$ds))
			
 
				+  # Each y in df.cv and DATA with same ds should be equal
			
 
				+  df.merged <- dplyr::left_join(df.cv, m$history, by="ds")
			
 
				+  expect_equal(sum((df.merged$y.x - df.merged$y.y) ** 2), 0)
			
 
				+  df.cv <- cross_validation(
			
 
				+    m, horizon = 4, units = "days", period = 10, initial = 135)
			
 
				+  expect_equal(length(unique(df.cv$cutoff)), 1)
			
 
				+  expect_error(
			
 
				+    cross_validation(
			
 
				+      m, horizon = 10, units = "days", period = 10, initial = 140)
			
 
				+  )
			
 
				 })
			
 
				 
			
 
				-test_that("simulated_historical_forecasts_logistic", {
			
 
				+test_that("cross_validation_logistic", {
			
 
				   skip_if_not(Sys.getenv('R_ARCH') != '/i386')
			
 
				   df <- DATA
			
 
				   df$cap <- 40
			
 
				-  m <- prophet(df, growth='logistic')
			
 
				-  df.shf <- simulated_historical_forecasts(
			
 
				-    m, horizon = 3, units = 'days', k = 2, period = 3)
			
 
				-  # All cutoff dates should be less than ds dates
			
 
				-  expect_true(all(df.shf$cutoff < df.shf$ds))
			
 
				-  # The unique size of output cutoff should be equal to 'k'
			
 
				-  expect_equal(length(unique(df.shf$cutoff)), 2)
			
 
				-  # Each y in df_shf and DATA with same ds should be equal
			
 
				-  df.merged <- dplyr::left_join(df.shf, m$history, by="ds")
			
 
				+  m <- prophet(df, growth = 'logistic')
			
 
				+  df.cv <- cross_validation(
			
 
				+    m, horizon = 1, units = "days", period = 1, initial = 140)
			
 
				+  expect_equal(length(unique(df.cv$cutoff)), 2)
			
 
				+  expect_true(all(df.cv$cutoff < df.cv$ds))
			
 
				+  df.merged <- dplyr::left_join(df.cv, m$history, by="ds")
			
 
				   expect_equal(sum((df.merged$y.x - df.merged$y.y) ** 2), 0)
			
 
				 })
			
 
				 
			
 
				-test_that("simulated_historical_forecasts_extra_regressors", {
			
 
				+test_that("cross_validation_extra_regressors", {
			
 
				   skip_if_not(Sys.getenv('R_ARCH') != '/i386')
			
 
				   df <- DATA
			
 
				   df$extra <- seq(0, nrow(df) - 1)
			
@@ -56,43 +59,16 @@ test_that("simulated_historical_forecasts_extra_regressors", {
 
				   m <- add_seasonality(m, name = 'monthly', period = 30.5, fourier.order = 5)
			
 
				   m <- add_regressor(m, 'extra')
			
 
				   m <- fit.prophet(m, df)
			
 
				-  df.shf <- simulated_historical_forecasts(
			
 
				-    m, horizon = 3, units = 'days', k = 2, period = 3)
			
 
				-  # All cutoff dates should be less than ds dates
			
 
				-  expect_true(all(df.shf$cutoff < df.shf$ds))
			
 
				-  # The unique size of output cutoff should be equal to 'k'
			
 
				-  expect_equal(length(unique(df.shf$cutoff)), 2)
			
 
				-  # Each y in df_shf and DATA with same ds should be equal
			
 
				-  df.merged <- dplyr::left_join(df.shf, m$history, by="ds")
			
 
				-  expect_equal(sum((df.merged$y.x - df.merged$y.y) ** 2), 0)
			
 
				-})
			
 
				-
			
 
				-test_that("simulated_historical_forecasts_default_value_check", {
			
 
				-  skip_if_not(Sys.getenv('R_ARCH') != '/i386')
			
 
				-  m <- prophet(DATA)
			
 
				-  df.shf1 <- simulated_historical_forecasts(
			
 
				-    m, horizon = 10, units = 'days', k = 1)
			
 
				-  df.shf2 <- simulated_historical_forecasts(
			
 
				-    m, horizon = 10, units = 'days', k = 1, period = 5)
			
 
				-  expect_equal(sum(dplyr::select(df.shf1 - df.shf2, y, yhat)), 0)
			
 
				-})
			
 
				-
			
 
				-test_that("cross_validation", {
			
 
				-  skip_if_not(Sys.getenv('R_ARCH') != '/i386')
			
 
				-  m <- prophet(DATA)
			
 
				-  # Calculate the number of cutoff points
			
 
				-  te <- max(DATA$ds)
			
 
				-  ts <- min(DATA$ds)
			
 
				-  horizon <- as.difftime(4, units = "days")
			
 
				-  period <- as.difftime(10, units = "days")
			
 
				-  k <- 5
			
 
				   df.cv <- cross_validation(
			
 
				-    m, horizon = 4, units = "days", period = 10, initial = 90)
			
 
				-  expect_equal(length(unique(df.cv$cutoff)), k)
			
 
				-  expect_equal(max(df.cv$ds - df.cv$cutoff), horizon)
			
 
				+    m, horizon = 4, units = "days", period = 4, initial = 135)
			
 
				+  expect_equal(length(unique(df.cv$cutoff)), 2)
			
 
				+  period <- as.difftime(4, units = "days")
			
 
				   dc <- diff(df.cv$cutoff)
			
 
				   dc <- min(dc[dc > 0])
			
 
				   expect_true(dc >= period)
			
 
				+  expect_true(all(df.cv$cutoff < df.cv$ds))
			
 
				+  df.merged <- dplyr::left_join(df.cv, m$history, by="ds")
			
 
				+  expect_equal(sum((df.merged$y.x - df.merged$y.y) ** 2), 0)
			
 
				 })
			
 
				 
			
 
				 test_that("cross_validation_default_value_check", {
			
@@ -116,11 +92,11 @@ test_that("performance_metrics", {
 
				     sort(colnames(df_none))
			
 
				     == sort(c('horizon', 'coverage', 'mae', 'mape', 'mse', 'rmse'))
			
 
				   ))
			
 
				-  expect_equal(nrow(df_none), 14)
			
 
				+  expect_equal(nrow(df_none), 16)
			
 
				   # Aggregation level 0.2
			
 
				   df_horizon <- performance_metrics(df_cv, rolling_window = 0.2)
			
 
				   expect_equal(length(unique(df_horizon$horizon)), 4)
			
 
				-  expect_equal(nrow(df_horizon), 13)
			
 
				+  expect_equal(nrow(df_horizon), 14)
			
 
				   # Aggregation level all
			
 
				   df_all <- performance_metrics(df_cv, rolling_window = 1)
			
 
				   expect_equal(nrow(df_all), 1)
			
--- a/python/fbprophet/diagnostics.py
+++ b/python/fbprophet/diagnostics.py
@@ -21,18 +21,15 @@ import pandas as pd
 
				 logger = logging.getLogger(__name__)
			
 
				 
			
 
				 
			
 
				-def _cutoffs(df, horizon, k, period):
			
 
				+def generate_cutoffs(df, horizon, initial, period):
			
 
				     """Generate cutoff dates
			
 
				 
			
 
				     Parameters
			
 
				     ----------
			
 
				-    df: pd.DataFrame with historical data
			
 
				-    horizon: pd.Timedelta.
			
 
				-        Forecast horizon
			
 
				-    k: Int number.
			
 
				-        The number of forecasts point.
			
 
				-    period: pd.Timedelta.
			
 
				-        Simulated Forecast will be done at every this period.
			
 
				+    df: pd.DataFrame with historical data.
			
 
				+    horizon: pd.Timedelta forecast horizon.
			
 
				+    initial: pd.Timedelta window of the initial forecast period.
			
 
				+    period: pd.Timedelta simulated forecasts are done with this period.
			
 
				 
			
 
				     Returns
			
 
				     -------
			
@@ -43,56 +40,70 @@ def _cutoffs(df, horizon, k, period):
 
				     if cutoff < df['ds'].min():
			
 
				         raise ValueError('Less data than horizon.')
			
 
				     result = [cutoff]
			
 
				-
			
 
				-    for i in range(1, k):
			
 
				+    while result[-1] >= min(df['ds']) + initial:
			
 
				         cutoff -= period
			
 
				         # If data does not exist in data range (cutoff, cutoff + horizon]
			
 
				         if not (((df['ds'] > cutoff) & (df['ds'] <= cutoff + horizon)).any()):
			
 
				             # Next cutoff point is 'last date before cutoff in data - horizon'
			
 
				             closest_date = df[df['ds'] <= cutoff].max()['ds']
			
 
				             cutoff = closest_date - horizon
			
 
				-        if cutoff < df['ds'].min():
			
 
				-            logger.warning(
			
 
				-                'Not enough data for requested number of cutoffs! '
			
 
				-                'Using {}.'.format(i))
			
 
				-            break
			
 
				         result.append(cutoff)
			
 
				-
			
 
				-    # Sort lines in ascending order
			
 
				+    result = result[:-1]
			
 
				+    if len(result) == 0:
			
 
				+        raise ValueError(
			
 
				+            'Less data than horizon after initial window. '
			
 
				+            'Make horizon or initial shorter.'
			
 
				+        )
			
 
				+    logger.info('Making {} forecasts with cutoffs between {} and {}'.format(
			
 
				+        len(result), result[-1], result[0]
			
 
				+    ))
			
 
				     return reversed(result)
			
 
				 
			
 
				 
			
 
				-def simulated_historical_forecasts(model, horizon, k, period=None):
			
 
				-    """Simulated Historical Forecasts.
			
 
				+def cross_validation(model, horizon, period=None, initial=None):
			
 
				+    """Cross-Validation for time series.
			
 
				 
			
 
				-    Make forecasts from k historical cutoff points, working backwards from
			
 
				-    (end - horizon) with a spacing of period between each cutoff.
			
 
				+    Computes forecasts from historical cutoff points. Beginning from
			
 
				+    (end - horizon), works backwards making cutoffs with a spacing of period
			
 
				+    until initial is reached.
			
 
				+
			
 
				+    When period is equal to the time interval of the data, this is the
			
 
				+    technique described in https://robjhyndman.com/hyndsight/tscv/ .
			
 
				 
			
 
				     Parameters
			
 
				     ----------
			
 
				-    model: Prophet class object.
			
 
				-        Fitted Prophet model
			
 
				+    model: Prophet class object. Fitted Prophet model
			
 
				     horizon: string with pd.Timedelta compatible style, e.g., '5 days',
			
 
				         '3 hours', '10 seconds'.
			
 
				-    k: Int number of forecasts point.
			
 
				-    period: Optional string with pd.Timedelta compatible style. Simulated
			
 
				-        forecast will be done at every this period. If not provided,
			
 
				-        0.5 * horizon is used.
			
 
				+    period: string with pd.Timedelta compatible style. Simulated forecast will
			
 
				+        be done at every this period. If not provided, 0.5 * horizon is used.
			
 
				+    initial: string with pd.Timedelta compatible style. The first training
			
 
				+        period will begin here. If not provided, 3 * horizon is used.
			
 
				 
			
 
				     Returns
			
 
				     -------
			
 
				     A pd.DataFrame with the forecast, actual value and cutoff.
			
 
				     """
			
 
				     df = model.history.copy().reset_index(drop=True)
			
 
				+    te = df['ds'].max()
			
 
				+    ts = df['ds'].min()
			
 
				     horizon = pd.Timedelta(horizon)
			
 
				     period = 0.5 * horizon if period is None else pd.Timedelta(period)
			
 
				-    cutoffs = _cutoffs(df, horizon, k, period)
			
 
				+    initial = 3 * horizon if initial is None else pd.Timedelta(initial)
			
 
				+
			
 
				+    cutoffs = generate_cutoffs(df, horizon, initial, period)
			
 
				     predicts = []
			
 
				     for cutoff in cutoffs:
			
 
				         # Generate new object with copying fitting options
			
 
				         m = prophet_copy(model, cutoff)
			
 
				         # Train model
			
 
				-        m.fit(df[df['ds'] <= cutoff])
			
 
				+        history_c = df[df['ds'] <= cutoff]
			
 
				+        if history_c.shape[0] < 2:
			
 
				+            raise Exception(
			
 
				+                'Less than two datapoints before cutoff. '
			
 
				+                'Increase initial window.'
			
 
				+            )
			
 
				+        m.fit(history_c)
			
 
				         # Calculate yhat
			
 
				         index_predicted = (df['ds'] > cutoff) & (df['ds'] <= cutoff + horizon)
			
 
				         # Get the columns for the future dataframe
			
@@ -113,42 +124,6 @@ def simulated_historical_forecasts(model, horizon, k, period=None):
 
				     # Combine all predicted pd.DataFrame into one pd.DataFrame
			
 
				     return reduce(lambda x, y: x.append(y), predicts).reset_index(drop=True)
			
 
				 
			
 
				-
			
 
				-def cross_validation(model, horizon, period=None, initial=None):
			
 
				-    """Cross-Validation for time series.
			
 
				-
			
 
				-    Computes forecasts from historical cutoff points. Beginning from initial,
			
 
				-    makes cutoffs with a spacing of period up to (end - horizon).
			
 
				-
			
 
				-    When period is equal to the time interval of the data, this is the
			
 
				-    technique described in https://robjhyndman.com/hyndsight/tscv/ .
			
 
				-
			
 
				-    Parameters
			
 
				-    ----------
			
 
				-    model: Prophet class object. Fitted Prophet model
			
 
				-    horizon: string with pd.Timedelta compatible style, e.g., '5 days',
			
 
				-        '3 hours', '10 seconds'.
			
 
				-    period: string with pd.Timedelta compatible style. Simulated forecast will
			
 
				-        be done at every this period. If not provided, 0.5 * horizon is used.
			
 
				-    initial: string with pd.Timedelta compatible style. The first training
			
 
				-        period will begin here. If not provided, 3 * horizon is used.
			
 
				-
			
 
				-    Returns
			
 
				-    -------
			
 
				-    A pd.DataFrame with the forecast, actual value and cutoff.
			
 
				-    """
			
 
				-    te = model.history['ds'].max()
			
 
				-    ts = model.history['ds'].min()
			
 
				-    horizon = pd.Timedelta(horizon)
			
 
				-    period = 0.5 * horizon if period is None else pd.Timedelta(period)
			
 
				-    initial = 3 * horizon if initial is None else pd.Timedelta(initial)
			
 
				-    k = int(np.ceil(((te - horizon) - (ts + initial)) / period))
			
 
				-    if k < 1:
			
 
				-        raise ValueError(
			
 
				-            'Not enough data for specified horizon, period, and initial.')
			
 
				-    return simulated_historical_forecasts(model, horizon, k, period)
			
 
				-
			
 
				-
			
 
				 def prophet_copy(m, cutoff=None):
			
 
				     """Copy Prophet object
			
 
				 
			
--- a/python/fbprophet/tests/test_diagnostics.py
+++ b/python/fbprophet/tests/test_diagnostics.py
@@ -36,95 +36,63 @@ class TestDiagnostics(TestCase):
 
				         # Use first 100 record in data.csv
			
 
				         self.__df = DATA
			
 
				 
			
 
				-    def test_simulated_historical_forecasts(self):
			
 
				+    def test_cross_validation(self):
			
 
				         m = Prophet()
			
 
				         m.fit(self.__df)
			
 
				-        k = 2
			
 
				-        for p in [1, 10]:
			
 
				-            for h in [1, 3]:
			
 
				-                period = '{} days'.format(p)
			
 
				-                horizon = '{} days'.format(h)
			
 
				-                df_shf = diagnostics.simulated_historical_forecasts(
			
 
				-                    m, horizon=horizon, k=k, period=period)
			
 
				-                # All cutoff dates should be less than ds dates
			
 
				-                self.assertTrue((df_shf['cutoff'] < df_shf['ds']).all())
			
 
				-                # The unique size of output cutoff should be equal to 'k'
			
 
				-                self.assertEqual(len(np.unique(df_shf['cutoff'])), k)
			
 
				-                self.assertEqual(
			
 
				-                    max(df_shf['ds'] - df_shf['cutoff']),
			
 
				-                    pd.Timedelta(horizon),
			
 
				-                )
			
 
				-                dc = df_shf['cutoff'].diff()
			
 
				-                dc = dc[dc > pd.Timedelta(0)].min()
			
 
				-                self.assertTrue(dc >= pd.Timedelta(period))
			
 
				-                # Each y in df_shf and self.__df with same ds should be equal
			
 
				-                df_merged = pd.merge(df_shf, self.__df, 'left', on='ds')
			
 
				-                self.assertAlmostEqual(
			
 
				-                    np.sum((df_merged['y_x'] - df_merged['y_y']) ** 2), 0.0)
			
 
				+        # Calculate the number of cutoff points(k)
			
 
				+        horizon = pd.Timedelta('4 days')
			
 
				+        period = pd.Timedelta('10 days')
			
 
				+        initial = pd.Timedelta('115 days')
			
 
				+        df_cv = diagnostics.cross_validation(
			
 
				+            m, horizon='4 days', period='10 days', initial='115 days')
			
 
				+        self.assertEqual(len(np.unique(df_cv['cutoff'])), 3)
			
 
				+        self.assertEqual(max(df_cv['ds'] - df_cv['cutoff']), horizon)
			
 
				+        self.assertTrue(min(df_cv['cutoff']) >= min(self.__df['ds']) + initial)
			
 
				+        dc = df_cv['cutoff'].diff()
			
 
				+        dc = dc[dc > pd.Timedelta(0)].min()
			
 
				+        self.assertTrue(dc >= period)
			
 
				+        self.assertTrue((df_cv['cutoff'] < df_cv['ds']).all())
			
 
				+        # Each y in df_cv and self.__df with same ds should be equal
			
 
				+        df_merged = pd.merge(df_cv, self.__df, 'left', on='ds')
			
 
				+        self.assertAlmostEqual(
			
 
				+            np.sum((df_merged['y_x'] - df_merged['y_y']) ** 2), 0.0)
			
 
				+        df_cv = diagnostics.cross_validation(
			
 
				+            m, horizon='4 days', period='10 days', initial='135 days')
			
 
				+        self.assertEqual(len(np.unique(df_cv['cutoff'])), 1)
			
 
				+        with self.assertRaises(ValueError):
			
 
				+            diagnostics.cross_validation(
			
 
				+                m, horizon='10 days', period='10 days', initial='140 days')
			
 
				 
			
 
				-    def test_simulated_historical_forecasts_logistic(self):
			
 
				-        m = Prophet(growth='logistic')
			
 
				+    def test_cross_validation_logistic(self):
			
 
				         df = self.__df.copy()
			
 
				         df['cap'] = 40
			
 
				-        m.fit(df)
			
 
				-        df_shf = diagnostics.simulated_historical_forecasts(
			
 
				-            m, horizon='3 days', k=2, period='3 days')
			
 
				-        # All cutoff dates should be less than ds dates
			
 
				-        self.assertTrue((df_shf['cutoff'] < df_shf['ds']).all())
			
 
				-        # The unique size of output cutoff should be equal to 'k'
			
 
				-        self.assertEqual(len(np.unique(df_shf['cutoff'])), 2)
			
 
				-        # Each y in df_shf and self.__df with same ds should be equal
			
 
				-        df_merged = pd.merge(df_shf, df, 'left', on='ds')
			
 
				+        m = Prophet(growth='logistic').fit(df)
			
 
				+        df_cv = diagnostics.cross_validation(
			
 
				+            m, horizon='1 days', period='1 days', initial='140 days')
			
 
				+        self.assertEqual(len(np.unique(df_cv['cutoff'])), 2)
			
 
				+        self.assertTrue((df_cv['cutoff'] < df_cv['ds']).all())
			
 
				+        df_merged = pd.merge(df_cv, self.__df, 'left', on='ds')
			
 
				         self.assertAlmostEqual(
			
 
				             np.sum((df_merged['y_x'] - df_merged['y_y']) ** 2), 0.0)
			
 
				 
			
 
				-    def test_simulated_historical_forecasts_extra_regressors(self):
			
 
				+    def test_cross_validation_extra_regressors(self):
			
 
				+        df = self.__df.copy()
			
 
				+        df['extra'] = range(df.shape[0])
			
 
				         m = Prophet()
			
 
				         m.add_seasonality(name='monthly', period=30.5, fourier_order=5)
			
 
				         m.add_regressor('extra')
			
 
				-        df = self.__df.copy()
			
 
				-        df['cap'] = 40
			
 
				-        df['extra'] = range(df.shape[0])
			
 
				         m.fit(df)
			
 
				-        df_shf = diagnostics.simulated_historical_forecasts(
			
 
				-            m, horizon='3 days', k=2, period='3 days')
			
 
				-        # All cutoff dates should be less than ds dates
			
 
				-        self.assertTrue((df_shf['cutoff'] < df_shf['ds']).all())
			
 
				-        # The unique size of output cutoff should be equal to 'k'
			
 
				-        self.assertEqual(len(np.unique(df_shf['cutoff'])), 2)
			
 
				-        # Each y in df_shf and self.__df with same ds should be equal
			
 
				-        df_merged = pd.merge(df_shf, df, 'left', on='ds')
			
 
				-        self.assertAlmostEqual(
			
 
				-            np.sum((df_merged['y_x'] - df_merged['y_y']) ** 2), 0.0)
			
 
				-
			
 
				-    def test_simulated_historical_forecasts_default_value_check(self):
			
 
				-        m = Prophet()
			
 
				-        m.fit(self.__df)
			
 
				-        # Default value of period should be equal to 0.5 * horizon
			
 
				-        df_shf1 = diagnostics.simulated_historical_forecasts(
			
 
				-            m, horizon='10 days', k=1)
			
 
				-        df_shf2 = diagnostics.simulated_historical_forecasts(
			
 
				-            m, horizon='10 days', k=1, period='5 days')
			
 
				-        self.assertAlmostEqual(
			
 
				-            ((df_shf1['y'] - df_shf2['y']) ** 2).sum(), 0.0)
			
 
				-        self.assertAlmostEqual(
			
 
				-            ((df_shf1['yhat'] - df_shf2['yhat']) ** 2).sum(), 0.0)
			
 
				-
			
 
				-    def test_cross_validation(self):
			
 
				-        m = Prophet()
			
 
				-        m.fit(self.__df)
			
 
				-        # Calculate the number of cutoff points(k)
			
 
				-        horizon = pd.Timedelta('4 days')
			
 
				-        period = pd.Timedelta('10 days')
			
 
				-        k = 5
			
 
				         df_cv = diagnostics.cross_validation(
			
 
				-            m, horizon='4 days', period='10 days', initial='90 days')
			
 
				-        # The unique size of output cutoff should be equal to 'k'
			
 
				-        self.assertEqual(len(np.unique(df_cv['cutoff'])), k)
			
 
				-        self.assertEqual(max(df_cv['ds'] - df_cv['cutoff']), horizon)
			
 
				+            m, horizon='4 days', period='4 days', initial='135 days')
			
 
				+        self.assertEqual(len(np.unique(df_cv['cutoff'])), 2)
			
 
				+        period = pd.Timedelta('4 days')
			
 
				         dc = df_cv['cutoff'].diff()
			
 
				         dc = dc[dc > pd.Timedelta(0)].min()
			
 
				         self.assertTrue(dc >= period)
			
 
				+        self.assertTrue((df_cv['cutoff'] < df_cv['ds']).all())
			
 
				+        df_merged = pd.merge(df_cv, self.__df, 'left', on='ds')
			
 
				+        self.assertAlmostEqual(
			
 
				+            np.sum((df_merged['y_x'] - df_merged['y_y']) ** 2), 0.0)
			
 
				 
			
 
				     def test_cross_validation_default_value_check(self):
			
 
				         m = Prophet()
			
@@ -150,11 +118,11 @@ class TestDiagnostics(TestCase):
 
				             set(df_none.columns),
			
 
				             {'horizon', 'coverage', 'mae', 'mape', 'mse', 'rmse'},
			
 
				         )
			
 
				-        self.assertEqual(df_none.shape[0], 14)
			
 
				+        self.assertEqual(df_none.shape[0], 16)
			
 
				         # Aggregation level 0.2
			
 
				         df_horizon = diagnostics.performance_metrics(df_cv, rolling_window=0.2)
			
 
				         self.assertEqual(len(df_horizon['horizon'].unique()), 4)
			
 
				-        self.assertEqual(df_horizon.shape[0], 13)
			
 
				+        self.assertEqual(df_horizon.shape[0], 14)
			
 
				         # Aggregation level all
			
 
				         df_all = diagnostics.performance_metrics(df_cv, rolling_window=1)
			
 
				         self.assertEqual(df_all.shape[0], 1)