Browse Source

Merge in bugfixes from master (#349)

* Update memory requirement description per #326

* Fix R warning with extra regressor; disallow constant extra regressors.

* Fix unit test broken in new pandas

* Fix diagnostics unit tests for new pandas

* Fix copy with extra seasonalities / regressors Py

* Fix copy with extra seasonalities / regressors R

* Fix weekly_start and yearly_start in R plot_components

* Fix plotting in pandas 0.21 by using pydatetime instead of numpy
Ben Letham 7 years ago
parent
commit
e78f583f90

+ 24 - 12
R/R/prophet.R

@@ -392,9 +392,13 @@ initialize_scales_fn <- function(m, initialize_scales, df) {
   m$start <- min(df$ds)
   m$t.scale <- time_diff(max(df$ds), m$start, "secs")
   for (name in names(m$extra_regressors)) {
+    n.vals <- length(unique(df[[name]]))
+    if (n.vals < 2) {
+      stop('Regressor ', name, ' is constant.')
+    }
     standardize <- m$extra_regressors[[name]]$standardize
     if (standardize == 'auto') {
-      if (all(sort(unique(df[[name]])) == c(0, 1))) {
+      if (n.vals == 2 && all(sort(unique(df[[name]])) == c(0, 1))) {
         # Don't standardize binary variables
         standardize <- FALSE
       } else {
@@ -404,9 +408,6 @@ initialize_scales_fn <- function(m, initialize_scales, df) {
     if (standardize) {
       mu <- mean(df[[name]])
       std <- stats::sd(df[[name]])
-      if (std == 0) {
-        std <- mu
-      }
       m$extra_regressors[[name]]$mu <- mu
       m$extra_regressors[[name]]$std <- std
     }
@@ -1586,7 +1587,8 @@ seasonality_plot_df <- function(m, ds) {
 #' @keywords internal
 plot_weekly <- function(m, uncertainty = TRUE, weekly_start = 0) {
   # Compute weekly seasonality for a Sun-Sat sequence of dates.
-  days <- seq(set_date('2017-01-01'), by='d', length.out=7) + weekly_start
+  days <- seq(set_date('2017-01-01'), by='d', length.out=7) + as.difftime(
+    weekly_start, units = "days")
   df.w <- seasonality_plot_df(m, days)
   seas <- predict_seasonal_components(m, df.w)
   seas$dow <- factor(weekdays(df.w$ds), levels=weekdays(df.w$ds))
@@ -1619,7 +1621,8 @@ plot_weekly <- function(m, uncertainty = TRUE, weekly_start = 0) {
 #' @keywords internal
 plot_yearly <- function(m, uncertainty = TRUE, yearly_start = 0) {
   # Compute yearly seasonality for a Jan 1 - Dec 31 sequence of dates.
-  days <- seq(set_date('2017-01-01'), by='d', length.out=365) + yearly_start
+  days <- seq(set_date('2017-01-01'), by='d', length.out=365) + as.difftime(
+    yearly_start, units = "days")
   df.y <- seasonality_plot_df(m, days)
   seas <- predict_seasonal_components(m, df.y)
   seas$ds <- df.y$ds
@@ -1695,6 +1698,10 @@ plot_seasonality <- function(m, name, uncertainty = TRUE) {
 #'
 #' @keywords internal
 prophet_copy <- function(m, cutoff = NULL) {
+  if (is.null(m$history)) {
+    stop("This is for copying a fitted Prophet object.")
+  }
+
   if (m$specified.changepoints) {
     changepoints <- m$changepoints
     if (!is.null(cutoff)) {
@@ -1704,13 +1711,15 @@ prophet_copy <- function(m, cutoff = NULL) {
   } else {
     changepoints <- NULL
   }
-  return(prophet(
+  # Auto seasonalities are set to FALSE because they are already set in
+  # m$seasonalities.
+  m2 <- prophet(
     growth = m$growth,
     changepoints = changepoints,
     n.changepoints = m$n.changepoints,
-    yearly.seasonality = m$yearly.seasonality,
-    weekly.seasonality = m$weekly.seasonality,
-    daily.seasonality = m$daily.seasonality,
+    yearly.seasonality = FALSE,
+    weekly.seasonality = FALSE,
+    daily.seasonality = FALSE,
     holidays = m$holidays,
     seasonality.prior.scale = m$seasonality.prior.scale,
     changepoint.prior.scale = m$changepoint.prior.scale,
@@ -1718,8 +1727,11 @@ prophet_copy <- function(m, cutoff = NULL) {
     mcmc.samples = m$mcmc.samples,
     interval.width = m$interval.width,
     uncertainty.samples = m$uncertainty.samples,
-    fit = FALSE,
-  ))
+    fit = FALSE
+  )
+  m2$extra_regressors <- m$extra_regressors
+  m2$seasonalities <- m$seasonalities
+  return(m2)
 }
 
 # fb-block 3

+ 40 - 21
R/tests/testthat/test_prophet.R

@@ -511,24 +511,24 @@ test_that("added_regressors", {
   expect_equal(fcst$seasonal[1],
                fcst$seasonalities[1] + fcst$extra_regressors[1])
   expect_equal(fcst$yhat[1], fcst$trend[1] + fcst$seasonal[1])
+  # Check fails if constant extra regressor
+  df$constant_feature <- 5
+  m <- prophet()
+  m <- add_regressor(m, 'constant_feature')
+  expect_error(fit.prophet(m, df))
 })
 
 test_that("copy", {
   skip_if_not(Sys.getenv('R_ARCH') != '/i386')
+  df <- DATA
+  df$cap <- 200.
+  df$binary_feature <- c(rep(0, 255), rep(1, 255))
   inputs <- list(
     growth = c('linear', 'logistic'),
-    changepoints = c(NULL, c('2016-12-25')),
-    n.changepoints = c(3),
     yearly.seasonality = c(TRUE, FALSE),
     weekly.seasonality = c(TRUE, FALSE),
     daily.seasonality = c(TRUE, FALSE),
-    holidays = c(NULL, 'insert_dataframe'),
-    seasonality.prior.scale = c(1.1),
-    holidays.prior.scale = c(1.1),
-    changepoints.prior.scale = c(0.1),
-    mcmc.samples = c(100),
-    interval.width = c(0.9),
-    uncertainty.samples = c(200)
+    holidays = c('null', 'insert_dataframe')
   )
   products <- expand.grid(inputs)
   for (i in 1:length(products)) {
@@ -538,32 +538,51 @@ test_that("copy", {
       holidays <- NULL
     }
     m1 <- prophet(
-      growth = products$growth[i],
-      changepoints = products$changepoints[i],
-      n.changepoints = products$n.changepoints[i],
+      growth = as.character(products$growth[i]),
+      changepoints = NULL,
+      n.changepoints = 3,
       yearly.seasonality = products$yearly.seasonality[i],
       weekly.seasonality = products$weekly.seasonality[i],
       daily.seasonality = products$daily.seasonality[i],
       holidays = holidays,
-      seasonality.prior.scale = products$seasonality.prior.scale[i],
-      holidays.prior.scale = products$holidays.prior.scale[i],
-      changepoints.prior.scale = products$changepoints.prior.scale[i],
-      mcmc.samples = products$mcmc.samples[i],
-      interval.width = products$interval.width[i],
-      uncertainty.samples = products$uncertainty.samples[i],
+      seasonality.prior.scale = 1.1,
+      holidays.prior.scale = 1.1,
+      changepoints.prior.scale = 0.1,
+      mcmc.samples = 100,
+      interval.width = 0.9,
+      uncertainty.samples = 200,
       fit = FALSE
     )
+    out <- prophet:::setup_dataframe(m1, df, initialize_scales = TRUE)
+    m1 <- out$m
+    m1$history <- out$df
+    m1 <- prophet:::set_auto_seasonalities(m1)
     m2 <- prophet:::prophet_copy(m1)
     # Values should be copied correctly
-    for (arg in names(inputs)) {
+    args <- c('growth', 'changepoints', 'n.changepoints', 'holidays',
+              'seasonality.prior.scale', 'holidays.prior.scale',
+              'changepoints.prior.scale', 'mcmc.samples', 'interval.width',
+              'uncertainty.samples')
+    for (arg in args) {
       expect_equal(m1[[arg]], m2[[arg]])
     }
+    expect_equal(FALSE, m2$yearly.seasonality)
+    expect_equal(FALSE, m2$weekly.seasonality)
+    expect_equal(FALSE, m2$daily.seasonality)
+    expect_equal(m1$yearly.seasonality, 'yearly' %in% names(m2$seasonalities))
+    expect_equal(m1$weekly.seasonality, 'weekly' %in% names(m2$seasonalities))
+    expect_equal(m1$daily.seasonality, 'daily' %in% names(m2$seasonalities))
   }
-  # Check for cutoff
+  # Check for cutoff and custom seasonality and extra regressors
   changepoints <- seq.Date(as.Date('2012-06-15'), as.Date('2012-09-15'), by='d')
   cutoff <- as.Date('2012-07-25')
-  m1 <- prophet(DATA, changepoints = changepoints)
+  m1 <- prophet(changepoints = changepoints)
+  m1 <- add_seasonality(m1, 'custom', 10, 5)
+  m1 <- add_regressor(m1, 'binary_feature')
+  m1 <- fit.prophet(m1, df)
   m2 <- prophet:::prophet_copy(m1, cutoff)
   changepoints <- changepoints[changepoints <= cutoff]
   expect_equal(prophet:::set_date(changepoints), m2$changepoints)
+  expect_true('custom' %in% names(m2$seasonalities))
+  expect_true('binary_feature' %in% names(m2$extra_regressors))
 })

+ 1 - 1
README.md

@@ -52,7 +52,7 @@ On Windows, PyStan requires a compiler so you'll need to [follow the instruction
 
 ### Linux
 
-Make sure compilers (gcc, g++) and Python development tools (python-dev) are installed. If you are using a VM, be aware that you will need at least 2GB of memory to run PyStan.
+Make sure compilers (gcc, g++) and Python development tools (python-dev) are installed. If you are using a VM, be aware that you will need at least 4GB of memory to install fbprophet, and at least 2GB of memory to use fbprophet.
 
 ### Anaconda
 

+ 1 - 1
docs/_docs/installation.md

@@ -43,7 +43,7 @@ On Windows, PyStan requires a compiler so you'll need to [follow the instruction
 
 ### Linux
 
-Make sure compilers (gcc, g++) and Python development tools (python-dev) are installed. If you are using a VM, be aware that you will need at least 2GB of memory to run PyStan.
+Make sure compilers (gcc, g++) and Python development tools (python-dev) are installed. If you are using a VM, be aware that you will need at least 4GB of memory to install fbprophet, and at least 2GB of memory to use fbprophet.
 
 ### Anaconda
 

File diff suppressed because it is too large
+ 1 - 1
docs/_docs/seasonality_and_holiday_effects.md


File diff suppressed because it is too large
+ 1 - 1
notebooks/seasonality_and_holiday_effects.ipynb


+ 38 - 25
python/fbprophet/forecaster.py

@@ -11,6 +11,7 @@ from __future__ import print_function
 from __future__ import unicode_literals
 
 from collections import defaultdict
+from copy import deepcopy
 from datetime import timedelta
 import logging
 
@@ -278,6 +279,9 @@ class Prophet(object):
         self.t_scale = df['ds'].max() - self.start
         for name, props in self.extra_regressors.items():
             standardize = props['standardize']
+            n_vals = len(df[name].unique())
+            if n_vals < 2:
+                raise ValueError('Regressor {} is constant.'.format(name))
             if standardize == 'auto':
                 if set(df[name].unique()) == set([1, 0]):
                     # Don't standardize binary variables.
@@ -287,8 +291,6 @@ class Prophet(object):
             if standardize:
                 mu = df[name].mean()
                 std = df[name].std()
-                if std == 0:
-                    std = mu
                 self.extra_regressors[name]['mu'] = mu
                 self.extra_regressors[name]['std'] = std
 
@@ -1248,16 +1250,16 @@ class Prophet(object):
             ax = fig.add_subplot(111)
         else:
             fig = ax.get_figure()
-        ax.plot(self.history['ds'].values, self.history['y'], 'k.')
-        ax.plot(fcst['ds'].values, fcst['yhat'], ls='-', c='#0072B2')
+        fcst_t = fcst['ds'].dt.to_pydatetime()
+        ax.plot(self.history['ds'].dt.to_pydatetime(), self.history['y'], 'k.')
+        ax.plot(fcst_t, fcst['yhat'], ls='-', c='#0072B2')
         if 'cap' in fcst and plot_cap:
-            ax.plot(fcst['ds'].values, fcst['cap'], ls='--', c='k')
+            ax.plot(fcst_t, fcst['cap'], ls='--', c='k')
         if self.logistic_floor and 'floor' in fcst and plot_cap:
-            ax.plot(fcst['ds'].values, fcst['floor'], ls='--', c='k')
+            ax.plot(fcst_t, fcst['floor'], ls='--', c='k')
         if uncertainty:
-            ax.fill_between(fcst['ds'].values, fcst['yhat_lower'],
-                            fcst['yhat_upper'], color='#0072B2',
-                            alpha=0.2)
+            ax.fill_between(fcst_t, fcst['yhat_lower'], fcst['yhat_upper'],
+                            color='#0072B2', alpha=0.2)
         ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)
         ax.set_xlabel(xlabel)
         ax.set_ylabel(ylabel)
@@ -1345,15 +1347,16 @@ class Prophet(object):
         if not ax:
             fig = plt.figure(facecolor='w', figsize=(10, 6))
             ax = fig.add_subplot(111)
-        artists += ax.plot(fcst['ds'].values, fcst[name], ls='-', c='#0072B2')
+        fcst_t = fcst['ds'].dt.to_pydatetime()
+        artists += ax.plot(fcst_t, fcst[name], ls='-', c='#0072B2')
         if 'cap' in fcst and plot_cap:
-            artists += ax.plot(fcst['ds'].values, fcst['cap'], ls='--', c='k')
+            artists += ax.plot(fcst_t, fcst['cap'], ls='--', c='k')
         if self.logistic_floor and 'floor' in fcst and plot_cap:
-            ax.plot(fcst['ds'].values, fcst['floor'], ls='--', c='k')
+            ax.plot(fcst_t, fcst['floor'], ls='--', c='k')
         if uncertainty:
             artists += [ax.fill_between(
-                fcst['ds'].values, fcst[name + '_lower'],
-                fcst[name + '_upper'], color='#0072B2', alpha=0.2)]
+                fcst_t, fcst[name + '_lower'], fcst[name + '_upper'],
+                color='#0072B2', alpha=0.2)]
         ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)
         ax.set_xlabel('ds')
         ax.set_ylabel(name)
@@ -1441,11 +1444,11 @@ class Prophet(object):
                 pd.Timedelta(days=yearly_start))
         df_y = self.seasonality_plot_df(days)
         seas = self.predict_seasonal_components(df_y)
-        artists += ax.plot(df_y['ds'], seas['yearly'], ls='-',
-                           c='#0072B2')
+        artists += ax.plot(
+            df_y['ds'].dt.to_pydatetime(), seas['yearly'], ls='-', c='#0072B2')
         if uncertainty:
             artists += [ax.fill_between(
-                df_y['ds'].values, seas['yearly_lower'],
+                df_y['ds'].dt.to_pydatetime(), seas['yearly_lower'],
                 seas['yearly_upper'], color='#0072B2', alpha=0.2)]
         ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)
         months = MonthLocator(range(1, 13), bymonthday=1, interval=2)
@@ -1481,14 +1484,16 @@ class Prophet(object):
         days = pd.to_datetime(np.linspace(start.value, end.value, plot_points))
         df_y = self.seasonality_plot_df(days)
         seas = self.predict_seasonal_components(df_y)
-        artists += ax.plot(df_y['ds'], seas[name], ls='-',
+        artists += ax.plot(df_y['ds'].dt.to_pydatetime(), seas[name], ls='-',
                             c='#0072B2')
         if uncertainty:
             artists += [ax.fill_between(
-                df_y['ds'].values, seas[name + '_lower'],
+                df_y['ds'].dt.to_pydatetime(), seas[name + '_lower'],
                 seas[name + '_upper'], color='#0072B2', alpha=0.2)]
         ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)
-        ax.set_xticks(pd.to_datetime(np.linspace(start.value, end.value, 7)))
+        xticks = pd.to_datetime(np.linspace(start.value, end.value, 7)
+            ).to_pydatetime()
+        ax.set_xticks(xticks)
         if period <= 2:
             fmt_str = '{dt:%T}'
         elif period < 14:
@@ -1514,6 +1519,9 @@ class Prophet(object):
         -------
         Prophet class object with the same parameters as the model variable
         """
+        if self.history is None:
+            raise Exception('This is for copying a fitted Prophet object.')
+
         if self.specified_changepoints:
             changepoints = self.changepoints
             if cutoff is not None:
@@ -1522,18 +1530,23 @@ class Prophet(object):
         else:
             changepoints = None
 
-        return Prophet(
+        # Auto seasonalities are set to False because they are already set in
+        # self.seasonalities.
+        m = Prophet(
             growth=self.growth,
             n_changepoints=self.n_changepoints,
             changepoints=changepoints,
-            yearly_seasonality=self.yearly_seasonality,
-            weekly_seasonality=self.weekly_seasonality,
-            daily_seasonality=self.daily_seasonality,
+            yearly_seasonality=False,
+            weekly_seasonality=False,
+            daily_seasonality=False,
             holidays=self.holidays,
             seasonality_prior_scale=self.seasonality_prior_scale,
             changepoint_prior_scale=self.changepoint_prior_scale,
             holidays_prior_scale=self.holidays_prior_scale,
             mcmc_samples=self.mcmc_samples,
             interval_width=self.interval_width,
-            uncertainty_samples=self.uncertainty_samples
+            uncertainty_samples=self.uncertainty_samples,
         )
+        m.extra_regressors = deepcopy(self.extra_regressors)
+        m.seasonalities = deepcopy(self.seasonalities)
+        return m

+ 6 - 2
python/fbprophet/tests/test_diagnostics.py

@@ -84,7 +84,9 @@ class TestDiagnostics(TestCase):
         df_shf2 = diagnostics.simulated_historical_forecasts(
             m, horizon='10 days', k=1, period='5 days')
         self.assertAlmostEqual(
-            ((df_shf1 - df_shf2)**2)[['y', 'yhat']].sum().sum(), 0.0)
+            ((df_shf1['y'] - df_shf2['y']) ** 2).sum(), 0.0)
+        self.assertAlmostEqual(
+            ((df_shf1['yhat'] - df_shf2['yhat']) ** 2).sum(), 0.0)
 
     def test_cross_validation(self):
         m = Prophet()
@@ -111,4 +113,6 @@ class TestDiagnostics(TestCase):
         df_cv2 = diagnostics.cross_validation(
             m, horizon='32 days', period='10 days', initial='96 days')
         self.assertAlmostEqual(
-            ((df_cv1 - df_cv2)**2)[['y', 'yhat']].sum().sum(), 0.0)
+            ((df_cv1['y'] - df_cv2['y']) ** 2).sum(), 0.0)
+        self.assertAlmostEqual(
+            ((df_cv1['yhat'] - df_cv2['yhat']) ** 2).sum(), 0.0)

+ 27 - 6
python/fbprophet/tests/test_prophet.py

@@ -62,7 +62,6 @@ class TestProphet(TestCase):
 
     def test_fit_changepoint_not_in_history(self):
         train = DATA[(DATA['ds'] < '2013-01-01') | (DATA['ds'] > '2014-01-01')]
-        train[(train['ds'] > '2014-01-01')] += 20
         future = pd.DataFrame({'ds': DATA['ds']})
         forecaster = Prophet(changepoints=['2013-06-06'])
         forecaster.fit(train)
@@ -548,8 +547,17 @@ class TestProphet(TestCase):
             fcst['yhat'][0],
             fcst['trend'][0] + fcst['seasonal'][0],
         )
+        # Check fails if constant extra regressor
+        df['constant_feature'] = 5
+        m = Prophet()
+        m.add_regressor('constant_feature')
+        with self.assertRaises(ValueError):
+            m.fit(df.copy())
 
     def test_copy(self):
+        df = DATA.copy()
+        df['cap'] = 200.
+        df['binary_feature'] = [0] * 255 + [1] * 255
         # These values are chosen to be different from the defaults
         holiday = pd.DataFrame(
             {'ds': pd.to_datetime(['2016-12-25']), 'holiday': ['x']})
@@ -571,13 +579,22 @@ class TestProphet(TestCase):
         # Values should be copied correctly
         for product in products:
             m1 = Prophet(*product)
+            m1.history = m1.setup_dataframe(
+                df.copy(), initialize_scales=True)
+            m1.set_auto_seasonalities()
             m2 = m1.copy()
             self.assertEqual(m1.growth, m2.growth)
             self.assertEqual(m1.n_changepoints, m2.n_changepoints)
             self.assertEqual(m1.changepoints, m2.changepoints)
-            self.assertEqual(m1.yearly_seasonality, m2.yearly_seasonality)
-            self.assertEqual(m1.weekly_seasonality, m2.weekly_seasonality)
-            self.assertEqual(m1.daily_seasonality, m2.daily_seasonality)
+            self.assertEqual(False, m2.yearly_seasonality)
+            self.assertEqual(False, m2.weekly_seasonality)
+            self.assertEqual(False, m2.daily_seasonality)
+            self.assertEqual(
+                m1.yearly_seasonality, 'yearly' in m2.seasonalities)
+            self.assertEqual(
+                m1.weekly_seasonality, 'weekly' in m2.seasonalities)
+            self.assertEqual(
+                m1.daily_seasonality, 'daily' in m2.seasonalities)
             if m1.holidays is None:
                 self.assertEqual(m1.holidays, m2.holidays)
             else:
@@ -589,11 +606,15 @@ class TestProphet(TestCase):
             self.assertEqual(m1.interval_width, m2.interval_width)
             self.assertEqual(m1.uncertainty_samples, m2.uncertainty_samples)
 
-        # Check for cutoff
+        # Check for cutoff and custom seasonality and extra regressors
         changepoints = pd.date_range('2012-06-15', '2012-09-15')
         cutoff = pd.Timestamp('2012-07-25')
         m1 = Prophet(changepoints=changepoints)
-        m1.fit(DATA)
+        m1.add_seasonality('custom', 10, 5)
+        m1.add_regressor('binary_feature')
+        m1.fit(df)
         m2 = m1.copy(cutoff=cutoff)
         changepoints = changepoints[changepoints <= cutoff]
         self.assertTrue((changepoints == m2.changepoints).all())
+        self.assertTrue('custom' in m2.seasonalities)
+        self.assertTrue('binary_feature' in m2.extra_regressors)