Sfoglia il codice sorgente

Modification of holiday features (#644)

* Allow both holidays and append_holidays; match holidays in predict and fit

* Allow both holidays and append_holidays; match holidays in predict and fit

* Add test for append_holiday features; minor fixes

* Add column name validation for append_holidays names; allow only one country
ziye666 7 anni fa
parent
commit
817f0306a4

+ 1 - 0
python/fbprophet/diagnostics.py

@@ -159,6 +159,7 @@ def prophet_copy(m, cutoff=None):
         weekly_seasonality=False,
         daily_seasonality=False,
         holidays=m.holidays,
+        append_holidays=m.append_holidays,
         seasonality_mode=m.seasonality_mode,
         seasonality_prior_scale=m.seasonality_prior_scale,
         changepoint_prior_scale=m.changepoint_prior_scale,

+ 74 - 27
python/fbprophet/forecaster.py

@@ -14,16 +14,12 @@ from collections import defaultdict
 from datetime import timedelta
 import logging
 import warnings
-
 import numpy as np
 import pandas as pd
-try:
-    import pystan  # noqa F401
-except ImportError:
-    logger.exception('You cannot run fbprophet without pystan installed')
 
 from fbprophet.diagnostics import prophet_copy
 from fbprophet.models import prophet_stan_model
+from fbprophet.make_holidays import get_holiday_names, make_holidays_df
 from fbprophet.plot import (
     plot,
     plot_components,
@@ -38,6 +34,11 @@ logging.basicConfig()
 logger = logging.getLogger(__name__)
 warnings.filterwarnings("default", category=DeprecationWarning)
 
+try:
+    import pystan  # noqa F401
+except ImportError:
+    logger.exception('You cannot run fbprophet without pystan installed')
+
 
 class Prophet(object):
     """Prophet forecaster.
@@ -68,6 +69,7 @@ class Prophet(object):
         lower_window=-2 will include 2 days prior to the date as holidays. Also
         optionally can have a column prior_scale specifying the prior scale for
         that holiday.
+    append_holidays: country name or abbreviation; must be string
     seasonality_mode: 'additive' (default) or 'multiplicative'.
     seasonality_prior_scale: Parameter modulating the strength of the
         seasonality model. Larger values allow the model to fit larger seasonal
@@ -100,6 +102,7 @@ class Prophet(object):
             weekly_seasonality='auto',
             daily_seasonality='auto',
             holidays=None,
+            append_holidays=None,
             seasonality_mode='additive',
             seasonality_prior_scale=10.0,
             holidays_prior_scale=10.0,
@@ -134,6 +137,13 @@ class Prophet(object):
             holidays['ds'] = pd.to_datetime(holidays['ds'])
         self.holidays = holidays
 
+        if append_holidays is not None:
+            if not (
+                    isinstance(append_holidays, str)
+            ):
+                raise ValueError("append_holidays must be a string")
+        self.append_holidays = append_holidays
+
         self.seasonality_mode = seasonality_mode
         self.seasonality_prior_scale = float(seasonality_prior_scale)
         self.changepoint_prior_scale = float(changepoint_prior_scale)
@@ -157,6 +167,7 @@ class Prophet(object):
         self.history_dates = None
         self.train_component_cols = None
         self.component_modes = None
+        self.train_holiday_names = None
         self.validate_inputs()
 
     def validate_inputs(self):
@@ -199,7 +210,7 @@ class Prophet(object):
             raise ValueError('Name cannot contain "_delim_"')
         reserved_names = [
             'trend', 'additive_terms', 'daily', 'weekly', 'yearly',
-            'holidays', 'zeros', 'extra_regressors_additive','yhat',
+            'holidays', 'zeros', 'extra_regressors_additive', 'yhat',
             'extra_regressors_multiplicative', 'multiplicative_terms',
         ]
         rn_l = [n + '_lower' for n in reserved_names]
@@ -214,6 +225,10 @@ class Prophet(object):
                 name in self.holidays['holiday'].unique()):
             raise ValueError(
                 'Name "{}" already used for a holiday.'.format(name))
+        if (check_holidays and self.append_holidays is not None and
+                name in get_holiday_names(self.append_holidays)):
+            raise ValueError(
+                'Name "{}" is a holiday name in {}.'.format(name, self.append_holidays))
         if check_seasonalities and name in self.seasonalities:
             raise ValueError(
                 'Name "{}" already used for a seasonality.'.format(name))
@@ -351,8 +366,8 @@ class Prophet(object):
             if self.n_changepoints > 0:
                 cp_indexes = (
                     np.linspace(0, hist_size - 1, self.n_changepoints + 1)
-                    .round()
-                    .astype(np.int)
+                        .round()
+                        .astype(np.int)
                 )
                 self.changepoints = (
                     self.history.iloc[cp_indexes]['ds'].tail(-1)
@@ -384,8 +399,8 @@ class Prophet(object):
         # convert to days since epoch
         t = np.array(
             (dates - pd.datetime(1970, 1, 1))
-            .dt.total_seconds()
-            .astype(np.float)
+                .dt.total_seconds()
+                .astype(np.float)
         ) / (3600 * 24.)
         return np.column_stack([
             fun((2.0 * (i + 1) * np.pi * t / period))
@@ -429,6 +444,32 @@ class Prophet(object):
         prior_scale_list: List of prior scales for each holiday column.
         holiday_names: List of names of holidays
         """
+        # Concatenate holidays and append_holidays
+        all_holidays = self.holidays
+        if self.append_holidays is not None:
+            year_list = list({x.year for x in dates})
+            append_holidays_df = make_holidays_df(
+                                    year_list=year_list,
+                                    country=self.append_holidays)
+            all_holidays = pd.concat((all_holidays, append_holidays_df), sort=False)
+            all_holidays.reset_index(drop=True, inplace=True)
+        # Make fit and predict holidays components match
+        if self.train_holiday_names is not None:
+            train_holidays = self.train_holiday_names
+            # Remove holidays whose names did not show up in fit
+            index_to_drop = all_holidays.index[
+                                np.logical_not(
+                                    all_holidays.holiday.isin(train_holidays))]
+            all_holidays = all_holidays.drop(index_to_drop)
+            # Add holiday names that showed up in fit but not in predict, with ds as NA
+            holidays_to_add = pd.DataFrame(
+                                {'holiday':
+                                    train_holidays[
+                                        np.logical_not(
+                                            train_holidays.isin(all_holidays.holiday))]})
+            all_holidays = pd.concat((all_holidays, holidays_to_add), sort=False)
+            all_holidays.reset_index(drop=True, inplace=True)
+
         # Holds columns of our future matrix.
         expanded_holidays = defaultdict(lambda: np.zeros(dates.shape[0]))
         prior_scales = {}
@@ -436,7 +477,7 @@ class Prophet(object):
         # Strip to just dates.
         row_index = pd.DatetimeIndex(dates.apply(lambda x: x.date()))
 
-        for _ix, row in self.holidays.iterrows():
+        for _ix, row in all_holidays.iterrows():
             dt = row.ds.date()
             try:
                 lw = int(row.get('lower_window', 0))
@@ -448,7 +489,7 @@ class Prophet(object):
             if np.isnan(ps):
                 ps = float(self.holidays_prior_scale)
             if (
-                row.holiday in prior_scales and prior_scales[row.holiday] != ps
+                    row.holiday in prior_scales and prior_scales[row.holiday] != ps
             ):
                 raise ValueError(
                     'Holiday {} does not have consistent prior scale '
@@ -475,14 +516,20 @@ class Prophet(object):
                     # Access key to generate value
                     expanded_holidays[key]
         holiday_features = pd.DataFrame(expanded_holidays)
+        # Make sure fit and predict component_cols are exactly equal
+        holiday_features = holiday_features[sorted(holiday_features.columns.tolist())]
         prior_scale_list = [
             prior_scales[h.split('_delim_')[0]]
             for h in holiday_features.columns
         ]
-        return holiday_features, prior_scale_list, list(prior_scales.keys())
+        holiday_names = list(prior_scales.keys())
+        # Store holiday names used in fit
+        if self.train_holiday_names is None:
+            self.train_holiday_names = pd.Series(holiday_names)
+        return holiday_features, prior_scale_list, holiday_names
 
     def add_regressor(
-        self, name, prior_scale=None, standardize='auto', mode=None
+            self, name, prior_scale=None, standardize='auto', mode=None
     ):
         """Add an additional regressor to be used for fitting and predicting.
 
@@ -534,7 +581,7 @@ class Prophet(object):
         return self
 
     def add_seasonality(
-        self, name, period, fourier_order, prior_scale=None, mode=None
+            self, name, period, fourier_order, prior_scale=None, mode=None
     ):
         """Add a seasonal component with specified period, number of Fourier
         components, and prior scale.
@@ -626,7 +673,7 @@ class Prophet(object):
             modes[props['mode']].append(name)
 
         # Holiday features
-        if self.holidays is not None:
+        if self.holidays is not None or self.append_holidays is not None:
             features, holiday_priors, holiday_names = (
                 self.make_holiday_features(df['ds'])
             )
@@ -679,9 +726,9 @@ class Prophet(object):
             ],
         })
         # Add total for holidays
-        if self.holidays is not None:
+        if self.train_holiday_names is not None:
             components = self.add_group_component(
-                components, 'holidays', self.holidays['holiday'].unique())
+                components, 'holidays', self.train_holiday_names.unique())
         # Add totals additive and multiplicative components, and regressors
         for mode in ['additive', 'multiplicative']:
             components = self.add_group_component(
@@ -710,8 +757,8 @@ class Prophet(object):
         component_cols.drop('zeros', axis=1, inplace=True, errors='ignore')
         # Validation
         if (
-            max(component_cols['additive_terms']
-            + component_cols['multiplicative_terms']) > 1
+                max(component_cols['additive_terms']
+                    + component_cols['multiplicative_terms']) > 1
         ):
             raise Exception('A bug occurred in seasonal components.')
         # Compare to the training, if set.
@@ -979,8 +1026,8 @@ class Prophet(object):
             }
 
         if (
-            (history['y'].min() == history['y'].max())
-            and self.growth == 'linear'
+                (history['y'].min() == history['y'].max())
+                and self.growth == 'linear'
         ):
             # Nothing to fit.
             self.params = stan_init()
@@ -1050,8 +1097,8 @@ class Prophet(object):
         # Add in forecast components
         df2 = pd.concat((df[cols], intervals, seasonal_components), axis=1)
         df2['yhat'] = (
-            df2['trend'] * (1 + df2['multiplicative_terms'])
-            + df2['additive_terms']
+                df2['trend'] * (1 + df2['multiplicative_terms'])
+                + df2['additive_terms']
         )
         return df2
 
@@ -1104,8 +1151,8 @@ class Prophet(object):
         gammas = np.zeros(len(changepoint_ts))
         for i, t_s in enumerate(changepoint_ts):
             gammas[i] = (
-                (t_s - m - np.sum(gammas))
-                * (1 - k_cum[i] / k_cum[i + 1])  # noqa W503
+                    (t_s - m - np.sum(gammas))
+                    * (1 - k_cum[i] / k_cum[i + 1])  # noqa W503
             )
         # Get cumulative rate and offset at each t
         k_t = k * np.ones_like(t)
@@ -1165,7 +1212,7 @@ class Prophet(object):
 
             comp = np.matmul(X, beta_c.transpose())
             if component in self.component_modes['additive']:
-                 comp *= self.y_scale
+                comp *= self.y_scale
             data[component] = np.nanmean(comp, axis=1)
             data[component + '_lower'] = np.nanpercentile(
                 comp, lower_p, axis=1,

File diff suppressed because it is too large
+ 1206 - 0
python/fbprophet/hdays.py


+ 70 - 0
python/fbprophet/make_holidays.py

@@ -0,0 +1,70 @@
+# Copyright (c) 2017-present, Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree. An additional grant
+# of patent rights can be found in the PATENTS file in the same directory.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import pandas as pd
+import numpy as np
+import warnings
+import holidays as hdays_part1
+import fbprophet.hdays as hdays_part2
+
+
+def get_holiday_names(country):
+    """Return all possible holiday names of given country
+
+    Parameters
+    ----------
+    country: country name
+
+    Returns
+    -------
+    Set of all holiday names found for the given country in the
+    years 1995 through 2044
+    """
+    years = np.arange(1995, 2045)
+    try:
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            holiday_names = getattr(hdays_part2, country)(years=years).values()
+    except AttributeError:
+        try:
+            holiday_names = getattr(hdays_part1, country)(years=years).values()
+        except AttributeError:
+            raise AttributeError(
+                "Holidays in {} are not currently supported!".format(country))
+    return set(holiday_names)
+
+
+def make_holidays_df(year_list, country):
+    """Make dataframe of holidays for given years and country
+
+    Parameters
+    ----------
+    year_list: a list of years
+    country: country name
+
+    Returns
+    -------
+    Dataframe with 'ds' and 'holiday', which can directly feed
+    to 'holidays' params in Prophet
+    """
+    try:
+        holidays = getattr(hdays_part2, country)(years=year_list)
+    except AttributeError:
+        try:
+            holidays = getattr(hdays_part1, country)(years=year_list)
+        except AttributeError:
+            raise AttributeError(
+                "Holidays in {} are not currently supported!".format(country))
+    holidays_df = pd.DataFrame(list(holidays.items()), columns=['ds', 'holiday'])
+    holidays_df.reset_index(inplace=True, drop=True)
+    holidays_df['ds'] = pd.to_datetime(holidays_df['ds'])
+    return (holidays_df)

+ 24 - 0
python/fbprophet/tests/test_prophet.py

@@ -358,6 +358,30 @@ class TestProphet(TestCase):
         model = Prophet(holidays=holidays, uncertainty_samples=0)
         model.fit(DATA).predict()
 
+    def test_fit_predict_with_append_holidays(self):
+        holidays = pd.DataFrame({
+            'ds': pd.to_datetime(['2012-06-06', '2013-06-06']),
+            'holiday': ['seans-bday'] * 2,
+            'lower_window': [0] * 2,
+            'upper_window': [1] * 2,
+        })
+        append_holidays = 'US'
+        # Test with holidays and append_holidays
+        model = Prophet(holidays=holidays,
+                        append_holidays=append_holidays,
+                        uncertainty_samples=0)
+        model.fit(DATA).predict()
+        # There are training holidays missing in the test set
+        train = DATA.head(154)
+        future = DATA.tail(355)
+        model = Prophet(append_holidays=append_holidays, uncertainty_samples=0)
+        model.fit(train).predict(future)
+        # There are test holidays missing in the training set
+        train = DATA.tail(355)
+        future = DATA2
+        model = Prophet(append_holidays=append_holidays, uncertainty_samples=0)
+        model.fit(train).predict(future)
+
     def test_make_future_dataframe(self):
         N = 468
         train = DATA.head(N // 2)

+ 3 - 0
python/requirements.txt

@@ -3,3 +3,6 @@ pystan>=2.14
 numpy>=1.10.0
 pandas>=0.20.1
 matplotlib>=2.0.0
+lunardate>=0.1.5
+convertdate>=2.1.2
+holidays>=0.9.5