Parcourir la source

Compute and store a binary matrix indicating which seasonalities/regressors correspond to which columns of the feature matrix (Py)

Ben Letham il y a 7 ans
Parent
commit
b9923fd9fd
2 fichiers modifiés avec 70 ajouts et 43 suppressions
  1. 44 28
      python/fbprophet/forecaster.py
  2. 26 15
      python/fbprophet/tests/test_prophet.py

+ 44 - 28
python/fbprophet/forecaster.py

@@ -152,6 +152,7 @@ class Prophet(object):
         self.params = {}
         self.params = {}
         self.history = None
         self.history = None
         self.history_dates = None
         self.history_dates = None
+        self.train_component_cols = None
         self.validate_inputs()
         self.validate_inputs()
 
 
     def validate_inputs(self):
     def validate_inputs(self):
@@ -187,7 +188,7 @@ class Prophet(object):
         if '_delim_' in name:
         if '_delim_' in name:
             raise ValueError('Name cannot contain "_delim_"')
             raise ValueError('Name cannot contain "_delim_"')
         reserved_names = [
         reserved_names = [
-            'trend', 'seasonal', 'seasonalities', 'daily', 'weekly', 'yearly',
+            'trend', 'additive_terms', 'daily', 'weekly', 'yearly',
             'holidays', 'zeros', 'extra_regressors', 'yhat'
             'holidays', 'zeros', 'extra_regressors', 'yhat'
         ]
         ]
         rn_l = [n + '_lower' for n in reserved_names]
         rn_l = [n + '_lower' for n in reserved_names]
@@ -557,6 +558,8 @@ class Prophet(object):
         -------
         -------
         pd.DataFrame with regression features.
         pd.DataFrame with regression features.
         list of prior scales for each column of the features dataframe.
         list of prior scales for each column of the features dataframe.
+        Dataframe with indicators for which regression components correspond to
+            which columns.
         """
         """
         seasonal_features = []
         seasonal_features = []
         prior_scales = []
         prior_scales = []
@@ -588,7 +591,10 @@ class Prophet(object):
             seasonal_features.append(
             seasonal_features.append(
                 pd.DataFrame({'zeros': np.zeros(df.shape[0])}))
                 pd.DataFrame({'zeros': np.zeros(df.shape[0])}))
             prior_scales.append(1.)
             prior_scales.append(1.)
-        return pd.concat(seasonal_features, axis=1), prior_scales
+
+        seasonal_features = pd.concat(seasonal_features, axis=1)
+        component_cols = self.regressor_column_matrix(seasonal_features)
+        return seasonal_features, prior_scales, component_cols
 
 
     def parse_seasonality_args(self, name, arg, auto_disable, default_order):
     def parse_seasonality_args(self, name, arg, auto_disable, default_order):
         """Get number of fourier components for built-in seasonalities.
         """Get number of fourier components for built-in seasonalities.
@@ -779,8 +785,9 @@ class Prophet(object):
         history = self.setup_dataframe(history, initialize_scales=True)
         history = self.setup_dataframe(history, initialize_scales=True)
         self.history = history
         self.history = history
         self.set_auto_seasonalities()
         self.set_auto_seasonalities()
-        seasonal_features, prior_scales = (
+        seasonal_features, prior_scales, component_cols = (
             self.make_all_seasonality_features(history))
             self.make_all_seasonality_features(history))
+        self.train_component_cols = component_cols
 
 
         self.set_changepoints()
         self.set_changepoints()
 
 
@@ -884,7 +891,7 @@ class Prophet(object):
             cols.append('floor')
             cols.append('floor')
         # Add in forecast components
         # Add in forecast components
         df2 = pd.concat((df[cols], intervals, seasonal_components), axis=1)
         df2 = pd.concat((df[cols], intervals, seasonal_components), axis=1)
-        df2['yhat'] = df2['trend'] + df2['seasonal']
+        df2['yhat'] = df2['trend'] + df2['additive_terms']
         return df2
         return df2
 
 
     @staticmethod
     @staticmethod
@@ -984,22 +991,38 @@ class Prophet(object):
         -------
         -------
         Dataframe with seasonal components.
         Dataframe with seasonal components.
         """
         """
-        seasonal_features, _ = self.make_all_seasonality_features(df)
+        seasonal_features, _, component_cols = (
+            self.make_all_seasonality_features(df)
+        )
         lower_p = 100 * (1.0 - self.interval_width) / 2
         lower_p = 100 * (1.0 - self.interval_width) / 2
         upper_p = 100 * (1.0 + self.interval_width) / 2
         upper_p = 100 * (1.0 + self.interval_width) / 2
 
 
+        X = seasonal_features.as_matrix()
+        data = {}
+        for component in component_cols.columns:
+            beta_c = self.params['beta'] * component_cols[component].values
+
+            comp = np.matmul(X, beta_c.transpose()) * self.y_scale
+            data[component] = np.nanmean(comp, axis=1)
+            data[component + '_lower'] = np.nanpercentile(
+                comp, lower_p, axis=1,
+            )
+            data[component + '_upper'] = np.nanpercentile(
+                comp, upper_p, axis=1,
+            )
+        return pd.DataFrame(data)
+
+    def regressor_column_matrix(self, seasonal_features):
         components = pd.DataFrame({
         components = pd.DataFrame({
             'col': np.arange(seasonal_features.shape[1]),
             'col': np.arange(seasonal_features.shape[1]),
             'component': [x.split('_delim_')[0] for x in seasonal_features.columns],
             'component': [x.split('_delim_')[0] for x in seasonal_features.columns],
         })
         })
-        # Add total for all regression components
+        # Add total for all additive components
         components = components.append(pd.DataFrame({
         components = components.append(pd.DataFrame({
             'col': np.arange(seasonal_features.shape[1]),
             'col': np.arange(seasonal_features.shape[1]),
-            'component': 'seasonal',
+            'component': 'additive_terms',
         }))
         }))
-        # Add totals for seasonality, holiday, and extra regressors
-        components = self.add_group_component(
-            components, 'seasonalities', self.seasonalities.keys())
+        # Add totals for holidays and extra regressors
         if self.holidays is not None:
         if self.holidays is not None:
             components = self.add_group_component(
             components = self.add_group_component(
                 components, 'holidays', self.holidays['holiday'].unique())
                 components, 'holidays', self.holidays['holiday'].unique())
@@ -1007,23 +1030,16 @@ class Prophet(object):
             components, 'extra_regressors', self.extra_regressors.keys())
             components, 'extra_regressors', self.extra_regressors.keys())
         # Remove the placeholder
         # Remove the placeholder
         components = components[components['component'] != 'zeros']
         components = components[components['component'] != 'zeros']
-
-        X = seasonal_features.as_matrix()
-        data = {}
-        for component, features in components.groupby('component'):
-            cols = features.col.tolist()
-            comp_beta = self.params['beta'][:, cols]
-            comp_features = X[:, cols]
-            comp = (
-                np.matmul(comp_features, comp_beta.transpose())
-                * self.y_scale  # noqa W503
-            )
-            data[component] = np.nanmean(comp, axis=1)
-            data[component + '_lower'] = np.nanpercentile(comp, lower_p,
-                                                            axis=1)
-            data[component + '_upper'] = np.nanpercentile(comp, upper_p,
-                                                            axis=1)
-        return pd.DataFrame(data)
+        # Convert to a binary matrix
+        component_cols = pd.crosstab(
+            components['col'], components['component'],
+        )
+        # Compare to the training, if set.
+        if self.train_component_cols is not None:
+            component_cols = component_cols[self.train_component_cols.columns]
+            if not component_cols.equals(self.train_component_cols):
+                raise Exception('A bug occurred in constructing regressors.')
+        return component_cols
 
 
     def add_group_component(self, components, name, group):
     def add_group_component(self, components, name, group):
         """Adds a component with given name that contains all of the components
         """Adds a component with given name that contains all of the components
@@ -1061,7 +1077,7 @@ class Prophet(object):
         )))
         )))
 
 
         # Generate seasonality features once so we can re-use them.
         # Generate seasonality features once so we can re-use them.
-        seasonal_features, _ = self.make_all_seasonality_features(df)
+        seasonal_features, _, _ = self.make_all_seasonality_features(df)
 
 
         sim_values = {'yhat': [], 'trend': [], 'seasonal': []}
         sim_values = {'yhat': [], 'trend': [], 'seasonal': []}
         for i in range(n_iterations):
         for i in range(n_iterations):

+ 26 - 15
python/fbprophet/tests/test_prophet.py

@@ -460,12 +460,20 @@ class TestProphet(TestCase):
         m.add_seasonality(name='monthly', period=30, fourier_order=5,
         m.add_seasonality(name='monthly', period=30, fourier_order=5,
                           prior_scale=2.)
                           prior_scale=2.)
         m.fit(DATA.copy())
         m.fit(DATA.copy())
-        seasonal_features, prior_scales = m.make_all_seasonality_features(
-            m.history)
+        seasonal_features, prior_scales, component_cols = (
+            m.make_all_seasonality_features(m.history)
+        )
+        self.assertEqual(sum(component_cols['monthly']), 10)
+        self.assertEqual(sum(component_cols['special_day']), 1)
+        self.assertEqual(sum(component_cols['weekly']), 6)
         if seasonal_features.columns[0] == 'monthly_delim_1':
         if seasonal_features.columns[0] == 'monthly_delim_1':
             true = [2.] * 10 + [10.] * 6 + [4.]
             true = [2.] * 10 + [10.] * 6 + [4.]
+            self.assertEqual(sum(component_cols['monthly'][:10]), 10)
+            self.assertEqual(sum(component_cols['weekly'][10:16]), 6)
         else:
         else:
             true = [10.] * 6 + [2.] * 10 + [4.]
             true = [10.] * 6 + [2.] * 10 + [4.]
+            self.assertEqual(sum(component_cols['weekly'][:6]), 6)
+            self.assertEqual(sum(component_cols['monthly'][6:16]), 10)
         self.assertEqual(prior_scales, true)
         self.assertEqual(prior_scales, true)
 
 
     def test_added_regressors(self):
     def test_added_regressors(self):
@@ -504,12 +512,19 @@ class TestProphet(TestCase):
         self.assertAlmostEqual(df2['numeric_feature'][0], -1.726962, places=4)
         self.assertAlmostEqual(df2['numeric_feature'][0], -1.726962, places=4)
         self.assertAlmostEqual(df2['binary_feature2'][0], 2.022859, places=4)
         self.assertAlmostEqual(df2['binary_feature2'][0], 2.022859, places=4)
         # Check that feature matrix and prior scales are correctly constructed
         # Check that feature matrix and prior scales are correctly constructed
-        seasonal_features, prior_scales = m.make_all_seasonality_features(df2)
-        self.assertIn('binary_feature', seasonal_features)
-        self.assertIn('numeric_feature', seasonal_features)
-        self.assertIn('binary_feature2', seasonal_features)
+        seasonal_features, prior_scales, component_cols = (
+            m.make_all_seasonality_features(df2)
+        )
         self.assertEqual(seasonal_features.shape[1], 29)
         self.assertEqual(seasonal_features.shape[1], 29)
-        self.assertEqual(set(prior_scales[26:]), set([0.2, 0.5, 10.]))
+        names = ['binary_feature', 'numeric_feature', 'binary_feature2']
+        true_priors = [0.2, 0.5, 10.]
+        for i, name in enumerate(names):
+            self.assertIn(name, seasonal_features)
+            self.assertEqual(sum(component_cols[name]), 1)
+            self.assertEqual(
+                sum(np.array(prior_scales) * component_cols[name]),
+                true_priors[i],
+            )
         # Check that forecast components are reasonable
         # Check that forecast components are reasonable
         future = pd.DataFrame({
         future = pd.DataFrame({
             'ds': ['2014-06-01'],
             'ds': ['2014-06-01'],
@@ -520,23 +535,19 @@ class TestProphet(TestCase):
             m.predict(future)
             m.predict(future)
         future['binary_feature2'] = 0
         future['binary_feature2'] = 0
         fcst = m.predict(future)
         fcst = m.predict(future)
-        self.assertEqual(fcst.shape[1], 31)
+        self.assertEqual(fcst.shape[1], 28)
         self.assertEqual(fcst['binary_feature'][0], 0)
         self.assertEqual(fcst['binary_feature'][0], 0)
         self.assertAlmostEqual(
         self.assertAlmostEqual(
             fcst['extra_regressors'][0],
             fcst['extra_regressors'][0],
             fcst['numeric_feature'][0] + fcst['binary_feature2'][0],
             fcst['numeric_feature'][0] + fcst['binary_feature2'][0],
         )
         )
         self.assertAlmostEqual(
         self.assertAlmostEqual(
-            fcst['seasonalities'][0],
-            fcst['yearly'][0] + fcst['weekly'][0],
-        )
-        self.assertAlmostEqual(
-            fcst['seasonal'][0],
-            fcst['seasonalities'][0] + fcst['extra_regressors'][0],
+            fcst['additive_terms'][0],
+            fcst['yearly'][0] + fcst['weekly'][0] + fcst['extra_regressors'][0]
         )
         )
         self.assertAlmostEqual(
         self.assertAlmostEqual(
             fcst['yhat'][0],
             fcst['yhat'][0],
-            fcst['trend'][0] + fcst['seasonal'][0],
+            fcst['trend'][0] + fcst['additive_terms'][0],
         )
         )
         # Check fails if constant extra regressor
         # Check fails if constant extra regressor
         df['constant_feature'] = 5
         df['constant_feature'] = 5