Ben Letham 8 anni fa
parent
commit
4523315ffc
2 file modificati con 41 aggiunte e 38 eliminazioni
  1. 38 35
      python/fbprophet/forecaster.py
  2. 3 3
      python/fbprophet/tests/test_prophet.py

+ 38 - 35
python/fbprophet/forecaster.py

@@ -66,7 +66,7 @@ class Prophet(object):
         fluctuations, smaller values dampen the seasonality. Can be specified
         fluctuations, smaller values dampen the seasonality. Can be specified
         for individual seasonalities using add_seasonality.
         for individual seasonalities using add_seasonality.
     holidays_prior_scale: Parameter modulating the strength of the holiday
     holidays_prior_scale: Parameter modulating the strength of the holiday
-        components model, unless overriden in the holidays input.
+        components model, unless overridden in the holidays input.
     changepoint_prior_scale: Parameter modulating the flexibility of the
     changepoint_prior_scale: Parameter modulating the flexibility of the
         automatic changepoint selection. Large values will allow many
         automatic changepoint selection. Large values will allow many
         changepoints, small values will allow few changepoints.
         changepoints, small values will allow few changepoints.
@@ -115,8 +115,8 @@ class Prophet(object):
         if holidays is not None:
         if holidays is not None:
             if not (
             if not (
                 isinstance(holidays, pd.DataFrame)
                 isinstance(holidays, pd.DataFrame)
-                and 'ds' in holidays
-                and 'holiday' in holidays
+                and 'ds' in holidays  # noqa W503
+                and 'holiday' in holidays  # noqa W503
             ):
             ):
                 raise ValueError("holidays must be a DataFrame with 'ds' and "
                 raise ValueError("holidays must be a DataFrame with 'ds' and "
                                  "'holiday' columns.")
                                  "'holiday' columns.")
@@ -232,32 +232,7 @@ class Prophet(object):
         df = df.sort_values('ds')
         df = df.sort_values('ds')
         df.reset_index(inplace=True, drop=True)
         df.reset_index(inplace=True, drop=True)
 
 
-        if initialize_scales:
-            if self.growth == 'logistic' and 'floor' in df:
-                self.logistic_floor = True
-                floor = df['floor']
-            else:
-                floor = 0.
-            self.y_scale = (df['y'] - floor).abs().max()
-            if self.y_scale == 0:
-                self.y_scale = 1
-            self.start = df['ds'].min()
-            self.t_scale = df['ds'].max() - self.start
-            for name, props in self.extra_regressors.items():
-                standardize = props['standardize']
-                if standardize == 'auto':
-                    if set(df[name].unique()) == set([1, 0]):
-                        # Don't standardize binary variables.
-                        standardize = False
-                    else:
-                        standardize = True
-                if standardize:
-                    mu = df[name].mean()
-                    std = df[name].std()
-                    if std == 0:
-                        std = mu
-                    self.extra_regressors[name]['mu'] = mu
-                    self.extra_regressors[name]['std'] = std
+        self.initialize_scales(initialize_scales, df)
 
 
         if self.logistic_floor:
         if self.logistic_floor:
             if 'floor' not in df:
             if 'floor' not in df:
@@ -279,6 +254,35 @@ class Prophet(object):
                 raise ValueError('Found NaN in column ' + name)
                 raise ValueError('Found NaN in column ' + name)
         return df
         return df
 
 
+    def initialize_scales(self, initialize_scales, df):
+        if not initialize_scales:
+            return
+        if self.growth == 'logistic' and 'floor' in df:
+            self.logistic_floor = True
+            floor = df['floor']
+        else:
+            floor = 0.
+        self.y_scale = (df['y'] - floor).abs().max()
+        if self.y_scale == 0:
+            self.y_scale = 1
+        self.start = df['ds'].min()
+        self.t_scale = df['ds'].max() - self.start
+        for name, props in self.extra_regressors.items():
+            standardize = props['standardize']
+            if standardize == 'auto':
+                if set(df[name].unique()) == set([1, 0]):
+                    # Don't standardize binary variables.
+                    standardize = False
+                else:
+                    standardize = True
+            if standardize:
+                mu = df[name].mean()
+                std = df[name].std()
+                if std == 0:
+                    std = mu
+                self.extra_regressors[name]['mu'] = mu
+                self.extra_regressors[name]['std'] = std
+
     def set_changepoints(self):
     def set_changepoints(self):
         """Set changepoints
         """Set changepoints
 
 
@@ -422,7 +426,7 @@ class Prophet(object):
             if ps <= 0:
             if ps <= 0:
                 raise ValueError('Prior scale must be > 0')
                 raise ValueError('Prior scale must be > 0')
             prior_scales[row.holiday] = ps
             prior_scales[row.holiday] = ps
-                
+
             for offset in range(lw, uw + 1):
             for offset in range(lw, uw + 1):
                 occurrence = dt + timedelta(days=offset)
                 occurrence = dt + timedelta(days=offset)
                 try:
                 try:
@@ -918,7 +922,7 @@ class Prophet(object):
         for i, t_s in enumerate(changepoint_ts):
         for i, t_s in enumerate(changepoint_ts):
             gammas[i] = (
             gammas[i] = (
                 (t_s - m - np.sum(gammas))
                 (t_s - m - np.sum(gammas))
-                * (1 - k_cum[i] / k_cum[i + 1])
+                * (1 - k_cum[i] / k_cum[i + 1])  # noqa W503
             )
             )
         # Get cumulative rate and offset at each t
         # Get cumulative rate and offset at each t
         k_t = k * np.ones_like(t)
         k_t = k * np.ones_like(t)
@@ -997,7 +1001,7 @@ class Prophet(object):
             comp_features = X[:, cols]
             comp_features = X[:, cols]
             comp = (
             comp = (
                 np.matmul(comp_features, comp_beta.transpose())
                 np.matmul(comp_features, comp_beta.transpose())
-                * self.y_scale
+                * self.y_scale  # noqa W503
             )
             )
             data[component] = np.nanmean(comp, axis=1)
             data[component] = np.nanmean(comp, axis=1)
             data[component + '_lower'] = np.nanpercentile(comp, lower_p,
             data[component + '_lower'] = np.nanpercentile(comp, lower_p,
@@ -1025,7 +1029,6 @@ class Prophet(object):
         components = components.append(new_comp)
         components = components.append(new_comp)
         return components
         return components
 
 
-
     def sample_posterior_predictive(self, df):
     def sample_posterior_predictive(self, df):
         """Prophet posterior predictive samples.
         """Prophet posterior predictive samples.
 
 
@@ -1237,7 +1240,7 @@ class Prophet(object):
         ax.plot(fcst['ds'].values, fcst['yhat'], ls='-', c='#0072B2')
         ax.plot(fcst['ds'].values, fcst['yhat'], ls='-', c='#0072B2')
         if 'cap' in fcst and plot_cap:
         if 'cap' in fcst and plot_cap:
             ax.plot(fcst['ds'].values, fcst['cap'], ls='--', c='k')
             ax.plot(fcst['ds'].values, fcst['cap'], ls='--', c='k')
-        if self.logistic_floor and 'floor' in fcst and plot_cap :
+        if self.logistic_floor and 'floor' in fcst and plot_cap:
             ax.plot(fcst['ds'].values, fcst['floor'], ls='--', c='k')
             ax.plot(fcst['ds'].values, fcst['floor'], ls='--', c='k')
         if uncertainty:
         if uncertainty:
             ax.fill_between(fcst['ds'].values, fcst['yhat_lower'],
             ax.fill_between(fcst['ds'].values, fcst['yhat_lower'],
@@ -1333,7 +1336,7 @@ class Prophet(object):
         artists += ax.plot(fcst['ds'].values, fcst[name], ls='-', c='#0072B2')
         artists += ax.plot(fcst['ds'].values, fcst[name], ls='-', c='#0072B2')
         if 'cap' in fcst and plot_cap:
         if 'cap' in fcst and plot_cap:
             artists += ax.plot(fcst['ds'].values, fcst['cap'], ls='--', c='k')
             artists += ax.plot(fcst['ds'].values, fcst['cap'], ls='--', c='k')
-        if self.logistic_floor and 'floor' in fcst and plot_cap :
+        if self.logistic_floor and 'floor' in fcst and plot_cap:
             ax.plot(fcst['ds'].values, fcst['floor'], ls='--', c='k')
             ax.plot(fcst['ds'].values, fcst['floor'], ls='--', c='k')
         if uncertainty:
         if uncertainty:
             artists += [ax.fill_between(
             artists += [ax.fill_between(

+ 3 - 3
python/fbprophet/tests/test_prophet.py

@@ -521,15 +521,15 @@ class TestProphet(TestCase):
             fcst['extra_regressors'][0],
             fcst['extra_regressors'][0],
             fcst['numeric_feature'][0] + fcst['binary_feature2'][0],
             fcst['numeric_feature'][0] + fcst['binary_feature2'][0],
         )
         )
-        self.assertEqual(
+        self.assertAlmostEqual(
             fcst['seasonalities'][0],
             fcst['seasonalities'][0],
             fcst['yearly'][0] + fcst['weekly'][0],
             fcst['yearly'][0] + fcst['weekly'][0],
         )
         )
-        self.assertEqual(
+        self.assertAlmostEqual(
             fcst['seasonal'][0],
             fcst['seasonal'][0],
             fcst['seasonalities'][0] + fcst['extra_regressors'][0],
             fcst['seasonalities'][0] + fcst['extra_regressors'][0],
         )
         )
-        self.assertEqual(
+        self.assertAlmostEqual(
             fcst['yhat'][0],
             fcst['yhat'][0],
             fcst['trend'][0] + fcst['seasonal'][0],
             fcst['trend'][0] + fcst['seasonal'][0],
         )
         )