
Compute cross validation performance metrics on a rolling window

Ben Letham 7 years ago
parent
commit
3e59bbdc84

+ 43 - 54
python/fbprophet/diagnostics.py

@@ -196,7 +196,7 @@ def prophet_copy(m, cutoff=None):
     return m2
 
 
-def performance_metrics(df, metrics=None, aggregation='horizon'):
+def performance_metrics(df, metrics=None, rolling_window=0.05):
     """Compute performance metrics from cross-validation results.
 
     Computes a suite of performance metrics on the output of cross-validation.
@@ -209,13 +209,17 @@ def performance_metrics(df, metrics=None, aggregation='horizon'):
     A subset of these can be specified by passing a list of names as the
     `metrics` argument.
 
-    By default, metrics will be computed for each horizon (ds - cutoff).
-    Alternatively, metrics can be computed at the level of individual ds/cutoff
-    pairs (aggregation='none'), or aggregated over all ds/cutoffs
-    (aggregation='all').
-
-    The output is a dataframe containing the columns corresponding to the level
-    of aggregation ('horizon', 'ds' and 'cutoff', or none) along with columns
+    Metrics are calculated over a rolling window of cross validation
+    predictions, after sorting by horizon. The size of that window (number of
+    simulated forecast points) is determined by the rolling_window argument,
+    which specifies a proportion of simulated forecast points to include in
+    each window. rolling_window=0 will compute it separately for each simulated
+    forecast point (i.e., 'mse' will actually be squared error with no mean).
+    The default of rolling_window=0.05 will use 5% of the rows in df in each
+    window. rolling_window=1 will compute the metric across all simulated forecast
+    points. The results are set to the right edge of the window.
+
+    The output is a dataframe containing column 'horizon' along with columns
     for each of the metrics computed.
 
     Parameters
@@ -223,22 +227,13 @@ def performance_metrics(df, metrics=None, aggregation='horizon'):
     df: The dataframe returned by cross_validation.
     metrics: A list of performance metrics to compute. If not provided, will
         use ['mse', 'mae', 'mape', 'coverage'].
-    aggregation: Level of aggregation for computing performance statistics.
-        Must be 'horizon', 'none', or 'all'.
+    rolling_window: Proportion of data to use in each rolling window for
+        computing the metrics.
 
     Returns
     -------
-    Dataframe with a column for each metric, and a combination of columns 'ds',
-    'cutoff', and 'horizon', depending on the aggregation level.
+    Dataframe with a column for each metric, and column 'horizon'
     """
-    # Input validation
-    valid_aggregations = ['horizon', 'all', 'none']
-    if aggregation not in valid_aggregations:
-        raise ValueError(
-            'Aggregation {} is not valid; must be one of {}'.format(
-                aggregation, valid_agggregations
-            )
-        )
     valid_metrics = ['mse', 'mae', 'mape', 'coverage']
     if metrics is None:
         metrics = valid_metrics
@@ -248,62 +243,56 @@ def performance_metrics(df, metrics=None, aggregation='horizon'):
         raise ValueError(
             'Valid values for metrics are: {}'.format(valid_metrics)
         )
-    # Get function for the metrics we want
-    metric_fns = {m: eval(m) for m in metrics}
-    def all_metrics(df_g):
-        return pd.Series({name: fn(df_g) for name, fn in metric_fns.items()})
-    # Apply functions to groupby
-    if aggregation == 'all':
-        return all_metrics(df)
-    # else,
     df_m = df.copy()
     df_m['horizon'] = df_m['ds'] - df_m['cutoff']
-    if aggregation == 'horizon':
-        return df_m.groupby('horizon').apply(all_metrics).reset_index()
-    # else,
-    for name, fn in metric_fns.items():
-        df_m[name] = fn(df_m, agg=False)
-    return df_m
+    df_m.sort_values('horizon', inplace=True)
+    # Window size
+    w = int(rolling_window * df_m.shape[0])
+    w = max(w, 1)
+    w = min(w, df_m.shape[0])
+    cols = ['horizon']
+    for metric in metrics:
+        df_m[metric] = eval(metric)(df_m, w)
+        cols.append(metric)
+    df_m = df_m[cols]
+    return df_m.dropna()
+
+
+def rolling_mean(x, w):
+    s = np.cumsum(np.insert(x, 0, 0))
+    prefix = np.empty(w - 1)
+    prefix.fill(np.nan)
+    return np.hstack((prefix, (s[w:] - s[:-w]) / float(w)))  # right-aligned
 
 
 # The functions below specify performance metrics for cross-validation results.
-# Each takes as input the output of cross_validation, and has two modes of
-# return: if agg=True, returns a float that is the metric aggregated over the
-# input. If agg=False, returns results without aggregation (for
-# aggregation='none' in performance_metrics).
+# Each takes as input the output of cross_validation, and returns the statistic
+# as an array, given a window size for rolling aggregation.
 
 
-def mse(df, agg=True):
+def mse(df, w):
     """Mean squared error
     """
     se = (df['y'] - df['yhat']) ** 2
-    if agg:
-        return np.mean(se)
-    return se
+    return rolling_mean(se.values, w)
 
 
-def mae(df, agg=True):
+def mae(df, w):
     """Mean absolute error
     """
     ae = np.abs(df['y'] - df['yhat'])
-    if agg:
-        return np.mean(ae)
-    return ae
+    return rolling_mean(ae.values, w)
 
 
-def mape(df, agg=True):
+def mape(df, w):
     """Mean absolute percent error
     """
     ape = np.abs((df['y'] - df['yhat']) / df['y'])
-    if agg:
-        return np.mean(ape)
-    return ape
+    return rolling_mean(ape.values, w)
 
 
-def coverage(df, agg=True):
+def coverage(df, w):
     """Coverage
     """
     is_covered = (df['y'] >= df['yhat_lower']) & (df['y'] <= df['yhat_upper'])
-    if agg:
-        return np.mean(is_covered)
-    return is_covered
+    return rolling_mean(is_covered.values, w)
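
A minimal standalone sketch of the cumulative-sum rolling mean introduced above; the toy input array and window size are illustrative only and not part of this commit:

import numpy as np

def rolling_mean(x, w):
    # Prefix sums: s[i] holds the sum of the first i elements, so
    # s[i + w] - s[i] is the sum of the window x[i:i + w].
    s = np.cumsum(np.insert(x, 0, 0))
    # The first w - 1 positions have no complete window; fill them with NaN
    # so each result sits at the right edge of its window.
    prefix = np.full(w - 1, np.nan)
    return np.hstack((prefix, (s[w:] - s[:-w]) / float(w)))

print(rolling_mean(np.array([1.0, 2.0, 3.0, 4.0, 5.0]), 3))
# [nan nan  2.  3.  4.]

performance_metrics writes each window's metric value at that right edge and then drops the leading NaN rows, so with n simulated forecast points and window size w the output has n - w + 1 rows.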

+ 9 - 40
python/fbprophet/tests/test_diagnostics.py

@@ -142,52 +142,21 @@ class TestDiagnostics(TestCase):
         df_cv = diagnostics.cross_validation(
             m, horizon='4 days', period='10 days', initial='90 days')
         # Aggregation level none
-        df_none = diagnostics.performance_metrics(df_cv, aggregation='none')
+        df_none = diagnostics.performance_metrics(df_cv, rolling_window=0)
         self.assertEqual(
             set(df_none.columns),
-            {
-                'y', 'yhat', 'yhat_lower', 'yhat_upper', 'ds', 'cutoff',
-                'horizon', 'coverage', 'mae', 'mape', 'mse',
-            },
+            {'horizon', 'coverage', 'mae', 'mape', 'mse'},
         )
-        # Check each metric
-        self.assertEqual(
-            np.abs(df_cv['yhat'][0] - df_cv['y'][0]),
-            df_none['mae'][0],
-        )
-        self.assertEqual(
-            np.abs((df_cv['yhat'][0] - df_cv['y'][0]) / df_cv['y'][0]),
-            df_none['mape'][0],
-        )
-        self.assertEqual(
-            (df_cv['yhat'][0] - df_cv['y'][0]) ** 2,
-            df_none['mse'][0],
-        )
-        self.assertEqual(
-            (
-                (df_cv['y'][0] >= df_cv['yhat_lower'][0])
-                and (df_cv['y'][0] <= df_cv['yhat_upper'][0])
-            ),
-            df_none['coverage'][0],
-        )
-        # Aggregation level horizon (default)
-        df_horizon = diagnostics.performance_metrics(df_cv)
+        self.assertEqual(df_none.shape[0], 14)
+        # Aggregation level 0.2
+        df_horizon = diagnostics.performance_metrics(df_cv, rolling_window=0.2)
         self.assertEqual(len(df_horizon['horizon'].unique()), 4)
-        self.assertEqual(
-            set(df_horizon.columns),
-            {'coverage', 'mse', 'mape', 'mae', 'horizon'},
-        )
-        self.assertEqual(df_horizon.shape[0], 4)
-        # Check aggregation
-        agg = df_none.groupby('horizon', as_index=False).agg('mean')
-        for metric in ['mse', 'mape', 'mae', 'horizon']:
-            self.assertTrue((agg[metric] == df_horizon[metric]).all())
+        self.assertEqual(df_horizon.shape[0], 13)
         # Aggregation level all
-        df_all = diagnostics.performance_metrics(df_cv, aggregation='all')
-        self.assertEqual(df_all.shape, (4,))
-        self.assertEqual(set(df_all.index), {'coverage', 'mse', 'mae', 'mape'})
+        df_all = diagnostics.performance_metrics(df_cv, rolling_window=1)
+        self.assertEqual(df_all.shape[0], 1)
         for metric in ['mse', 'mape', 'mae', 'coverage']:
-            self.assertEqual(df_all[metric], df_all[metric].mean())
+            self.assertEqual(df_all[metric].values[0], df_none[metric].mean())
         # Custom list of metrics
         df_horizon = diagnostics.performance_metrics(
             df_cv, metrics=['coverage', 'mse'],
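
A quick end-to-end sketch of the relationship the updated test asserts: with rolling_window=1 the single aggregated value equals the mean of the per-point values from rolling_window=0. The hand-made df_cv below merely mimics the columns produced by cross_validation and assumes fbprophet at this revision is importable; it is not taken from the test:

import numpy as np
import pandas as pd
from fbprophet import diagnostics

# Stand-in for cross_validation output: two cutoffs, two horizons each.
df_cv = pd.DataFrame({
    'ds': pd.to_datetime(['2020-01-02', '2020-01-03', '2020-01-04', '2020-01-05']),
    'cutoff': pd.to_datetime(['2020-01-01', '2020-01-01', '2020-01-03', '2020-01-03']),
    'y': [10.0, 11.0, 12.0, 13.0],
    'yhat': [9.5, 11.5, 12.0, 14.0],
    'yhat_lower': [9.0, 10.0, 11.0, 12.0],
    'yhat_upper': [11.0, 12.0, 13.0, 13.5],
})

df_none = diagnostics.performance_metrics(df_cv, rolling_window=0)  # one row per forecast point
df_all = diagnostics.performance_metrics(df_cv, rolling_window=1)   # one aggregated row
for metric in ['mse', 'mae', 'mape', 'coverage']:
    assert np.isclose(df_all[metric].values[0], df_none[metric].mean())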