Add a visualization of cross validation prediction performance vs. horizon

Ben Letham, 7 years ago
parent commit 8198afe17a
3 changed files with 333 additions and 62 deletions
  1. notebooks/diagnostics.ipynb (+251 −58)
  2. python/fbprophet/diagnostics.py (+4 −4)
  3. python/fbprophet/plot.py (+78 −0)

notebooks/diagnostics.ipynb: +251 −58
(Diff not shown because it is too large.)


python/fbprophet/diagnostics.py: +4 −4

@@ -196,7 +196,7 @@ def prophet_copy(m, cutoff=None):
     return m2
 
 
-def performance_metrics(df, metrics=None, rolling_window=0.05):
+def performance_metrics(df, metrics=None, rolling_window=0.1):
     """Compute performance metrics from cross-validation results.
 
     Computes a suite of performance metrics on the output of cross-validation.
@@ -216,7 +216,7 @@ def performance_metrics(df, metrics=None, rolling_window=0.05):
     which specifies a proportion of simulated forecast points to include in
     each window. rolling_window=0 will compute it separately for each simulated
     forecast point (i.e., 'mse' will actually be squared error with no mean).
-    The default of rolling_window=0.05 will use 5% of the rows in df in each
+    The default of rolling_window=0.1 will use 10% of the rows in df in each
     window. rolling_window=1 will compute the metric across all simulated forecast
     points. The results are set to the right edge of the window.
 
@@ -227,9 +227,9 @@ def performance_metrics(df, metrics=None, rolling_window=0.05):
     ----------
     df: The dataframe returned by cross_validation.
     metrics: A list of performance metrics to compute. If not provided, will
-        use ['mse', 'mae', 'mape', 'coverage', 'rmse'].
+        use ['mse', 'rmse', 'mae', 'mape', 'coverage'].
     rolling_window: Proportion of data to use in each rolling window for
-        computing the metrics.
+        computing the metrics. Should be in [0, 1].
 
     Returns
     -------
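
As a brief usage sketch of the updated defaults (illustrative only: it assumes a Prophet model m already fit to a daily series, and the horizon/period/initial values are made up):

    from fbprophet.diagnostics import cross_validation, performance_metrics

    # Simulated historical forecasts; df_cv has columns
    # ds, y, yhat, yhat_lower, yhat_upper, and cutoff.
    df_cv = cross_validation(m, horizon='365 days', period='180 days',
                             initial='730 days')

    # rolling_window=0: per-point values, e.g. squared error rather than MSE.
    df_point = performance_metrics(df_cv, metrics=['mse'], rolling_window=0)

    # New default rolling_window=0.1: each window spans 10% of the rows.
    df_rolled = performance_metrics(df_cv)

    # rolling_window=1: a single value over all simulated forecast points.
    df_all = performance_metrics(df_cv, rolling_window=1)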

python/fbprophet/plot.py: +78 −0

@@ -15,6 +15,9 @@ import logging
 import numpy as np
 import pandas as pd
 
+from fbprophet.diagnostics import performance_metrics
+
+
 logging.basicConfig()
 logger = logging.getLogger(__name__)
 
@@ -367,3 +370,78 @@ def add_changepoints_to_plot(
     for cp in signif_changepoints:
         artists.append(ax.axvline(x=cp, c=cp_color, ls=cp_linestyle))
     return artists
+
+
+def plot_cross_validation_metric(df_cv, metric, rolling_window=0.1, ax=None):
+    """Plot a performance metric vs. forecast horizon from cross validation.
+
+    Cross validation produces a collection of out-of-sample model predictions
+    that can be compared to actual values, at a range of different horizons
+    (distance from the cutoff). This computes a specified performance metric
+    for each prediction, and aggregates it over a rolling window of horizons.
+
+    This uses fbprophet.diagnostics.performance_metrics to compute the metrics.
+    Valid values of metric are 'mse', 'rmse', 'mae', 'mape', and 'coverage'.
+
+    rolling_window is the proportion of data included in the rolling window of
+    aggregation. The default value of 0.1 means 10% of data are included in the
+    aggregation for computing the metric.
+
+    As a concrete example, if metric='mse', then this plot will show the
+    squared error for each cross validation prediction, along with the MSE
+    averaged over rolling windows of 10% of the data.
+
+    Parameters
+    ----------
+    df_cv: The output from fbprophet.diagnostics.cross_validation.
+    metric: Metric name, one of ['mse', 'rmse', 'mae', 'mape', 'coverage'].
+    rolling_window: Proportion of data to use for rolling average of metric.
+        In [0, 1]. Defaults to 0.1.
+    ax: Optional matplotlib axis on which to plot. If not given, a new figure
+        will be created.
+
+    Returns
+    -------
+    a matplotlib figure.
+    """
+    if ax is None:
+        fig = plt.figure(facecolor='w', figsize=(10, 6))
+        ax = fig.add_subplot(111)
+    else:
+        fig = ax.get_figure()
+    # Get the metric at the level of individual predictions, and with the rolling window.
+    df_none = performance_metrics(df_cv, metrics=[metric], rolling_window=0)
+    df_h = performance_metrics(df_cv, metrics=[metric], rolling_window=rolling_window)
+
+    # Some work because matplotlib does not handle timedelta
+    # Target ~10 ticks.
+    tick_w = max(df_none['horizon'].astype('timedelta64[ns]')) / 10.
+    # Find the largest time resolution that has <1 unit per bin.
+    dts = ['D', 'h', 'm', 's', 'ms', 'us', 'ns']
+    dt_names = [
+        'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds',
+        'nanoseconds'
+    ]
+    dt_conversions = [
+        24 * 60 * 60 * 10 ** 9,
+        60 * 60 * 10 ** 9,
+        60 * 10 ** 9,
+        10 ** 9,
+        10 ** 6,
+        10 ** 3,
+        1.,
+    ]
+    for i, dt in enumerate(dts):
+        if np.timedelta64(1, dt) < np.timedelta64(tick_w, 'ns'):
+            break
+
+    x_plt = df_none['horizon'].astype('timedelta64[ns]').astype(int) / float(dt_conversions[i])
+    x_plt_h = df_h['horizon'].astype('timedelta64[ns]').astype(int) / float(dt_conversions[i])
+
+    ax.plot(x_plt, df_none[metric], '.', alpha=0.5, c='gray')
+    ax.plot(x_plt_h, df_h[metric], '-', c='b')
+    ax.grid(True)
+
+    ax.set_xlabel('Horizon ({})'.format(dt_names[i]))
+    ax.set_ylabel(metric)
+    return fig
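
A minimal end-to-end sketch of the new plot (assuming, as above, a fitted Prophet model m on daily data; the cutoff spacing and horizon are illustrative):

    from fbprophet.diagnostics import cross_validation
    from fbprophet.plot import plot_cross_validation_metric

    df_cv = cross_validation(m, horizon='365 days', period='180 days',
                             initial='730 days')

    # Gray dots: squared error of each individual prediction (rolling_window=0).
    # Blue line: MSE over rolling windows covering 10% of the points, by horizon.
    fig = plot_cross_validation_metric(df_cv, metric='mse', rolling_window=0.1)
    fig.savefig('cv_mse_vs_horizon.png')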