test_diagnostics.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. # Copyright (c) 2017-present, Facebook, Inc.
  2. # All rights reserved.
  3. #
  4. # This source code is licensed under the BSD-style license found in the
  5. # LICENSE file in the root directory of this source tree. An additional grant
  6. # of patent rights can be found in the PATENTS file in the same directory.
  7. from __future__ import absolute_import
  8. from __future__ import division
  9. from __future__ import print_function
  10. from __future__ import unicode_literals
  11. import itertools
  12. import os
  13. from unittest import TestCase
  14. import numpy as np
  15. import pandas as pd
  16. from fbprophet import Prophet
  17. from fbprophet import diagnostics
  18. DATA_all = pd.read_csv(
  19. os.path.join(os.path.dirname(__file__), 'data.csv'), parse_dates=['ds']
  20. )
  21. DATA = DATA_all.head(100)
  22. class TestDiagnostics(TestCase):
  23. def __init__(self, *args, **kwargs):
  24. super(TestDiagnostics, self).__init__(*args, **kwargs)
  25. # Use first 100 record in data.csv
  26. self.__df = DATA
  27. def test_cross_validation(self):
  28. m = Prophet()
  29. m.fit(self.__df)
  30. # Calculate the number of cutoff points(k)
  31. horizon = pd.Timedelta('4 days')
  32. period = pd.Timedelta('10 days')
  33. initial = pd.Timedelta('115 days')
  34. df_cv = diagnostics.cross_validation(
  35. m, horizon='4 days', period='10 days', initial='115 days')
  36. self.assertEqual(len(np.unique(df_cv['cutoff'])), 3)
  37. self.assertEqual(max(df_cv['ds'] - df_cv['cutoff']), horizon)
  38. self.assertTrue(min(df_cv['cutoff']) >= min(self.__df['ds']) + initial)
  39. dc = df_cv['cutoff'].diff()
  40. dc = dc[dc > pd.Timedelta(0)].min()
  41. self.assertTrue(dc >= period)
  42. self.assertTrue((df_cv['cutoff'] < df_cv['ds']).all())
  43. # Each y in df_cv and self.__df with same ds should be equal
  44. df_merged = pd.merge(df_cv, self.__df, 'left', on='ds')
  45. self.assertAlmostEqual(
  46. np.sum((df_merged['y_x'] - df_merged['y_y']) ** 2), 0.0)
  47. df_cv = diagnostics.cross_validation(
  48. m, horizon='4 days', period='10 days', initial='135 days')
  49. self.assertEqual(len(np.unique(df_cv['cutoff'])), 1)
  50. with self.assertRaises(ValueError):
  51. diagnostics.cross_validation(
  52. m, horizon='10 days', period='10 days', initial='140 days')
  53. def test_cross_validation_logistic(self):
  54. df = self.__df.copy()
  55. df['cap'] = 40
  56. m = Prophet(growth='logistic').fit(df)
  57. df_cv = diagnostics.cross_validation(
  58. m, horizon='1 days', period='1 days', initial='140 days')
  59. self.assertEqual(len(np.unique(df_cv['cutoff'])), 2)
  60. self.assertTrue((df_cv['cutoff'] < df_cv['ds']).all())
  61. df_merged = pd.merge(df_cv, self.__df, 'left', on='ds')
  62. self.assertAlmostEqual(
  63. np.sum((df_merged['y_x'] - df_merged['y_y']) ** 2), 0.0)
  64. def test_cross_validation_extra_regressors(self):
  65. df = self.__df.copy()
  66. df['extra'] = range(df.shape[0])
  67. m = Prophet()
  68. m.add_seasonality(name='monthly', period=30.5, fourier_order=5)
  69. m.add_regressor('extra')
  70. m.fit(df)
  71. df_cv = diagnostics.cross_validation(
  72. m, horizon='4 days', period='4 days', initial='135 days')
  73. self.assertEqual(len(np.unique(df_cv['cutoff'])), 2)
  74. period = pd.Timedelta('4 days')
  75. dc = df_cv['cutoff'].diff()
  76. dc = dc[dc > pd.Timedelta(0)].min()
  77. self.assertTrue(dc >= period)
  78. self.assertTrue((df_cv['cutoff'] < df_cv['ds']).all())
  79. df_merged = pd.merge(df_cv, self.__df, 'left', on='ds')
  80. self.assertAlmostEqual(
  81. np.sum((df_merged['y_x'] - df_merged['y_y']) ** 2), 0.0)
  82. def test_cross_validation_default_value_check(self):
  83. m = Prophet()
  84. m.fit(self.__df)
  85. # Default value of initial should be equal to 3 * horizon
  86. df_cv1 = diagnostics.cross_validation(
  87. m, horizon='32 days', period='10 days')
  88. df_cv2 = diagnostics.cross_validation(
  89. m, horizon='32 days', period='10 days', initial='96 days')
  90. self.assertAlmostEqual(
  91. ((df_cv1['y'] - df_cv2['y']) ** 2).sum(), 0.0)
  92. self.assertAlmostEqual(
  93. ((df_cv1['yhat'] - df_cv2['yhat']) ** 2).sum(), 0.0)
  94. def test_performance_metrics(self):
  95. m = Prophet()
  96. m.fit(self.__df)
  97. df_cv = diagnostics.cross_validation(
  98. m, horizon='4 days', period='10 days', initial='90 days')
  99. # Aggregation level none
  100. df_none = diagnostics.performance_metrics(df_cv, rolling_window=0)
  101. self.assertEqual(
  102. set(df_none.columns),
  103. {'horizon', 'coverage', 'mae', 'mape', 'mse', 'rmse'},
  104. )
  105. self.assertEqual(df_none.shape[0], 16)
  106. # Aggregation level 0.2
  107. df_horizon = diagnostics.performance_metrics(df_cv, rolling_window=0.2)
  108. self.assertEqual(len(df_horizon['horizon'].unique()), 4)
  109. self.assertEqual(df_horizon.shape[0], 14)
  110. # Aggregation level all
  111. df_all = diagnostics.performance_metrics(df_cv, rolling_window=1)
  112. self.assertEqual(df_all.shape[0], 1)
  113. for metric in ['mse', 'mape', 'mae', 'coverage']:
  114. self.assertEqual(df_all[metric].values[0], df_none[metric].mean())
  115. # Custom list of metrics
  116. df_horizon = diagnostics.performance_metrics(
  117. df_cv, metrics=['coverage', 'mse'],
  118. )
  119. self.assertEqual(
  120. set(df_horizon.columns),
  121. {'coverage', 'mse', 'horizon'},
  122. )
  123. def test_copy(self):
  124. df = DATA_all.copy()
  125. df['cap'] = 200.
  126. df['binary_feature'] = [0] * 255 + [1] * 255
  127. # These values are created except for its default values
  128. holiday = pd.DataFrame(
  129. {'ds': pd.to_datetime(['2016-12-25']), 'holiday': ['x']})
  130. append_holidays = 'US'
  131. products = itertools.product(
  132. ['linear', 'logistic'], # growth
  133. [None, pd.to_datetime(['2016-12-25'])], # changepoints
  134. [3], # n_changepoints
  135. [0.9], # changepoint_range
  136. [True, False], # yearly_seasonality
  137. [True, False], # weekly_seasonality
  138. [True, False], # daily_seasonality
  139. [None, holiday], # holidays
  140. [None, append_holidays], # append_holidays
  141. ['additive', 'multiplicative'], # seasonality_mode
  142. [1.1], # seasonality_prior_scale
  143. [1.1], # holidays_prior_scale
  144. [0.1], # changepoint_prior_scale
  145. [100], # mcmc_samples
  146. [0.9], # interval_width
  147. [200] # uncertainty_samples
  148. )
  149. # Values should be copied correctly
  150. for product in products:
  151. m1 = Prophet(*product)
  152. m1.history = m1.setup_dataframe(
  153. df.copy(), initialize_scales=True)
  154. m1.set_auto_seasonalities()
  155. m2 = diagnostics.prophet_copy(m1)
  156. self.assertEqual(m1.growth, m2.growth)
  157. self.assertEqual(m1.n_changepoints, m2.n_changepoints)
  158. self.assertEqual(m1.changepoint_range, m2.changepoint_range)
  159. self.assertEqual(m1.changepoints, m2.changepoints)
  160. self.assertEqual(False, m2.yearly_seasonality)
  161. self.assertEqual(False, m2.weekly_seasonality)
  162. self.assertEqual(False, m2.daily_seasonality)
  163. self.assertEqual(
  164. m1.yearly_seasonality, 'yearly' in m2.seasonalities)
  165. self.assertEqual(
  166. m1.weekly_seasonality, 'weekly' in m2.seasonalities)
  167. self.assertEqual(
  168. m1.daily_seasonality, 'daily' in m2.seasonalities)
  169. if m1.holidays is None:
  170. self.assertEqual(m1.holidays, m2.holidays)
  171. else:
  172. self.assertTrue((m1.holidays == m2.holidays).values.all())
  173. self.assertEqual(m1.append_holidays, m2.append_holidays)
  174. self.assertEqual(m1.seasonality_mode, m2.seasonality_mode)
  175. self.assertEqual(m1.seasonality_prior_scale, m2.seasonality_prior_scale)
  176. self.assertEqual(m1.changepoint_prior_scale, m2.changepoint_prior_scale)
  177. self.assertEqual(m1.holidays_prior_scale, m2.holidays_prior_scale)
  178. self.assertEqual(m1.mcmc_samples, m2.mcmc_samples)
  179. self.assertEqual(m1.interval_width, m2.interval_width)
  180. self.assertEqual(m1.uncertainty_samples, m2.uncertainty_samples)
  181. # Check for cutoff and custom seasonality and extra regressors
  182. changepoints = pd.date_range('2012-06-15', '2012-09-15')
  183. cutoff = pd.Timestamp('2012-07-25')
  184. m1 = Prophet(changepoints=changepoints)
  185. m1.add_seasonality('custom', 10, 5)
  186. m1.add_regressor('binary_feature')
  187. m1.fit(df)
  188. m2 = diagnostics.prophet_copy(m1, cutoff=cutoff)
  189. changepoints = changepoints[changepoints <= cutoff]
  190. self.assertTrue((changepoints == m2.changepoints).all())
  191. self.assertTrue('custom' in m2.seasonalities)
  192. self.assertTrue('binary_feature' in m2.extra_regressors)