test_prophet.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500
  1. # Copyright (c) 2017-present, Facebook, Inc.
  2. # All rights reserved.
  3. #
  4. # This source code is licensed under the BSD-style license found in the
  5. # LICENSE file in the root directory of this source tree. An additional grant
  6. # of patent rights can be found in the PATENTS file in the same directory.
  7. from __future__ import absolute_import
  8. from __future__ import division
  9. from __future__ import print_function
  10. from __future__ import unicode_literals
  11. import numpy as np
  12. import pandas as pd
  13. # fb-block 1 start
  14. import os
  15. import itertools
  16. from unittest import TestCase
  17. from fbprophet import Prophet
  def _read_fixture(filename):
      """Load a CSV stored next to this test module, parsing ``ds`` as dates."""
      fixture_path = os.path.join(os.path.dirname(__file__), filename)
      return pd.read_csv(fixture_path, parse_dates=['ds'])


  # Fixture frames used by the tests in this module; read once at import time.
  DATA = _read_fixture('data.csv')
  DATA2 = _read_fixture('data2.csv')
  26. # fb-block 1 end
  27. # fb-block 2
  28. class TestProphet(TestCase):
  def test_fit_predict(self):
      """Fit on the first half of DATA and predict on the second half.

      Smoke test: it passes as long as neither call raises.
      """
      half = DATA.shape[0] // 2
      model = Prophet()
      model.fit(DATA.head(half))
      model.predict(DATA.tail(half))
  def test_fit_predict_no_seasons(self):
      """Fit and predict with weekly and yearly seasonality both switched off.

      Smoke test: it passes as long as neither call raises.
      """
      half = DATA.shape[0] // 2
      model = Prophet(weekly_seasonality=False, yearly_seasonality=False)
      model.fit(DATA.head(half))
      model.predict(DATA.tail(half))
  def test_fit_predict_no_changepoints(self):
      """Fit and predict with ``n_changepoints=0``.

      Smoke test: it passes as long as neither call raises.
      """
      half = DATA.shape[0] // 2
      model = Prophet(n_changepoints=0)
      model.fit(DATA.head(half))
      model.predict(DATA.tail(half))
  def test_fit_changepoint_not_in_history(self):
      """Fit with a user-supplied changepoint that has no row in the history.

      Rows dated 2013-01-01 through 2014-01-01 are removed from the training
      data and the observations after that gap are shifted up by 20, so the
      requested changepoint ('2013-06-06') falls inside the gap. The test
      passes as long as fit and predict do not raise.
      """
      outside_gap = (DATA['ds'] < '2013-01-01') | (DATA['ds'] > '2014-01-01')
      # Copy so the shift below modifies this frame rather than a filtered
      # slice of the shared DATA fixture.
      train = DATA[outside_gap].copy()
      # Shift only the target column. The previous form added 20 to every
      # column of the selected rows, which includes the 'ds' timestamps.
      train.loc[train['ds'] > '2014-01-01', 'y'] += 20
      future = pd.DataFrame({'ds': DATA['ds']})
      forecaster = Prophet(changepoints=['2013-06-06'])
      forecaster.fit(train)
      forecaster.predict(future)
  def test_fit_predict_duplicates(self):
      """Fit on a history in which every timestamp appears twice.

      The first half of DATA is stacked on a copy of itself whose ``y`` is
      raised by 10. The test passes as long as fit and predict do not raise.
      """
      N = DATA.shape[0]
      train1 = DATA.head(N // 2).copy()
      train2 = DATA.head(N // 2).copy()
      train2['y'] += 10
      # pd.concat replaces DataFrame.append, which newer pandas no longer
      # provides; both keep the original row labels by default.
      train = pd.concat([train1, train2])
      future = pd.DataFrame({'ds': DATA['ds'].tail(N // 2)})
      forecaster = Prophet()
      forecaster.fit(train)
      forecaster.predict(future)
  def test_fit_predict_constant_history(self):
      """With a flat history, the last forecast value equals that constant.

      Checked for a history of all 20s and then for a history of all 0s.
      """
      half = DATA.shape[0] // 2
      train = DATA.head(half).copy()
      for constant in (20, 0):
          train['y'] = constant
          future = pd.DataFrame({'ds': DATA['ds'].tail(half)})
          model = Prophet()
          model.fit(train)
          forecast = model.predict(future)
          self.assertEqual(forecast['yhat'].values[-1], constant)
  def test_setup_dataframe(self):
      """``setup_dataframe`` adds a ``t`` column spanning 0..1 and a ``y_scaled`` column peaking at 1."""
      model = Prophet()
      half = DATA.shape[0] // 2
      prepared = model.setup_dataframe(
          DATA.head(half).copy(), initialize_scales=True)
      self.assertIn('t', prepared)
      scaled_time = prepared['t']
      self.assertEqual(scaled_time.min(), 0.0)
      self.assertEqual(scaled_time.max(), 1.0)
      self.assertIn('y_scaled', prepared)
      self.assertEqual(prepared['y_scaled'].max(), 1.0)
  def test_get_changepoints(self):
      """Default changepoints: count, dimensionality, range and matrix shape."""
      m = Prophet()
      N = DATA.shape[0]
      history = DATA.head(N // 2).copy()
      history = m.setup_dataframe(history, initialize_scales=True)
      m.history = history
      m.set_changepoints()
      cp = m.changepoints_t
      self.assertEqual(cp.shape[0], m.n_changepoints)
      self.assertEqual(len(cp.shape), 1)
      self.assertGreater(cp.min(), 0)
      # The previous upper bound was N, the row count of DATA, although the
      # history holds only N // 2 rows, so that check could not catch an
      # out-of-range changepoint.
      # NOTE(review): assumes changepoints_t is expressed on the same scaled
      # axis as history['t'] -- confirm against Prophet.set_changepoints.
      self.assertLessEqual(cp.max(), history['t'].max())
      mat = m.get_changepoint_matrix()
      # One row per history observation, one column per changepoint.
      self.assertEqual(mat.shape[0], N // 2)
      self.assertEqual(mat.shape[1], m.n_changepoints)
  def test_get_zero_changepoints(self):
      """With ``n_changepoints=0`` a single changepoint at 0 is still produced."""
      model = Prophet(n_changepoints=0)
      half = DATA.shape[0] // 2
      model.history = model.setup_dataframe(
          DATA.head(half).copy(), initialize_scales=True)
      model.set_changepoints()
      changepoints = model.changepoints_t
      self.assertEqual(changepoints.shape[0], 1)
      self.assertEqual(changepoints[0], 0)
      matrix = model.get_changepoint_matrix()
      n_rows, n_cols = matrix.shape[0], matrix.shape[1]
      self.assertEqual(n_rows, half)
      self.assertEqual(n_cols, 1)
  def test_override_n_changepoints(self):
      """With only 20 rows of history, ``n_changepoints`` ends up as 15."""
      model = Prophet()
      model.history = model.setup_dataframe(
          DATA.head(20).copy(), initialize_scales=True)
      model.set_changepoints()
      self.assertEqual(model.n_changepoints, 15)
      self.assertEqual(model.changepoints_t.shape[0], 15)
  def test_fourier_series_weekly(self):
      """Compare ``fourier_series(DATA['ds'], 7, 3)`` with R reference values."""
      # Reference numbers taken directly from the R forecast package.
      expected = np.array([
          0.7818315, 0.6234898, 0.9749279, -0.2225209, 0.4338837, -0.9009689,
      ])
      features = Prophet.fourier_series(DATA['ds'], 7, 3)
      squared_error = np.sum((features[0] - expected)**2)
      self.assertAlmostEqual(squared_error, 0.0)
  def test_fourier_series_yearly(self):
      """Compare ``fourier_series(DATA['ds'], 365.25, 3)`` with R reference values."""
      # Reference numbers taken directly from the R forecast package.
      expected = np.array([
          0.7006152, -0.7135393, -0.9998330, 0.01827656, 0.7262249, 0.6874572,
      ])
      features = Prophet.fourier_series(DATA['ds'], 365.25, 3)
      squared_error = np.sum((features[0] - expected)**2)
      self.assertAlmostEqual(squared_error, 0.0)
  def test_growth_init(self):
      """Check the initial (k, m) pairs from the linear and logistic initializers."""
      model = Prophet(growth='logistic')
      history = DATA.iloc[:468].copy()
      # The cap is set to the largest observed value.
      history['cap'] = history['y'].max()
      history = model.setup_dataframe(history, initialize_scales=True)

      linear_k, linear_m = model.linear_growth_init(history)
      self.assertAlmostEqual(linear_k, 0.3055671)
      self.assertAlmostEqual(linear_m, 0.5307511)

      logistic_k, logistic_m = model.logistic_growth_init(history)
      self.assertAlmostEqual(logistic_k, 1.507925, places=4)
      self.assertAlmostEqual(logistic_m, -0.08167497, places=4)
  def test_piecewise_linear(self):
      """``piecewise_linear`` with one changepoint at t=5 (slope 1.0, then 1.5).

      The trend is evaluated on t = 0..10 and again on the tail t = 8..10,
      which lies entirely after the changepoint.
      """
      model = Prophet()
      t = np.arange(11.)
      m = 0
      k = 1.0
      deltas = np.array([0.5])
      changepoint_ts = np.array([5])
      y = model.piecewise_linear(t, deltas, k, m, changepoint_ts)
      y_true = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0,
                         6.5, 8.0, 9.5, 11.0, 12.5])
      # Compare the largest absolute deviation. A plain signed sum of the
      # differences would let errors of opposite sign cancel out.
      self.assertEqual(np.abs(y - y_true).max(), 0.0)
      t = t[8:]
      y_true = y_true[8:]
      y = model.piecewise_linear(t, deltas, k, m, changepoint_ts)
      self.assertEqual(np.abs(y - y_true).max(), 0.0)
  def test_piecewise_logistic(self):
      """``piecewise_logistic`` with cap 10 and one changepoint at t=5.

      The trend is evaluated on t = 0..10 and again on the tail t = 8..10,
      which lies entirely after the changepoint.
      """
      model = Prophet()
      t = np.arange(11.)
      cap = np.ones(11) * 10
      m = 0
      k = 1.0
      deltas = np.array([0.5])
      changepoint_ts = np.array([5])
      y = model.piecewise_logistic(t, cap, deltas, k, m, changepoint_ts)
      y_true = np.array([5.000000, 7.310586, 8.807971, 9.525741, 9.820138,
                         9.933071, 9.984988, 9.996646, 9.999252, 9.999833,
                         9.999963])
      # Compare the largest absolute deviation. A plain signed sum of the
      # differences would let errors of opposite sign cancel out.
      self.assertAlmostEqual(np.abs(y - y_true).max(), 0.0, places=5)
      t = t[8:]
      y_true = y_true[8:]
      cap = cap[8:]
      y = model.piecewise_logistic(t, cap, deltas, k, m, changepoint_ts)
      self.assertAlmostEqual(np.abs(y - y_true).max(), 0.0, places=5)
  def test_holidays(self):
      """Check the shape and column sums of the holiday feature matrix."""
      holidays = pd.DataFrame({
          'ds': pd.to_datetime(['2016-12-25']),
          'holiday': ['xmas'],
          'lower_window': [-1],
          'upper_window': [0],
      })
      model = Prophet(holidays=holidays)
      df = pd.DataFrame({
          'ds': pd.date_range('2016-12-20', '2016-12-31')
      })
      feats = model.make_holiday_features(df['ds'])
      # The window [-1, 0] covers two offsets, hence 2 columns.
      self.assertEqual(feats.shape, (df.shape[0], 2))
      # Every column must sum to exactly 1. Absolute deviations are used so
      # that one column being too high cannot offset another being too low.
      self.assertEqual(
          np.abs(feats.sum(0) - np.array([1.0, 1.0])).sum(), 0)
      holidays = pd.DataFrame({
          'ds': pd.to_datetime(['2016-12-25']),
          'holiday': ['xmas'],
          'lower_window': [-1],
          'upper_window': [10],
      })
      feats = Prophet(holidays=holidays).make_holiday_features(df['ds'])
      # The window [-1, 10] covers twelve offsets, so 12 columns are
      # generated even though only 8 of those days fall inside df.
      self.assertEqual(feats.shape, (df.shape[0], 12))
  def test_fit_with_holidays(self):
      """Fit and predict with a two-date holiday table and ``uncertainty_samples=0``.

      Smoke test: it passes as long as neither call raises.
      """
      holiday_dates = pd.to_datetime(['2012-06-06', '2013-06-06'])
      holiday_table = pd.DataFrame({
          'ds': holiday_dates,
          'holiday': ['seans-bday', 'seans-bday'],
          'lower_window': [0, 0],
          'upper_window': [1, 1],
      })
      model = Prophet(holidays=holiday_table, uncertainty_samples=0)
      fitted = model.fit(DATA)
      fitted.predict()
  def test_make_future_dataframe(self):
      """``make_future_dataframe`` yields the expected dates for 'D' and 'M' steps."""
      forecaster = Prophet()
      forecaster.fit(DATA.head(468 // 2))
      # (frequency, the three dates expected after the end of the history)
      cases = (
          ('D', ['2013-04-26', '2013-04-27', '2013-04-28']),
          ('M', ['2013-04-30', '2013-05-31', '2013-06-30']),
      )
      for freq, expected_dates in cases:
          future = forecaster.make_future_dataframe(
              periods=3, freq=freq, include_history=False)
          expected = pd.DatetimeIndex(expected_dates)
          self.assertEqual(len(future), 3)
          for row, stamp in enumerate(expected):
              self.assertEqual(future.iloc[row]['ds'], stamp)
  def test_auto_weekly_seasonality(self):
      """Check when the default 'auto' weekly seasonality is added or left out."""
      # 15 rows of history: expected to be enabled.
      model = Prophet()
      self.assertEqual(model.weekly_seasonality, 'auto')
      model.fit(DATA.head(15))
      self.assertIn('weekly', model.seasonalities)
      self.assertEqual(model.seasonalities['weekly'], (7, 3))

      # 9 rows of history: expected to be disabled, the history is too short ...
      short_history = DATA.head(9)
      model = Prophet()
      model.fit(short_history)
      self.assertNotIn('weekly', model.seasonalities)
      # ... unless it is requested explicitly.
      model = Prophet(weekly_seasonality=True)
      model.fit(short_history)
      self.assertIn('weekly', model.seasonalities)

      # Every seventh row only: expected to be disabled due to weekly spacing.
      model = Prophet()
      model.fit(DATA.iloc[::7, :])
      self.assertNotIn('weekly', model.seasonalities)

      # An integer setting shows up as the second entry of the stored tuple.
      model = Prophet(weekly_seasonality=2)
      model.fit(DATA)
      self.assertEqual(model.seasonalities['weekly'], (7, 2))
  def test_auto_yearly_seasonality(self):
      """Check when the default 'auto' yearly seasonality is added or left out."""
      # Full DATA history: expected to be enabled.
      model = Prophet()
      self.assertEqual(model.yearly_seasonality, 'auto')
      model.fit(DATA)
      self.assertIn('yearly', model.seasonalities)
      self.assertEqual(model.seasonalities['yearly'], (365.25, 10))

      # 240 rows of history: expected to be disabled, the history is too short ...
      short_history = DATA.head(240)
      model = Prophet()
      model.fit(short_history)
      self.assertNotIn('yearly', model.seasonalities)
      # ... unless it is requested explicitly.
      model = Prophet(yearly_seasonality=True)
      model.fit(short_history)
      self.assertIn('yearly', model.seasonalities)

      # An integer setting shows up as the second entry of the stored tuple.
      model = Prophet(yearly_seasonality=7)
      model.fit(DATA)
      self.assertEqual(model.seasonalities['yearly'], (365.25, 7))
  def test_auto_daily_seasonality(self):
      """Check when the default 'auto' daily seasonality is added or left out."""
      # Full DATA2 history: expected to be enabled.
      model = Prophet()
      self.assertEqual(model.daily_seasonality, 'auto')
      model.fit(DATA2)
      self.assertIn('daily', model.seasonalities)
      self.assertEqual(model.seasonalities['daily'], (1, 4))

      # 430 rows of DATA2: expected to be disabled, the history is too short ...
      short_history = DATA2.head(430)
      model = Prophet()
      model.fit(short_history)
      self.assertNotIn('daily', model.seasonalities)
      # ... unless it is requested explicitly.
      model = Prophet(daily_seasonality=True)
      model.fit(short_history)
      self.assertIn('daily', model.seasonalities)

      # An integer setting shows up as the second entry of the stored tuple.
      model = Prophet(daily_seasonality=7)
      model.fit(DATA2)
      self.assertEqual(model.seasonalities['daily'], (1, 7))

      # Fitting DATA with default settings must not add a daily seasonality.
      model = Prophet()
      model.fit(DATA)
      self.assertNotIn('daily', model.seasonalities)
  def test_subdaily_holidays(self):
      """A one-day holiday fitted on DATA2 leaves 575 forecast rows with a zero effect."""
      holiday_table = pd.DataFrame({
          'ds': pd.to_datetime(['2017-01-02']),
          'holiday': ['special_day'],
      })
      model = Prophet(holidays=holiday_table)
      model.fit(DATA2)
      forecast = model.predict()
      zero_effect_rows = (forecast['special_day'] == 0).sum()
      self.assertEqual(zero_effect_rows, 575)
  def test_custom_seasonality(self):
      """``add_seasonality`` stores (period, fourier_order) and rejects clashing names."""
      holiday_table = pd.DataFrame({
          'ds': pd.to_datetime(['2017-01-02']),
          'holiday': ['special_day'],
      })
      model = Prophet(holidays=holiday_table)
      model.add_seasonality(name='monthly', period=30, fourier_order=5)
      self.assertEqual(model.seasonalities['monthly'], (30, 5))
      # A holiday name and the name 'trend' must both be refused.
      for clashing_name in ('special_day', 'trend'):
          with self.assertRaises(ValueError):
              model.add_seasonality(
                  name=clashing_name, period=30, fourier_order=5)
      # Using the name 'weekly' is expected to go through without an error.
      model.add_seasonality(name='weekly', period=30, fourier_order=5)
  def test_added_regressors(self):
      """Exercise extra regressors: registration, fit, scaling, features, forecast."""
      model = Prophet()
      model.add_regressor('binary_feature', prior_scale=0.2)
      model.add_regressor('numeric_feature', prior_scale=0.5)
      model.add_regressor('binary_feature2', standardize=True)
      frame = DATA.copy()
      frame['binary_feature'] = [0] * 255 + [1] * 255
      frame['numeric_feature'] = range(510)
      # Fitting must fail while a registered regressor column is missing.
      with self.assertRaises(ValueError):
          model.fit(frame)
      frame['binary_feature2'] = [1] * 100 + [0] * 410
      model.fit(frame)

      # Standardization settings recorded for each regressor.
      regressors = model.extra_regressors
      self.assertEqual(
          regressors['binary_feature'],
          {'prior_scale': 0.2, 'mu': 0, 'std': 1, 'standardize': 'auto'},
      )
      numeric = regressors['numeric_feature']
      self.assertEqual(numeric['prior_scale'], 0.5)
      self.assertEqual(numeric['mu'], 254.5)
      self.assertAlmostEqual(numeric['std'], 147.368585, places=5)
      second_binary = regressors['binary_feature2']
      self.assertEqual(second_binary['prior_scale'], 10.)
      self.assertAlmostEqual(second_binary['mu'], 0.1960784, places=5)
      self.assertAlmostEqual(second_binary['std'], 0.3974183, places=5)

      # Values of the regressor columns after setup_dataframe.
      scaled = model.setup_dataframe(frame.copy())
      self.assertEqual(scaled['binary_feature'][0], 0)
      self.assertAlmostEqual(scaled['numeric_feature'][0], -1.726962, places=4)
      self.assertAlmostEqual(scaled['binary_feature2'][0], 2.022859, places=4)

      # Feature matrix and prior scales.
      features, prior_scales = model.make_all_seasonality_features(scaled)
      for column in ('binary_feature', 'numeric_feature', 'binary_feature2'):
          self.assertIn(column, features)
      self.assertEqual(features.shape[1], 29)
      self.assertEqual(set(prior_scales[26:]), {0.2, 0.5, 10.})

      # Predicting must fail until every regressor column is supplied.
      future = pd.DataFrame({
          'ds': ['2014-06-01'],
          'binary_feature': [0],
          'numeric_feature': [10],
      })
      with self.assertRaises(ValueError):
          model.predict(future)
      future['binary_feature2'] = 0
      forecast = model.predict(future)
      self.assertEqual(forecast.shape[1], 31)

      def first(column):
          # Value of the given forecast column at row label 0.
          return forecast[column][0]

      # Forecast components must add up exactly.
      self.assertEqual(first('binary_feature'), 0)
      self.assertEqual(
          first('extra_regressors'),
          first('numeric_feature') + first('binary_feature2'),
      )
      self.assertEqual(
          first('seasonalities'),
          first('yearly') + first('weekly'),
      )
      self.assertEqual(
          first('seasonal'),
          first('seasonalities') + first('extra_regressors'),
      )
      self.assertEqual(
          first('yhat'),
          first('trend') + first('seasonal'),
      )
  def test_copy(self):
      """``Prophet.copy`` must reproduce the constructor settings of the original.

      Every combination of the argument grid below is built, copied and
      compared attribute by attribute. A second check verifies that
      ``copy(cutoff=...)`` keeps only the changepoints up to the cutoff.
      """
      # Argument grid in positional constructor order. Per the original
      # author's note, the values are meant to differ from the defaults.
      products = itertools.product(
          ['linear', 'logistic'],  # growth
          [None, pd.to_datetime(['2016-12-25'])],  # changepoints
          [3],  # n_changepoints
          [True, False],  # yearly_seasonality
          [True, False],  # weekly_seasonality
          [True, False],  # daily_seasonality
          [None, pd.DataFrame({'ds': pd.to_datetime(['2016-12-25']), 'holiday': ['x']})],  # holidays
          [1.1],  # seasonality_prior_scale
          [1.1],  # holidays_prior_scale
          [0.1],  # changepoint_prior_scale
          [100],  # mcmc_samples
          [0.9],  # interval_width
          [200]  # uncertainty_samples
      )
      # Values should be copied correctly
      for product in products:
          m1 = Prophet(*product)
          m2 = m1.copy()
          self.assertEqual(m1.growth, m2.growth)
          self.assertEqual(m1.n_changepoints, m2.n_changepoints)
          # Changepoints are array-like when set, so compare element-wise
          # and reduce with .all(). assertEqual would truth-test the
          # element-wise result, which only works for one-element arrays.
          if m1.changepoints is None:
              self.assertIsNone(m2.changepoints)
          else:
              self.assertTrue((m1.changepoints == m2.changepoints).all())
          self.assertEqual(m1.yearly_seasonality, m2.yearly_seasonality)
          self.assertEqual(m1.weekly_seasonality, m2.weekly_seasonality)
          self.assertEqual(m1.daily_seasonality, m2.daily_seasonality)
          if m1.holidays is None:
              self.assertIsNone(m2.holidays)
          else:
              self.assertTrue((m1.holidays == m2.holidays).values.all())
          self.assertEqual(m1.seasonality_prior_scale, m2.seasonality_prior_scale)
          self.assertEqual(m1.changepoint_prior_scale, m2.changepoint_prior_scale)
          self.assertEqual(m1.holidays_prior_scale, m2.holidays_prior_scale)
          self.assertEqual(m1.mcmc_samples, m2.mcmc_samples)
          self.assertEqual(m1.interval_width, m2.interval_width)
          self.assertEqual(m1.uncertainty_samples, m2.uncertainty_samples)
      # Check for cutoff
      changepoints = pd.date_range('2012-06-15', '2012-09-15')
      cutoff = pd.Timestamp('2012-07-25')
      m1 = Prophet(changepoints=changepoints)
      m1.fit(DATA)
      m2 = m1.copy(cutoff=cutoff)
      changepoints = changepoints[changepoints <= cutoff]
      self.assertTrue((changepoints == m2.changepoints).all())