generate_holidays_file.py 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. # Copyright (c) 2017-present, Facebook, Inc.
  2. # All rights reserved.
  3. #
  4. # This source code is licensed under the BSD-style license found in the
  5. # LICENSE file in the root directory of this source tree. An additional grant
  6. # of patent rights can be found in the PATENTS file in the same directory.
  7. from __future__ import absolute_import
  8. from __future__ import division
  9. from __future__ import print_function
  10. from __future__ import unicode_literals
  11. import pandas as pd
  12. import numpy as np
  13. import warnings
  14. import holidays as hdays_part1
  15. import fbprophet.hdays as hdays_part2
  16. import inspect
  17. def generate_holidays_file():
  18. """Generate csv file of all possible holiday names, ds,
  19. and countries, year combination
  20. """
  21. years = np.arange(1995, 2045, 1)
  22. all_holidays = []
  23. # class names in holiday packages which are not countries
  24. class_to_exclude = set(['rd', 'datetime', 'date', 'HolidayBase', 'Calendar',
  25. 'LunarDate', 'timedelta', 'date'])
  26. class_list2 = inspect.getmembers(hdays_part2, inspect.isclass)
  27. country_set2 = set(list(zip(*class_list2))[0])
  28. country_set2 -= class_to_exclude
  29. for country in country_set2:
  30. with warnings.catch_warnings():
  31. warnings.simplefilter("ignore")
  32. temp = getattr(hdays_part2, country)(years=years)
  33. temp_df = pd.DataFrame(list(temp.items()),
  34. columns=['ds', 'holiday'])
  35. temp_df['country'] = country
  36. all_holidays.append(temp_df)
  37. class_list1 = inspect.getmembers(hdays_part1, inspect.isclass)
  38. country_set1 = set(list(zip(*class_list1))[0])
  39. country_set1 -= class_to_exclude
  40. # Avoid overwrting holidays get from hdays_part2
  41. country_set1 -= country_set2
  42. for country in country_set1:
  43. temp = getattr(hdays_part1, country)(years=years)
  44. temp_df = pd.DataFrame(list(temp.items()),
  45. columns=['ds', 'holiday'])
  46. temp_df['country'] = country
  47. all_holidays.append(temp_df)
  48. generated_holidays = pd.concat(all_holidays, axis=0, ignore_index=True)
  49. generated_holidays['year'] = generated_holidays.ds.apply(lambda x: x.year)
  50. generated_holidays.to_csv("../R/data-raw/generated_holidays.csv")
  51. if __name__ == "__main__":
  52. # execute only if run as a script
  53. generate_holidays_file()