# pandas_mappingFunctions.py
import pickle
from statistics import mean

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import quandl
from matplotlib import style
  8. style.use('seaborn-dark-palette')
  9. ax1 = plt.subplot(2,1,1)
  10. ax2 = plt.subplot(2,1,2, sharex=ax1)
  11. def create_labels(cur_hpi, fut_hpi):
  12. if fut_hpi > cur_hpi:
  13. return 1
  14. else:
  15. return 0
  16. def moving_average(values):
  17. return mean(values)
  18. benchmark = pd.read_pickle('us_pct.pickle') # us overall housing price index percentage change
  19. HPI = pd.read_pickle('HPI_complete.pickle') # all of the state data, thirty year mortgage, unemployment rate, GDP, SP500
  20. HPI = HPI.join(benchmark['United States'])
  21. # all in percentage change since the start of the data (1975-01-01)
  22. HPI.dropna(inplace=True)
  23. housing_pct = HPI.pct_change()
  24. housing_pct.replace([np.inf, -np.inf], np.nan, inplace=True)
  25. housing_pct['US_HPI_future'] = housing_pct['United States'].shift(-1)
  26. housing_pct.dropna(inplace=True)
  27. housing_pct['label'] = list(map(create_labels, housing_pct['United States'], housing_pct['US_HPI_future']))
  28. # housing_pct['ma_apply_example'] = pd.rolling_apply(housing_pct['M30'], 10, moving_average)
  29. housing_pct['ma_apply_example'] = housing_pct['M30'].rolling(window=10).apply(moving_average)
  30. print(housing_pct.tail())
  31. # state_HPI_M30 = HPI_data.join(HPI['M30']) # fifty states plus mortgage data
  32. # print(state_HPI_M30.corr().describe().tail())