# pandas_mappingFunctions.py
import pickle
from statistics import mean

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import quandl
from matplotlib import style
  8. style.use("seaborn-dark-palette")
  9. ax1 = plt.subplot(2, 1, 1)
  10. ax2 = plt.subplot(2, 1, 2, sharex=ax1)
  11. def create_labels(cur_hpi, fut_hpi):
  12. if fut_hpi > cur_hpi:
  13. return 1
  14. else:
  15. return 0
  16. def moving_average(values):
  17. return mean(values)
  18. benchmark = pd.read_pickle(
  19. "us_pct.pickle"
  20. ) # us overall housing price index percentage change
  21. HPI = pd.read_pickle(
  22. "HPI_complete.pickle"
  23. ) # all of the state data, thirty year mortgage, unemployment rate, GDP, SP500
  24. HPI = HPI.join(benchmark["United States"])
  25. # all in percentage change since the start of the data (1975-01-01)
  26. HPI.dropna(inplace=True)
  27. housing_pct = HPI.pct_change()
  28. housing_pct.replace([np.inf, -np.inf], np.nan, inplace=True)
  29. housing_pct["US_HPI_future"] = housing_pct["United States"].shift(-1)
  30. housing_pct.dropna(inplace=True)
  31. housing_pct["label"] = list(
  32. map(create_labels, housing_pct["United States"], housing_pct["US_HPI_future"])
  33. )
  34. # housing_pct['ma_apply_example'] = pd.rolling_apply(housing_pct['M30'], 10, moving_average)
  35. housing_pct["ma_apply_example"] = (
  36. housing_pct["M30"].rolling(window=10).apply(moving_average)
  37. )
  38. print(housing_pct.tail())
  39. # state_HPI_M30 = HPI_data.join(HPI['M30']) # fifty states plus mortgage data
  40. # print(state_HPI_M30.corr().describe().tail())