# pandas_rollingStatistics.py
import os
import pickle

import matplotlib.pyplot as plt
import pandas as pd
import quandl
from matplotlib import style
  6. style.use('seaborn')
  7. api_key = 'rFsSehe51RLzREtYhLfo'
  8. def state_list():
  9. fifty_states = pd.read_html('https://simple.wikipedia.org/wiki/List_of_U.S._states')
  10. return fifty_states[0][0][1:]
  11. def initial_state_data():
  12. states = state_list()
  13. main_df = pd.DataFrame()
  14. for abbv in states:
  15. query = 'FMAC/HPI_' + str(abbv)
  16. df = quandl.get(query, authtoken=api_key)
  17. df.columns = [str(abbv)]
  18. df[abbv] = (df[abbv] - df[abbv][0]) / df[abbv][0] * 100.0
  19. if main_df.empty:
  20. main_df = df
  21. else:
  22. main_df = main_df.join(df)
  23. pickle_out = open('fifty_states_pct.pickle', 'wb')
  24. pickle.dump(main_df, pickle_out)
  25. pickle_out.close()
  26. def HPI_Benchmark():
  27. df = quandl.get('FMAC/HPI_USA' , authtoken=api_key)
  28. df['United States'] = (df['Value'] - df['Value'][0]) / df['Value'][0] * 100.0
  29. pickle_out = open('us_pct.pickle', 'wb')
  30. pickle.dump(df, pickle_out)
  31. pickle_out.close()
  32. ax1 = plt.subplot(2,1,1)
  33. ax2 = plt.subplot(2,1,2, sharex=ax1)
  34. # initial_state_data()
  35. pickle_in = open('fifty_states_pct.pickle' , 'rb')
  36. HPI_data = pickle.load(pickle_in)
  37. # HPI_Benchmark()
  38. pickle_in = open('us_pct.pickle','rb')
  39. benchmark = pickle.load(pickle_in)
  40. # HPI_data = HPI_data.pct_change()
  41. # HPI_data.plot(ax=ax1)
  42. # benchmark['United States'].plot(ax=ax1, color='k', linewidth=10)
  43. # plt.legend().remove()
  44. TX1yr = HPI_data['TX'].resample('A').mean()
  45. HPI_data['TX1yr'] = TX1yr
  46. # print(HPI_data[['TX1yr','TX']])
  47. print(HPI_data.isnull().values.sum())
  48. HPI_data.fillna(method='bfill', inplace=True)
  49. # HPI_data.dropna(inplace=True)
  50. print(HPI_data.isnull().values.sum())
  51. # print(HPI_data[['TX1yr','TX']])
  52. # HPI_data[['TX1yr', 'TX']].plot(ax=ax1)
  53. # plt.show()
  54. # print(HPI_data['TX'].hasnans)
  55. # rolling statistics
  56. HPI_data['TX12MA'] = HPI_data['TX'].rolling(window=12, center=False).mean()
  57. HPI_data['TX12STD']= HPI_data['TX'].rolling(window=12, center=False).std()
  58. # standard deviation is a measure of the volatility of the price
  59. HPI_data.dropna(inplace=True)
  60. TK_AK_12corr = HPI_data['TX'].rolling(window=12).corr(HPI_data['AK'])
  61. HPI_data['TX'].plot(ax=ax1, label = 'TX HPI')
  62. HPI_data['AK'].plot(ax=ax1, label = 'AK HPI')
  63. ax1.legend(loc=4)
  64. TK_AK_12corr.plot(ax=ax2, label= 'TK AK 12 month correlation')
  65. ax2.legend(loc=4)
  66. # HPI_data[['TX12MA','TX']].plot(ax=ax1)
  67. # HPI_data['TX12STD'].plot(ax=ax2)
  68. # print(HPI_data.head())
  69. plt.show()