# pandas_percentChange_correlation (1.5 KB)

import os
import pickle

import matplotlib.pyplot as plt
import pandas as pd
import quandl
from matplotlib import style
  6. style.use('seaborn')
  7. api_key = 'rFsSehe51RLzREtYhLfo'
  8. def state_list():
  9. fifty_states = pd.read_html('https://simple.wikipedia.org/wiki/List_of_U.S._states')
  10. return fifty_states[0][0][1:]
  11. def initial_state_data():
  12. states = state_list()
  13. main_df = pd.DataFrame()
  14. for abbv in states:
  15. query = 'FMAC/HPI_' + str(abbv)
  16. df = quandl.get(query, authtoken=api_key)
  17. df.columns = [str(abbv)]
  18. df[abbv] = (df[abbv] - df[abbv][0]) / df[abbv][0] * 100.0
  19. if main_df.empty:
  20. main_df = df
  21. else:
  22. main_df = main_df.join(df)
  23. print(main_df.head())
  24. pickle_out = open('fifty_states_pct.pickle', 'wb')
  25. pickle.dump(main_df, pickle_out)
  26. pickle_out.close()
  27. def HPI_Benchmark():
  28. df = quandl.get('FMAC/HPI_USA' , authtoken=api_key)
  29. df['United States'] = (df['Value'] - df['Value'][0]) / df['Value'][0] * 100.0
  30. pickle_out = open('us_pct.pickle', 'wb')
  31. pickle.dump(df, pickle_out)
  32. pickle_out.close()
  33. fig = plt.figure()
  34. ax1 = plt.subplot2grid((1,1), (0,0))
  35. # initial_state_data()
  36. pickle_in = open('fifty_states_pct.pickle' , 'rb')
  37. HPI_data = pickle.load(pickle_in)
  38. # HPI_Benchmark()
  39. pickle_in = open('us_pct.pickle' , 'rb')
  40. benchmark = pickle.load(pickle_in)
  41. # HPI_data = HPI_data.pct_change()
  42. HPI_data.plot(ax=ax1)
  43. benchmark['United States'].plot(ax=ax1, color='k', linewidth=10)
  44. plt.legend().remove()
  45. HPI_State_Correlation = HPI_data.corr()
  46. print(HPI_State_Correlation)
  47. plt.show()
  48. # print(HPI_data[['IL','WI']].corr())