pandas_indexing.py 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172
  1. import pickle
  2. import pandas as pd
  3. import quandl
  4. import matplotlib.pyplot as plt
  5. from matplotlib import style
  import os

  # Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*"
  # and later dropped the old names; prefer the new name when it exists and
  # fall back to the legacy one on older installations.
  style.use("seaborn-v0_8" if "seaborn-v0_8" in style.available else "seaborn")

  # NOTE(security): this key is committed in source and should be treated as
  # leaked/rotated. QUANDL_API_KEY in the environment takes precedence; the
  # literal is kept only as a fallback so existing runs keep working.
  api_key = os.environ.get("QUANDL_API_KEY") or "rFsSehe51RLzREtYhLfo"
  def state_list():
      """Return the state identifiers used to build the per-state queries.

      Reads every HTML table on the Simple English Wikipedia "List of U.S.
      states" page, takes the first table, selects its column labelled ``0``
      and drops that column's first entry (presumably a header row -- confirm
      against the current page layout).
      """
      tables = pd.read_html("https://simple.wikipedia.org/wiki/List_of_U.S._states")
      first_table = tables[0]
      first_column = first_table[0]
      return first_column[1:]
  def initial_state_data():
      """Download each state's HPI series, rebase it, and cache the result.

      For every identifier returned by ``state_list()`` the ``FMAC/HPI_<id>``
      dataset is fetched with ``quandl.get``, its single column is renamed to
      the identifier, and the values are converted to percent change relative
      to the first observation. The per-state frames are joined on their index
      and the combined frame is pickled to ``fifty_states_pct.pickle`` in the
      current working directory.

      Returns:
          None. The result is written to disk only.
      """
      main_df = pd.DataFrame()
      for abbv in state_list():
          # Use one string name for both the rename and the lookups so they
          # cannot disagree when ``abbv`` is not already a ``str``.
          name = str(abbv)
          df = quandl.get("FMAC/HPI_" + name, authtoken=api_key)
          df.columns = [name]
          # ``.iloc[0]`` is explicitly positional; plain ``series[0]`` on a
          # non-integer index relies on a deprecated fallback.
          start = df[name].iloc[0]
          df[name] = (df[name] - start) / start * 100.0
          main_df = df if main_df.empty else main_df.join(df)

      # Context manager guarantees the file is closed even if dump() raises.
      with open("fifty_states_pct.pickle", "wb") as pickle_out:
          pickle.dump(main_df, pickle_out)
  26. # initial_state_data()
  def HPI_Benchmark():
      """Fetch the national HPI series and add a percent-change column.

      Downloads the ``FMAC/HPI_USA`` dataset with ``quandl.get`` and adds a
      ``"United States"`` column holding the percent change of ``"Value"``
      relative to its first observation.

      Returns:
          The downloaded frame with the extra ``"United States"`` column; the
          original ``"Value"`` column is left in place.
      """
      df = quandl.get("FMAC/HPI_USA", authtoken=api_key)
      # Explicit positional access; ``df["Value"][0]`` depends on a deprecated
      # integer-key fallback when the index is not integer-based.
      start = df["Value"].iloc[0]
      df["United States"] = (df["Value"] - start) / start * 100.0
      return df
  # Plot every state's percent-change series against the national benchmark,
  # then report which column correlates most strongly with the benchmark.
  fig = plt.figure()
  ax1 = plt.subplot2grid((1, 1), (0, 0))

  # NOTE(security): pickle.load can execute arbitrary code; only load a cache
  # file that initial_state_data() wrote locally.
  with open("fifty_states_pct.pickle", "rb") as pickle_in:
      HPI_data = pickle.load(pickle_in)
  benchmark = HPI_Benchmark()

  HPI_data.plot(ax=ax1)
  benchmark["United States"].plot(ax=ax1, color="k", linewidth=10)
  plt.legend().remove()

  # Copy before adding the benchmark column: a plain assignment would alias
  # HPI_data, so the state-only correlation below would include the benchmark.
  HPI_complete_data = HPI_data.copy()
  HPI_complete_data["United States"] = benchmark["United States"]

  HPI_State_Correlation = HPI_data.corr()
  HPI_complete_correlation = HPI_complete_data.corr()
  HPI_US_correlation = HPI_complete_correlation["United States"]
  HPI_US_correlation_sorted = HPI_US_correlation.sort_values(ascending=True)

  # The last entry after an ascending sort is the benchmark's correlation with
  # itself, so the second-to-last is the best-matching other column. ``.iloc``
  # keeps the lookup positional on this label-indexed Series.
  print(
      HPI_US_correlation_sorted[
          HPI_US_correlation_sorted == HPI_US_correlation_sorted.iloc[-2]
      ].index
  )
  plt.show()
  53. # print(HPI_data[['IL','WI']].corr())