import pickle import pandas as pd import quandl import matplotlib.pyplot as plt from matplotlib import style style.use('seaborn') api_key = 'rFsSehe51RLzREtYhLfo' def state_list(): fifty_states = pd.read_html('https://simple.wikipedia.org/wiki/List_of_U.S._states') return fifty_states[0][0][1:] def initial_state_data(): states = state_list() main_df = pd.DataFrame() for abbv in states: query = 'FMAC/HPI_' + str(abbv) df = quandl.get(query, authtoken=api_key) df.columns = [str(abbv)] df[abbv] = (df[abbv] - df[abbv][0]) / df[abbv][0] * 100.0 if main_df.empty: main_df = df else: main_df = main_df.join(df) print(main_df.head()) pickle_out = open('fifty_states_pct.pickle', 'wb') pickle.dump(main_df, pickle_out) pickle_out.close() def HPI_Benchmark(): df = quandl.get('FMAC/HPI_USA' , authtoken=api_key) df['United States'] = (df['Value'] - df['Value'][0]) / df['Value'][0] * 100.0 pickle_out = open('us_pct.pickle', 'wb') pickle.dump(df, pickle_out) pickle_out.close() fig = plt.figure() ax1 = plt.subplot2grid((1,1), (0,0)) # initial_state_data() pickle_in = open('fifty_states_pct.pickle' , 'rb') HPI_data = pickle.load(pickle_in) # HPI_Benchmark() pickle_in = open('us_pct.pickle' , 'rb') benchmark = pickle.load(pickle_in) # HPI_data = HPI_data.pct_change() HPI_data.plot(ax=ax1) benchmark['United States'].plot(ax=ax1, color='k', linewidth=10) plt.legend().remove() HPI_State_Correlation = HPI_data.corr() print(HPI_State_Correlation) plt.show() # print(HPI_data[['IL','WI']].corr())