radu
/
Data-Analysis-Jupyter
zrkadlo https://github.com/WillKoehrsen/Data-Analysis.git


			
				
					
						
						
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
							import pickle
import pandas as pd 
import quandl 
import matplotlib.pyplot as plt 
from matplotlib import style

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and
# later releases dropped the old names, so prefer the new name and fall back
# to the legacy one on installations that predate the rename.
try:
	style.use('seaborn-v0_8')
except OSError:
	style.use('seaborn')

# NOTE(review): this credential is committed in plain text; consider rotating
# it and loading it from the environment or an untracked config file.
api_key = 'rFsSehe51RLzREtYhLfo'

def state_list():
	"""Return the first column of the first table on the Wikipedia state list.

	Parses every HTML table on the Simple English Wikipedia page
	'List of U.S. states', takes the first table, selects the column
	labelled 0 and drops its first row.

	NOTE(review): callers treat the values as state abbreviations and the
	dropped row is presumably a header; confirm against the live page.
	"""
	tables = pd.read_html('https://simple.wikipedia.org/wiki/List_of_U.S._states')
	first_table = tables[0]
	first_column = first_table[0]
	return first_column[1:]

def initial_state_data():
	"""Download every state's HPI series and pickle the combined table.

	For each value returned by state_list(), fetches the Quandl dataset
	'FMAC/HPI_<abbv>', renames its single column to the abbreviation,
	converts that column to percent change relative to its first row, and
	joins it onto the accumulated DataFrame by index. The result is written
	to 'fifty_states_pct.pickle' in the current working directory.

	Returns None.
	"""
	main_df = pd.DataFrame()

	for abbv in state_list():
		column = str(abbv)
		df = quandl.get('FMAC/HPI_' + column, authtoken=api_key)
		df.columns = [column]
		# .iloc[0] selects the first row by position; plain [0] on a Series
		# without an integer index relies on a deprecated positional fallback.
		baseline = df[column].iloc[0]
		df[column] = (df[column] - baseline) / baseline * 100.0
		main_df = df if main_df.empty else main_df.join(df)

	# The context manager closes the file even if pickle.dump raises.
	with open('fifty_states_pct.pickle', 'wb') as pickle_out:
		pickle.dump(main_df, pickle_out)

# initial_state_data()

def HPI_Benchmark():
	"""Fetch the national HPI series and add a percent-change column.

	Returns the DataFrame for the Quandl dataset 'FMAC/HPI_USA' with an
	extra 'United States' column holding the percent change of 'Value'
	relative to its first row.
	"""
	df = quandl.get('FMAC/HPI_USA', authtoken=api_key)
	# .iloc[0] selects the first row by position; plain [0] on a Series
	# without an integer index relies on a deprecated positional fallback.
	baseline = df['Value'].iloc[0]
	df['United States'] = (df['Value'] - baseline) / baseline * 100.0
	return df

# Plot every column of the pickled state table on a single axis, with the
# national benchmark overlaid as a thick black line.
fig = plt.figure()
ax1 = plt.subplot2grid((1,1), (0,0))

# NOTE: pickle.load can execute arbitrary code; only load a pickle file that
# this script produced itself (see initial_state_data).
with open('fifty_states_pct.pickle', 'rb') as pickle_in:
	HPI_data = pickle.load(pickle_in)
benchmark = HPI_Benchmark()

HPI_data.plot(ax=ax1)
benchmark['United States'].plot(ax=ax1, color='k', linewidth=10)
plt.legend().remove()

# Work on a copy so that adding the national column does not leak into
# HPI_data and end up inside the state-only correlation matrix.
HPI_complete_data = HPI_data.copy()
HPI_complete_data['United States'] = benchmark['United States']
HPI_State_Correlation = HPI_data.corr()

HPI_complete_correlation = HPI_complete_data.corr()
HPI_US_correlation = HPI_complete_correlation['United States']
HPI_US_correlation_sorted = HPI_US_correlation.sort_values(ascending=True)
# The last entry is normally 'United States' correlated with itself, so the
# value at position -2 is taken as the strongest state correlation.
# NOTE(review): assumes no NaN correlations, which sort_values places last.
second_highest = HPI_US_correlation_sorted.iloc[-2]
print(HPI_US_correlation_sorted[HPI_US_correlation_sorted == second_highest].index)
plt.show()
# print(HPI_data[['IL','WI']].corr())