|
@@ -67,10 +67,10 @@ class Stocker():
|
|
|
self.max_price_date = self.max_price_date[self.max_price_date.index[0]]
|
|
|
|
|
|
# The starting price (starting with the opening price)
|
|
|
- self.starting_price = float(self.stock.ix[0, 'Adj. Open'])
|
|
|
+ self.starting_price = float(self.stock.loc[0, 'Adj. Open'])
|
|
|
|
|
|
# The most recent price
|
|
|
- self.most_recent_price = float(self.stock.ix[len(self.stock) - 1, 'y'])
|
|
|
+ self.most_recent_price = float(self.stock.loc[self.stock.index[-1], 'y'])
|
|
|
|
|
|
# Whether or not to round dates
|
|
|
self.round_dates = True
|
|
@@ -88,8 +88,8 @@ class Stocker():
|
|
|
self.changepoints = None
|
|
|
|
|
|
print('{} Stocker Initialized. Data covers {} to {}.'.format(self.symbol,
|
|
|
- self.min_date.date(),
|
|
|
- self.max_date.date()))
|
|
|
+ self.min_date,
|
|
|
+ self.max_date))
|
|
|
|
|
|
"""
|
|
|
Make sure start and end dates are in the range and can be
|
|
@@ -122,7 +122,7 @@ class Stocker():
|
|
|
valid_end = True
|
|
|
valid_start = True
|
|
|
|
|
|
- if end_date.date() < start_date.date():
|
|
|
+ if end_date < start_date:
|
|
|
print('End Date must be later than start date.')
|
|
|
start_date = pd.to_datetime(input('Enter a new start date: '))
|
|
|
end_date= pd.to_datetime(input('Enter a new end date: '))
|
|
@@ -130,12 +130,12 @@ class Stocker():
|
|
|
valid_start = False
|
|
|
|
|
|
else:
|
|
|
- if end_date.date() > self.max_date.date():
|
|
|
+ if end_date > self.max_date:
|
|
|
print('End Date exceeds data range')
|
|
|
end_date= pd.to_datetime(input('Enter a new end date: '))
|
|
|
valid_end = False
|
|
|
|
|
|
- if start_date.date() < self.min_date.date():
|
|
|
+ if start_date < self.min_date:
|
|
|
print('Start Date is before date range')
|
|
|
start_date = pd.to_datetime(input('Enter a new start date: '))
|
|
|
valid_start = False
|
|
@@ -169,23 +169,23 @@ class Stocker():
|
|
|
|
|
|
# If both are not in dataframe, round both
|
|
|
if (not end_in) & (not start_in):
|
|
|
- trim_df = df[(df['Date'] >= start_date.date()) &
|
|
|
- (df['Date'] <= end_date.date())]
|
|
|
+ trim_df = df[(df['Date'] >= start_date) &
|
|
|
+ (df['Date'] <= end_date)]
|
|
|
|
|
|
else:
|
|
|
# If both are in dataframe, round neither
|
|
|
if (end_in) & (start_in):
|
|
|
- trim_df = df[(df['Date'] >= start_date.date()) &
|
|
|
- (df['Date'] <= end_date.date())]
|
|
|
+ trim_df = df[(df['Date'] >= start_date) &
|
|
|
+ (df['Date'] <= end_date)]
|
|
|
else:
|
|
|
# If only start is missing, round start
|
|
|
if (not start_in):
|
|
|
- trim_df = df[(df['Date'] > start_date.date()) &
|
|
|
- (df['Date'] <= end_date.date())]
|
|
|
+ trim_df = df[(df['Date'] > start_date) &
|
|
|
+ (df['Date'] <= end_date)]
|
|
|
# If only end is missing, round end
|
|
|
elif (not end_in):
|
|
|
- trim_df = df[(df['Date'] >= start_date.date()) &
|
|
|
- (df['Date'] < end_date.date())]
|
|
|
+ trim_df = df[(df['Date'] >= start_date) &
|
|
|
+ (df['Date'] < end_date)]
|
|
|
|
|
|
|
|
|
else:
|
|
@@ -210,8 +210,8 @@ class Stocker():
|
|
|
end_date = pd.to_datetime(input(prompt='Enter a new end date: ') )
|
|
|
|
|
|
# Dates are not rounded
|
|
|
- trim_df = df[(df['Date'] >= start_date.date()) &
|
|
|
- (df['Date'] <= end_date.date())]
|
|
|
+ trim_df = df[(df['Date'] >= start_date) &
|
|
|
+ (df['Date'] <= end_date)]
|
|
|
|
|
|
|
|
|
|
|
@@ -240,13 +240,13 @@ class Stocker():
|
|
|
stat_avg = np.mean(stock_plot[stat])
|
|
|
|
|
|
date_stat_min = stock_plot[stock_plot[stat] == stat_min]['Date']
|
|
|
- date_stat_min = date_stat_min[date_stat_min.index[0]].date()
|
|
|
+ date_stat_min = date_stat_min[date_stat_min.index[0]]
|
|
|
date_stat_max = stock_plot[stock_plot[stat] == stat_max]['Date']
|
|
|
- date_stat_max = date_stat_max[date_stat_max.index[0]].date()
|
|
|
+ date_stat_max = date_stat_max[date_stat_max.index[0]]
|
|
|
|
|
|
print('Maximum {} = {:.2f} on {}.'.format(stat, stat_max, date_stat_max))
|
|
|
print('Minimum {} = {:.2f} on {}.'.format(stat, stat_min, date_stat_min))
|
|
|
- print('Current {} = {:.2f} on {}.\n'.format(stat, self.stock.ix[len(self.stock) - 1, stat], self.max_date.date()))
|
|
|
+ print('Current {} = {:.2f} on {}.\n'.format(stat, self.stock.loc[self.stock.index[-1], stat], self.max_date))
|
|
|
|
|
|
# Percentage y-axis
|
|
|
if plot_type == 'pct':
|
|
@@ -281,7 +281,7 @@ class Stocker():
|
|
|
def reset_plot():
|
|
|
|
|
|
# Restore default parameters
|
|
|
- matplotlib.rcParams.update(matplotlib.rcParamsDefault)
|
|
|
+ matplotlib.rcdefaults()
|
|
|
|
|
|
# Adjust a few parameters to liking
|
|
|
matplotlib.rcParams['figure.figsize'] = (8, 5)
|
|
@@ -339,18 +339,18 @@ class Stocker():
|
|
|
total_hold_profit = nshares * (end_price - start_price)
|
|
|
|
|
|
print('{} Total buy and hold profit from {} to {} for {} shares = ${:.2f}'.format
|
|
|
- (self.symbol, start_date.date(), end_date.date(), nshares, total_hold_profit))
|
|
|
+ (self.symbol, start_date, end_date, nshares, total_hold_profit))
|
|
|
|
|
|
# Plot the total profits
|
|
|
plt.style.use('dark_background')
|
|
|
|
|
|
# Location for number of profit
|
|
|
- text_location = (end_date - pd.DateOffset(months = 1)).date()
|
|
|
+ text_location = (end_date - pd.DateOffset(months = 1))
|
|
|
|
|
|
# Plot the profits over time
|
|
|
plt.plot(profits['Date'], profits['hold_profit'], 'b', linewidth = 3)
|
|
|
plt.ylabel('Profit ($)'); plt.xlabel('Date'); plt.title('Buy and Hold Profits for {} {} to {}'.format(
|
|
|
- self.symbol, start_date.date(), end_date.date()))
|
|
|
+ self.symbol, start_date, end_date))
|
|
|
|
|
|
# Display final value on graph
|
|
|
plt.text(x = text_location,
|
|
@@ -382,7 +382,7 @@ class Stocker():
|
|
|
def changepoint_prior_analysis(self, changepoint_priors=[0.001, 0.05, 0.1, 0.2], colors=['b', 'r', 'grey', 'gold']):
|
|
|
|
|
|
# Training and plotting with specified years of data
|
|
|
- train = self.stock[(self.stock['Date'] > (max(self.stock['Date']) - pd.DateOffset(years=self.training_years)).date())]
|
|
|
+ train = self.stock[(self.stock['Date'] > (max(self.stock['Date']) - pd.DateOffset(years=self.training_years)))]
|
|
|
|
|
|
# Iterate through all the changepoints and make models
|
|
|
for i, prior in enumerate(changepoint_priors):
|
|
@@ -441,7 +441,7 @@ class Stocker():
|
|
|
model = self.create_model()
|
|
|
|
|
|
# Fit on the stock history for self.training_years number of years
|
|
|
- stock_history = self.stock[self.stock['Date'] > (self.max_date - pd.DateOffset(years = self.training_years)).date()]
|
|
|
+ stock_history = self.stock[self.stock['Date'] > (self.max_date - pd.DateOffset(years = self.training_years))]
|
|
|
|
|
|
if resample:
|
|
|
stock_history = self.resample(stock_history)
|
|
@@ -455,7 +455,7 @@ class Stocker():
|
|
|
if days > 0:
|
|
|
# Print the predicted price
|
|
|
print('Predicted Price on {} = ${:.2f}'.format(
|
|
|
- future.ix[len(future) - 1, 'ds'].date(), future.ix[len(future) - 1, 'yhat']))
|
|
|
+ future.loc[future.index[-1], 'ds'], future.loc[future.index[-1], 'yhat']))
|
|
|
|
|
|
title = '%s Historical and Predicted Stock Price' % self.symbol
|
|
|
else:
|
|
@@ -495,11 +495,11 @@ class Stocker():
|
|
|
start_date, end_date = self.handle_dates(start_date, end_date)
|
|
|
|
|
|
# Training data starts self.training_years years before start date and goes up to start date
|
|
|
- train = self.stock[(self.stock['Date'] < start_date.date()) &
|
|
|
- (self.stock['Date'] > (start_date - pd.DateOffset(years=self.training_years)).date())]
|
|
|
+ train = self.stock[(self.stock['Date'] < start_date) &
|
|
|
+ (self.stock['Date'] > (start_date - pd.DateOffset(years=self.training_years)))]
|
|
|
|
|
|
# Testing data is specified in the range
|
|
|
- test = self.stock[(self.stock['Date'] >= start_date.date()) & (self.stock['Date'] <= end_date.date())]
|
|
|
+ test = self.stock[(self.stock['Date'] >= start_date) & (self.stock['Date'] <= end_date)]
|
|
|
|
|
|
# Create and train the model
|
|
|
model = self.create_model()
|
|
@@ -517,9 +517,9 @@ class Stocker():
|
|
|
# Calculate the differences between consecutive measurements
|
|
|
test['pred_diff'] = test['yhat'].diff()
|
|
|
test['real_diff'] = test['y'].diff()
|
|
|
-
|
|
|
+
|
|
|
# Correct is when we predicted the correct direction
|
|
|
- test['correct'] = (np.sign(test['pred_diff']) == np.sign(test['real_diff'])) * 1
|
|
|
+ test['correct'] = (np.sign(test['pred_diff'][1:]) == np.sign(test['real_diff'][1:])) * 1
|
|
|
|
|
|
# Accuracy when we predict increase and decrease
|
|
|
increase_accuracy = 100 * np.mean(test[test['pred_diff'] > 0]['correct'])
|
|
@@ -536,20 +536,20 @@ class Stocker():
|
|
|
test['in_range'] = False
|
|
|
|
|
|
for i in test.index:
|
|
|
- if (test.ix[i, 'y'] < test.ix[i, 'yhat_upper']) & (test.ix[i, 'y'] > test.ix[i, 'yhat_lower']):
|
|
|
- test.ix[i, 'in_range'] = True
|
|
|
+ if (test.loc[i, 'y'] < test.loc[i, 'yhat_upper']) & (test.loc[i, 'y'] > test.loc[i, 'yhat_lower']):
|
|
|
+ test.loc[i, 'in_range'] = True
|
|
|
|
|
|
in_range_accuracy = 100 * np.mean(test['in_range'])
|
|
|
|
|
|
if not nshares:
|
|
|
|
|
|
# Date range of predictions
|
|
|
- print('\nPrediction Range: {} to {}.'.format(start_date.date(),
|
|
|
- end_date.date()))
|
|
|
+ print('\nPrediction Range: {} to {}.'.format(start_date,
|
|
|
+ end_date))
|
|
|
|
|
|
# Final prediction vs actual value
|
|
|
- print('\nPredicted price on {} = ${:.2f}.'.format(max(future['ds']).date(), future.ix[len(future) - 1, 'yhat']))
|
|
|
- print('Actual price on {} = ${:.2f}.\n'.format(max(test['ds']).date(), test.ix[len(test) - 1, 'y']))
|
|
|
+ print('\nPredicted price on {} = ${:.2f}.'.format(max(future['ds']), future.loc[future.index[-1], 'yhat']))
|
|
|
+ print('Actual price on {} = ${:.2f}.\n'.format(max(test['ds']), test.loc[test.index[-1], 'y']))
|
|
|
|
|
|
print('Average Absolute Error on Training Data = ${:.2f}.'.format(train_mean_error))
|
|
|
print('Average Absolute Error on Testing Data = ${:.2f}.\n'.format(test_mean_error))
|
|
@@ -579,7 +579,7 @@ class Stocker():
|
|
|
facecolor = 'gold', edgecolor = 'k', linewidth = 1.4, label = 'Confidence Interval')
|
|
|
|
|
|
# Put a vertical line at the start of predictions
|
|
|
- plt.vlines(x=min(test['ds']).date(), ymin=min(future['yhat_lower']), ymax=max(future['yhat_upper']), colors = 'r',
|
|
|
+ plt.vlines(x=min(test['ds']), ymin=min(future['yhat_lower']), ymax=max(future['yhat_upper']), colors = 'r',
|
|
|
linestyles='dashed', label = 'Prediction Start')
|
|
|
|
|
|
# Plot formatting
|
|
@@ -587,7 +587,7 @@ class Stocker():
|
|
|
plt.grid(linewidth=0.6, alpha = 0.6)
|
|
|
|
|
|
plt.title('{} Model Evaluation from {} to {}.'.format(self.symbol,
|
|
|
- start_date.date(), end_date.date()));
|
|
|
+ start_date, end_date));
|
|
|
plt.show();
|
|
|
|
|
|
|
|
@@ -605,31 +605,31 @@ class Stocker():
|
|
|
|
|
|
# If we predicted up and the price goes up, we gain the difference
|
|
|
if correct == 1:
|
|
|
- prediction_profit.append(nshares * test_pred_increase.ix[i, 'real_diff'])
|
|
|
+ prediction_profit.append(nshares * test_pred_increase.loc[i, 'real_diff'])
|
|
|
# If we predicted up and the price goes down, we lose the difference
|
|
|
else:
|
|
|
- prediction_profit.append(nshares * test_pred_increase.ix[i, 'real_diff'])
|
|
|
+ prediction_profit.append(nshares * test_pred_increase.loc[i, 'real_diff'])
|
|
|
|
|
|
test_pred_increase['pred_profit'] = prediction_profit
|
|
|
|
|
|
# Put the profit into the test dataframe
|
|
|
test = pd.merge(test, test_pred_increase[['ds', 'pred_profit']], on = 'ds', how = 'left')
|
|
|
- test.ix[0, 'pred_profit'] = 0
|
|
|
+ test.loc[0, 'pred_profit'] = 0
|
|
|
|
|
|
# Profit for either method at all dates
|
|
|
test['pred_profit'] = test['pred_profit'].cumsum().ffill()
|
|
|
- test['hold_profit'] = nshares * (test['y'] - float(test.ix[0, 'y']))
|
|
|
+ test['hold_profit'] = nshares * (test['y'] - float(test.loc[0, 'y']))
|
|
|
|
|
|
# Display information
|
|
|
print('You played the stock market in {} from {} to {} with {} shares.\n'.format(
|
|
|
- self.symbol, start_date.date(), end_date.date(), nshares))
|
|
|
+ self.symbol, start_date, end_date, nshares))
|
|
|
|
|
|
print('When the model predicted an increase, the price increased {:.2f}% of the time.'.format(increase_accuracy))
|
|
|
print('When the model predicted a decrease, the price decreased {:.2f}% of the time.\n'.format(decrease_accuracy))
|
|
|
|
|
|
# Display some friendly information about the perils of playing the stock market
|
|
|
print('The total profit using the Prophet model = ${:.2f}.'.format(np.sum(prediction_profit)))
|
|
|
- print('The Buy and Hold strategy profit = ${:.2f}.'.format(float(test.ix[len(test) - 1, 'hold_profit'])))
|
|
|
+ print('The Buy and Hold strategy profit = ${:.2f}.'.format(float(test.loc[test.index[-1], 'hold_profit'])))
|
|
|
print('\nThanks for playing the stock market!\n')
|
|
|
|
|
|
|
|
@@ -638,12 +638,12 @@ class Stocker():
|
|
|
self.reset_plot()
|
|
|
|
|
|
# Final profit and final smart used for locating text
|
|
|
- final_profit = test.ix[len(test) - 1, 'pred_profit']
|
|
|
- final_smart = test.ix[len(test) - 1, 'hold_profit']
|
|
|
+ final_profit = test.loc[test.index[-1], 'pred_profit']
|
|
|
+ final_smart = test.loc[test.index[-1], 'hold_profit']
|
|
|
|
|
|
# text location
|
|
|
- last_date = test.ix[len(test) - 1, 'ds']
|
|
|
- text_location = (last_date - pd.DateOffset(months = 1)).date()
|
|
|
+ last_date = test.loc[test.index[-1], 'ds']
|
|
|
+ text_location = (last_date - pd.DateOffset(months = 1))
|
|
|
|
|
|
plt.style.use('dark_background')
|
|
|
|
|
@@ -705,7 +705,7 @@ class Stocker():
|
|
|
model = self.create_model()
|
|
|
|
|
|
# Use past self.training_years years of data
|
|
|
- train = self.stock[self.stock['Date'] > (self.max_date - pd.DateOffset(years = self.training_years)).date()]
|
|
|
+ train = self.stock[self.stock['Date'] > (self.max_date - pd.DateOffset(years = self.training_years))]
|
|
|
model.fit(train)
|
|
|
|
|
|
# Predictions of the training data (no future periods)
|
|
@@ -720,9 +720,9 @@ class Stocker():
|
|
|
# Create dataframe of only changepoints
|
|
|
change_indices = []
|
|
|
for changepoint in (changepoints):
|
|
|
- change_indices.append(train[train['ds'] == changepoint.date()].index[0])
|
|
|
+ change_indices.append(train[train['ds'] == changepoint].index[0])
|
|
|
|
|
|
- c_data = train.ix[change_indices, :]
|
|
|
+ c_data = train.loc[change_indices, :]
|
|
|
deltas = model.params['delta'][0]
|
|
|
|
|
|
c_data['delta'] = deltas
|
|
@@ -742,7 +742,7 @@ class Stocker():
|
|
|
if not search:
|
|
|
|
|
|
print('\nChangepoints sorted by slope rate of change (2nd derivative):\n')
|
|
|
- print(c_data.ix[:, ['Date', 'Adj. Close', 'delta']][:5])
|
|
|
+ print(c_data.loc[:, ['Date', 'Adj. Close', 'delta']][:5])
|
|
|
|
|
|
# Line plot showing actual values, estimated values, and changepoints
|
|
|
self.reset_plot()
|
|
@@ -768,7 +768,7 @@ class Stocker():
|
|
|
# Show related queries, rising related queries
|
|
|
# Graph changepoints, search frequency, stock price
|
|
|
if search:
|
|
|
- date_range = ['%s %s' % (str(min(train['Date']).date()), str(max(train['Date']).date()))]
|
|
|
+ date_range = ['%s %s' % (str(min(train['Date'])), str(max(train['Date'])))]
|
|
|
|
|
|
# Get the Google Trends for specified terms and join to training dataframe
|
|
|
trends, related_queries = self.retrieve_google_trends(search, date_range)
|
|
@@ -823,7 +823,7 @@ class Stocker():
|
|
|
def predict_future(self, days=30):
|
|
|
|
|
|
# Use past self.training_years years for training
|
|
|
- train = self.stock[self.stock['Date'] > (max(self.stock['Date']) - pd.DateOffset(years=self.training_years)).date()]
|
|
|
+ train = self.stock[self.stock['Date'] > (max(self.stock['Date']) - pd.DateOffset(years=self.training_years))]
|
|
|
|
|
|
model = self.create_model()
|
|
|
|
|
@@ -834,7 +834,7 @@ class Stocker():
|
|
|
future = model.predict(future)
|
|
|
|
|
|
# Only concerned with future dates
|
|
|
- future = future[future['ds'] >= max(self.stock['Date']).date()]
|
|
|
+ future = future[future['ds'] >= max(self.stock['Date'])]
|
|
|
|
|
|
# Remove the weekends
|
|
|
future = self.remove_weekends(future)
|
|
@@ -907,24 +907,24 @@ class Stocker():
|
|
|
start_date, end_date = self.handle_dates(start_date, end_date)
|
|
|
|
|
|
# Select self.training_years number of years
|
|
|
- train = self.stock[(self.stock['Date'] > (start_date - pd.DateOffset(years=self.training_years)).date()) &
|
|
|
- (self.stock['Date'] < start_date.date())]
|
|
|
+ train = self.stock[(self.stock['Date'] > (start_date - pd.DateOffset(years=self.training_years))) &
|
|
|
+ (self.stock['Date'] < start_date)]
|
|
|
|
|
|
# Testing data is specified by range
|
|
|
- test = self.stock[(self.stock['Date'] >= start_date.date()) & (self.stock['Date'] <= end_date.date())]
|
|
|
+ test = self.stock[(self.stock['Date'] >= start_date) & (self.stock['Date'] <= end_date)]
|
|
|
|
|
|
- eval_days = (max(test['Date']).date() - min(test['Date']).date()).days
|
|
|
+ eval_days = (max(test['Date']) - min(test['Date'])).days
|
|
|
|
|
|
results = pd.DataFrame(0, index = list(range(len(changepoint_priors))),
|
|
|
columns = ['cps', 'train_err', 'train_range', 'test_err', 'test_range'])
|
|
|
|
|
|
- print('\nValidation Range {} to {}.\n'.format(min(test['Date']).date(),
|
|
|
- max(test['Date']).date()))
|
|
|
+ print('\nValidation Range {} to {}.\n'.format(min(test['Date']),
|
|
|
+ max(test['Date'])))
|
|
|
|
|
|
|
|
|
# Iterate through all the changepoints and make models
|
|
|
for i, prior in enumerate(changepoint_priors):
|
|
|
- results.ix[i, 'cps'] = prior
|
|
|
+ results.loc[i, 'cps'] = prior
|
|
|
|
|
|
# Select the changepoint
|
|
|
self.changepoint_prior_scale = prior
|
|
@@ -941,16 +941,16 @@ class Stocker():
|
|
|
avg_train_error = np.mean(abs(train_results['y'] - train_results['yhat']))
|
|
|
avg_train_uncertainty = np.mean(abs(train_results['yhat_upper'] - train_results['yhat_lower']))
|
|
|
|
|
|
- results.ix[i, 'train_err'] = avg_train_error
|
|
|
- results.ix[i, 'train_range'] = avg_train_uncertainty
|
|
|
+ results.loc[i, 'train_err'] = avg_train_error
|
|
|
+ results.loc[i, 'train_range'] = avg_train_uncertainty
|
|
|
|
|
|
# Testing results and metrics
|
|
|
test_results = pd.merge(test, future[['ds', 'yhat', 'yhat_upper', 'yhat_lower']], on = 'ds', how = 'inner')
|
|
|
avg_test_error = np.mean(abs(test_results['y'] - test_results['yhat']))
|
|
|
avg_test_uncertainty = np.mean(abs(test_results['yhat_upper'] - test_results['yhat_lower']))
|
|
|
|
|
|
- results.ix[i, 'test_err'] = avg_test_error
|
|
|
- results.ix[i, 'test_range'] = avg_test_uncertainty
|
|
|
+ results.loc[i, 'test_err'] = avg_test_error
|
|
|
+ results.loc[i, 'test_range'] = avg_test_uncertainty
|
|
|
|
|
|
print(results)
|
|
|
|