# stocker.py

# Quandl for financial analysis, pandas and numpy for data manipulation
# fbprophet for additive models, pytrends for Google trend data
import quandl
import pandas as pd
import numpy as np
import fbprophet
import pytrends
from pytrends.request import TrendReq

# matplotlib pyplot for plotting
import matplotlib.pyplot as plt
import matplotlib


# Class for analyzing and (attempting) to predict future prices
# Contains a number of visualizations and analysis methods
class Stocker:

    # Initialization requires a ticker symbol
    def __init__(self, ticker, exchange="WIKI"):

        # Enforce capitalization
        ticker = ticker.upper()

        # Symbol is used for labeling plots
        self.symbol = ticker

        # Use Personal Api Key
        # quandl.ApiConfig.api_key = 'YourKeyHere'

        # Retrieve the financial data
        try:
            stock = quandl.get("%s/%s" % (exchange, ticker))
        except Exception as e:
            print("Error Retrieving Data.")
            print(e)
            return

        # Set the index to a column called Date
        stock = stock.reset_index(level=0)

        # Columns required for prophet
        stock["ds"] = stock["Date"]

        if "Adj. Close" not in stock.columns:
            stock["Adj. Close"] = stock["Close"]
            stock["Adj. Open"] = stock["Open"]

        stock["y"] = stock["Adj. Close"]
        stock["Daily Change"] = stock["Adj. Close"] - stock["Adj. Open"]

        # Data assigned as class attribute
        self.stock = stock.copy()

        # Minimum and maximum date in range
        self.min_date = min(stock["Date"])
        self.max_date = max(stock["Date"])

        # Find max and min prices and dates on which they occurred
        self.max_price = np.max(self.stock["y"])
        self.min_price = np.min(self.stock["y"])

        self.min_price_date = self.stock[self.stock["y"] == self.min_price]["Date"]
        self.min_price_date = self.min_price_date[self.min_price_date.index[0]]
        self.max_price_date = self.stock[self.stock["y"] == self.max_price]["Date"]
        self.max_price_date = self.max_price_date[self.max_price_date.index[0]]

        # The starting price (using the opening price)
        self.starting_price = float(self.stock.loc[0, "Adj. Open"])

        # The most recent price
        self.most_recent_price = float(self.stock.loc[self.stock.index[-1], "y"])

        # Whether or not to round dates
        self.round_dates = True

        # Number of years of data to train on
        self.training_years = 3

        # Prophet parameters
        # Default prior from library
        self.changepoint_prior_scale = 0.05
        self.weekly_seasonality = False
        self.daily_seasonality = False
        self.monthly_seasonality = True
        self.yearly_seasonality = True
        self.changepoints = None

        print(
            "{} Stocker Initialized. Data covers {} to {}.".format(
                self.symbol, self.min_date, self.max_date
            )
        )

    """
    Make sure start and end dates are in the range and can be
    converted to pandas datetimes. Returns dates in the correct format
    """

    def handle_dates(self, start_date, end_date):

        # Default start and end date are the beginning and end of data
        if start_date is None:
            start_date = self.min_date
        if end_date is None:
            end_date = self.max_date

        try:
            # Convert to pandas datetime for indexing dataframe
            start_date = pd.to_datetime(start_date)
            end_date = pd.to_datetime(end_date)
        except Exception as e:
            print("Enter valid pandas date format.")
            print(e)
            return

        valid_start = False
        valid_end = False

        # Prompt the user for new dates until both dates are valid
        while (not valid_start) or (not valid_end):
            valid_end = True
            valid_start = True

            if end_date < start_date:
                print("End Date must be later than start date.")
                start_date = pd.to_datetime(input("Enter a new start date: "))
                end_date = pd.to_datetime(input("Enter a new end date: "))
                valid_end = False
                valid_start = False
            else:
                if end_date > self.max_date:
                    print("End Date exceeds data range")
                    end_date = pd.to_datetime(input("Enter a new end date: "))
                    valid_end = False

                if start_date < self.min_date:
                    print("Start Date is before date range")
                    start_date = pd.to_datetime(input("Enter a new start date: "))
                    valid_start = False

        return start_date, end_date

    """
    Return the dataframe trimmed to the specified range.
    """

    def make_df(self, start_date, end_date, df=None):

        # Default is to use the object stock data
        if df is None:
            df = self.stock.copy()

        start_date, end_date = self.handle_dates(start_date, end_date)

        # keep track of whether the start and end dates are in the data
        start_in = True
        end_in = True

        # If user wants to round dates (default behavior)
        if self.round_dates:
            # Record if start and end date are in df
            if start_date not in list(df["Date"]):
                start_in = False
            if end_date not in list(df["Date"]):
                end_in = False

            # If both are not in dataframe, round both
            if (not end_in) & (not start_in):
                trim_df = df[(df["Date"] >= start_date) & (df["Date"] <= end_date)]
            else:
                # If both are in dataframe, round neither
                if (end_in) & (start_in):
                    trim_df = df[(df["Date"] >= start_date) & (df["Date"] <= end_date)]
                else:
                    # If only start is missing, round start
                    if not start_in:
                        trim_df = df[
                            (df["Date"] > start_date) & (df["Date"] <= end_date)
                        ]
                    # If only end is missing, round end
                    elif not end_in:
                        trim_df = df[
                            (df["Date"] >= start_date) & (df["Date"] < end_date)
                        ]

        else:
            valid_start = False
            valid_end = False

            # Dates are not rounded: both dates must be trading days in the data
            while (not valid_start) or (not valid_end):
                start_date, end_date = self.handle_dates(start_date, end_date)

                if start_date in list(df["Date"]):
                    valid_start = True
                if end_date in list(df["Date"]):
                    valid_end = True

                # Check to make sure dates are in the data
                if start_date not in list(df["Date"]):
                    print(
                        "Start Date not in data (either out of range or not a trading day.)"
                    )
                    start_date = pd.to_datetime(input("Enter a new start date: "))
                elif end_date not in list(df["Date"]):
                    print(
                        "End Date not in data (either out of range or not a trading day.)"
                    )
                    end_date = pd.to_datetime(input("Enter a new end date: "))

            # Dates are not rounded
            trim_df = df[(df["Date"] >= start_date) & (df["Date"] <= end_date)]

        return trim_df

    # Basic Historical Plots and Basic Statistics
    def plot_stock(
        self, start_date=None, end_date=None, stats=["Adj. Close"], plot_type="basic"
    ):

        self.reset_plot()

        if start_date is None:
            start_date = self.min_date
        if end_date is None:
            end_date = self.max_date

        stock_plot = self.make_df(start_date, end_date)

        colors = ["r", "b", "g", "y", "c", "m"]

        for i, stat in enumerate(stats):
            stat_min = min(stock_plot[stat])
            stat_max = max(stock_plot[stat])
            stat_avg = np.mean(stock_plot[stat])

            date_stat_min = stock_plot[stock_plot[stat] == stat_min]["Date"]
            date_stat_min = date_stat_min[date_stat_min.index[0]]
            date_stat_max = stock_plot[stock_plot[stat] == stat_max]["Date"]
            date_stat_max = date_stat_max[date_stat_max.index[0]]

            print("Maximum {} = {:.2f} on {}.".format(stat, stat_max, date_stat_max))
            print("Minimum {} = {:.2f} on {}.".format(stat, stat_min, date_stat_min))
            print(
                "Current {} = {:.2f} on {}.\n".format(
                    stat, self.stock.loc[self.stock.index[-1], stat], self.max_date
                )
            )

            # Percentage y-axis
            if plot_type == "pct":
                # Simple Plot
                plt.style.use("fivethirtyeight")
                if stat == "Daily Change":
                    plt.plot(
                        stock_plot["Date"],
                        100 * stock_plot[stat],
                        color=colors[i],
                        linewidth=2.4,
                        alpha=0.9,
                        label=stat,
                    )
                else:
                    plt.plot(
                        stock_plot["Date"],
                        100 * (stock_plot[stat] - stat_avg) / stat_avg,
                        color=colors[i],
                        linewidth=2.4,
                        alpha=0.9,
                        label=stat,
                    )

                plt.xlabel("Date")
                plt.ylabel("Change Relative to Average (%)")
                plt.title("%s Stock History" % self.symbol)
                plt.legend(prop={"size": 10})
                plt.grid(color="k", alpha=0.4)

            # Stat y-axis
            elif plot_type == "basic":
                plt.style.use("fivethirtyeight")
                plt.plot(
                    stock_plot["Date"],
                    stock_plot[stat],
                    color=colors[i],
                    linewidth=3,
                    label=stat,
                    alpha=0.8,
                )
                plt.xlabel("Date")
                plt.ylabel("US $")
                plt.title("%s Stock History" % self.symbol)
                plt.legend(prop={"size": 10})
                plt.grid(color="k", alpha=0.4)

        plt.show()

    # Reset the plotting parameters to clear style formatting
    # Not sure if this should be a static method
    @staticmethod
    def reset_plot():

        # Restore default parameters
        matplotlib.rcdefaults()

        # Adjust a few parameters to liking
        matplotlib.rcParams["figure.figsize"] = (8, 5)
        matplotlib.rcParams["axes.labelsize"] = 10
        matplotlib.rcParams["xtick.labelsize"] = 8
        matplotlib.rcParams["ytick.labelsize"] = 8
        matplotlib.rcParams["axes.titlesize"] = 14
        matplotlib.rcParams["text.color"] = "k"

    # Method to linearly interpolate prices on the weekends
    def resample(self, dataframe):
        # Change the index and resample at daily level
        # (asfreq inserts NaN rows for the missing calendar days)
        dataframe = dataframe.set_index("ds")
        dataframe = dataframe.resample("D").asfreq()

        # Reset the index and interpolate nan values
        dataframe = dataframe.reset_index(level=0)
        dataframe = dataframe.interpolate()
        return dataframe

    # Remove weekends from a dataframe
    def remove_weekends(self, dataframe):
        # Reset index to consecutive integers
        dataframe = dataframe.reset_index(drop=True)

        weekends = []

        # Find all of the weekends
        for i, date in enumerate(dataframe["ds"]):
            if (date.weekday()) == 5 or (date.weekday() == 6):
                weekends.append(i)

        # Drop the weekends
        dataframe = dataframe.drop(weekends, axis=0)

        return dataframe

    # Calculate and plot profit from buying and holding shares for specified date range
    def buy_and_hold(self, start_date=None, end_date=None, nshares=1):
        self.reset_plot()

        start_date, end_date = self.handle_dates(start_date, end_date)

        # Find starting and ending price of stock
        start_price = float(self.stock[self.stock["Date"] == start_date]["Adj. Open"])
        end_price = float(self.stock[self.stock["Date"] == end_date]["Adj. Close"])

        # Make a profit dataframe and calculate profit column
        profits = self.make_df(start_date, end_date)
        profits["hold_profit"] = nshares * (profits["Adj. Close"] - start_price)

        # Total profit
        total_hold_profit = nshares * (end_price - start_price)

        print(
            "{} Total buy and hold profit from {} to {} for {} shares = ${:.2f}".format(
                self.symbol, start_date, end_date, nshares, total_hold_profit
            )
        )

        # Plot the total profits
        plt.style.use("dark_background")

        # Location for labeling the final profit on the plot
        text_location = end_date - pd.DateOffset(months=1)

        # Plot the profits over time
        plt.plot(profits["Date"], profits["hold_profit"], "b", linewidth=3)
        plt.ylabel("Profit ($)")
        plt.xlabel("Date")
        plt.title(
            "Buy and Hold Profits for {} {} to {}".format(
                self.symbol, start_date, end_date
            )
        )

        # Display final value on graph
        plt.text(
            x=text_location,
            y=total_hold_profit + (total_hold_profit / 40),
            s="$%d" % total_hold_profit,
            color="g" if total_hold_profit > 0 else "r",
            size=14,
        )

        plt.grid(alpha=0.2)
        plt.show()

    # Create a prophet model without training
    def create_model(self):

        # Make the model
        model = fbprophet.Prophet(
            daily_seasonality=self.daily_seasonality,
            weekly_seasonality=self.weekly_seasonality,
            yearly_seasonality=self.yearly_seasonality,
            changepoint_prior_scale=self.changepoint_prior_scale,
            changepoints=self.changepoints,
        )

        if self.monthly_seasonality:
            # Add monthly seasonality
            model.add_seasonality(name="monthly", period=30.5, fourier_order=5)

        return model

    # Graph the effects of altering the changepoint prior scale (cps)
    def changepoint_prior_analysis(
        self,
        changepoint_priors=[0.001, 0.05, 0.1, 0.2],
        colors=["b", "r", "grey", "gold"],
    ):

        # Training and plotting with specified years of data
        train = self.stock[
            (
                self.stock["Date"]
                > (max(self.stock["Date"]) - pd.DateOffset(years=self.training_years))
            )
        ]

        # Iterate through all the changepoints and make models
        for i, prior in enumerate(changepoint_priors):
            # Select the changepoint
            self.changepoint_prior_scale = prior

            # Create and train a model with the specified cps
            model = self.create_model()
            model.fit(train)
            future = model.make_future_dataframe(periods=180, freq="D")

            # Make a dataframe to hold predictions
            if i == 0:
                predictions = future.copy()

            future = model.predict(future)

            # Fill in prediction dataframe
            predictions["%.3f_yhat_upper" % prior] = future["yhat_upper"]
            predictions["%.3f_yhat_lower" % prior] = future["yhat_lower"]
            predictions["%.3f_yhat" % prior] = future["yhat"]

        # Remove the weekends
        predictions = self.remove_weekends(predictions)

        # Plot set-up
        self.reset_plot()
        plt.style.use("fivethirtyeight")
        fig, ax = plt.subplots(1, 1)

        # Actual observations
        ax.plot(train["ds"], train["y"], "ko", ms=4, label="Observations")
        color_dict = {prior: color for prior, color in zip(changepoint_priors, colors)}

        # Plot each of the changepoint predictions
        for prior in changepoint_priors:
            # Plot the predictions themselves
            ax.plot(
                predictions["ds"],
                predictions["%.3f_yhat" % prior],
                linewidth=1.2,
                color=color_dict[prior],
                label="%.3f prior scale" % prior,
            )

            # Plot the uncertainty interval
            ax.fill_between(
                predictions["ds"].dt.to_pydatetime(),
                predictions["%.3f_yhat_upper" % prior],
                predictions["%.3f_yhat_lower" % prior],
                facecolor=color_dict[prior],
                alpha=0.3,
                edgecolor="k",
                linewidth=0.6,
            )

        # Plot labels
        plt.legend(loc=2, prop={"size": 10})
        plt.xlabel("Date")
        plt.ylabel("Stock Price ($)")
        plt.title("Effect of Changepoint Prior Scale")
        plt.show()

    # Basic prophet model for specified number of days
    def create_prophet_model(self, days=0, resample=False):
        self.reset_plot()

        model = self.create_model()

        # Fit on the stock history for self.training_years number of years
        stock_history = self.stock[
            self.stock["Date"]
            > (self.max_date - pd.DateOffset(years=self.training_years))
        ]

        if resample:
            stock_history = self.resample(stock_history)

        model.fit(stock_history)

        # Make and predict for next year with future dataframe
        future = model.make_future_dataframe(periods=days, freq="D")
        future = model.predict(future)

        if days > 0:
            # Print the predicted price
            print(
                "Predicted Price on {} = ${:.2f}".format(
                    future.loc[future.index[-1], "ds"],
                    future.loc[future.index[-1], "yhat"],
                )
            )

            title = "%s Historical and Predicted Stock Price" % self.symbol
        else:
            title = "%s Historical and Modeled Stock Price" % self.symbol

        # Set up the plot
        fig, ax = plt.subplots(1, 1)

        # Plot the actual values
        ax.plot(
            stock_history["ds"],
            stock_history["y"],
            "ko-",
            linewidth=1.4,
            alpha=0.8,
            ms=1.8,
            label="Observations",
        )

        # Plot the predicted values
        ax.plot(
            future["ds"], future["yhat"], "forestgreen", linewidth=2.4, label="Modeled"
        )

        # Plot the uncertainty interval as ribbon
        ax.fill_between(
            future["ds"].dt.to_pydatetime(),
            future["yhat_upper"],
            future["yhat_lower"],
            alpha=0.3,
            facecolor="g",
            edgecolor="k",
            linewidth=1.4,
            label="Confidence Interval",
        )

        # Plot formatting
        plt.legend(loc=2, prop={"size": 10})
        plt.xlabel("Date")
        plt.ylabel("Price $")
        plt.grid(linewidth=0.6, alpha=0.6)
        plt.title(title)
        plt.show()

        return model, future

    # Evaluate prediction model for one year
    def evaluate_prediction(self, start_date=None, end_date=None, nshares=None):

        # Default start date is one year before end of data
        # Default end date is end date of data
        if start_date is None:
            start_date = self.max_date - pd.DateOffset(years=1)
        if end_date is None:
            end_date = self.max_date

        start_date, end_date = self.handle_dates(start_date, end_date)

        # Training data starts self.training_years years before start date and goes up to start date
        train = self.stock[
            (self.stock["Date"] < start_date)
            & (
                self.stock["Date"]
                > (start_date - pd.DateOffset(years=self.training_years))
            )
        ]

        # Testing data is specified in the range
        test = self.stock[
            (self.stock["Date"] >= start_date) & (self.stock["Date"] <= end_date)
        ]

        # Create and train the model
        model = self.create_model()
        model.fit(train)

        # Make a future dataframe and predictions
        future = model.make_future_dataframe(periods=365, freq="D")
        future = model.predict(future)

        # Merge predictions with the known values
        test = pd.merge(test, future, on="ds", how="inner")
        train = pd.merge(train, future, on="ds", how="inner")

        # Calculate the differences between consecutive measurements
        test["pred_diff"] = test["yhat"].diff()
        test["real_diff"] = test["y"].diff()

        # Correct is when we predicted the correct direction
        test["correct"] = (
            np.sign(test["pred_diff"][1:]) == np.sign(test["real_diff"][1:])
        ) * 1

        # Accuracy when we predict increase and decrease
        increase_accuracy = 100 * np.mean(test[test["pred_diff"] > 0]["correct"])
        decrease_accuracy = 100 * np.mean(test[test["pred_diff"] < 0]["correct"])

        # Calculate mean absolute error
        test_errors = abs(test["y"] - test["yhat"])
        test_mean_error = np.mean(test_errors)

        train_errors = abs(train["y"] - train["yhat"])
        train_mean_error = np.mean(train_errors)

        # Calculate percentage of time actual value within prediction range
        test["in_range"] = False

        for i in test.index:
            if (test.loc[i, "y"] < test.loc[i, "yhat_upper"]) & (
                test.loc[i, "y"] > test.loc[i, "yhat_lower"]
            ):
                test.loc[i, "in_range"] = True

        in_range_accuracy = 100 * np.mean(test["in_range"])

        if not nshares:

            # Date range of predictions
            print("\nPrediction Range: {} to {}.".format(start_date, end_date))

            # Final prediction vs actual value
            print(
                "\nPredicted price on {} = ${:.2f}.".format(
                    max(future["ds"]), future.loc[future.index[-1], "yhat"]
                )
            )
            print(
                "Actual price on {} = ${:.2f}.\n".format(
                    max(test["ds"]), test.loc[test.index[-1], "y"]
                )
            )

            print(
                "Average Absolute Error on Training Data = ${:.2f}.".format(
                    train_mean_error
                )
            )
            print(
                "Average Absolute Error on Testing Data = ${:.2f}.\n".format(
                    test_mean_error
                )
            )

            # Direction accuracy
            print(
                "When the model predicted an increase, the price increased {:.2f}% of the time.".format(
                    increase_accuracy
                )
            )
            print(
                "When the model predicted a decrease, the price decreased {:.2f}% of the time.\n".format(
                    decrease_accuracy
                )
            )

            print(
                "The actual value was within the {:d}% confidence interval {:.2f}% of the time.".format(
                    int(100 * model.interval_width), in_range_accuracy
                )
            )

            # Reset the plot
            self.reset_plot()

            # Set up the plot
            fig, ax = plt.subplots(1, 1)

            # Plot the actual values
            ax.plot(
                train["ds"],
                train["y"],
                "ko-",
                linewidth=1.4,
                alpha=0.8,
                ms=1.8,
                label="Observations",
            )
            ax.plot(
                test["ds"],
                test["y"],
                "ko-",
                linewidth=1.4,
                alpha=0.8,
                ms=1.8,
                label="Observations",
            )

            # Plot the predicted values
            ax.plot(
                future["ds"], future["yhat"], "navy", linewidth=2.4, label="Predicted"
            )

            # Plot the uncertainty interval as ribbon
            ax.fill_between(
                future["ds"].dt.to_pydatetime(),
                future["yhat_upper"],
                future["yhat_lower"],
                alpha=0.6,
                facecolor="gold",
                edgecolor="k",
                linewidth=1.4,
                label="Confidence Interval",
            )

            # Put a vertical line at the start of predictions
            plt.vlines(
                x=min(test["ds"]),
                ymin=min(future["yhat_lower"]),
                ymax=max(future["yhat_upper"]),
                colors="r",
                linestyles="dashed",
                label="Prediction Start",
            )

            # Plot formatting
            plt.legend(loc=2, prop={"size": 8})
            plt.xlabel("Date")
            plt.ylabel("Price $")
            plt.grid(linewidth=0.6, alpha=0.6)
            plt.title(
                "{} Model Evaluation from {} to {}.".format(
                    self.symbol, start_date, end_date
                )
            )
            plt.show()

        # If a number of shares is specified, play the game
        elif nshares:

            # Only play the market when we predict the stock will increase
            test_pred_increase = test[test["pred_diff"] > 0]

            test_pred_increase.reset_index(inplace=True)
            prediction_profit = []

            # Iterate through all the predictions and calculate profit from playing
            for i, correct in enumerate(test_pred_increase["correct"]):

                # If we predicted up and the price goes up, we gain the difference
                if correct == 1:
                    prediction_profit.append(
                        nshares * test_pred_increase.loc[i, "real_diff"]
                    )
                # If we predicted up and the price goes down, we lose the difference
                else:
                    prediction_profit.append(
                        nshares * test_pred_increase.loc[i, "real_diff"]
                    )

            test_pred_increase["pred_profit"] = prediction_profit

            # Put the profit into the test dataframe
            test = pd.merge(
                test, test_pred_increase[["ds", "pred_profit"]], on="ds", how="left"
            )
            test.loc[0, "pred_profit"] = 0

            # Profit for either method at all dates
            test["pred_profit"] = test["pred_profit"].cumsum().ffill()
            test["hold_profit"] = nshares * (test["y"] - float(test.loc[0, "y"]))

            # Display information
            print(
                "You played the stock market in {} from {} to {} with {} shares.\n".format(
                    self.symbol, start_date, end_date, nshares
                )
            )

            print(
                "When the model predicted an increase, the price increased {:.2f}% of the time.".format(
                    increase_accuracy
                )
            )
            print(
                "When the model predicted a decrease, the price decreased {:.2f}% of the time.\n".format(
                    decrease_accuracy
                )
            )

            # Display some friendly information about the perils of playing the stock market
            print(
                "The total profit using the Prophet model = ${:.2f}.".format(
                    np.sum(prediction_profit)
                )
            )
            print(
                "The Buy and Hold strategy profit = ${:.2f}.".format(
                    float(test.loc[test.index[-1], "hold_profit"])
                )
            )
            print("\nThanks for playing the stock market!\n")

            # Plot the predicted and actual profits over time
            self.reset_plot()

            # Final predicted and buy-and-hold profits, used for locating the text labels
            final_profit = test.loc[test.index[-1], "pred_profit"]
            final_smart = test.loc[test.index[-1], "hold_profit"]

            # text location
            last_date = test.loc[test.index[-1], "ds"]
            text_location = last_date - pd.DateOffset(months=1)

            plt.style.use("dark_background")

            # Plot buy-and-hold profits
            plt.plot(
                test["ds"],
                test["hold_profit"],
                "b",
                linewidth=1.8,
                label="Buy and Hold Strategy",
            )

            # Plot prediction profits
            plt.plot(
                test["ds"],
                test["pred_profit"],
                color="g" if final_profit > 0 else "r",
                linewidth=1.8,
                label="Prediction Strategy",
            )

            # Display final values on graph
            plt.text(
                x=text_location,
                y=final_profit + (final_profit / 40),
                s="$%d" % final_profit,
                color="g" if final_profit > 0 else "r",
                size=18,
            )
            plt.text(
                x=text_location,
                y=final_smart + (final_smart / 40),
                s="$%d" % final_smart,
                color="g" if final_smart > 0 else "r",
                size=18,
            )

            # Plot formatting
            plt.ylabel("Profit (US $)")
            plt.xlabel("Date")
            plt.title("Predicted versus Buy and Hold Profits")
            plt.legend(loc=2, prop={"size": 10})
            plt.grid(alpha=0.2)
            plt.show()

    def retrieve_google_trends(self, search, date_range):

        # Set up the trend fetching object
        pytrends = TrendReq(hl="en-US", tz=360)
        kw_list = [search]

        try:
            # Create the search object
            pytrends.build_payload(
                kw_list, cat=0, timeframe=date_range[0], geo="", gprop="news"
            )

            # Retrieve the interest over time
            trends = pytrends.interest_over_time()
            related_queries = pytrends.related_queries()
        except Exception as e:
            print("\nGoogle Search Trend retrieval failed.")
            print(e)
            # Return a pair so the caller's unpacking and None checks still work
            return None, None

        return trends, related_queries

    def changepoint_date_analysis(self, search=None):
        self.reset_plot()

        model = self.create_model()

        # Use past self.training_years years of data
        train = self.stock[
            self.stock["Date"]
            > (self.max_date - pd.DateOffset(years=self.training_years))
        ]
        model.fit(train)

        # Predictions of the training data (no future periods)
        future = model.make_future_dataframe(periods=0, freq="D")
        future = model.predict(future)

        train = pd.merge(train, future[["ds", "yhat"]], on="ds", how="inner")

        changepoints = model.changepoints
        train = train.reset_index(drop=True)

        # Create dataframe of only changepoints
        change_indices = []
        for changepoint in changepoints:
            change_indices.append(train[train["ds"] == changepoint].index[0])

        c_data = train.loc[change_indices, :]
        deltas = model.params["delta"][0]

        c_data["delta"] = deltas
        c_data["abs_delta"] = abs(c_data["delta"])

        # Sort the values by maximum change
        c_data = c_data.sort_values(by="abs_delta", ascending=False)

        # Limit to 10 largest changepoints
        c_data = c_data[:10]

        # Separate into negative and positive changepoints
        cpos_data = c_data[c_data["delta"] > 0]
        cneg_data = c_data[c_data["delta"] < 0]

        # Changepoints and data
        if not search:

            print("\nChangepoints sorted by slope rate of change (2nd derivative):\n")
            print(c_data.loc[:, ["Date", "Adj. Close", "delta"]][:5])

            # Line plot showing actual values, estimated values, and changepoints
            self.reset_plot()

            # Set up line plot
            plt.plot(train["ds"], train["y"], "ko", ms=4, label="Stock Price")
            plt.plot(
                future["ds"],
                future["yhat"],
                color="navy",
                linewidth=2.0,
                label="Modeled",
            )

            # Changepoints as vertical lines
            plt.vlines(
                cpos_data["ds"].dt.to_pydatetime(),
                ymin=min(train["y"]),
                ymax=max(train["y"]),
                linestyles="dashed",
                color="darkgreen",
                linewidth=1.2,
                label="Positive Changepoints",
            )
            plt.vlines(
                cneg_data["ds"].dt.to_pydatetime(),
                ymin=min(train["y"]),
                ymax=max(train["y"]),
                linestyles="dashed",
                color="r",
                linewidth=1.2,
                label="Negative Changepoints",
            )

            plt.legend(prop={"size": 10})
            plt.xlabel("Date")
            plt.ylabel("Price ($)")
            plt.title("Stock Price with Changepoints")
            plt.show()

        # Search for search term in google news
        # Show related queries, rising related queries
        # Graph changepoints, search frequency, stock price
        if search:

            # Date range in the 'YYYY-MM-DD YYYY-MM-DD' format expected by pytrends
            date_range = [
                "%s %s"
                % (str(min(train["Date"]).date()), str(max(train["Date"]).date()))
            ]

            # Get the Google Trends for specified terms and join to training dataframe
            trends, related_queries = self.retrieve_google_trends(search, date_range)

            if (trends is None) or (related_queries is None):
                print("No search trends found for %s" % search)
                return

            print("\n Top Related Queries: \n")
            print(related_queries[search]["top"].head())

            print("\n Rising Related Queries: \n")
            print(related_queries[search]["rising"].head())

            # Upsample the data to daily for joining with training data
            trends = trends.resample("D").asfreq()

            trends = trends.reset_index(level=0)
            trends = trends.rename(columns={"date": "ds", search: "freq"})

            # Interpolate the frequency
            trends["freq"] = trends["freq"].interpolate()

            # Merge with the training data
            train = pd.merge(train, trends, on="ds", how="inner")

            # Normalize values
            train["y_norm"] = train["y"] / max(train["y"])
            train["freq_norm"] = train["freq"] / max(train["freq"])

            self.reset_plot()

            # Plot the normalized stock price and normalized search frequency
            plt.plot(train["ds"], train["y_norm"], "k-", label="Stock Price")
            plt.plot(
                train["ds"],
                train["freq_norm"],
                color="goldenrod",
                label="Search Frequency",
            )

            # Changepoints as vertical lines
            plt.vlines(
                cpos_data["ds"].dt.to_pydatetime(),
                ymin=0,
                ymax=1,
                linestyles="dashed",
                color="darkgreen",
                linewidth=1.2,
                label="Positive Changepoints",
            )
            plt.vlines(
                cneg_data["ds"].dt.to_pydatetime(),
                ymin=0,
                ymax=1,
                linestyles="dashed",
                color="r",
                linewidth=1.2,
                label="Negative Changepoints",
            )

            # Plot formatting
            plt.legend(prop={"size": 10})
            plt.xlabel("Date")
            plt.ylabel("Normalized Values")
            plt.title(
                "%s Stock Price and Search Frequency for %s" % (self.symbol, search)
            )
            plt.show()

    # Predict the future price for a given range of days
    def predict_future(self, days=30):

        # Use past self.training_years years for training
        train = self.stock[
            self.stock["Date"]
            > (max(self.stock["Date"]) - pd.DateOffset(years=self.training_years))
        ]

        model = self.create_model()
        model.fit(train)

        # Future dataframe with specified number of days to predict
        future = model.make_future_dataframe(periods=days, freq="D")
        future = model.predict(future)

        # Only concerned with future dates
        future = future[future["ds"] >= max(self.stock["Date"])]

        # Remove the weekends
        future = self.remove_weekends(future)

        # Calculate whether increase or not
        future["diff"] = future["yhat"].diff()

        future = future.dropna()

        # Find the prediction direction and create separate dataframes
        future["direction"] = (future["diff"] > 0) * 1

        # Rename the columns for presentation
        future = future.rename(
            columns={
                "ds": "Date",
                "yhat": "estimate",
                "diff": "change",
                "yhat_upper": "upper",
                "yhat_lower": "lower",
            }
        )

        future_increase = future[future["direction"] == 1]
        future_decrease = future[future["direction"] == 0]

        # Print out the dates
        print("\nPredicted Increase: \n")
        print(future_increase[["Date", "estimate", "change", "upper", "lower"]])

        print("\nPredicted Decrease: \n")
        print(future_decrease[["Date", "estimate", "change", "upper", "lower"]])

        self.reset_plot()

        # Set up plot
        plt.style.use("fivethirtyeight")
        matplotlib.rcParams["axes.labelsize"] = 10
        matplotlib.rcParams["xtick.labelsize"] = 8
        matplotlib.rcParams["ytick.labelsize"] = 8
        matplotlib.rcParams["axes.titlesize"] = 12

        # Plot the predictions and indicate if increase or decrease
        fig, ax = plt.subplots(1, 1, figsize=(8, 6))

        # Plot the estimates
        ax.plot(
            future_increase["Date"],
            future_increase["estimate"],
            "g^",
            ms=12,
            label="Pred. Increase",
        )
        ax.plot(
            future_decrease["Date"],
            future_decrease["estimate"],
            "rv",
            ms=12,
            label="Pred. Decrease",
        )

        # Plot errorbars
        ax.errorbar(
            future["Date"].dt.to_pydatetime(),
            future["estimate"],
            yerr=future["upper"] - future["lower"],
            capthick=1.4,
            color="k",
            linewidth=2,
            ecolor="darkblue",
            capsize=4,
            elinewidth=1,
            label="Pred with Range",
        )

        # Plot formatting
        plt.legend(loc=2, prop={"size": 10})
        plt.xticks(rotation=45)
        plt.ylabel("Predicted Stock Price (US $)")
        plt.xlabel("Date")
        plt.title("Predictions for %s" % self.symbol)
        plt.show()

    def changepoint_prior_validation(
        self, start_date=None, end_date=None, changepoint_priors=[0.001, 0.05, 0.1, 0.2]
    ):

        # Default start date is two years before end of data
        # Default end date is one year before end of data
        if start_date is None:
            start_date = self.max_date - pd.DateOffset(years=2)
        if end_date is None:
            end_date = self.max_date - pd.DateOffset(years=1)

        # Convert to pandas datetime for indexing dataframe
        start_date = pd.to_datetime(start_date)
        end_date = pd.to_datetime(end_date)

        start_date, end_date = self.handle_dates(start_date, end_date)

        # Select self.training_years number of years
        train = self.stock[
            (
                self.stock["Date"]
                > (start_date - pd.DateOffset(years=self.training_years))
            )
            & (self.stock["Date"] < start_date)
        ]

        # Testing data is specified by range
        test = self.stock[
            (self.stock["Date"] >= start_date) & (self.stock["Date"] <= end_date)
        ]

        eval_days = (max(test["Date"]) - min(test["Date"])).days

        results = pd.DataFrame(
            0,
            index=list(range(len(changepoint_priors))),
            columns=["cps", "train_err", "train_range", "test_err", "test_range"],
        )

        print(
            "\nValidation Range {} to {}.\n".format(
                min(test["Date"]), max(test["Date"])
            )
        )

        # Iterate through all the changepoints and make models
        for i, prior in enumerate(changepoint_priors):
            results.loc[i, "cps"] = prior

            # Select the changepoint
            self.changepoint_prior_scale = prior

            # Create and train a model with the specified cps
            model = self.create_model()
            model.fit(train)
            future = model.make_future_dataframe(periods=eval_days, freq="D")
            future = model.predict(future)

            # Training results and metrics
            train_results = pd.merge(
                train,
                future[["ds", "yhat", "yhat_upper", "yhat_lower"]],
                on="ds",
                how="inner",
            )
            avg_train_error = np.mean(abs(train_results["y"] - train_results["yhat"]))
            avg_train_uncertainty = np.mean(
                abs(train_results["yhat_upper"] - train_results["yhat_lower"])
            )

            results.loc[i, "train_err"] = avg_train_error
            results.loc[i, "train_range"] = avg_train_uncertainty

            # Testing results and metrics
            test_results = pd.merge(
                test,
                future[["ds", "yhat", "yhat_upper", "yhat_lower"]],
                on="ds",
                how="inner",
            )
            avg_test_error = np.mean(abs(test_results["y"] - test_results["yhat"]))
            avg_test_uncertainty = np.mean(
                abs(test_results["yhat_upper"] - test_results["yhat_lower"])
            )

            results.loc[i, "test_err"] = avg_test_error
            results.loc[i, "test_range"] = avg_test_uncertainty

        print(results)

        # Plot of training and testing average errors
        self.reset_plot()

        plt.plot(results["cps"], results["train_err"], "bo-", ms=8, label="Train Error")
        plt.plot(results["cps"], results["test_err"], "r*-", ms=8, label="Test Error")
        plt.xlabel("Changepoint Prior Scale")
        plt.ylabel("Avg. Absolute Error ($)")
        plt.title("Training and Testing Curves as Function of CPS")
        plt.grid(color="k", alpha=0.3)
        plt.xticks(results["cps"], results["cps"])
        plt.legend(prop={"size": 10})
        plt.show()

        # Plot of training and testing average uncertainty
        self.reset_plot()

        plt.plot(
            results["cps"], results["train_range"], "bo-", ms=8, label="Train Range"
        )
        plt.plot(results["cps"], results["test_range"], "r*-", ms=8, label="Test Range")
        plt.xlabel("Changepoint Prior Scale")
        plt.ylabel("Avg. Uncertainty ($)")
        plt.title("Uncertainty in Estimate as Function of CPS")
        plt.grid(color="k", alpha=0.3)
        plt.xticks(results["cps"], results["cps"])
        plt.legend(prop={"size": 10})
        plt.show()
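

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original class). It assumes the Quandl
# WIKI dataset is reachable and that a personal API key has been set via
# quandl.ApiConfig.api_key; the ticker "MSFT" is only an illustrative example.
# ---------------------------------------------------------------------------
if __name__ == "__main__":

    # Build a Stocker object for a ticker available in the WIKI dataset
    microsoft = Stocker("MSFT")

    # Plot the adjusted closing price history
    microsoft.plot_stock()

    # Fit a Prophet model on the last training_years of data and
    # extend the forecast 90 days past the end of the data
    model, future = microsoft.create_prophet_model(days=90)

    # Evaluate one year of held-out predictions, simulating a 1000-share strategy
    microsoft.evaluate_prediction(nshares=1000)

    # Compare changepoint prior scales on a validation period
    microsoft.changepoint_prior_validation(changepoint_priors=[0.001, 0.05, 0.1, 0.2])

    # Forecast the next 30 days (weekends removed)
    microsoft.predict_future(days=30)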