# weighter.py
# os for deleting images
import os

# matplotlib for plotting
import matplotlib
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
# pandas and numpy for data manipulation
import numpy as np
import pandas as pd
# fbprophet for additive models
import fbprophet
# gspread for Google Sheets access
import gspread
# oauth2client for authorizing access to Google Sheets
from oauth2client.service_account import ServiceAccountCredentials
# slacker for interacting with Slack
from slacker import Slacker


  18. class Weighter:
  19. """
  20. When weighter is initialized, we need to convert the usernames,
  21. get a dictionary of the unrecorded entries, construct a dictionary
  22. of the actions to take, and make sure all data is formatted correctly
  23. """
  24. def __init__(self, weights, gsheet, slack):
  25. # Weights is a dataframe
  26. self.weights = weights.copy()
  27. self.gsheet = gsheet
  28. self.slack = slack
  29. # Users is a list of the unique users in the data
  30. self.users = list(set(self.weights["Name"]))
  31. correct_names = []
  32. # Name Changes
  33. for user in self.weights["Name"]:
  34. # Have to hardcode in name Changes
  35. if user == "koehrcl":
  36. correct_names.append("Craig")
  37. elif user == "willkoehrsen":
  38. correct_names.append("Will")
  39. elif user == "fletcher":
  40. correct_names.append("Fletcher")
  41. # Currently do not handle new users
  42. else:
  43. print("New User Detected")
  44. return
  45. self.weights["Name"] = correct_names
  46. # Users is a list of the unique users in the data
  47. self.users = list(set(self.weights["Name"]))
  48. # Create a dataframe of the unrecorded entries
  49. self.unrecorded = self.weights[self.weights["Record"] != True]
  50. # Process the unrecorded entries
  51. self.process_unrecorded()
  52. # The remaning entries will all be weights
  53. self.weights["Entry"] = [float(weight) for weight in self.weights["Entry"]]
  54. # Build the user dictionary
  55. self.build_user_dict()
  56. # Calculate the change and percentage change columns
  57. self.calculate_columns()
  58. """
  59. Constructs a dictionary for each user with critical information
  60. This forms the basis for the summarize function
  61. """
  62. def build_user_dict(self):
  63. user_dict = {}
  64. user_goals = {"Craig": 215.0, "Fletcher": 200.0, "Will": 155.0}
  65. user_colors = {"Craig": "forestgreen", "Fletcher": "navy", "Will": "darkred"}
  66. for i, user in enumerate(self.users):
  67. user_weights = self.weights[self.weights["Name"] == user]
  68. goal = user_goals.get(user)
  69. start_weight = user_weights.ix[min(user_weights.index), "Entry"]
  70. start_date = min(user_weights.index)
  71. # Find minimum weight and date on which it occurs
  72. min_weight = min(user_weights["Entry"])
  73. min_weight_date = (user_weights[user_weights["Entry"] == min_weight].index)[
  74. 0
  75. ]
  76. # Find maximum weight and date on which it occurs
  77. max_weight = max(user_weights["Entry"])
  78. max_weight_date = (user_weights[user_weights["Entry"] == max_weight].index)[
  79. 0
  80. ]
  81. most_recent_weight = user_weights.ix[max(user_weights.index), "Entry"]
  82. if goal < start_weight:
  83. change = start_weight - most_recent_weight
  84. obj = "lose"
  85. elif goal > start_weight:
  86. change = most_recent_weight - start_weight
  87. obj = "gain"
  88. pct_change = 100 * change / start_weight
  89. pct_to_goal = 100 * (change / abs(start_weight - goal))
  90. # Color for plotting
  91. user_color = user_colors[user]
  92. user_dict[user] = {
  93. "min_weight": min_weight,
  94. "max_weight": max_weight,
  95. "min_date": min_weight_date,
  96. "max_date": max_weight_date,
  97. "recent": most_recent_weight,
  98. "abs_change": change,
  99. "pct_change": pct_change,
  100. "pct_towards_goal": pct_to_goal,
  101. "start_weight": start_weight,
  102. "start_date": start_date,
  103. "goal_weight": goal,
  104. "objective": obj,
  105. "color": user_color,
  106. }
  107. self.user_dict = user_dict
  108. """
  109. Builds a dictionary of unrecorded entries where each key is the user
  110. and the value is a list of weights and methods called for by the user.
  111. This dictionary is saved as the entries attribute of the class.
  112. Removes the none weights from the data and from the google sheet.
  113. """
  114. def process_unrecorded(self):
  115. entries = {name: [] for name in self.users}
  116. drop = []
  117. location = {}
  118. for index in self.unrecorded.index:
  119. entry = self.unrecorded.ix[index, "Entry"]
  120. user = str(self.unrecorded.ix[index, "Name"])
  121. # Try and except does not seem like the best way to handle this
  122. try:
  123. entry = float(entry)
  124. entries[user].append(entry)
  125. location[index] = True
  126. except:
  127. entry = str(entry)
  128. entries[user].append(entry.strip())
  129. location[index] = "remove"
  130. drop.append(index)
  131. self.weights.ix[index, "Record"] = True
  132. # Indexes of new entries
  133. self.location = location
  134. # Update the Google Sheet before dropping
  135. self.update_sheet()
  136. # Drop the rows which do not contain a weight
  137. self.weights.drop(drop, axis=0, inplace=True)
  138. # Entries is all of the new entries
  139. self.entries = entries
  140. """
  141. Update the Google Spreadsheet. This involves removing the rows without weight
  142. entries and putting a True in the record column for all weights.
  143. """
  144. def update_sheet(self):
  145. delete_count = 0
  146. # Iterate through the locations and update as appropriate
  147. for index, action in self.location.items():
  148. cell_row = (np.where(self.weights.index == index))[0][0] + 2 - delete_count
  149. if action == "remove":
  150. self.gsheet.delete_row(index=cell_row)
  151. delete_count += 1
  152. elif action:
  153. self.gsheet.update_acell(label="D%d" % cell_row, val="True")
  154. """
  155. Iterates through the unrecorded entries and delegates
  156. each one to the appropriate method.
  157. Updates the record cell in the google sheet
  158. """
  159. def process_entries(self):
  160. for user, user_entries in self.entries.items():
  161. for entry in user_entries:
  162. # If a weight, display the basic message
  163. if type(entry) == float:
  164. self.basic_message(user)
  165. # If the message is a string hand off to the appropriate function
  166. else:
  167. # Require at lesat 8 days of data
  168. if len(self.weights[self.weights["Name"] == user]) < 8:
  169. message = (
  170. "\nAt least 8 days of data required for detailed analysis."
  171. )
  172. self.slack.chat.post_message(
  173. channel="#weight_tracker",
  174. text=message,
  175. username="Data Analyst",
  176. icon_emoji=":calendar:",
  177. )
  178. elif entry.lower() == "summary":
  179. self.summary(user)
  180. elif entry.lower() == "percent":
  181. self.percentage_plot()
  182. elif entry.lower() == "history":
  183. self.history_plot(user)
  184. elif entry.lower() == "future":
  185. self.future_plot(user)
  186. elif entry.lower() == "analysis":
  187. self.analyze(user)
  188. # Display a help message if the string is not valid
  189. else:
  190. message = (
  191. "\nPlease enter a valid message:\n\n"
  192. "Your weight\n"
  193. "'Summary' to see a personal summary\n"
  194. "'Percent' to see a plot of all users percentage changes\n"
  195. "'History' to see a plot of your personal history\n"
  196. "'Future' to see your predictions for the next thirty days\n"
  197. "'Analysis' to view personalized advice\n"
  198. "For more help, contact @koehrsen_will on Twitter.\n"
  199. )
  200. self.slack.chat.post_message(
  201. channel="#weight_tracker",
  202. text=message,
  203. username="Help",
  204. icon_emoji=":interrobang:",
  205. )
  206. """
  207. Adds the change and percentage change columns to the self.weights df
  208. """
  209. def calculate_columns(self):
  210. self.weights = self.weights.sort_values("Name")
  211. self.weights["change"] = 0
  212. self.weights["pct_change"] = 0
  213. self.weights.reset_index(level=0, inplace=True)
  214. for index in self.weights.index:
  215. user = self.weights.ix[index, "Name"]
  216. weight = self.weights.ix[index, "Entry"]
  217. start_weight = self.user_dict[user]["start_weight"]
  218. objective = self.user_dict[user]["objective"]
  219. if objective == "lose":
  220. self.weights.ix[index, "change"] = start_weight - weight
  221. self.weights.ix[index, "pct_change"] = (
  222. 100 * (start_weight - weight) / start_weight
  223. )
  224. elif objective == "gain":
  225. self.weights.ix[index, "change"] = weight - start_weight
  226. self.weights.ix[index, "pct_change"] = (
  227. 100 * (weight - start_weight) / start_weight
  228. )
  229. self.weights.set_index("Date", drop=True, inplace=True)
  230. """
  231. This method is automatically run for each new weight
  232. """
  233. def basic_message(self, user):
  234. # Find information for user, construct message, post message to Slack
  235. user_info = self.user_dict.get(user)
  236. message = (
  237. "\n{}: Total Weight Change = {:.2f} lbs.\n\n"
  238. "Percentage Weight Change = {:.2f}%\n"
  239. ).format(user, user_info["abs_change"], user_info["pct_change"])
  240. self.slack.chat.post_message(
  241. "#weight_tracker", text=message, username="Update", icon_emoji=":scales:"
  242. )
  243. """
  244. Displays comprehensive stats about the user
  245. """
  246. def summary(self, user):
  247. user_info = self.user_dict.get(user)
  248. message = (
  249. "\n{}, your most recent weight was {:.2f} lbs.\n\n"
  250. "Absolute weight change = {:.2f} lbs, percentage weight change = {:.2f}%.\n\n"
  251. "Minimum weight = {:.2f} lbs on {} and maximum weight = {:.2f} lbs on {}.\n\n"
  252. "Your goal weight = {:.2f} lbs. and you are {:.2f}% of the way there.\n\n"
  253. "You started at {:.2f} lbs on {}. Congratulations on the progress!\n"
  254. ).format(
  255. user,
  256. user_info["recent"],
  257. user_info["abs_change"],
  258. user_info["pct_change"],
  259. user_info["min_weight"],
  260. str(user_info["min_date"].date()),
  261. user_info["max_weight"],
  262. str(user_info["max_date"].date()),
  263. user_info["goal_weight"],
  264. user_info["pct_towards_goal"],
  265. user_info["start_weight"],
  266. str(user_info["start_date"].date()),
  267. )
  268. self.slack.chat.post_message(
  269. "#weight_tracker",
  270. text=message,
  271. username="Summary",
  272. icon_emoji=":earth_africa:",
  273. )
  274. """
  275. Reset the plot and institute basic parameters
  276. """
  277. @staticmethod
  278. def reset_plot():
  279. matplotlib.rcParams.update(matplotlib.rcParamsDefault)
  280. matplotlib.rcParams["text.color"] = "k"
  281. """
  282. Plot of all users percentage changes.
  283. Includes polynomial fits (degree may need to be adjusted).
  284. """
  285. def percentage_plot(self):
  286. self.reset_plot()
  287. plt.style.use("fivethirtyeight")
  288. plt.figure(figsize=(10, 8))
  289. for i, user in enumerate(self.users):
  290. user_color = self.user_dict[user]["color"]
  291. # Select the user and order dataframe by date
  292. df = self.weights[self.weights["Name"] == user]
  293. df.sort_index(inplace=True)
  294. # List is used for fitting polynomial
  295. xvalues = list(range(len(df)))
  296. # Create a polynomial fit
  297. z = np.polyfit(xvalues, df["pct_change"], deg=6)
  298. # Create a function from the fit
  299. p = np.poly1d(z)
  300. # Map the x values to y values
  301. fit_data = p(xvalues)
  302. # Plot the actual points and the fit
  303. plt.plot(
  304. df.index,
  305. df["pct_change"],
  306. "o",
  307. color=user_color,
  308. label="%s Observations" % user,
  309. )
  310. plt.plot(
  311. df.index,
  312. fit_data,
  313. "-",
  314. color=user_color,
  315. linewidth=5,
  316. label="%s Smooth Fit" % user,
  317. )
  318. # Plot formatting
  319. plt.xlabel("Date")
  320. plt.ylabel("% Change from Start")
  321. plt.title("Percentage Changes")
  322. plt.grid(color="k", alpha=0.4)
  323. plt.legend(prop={"size": 14})
  324. plt.savefig(
  325. "C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\percentage_plot.png"
  326. )
  327. self.slack.files.upload(
  328. "C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\percentage_plot.png",
  329. channels="#weight_tracker",
  330. title="Percent Plot",
  331. )
  332. os.remove(
  333. "C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\percentage_plot.png"
  334. )
  335. """
  336. Plot of a single user's history.
  337. Also plot a polynomial fit on the observations.
  338. """
  339. def history_plot(self, user):
  340. self.reset_plot()
  341. plt.style.use("fivethirtyeight")
  342. plt.figure(figsize=(10, 8))
  343. df = self.weights[self.weights["Name"] == user]
  344. df.sort_index(inplace=True)
  345. user_color = self.user_dict[user]["color"]
  346. # List is used for fitting polynomial
  347. xvalues = list(range(len(df)))
  348. # Create a polynomial fit
  349. z = np.polyfit(xvalues, df["Entry"], deg=6)
  350. # Create a function from the fit
  351. p = np.poly1d(z)
  352. # Map the x values to y values
  353. fit_data = p(xvalues)
  354. # Make a simple plot and upload to slack
  355. plt.plot(df.index, df["Entry"], "ko", ms=8, label="Observed")
  356. plt.plot(
  357. df.index, fit_data, "-", color=user_color, linewidth=5, label="Smooth Fit"
  358. )
  359. plt.xlabel("Date")
  360. plt.ylabel("Weight (lbs)")
  361. plt.title("%s Weight History" % user)
  362. plt.legend(prop={"size": 14})
  363. plt.savefig(
  364. fname="C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\history_plot.png"
  365. )
  366. self.slack.files.upload(
  367. "C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\history_plot.png",
  368. channels="#weight_tracker",
  369. title="%s History" % user,
  370. )
  371. # Remove the plot from local storage
  372. os.remove(
  373. "C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\history_plot.png"
  374. )
  375. """
  376. Create a prophet model for forecasting and trend analysis.
  377. Might need to adjust model hyperparameters.
  378. """
  379. def prophet_model(self):
  380. model = fbprophet.Prophet(daily_seasonality=False, yearly_seasonality=False)
  381. return model
  382. """
  383. Plot the prophet forecast for the next thirty days
  384. Print the expected weight at the end of the forecast
  385. """
  386. def future_plot(self, user):
  387. self.reset_plot()
  388. df = self.weights[self.weights["Name"] == user]
  389. dates = [date.date() for date in df.index]
  390. df["ds"] = dates
  391. df["y"] = df["Entry"]
  392. df.sort_index(inplace=True)
  393. # Prophet model
  394. model = self.prophet_model()
  395. model.fit(df)
  396. # Future dataframe for predictions
  397. future = model.make_future_dataframe(periods=30, freq="D")
  398. future = model.predict(future)
  399. color = self.user_dict[user]["color"]
  400. # Write a message and post to slack
  401. message = "{} Your predicted weight on {} = {:.2f} lbs.".format(
  402. user, max(future["ds"]).date(), future.ix[len(future) - 1, "yhat"]
  403. )
  404. self.slack.chat.post_message(
  405. channel="#weight_tracker",
  406. text=message,
  407. username="The Future",
  408. icon_emoji=":city_sunrise:",
  409. )
  410. # Create the plot and upload to slack
  411. fig, ax = plt.subplots(1, 1, figsize=(10, 8))
  412. ax.plot(df["ds"], df["y"], "o", color="k", ms=8, label="observations")
  413. ax.plot(future["ds"], future["yhat"], "-", color=color, label="modeled")
  414. ax.fill_between(
  415. future["ds"].dt.to_pydatetime(),
  416. future["yhat_upper"],
  417. future["yhat_lower"],
  418. facecolor=color,
  419. alpha=0.4,
  420. edgecolor="k",
  421. linewidth=1.8,
  422. label="confidence interval",
  423. )
  424. plt.xlabel("Date")
  425. plt.ylabel("Weight (lbs)")
  426. plt.title("%s 30 Day Prediction" % user)
  427. plt.legend()
  428. plt.savefig(
  429. "C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\future_plot.png"
  430. )
  431. self.slack.files.upload(
  432. "C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\future_plot.png",
  433. channels="#weight_tracker",
  434. title="%s Future Predictions" % user,
  435. )
  436. os.remove(
  437. "C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\future_plot.png"
  438. )
  439. """
  440. Analyze user trends and provide recommendations.
  441. Determine if the user is on track to meet their goal.
  442. """
  443. def analyze(self, user):
  444. self.reset_plot()
  445. # Get user info and sort dataframe by date
  446. info = self.user_dict.get(user)
  447. goal_weight = info["goal_weight"]
  448. df = self.weights[self.weights["Name"] == user]
  449. df = df.sort_index()
  450. df["ds"] = [date.date() for date in df.index]
  451. df["y"] = df["Entry"]
  452. model = self.prophet_model()
  453. model.fit(df)
  454. prediction_days = 2 * len(df)
  455. future = model.make_future_dataframe(periods=prediction_days, freq="D")
  456. future = model.predict(future)
  457. # lbs change per day
  458. change_per_day = info["abs_change"] / (max(df["ds"]) - min(df["ds"])).days
  459. days_to_goal = abs(int((info["recent"] - goal_weight) / change_per_day))
  460. date_for_goal = max(df["ds"]) + pd.DateOffset(days=days_to_goal)
  461. # future dataframe where the user in above goal
  462. goal_future = future[future["yhat"] < goal_weight]
  463. # The additive model predicts the user will meet their goal
  464. if len(goal_future) > 0:
  465. model_goal_date = min(goal_future["ds"])
  466. message = (
  467. "\n{} Your average weight change per day is {:.2f} lbs\n"
  468. "Extrapolating the average loss per day, you will reach your goal of {} lbs in {} days on {}.\n\n"
  469. "The additive model predicts you will reach your goal on {}\n".format(
  470. user,
  471. change_per_day,
  472. goal_weight,
  473. days_to_goal,
  474. date_for_goal.date(),
  475. model_goal_date.date(),
  476. )
  477. )
  478. # The additive model does not predict the user will meet their goal
  479. else:
  480. final_future_date = max(future["ds"])
  481. message = (
  482. "\n{} Your average weight change per day is {:.2f} lbs\n\n"
  483. "Extrapolating the average loss per day, you will reach your goal of {} lbs in {} days on {}.\n\n"
  484. "The additive model does not forecast you reaching your goal by {}.\n".format(
  485. user,
  486. change_per_day,
  487. goal_weight,
  488. days_to_goal,
  489. date_for_goal.date(),
  490. final_future_date,
  491. )
  492. )
  493. self.slack.chat.post_message(
  494. channel="#weight_tracker",
  495. text=message,
  496. username="Analysis",
  497. icon_emoji=":bar_chart:",
  498. )
  499. # Identify Weekly Trends
  500. future["weekday"] = [date.weekday() for date in future["ds"]]
  501. future_weekly = future.groupby("weekday").mean()
  502. future_weekly.index = ["Mon", "Tues", "Wed", "Thurs", "Fri", "Sat", "Sun"]
  503. # Color labels based on the users objective
  504. colors = [
  505. "red"
  506. if (
  507. ((weight > 0) & (info["objective"] == "lose"))
  508. | ((weight < 0) & (info["objective"] == "gain"))
  509. )
  510. else "green"
  511. for weight in future_weekly["weekly"]
  512. ]
  513. self.reset_plot()
  514. # Create a bar plot with labels for positive and negative changes
  515. plt.figure(figsize=(10, 8))
  516. xvalues = list(range(len(future_weekly)))
  517. plt.bar(
  518. xvalues, future_weekly["weekly"], color=colors, edgecolor="k", linewidth=2
  519. )
  520. plt.xticks(xvalues, list(future_weekly.index))
  521. red_patch = mpatches.Patch(color="red", linewidth=2, label="Needs Work")
  522. green_patch = mpatches.Patch(color="green", linewidth=2, label="Solid")
  523. plt.legend(handles=[red_patch, green_patch])
  524. plt.xlabel("Day of Week")
  525. plt.ylabel("Trend (lbs)")
  526. plt.title("%s Weekly Trends" % user)
  527. plt.savefig(
  528. "C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\weekly_plot.png"
  529. )
  530. # Upload the image to slack and delete local file
  531. self.slack.files.upload(
  532. "C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\weekly_plot.png",
  533. channels="#weight_tracker",
  534. title="%s Weekly Trends" % user,
  535. )
  536. os.remove(
  537. "C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\weekly_plot.png"
  538. )