# weighter.py (22 KB)
# os for deleting images
import os

# pandas and numpy for data manipulation
import pandas as pd
import numpy as np
# fbprophet for additive models
import fbprophet
# gspread for Google Sheets access
import gspread
# slacker for interacting with Slack
from slacker import Slacker
# oauth2client for authorizing access to Google Sheets
from oauth2client.service_account import ServiceAccountCredentials
# matplotlib for plotting
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib


  18. class Weighter():
  19. """
  20. When weighter is initialized, we need to convert the usernames,
  21. get a dictionary of the unrecorded entries, construct a dictionary
  22. of the actions to take, and make sure all data is formatted correctly
  23. """
  24. def __init__(self, weights, gsheet, slack):
  25. # Weights is a dataframe
  26. self.weights = weights.copy()
  27. self.gsheet = gsheet
  28. self.slack = slack
  29. # Users is a list of the unique users in the data
  30. self.users = list(set(self.weights['Name']))
  31. correct_names = []
  32. # Name Changes
  33. for user in self.weights['Name']:
  34. # Have to hardcode in name Changes
  35. if user == 'koehrcl':
  36. correct_names.append('Craig')
  37. elif user == 'willkoehrsen':
  38. correct_names.append('Will')
  39. elif user == 'fletcher':
  40. correct_names.append('Fletcher')
  41. # Currently do not handle new users
  42. else:
  43. print('New User Detected')
  44. return
  45. self.weights['Name'] = correct_names
  46. # Users is a list of the unique users in the data
  47. self.users = list(set(self.weights['Name']))
  48. # Create a dataframe of the unrecorded entries
  49. self.unrecorded = self.weights[self.weights['Record'] != True]
  50. # Process the unrecorded entries
  51. self.process_unrecorded()
  52. # The remaning entries will all be weights
  53. self.weights['Entry'] = [float(weight) for weight in self.weights['Entry']]
  54. # Build the user dictionary
  55. self.build_user_dict()
  56. # Calculate the change and percentage change columns
  57. self.calculate_columns()
  58. """
  59. Constructs a dictionary for each user with critical information
  60. This forms the basis for the summarize function
  61. """
  62. def build_user_dict(self):
  63. user_dict = {}
  64. user_goals = {'Craig': 215.0, 'Fletcher': 200.0, 'Will': 155.0}
  65. user_colors = {'Craig': 'forestgreen', 'Fletcher': 'navy', 'Will': 'darkred'}
  66. for i, user in enumerate(self.users):
  67. user_weights = self.weights[self.weights['Name'] == user]
  68. goal = user_goals.get(user)
  69. start_weight = user_weights.ix[min(user_weights.index), 'Entry']
  70. start_date = min(user_weights.index)
  71. # Find minimum weight and date on which it occurs
  72. min_weight = min(user_weights['Entry'])
  73. min_weight_date = ((user_weights[user_weights['Entry'] == min_weight].index)[0])
  74. # Find maximum weight and date on which it occurs
  75. max_weight = max(user_weights['Entry'])
  76. max_weight_date = ((user_weights[user_weights['Entry'] == max_weight].index)[0])
  77. most_recent_weight = user_weights.ix[max(user_weights.index), 'Entry']
  78. if goal < start_weight:
  79. change = start_weight - most_recent_weight
  80. obj = 'lose'
  81. elif goal > start_weight:
  82. change = most_recent_weight - start_weight
  83. obj = 'gain'
  84. pct_change = 100 * change / start_weight
  85. pct_to_goal = 100 * (change / abs(start_weight - goal) )
  86. # Color for plotting
  87. user_color = user_colors[user]
  88. user_dict[user] = {'min_weight': min_weight, 'max_weight': max_weight,
  89. 'min_date': min_weight_date, 'max_date': max_weight_date,
  90. 'recent': most_recent_weight, 'abs_change': change,
  91. 'pct_change': pct_change, 'pct_towards_goal': pct_to_goal,
  92. 'start_weight': start_weight, 'start_date': start_date,
  93. 'goal_weight': goal, 'objective': obj, 'color': user_color}
  94. self.user_dict = user_dict
  95. """
  96. Builds a dictionary of unrecorded entries where each key is the user
  97. and the value is a list of weights and methods called for by the user.
  98. This dictionary is saved as the entries attribute of the class.
  99. Removes the none weights from the data and from the google sheet.
  100. """
  101. def process_unrecorded(self):
  102. entries = {name:[] for name in self.users}
  103. drop = []
  104. location = {}
  105. for index in self.unrecorded.index:
  106. entry = self.unrecorded.ix[index, 'Entry']
  107. user = str(self.unrecorded.ix[index, 'Name'])
  108. # Try and except does not seem like the best way to handle this
  109. try:
  110. entry = float(entry)
  111. entries[user].append(entry)
  112. location[index] = True
  113. except:
  114. entry = str(entry)
  115. entries[user].append(entry.strip())
  116. location[index] = 'remove'
  117. drop.append(index)
  118. self.weights.ix[index, 'Record'] = True
  119. # Indexes of new entries
  120. self.location = location
  121. # Update the Google Sheet before dropping
  122. self.update_sheet()
  123. # Drop the rows which do not contain a weight
  124. self.weights.drop(drop, axis=0, inplace=True)
  125. # Entries is all of the new entries
  126. self.entries = entries
  127. """
  128. Update the Google Spreadsheet. This involves removing the rows without weight
  129. entries and putting a True in the record column for all weights.
  130. """
  131. def update_sheet(self):
  132. delete_count = 0
  133. # Iterate through the locations and update as appropriate
  134. for index, action in self.location.items():
  135. cell_row = (np.where(self.weights.index == index))[0][0] + 2 - delete_count
  136. if action == 'remove':
  137. self.gsheet.delete_row(index = cell_row)
  138. delete_count += 1
  139. elif action:
  140. self.gsheet.update_acell(label='D%d' % cell_row, val = 'True')
  141. """
  142. Iterates through the unrecorded entries and delegates
  143. each one to the appropriate method.
  144. Updates the record cell in the google sheet
  145. """
  146. def process_entries(self):
  147. for user, user_entries in self.entries.items():
  148. for entry in user_entries:
  149. # If a weight, display the basic message
  150. if type(entry) == float:
  151. self.basic_message(user)
  152. # If the message is a string hand off to the appropriate function
  153. else:
  154. # Require at lesat 8 days of data
  155. if len(self.weights[self.weights['Name'] == user]) < 8:
  156. message = "\nAt least 8 days of data required for detailed analysis."
  157. self.slack.chat.post_message(channel='#weight_tracker', text = message, username = "Data Analyst", icon_emoji=":calendar:")
  158. elif entry.lower() == 'summary':
  159. self.summary(user)
  160. elif entry.lower() == 'percent':
  161. self.percentage_plot()
  162. elif entry.lower() == 'history':
  163. self.history_plot(user)
  164. elif entry.lower() == 'future':
  165. self.future_plot(user)
  166. elif entry.lower() == 'analysis':
  167. self.analyze(user)
  168. # Display a help message if the string is not valid
  169. else:
  170. message = ("\nPlease enter a valid message:\n\n"
  171. "Your weight\n"
  172. "'Summary' to see a personal summary\n"
  173. "'Percent' to see a plot of all users percentage changes\n"
  174. "'History' to see a plot of your personal history\n"
  175. "'Future' to see your predictions for the next thirty days\n"
  176. "'Analysis' to view personalized advice\n"
  177. "For more help, contact @koehrsen_will on Twitter.\n")
  178. self.slack.chat.post_message(channel='#weight_tracker', text = message, username = "Help",
  179. icon_emoji=":interrobang:")
  180. """
  181. Adds the change and percentage change columns to the self.weights df
  182. """
  183. def calculate_columns(self):
  184. self.weights = self.weights.sort_values('Name')
  185. self.weights['change'] = 0
  186. self.weights['pct_change'] = 0
  187. self.weights.reset_index(level=0, inplace = True)
  188. for index in self.weights.index:
  189. user = self.weights.ix[index, 'Name']
  190. weight = self.weights.ix[index, 'Entry']
  191. start_weight = self.user_dict[user]['start_weight']
  192. objective = self.user_dict[user]['objective']
  193. if objective == 'lose':
  194. self.weights.ix[index, 'change'] = start_weight - weight
  195. self.weights.ix[index, 'pct_change'] = 100 * (start_weight - weight) / start_weight
  196. elif objective == 'gain':
  197. self.weights.ix[index, 'change'] = weight - start_weight
  198. self.weights.ix[index, 'pct_change'] = 100 * (weight - start_weight) / start_weight
  199. self.weights.set_index('Date', drop=True, inplace=True)
  200. """
  201. This method is automatically run for each new weight
  202. """
  203. def basic_message(self, user):
  204. # Find information for user, construct message, post message to Slack
  205. user_info = self.user_dict.get(user)
  206. message = ("\n{}: Total Weight Change = {:.2f} lbs.\n\n"
  207. "Percentage Weight Change = {:.2f}%\n").format(user, user_info['abs_change'],
  208. user_info['pct_change'])
  209. self.slack.chat.post_message('#weight_tracker', text=message, username='Update', icon_emoji=':scales:')
  210. """
  211. Displays comprehensive stats about the user
  212. """
  213. def summary(self, user):
  214. user_info = self.user_dict.get(user)
  215. message = ("\n{}, your most recent weight was {:.2f} lbs.\n\n"
  216. "Absolute weight change = {:.2f} lbs, percentage weight change = {:.2f}%.\n\n"
  217. "Minimum weight = {:.2f} lbs on {} and maximum weight = {:.2f} lbs on {}.\n\n"
  218. "Your goal weight = {:.2f} lbs. and you are {:.2f}% of the way there.\n\n"
  219. "You started at {:.2f} lbs on {}. Congratulations on the progress!\n").format(user,
  220. user_info['recent'], user_info['abs_change'], user_info['pct_change'],
  221. user_info['min_weight'], str(user_info['min_date'].date()),
  222. user_info['max_weight'], str(user_info['max_date'].date()),
  223. user_info['goal_weight'], user_info['pct_towards_goal'],
  224. user_info['start_weight'], str(user_info['start_date'].date()))
  225. self.slack.chat.post_message('#weight_tracker', text=message, username='Summary', icon_emoji=":earth_africa:")
  226. """
  227. Reset the plot and institute basic parameters
  228. """
  229. @staticmethod
  230. def reset_plot():
  231. matplotlib.rcParams.update(matplotlib.rcParamsDefault)
  232. matplotlib.rcParams['text.color'] = 'k'
  233. """
  234. Plot of all users percentage changes.
  235. Includes polynomial fits (degree may need to be adjusted).
  236. """
  237. def percentage_plot(self):
  238. self.reset_plot()
  239. plt.style.use('fivethirtyeight')
  240. plt.figure(figsize=(10,8))
  241. for i, user in enumerate(self.users):
  242. user_color = self.user_dict[user]['color']
  243. # Select the user and order dataframe by date
  244. df = self.weights[self.weights['Name'] == user]
  245. df.sort_index(inplace=True)
  246. # List is used for fitting polynomial
  247. xvalues = list(range(len(df)))
  248. # Create a polynomial fit
  249. z = np.polyfit(xvalues, df['pct_change'], deg=6)
  250. # Create a function from the fit
  251. p = np.poly1d(z)
  252. # Map the x values to y values
  253. fit_data = p(xvalues)
  254. # Plot the actual points and the fit
  255. plt.plot(df.index, df['pct_change'], 'o', color = user_color, label = '%s Observations' % user)
  256. plt.plot(df.index, fit_data, '-', color = user_color, linewidth = 5, label = '%s Smooth Fit' % user)
  257. # Plot formatting
  258. plt.xlabel('Date'); plt.ylabel('% Change from Start')
  259. plt.title('Percentage Changes')
  260. plt.grid(color='k', alpha=0.4)
  261. plt.legend(prop={'size':14})
  262. plt.savefig('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\percentage_plot.png')
  263. self.slack.files.upload('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\percentage_plot.png', channels='#weight_tracker', title="Percent Plot")
  264. os.remove('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\percentage_plot.png')
  265. """
  266. Plot of a single user's history.
  267. Also plot a polynomial fit on the observations.
  268. """
  269. def history_plot(self, user):
  270. self.reset_plot()
  271. plt.style.use('fivethirtyeight')
  272. plt.figure(figsize=(10, 8))
  273. df = self.weights[self.weights['Name'] == user]
  274. df.sort_index(inplace=True)
  275. user_color = self.user_dict[user]['color']
  276. # List is used for fitting polynomial
  277. xvalues = list(range(len(df)))
  278. # Create a polynomial fit
  279. z = np.polyfit(xvalues, df['Entry'], deg=6)
  280. # Create a function from the fit
  281. p = np.poly1d(z)
  282. # Map the x values to y values
  283. fit_data = p(xvalues)
  284. # Make a simple plot and upload to slack
  285. plt.plot(df.index, df['Entry'], 'ko', ms = 8, label = 'Observed')
  286. plt.plot(df.index, fit_data, '-', color = user_color, linewidth = 5, label = 'Smooth Fit')
  287. plt.xlabel('Date'); plt.ylabel('Weight (lbs)'); plt.title('%s Weight History' % user)
  288. plt.legend(prop={'size': 14});
  289. plt.savefig(fname='C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\history_plot.png')
  290. self.slack.files.upload('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\history_plot.png', channels='#weight_tracker', title="%s History" % user)
  291. # Remove the plot from local storage
  292. os.remove('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\history_plot.png')
  293. """
  294. Create a prophet model for forecasting and trend analysis.
  295. Might need to adjust model hyperparameters.
  296. """
  297. def prophet_model(self):
  298. model = fbprophet.Prophet(daily_seasonality=False, yearly_seasonality=False)
  299. return model
  300. """
  301. Plot the prophet forecast for the next thirty days
  302. Print the expected weight at the end of the forecast
  303. """
  304. def future_plot(self, user):
  305. self.reset_plot()
  306. df = self.weights[self.weights['Name'] == user]
  307. dates = [date.date() for date in df.index]
  308. df['ds'] = dates
  309. df['y'] = df['Entry']
  310. df.sort_index(inplace=True)
  311. # Prophet model
  312. model = self.prophet_model()
  313. model.fit(df)
  314. # Future dataframe for predictions
  315. future = model.make_future_dataframe(periods=30, freq='D')
  316. future = model.predict(future)
  317. color = self.user_dict[user]['color']
  318. # Write a message and post to slack
  319. message = ('{} Your predicted weight on {} = {:.2f} lbs.'.format(
  320. user, max(future['ds']).date(), future.ix[len(future) - 1, 'yhat']))
  321. self.slack.chat.post_message(channel="#weight_tracker", text=message, username = 'The Future', icon_emoji=":city_sunrise:")
  322. # Create the plot and upload to slack
  323. fig, ax = plt.subplots(1, 1, figsize=(10, 8))
  324. ax.plot(df['ds'], df['y'], 'o', color = 'k', ms = 8, label = 'observations')
  325. ax.plot(future['ds'], future['yhat'], '-', color = color, label = 'modeled')
  326. ax.fill_between(future['ds'].dt.to_pydatetime(), future['yhat_upper'], future['yhat_lower'], facecolor = color,
  327. alpha = 0.4, edgecolor = 'k', linewidth = 1.8, label = 'confidence interval')
  328. plt.xlabel('Date'); plt.ylabel('Weight (lbs)'); plt.title('%s 30 Day Prediction' % user)
  329. plt.legend()
  330. plt.savefig('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\future_plot.png')
  331. self.slack.files.upload('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\future_plot.png', channels="#weight_tracker", title="%s Future Predictions" % user)
  332. os.remove('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\future_plot.png')
  333. """
  334. Analyze user trends and provide recommendations.
  335. Determine if the user is on track to meet their goal.
  336. """
  337. def analyze(self, user):
  338. self.reset_plot()
  339. # Get user info and sort dataframe by date
  340. info = self.user_dict.get(user)
  341. goal_weight = info['goal_weight']
  342. df = self.weights[self.weights['Name'] == user]
  343. df = df.sort_index()
  344. df['ds'] = [date.date() for date in df.index]
  345. df['y'] = df['Entry']
  346. model = self.prophet_model()
  347. model.fit(df)
  348. prediction_days = 2 * len(df)
  349. future = model.make_future_dataframe(periods = prediction_days, freq = 'D')
  350. future = model.predict(future)
  351. # lbs change per day
  352. change_per_day = info['abs_change'] / (max(df['ds']) - min(df['ds'])).days
  353. days_to_goal = abs(int((info['recent'] - goal_weight) / change_per_day))
  354. date_for_goal = max(df['ds']) + pd.DateOffset(days=days_to_goal)
  355. # future dataframe where the user in above goal
  356. goal_future = future[future['yhat'] < goal_weight]
  357. # The additive model predicts the user will meet their goal
  358. if len(goal_future) > 0:
  359. model_goal_date = min(goal_future['ds'])
  360. message = ("\n{} Your average weight change per day is {:.2f} lbs\n"
  361. "Extrapolating the average loss per day, you will reach your goal of {} lbs in {} days on {}.\n\n"
  362. "The additive model predicts you will reach your goal on {}\n".format(
  363. user, change_per_day, goal_weight, days_to_goal, date_for_goal.date(), model_goal_date.date()))
  364. # The additive model does not predict the user will meet their goal
  365. else:
  366. final_future_date = max(future['ds'])
  367. message = ("\n{} Your average weight change per day is {:.2f} lbs\n\n"
  368. "Extrapolating the average loss per day, you will reach your goal of {} lbs in {} days on {}.\n\n"
  369. "The additive model does not forecast you reaching your goal by {}.\n".format(
  370. user, change_per_day, goal_weight, days_to_goal, date_for_goal.date(), final_future_date))
  371. self.slack.chat.post_message(channel="#weight_tracker", text=message, username="Analysis", icon_emoji=":bar_chart:")
  372. # Identify Weekly Trends
  373. future['weekday'] = [date.weekday() for date in future['ds']]
  374. future_weekly = future.groupby('weekday').mean()
  375. future_weekly.index = ['Mon', 'Tues', 'Wed', 'Thurs', 'Fri', 'Sat', 'Sun']
  376. # Color labels based on the users objective
  377. colors = ['red' if ( ((weight > 0) & (info['objective'] == 'lose')) | ((weight < 0) & (info['objective'] == 'gain'))) else 'green' for weight in future_weekly['weekly']]
  378. self.reset_plot()
  379. # Create a bar plot with labels for positive and negative changes
  380. plt.figure(figsize=(10, 8))
  381. xvalues = list(range(len(future_weekly)))
  382. plt.bar(xvalues, future_weekly['weekly'], color = colors, edgecolor = 'k', linewidth = 2)
  383. plt.xticks(xvalues, list(future_weekly.index))
  384. red_patch = mpatches.Patch(color='red', linewidth = 2, label='Needs Work')
  385. green_patch = mpatches.Patch(color='green', linewidth = 2, label='Solid')
  386. plt.legend(handles=[red_patch, green_patch])
  387. plt.xlabel('Day of Week')
  388. plt.ylabel('Trend (lbs)')
  389. plt.title('%s Weekly Trends' % user)
  390. plt.savefig('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\weekly_plot.png')
  391. # Upload the image to slack and delete local file
  392. self.slack.files.upload('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\weekly_plot.png', channels = '#weight_tracker', title="%s Weekly Trends" % user)
  393. os.remove('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\weekly_plot.png')