weighter.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540
  1. # pandas and numpy for data manipulation
  2. import pandas as pd
  3. import numpy as np
  4. # fbprophet for additive models
  5. import fbprophet
  6. # gspread for Google Sheets access
  7. import gspread
  8. # slacker for interacting with Slack
  9. from slacker import Slacker
  10. # oauth2client for authorizing access to Google Sheets
  11. from oauth2client.service_account import ServiceAccountCredentials
  12. # os for deleting images
  13. import os
  14. # matplotlib for plotting
  15. import matplotlib.pyplot as plt
  16. import matplotlib.patches as mpatches
  17. import matplotlib
  18. class Weighter():
  19. """
  20. When weighter is initialized, we need to convert the usernames,
  21. get a dictionary of the unrecorded entries, construct a dictionary
  22. of the actions to take, and make sure all data is formatted correctly
  23. """
  24. def __init__(self, weights, gsheet, slack):
  25. # Weights is a dataframe
  26. self.weights = weights.copy()
  27. self.gsheet = gsheet
  28. self.slack = slack
  29. # Users is a list of the unique users in the data
  30. self.users = list(set(self.weights['Name']))
  31. correct_names = []
  32. # Name Changes
  33. for user in self.weights['Name']:
  34. # Have to hardcode in name Changes
  35. if user == 'koehrcl':
  36. correct_names.append('Craig')
  37. elif user == 'willkoehrsen':
  38. correct_names.append('Will')
  39. elif user == 'fletcher':
  40. correct_names.append('Fletcher')
  41. # Currently do not handle new users
  42. else:
  43. print('New User Detected')
  44. return
  45. self.weights['Name'] = correct_names
  46. # Users is a list of the unique users in the data
  47. self.users = list(set(self.weights['Name']))
  48. # Create a dataframe of the unrecorded entries
  49. self.unrecorded = self.weights[self.weights['Record'] != True]
  50. # Process the unrecorded entries
  51. self.process_unrecorded()
  52. # The remaning entries will all be weights
  53. self.weights['Entry'] = [float(weight) for weight in self.weights['Entry']]
  54. # Build the user dictionary
  55. self.build_user_dict()
  56. # Calculate the change and percentage change columns
  57. self.calculate_columns()
  58. """
  59. Constructs a dictionary for each user with critical information
  60. This forms the basis for the summarize function
  61. """
  62. def build_user_dict(self):
  63. user_dict = {}
  64. user_goals = {'Craig': 215.0, 'Fletcher': 200.0, 'Will': 155.0}
  65. user_colors = {'Craig': 'forestgreen', 'Fletcher': 'navy', 'Will': 'darkred'}
  66. for i, user in enumerate(self.users):
  67. user_weights = self.weights[self.weights['Name'] == user]
  68. goal = user_goals.get(user)
  69. start_weight = user_weights.ix[min(user_weights.index), 'Entry']
  70. start_date = min(user_weights.index)
  71. # Find minimum weight and date on which it occurs
  72. min_weight = min(user_weights['Entry'])
  73. min_weight_date = ((user_weights[user_weights['Entry'] == min_weight].index)[0])
  74. # Find maximum weight and date on which it occurs
  75. max_weight = max(user_weights['Entry'])
  76. max_weight_date = ((user_weights[user_weights['Entry'] == max_weight].index)[0])
  77. most_recent_weight = user_weights.ix[max(user_weights.index), 'Entry']
  78. if goal < start_weight:
  79. change = start_weight - most_recent_weight
  80. obj = 'lose'
  81. elif goal > start_weight:
  82. change = most_recent_weight - start_weight
  83. obj = 'gain'
  84. pct_change = 100 * change / start_weight
  85. pct_to_goal = 100 * (change / abs(start_weight - goal) )
  86. # Color for plotting
  87. user_color = user_colors[user]
  88. user_dict[user] = {'min_weight': min_weight, 'max_weight': max_weight,
  89. 'min_date': min_weight_date, 'max_date': max_weight_date,
  90. 'recent': most_recent_weight, 'abs_change': change,
  91. 'pct_change': pct_change, 'pct_towards_goal': pct_to_goal,
  92. 'start_weight': start_weight, 'start_date': start_date,
  93. 'goal_weight': goal, 'objective': obj, 'color': user_color}
  94. self.user_dict = user_dict
  95. """
  96. Builds a dictionary of unrecorded entries where each key is the user
  97. and the value is a list of weights and methods called for by the user.
  98. This dictionary is saved as the entries attribute of the class.
  99. Removes the none weights from the data and from the google sheet.
  100. """
  101. def process_unrecorded(self):
  102. entries = {name:[] for name in self.users}
  103. drop = []
  104. location = {}
  105. for index in self.unrecorded.index:
  106. entry = self.unrecorded.ix[index, 'Entry']
  107. user = str(self.unrecorded.ix[index, 'Name'])
  108. # Try and except does not seem like the best way to handle this
  109. try:
  110. entry = float(entry)
  111. entries[user].append(entry)
  112. location[index] = True
  113. except:
  114. entry = str(entry)
  115. entries[user].append(entry.strip())
  116. location[index] = 'remove'
  117. drop.append(index)
  118. self.weights.ix[index, 'Record'] = True
  119. # Indexes of new entries
  120. self.location = location
  121. # Update the Google Sheet before dropping
  122. self.update_sheet()
  123. # Drop the rows which do not contain a weight
  124. self.weights.drop(drop, axis=0, inplace=True)
  125. # Entries is all of the new entries
  126. self.entries = entries
  127. """
  128. Update the Google Spreadsheet. This involves removing the rows without weight
  129. entries and putting a True in the record column for all weights.
  130. """
  131. def update_sheet(self):
  132. delete_count = 0
  133. # Iterate through the locations and update as appropriate
  134. for index, action in self.location.items():
  135. cell_row = (np.where(self.weights.index == index))[0][0] + 2 - delete_count
  136. if action == 'remove':
  137. self.gsheet.delete_row(index = cell_row)
  138. delete_count += 1
  139. elif action:
  140. self.gsheet.update_acell(label='D%d' % cell_row, val = 'True')
  141. """
  142. Iterates through the unrecorded entries and delegates
  143. each one to the appropriate method.
  144. Updates the record cell in the google sheet
  145. """
  146. def process_entries(self):
  147. for user, user_entries in self.entries.items():
  148. for entry in user_entries:
  149. # If a weight, display the basic message
  150. if type(entry) == float:
  151. self.basic_message(user)
  152. # If the message is a string hand off to the appropriate function
  153. else:
  154. # Require at lesat 8 days of data
  155. if len(self.weights[self.weights['Name'] == user]) < 8:
  156. message = "\nAt least 8 days of data required for detailed analysis."
  157. self.slack.chat.post_message(channel='#weight_tracker', message = message, username = "Weight Tracker Data Management")
  158. elif entry.lower() == 'summary':
  159. self.summary(user)
  160. elif entry.lower() == 'percent':
  161. self.percentage_plot()
  162. elif entry.lower() == 'history':
  163. self.history_plot(user)
  164. elif entry.lower() == 'future':
  165. self.future_plot(user)
  166. elif entry.lower() == 'analysis':
  167. self.analyze(user)
  168. # Display a help message if the string is not valid
  169. else:
  170. message = ("\nPlease enter a valid message:\n"
  171. "Your weight"
  172. "'Summary' to see a personal summary"
  173. "'Percent' to see a plot of all users percentage changes"
  174. "'History' to see a plot of your personal history"
  175. "'Future' to see your predictions for the next thirty days"
  176. "'Analysis' to view personalized advice\n"
  177. "For more help, contact @koehrsen_will on Twitter.\n")
  178. self.slack.chat.post_message(channel='#weight_tracker', message = message, username = "Weight Tracker Help")
  179. """
  180. Adds the change and percentage change columns to the self.weights df
  181. """
  182. def calculate_columns(self):
  183. self.weights = self.weights.sort_values('Name')
  184. self.weights['change'] = 0
  185. self.weights['pct_change'] = 0
  186. self.weights.reset_index(level=0, inplace = True)
  187. for index in self.weights.index:
  188. user = self.weights.ix[index, 'Name']
  189. weight = self.weights.ix[index, 'Entry']
  190. start_weight = self.user_dict[user]['start_weight']
  191. objective = self.user_dict[user]['objective']
  192. if objective == 'lose':
  193. self.weights.ix[index, 'change'] = start_weight - weight
  194. self.weights.ix[index, 'pct_change'] = 100 * (start_weight - weight) / start_weight
  195. elif objective == 'gain':
  196. self.weights.ix[index, 'change'] = weight - start_weight
  197. self.weights.ix[index, 'pct_change'] = 100 * (weight - start_weight) / start_weight
  198. self.weights.set_index('Date', drop=True, inplace=True)
  199. """
  200. This method is automatically run for each new weight
  201. """
  202. def basic_message(self, user):
  203. # Find information for user, construct message, post message to Slack
  204. user_info = self.user_dict.get(user)
  205. message = ("\n{}: Total Weight Change = {:.2f} lbs.\n\n"
  206. "Percentage Weight Change = {:.2f}%\n").format(user, user_info['abs_change'],
  207. user_info['pct_change'])
  208. self.slack.chat.post_message('#weight_tracker', text=message, username='Weight Challenge Update')
  209. """
  210. Displays comprehensive stats about the user
  211. """
  212. def summary(self, user):
  213. user_info = self.user_dict.get(user)
  214. message = ("\n{}, your most recent weight was {:.2f} lbs.\n\n"
  215. "Absolute weight change = {:.2f} lbs, percentage weight change = {:.2f}%.\n\n"
  216. "Minimum weight = {:.2f} lbs on {} and maximum weight = {:.2f} lbs on {}.\n\n"
  217. "Your goal weight = {:.2f} lbs. and you are {:.2f}% of the way there.\n\n"
  218. "You started at {:.2f} lbs on {}. Congratulations on the progress!\n").format(user,
  219. user_info['recent'], user_info['abs_change'], user_info['pct_change'],
  220. user_info['min_weight'], str(user_info['min_date'].date()),
  221. user_info['max_weight'], str(user_info['max_date'].date()),
  222. user_info['goal_weight'], user_info['pct_towards_goal'],
  223. user_info['start_weight'], str(user_info['start_date'].date()))
  224. self.slack.chat.post_message('#weight_tracker', text=message, username='%s Summary' % user)
  225. """
  226. Reset the plot and institute basic parameters
  227. """
  228. @staticmethod
  229. def reset_plot():
  230. matplotlib.rcParams.update(matplotlib.rcParamsDefault)
  231. matplotlib.rcParams['text.color'] = 'k'
  232. """
  233. Plot of all users percentage changes.
  234. Includes polynomial fits (degree may need to be adjusted).
  235. """
  236. def percentage_plot(self):
  237. self.reset_plot()
  238. plt.style.use('fivethirtyeight')
  239. plt.figure(figsize=(10,8))
  240. for i, user in enumerate(self.users):
  241. user_color = self.user_dict[user]['color']
  242. # Select the user and order dataframe by date
  243. df = self.weights[self.weights['Name'] == user]
  244. df.sort_index(inplace=True)
  245. # List is used for fitting polynomial
  246. xvalues = list(range(len(df)))
  247. # Create a polynomial fit
  248. z = np.polyfit(xvalues, df['pct_change'], deg=6)
  249. # Create a function from the fit
  250. p = np.poly1d(z)
  251. # Map the x values to y values
  252. fit_data = p(xvalues)
  253. # Plot the actual points and the fit
  254. plt.plot(df.index, df['pct_change'], 'o', color = user_color, label = '%s' % user)
  255. plt.plot(df.index, fit_data, '-', color = user_color, linewidth = 5, label = '%s' % user)
  256. # Plot formatting
  257. plt.xlabel('Date'); plt.ylabel('% Change from Start')
  258. plt.title('Percentage Changes')
  259. plt.grid(color='k', alpha=0.4)
  260. plt.legend(prop={'size':14})
  261. plt.savefig('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\percentage_plot.png')
  262. self.slack.files.upload('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\percentage_plot.png', channels='#weight_tracker')
  263. os.remove('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\percentage_plot.png')
  264. """
  265. Plot of a single user's history.
  266. Also plot a polynomial fit on the observations.
  267. """
  268. def history_plot(self, user):
  269. self.reset_plot()
  270. plt.style.use('fivethirtyeight')
  271. plt.figure(figsize=(10, 8))
  272. df = self.weights[self.weights['Name'] == user]
  273. df.sort_index(inplace=True)
  274. user_color = self.user_dict[user]['color']
  275. # List is used for fitting polynomial
  276. xvalues = list(range(len(df)))
  277. # Create a polynomial fit
  278. z = np.polyfit(xvalues, df['Entry'], deg=6)
  279. # Create a function from the fit
  280. p = np.poly1d(z)
  281. # Map the x values to y values
  282. fit_data = p(xvalues)
  283. # Make a simple plot and upload to slack
  284. plt.plot(df.index, df['Entry'], 'ko', ms = 8, label = 'Observed')
  285. plt.plot(df.index, fit_data, '-', color = user_color, linewidth = 5, label = 'Smooth Fit')
  286. plt.xlabel('Date'); plt.ylabel('Weight (lbs)'); plt.title('%s Weight History' % user)
  287. plt.legend(prop={'size': 14});
  288. plt.savefig(fname='C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\history_plot.png')
  289. self.slack.files.upload('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\history_plot.png', channels='#weight_tracker')
  290. # Remove the plot from local storage
  291. os.remove('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\history_plot.png')
  292. """
  293. Create a prophet model for forecasting and trend analysis.
  294. Might need to adjust model hyperparameters.
  295. """
  296. def prophet_model(self):
  297. model = fbprophet.Prophet(daily_seasonality=False, yearly_seasonality=False)
  298. return model
  299. """
  300. Plot the prophet forecast for the next thirty days
  301. Print the expected weight at the end of the forecast
  302. """
  303. def future_plot(self, user):
  304. self.reset_plot()
  305. df = self.weights[self.weights['Name'] == user]
  306. dates = [date.date() for date in df.index]
  307. df['ds'] = dates
  308. df['y'] = df['Entry']
  309. df.sort_index(inplace=True)
  310. # Prophet model
  311. model = self.prophet_model()
  312. model.fit(df)
  313. # Future dataframe for predictions
  314. future = model.make_future_dataframe(periods=30, freq='D')
  315. future = model.predict(future)
  316. color = self.user_dict[user]['color']
  317. # Write a message and post to slack
  318. message = ('{} Your predicted weight on {} = {:.2f} lbs.'.format(
  319. user, max(future['ds']).date(), future.ix[len(future) - 1, 'yhat']))
  320. self.slack.chat.post_message(channel="#weight_tracker", text=message, username = 'Future Prediction')
  321. # Create the plot and upload to slack
  322. fig, ax = plt.subplots(1, 1, figsize=(10, 8))
  323. ax.plot(df['ds'], df['y'], 'o', color = 'k', ms = 8, label = 'observations')
  324. ax.plot(future['ds'], future['yhat'], '-', color = color, label = 'modeled')
  325. ax.fill_between(future['ds'].dt.to_pydatetime(), future['yhat_upper'], future['yhat_lower'], facecolor = color,
  326. alpha = 0.4, edgecolor = 'k', linewidth = 1.8, label = 'confidence interval')
  327. plt.xlabel('Date'); plt.ylabel('Weight (lbs)'); plt.title('%s 30 Day Prediction' % user)
  328. plt.legend()
  329. plt.savefig('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\future_plot.png')
  330. self.slack.files.upload('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\future_plot.png', channels="#weight_tracker")
  331. os.remove('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\future_plot.png')
  332. """
  333. Analyze user trends and provide recommendations.
  334. Determine if the user is on track to meet their goal.
  335. """
  336. def analyze(self, user):
  337. self.reset_plot()
  338. # Get user info and sort dataframe by date
  339. info = self.user_dict.get(user)
  340. goal_weight = info['goal_weight']
  341. df = self.weights[self.weights['Name'] == user]
  342. df = df.sort_index()
  343. df['ds'] = [date.date() for date in df.index]
  344. df['y'] = df['Entry']
  345. model = self.prophet_model()
  346. model.fit(df)
  347. prediction_days = 2 * len(df)
  348. future = model.make_future_dataframe(periods = prediction_days, freq = 'D')
  349. future = model.predict(future)
  350. # lbs change per day
  351. change_per_day = info['abs_change'] / (max(df['ds']) - min(df['ds'])).days
  352. days_to_goal = abs(int((info['recent'] - goal_weight) / change_per_day))
  353. date_for_goal = max(df['ds']) + pd.DateOffset(days=days_to_goal)
  354. # future dataframe where the user in above goal
  355. goal_future = future[future['yhat'] < goal_weight]
  356. # The additive model predicts the user will meet their goal
  357. if len(goal_future) > 0:
  358. model_goal_date = min(goal_future['ds'])
  359. message = ("\n{} Your average weight change per day is {:.2f} lbs\n"
  360. "Extrapolating the average loss per day, you will reach your goal of {} lbs in {} days on {}.\n\n"
  361. "The additive model predicts you will reach your goal on {}\n".format(
  362. user, change_per_day, goal_weight, days_to_goal, date_for_goal.date(), model_goal_date.date()))
  363. # The additive model does not predict the user will meet their goal
  364. else:
  365. final_future_date = max(future['ds'])
  366. message = ("\n{} Your average weight change per day is {:.2f} lbs\n\n"
  367. "Extrapolating the average loss per day, you will reach your goal of {} lbs in {} days on {}.\n\n"
  368. "The additive model does not forecast you reaching your goal by {}.\n".format(
  369. user, change_per_day, goal_weight, days_to_goal, date_for_goal.date(), final_future_date))
  370. self.slack.chat.post_message(channel="#weight_tracker", text=message, username="Weight Tracker Analysis")
  371. # Identify Weekly Trends
  372. future['weekday'] = [date.weekday() for date in future['ds']]
  373. future_weekly = future.groupby('weekday').mean()
  374. future_weekly.index = ['Mon', 'Tues', 'Wed', 'Thurs', 'Fri', 'Sat', 'Sun']
  375. # Color labels based on the users objective
  376. colors = ['red' if ( ((weight > 0) & (info['objective'] == 'lose')) | ((weight < 0) & (info['objective'] == 'gain'))) else 'green' for weight in future_weekly['weekly']]
  377. self.reset_plot()
  378. # Create a bar plot with labels for positive and negative changes
  379. plt.figure(figsize=(10, 8))
  380. xvalues = list(range(len(future_weekly)))
  381. plt.bar(xvalues, future_weekly['weekly'], color = colors, edgecolor = 'k', linewidth = 2)
  382. plt.xticks(xvalues, list(future_weekly.index))
  383. red_patch = mpatches.Patch(color='red', linewidth = 2, label='Needs Work')
  384. green_patch = mpatches.Patch(color='green', linewidth = 2, label='Solid')
  385. plt.legend(handles=[red_patch, green_patch])
  386. plt.xlabel('Day of Week')
  387. plt.ylabel('Trend (lbs)')
  388. plt.title('%s Weekly Trends' % user)
  389. plt.savefig('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\weekly_plot.png')
  390. # Upload the image to slack and delete local file
  391. self.slack.files.upload('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\weekly_plot.png', channels = '#weight_tracker')
  392. os.remove('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\weekly_plot.png')