
Formatted with black

WillKoehrsen committed 4 years ago (commit b72edeebe2)
33 changed files with 2371 additions and 1661 deletions
  1. datashader-work/formatting_data.py (+ 23 - 23)
  2. medium/bargraphs.py (+ 35 - 28)
  3. medium/images/data_for_fitting.py (+ 12 - 5)
  4. medium/retrieval.py (+ 100 - 76)
  5. medium/view_extraction.py (+ 19 - 14)
  6. medium/visuals.py (+ 198 - 102)
  7. sentdex_data_analysis/HPI_tpot_pipeline.py (+ 15 - 7)
  8. sentdex_data_analysis/pandas_IO.py (+ 18 - 12)
  9. sentdex_data_analysis/pandas_TPOT.py (+ 19 - 11)
  10. sentdex_data_analysis/pandas_additionalEconomic.py (+ 42 - 29)
  11. sentdex_data_analysis/pandas_basics.py (+ 13 - 10)
  12. sentdex_data_analysis/pandas_building_dataset.py (+ 4 - 6)
  13. sentdex_data_analysis/pandas_cocantenating_appending.py (+ 27 - 16)
  14. sentdex_data_analysis/pandas_comparisonOperators.py (+ 22 - 22)
  15. sentdex_data_analysis/pandas_handlingNan.py (+ 24 - 23)
  16. sentdex_data_analysis/pandas_indexing.py (+ 42 - 33)
  17. sentdex_data_analysis/pandas_intro.py (+ 6 - 5)
  18. sentdex_data_analysis/pandas_joiningData.py (+ 17 - 16)
  19. sentdex_data_analysis/pandas_joining_merging.py (+ 21 - 14)
  20. sentdex_data_analysis/pandas_mappingFunctions.py (+ 26 - 15)
  21. sentdex_data_analysis/pandas_percentChange_correlation.py (+ 35 - 33)
  22. sentdex_data_analysis/pandas_pickling.py (+ 24 - 21)
  23. sentdex_data_analysis/pandas_pickling_sentdex.py (+ 4 - 4)
  24. sentdex_data_analysis/pandas_resampling.py (+ 43 - 39)
  25. sentdex_data_analysis/pandas_rollingStatistics.py (+ 29 - 27)
  26. sentdex_data_analysis/pandas_scikitLearn.py (+ 32 - 23)
  27. sentdex_data_analysis/tpot_basic.py (+ 5 - 4)
  28. slack_interaction/utils.py (+ 47 - 43)
  29. stocker/stocker.py (+ 831 - 526)
  30. time_features/time_features_utils.py (+ 77 - 56)
  31. web_automation/canvas_upload.py (+ 99 - 96)
  32. weighter/run_weighter.py (+ 38 - 41)
  33. weighter/weighter.py (+ 424 - 281)
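Every file below shows the same mechanical change: black rewrites single-quoted strings to double quotes and reflows statements that exceed its line-length limit, without changing behavior. A commit like this is typically produced by running black over the whole repository (for example, `black .` from the repo root). As a minimal sketch of the same transformation through black's Python API (assuming black is installed; `format_str` and `Mode` are part of its public API), applied to one of the unformatted lines from this commit:

import black

# A line as it appears before this commit (single quotes)
src = "df['flag'] = df['flag'].astype('category')\n"

# black normalizes quotes and reflows long lines; Mode() uses the
# defaults (88-character limit) that the reformatted files reflect
print(black.format_str(src, mode=black.Mode()))
# -> df["flag"] = df["flag"].astype("category")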

+ 23 - 23
datashader-work/formatting_data.py

@@ -1,6 +1,3 @@
-
-
-
 import geoviews as gv
 import geoviews.feature as gf
 import xarray as xr
@@ -9,31 +6,34 @@ from cartopy import crs
 import pandas as pd
 import numpy as np
 
-gv.extension('bokeh', 'matplotlib')
+gv.extension("bokeh", "matplotlib")
 
-xr_ensemble = xr.open_dataset('Data-Analysis/datashader-work/geoviews-examples/data/ensemble.nc').load()
+xr_ensemble = xr.open_dataset(
+    "Data-Analysis/datashader-work/geoviews-examples/data/ensemble.nc"
+).load()
 
 from sqlalchemy import create_engine
-engine = create_engine('postgres://localhost:5432/global_fishing_watch')
-engine.table_names()
-df = pd.read_sql("""SELECT * FROM fishing_effort LIMIT 10000""",
-                engine, parse_dates=['date'])
 
-df['flag'] = df['flag'].astype('category')
-df['geartype'] = df['geartype'].astype('category')
-df['lat'] = df['lat_bin'] / 100
-df['lon'] = df['lon_bin'] / 100
+engine = create_engine("postgres://localhost:5432/global_fishing_watch")
+engine.table_names()
+df = pd.read_sql(
+    """SELECT * FROM fishing_effort LIMIT 10000""", engine, parse_dates=["date"]
+)
+
+df["flag"] = df["flag"].astype("category")
+df["geartype"] = df["geartype"].astype("category")
+df["lat"] = df["lat_bin"] / 100
+df["lon"] = df["lon_bin"] / 100
 df.info()
 
 
-
 def format_df(df, n=10_000):
-    ...:     df = df.iloc[:n]
-    ...:     df = df.drop_duplicates(subset=['lat', 'lon', 'date'])
-    ...:     df = df.sort_values(['lat', 'lon', 'date'])
-    ...:     index = pd.MultiIndex.from_arrays([df['lat'], df['lon'], df['date']])
-    ...:     df.index = index
-    ...:     latitudes = df.index.levels[0]
-    ...:     longitudes = df.index.levels[1]
-    ...:     times = df.index.levels[2]
-    ...:     return latitudes, longitudes, times, df
+    df = df.iloc[:n]
+    df = df.drop_duplicates(subset=["lat", "lon", "date"])
+    df = df.sort_values(["lat", "lon", "date"])
+    index = pd.MultiIndex.from_arrays([df["lat"], df["lon"], df["date"]])
+    df.index = index
+    latitudes = df.index.levels[0]
+    longitudes = df.index.levels[1]
+    times = df.index.levels[2]
+    return latitudes, longitudes, times, df

+ 35 - 28
medium/bargraphs.py

@@ -14,7 +14,7 @@ def get_all_pages(driver, xpath, months, suffix):
     # Initially starting at today
     latest_date_in_graph = datetime.now().date()
 
-    print('Starting on ', latest_date_in_graph)
+    print("Starting on ", latest_date_in_graph)
 
     views = []
     dates = []
@@ -25,15 +25,15 @@ def get_all_pages(driver, xpath, months, suffix):
         graph_dates = []
         # Extract the bar graph
         bargraph = BeautifulSoup(driver.page_source).find_all(
-            attrs={'class': 'bargraph'})[0]
+            attrs={"class": "bargraph"}
+        )[0]
 
         # Get all the bars in the bargraph
-        bardata = bargraph.find_all(attrs={'class': 'bargraph-bar'})
+        bardata = bargraph.find_all(attrs={"class": "bargraph-bar"})
         # Sort the bar data by x position (which will be date order) with most recent first
-        bardata = sorted(bardata, key=lambda x: float(
-            x.get('x')), reverse=True)
-        bardata = [bar.get('data-tooltip') for bar in bardata]
-        latest_day = int(bardata[0].split('\xa0')[-1])
+        bardata = sorted(bardata, key=lambda x: float(x.get("x")), reverse=True)
+        bardata = [bar.get("data-tooltip") for bar in bardata]
+        latest_day = int(bardata[0].split("\xa0")[-1])
 
         # Some months are not overlapping
         if latest_day != latest_date_in_graph.day:
@@ -41,7 +41,7 @@ def get_all_pages(driver, xpath, months, suffix):
 
         # Iterate through the bars which now are sorted in reverse date order (newest to oldest)
         for i, data in enumerate(bardata):
-            graph_views.append(float(data.split(' ')[0].replace(',', '')))
+            graph_views.append(float(data.split(" ")[0].replace(",", "")))
             graph_dates.append(latest_date_in_graph - timedelta(days=i))
 
         views.extend(graph_views)
@@ -55,17 +55,22 @@ def get_all_pages(driver, xpath, months, suffix):
         # Go to the previous graph
         driver.find_element_by_xpath(xpath).click()
         time.sleep(2)
-        print(f'{100 * m /(months):.0f}% complete.', end='\r')
-
-    results = pd.DataFrame({'date': pd.to_datetime(
-        dates), suffix: views}).groupby('date').sum()
-    results = results.loc[results[results['views'] != 0.0].index.min():, ]
-    print('First views on ', str(results.index.min().date()))
+        print(f"{100 * m /(months):.0f}% complete.", end="\r")
+
+    results = (
+        pd.DataFrame({"date": pd.to_datetime(dates), suffix: views})
+        .groupby("date")
+        .sum()
+    )
+    results = results.loc[
+        results[results["views"] != 0.0].index.min() :,
+    ]
+    print("First views on ", str(results.index.min().date()))
 
     # Save using the date as the file name
-    fname = f'data/{str(datetime.now().date())}_{suffix}'
+    fname = f"data/{str(datetime.now().date())}_{suffix}"
     results.to_parquet(fname)
-    print('Stats saved to ', fname)
+    print("Stats saved to ", fname)
 
     return results
 
@@ -75,29 +80,31 @@ if __name__ == "__main__":
     driver = webdriver.Chrome(ChromeDriverManager().install())
     driver.get("https://medium.com/me/stats")
     # Wait for user to log in
-    input('Waiting for you to log in. Press enter when ready: ')
+    input("Waiting for you to log in. Press enter when ready: ")
 
     # Find earliest date
     earliest_article_date = parser.parse(
-        input('Enter earliest article date as string: ')).date()
-    days = (datetime.now().date()
-            - earliest_article_date).total_seconds() / (60 * 60 * 24)
+        input("Enter earliest article date as string: ")
+    ).date()
+    days = (datetime.now().date() - earliest_article_date).total_seconds() / (
+        60 * 60 * 24
+    )
     months = math.ceil(days / 30)
 
     # Get the xpath from user
-    xpath = input('Paste xpath with no quotation marks: ')
+    xpath = input("Paste xpath with no quotation marks: ")
     # Gather the results
-    results = get_all_pages(driver, xpath, months, suffix='views')
-    print('Refresh page and click on reads')
+    results = get_all_pages(driver, xpath, months, suffix="views")
+    print("Refresh page and click on reads")
 
     # Get the xpath from user
-    xpath = input('Paste xpath with no quotation marks: ')
+    xpath = input("Paste xpath with no quotation marks: ")
     # Gather the results
-    results = get_all_pages(driver, xpath, months, suffix='reads')
+    results = get_all_pages(driver, xpath, months, suffix="reads")
 
 
-    print('Refresh page and click on fans')
+    print("Refresh page and click on fans")
     # Get the xpath from user
-    xpath = input('Paste xpath with no quotation marks: ')
+    xpath = input("Paste xpath with no quotation marks: ")
     # Gather the results
-    results = get_all_pages(driver, xpath, months, suffix='fans')
+    results = get_all_pages(driver, xpath, months, suffix="fans")
     print("Complete. All results saved in data directory.")
     print("Complete. All results saved in data directory.")

+ 12 - 5
medium/images/data_for_fitting.py

@@ -10,10 +10,16 @@ def data_for_fitting(*, building_id, date):
         previous_bday = pd.to_datetime(date) - BDay(1)
 
         # If a holiday, this will return None
-        lease_start = (db().execute(building_daily_stats.select().where(
-            building_daily_stats.c.building_id == building_id).where(
-                building_daily_stats.c.date == previous_bday)).fetchone().
-                       lease_obligations_start_at)
+        lease_start = (
+            db()
+            .execute(
+                building_daily_stats.select()
+                .where(building_daily_stats.c.building_id == building_id)
+                .where(building_daily_stats.c.date == previous_bday)
+            )
+            .fetchone()
+            .lease_obligations_start_at
+        )
 
         date = previous_bday
 
@@ -21,4 +27,5 @@ def data_for_fitting(*, building_id, date):
     return load_sensor_values(
         building_id=building_id,
         start_time=lease_start,
-        end_time=lease_start + timedelta(hours=8))
+        end_time=lease_start + timedelta(hours=8),
+    )

+ 100 - 76
medium/retrieval.py

@@ -9,7 +9,7 @@ import pandas as pd
 from datetime import datetime
 
 
-def get_table_rows(fname='stats.html'):
+def get_table_rows(fname="stats.html"):
     """
     """
     Extract the table rows from the statistics
     Extract the table rows from the statistics
 
 
@@ -18,20 +18,23 @@ def get_table_rows(fname='stats.html'):
     :return table_rows: list of BeautifulSoup objects to be passed to `process_in_parallel`
     """
 
-    soup = BeautifulSoup(
-        open(f'data/{fname}', 'r', encoding='utf8'), features='lxml')
-    table_rows = soup.find_all(
-        attrs={'class': "sortableTable-row js-statsTableRow"})
-    print(f'Found {len(table_rows)} entries in table.')
+    soup = BeautifulSoup(open(f"data/{fname}", "r", encoding="utf8"), features="lxml")
+    table_rows = soup.find_all(attrs={"class": "sortableTable-row js-statsTableRow"})
+    print(f"Found {len(table_rows)} entries in table.")
     return table_rows
 
 
 def convert_timestamp(ts: int, tz: str):
     """Convert a unix timestamp to a date timestamp"""
-    return pd.to_datetime(ts, origin='unix', unit='ms').tz_localize('UTC').tz_convert(tz).tz_localize(None)
+    return (
+        pd.to_datetime(ts, origin="unix", unit="ms")
+        .tz_localize("UTC")
+        .tz_convert(tz)
+        .tz_localize(None)
+    )
 
 
-def process_entry(entry, parallel=True, tz='America/Chicago'):
+def process_entry(entry, parallel=True, tz="America/Chicago"):
     """
     """
     Extract data from one entry in table
     Extract data from one entry in table
 
 
@@ -44,106 +47,127 @@ def process_entry(entry, parallel=True, tz='America/Chicago'):
     """
     """
     # Convert to soup when running in parallel
     # Convert to soup when running in parallel
     if parallel:
     if parallel:
-        entry = BeautifulSoup(entry, features='lxml').body.tr
+        entry = BeautifulSoup(entry, features="lxml").body.tr
 
     entry_dict = {}
     # Extract information
-    for value, key in zip(entry.find_all(attrs={'class': 'sortableTable-value'}),
-                          ['published_date', 'views', 'reads', 'ratio', 'fans']):
-        entry_dict[key] = float(
-            value.text) if key == 'ratio' else int(value.text)
+    for value, key in zip(
+        entry.find_all(attrs={"class": "sortableTable-value"}),
+        ["published_date", "views", "reads", "ratio", "fans"],
+    ):
+        entry_dict[key] = float(value.text) if key == "ratio" else int(value.text)
 
-    entry_dict['read_time'] = int(entry.find_all(attrs={'class': 'readingTime'})[
-                                  0].get('title').split(' ')[0])
+    entry_dict["read_time"] = int(
+        entry.find_all(attrs={"class": "readingTime"})[0].get("title").split(" ")[0]
+    )
 
     # Unlisted vs published
-    entry_dict['type'] = 'unlisted' if len(
-        entry.find_all(text=' Unlisted')) > 0 else 'published'
+    entry_dict["type"] = (
+        "unlisted" if len(entry.find_all(text=" Unlisted")) > 0 else "published"
+    )
 
     # Publication
-    publication = entry.find_all(attrs={'class': 'sortableTable-text'})
-    if 'In' in publication[0].text:
-        entry_dict['publication'] = publication[0].text.split('In ')[
-            1].split('View')[0]
+    publication = entry.find_all(attrs={"class": "sortableTable-text"})
+    if "In" in publication[0].text:
+        entry_dict["publication"] = publication[0].text.split("In ")[1].split("View")[0]
     else:
-        entry_dict['publication'] = 'None'
+        entry_dict["publication"] = "None"
 
     # Convert datetimes
-    entry_dict['published_date'] = convert_timestamp(
-        entry_dict['published_date'], tz=tz)
-    entry_dict['started_date'] = convert_timestamp(
-        entry.get('data-timestamp'), tz=tz)
+    entry_dict["published_date"] = convert_timestamp(
+        entry_dict["published_date"], tz=tz
+    )
+    entry_dict["started_date"] = convert_timestamp(entry.get("data-timestamp"), tz=tz)
 
     # Get the link
-    link = entry.find_all(text='View story',
-                               attrs={'class': 'sortableTable-link'})[0].get('href')
-    entry_dict['link'] = link
+    link = entry.find_all(text="View story", attrs={"class": "sortableTable-link"})[
+        0
+    ].get("href")
+    entry_dict["link"] = link
     # Retrieve the article and create a soup
     entry = requests.get(link).content
-    entry_soup = BeautifulSoup(entry, features='lxml')
+    entry_soup = BeautifulSoup(entry, features="lxml")
 
     # Get the title
     try:
         title = entry_soup.h1.text
     except:
-        title = 'response'
+        title = "response"
 
     title_word_count = len(re.findall(r"[\w']+|[.,!?;]", title))
 
     # Main text entries
-    entry_text = [p.text for p in entry_soup.find_all(
-        ['h1', 'h2', 'h3', 'p', 'blockquote'])]
+    entry_text = [
+        p.text for p in entry_soup.find_all(["h1", "h2", "h3", "p", "blockquote"])
+    ]
 
     # Make sure to catch everything
-    entry_text.extend(s.text for s in entry_soup.find_all(
-        attrs={'class': 'graf graf--li graf-after--li'}))
-    entry_text.extend(s.text for s in entry_soup.find_all(
-        attrs={'class': 'graf graf--li graf-after--p'}))
-    entry_text.extend(s.text for s in entry_soup.find_all(
-        attrs={'class': 'graf graf--li graf-after--blockquote'}))
-    entry_text.extend(s.text for s in entry_soup.find_all(
-        attrs={'class': 'graf graf--li graf-after--pullquote'}))
-
-    entry_text = ' '.join(entry_text)
+    entry_text.extend(
+        s.text
+        for s in entry_soup.find_all(attrs={"class": "graf graf--li graf-after--li"})
+    )
+    entry_text.extend(
+        s.text
+        for s in entry_soup.find_all(attrs={"class": "graf graf--li graf-after--p"})
+    )
+    entry_text.extend(
+        s.text
+        for s in entry_soup.find_all(
+            attrs={"class": "graf graf--li graf-after--blockquote"}
+        )
+    )
+    entry_text.extend(
+        s.text
+        for s in entry_soup.find_all(
+            attrs={"class": "graf graf--li graf-after--pullquote"}
+        )
+    )
+
+    entry_text = " ".join(entry_text)
 
     # Word count
     word_count = len(re.findall(r"[\w']+|[.,!?;]", entry_text))
 
     # Number of claps
     clap_pattern = re.compile(
-        '^[0-9]{1,} claps|^[0-9]{1,}.[0-9]{1,}K claps|^[0-9]{1,}K claps')
+        "^[0-9]{1,} claps|^[0-9]{1,}.[0-9]{1,}K claps|^[0-9]{1,}K claps"
+    )
     claps = entry_soup.find_all(text=clap_pattern)
 
     if len(claps) > 0:
-        if 'K' in claps[0]:
-            clap_number = int(1e3 * float(claps[0].split('K')[0]))
+        if "K" in claps[0]:
+            clap_number = int(1e3 * float(claps[0].split("K")[0]))
         else:
-            clap_number = int(claps[0].split(' ')[0])
+            clap_number = int(claps[0].split(" ")[0])
     else:
         clap_number = 0
 
     # Post tags
-    tags = entry_soup.find_all(
-        attrs={'class': 'tags tags--postTags tags--borderless'})
-    tags = [li.text for li in tags[0].find_all('li')]
+    tags = entry_soup.find_all(attrs={"class": "tags tags--postTags tags--borderless"})
+    tags = [li.text for li in tags[0].find_all("li")]
 
     # Responses to entry
-    responses = entry_soup.find_all(attrs={'class': 'button button--chromeless u-baseColor--buttonNormal u-marginRight12',
-                                           'data-action': 'scroll-to-responses'})
+    responses = entry_soup.find_all(
+        attrs={
+            "class": "button button--chromeless u-baseColor--buttonNormal u-marginRight12",
+            "data-action": "scroll-to-responses",
+        }
+    )
     num_responses = int(responses[0].text) if len(responses) > 0 else 0
 
     # Store in dictionary
-    entry_dict['title'] = title
-    entry_dict['title_word_count'] = title_word_count
-    entry_dict['text'] = entry_text
-    entry_dict['word_count'] = word_count
-    entry_dict['claps'] = clap_number
-    entry_dict['tags'] = tags
-    entry_dict['num_responses'] = num_responses
+    entry_dict["title"] = title
+    entry_dict["title_word_count"] = title_word_count
+    entry_dict["text"] = entry_text
+    entry_dict["word_count"] = word_count
+    entry_dict["claps"] = clap_number
+    entry_dict["tags"] = tags
+    entry_dict["num_responses"] = num_responses
 
     # Time since publication
-    entry_dict['days_since_publication'] = (
-        datetime.now() - entry_dict['published_date']).total_seconds() / (3600 * 24)
+    entry_dict["days_since_publication"] = (
+        datetime.now() - entry_dict["published_date"]
+    ).total_seconds() / (3600 * 24)
 
     return entry_dict
 
@@ -173,44 +197,44 @@ def process_in_parallel(table_rows, processes=20):
     start = timer()
     for i, r in enumerate(pool.imap_unordered(process_entry, table_rows_str)):
         # Report progress
-        print(f'{100 * i / len(table_rows_str):.2f}% complete.', end='\r')
+        print(f"{100 * i / len(table_rows_str):.2f}% complete.", end="\r")
         results.append(r)
     pool.close()
     pool.join()
     end = timer()
-    print(
-        f'Processed {len(table_rows_str)} articles in {end-start:.2f} seconds.')
+    print(f"Processed {len(table_rows_str)} articles in {end-start:.2f} seconds.")
 
     # Convert to dataframe
     df = pd.DataFrame(results)
     # Rename ratio
-    df.rename(columns={'ratio': 'read_ratio'}, inplace=True)
+    df.rename(columns={"ratio": "read_ratio"}, inplace=True)
     # Add extra columns with more data
-    df['claps_per_word'] = df['claps'] / df['word_count']
-    df['editing_days'] = ((df['published_date'] - df['started_date']
-                           ).dt.total_seconds() / (60 * 60 * 24)).astype(int)
+    df["claps_per_word"] = df["claps"] / df["word_count"]
+    df["editing_days"] = (
+        (df["published_date"] - df["started_date"]).dt.total_seconds() / (60 * 60 * 24)
+    ).astype(int)
 
     # Rounding
-    df['published_date'] = df['published_date'].dt.round('min')
-    df['started_date'] = df['started_date'].dt.round('min')
-    df['read_ratio'] = df['read_ratio'].round(2)
+    df["published_date"] = df["published_date"].dt.round("min")
+    df["started_date"] = df["started_date"].dt.round("min")
+    df["read_ratio"] = df["read_ratio"].round(2)
 
     # 5 most common tags (might want to include more tags)
     n = 5
-    all_tags = list(chain(*df['tags'].tolist()))
+    all_tags = list(chain(*df["tags"].tolist()))
     tag_counts = Counter(all_tags)
     tags = tag_counts.most_common(n)
 
     # Adding columns with indication of tag
     for tag, count in tags:
-        flag = [1 if tag in tags else 0 for tags in df['tags']]
-        df.loc[:, f'<tag>{tag}'] = flag
+        flag = [1 if tag in tags else 0 for tags in df["tags"]]
+        df.loc[:, f"<tag>{tag}"] = flag
 
-    df.sort_values('published_date', inplace=True)
+    df.sort_values("published_date", inplace=True)
     return df
 
 
-def get_data(fname='stats.html', processes=20):
+def get_data(fname="stats.html", processes=20):
     """
     """
     Retrieve medium article statistics
     Retrieve medium article statistics
 
 

+ 19 - 14
medium/view_extraction.py

@@ -6,33 +6,38 @@ import pandas as pd
 
 
 def process_bargraph(bargraph):
-    bardata = [bar.get('data-tooltip')
-               for bar in bargraph.find_all(attrs={'class': 'bargraph-bar'})]
+    bardata = [
+        bar.get("data-tooltip")
+        for bar in bargraph.find_all(attrs={"class": "bargraph-bar"})
+    ]
     print(len(bardata))
     return
     # Sort by xposition
-    bardata = sorted(bardata, key=lambda x: float(x.get('x')))
-    views = [float(s.split(' ')[0].replace(',', '')) for s in bardata]
-    dates = [s.split(' ')[-1].split('\xa0')[0] + ' '
-             + s.split(' ')[-1].split('\xa0')[1] for s in bardata]
+    bardata = sorted(bardata, key=lambda x: float(x.get("x")))
+    views = [float(s.split(" ")[0].replace(",", "")) for s in bardata]
+    dates = [
+        s.split(" ")[-1].split("\xa0")[0] + " " + s.split(" ")[-1].split("\xa0")[1]
+        for s in bardata
+    ]
     year = str((datetime.now() - pd.Timedelta(days=i * 30)).year)
-    dates = [parser.parse(d + ' ' + year) for d in dates]
+    dates = [parser.parse(d + " " + year) for d in dates]
     return views, dates
 
 
-files = os.listdir('html_pages')
+files = os.listdir("html_pages")
 
 v = []
 d = []
 
 for fid in files:
-    i = int(fid.split('.')[0].split('p')[1])
-    graph = BeautifulSoup(
-        open(f'html_pages/{fid}', 'r')).find_all(attrs={'class': 'bargraph'})[0]
+    i = int(fid.split(".")[0].split("p")[1])
+    graph = BeautifulSoup(open(f"html_pages/{fid}", "r")).find_all(
+        attrs={"class": "bargraph"}
+    )[0]
     r = process_bargraph(graph, i)
     v.extend(r[0])
     d.extend(r[1])
-    results = pd.DataFrame({'date': d, 'views': v})
+    results = pd.DataFrame({"date": d, "views": v})
 
-results['date'] = pd.to_datetime(results['date'])
-results.to_parquet('medium_views_time')
+results["date"] = pd.to_datetime(results["date"])
+results.to_parquet("medium_views_time")

+ 198 - 102
medium/visuals.py

@@ -9,6 +9,7 @@ from scipy import stats
 
 import plotly.graph_objs as go
 import cufflinks
+
 cufflinks.go_offline()
 
 
@@ -31,7 +32,7 @@ def make_hist(df, x, category=None):
 
     layout = go.Layout(
         yaxis=dict(title="Count"),
-        xaxis=dict(title=x.replace('_', ' ').title()),
+        xaxis=dict(title=x.replace("_", " ").title()),
         title=f"{x.replace('_', ' ').title()} Distribution by {category.replace('_', ' ').title()}"
         if category
         else f"{x.replace('_', ' ').title()} Distribution",
@@ -63,8 +64,7 @@ def make_cum_plot(df, y, category=None, ranges=False):
                     mode="lines+markers",
                     mode="lines+markers",
                     text=group["title"],
                     text=group["title"],
                     name=name,
                     name=name,
-                    marker=dict(size=10, opacity=0.8,
-                                symbol=i + 2),
+                    marker=dict(size=10, opacity=0.8, symbol=i + 2),
                 )
             )
     else:
@@ -77,17 +77,27 @@ def make_cum_plot(df, y, category=None, ranges=False):
                     name=y[0].title(),
                     mode="lines+markers",
                     text=df["title"],
-                    marker=dict(size=10, color='blue', opacity=0.6, line=dict(color='black'),
-                                )),
+                    marker=dict(
+                        size=10,
+                        color="blue",
+                        opacity=0.6,
+                        line=dict(color="black"),
+                    ),
+                ),
                 go.Scatter(
                     x=df["published_date"],
                     y=df[y[1]].cumsum(),
-                    yaxis='y2',
+                    yaxis="y2",
                     name=y[1].title(),
                     mode="lines+markers",
                     text=df["title"],
-                    marker=dict(size=10, color='red', opacity=0.6, line=dict(color='black'),
-                                )),
+                    marker=dict(
+                        size=10,
+                        color="red",
+                        opacity=0.6,
+                        line=dict(color="black"),
+                    ),
+                ),
             ]
         else:
             data = [
@@ -96,23 +106,31 @@ def make_cum_plot(df, y, category=None, ranges=False):
                     y=df[y].cumsum(),
                     mode="lines+markers",
                     text=df["title"],
-                    marker=dict(size=12, color='blue', opacity=0.6, line=dict(color='black'),
-                                ),
+                    marker=dict(
+                        size=12,
+                        color="blue",
+                        opacity=0.6,
+                        line=dict(color="black"),
+                    ),
                 )
             ]
     if len(y) == 2:
         layout = go.Layout(
             xaxis=dict(title="Published Date", type="date"),
-            yaxis=dict(title=y[0].replace('_', ' ').title(), color='blue'),
-            yaxis2=dict(title=y[1].replace('_', ' ').title(), color='red',
-                        overlaying='y', side='right'),
+            yaxis=dict(title=y[0].replace("_", " ").title(), color="blue"),
+            yaxis2=dict(
+                title=y[1].replace("_", " ").title(),
+                color="red",
+                overlaying="y",
+                side="right",
+            ),
             font=dict(size=14),
             title=f"Cumulative {y[0].title()} and {y[1].title()}",
         )
     else:
         layout = go.Layout(
             xaxis=dict(title="Published Date", type="date"),
-            yaxis=dict(title=y.replace('_', ' ').title()),
+            yaxis=dict(title=y.replace("_", " ").title()),
             font=dict(size=14),
             title=f"Cumulative {y.replace('_', ' ').title()} by {category.replace('_', ' ').title()}"
             if category is not None
@@ -134,14 +152,27 @@ def make_cum_plot(df, y, category=None, ranges=False):
         rangeslider = dict(visible=True)
         layout["xaxis"]["rangeselector"] = rangeselector
         layout["xaxis"]["rangeslider"] = rangeslider
-        layout['width'] = 1000
-        layout['height'] = 600
+        layout["width"] = 1000
+        layout["height"] = 600
 
     figure = go.Figure(data=data, layout=layout)
     return figure
 
 
-def make_scatter_plot(df, x, y, fits=None, xlog=False, ylog=False, category=None, scale=None, sizeref=2, annotations=None, ranges=False, title_override=None):
+def make_scatter_plot(
+    df,
+    x,
+    y,
+    fits=None,
+    xlog=False,
+    ylog=False,
+    category=None,
+    scale=None,
+    sizeref=2,
+    annotations=None,
+    ranges=False,
+    title_override=None,
+):
     """
     """
     Make an interactive scatterplot, optionally segmented by `category`
     Make an interactive scatterplot, optionally segmented by `category`
 
 
@@ -164,48 +195,83 @@ def make_scatter_plot(df, x, y, fits=None, xlog=False, ylog=False, category=None
         title = f"{y.replace('_', ' ').title()} vs {x.replace('_', ' ').title()} by {category.replace('_', ' ').title()}"
         data = []
         for i, (name, group) in enumerate(df.groupby(category)):
-            data.append(go.Scatter(x=group[x],
-                                   y=group[y],
-                                   mode='markers',
-                                   text=group['title'],
-                                   name=name,
-                                   marker=dict(size=8, symbol=i + 2)))
+            data.append(
+                go.Scatter(
+                    x=group[x],
+                    y=group[y],
+                    mode="markers",
+                    text=group["title"],
+                    name=name,
+                    marker=dict(size=8, symbol=i + 2),
+                )
+            )
 
     else:
         if scale is not None:
             title = f"{y.replace('_', ' ').title()} vs {x.replace('_', ' ').title()} Scaled by {scale.title()}"
-            data = [go.Scatter(x=df[x],
-                               y=df[y],
-                               mode='markers',
-                               text=df['title'], marker=dict(size=df[scale],
-                                                             line=dict(color='black', width=0.5), sizemode='area', sizeref=sizeref, opacity=0.8,
-                                                             colorscale='Viridis', color=df[scale], showscale=True, sizemin=2))]
+            data = [
+                go.Scatter(
+                    x=df[x],
+                    y=df[y],
+                    mode="markers",
+                    text=df["title"],
+                    marker=dict(
+                        size=df[scale],
+                        line=dict(color="black", width=0.5),
+                        sizemode="area",
+                        sizeref=sizeref,
+                        opacity=0.8,
+                        colorscale="Viridis",
+                        color=df[scale],
+                        showscale=True,
+                        sizemin=2,
+                    ),
+                )
+            ]
         else:
 
             df.sort_values(x, inplace=True)
             title = f"{y.replace('_', ' ').title()} vs {x.replace('_', ' ').title()}"
-            data = [go.Scatter(x=df[x],
-                               y=df[y],
-                               mode='markers',
-                               text=df['title'], marker=dict(
-                size=12, color='blue', opacity=0.8, line=dict(color='black')),
-                name='observations')]
+            data = [
+                go.Scatter(
+                    x=df[x],
+                    y=df[y],
+                    mode="markers",
+                    text=df["title"],
+                    marker=dict(
+                        size=12, color="blue", opacity=0.8, line=dict(color="black")
+                    ),
+                    name="observations",
+                )
+            ]
             if fits is not None:
                 for fit in fits:
-                    data.append(go.Scatter(x=df[x], y=df[fit], text=df['title'],
-                                           mode='lines+markers', marker=dict
-                                           (size=8, opacity=0.6),
-                                           line=dict(dash='dash'), name=fit))
-
-                title += ' with Fit'
-    layout = go.Layout(annotations=annotations,
-                       xaxis=dict(title=x.replace('_', ' ').title() + (' (log scale)' if xlog else ''),
-                                  type='log' if xlog else None),
-                       yaxis=dict(title=y.replace('_', ' ').title() + (' (log scale)' if ylog else ''),
-                                  type='log' if ylog else None),
-                       font=dict(size=14),
-                       title=title if title_override is None else title_override,
-                       )
+                    data.append(
+                        go.Scatter(
+                            x=df[x],
+                            y=df[fit],
+                            text=df["title"],
+                            mode="lines+markers",
+                            marker=dict(size=8, opacity=0.6),
+                            line=dict(dash="dash"),
+                            name=fit,
+                        )
+                    )
+
+                title += " with Fit"
+    layout = go.Layout(
+        annotations=annotations,
+        xaxis=dict(
+            title=x.replace("_", " ").title() + (" (log scale)" if xlog else ""),
+            type="log" if xlog else None,
+        ),
+        yaxis=dict(
+            title=y.replace("_", " ").title() + (" (log scale)" if ylog else ""),
+            type="log" if ylog else None,
+        ),
+        font=dict(size=14),
+        title=title if title_override is None else title_override,
+    )
 
     # Add a rangeselector and rangeslider for a data xaxis
     if ranges:
@@ -222,8 +288,8 @@ def make_scatter_plot(df, x, y, fits=None, xlog=False, ylog=False, category=None
         rangeslider = dict(visible=True)
         layout["xaxis"]["rangeselector"] = rangeselector
         layout["xaxis"]["rangeslider"] = rangeslider
-        layout['width'] = 1000
-        layout['height'] = 600
+        layout["width"] = 1000
+        layout["height"] = 600
 
     figure = go.Figure(data=data, layout=layout)
     return figure
@@ -243,14 +309,16 @@ def make_linear_regression(df, x, y, intercept_0):
         lin_model = LinearRegression()
         lin_model.fit(df[x], df[y])
 
-        slopes, intercept, = lin_model.coef_, lin_model.intercept_
-        df['predicted'] = lin_model.predict(df[x])
+        slopes, intercept, = (
+            lin_model.coef_,
+            lin_model.intercept_,
+        )
+        df["predicted"] = lin_model.predict(df[x])
         r2 = lin_model.score(df[x], df[y])
-        rmse = np.sqrt(mean_squared_error(
-            y_true=df[y], y_pred=df['predicted']))
+        rmse = np.sqrt(mean_squared_error(y_true=df[y], y_pred=df["predicted"]))
         equation = f'{y.replace("_", " ")} ='
 
-        names = ['r2', 'rmse', 'intercept']
+        names = ["r2", "rmse", "intercept"]
         values = [r2, rmse, intercept]
         for i, (p, s) in enumerate(zip(x, slopes)):
             if (i + 1) % 3 == 0:
@@ -260,19 +328,26 @@ def make_linear_regression(df, x, y, intercept_0):
             names.append(p)
             values.append(s)
 
-        equation += f' {intercept:.2f}'
-        annotations = [dict(x=0.4 * df.index.max(), y=0.9 * df[y].max(), showarrow=False,
-                            text=equation,
-                            font=dict(size=10))]
+        equation += f" {intercept:.2f}"
+        annotations = [
+            dict(
+                x=0.4 * df.index.max(),
+                y=0.9 * df[y].max(),
+                showarrow=False,
+                text=equation,
+                font=dict(size=10),
+            )
+        ]
 
-        df['index'] = list(df.index)
-        figure = make_scatter_plot(df, x='index', y=y, fits=[
-                                   'predicted'], annotations=annotations)
-        summary = pd.DataFrame({'name': names, 'value': values})
+        df["index"] = list(df.index)
+        figure = make_scatter_plot(
+            df, x="index", y=y, fits=["predicted"], annotations=annotations
+        )
+        summary = pd.DataFrame({"name": names, "value": values})
     else:
         if intercept_0:
             lin_reg = sm.OLS(df[y], df[x]).fit()
-            df['fit_values'] = lin_reg.fittedvalues
+            df["fit_values"] = lin_reg.fittedvalues
             summary = lin_reg.summary()
             slope = float(lin_reg.params)
             equation = f"${y.replace('_', ' ')} = {slope:.2f} * {x.replace('_', ' ')}$"
@@ -280,19 +355,26 @@ def make_linear_regression(df, x, y, intercept_0):
         else:
             lin_reg = stats.linregress(df[x], df[y])
             intercept, slope = lin_reg.intercept, lin_reg.slope
-            params = ['pvalue', 'rvalue', 'slope', 'intercept']
+            params = ["pvalue", "rvalue", "slope", "intercept"]
             values = []
             for p in params:
                 values.append(getattr(lin_reg, p))
-            summary = pd.DataFrame({'param': params, 'value': values})
-            df['fit_values'] = df[x] * slope + intercept
+            summary = pd.DataFrame({"param": params, "value": values})
+            df["fit_values"] = df[x] * slope + intercept
             equation = f"${y.replace('_', ' ')} = {slope:.2f} * {x.replace('_', ' ')} + {intercept:.2f}$"
 
-        annotations = [dict(x=0.75 * df[x].max(), y=0.9 * df[y].max(), showarrow=False,
-                            text=equation,
-                            font=dict(size=32))]
+        annotations = [
+            dict(
+                x=0.75 * df[x].max(),
+                y=0.9 * df[y].max(),
+                showarrow=False,
+                text=equation,
+                font=dict(size=32),
+            )
+        ]
         figure = make_scatter_plot(
-            df, x=x, y=y, fits=['fit_values'], annotations=annotations)
+            df, x=x, y=y, fits=["fit_values"], annotations=annotations
+        )
     return figure, summary
 
 
@@ -317,15 +399,14 @@ def make_poly_fits(df, x, y, degree=6):
 
     # Make each fit
     for i in range(1, degree + 1):
-        fit_name = f'fit degree = {i}'
+        fit_name = f"fit degree = {i}"
         fit_list.append(fit_name)
         z, res, *rest = np.polyfit(df[x], df[y], i, full=True)
         fit_params.append(z)
         df.loc[:, fit_name] = np.poly1d(z)(df[x])
         rmse.append(np.sqrt(res[0]))
 
-    fit_stats = pd.DataFrame(
-        {'fit': fit_list, 'rmse': rmse, 'params': fit_params})
+    fit_stats = pd.DataFrame({"fit": fit_list, "rmse": rmse, "params": fit_params})
     figure = make_scatter_plot(df, x=x, y=y, fits=fit_list)
     return figure, fit_stats
 
@@ -344,47 +425,62 @@ def make_extrapolation(df, y, years, degree=4):
     """
     """
 
 
     df = df.copy()
     df = df.copy()
-    x = 'days_since_start'
-    df['days_since_start'] = (
-        (df['published_date'] - df['published_date'].min()).
-        dt.total_seconds() / (3600 * 24)).astype(int)
+    x = "days_since_start"
+    df["days_since_start"] = (
+        (df["published_date"] - df["published_date"].min()).dt.total_seconds()
+        / (3600 * 24)
+    ).astype(int)
 
-    cumy = f'cum_{y}'
+    cumy = f"cum_{y}"
     df[cumy] = df.sort_values(x)[y].cumsum()
 
     figure, summary = make_poly_fits(df, x, cumy, degree=degree)
 
-    min_date = df['published_date'].min()
-    max_date = df['published_date'].max()
+    min_date = df["published_date"].min()
+    max_date = df["published_date"].max()
 
-    date_range = pd.date_range(start=min_date,
-                               end=max_date + pd.Timedelta(days=int(years * 365)))
+    date_range = pd.date_range(
+        start=min_date, end=max_date + pd.Timedelta(days=int(years * 365))
+    )
 
-    future_df = pd.DataFrame({'date': date_range})
+    future_df = pd.DataFrame({"date": date_range})
 
     future_df[x] = (
-        (future_df['date'] - future_df['date'].min()).
-        dt.total_seconds() / (3600 * 24)).astype(int)
+        (future_df["date"] - future_df["date"].min()).dt.total_seconds() / (3600 * 24)
+    ).astype(int)
 
-    newcumy = f'cumulative_{y}'
+    newcumy = f"cumulative_{y}"
 
-    future_df = future_df.merge(df[[x, cumy]], on=x, how='left').\
-        rename(columns={cumy: newcumy})
+    future_df = future_df.merge(df[[x, cumy]], on=x, how="left").rename(
+        columns={cumy: newcumy}
+    )
 
-    z = np.poly1d(summary.iloc[-1]['params'])
-    pred_name = f'predicted_{y}'
+    z = np.poly1d(summary.iloc[-1]["params"])
+    pred_name = f"predicted_{y}"
     future_df[pred_name] = z(future_df[x])
-    future_df['title'] = ''
-
-    last_date = future_df.loc[future_df['date'].idxmax()]
-    prediction_text = (
-        f"On {last_date['date'].date()} the {y} will be {float(last_date[pred_name]):,.0f}.")
-    annotations = [dict(x=future_df['date'].quantile(0.4),
-                        y=0.8 * future_df[pred_name].max(), text=prediction_text, showarrow=False,
-                        font=dict(size=16))]
+    future_df["title"] = ""
+
+    last_date = future_df.loc[future_df["date"].idxmax()]
+    prediction_text = f"On {last_date['date'].date()} the {y} will be {float(last_date[pred_name]):,.0f}."
+    annotations = [
+        dict(
+            x=future_df["date"].quantile(0.4),
+            y=0.8 * future_df[pred_name].max(),
+            text=prediction_text,
+            showarrow=False,
+            font=dict(size=16),
+        )
+    ]
 
     title_override = f'{y.replace("_", " ").title()} with Extrapolation {years} Years into the Future'
 
-    figure = make_scatter_plot(future_df, 'date', newcumy, fits=[
-                               pred_name], annotations=annotations, ranges=True, title_override=title_override)
+    figure = make_scatter_plot(
+        future_df,
+        "date",
+        newcumy,
+        fits=[pred_name],
+        annotations=annotations,
+        ranges=True,
+        title_override=title_override,
+    )
     return figure, future_df

+ 15 - 7
sentdex_data_analysis/HPI_tpot_pipeline.py

@@ -7,15 +7,23 @@ from sklearn.pipeline import make_pipeline, make_union
 from sklearn.preprocessing import FunctionTransformer, MaxAbsScaler, MinMaxScaler
 
 # NOTE: Make sure that the class is labeled 'class' in the data file
-tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64)
-features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1)
-training_features, testing_features, training_classes, testing_classes = \
-    train_test_split(features, tpot_data['class'], random_state=42)
+tpot_data = np.recfromcsv(
+    "PATH/TO/DATA/FILE", delimiter="COLUMN_SEPARATOR", dtype=np.float64
+)
+features = np.delete(
+    tpot_data.view(np.float64).reshape(tpot_data.size, -1),
+    tpot_data.dtype.names.index("class"),
+    axis=1,
+)
+(
+    training_features,
+    testing_features,
+    training_classes,
+    testing_classes,
+) = train_test_split(features, tpot_data["class"], random_state=42)
 
 exported_pipeline = make_pipeline(
-    MaxAbsScaler(),
-    MinMaxScaler(),
-    LogisticRegression(C=49.0, dual=True, penalty="l2")
+    MaxAbsScaler(), MinMaxScaler(), LogisticRegression(C=49.0, dual=True, penalty="l2")
 )
 
 exported_pipeline.fit(training_features, training_classes)

+ 18 - 12
sentdex_data_analysis/pandas_IO.py

@@ -1,34 +1,40 @@
-import pandas as pd 
+import pandas as pd
 
-df = pd.read_csv('ZILL-Z77006_C.csv') # reading in file
-df.set_index('Date', inplace = True) # setting index to date column
+df = pd.read_csv("ZILL-Z77006_C.csv")  # reading in file
+df.set_index("Date", inplace=True)  # setting index to date column
 
 print(df.head())
 
 # df.to_csv('ZILLOW_44106.csv')
 
-df = pd.read_csv('ZILLOW_44106.csv', index_col=0) # reading in file and setting index to the first column
+df = pd.read_csv(
+    "ZILLOW_44106.csv", index_col=0
+)  # reading in file and setting index to the first column
 
 print(df.head())
 
-df.columns = ['Cleveland_HPI'] # House Price Index # renaming the columns
+df.columns = ["Cleveland_HPI"]  # House Price Index # renaming the columns
 
 # print(df.head())
 
 # df.to_csv('ZILLOW_44106_Rev3.csv', header = False)
 
 # reading in data, renaming columns, and setting index as first column
-df = pd.read_csv('ZILLOW_44106_Rev3.csv', names=['Date', 'Cleveland_HPI'], index_col=0)
+df = pd.read_csv("ZILLOW_44106_Rev3.csv", names=["Date", "Cleveland_HPI"], index_col=0)
 
 # print(df.head())
 
-df.to_html('example.html')  # to HTML (viewable in a web browser)
+df.to_html("example.html")  # to HTML (viewable in a web browser)
 
-df = pd.read_csv('ZILLOW_44106_Rev3.csv', names=['Date', 'Cleveland_HPI']) # reading in data and setting headers of columns
+df = pd.read_csv(
+    "ZILLOW_44106_Rev3.csv", names=["Date", "Cleveland_HPI"]
+)  # reading in data and setting headers of columns
 print(df.head())
 
-df.rename(columns={'Cleveland_HPI': 'Cleveland_44106_HPI'}, inplace = True) # renaming a column
-df.rename(columns={'Cleveland_44106_HPI' : 'Cleveland_HPI'}, inplace=True)
-df.set_index('Date', inplace = True)
+df.rename(
+    columns={"Cleveland_HPI": "Cleveland_44106_HPI"}, inplace=True
+)  # renaming a column
+df.rename(columns={"Cleveland_44106_HPI": "Cleveland_HPI"}, inplace=True)
+df.set_index("Date", inplace=True)
 
-print(df.head())
+print(df.head())

+ 19 - 11
sentdex_data_analysis/pandas_TPOT.py

@@ -1,11 +1,15 @@
-import pandas as pd 
-import numpy as np 
+import pandas as pd
+import numpy as np
 from tpot import TPOTClassifier
 from sklearn.model_selection import train_test_split
 
-benchmark = pd.read_pickle('us_pct.pickle')  # us overall housing price index percentage change
-HPI = pd.read_pickle('HPI_complete.pickle') # all of the state data, thirty year mortgage, unemployment rate, GDP, SP500
-HPI = HPI.join(benchmark['United States'])
+benchmark = pd.read_pickle(
+    "us_pct.pickle"
+)  # us overall housing price index percentage change
+HPI = pd.read_pickle(
+    "HPI_complete.pickle"
+)  # all of the state data, thirty year mortgage, unemployment rate, GDP, SP500
+HPI = HPI.join(benchmark["United States"])
 # all in percentage change since the start of the data (1975-01-01)
 
 HPI.dropna(inplace=True)
@@ -13,25 +17,29 @@ HPI.dropna(inplace=True)
 housing_pct = HPI.pct_change()
 housing_pct.replace([np.inf, -np.inf], np.nan, inplace=True)
 
-housing_pct['US_HPI_future'] = housing_pct['United States'].shift(-1)
+housing_pct["US_HPI_future"] = housing_pct["United States"].shift(-1)
 housing_pct.dropna(inplace=True)
 
+
 def create_labels(cur_hpi, fut_hpi):
     if fut_hpi > cur_hpi:
         return 1
     else:
         return 0
 
-housing_pct['label'] = list(map(create_labels, housing_pct['United States'], housing_pct['US_HPI_future']))
+
+housing_pct["label"] = list(
+    map(create_labels, housing_pct["United States"], housing_pct["US_HPI_future"])
+)
 
 # housing_pct['ma_apply_example'] = housing_pct['M30'].rolling(window=10).apply(moving_average)
 # print(housing_pct.tail())
-X = np.array(housing_pct.drop(['label', 'US_HPI_future'], 1))
-y = np.array(housing_pct['label'])
+X = np.array(housing_pct.drop(["label", "US_HPI_future"], 1))
+y = np.array(housing_pct["label"])
 
-X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.25)
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
 
 tpot = TPOTClassifier(generations=10, population_size=20, verbosity=2)
 tpot.fit(X_train, y_train)
 print(tpot.score(X_test, y_test))
-tpot.export('HPI_tpot_pipeline.py')
+tpot.export("HPI_tpot_pipeline.py")

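The create_labels/map pattern above evaluates one row at a time; a vectorized sketch of the same labeling (my alternative, not part of the commit):

import numpy as np
# 1 when next month's US HPI change beats this month's, else 0
housing_pct["label"] = np.where(
    housing_pct["US_HPI_future"] > housing_pct["United States"], 1, 0
)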
+ 42 - 29
sentdex_data_analysis/pandas_additionalEconomic.py

@@ -1,70 +1,83 @@
 import pickle
-import pandas as pd 
-import quandl 
-import matplotlib.pyplot as plt 
+import pandas as pd
+import quandl
+import matplotlib.pyplot as plt
 from matplotlib import style
 
-style.use('seaborn')
+style.use("seaborn")
+
+quandl.ApiConfig.api_key = "rFsSehe51RLzREtYhLfo"
 
-quandl.ApiConfig.api_key = 'rFsSehe51RLzREtYhLfo'
 
 def mortgage_30yr():
-	df = quandl.get('FMAC/MORTG', trim_start="1975-01-01")
-	df['Value'] = (df['Value'] - df['Value'][0]) / df['Value'][0] * 100
-	df = df.resample('M').mean()
-	df.rename(columns={'Value': 'M30'}, inplace=True)
-	df = df['M30']
-	return df 
+    df = quandl.get("FMAC/MORTG", trim_start="1975-01-01")
+    df["Value"] = (df["Value"] - df["Value"][0]) / df["Value"][0] * 100
+    df = df.resample("M").mean()
+    df.rename(columns={"Value": "M30"}, inplace=True)
+    df = df["M30"]
+    return df
+
 
 def sp500_data():
     df = quandl.get("YAHOO/INDEX_GSPC", trim_start="1975-01-01")
-    df["Adjusted Close"] = (df["Adjusted Close"]-df["Adjusted Close"][0]) / df["Adjusted Close"][0] * 100.0
-    df=df.resample('M').mean()
-    df.rename(columns={'Adjusted Close':'sp500'}, inplace=True)
-    df = df['sp500']
+    df["Adjusted Close"] = (
+        (df["Adjusted Close"] - df["Adjusted Close"][0])
+        / df["Adjusted Close"][0]
+        * 100.0
+    )
+    df = df.resample("M").mean()
+    df.rename(columns={"Adjusted Close": "sp500"}, inplace=True)
+    df = df["sp500"]
     return df
 
+
 def gdp_data():
     df = quandl.get("BCB/4385", trim_start="1975-01-01")
-    df["Value"] = (df["Value"]-df["Value"][0]) / df["Value"][0] * 100.0
-    df=df.resample('M').mean()
-    df.rename(columns={'Value':'GDP'}, inplace=True)
-    df = df['GDP'] # DataFrame to Series
+    df["Value"] = (df["Value"] - df["Value"][0]) / df["Value"][0] * 100.0
+    df = df.resample("M").mean()
+    df.rename(columns={"Value": "GDP"}, inplace=True)
+    df = df["GDP"]  # DataFrame to Series
    return df
 
+
 def us_unemployment():
     df = quandl.get("ECPI/JOB_G", trim_start="1975-01-01")
-    df["Unemployment Rate"] = (df["Unemployment Rate"]-df["Unemployment Rate"][0]) / df["Unemployment Rate"][0] * 100.0
-    df=df.resample('1D').mean()
-    df=df.resample('M').mean()
+    df["Unemployment Rate"] = (
+        (df["Unemployment Rate"] - df["Unemployment Rate"][0])
+        / df["Unemployment Rate"][0]
+        * 100.0
+    )
+    df = df.resample("1D").mean()
+    df = df.resample("M").mean()
     return df
 
+
 # m30 = mortgage_30yr() # Series
 # sp500 = sp500_data() # Series
 # gdp = gdp_data() # Series
 # unemployment = us_unemployment() # DataFrame
 # HPI = HPI_data.join([m30, unemployment, gdp, sp500])
 
-ax1 = plt.subplot(2,1,1)
-ax2 = plt.subplot(2,1,2, sharex=ax1)
+ax1 = plt.subplot(2, 1, 1)
+ax2 = plt.subplot(2, 1, 2, sharex=ax1)
 
 # initial_state_data()
 
-pickle_in = open('fifty_states_pct.pickle' , 'rb')
+pickle_in = open("fifty_states_pct.pickle", "rb")
 HPI_data = pickle.load(pickle_in)
 
 # HPI_Benchmark()
 
-pickle_in = open('us_pct.pickle','rb')
+pickle_in = open("us_pct.pickle", "rb")
 benchmark = pickle.load(pickle_in)
 
-pickle_in = open('HPI_complete.pickle', 'rb')
+pickle_in = open("HPI_complete.pickle", "rb")
 HPI = pickle.load(pickle_in)
 HPI.dropna(inplace=True)
 print(HPI.head())
 
 
-state_HPI_M30 = HPI_data.join(HPI['M30'])
+state_HPI_M30 = HPI_data.join(HPI["M30"])
 
 
-# print(state_HPI_M30.corr().describe()['M30'])
+# print(state_HPI_M30.corr().describe()['M30'])

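Each series above is rebased to percent change from its first observation, (v - v0) / v0 * 100, so the 1975 starting point reads 0 and later values are comparable across otherwise incompatible units. A quick check with illustrative numbers:

values = [9.0, 8.1, 7.2]
rebased = [(v - values[0]) / values[0] * 100 for v in values]
print(rebased)  # [0.0, -10.0, -20.0]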
+ 13 - 10
sentdex_data_analysis/pandas_basics.py

@@ -1,25 +1,28 @@
-import pandas as pd 
-import matplotlib.pyplot as plt 
+import pandas as pd
+import matplotlib.pyplot as plt
 from matplotlib import style
-style.use('ggplot')
 
-web_stats = {'Day': [1,2,3,4,5,6],
-			 'Visitors': [54, 65, 76, 76, 34, 34],
-			 'Bounce_Rate': [54, 23, 32, 54, 54, 32]}
+style.use("ggplot")
+
+web_stats = {
+    "Day": [1, 2, 3, 4, 5, 6],
+    "Visitors": [54, 65, 76, 76, 34, 34],
+    "Bounce_Rate": [54, 23, 32, 54, 54, 32],
+}
 
 df = pd.DataFrame(web_stats)
 
 # print(df.head())
 
-df.set_index('Day', inplace = True)
+df.set_index("Day", inplace=True)
 
- # print(df.index)
+# print(df.index)
 
 df.Visitors.plot()
 
 # plt.show()
 
-print(df[['Visitors','Bounce_Rate']])
+print(df[["Visitors", "Bounce_Rate"]])
 
 ex_list = df.Visitors.tolist()
-print(ex_list)
+print(ex_list)

+ 4 - 6
sentdex_data_analysis/pandas_building_dataset.py

@@ -1,13 +1,11 @@
 import quandl
-import pandas as pd 
+import pandas as pd
 
-api_key = 'rFsSehe51RLzREtYhLfo'
+api_key = "rFsSehe51RLzREtYhLfo"
 
 # df = quandl.get('FMAC/HPI_AK', authtoken = api_key)
 
-fifty_states = pd.read_html('https://simple.wikipedia.org/wiki/List_of_U.S._states')
+fifty_states = pd.read_html("https://simple.wikipedia.org/wiki/List_of_U.S._states")
 
 for abbv in fifty_states[0][0][1:]:
-	print('FMAC/HPI_' + str(abbv))
-
-
+    print("FMAC/HPI_" + str(abbv))

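pd.read_html returns a list of DataFrames, one per HTML table found on the page, so fifty_states[0][0][1:] means: first table, the column labeled 0, every row after the header. The same lookup with clearer intermediate names (hypothetical names, identical logic):

tables = pd.read_html("https://simple.wikipedia.org/wiki/List_of_U.S._states")
states_table = tables[0]              # first table on the page
abbreviations = states_table[0][1:]   # column 0, skipping the header row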
+ 27 - 16
sentdex_data_analysis/pandas_cocantenating_appending.py

@@ -1,19 +1,31 @@
 import pandas as pd
 
-df1 = pd.DataFrame({'HPI':[80,85,88,85],
-                    'Int_rate':[2, 3, 2, 2],
-                    'US_GDP_Thousands':[50, 55, 65, 55]},
-                   index = [2001, 2002, 2003, 2004])
-
-df2 = pd.DataFrame({'HPI':[80,85,88,85],
-                    'Int_rate':[2, 3, 2, 2],
-                    'US_GDP_Thousands':[50, 55, 65, 55]},
-                   index = [2005, 2006, 2007, 2008])
-
-df3 = pd.DataFrame({'HPI':[80,85,88,85],
-                    'Int_rate':[2, 3, 2, 2],
-                    'Low_tier_HPI':[50, 52, 50, 53]},
-                   index = [2001, 2002, 2003, 2004])
+df1 = pd.DataFrame(
+    {
+        "HPI": [80, 85, 88, 85],
+        "Int_rate": [2, 3, 2, 2],
+        "US_GDP_Thousands": [50, 55, 65, 55],
+    },
+    index=[2001, 2002, 2003, 2004],
+)
+
+df2 = pd.DataFrame(
+    {
+        "HPI": [80, 85, 88, 85],
+        "Int_rate": [2, 3, 2, 2],
+        "US_GDP_Thousands": [50, 55, 65, 55],
+    },
+    index=[2005, 2006, 2007, 2008],
+)
+
+df3 = pd.DataFrame(
+    {
+        "HPI": [80, 85, 88, 85],
+        "Int_rate": [2, 3, 2, 2],
+        "Low_tier_HPI": [50, 52, 50, 53],
+    },
+    index=[2001, 2002, 2003, 2004],
+)
 
 # df1.set_index('HPI', inplace=True)
 concat = pd.concat([df1, df3])
@@ -23,6 +35,5 @@ df4 = df1.append(df3)
 print(concat)
 print(df4)
 
-s = pd.Series([[80, 2, 50],[80, 54, 56], [56, 43, 23]])
+s = pd.Series([[80, 2, 50], [80, 54, 56], [56, 43, 23]])
 print(s)
-

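Since df1 and df3 share index years 2001-2004, both concat and append simply stack the rows and keep the duplicate index labels; nothing is aligned or merged. To renumber instead (my note, not part of the commit):

concat = pd.concat([df1, df3], ignore_index=True)  # fresh 0..n-1 index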
+ 22 - 22
sentdex_data_analysis/pandas_comparisonOperators.py

@@ -1,57 +1,57 @@
 import pickle
-import pandas as pd 
-import quandl 
-import matplotlib.pyplot as plt 
+import pandas as pd
+import quandl
+import matplotlib.pyplot as plt
 from matplotlib import style
 
-style.use('seaborn')
+style.use("seaborn")
 
-bridge_height = {'meters':[10.26, 10.31, 10.27, 10.22, 10.23, 6212.42, 10.28, 10.25, 10.31]}
+bridge_height = {
+    "meters": [10.26, 10.31, 10.27, 10.22, 10.23, 6212.42, 10.28, 10.25, 10.31]
+}
 df = pd.DataFrame(bridge_height)
 
-df['std'] = df['meters'].rolling(window=2).std()
+df["std"] = df["meters"].rolling(window=2).std()
 
-df_std = df.describe()['meters']['std']
-df_mean = df.describe()['meters']['mean']
+df_std = df.describe()["meters"]["std"]
+df_mean = df.describe()["meters"]["mean"]
 
 # df = df[df['std'] < df_std] # sentdex methods
-df = df[df['meters'] < (df_mean + df_std)] # my methods
+df = df[df["meters"] < (df_mean + df_std)]  # my methods
 print(df)
 
-df['meters'].plot()
+df["meters"].plot()
 plt.show()
 
-ax1 = plt.subplot(2,1,1)
-ax2 = plt.subplot(2,1,2, sharex=ax1)
+ax1 = plt.subplot(2, 1, 1)
+ax2 = plt.subplot(2, 1, 2, sharex=ax1)
 
 # initial_state_data()
 
-pickle_in = open('fifty_states_pct.pickle' , 'rb')
+pickle_in = open("fifty_states_pct.pickle", "rb")
 HPI_data = pickle.load(pickle_in)
 
 # HPI_Benchmark()
 
-pickle_in = open('us_pct.pickle','rb')
+pickle_in = open("us_pct.pickle", "rb")
 benchmark = pickle.load(pickle_in)
 
 # rolling statistics
-HPI_data['TX12MA'] = HPI_data['TX'].rolling(window=12, center=False).mean()
-HPI_data['TX12STD']= HPI_data['TX'].rolling(window=12, center=False).std() 
+HPI_data["TX12MA"] = HPI_data["TX"].rolling(window=12, center=False).mean()
+HPI_data["TX12STD"] = HPI_data["TX"].rolling(window=12, center=False).std()
 # standard deviation is a measure of the volatility of the price
 
 HPI_data.dropna(inplace=True)
 
-TK_AK_12corr = HPI_data['TX'].rolling(window=12).corr(HPI_data['AK'])
+TK_AK_12corr = HPI_data["TX"].rolling(window=12).corr(HPI_data["AK"])
 
-HPI_data['TX'].plot(ax=ax1, label = 'TX HPI')
-HPI_data['AK'].plot(ax=ax1, label = 'AK HPI')
+HPI_data["TX"].plot(ax=ax1, label="TX HPI")
+HPI_data["AK"].plot(ax=ax1, label="AK HPI")
 ax1.legend(loc=4)
 
-TK_AK_12corr.plot(ax=ax2, label= 'TK AK 12 month correlation')
+TK_AK_12corr.plot(ax=ax2, label="TK AK 12 month correlation")
 ax2.legend(loc=4)
 
 # HPI_data[['TX12MA','TX']].plot(ax=ax1)
 # HPI_data['TX12STD'].plot(ax=ax2)
 # plt.show()
-
-

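The meters < mean + std filter works because the one bad reading (6212.42) drags the mean up to roughly 699 and inflates the sample standard deviation to roughly 2067, so the threshold lands near 2767: every real bridge height passes and only the outlier fails. Checking the arithmetic:

import numpy as np
meters = [10.26, 10.31, 10.27, 10.22, 10.23, 6212.42, 10.28, 10.25, 10.31]
print(np.mean(meters))         # ~699.4
print(np.std(meters, ddof=1))  # ~2067, dominated by the outlier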
+ 24 - 23
sentdex_data_analysis/pandas_handlingNan.py

@@ -1,23 +1,25 @@
 import pickle
-import pandas as pd 
-import quandl 
-import matplotlib.pyplot as plt 
+import pandas as pd
+import quandl
+import matplotlib.pyplot as plt
 from matplotlib import style
 
-style.use('seaborn')
+style.use("seaborn")
+
+api_key = "rFsSehe51RLzREtYhLfo"
 
-api_key = 'rFsSehe51RLzREtYhLfo'
 
 def state_list():
-    fifty_states = pd.read_html('https://simple.wikipedia.org/wiki/List_of_U.S._states')
+    fifty_states = pd.read_html("https://simple.wikipedia.org/wiki/List_of_U.S._states")
     return fifty_states[0][0][1:]
 
+
 def initial_state_data():
     states = state_list()
     main_df = pd.DataFrame()
 
     for abbv in states:
-        query = 'FMAC/HPI_' + str(abbv)
+        query = "FMAC/HPI_" + str(abbv)
         df = quandl.get(query, authtoken=api_key)
         df.columns = [str(abbv)]
         df[abbv] = (df[abbv] - df[abbv][0]) / df[abbv][0] * 100.0
@@ -26,29 +28,31 @@ def initial_state_data():
         else:
             main_df = main_df.join(df)
 
-    pickle_out = open('fifty_states_pct.pickle', 'wb')
+    pickle_out = open("fifty_states_pct.pickle", "wb")
     pickle.dump(main_df, pickle_out)
     pickle_out.close()
 
+
 def HPI_Benchmark():
-    df = quandl.get('FMAC/HPI_USA' , authtoken=api_key)
-    df['United States'] = (df['Value'] - df['Value'][0]) / df['Value'][0] * 100.0
-    
-    pickle_out = open('us_pct.pickle', 'wb')
+    df = quandl.get("FMAC/HPI_USA", authtoken=api_key)
+    df["United States"] = (df["Value"] - df["Value"][0]) / df["Value"][0] * 100.0
+
+    pickle_out = open("us_pct.pickle", "wb")
     pickle.dump(df, pickle_out)
     pickle_out.close()
 
+
 # fig = plt.figure()
-ax1 = plt.subplot(1,1,1)
+ax1 = plt.subplot(1, 1, 1)
 
 # initial_state_data()
 
-pickle_in = open('fifty_states_pct.pickle' , 'rb')
+pickle_in = open("fifty_states_pct.pickle", "rb")
 HPI_data = pickle.load(pickle_in)
 
 # HPI_Benchmark()
 
-pickle_in = open('us_pct.pickle','rb')
+pickle_in = open("us_pct.pickle", "rb")
 benchmark = pickle.load(pickle_in)
 
 # HPI_data = HPI_data.pct_change()
@@ -57,21 +61,18 @@ benchmark = pickle.load(pickle_in)
 # benchmark['United States'].plot(ax=ax1, color='k', linewidth=10)
 # plt.legend().remove()
 
-TX1yr = HPI_data['TX'].resample('A').mean()
-HPI_data['TX1yr'] = TX1yr
+TX1yr = HPI_data["TX"].resample("A").mean()
+HPI_data["TX1yr"] = TX1yr
 # print(HPI_data[['TX1yr','TX']])
 print(HPI_data.isnull().values.sum())
 
-HPI_data.fillna(method='bfill', inplace=True)
+HPI_data.fillna(method="bfill", inplace=True)
 # HPI_data.dropna(inplace=True)
 
 # print(HPI_data[['TX1yr','TX']])
 
-HPI_data[['TX1yr', 'TX']].plot(ax=ax1)
+HPI_data[["TX1yr", "TX"]].plot(ax=ax1)
 # plt.show()
 
-print(HPI_data['TX'].hasnans)
+print(HPI_data["TX"].hasnans)
 print(HPI_data.isnull().values.sum())
-
-
-

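Backfilling matters here because the annually resampled TX1yr column only holds values at year-end dates; bfill copies each year-end value backwards over the preceding months instead of throwing those rows away. In newer pandas releases the method= argument is deprecated in favor of the dedicated method (hedged equivalent):

HPI_data["TX1yr"] = HPI_data["TX1yr"].bfill()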
+ 42 - 33
sentdex_data_analysis/pandas_indexing.py

@@ -1,62 +1,71 @@
 import pickle
-import pandas as pd 
-import quandl 
-import matplotlib.pyplot as plt 
+import pandas as pd
+import quandl
+import matplotlib.pyplot as plt
 from matplotlib import style
 
-style.use('seaborn')
+style.use("seaborn")
+
+api_key = "rFsSehe51RLzREtYhLfo"
 
-api_key = 'rFsSehe51RLzREtYhLfo'
 
 def state_list():
-	fifty_states = pd.read_html('https://simple.wikipedia.org/wiki/List_of_U.S._states')
-	return fifty_states[0][0][1:]
+    fifty_states = pd.read_html("https://simple.wikipedia.org/wiki/List_of_U.S._states")
+    return fifty_states[0][0][1:]
+
 
 def initial_state_data():
-	states = state_list()
-	main_df = pd.DataFrame()
-
-	for abbv in states:
-		query = 'FMAC/HPI_' + str(abbv)
-		df = quandl.get(query, authtoken=api_key)
-		df.columns = [str(abbv)]
-		df[abbv] = (df[abbv] - df[abbv][0]) / df[abbv][0] * 100.0
-		if main_df.empty:
-			main_df = df
-		else:
-			main_df = main_df.join(df)
-
-	pickle_out = open('fifty_states_pct.pickle', 'wb')
-	pickle.dump(main_df, pickle_out)
-	pickle_out.close()
+    states = state_list()
+    main_df = pd.DataFrame()
+
+    for abbv in states:
+        query = "FMAC/HPI_" + str(abbv)
+        df = quandl.get(query, authtoken=api_key)
+        df.columns = [str(abbv)]
+        df[abbv] = (df[abbv] - df[abbv][0]) / df[abbv][0] * 100.0
+        if main_df.empty:
+            main_df = df
+        else:
+            main_df = main_df.join(df)
+
+    pickle_out = open("fifty_states_pct.pickle", "wb")
+    pickle.dump(main_df, pickle_out)
+    pickle_out.close()
+
 
 # initial_state_data()
 
+
 def HPI_Benchmark():
-	df = quandl.get('FMAC/HPI_USA' , authtoken=api_key)
-	df['United States'] = (df['Value'] - df['Value'][0]) / df['Value'][0] * 100.0
-	return df
+    df = quandl.get("FMAC/HPI_USA", authtoken=api_key)
+    df["United States"] = (df["Value"] - df["Value"][0]) / df["Value"][0] * 100.0
+    return df
+
 
 fig = plt.figure()
-ax1 = plt.subplot2grid((1,1), (0,0))
+ax1 = plt.subplot2grid((1, 1), (0, 0))
 
-pickle_in = open('fifty_states_pct.pickle' , 'rb')
+pickle_in = open("fifty_states_pct.pickle", "rb")
 HPI_data = pickle.load(pickle_in)
 benchmark = HPI_Benchmark()
 # HPI_data = HPI_data.pct_change()
 
 HPI_data.plot(ax=ax1)
-benchmark['United States'].plot(ax=ax1, color='k', linewidth=10)
+benchmark["United States"].plot(ax=ax1, color="k", linewidth=10)
 plt.legend().remove()
 
 HPI_complete_data = HPI_data
-HPI_complete_data['United States'] = benchmark['United States']
+HPI_complete_data["United States"] = benchmark["United States"]
 # print(HPI_complete_data.head())
 HPI_State_Correlation = HPI_data.corr()
 
 HPI_complete_correlation = HPI_complete_data.corr()
-HPI_US_correlation = HPI_complete_correlation['United States']
+HPI_US_correlation = HPI_complete_correlation["United States"]
 HPI_US_correlation_sorted = HPI_US_correlation.sort_values(ascending=True)
-print(HPI_US_correlation_sorted[HPI_US_correlation_sorted == HPI_US_correlation_sorted[-2]].index)
+print(
+    HPI_US_correlation_sorted[
+        HPI_US_correlation_sorted == HPI_US_correlation_sorted[-2]
+    ].index
+)
 plt.show()
-# print(HPI_data[['IL','WI']].corr())
+# print(HPI_data[['IL','WI']].corr())

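The boolean trick with HPI_US_correlation_sorted[-2] grabs the second-highest correlation because the highest is always the United States column correlated with itself (exactly 1.0). A sketch of the same lookup stated directly (my phrasing, not the commit's):

most_correlated = (
    HPI_complete_correlation["United States"].drop("United States").idxmax()
)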
+ 6 - 5
sentdex_data_analysis/pandas_intro.py

@@ -1,9 +1,10 @@
-import pandas as pd 
+import pandas as pd
 import datetime
 from pandas_datareader import data
-import matplotlib.pyplot as plt 
+import matplotlib.pyplot as plt
 from matplotlib import style
-style.use('seaborn-dark')
+
+style.use("seaborn-dark")
 
 start = datetime.datetime(2010, 1, 1)
 end = datetime.datetime(2016, 12, 31)
@@ -12,6 +13,6 @@ df = data.DataReader("GM", "yahoo", start, end)
 
 print(df.head())
 
-df['Adj Close'].plot()
+df["Adj Close"].plot()
 
-plt.show()
+plt.show()

+ 17 - 16
sentdex_data_analysis/pandas_joiningData.py

@@ -1,32 +1,33 @@
 import pickle
-import pandas as pd 
-import quandl 
-import matplotlib.pyplot as plt 
+import pandas as pd
+import quandl
+import matplotlib.pyplot as plt
 from matplotlib import style
 
-style.use('seaborn')
+style.use("seaborn")
+
+quandl.ApiConfig.api_key = "rFsSehe51RLzREtYhLfo"
 
-quandl.ApiConfig.api_key = 'rFsSehe51RLzREtYhLfo'
 
 def mortgage_30yr():
-	df = quandl.get('FMAC/MORTG')
-	df = df[df.index > "1974-12-01"]
-	df = (df['Value'] - df['Value'][0]) / df['Value'][0] * 100
-	df = df.resample('M').mean()
-	return df 
+    df = quandl.get("FMAC/MORTG")
+    df = df[df.index > "1974-12-01"]
+    df = (df["Value"] - df["Value"][0]) / df["Value"][0] * 100
+    df = df.resample("M").mean()
+    return df
 
 
-ax1 = plt.subplot(2,1,1)
-ax2 = plt.subplot(2,1,2, sharex=ax1)
+ax1 = plt.subplot(2, 1, 1)
+ax2 = plt.subplot(2, 1, 2, sharex=ax1)
 
 # initial_state_data()
 
-pickle_in = open('fifty_states_pct.pickle' , 'rb')
+pickle_in = open("fifty_states_pct.pickle", "rb")
 HPI_data = pickle.load(pickle_in)
 
 # HPI_Benchmark()
 
-pickle_in = open('us_pct.pickle','rb')
+pickle_in = open("us_pct.pickle", "rb")
 benchmark = pickle.load(pickle_in)
 
 
@@ -35,6 +36,6 @@ m30 = mortgage_30yr()
 HPI_Bench = benchmark
 
 state_HPI_M30 = HPI_data.join(m30)
-state_HPI_M30.rename({'Value' : 'M30'}, inplace=True)
+state_HPI_M30.rename({"Value": "M30"}, inplace=True)
 
-print(state_HPI_M30.corr().describe()['Value'])
+print(state_HPI_M30.corr().describe()["Value"])

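One thing black cannot fix: rename({"Value": "M30"}, inplace=True) targets index labels by default, so the column keeps its old name — which is why the next line still asks for "Value". A sketch of the intended column rename (my correction, not part of this commit):

state_HPI_M30.rename(columns={"Value": "M30"}, inplace=True)
print(state_HPI_M30.corr().describe()["M30"])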
+ 21 - 14
sentdex_data_analysis/pandas_joining_merging.py

@@ -1,21 +1,29 @@
 import pandas as pd
 
-df1 = pd.DataFrame({'HPI':[80,86,88,85],
-                    'Int_rate':[2, 3, 2, 2],
-                    'US_GDP_Thousands':[50, 55, 65, 55],
-                   'Year' : [2001, 2002, 2003, 2005]})
-
-'''
+df1 = pd.DataFrame(
+    {
+        "HPI": [80, 86, 88, 85],
+        "Int_rate": [2, 3, 2, 2],
+        "US_GDP_Thousands": [50, 55, 65, 55],
+        "Year": [2001, 2002, 2003, 2005],
+    }
+)
+
+"""
 df2 = pd.DataFrame({'HPI':[80,85,88,85],
                     'Int_rate':[5, 3, 2, 2],
                     'US_GDP_Thousands':[50, 55, 65, 55]},
                    index = [2005, 2006, 2007, 2008])
-'''
+"""
 
-df3 = pd.DataFrame({'HPI':[95, 86, 88, 90],
-                    'Unemployment':[7, 8, 9, 6],
-                    'Low_tier_HPI':[50, 52, 50, 53],
-                   'Year' : [2000, 2002, 2003, 2004]})
+df3 = pd.DataFrame(
+    {
+        "HPI": [95, 86, 88, 90],
+        "Unemployment": [7, 8, 9, 6],
+        "Low_tier_HPI": [50, 52, 50, 53],
+        "Year": [2000, 2002, 2003, 2004],
+    }
+)
 
 
 # print(pd.merge(df1, df3, on=['HPI']))
@@ -26,6 +34,5 @@ df3 = pd.DataFrame({'HPI':[95, 86, 88, 90],
 # df3.set_index('Year', inplace=True)
 
 
-
-merged = pd.merge(df1, df3, on='Year', how='outer')
-print(merged)
+merged = pd.merge(df1, df3, on="Year", how="outer")
+print(merged)

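With how="outer" the merge keeps the union of Year keys, 2000 through 2005, filling NaN wherever a year appears in only one frame; df1 and df3 overlap only on 2002 and 2003. The other how options shrink that set:

inner = pd.merge(df1, df3, on="Year", how="inner")  # only 2002 and 2003 survive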
+ 26 - 15
sentdex_data_analysis/pandas_mappingFunctions.py

@@ -1,15 +1,16 @@
 import pickle
-import pandas as pd 
-import quandl 
-import matplotlib.pyplot as plt 
+import pandas as pd
+import quandl
+import matplotlib.pyplot as plt
 from matplotlib import style
-import numpy as np 
-from statistics import mean 
+import numpy as np
+from statistics import mean
 
-style.use('seaborn-dark-palette')
+style.use("seaborn-dark-palette")
+
+ax1 = plt.subplot(2, 1, 1)
+ax2 = plt.subplot(2, 1, 2, sharex=ax1)
 
-ax1 = plt.subplot(2,1,1)
-ax2 = plt.subplot(2,1,2, sharex=ax1)
 
 def create_labels(cur_hpi, fut_hpi):
     if fut_hpi > cur_hpi:
@@ -17,12 +18,18 @@ def create_labels(cur_hpi, fut_hpi):
     else:
         return 0
 
+
 def moving_average(values):
     return mean(values)
 
-benchmark = pd.read_pickle('us_pct.pickle')  # us overall housing price index percentage change
-HPI = pd.read_pickle('HPI_complete.pickle') # all of the state data, thirty year mortgage, unemployment rate, GDP, SP500
-HPI = HPI.join(benchmark['United States'])
+
+benchmark = pd.read_pickle(
+    "us_pct.pickle"
+)  # us overall housing price index percentage change
+HPI = pd.read_pickle(
+    "HPI_complete.pickle"
+)  # all of the state data, thirty year mortgage, unemployment rate, GDP, SP500
+HPI = HPI.join(benchmark["United States"])
 # all in percentage change since the start of the data (1975-01-01)
 
 HPI.dropna(inplace=True)
@@ -30,14 +37,18 @@ HPI.dropna(inplace=True)
 housing_pct = HPI.pct_change()
 housing_pct.replace([np.inf, -np.inf], np.nan, inplace=True)
 
-housing_pct['US_HPI_future'] = housing_pct['United States'].shift(-1)
+housing_pct["US_HPI_future"] = housing_pct["United States"].shift(-1)
 housing_pct.dropna(inplace=True)
 
-housing_pct['label'] = list(map(create_labels, housing_pct['United States'], housing_pct['US_HPI_future']))
+housing_pct["label"] = list(
+    map(create_labels, housing_pct["United States"], housing_pct["US_HPI_future"])
+)
 
 # housing_pct['ma_apply_example'] = pd.rolling_apply(housing_pct['M30'], 10, moving_average)
-housing_pct['ma_apply_example'] = housing_pct['M30'].rolling(window=10).apply(moving_average)
+housing_pct["ma_apply_example"] = (
+    housing_pct["M30"].rolling(window=10).apply(moving_average)
+)
 print(housing_pct.tail())
 
 # state_HPI_M30 = HPI_data.join(HPI['M30']) # fifty states plus mortgage data
-# print(state_HPI_M30.corr().describe().tail())
+# print(state_HPI_M30.corr().describe().tail())

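rolling(window=10).apply(moving_average) calls back into Python once per window, which is flexible but slow; for a plain mean the built-in aggregation is equivalent and vectorized:

housing_pct["ma_builtin"] = housing_pct["M30"].rolling(window=10).mean()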
+ 35 - 33
sentdex_data_analysis/pandas_percentChange_correlation.py

@@ -1,63 +1,65 @@
 import pickle
-import pandas as pd 
-import quandl 
-import matplotlib.pyplot as plt 
+import pandas as pd
+import quandl
+import matplotlib.pyplot as plt
 from matplotlib import style
 
-style.use('seaborn')
+style.use("seaborn")
+
+api_key = "rFsSehe51RLzREtYhLfo"
 
-api_key = 'rFsSehe51RLzREtYhLfo'
 
 def state_list():
-	fifty_states = pd.read_html('https://simple.wikipedia.org/wiki/List_of_U.S._states')
-	return fifty_states[0][0][1:]
+    fifty_states = pd.read_html("https://simple.wikipedia.org/wiki/List_of_U.S._states")
+    return fifty_states[0][0][1:]
 
-def initial_state_data():
-	states = state_list()
-	main_df = pd.DataFrame()
 
-	for abbv in states:
-		query = 'FMAC/HPI_' + str(abbv)
-		df = quandl.get(query, authtoken=api_key)
-		df.columns = [str(abbv)]
-		df[abbv] = (df[abbv] - df[abbv][0]) / df[abbv][0] * 100.0
-		if main_df.empty:
-			main_df = df
-		else:
-			main_df = main_df.join(df)
+def initial_state_data():
+    states = state_list()
+    main_df = pd.DataFrame()
 
-	print(main_df.head())
+    for abbv in states:
+        query = "FMAC/HPI_" + str(abbv)
+        df = quandl.get(query, authtoken=api_key)
+        df.columns = [str(abbv)]
+        df[abbv] = (df[abbv] - df[abbv][0]) / df[abbv][0] * 100.0
+        if main_df.empty:
+            main_df = df
+        else:
+            main_df = main_df.join(df)
 
-	pickle_out = open('fifty_states_pct.pickle', 'wb')
-	pickle.dump(main_df, pickle_out)
-	pickle_out.close()
+    print(main_df.head())
 
+    pickle_out = open("fifty_states_pct.pickle", "wb")
+    pickle.dump(main_df, pickle_out)
+    pickle_out.close()
+
 
 
 def HPI_Benchmark():
-	df = quandl.get('FMAC/HPI_USA' , authtoken=api_key)
-	df['United States'] = (df['Value'] - df['Value'][0]) / df['Value'][0] * 100.0
-	
-	pickle_out = open('us_pct.pickle', 'wb')
-	pickle.dump(df, pickle_out)
-	pickle_out.close()
+    df = quandl.get("FMAC/HPI_USA", authtoken=api_key)
+    df["United States"] = (df["Value"] - df["Value"][0]) / df["Value"][0] * 100.0
+
+    pickle_out = open("us_pct.pickle", "wb")
+    pickle.dump(df, pickle_out)
+    pickle_out.close()
+
 
 fig = plt.figure()
-ax1 = plt.subplot2grid((1,1), (0,0))
+ax1 = plt.subplot2grid((1, 1), (0, 0))
 
 # initial_state_data()
-pickle_in = open('fifty_states_pct.pickle' , 'rb')
+pickle_in = open("fifty_states_pct.pickle", "rb")
 HPI_data = pickle.load(pickle_in)
 
 
 # HPI_Benchmark()
-pickle_in = open('us_pct.pickle' , 'rb')
+pickle_in = open("us_pct.pickle", "rb")
 benchmark = pickle.load(pickle_in)
 
 # HPI_data = HPI_data.pct_change()
 
 HPI_data.plot(ax=ax1)
-benchmark['United States'].plot(ax=ax1, color='k', linewidth=10)
+benchmark["United States"].plot(ax=ax1, color="k", linewidth=10)
 plt.legend().remove()
 
 HPI_State_Correlation = HPI_data.corr()

+ 24 - 21
sentdex_data_analysis/pandas_pickling.py

@@ -1,35 +1,38 @@
 import pickle
 import pickle
-import pandas as pd 
-import quandl 
+import pandas as pd
+import quandl
+
+api_key = "rFsSehe51RLzREtYhLfo"
 
 
-api_key = 'rFsSehe51RLzREtYhLfo'
 
 
 def state_list():
 def state_list():
-	fifty_states = pd.read_html('https://simple.wikipedia.org/wiki/List_of_U.S._states')
-	return fifty_states[0][0][1:]
+    fifty_states = pd.read_html("https://simple.wikipedia.org/wiki/List_of_U.S._states")
+    return fifty_states[0][0][1:]
+
 
 
 def initial_state_data():
 def initial_state_data():
-	states = state_list()
-	main_df = pd.DataFrame()
+    states = state_list()
+    main_df = pd.DataFrame()
+
+    for abbv in states:
+        query = "FMAC/HPI_" + str(abbv)
+        df = quandl.get(query, authtoken=api_key)
+        df.columns = [str(abbv)]
+        if main_df.empty:
+            main_df = df
+        else:
+            main_df = main_df.join(df)
 
 
-	for abbv in states:
-		query = 'FMAC/HPI_' + str(abbv)
-		df = quandl.get(query, authtoken=api_key)
-		df.columns = [str(abbv)]
-		if main_df.empty:
-			main_df = df
-		else:
-			main_df = main_df.join(df)
+    print(main_df.head())
 
 
-	print(main_df.head())
+    pickle_out = open("fifty_states.pickle", "wb")
+    pickle.dump(main_df, pickle_out)
+    pickle_out.close()
 
 
-	pickle_out = open('fifty_states.pickle', 'wb')
-	pickle.dump(main_df, pickle_out)
-	pickle_out.close()
 
 
 # initial_state_data()
 # initial_state_data()
 
 
-pickle_in = open('fifty_states.pickle' , 'rb')
+pickle_in = open("fifty_states.pickle", "rb")
 HPI_data = pickle.load(pickle_in)
 HPI_data = pickle.load(pickle_in)
 
 
-print(HPI_data)
+print(HPI_data)

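The open/pickle.dump/close dance has a pandas shorthand, already used by the TPOT scripts above; an equivalent sketch:

main_df.to_pickle("fifty_states.pickle")          # save
HPI_data = pd.read_pickle("fifty_states.pickle")  # load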
+ 4 - 4
sentdex_data_analysis/pandas_pickling_sentdex.py

@@ -2,16 +2,16 @@ import quandl
 import pandas as pd
 
 # Not necessary, I just do this so I do not show my API key.
-api_key = 'rFsSehe51RLzREtYhLfo'
-fiddy_states = pd.read_html('https://simple.wikipedia.org/wiki/List_of_U.S._states')
+api_key = "rFsSehe51RLzREtYhLfo"
+fiddy_states = pd.read_html("https://simple.wikipedia.org/wiki/List_of_U.S._states")
 
 main_df = pd.DataFrame()
 
 for abbv in fiddy_states[0][0][1:]:
-    query = "FMAC/HPI_"+str(abbv)
+    query = "FMAC/HPI_" + str(abbv)
     df = quandl.get(query, authtoken=api_key)
 
     if main_df.empty:
         main_df = df
     else:
-        main_df = main_df.join(df)
+        main_df = main_df.join(df)

+ 43 - 39
sentdex_data_analysis/pandas_resampling.py

@@ -1,54 +1,58 @@
 import pickle
-import pandas as pd 
-import quandl 
-import matplotlib.pyplot as plt 
+import pandas as pd
+import quandl
+import matplotlib.pyplot as plt
 from matplotlib import style
 
-style.use('seaborn')
+style.use("seaborn")
+
+api_key = "rFsSehe51RLzREtYhLfo"
 
-api_key = 'rFsSehe51RLzREtYhLfo'
 
 def state_list():
-	fifty_states = pd.read_html('https://simple.wikipedia.org/wiki/List_of_U.S._states')
-	return fifty_states[0][0][1:]
+    fifty_states = pd.read_html("https://simple.wikipedia.org/wiki/List_of_U.S._states")
+    return fifty_states[0][0][1:]
+
 
 def initial_state_data():
-	states = state_list()
-	main_df = pd.DataFrame()
-
-	for abbv in states:
-		query = 'FMAC/HPI_' + str(abbv)
-		df = quandl.get(query, authtoken=api_key)
-		df.columns = [str(abbv)]
-		df[abbv] = (df[abbv] - df[abbv][0]) / df[abbv][0] * 100.0
-		if main_df.empty:
-			main_df = df
-		else:
-			main_df = main_df.join(df)
-
-	pickle_out = open('fifty_states_pct.pickle', 'wb')
-	pickle.dump(main_df, pickle_out)
-	pickle_out.close()
+    states = state_list()
+    main_df = pd.DataFrame()
+
+    for abbv in states:
+        query = "FMAC/HPI_" + str(abbv)
+        df = quandl.get(query, authtoken=api_key)
+        df.columns = [str(abbv)]
+        df[abbv] = (df[abbv] - df[abbv][0]) / df[abbv][0] * 100.0
+        if main_df.empty:
+            main_df = df
+        else:
+            main_df = main_df.join(df)
+
+    pickle_out = open("fifty_states_pct.pickle", "wb")
+    pickle.dump(main_df, pickle_out)
+    pickle_out.close()
+
 
 def HPI_Benchmark():
-	df = quandl.get('FMAC/HPI_USA' , authtoken=api_key)
-	df['United States'] = (df['Value'] - df['Value'][0]) / df['Value'][0] * 100.0
-	
-	pickle_out = open('us_pct.pickle', 'wb')
-	pickle.dump(df, pickle_out)
-	pickle_out.close()
+    df = quandl.get("FMAC/HPI_USA", authtoken=api_key)
+    df["United States"] = (df["Value"] - df["Value"][0]) / df["Value"][0] * 100.0
+
+    pickle_out = open("us_pct.pickle", "wb")
+    pickle.dump(df, pickle_out)
+    pickle_out.close()
+
 
 # fig = plt.figure()
-ax1 = plt.subplot(1,1,1)
+ax1 = plt.subplot(1, 1, 1)
 
 # initial_state_data()
 
-pickle_in = open('fifty_states_pct.pickle' , 'rb')
+pickle_in = open("fifty_states_pct.pickle", "rb")
 HPI_data = pickle.load(pickle_in)
 
 # HPI_Benchmark()
 
-pickle_in = open('us_pct.pickle','rb')
+pickle_in = open("us_pct.pickle", "rb")
 benchmark = pickle.load(pickle_in)
 
 # HPI_data = HPI_data.pct_change()
@@ -58,13 +62,13 @@ benchmark = pickle.load(pickle_in)
 # plt.legend().remove()
 
 HPI_complete_data = HPI_data
-HPI_complete_data['United States'] = benchmark['United States']
-US1YR = benchmark['United States'].resample('A').mean() # new method of resampling
-HPI1YR = HPI_data.resample('A').mean() # can change rate of sampling and method of sampling 
+HPI_complete_data["United States"] = benchmark["United States"]
+US1YR = benchmark["United States"].resample("A").mean()  # new method of resampling
+HPI1YR = HPI_data.resample(
+    "A"
+).mean()  # can change rate of sampling and method of sampling
 
 US1YR.plot(ax=ax1)
-benchmark['United States'].plot(ax=ax1)
-plt.legend(['Yearly sampled', 'Monthly sampled']) # original data is sampled monthly
+benchmark["United States"].plot(ax=ax1)
+plt.legend(["Yearly sampled", "Monthly sampled"])  # original data is sampled monthly
 plt.show()
-
-

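resample pairs a frequency rule with an aggregation: "A" buckets the monthly series by calendar year end ("M" is month end, "D" daily) and .mean() averages the twelve values in each bucket. Any reducer drops in the same way (illustrative):

yearly_peak = benchmark["United States"].resample("A").max()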
+ 29 - 27
sentdex_data_analysis/pandas_rollingStatistics.py

@@ -1,23 +1,25 @@
 import pickle
-import pandas as pd 
-import quandl 
-import matplotlib.pyplot as plt 
+import pandas as pd
+import quandl
+import matplotlib.pyplot as plt
 from matplotlib import style
 
-style.use('seaborn')
+style.use("seaborn")
+
+api_key = "rFsSehe51RLzREtYhLfo"
 
-api_key = 'rFsSehe51RLzREtYhLfo'
 
 def state_list():
-    fifty_states = pd.read_html('https://simple.wikipedia.org/wiki/List_of_U.S._states')
+    fifty_states = pd.read_html("https://simple.wikipedia.org/wiki/List_of_U.S._states")
     return fifty_states[0][0][1:]
 
+
 def initial_state_data():
     states = state_list()
     main_df = pd.DataFrame()
 
     for abbv in states:
-        query = 'FMAC/HPI_' + str(abbv)
+        query = "FMAC/HPI_" + str(abbv)
         df = quandl.get(query, authtoken=api_key)
         df.columns = [str(abbv)]
         df[abbv] = (df[abbv] - df[abbv][0]) / df[abbv][0] * 100.0
@@ -26,29 +28,31 @@ def initial_state_data():
         else:
             main_df = main_df.join(df)
 
-    pickle_out = open('fifty_states_pct.pickle', 'wb')
+    pickle_out = open("fifty_states_pct.pickle", "wb")
     pickle.dump(main_df, pickle_out)
     pickle_out.close()
 
+
 def HPI_Benchmark():
-    df = quandl.get('FMAC/HPI_USA' , authtoken=api_key)
-    df['United States'] = (df['Value'] - df['Value'][0]) / df['Value'][0] * 100.0
-    
-    pickle_out = open('us_pct.pickle', 'wb')
+    df = quandl.get("FMAC/HPI_USA", authtoken=api_key)
+    df["United States"] = (df["Value"] - df["Value"][0]) / df["Value"][0] * 100.0
+
+    pickle_out = open("us_pct.pickle", "wb")
     pickle.dump(df, pickle_out)
     pickle_out.close()
 
-ax1 = plt.subplot(2,1,1)
-ax2 = plt.subplot(2,1,2, sharex=ax1)
+
+ax1 = plt.subplot(2, 1, 1)
+ax2 = plt.subplot(2, 1, 2, sharex=ax1)
 
 # initial_state_data()
 
-pickle_in = open('fifty_states_pct.pickle' , 'rb')
+pickle_in = open("fifty_states_pct.pickle", "rb")
 HPI_data = pickle.load(pickle_in)
 
 # HPI_Benchmark()
 
-pickle_in = open('us_pct.pickle','rb')
+pickle_in = open("us_pct.pickle", "rb")
 benchmark = pickle.load(pickle_in)
 
 # HPI_data = HPI_data.pct_change()
@@ -57,12 +61,12 @@ benchmark = pickle.load(pickle_in)
 # benchmark['United States'].plot(ax=ax1, color='k', linewidth=10)
 # plt.legend().remove()
 
-TX1yr = HPI_data['TX'].resample('A').mean()
-HPI_data['TX1yr'] = TX1yr
+TX1yr = HPI_data["TX"].resample("A").mean()
+HPI_data["TX1yr"] = TX1yr
 # print(HPI_data[['TX1yr','TX']])
 print(HPI_data.isnull().values.sum())
 
-HPI_data.fillna(method='bfill', inplace=True)
+HPI_data.fillna(method="bfill", inplace=True)
 # HPI_data.dropna(inplace=True)
 print(HPI_data.isnull().values.sum())
 
@@ -74,23 +78,21 @@ print(HPI_data.isnull().values.sum())
 # print(HPI_data['TX'].hasnans)
 
 # rolling statistics
-HPI_data['TX12MA'] = HPI_data['TX'].rolling(window=12, center=False).mean()
-HPI_data['TX12STD']= HPI_data['TX'].rolling(window=12, center=False).std() 
+HPI_data["TX12MA"] = HPI_data["TX"].rolling(window=12, center=False).mean()
+HPI_data["TX12STD"] = HPI_data["TX"].rolling(window=12, center=False).std()
 # standard deviation is a measure of the volatility of the price
 HPI_data.dropna(inplace=True)
 
-TK_AK_12corr = HPI_data['TX'].rolling(window=12).corr(HPI_data['AK'])
+TK_AK_12corr = HPI_data["TX"].rolling(window=12).corr(HPI_data["AK"])
 
-HPI_data['TX'].plot(ax=ax1, label = 'TX HPI')
-HPI_data['AK'].plot(ax=ax1, label = 'AK HPI')
+HPI_data["TX"].plot(ax=ax1, label="TX HPI")
+HPI_data["AK"].plot(ax=ax1, label="AK HPI")
 ax1.legend(loc=4)
 
-TK_AK_12corr.plot(ax=ax2, label= 'TK AK 12 month correlation')
+TK_AK_12corr.plot(ax=ax2, label="TK AK 12 month correlation")
 ax2.legend(loc=4)
 
 # HPI_data[['TX12MA','TX']].plot(ax=ax1)
 # HPI_data['TX12STD'].plot(ax=ax2)
 # print(HPI_data.head())
 plt.show()
-
-

+ 32 - 23
sentdex_data_analysis/pandas_scikitLearn.py

@@ -1,19 +1,20 @@
 import pickle
-import pandas as pd 
-import quandl 
-import matplotlib.pyplot as plt 
+import pandas as pd
+import quandl
+import matplotlib.pyplot as plt
 from matplotlib import style
-import numpy as np 
-from statistics import mean 
+import numpy as np
+from statistics import mean
 from sklearn import svm
 from sklearn.preprocessing import scale, MinMaxScaler, MaxAbsScaler
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import train_test_split
 
-style.use('seaborn-dark-palette')
+style.use("seaborn-dark-palette")
+
+ax1 = plt.subplot(2, 1, 1)
+ax2 = plt.subplot(2, 1, 2, sharex=ax1)
 
-ax1 = plt.subplot(2,1,1)
-ax2 = plt.subplot(2,1,2, sharex=ax1)
 
 def create_labels(cur_hpi, fut_hpi):
     if fut_hpi > cur_hpi:
@@ -21,12 +22,18 @@ def create_labels(cur_hpi, fut_hpi):
     else:
         return 0
 
+
 def moving_average(values):
     return mean(values)
 
-benchmark = pd.read_pickle('us_pct.pickle')  # us overall housing price index percentage change
-HPI = pd.read_pickle('HPI_complete.pickle') # all of the state data, thirty year mortgage, unemployment rate, GDP, SP500
-HPI = HPI.join(benchmark['United States'])
+
+benchmark = pd.read_pickle(
+    "us_pct.pickle"
+)  # us overall housing price index percentage change
+HPI = pd.read_pickle(
+    "HPI_complete.pickle"
+)  # all of the state data, thirty year mortgage, unemployment rate, GDP, SP500
+HPI = HPI.join(benchmark["United States"])
 # all in percentage change since the start of the data (1975-01-01)
 
 HPI.dropna(inplace=True)
@@ -34,15 +41,17 @@ HPI.dropna(inplace=True)
 housing_pct = HPI.pct_change()
 housing_pct.replace([np.inf, -np.inf], np.nan, inplace=True)
 
-housing_pct['US_HPI_future'] = housing_pct['United States'].shift(-1)
+housing_pct["US_HPI_future"] = housing_pct["United States"].shift(-1)
 housing_pct.dropna(inplace=True)
 
-housing_pct['label'] = list(map(create_labels, housing_pct['United States'], housing_pct['US_HPI_future']))
+housing_pct["label"] = list(
+    map(create_labels, housing_pct["United States"], housing_pct["US_HPI_future"])
+)
 
 # housing_pct['ma_apply_example'] = housing_pct['M30'].rolling(window=10).apply(moving_average)
 # print(housing_pct.tail())
-X = np.array(housing_pct.drop(['label', 'US_HPI_future'], 1))
-y = np.array(housing_pct['label'])
+X = np.array(housing_pct.drop(["label", "US_HPI_future"], 1))
+y = np.array(housing_pct["label"])
 
 X = scale(X)
 
@@ -54,13 +63,13 @@ clflog_accuracy = []
 clfsvm_accuracy = []
 
 for i in range(10):
-	clflog = LogisticRegression(C=49.0, dual=False, penalty="l1")
-	clflog.fit(X_train, y_train)
-	clflog_accuracy.append(clflog.score(x_test,y_test))
+    clflog = LogisticRegression(C=49.0, dual=False, penalty="l1")
+    clflog.fit(X_train, y_train)
+    clflog_accuracy.append(clflog.score(x_test, y_test))
 
-	clfsvm = svm.SVC(kernel='linear')
-	clfsvm.fit(X_train, y_train)
-	clfsvm_accuracy.append(clfsvm.score(x_test,y_test))
+    clfsvm = svm.SVC(kernel="linear")
+    clfsvm.fit(X_train, y_train)
+    clfsvm_accuracy.append(clfsvm.score(x_test, y_test))
 
-print('Accuracy of logistic regression = %0.4f' % (mean(clflog_accuracy) * 100))
-print('Accuracy of support vector machine = %0.4f' % (mean(clfsvm_accuracy) * 100))
+print("Accuracy of logistic regression = %0.4f" % (mean(clflog_accuracy) * 100))
+print("Accuracy of support vector machine = %0.4f" % (mean(clfsvm_accuracy) * 100))

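Worth flagging while reformatting: the loop fits on X_train but scores on lowercase x_test. If the train_test_split call (outside the hunks shown) used the X_train/X_test naming seen elsewhere in this repo, x_test is undefined and the script fails with a NameError. A hedged corrected sketch of the loop body (my fix, not part of the commit; recent scikit-learn also requires solver="liblinear" for the l1 penalty):

clflog = LogisticRegression(C=49.0, penalty="l1", solver="liblinear")
clflog.fit(X_train, y_train)
clflog_accuracy.append(clflog.score(X_test, y_test))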
+ 5 - 4
sentdex_data_analysis/tpot_basic.py

@@ -4,11 +4,12 @@ from sklearn.model_selection import train_test_split
 
 digits = load_digits()
 
-X_train, X_test, y_train,  y_test = train_test_split(digits.data, digits.target,
-													train_size = 0.75, test_size = 0.25)
+X_train, X_test, y_train, y_test = train_test_split(
+    digits.data, digits.target, train_size=0.75, test_size=0.25
+)
 
-tpot = TPOTClassifier(generations = 5, population_size = 20, verbosity = 2)
+tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2)
 
 tpot.fit(X_train, y_train)
 print(tpot.score(X_test, y_test))
-tpot.export('tpot_mnist_pipeline.py')
+tpot.export("tpot_mnist_pipeline.py")

+ 47 - 43
slack_interaction/utils.py

@@ -9,52 +9,54 @@ import matplotlib.pyplot as plt
 
 def plot_history(history):
     """Plot Results of Keras training"""
-    plt.style.use('fivethirtyeight')
-    epochs = list(range(1, len(history['loss']) + 1))
-    plt.figure(figsize = (18, 6))
-    
+    plt.style.use("fivethirtyeight")
+    epochs = list(range(1, len(history["loss"]) + 1))
+    plt.figure(figsize=(18, 6))
+
     # Losses
     plt.subplot(1, 2, 1)
-    plt.plot(epochs, history['loss'], '-o', ms = 10, label = "Training Loss")
-    plt.plot(epochs, history['val_loss'], '-*',  ms = 10, label = "Validation Loss")
-    plt.legend(); 
-    plt.xlabel('Epoch'); plt.ylabel('Loss')
-    plt.title('Losses');
-    
+    plt.plot(epochs, history["loss"], "-o", ms=10, label="Training Loss")
+    plt.plot(epochs, history["val_loss"], "-*", ms=10, label="Validation Loss")
+    plt.legend()
+    plt.xlabel("Epoch")
+    plt.ylabel("Loss")
+    plt.title("Losses")
+
     # Accuracy
     plt.subplot(1, 2, 2)
-    plt.plot(epochs, history['acc'], '-o', ms = 10, label = 'Training Acc')
-    plt.plot(epochs, history['val_acc'], '-*',  ms = 10, label = "Validation Acc")
+    plt.plot(epochs, history["acc"], "-o", ms=10, label="Training Acc")
+    plt.plot(epochs, history["val_acc"], "-*", ms=10, label="Validation Acc")
     plt.legend()
-    plt.xlabel('Epoch'); plt.ylabel('Acc')
-    plt.title('Accuracy');
-    
-    plt.suptitle('Training Curves', y= 1.05)
+    plt.xlabel("Epoch")
+    plt.ylabel("Acc")
+    plt.title("Accuracy")
+
+    plt.suptitle("Training Curves", y=1.05)
 
 
 def get_options(slack):
-    command_dict = {'functions': {},
-                    'attributes': {}}
+    command_dict = {"functions": {}, "attributes": {}}
 
     # Modules
     for d in dir(slack):
-        if not d.startswith('_'):
-            command_dict['functions'][d] = []
-            command_dict['attributes'][d] = []
+        if not d.startswith("_"):
+            command_dict["functions"][d] = []
+            command_dict["attributes"][d] = []
             # Iterate through methods and attributes
             for dd in dir(getattr(slack, d)):
-                if not dd.startswith('_'):
+                if not dd.startswith("_"):
                     # List of methods and attributes
                     l = dir(getattr(getattr(slack, d), dd))
                     # Method (function)
-                    if '__call__' in l:
-                        command_dict['functions'][d].append(dd)
+                    if "__call__" in l:
+                        command_dict["functions"][d].append(dd)
                     # Attributes
                     else:
-                        command_dict['attributes'][d].append(dd)
-                        
+                        command_dict["attributes"][d].append(dd)
+
     return command_dict
 
+
 def get_data_and_model():
     batch_size = 128
     num_classes = 10
@@ -66,7 +68,7 @@ def get_data_and_model():
     # the data, split between train and test sets
     (x_train, y_train), (x_test, y_test) = mnist.load_data()
 
-    if K.image_data_format() == 'channels_first':
+    if K.image_data_format() == "channels_first":
         x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
         x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
         input_shape = (1, img_rows, img_cols)
@@ -75,32 +77,34 @@ def get_data_and_model():
         x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
         input_shape = (img_rows, img_cols, 1)
 
-    x_train = x_train.astype('float32')
-    x_test = x_test.astype('float32')
+    x_train = x_train.astype("float32")
+    x_test = x_test.astype("float32")
     x_train /= 255
     x_test /= 255
-    print('x_train shape:', x_train.shape)
-    print(x_train.shape[0], 'train samples')
-    print(x_test.shape[0], 'test samples')
+    print("x_train shape:", x_train.shape)
+    print(x_train.shape[0], "train samples")
+    print(x_test.shape[0], "test samples")
 
     # convert class vectors to binary class matrices
     y_train = keras.utils.to_categorical(y_train, num_classes)
     y_test = keras.utils.to_categorical(y_test, num_classes)
 
     model = Sequential()
-    model.add(Conv2D(32, kernel_size=(3, 3),
-                     activation='relu',
-                     input_shape=input_shape))
-    model.add(Conv2D(64, (3, 3), activation='relu'))
+    model.add(
+        Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=input_shape)
+    )
+    model.add(Conv2D(64, (3, 3), activation="relu"))
     model.add(MaxPooling2D(pool_size=(2, 2)))
     model.add(Dropout(0.25))
     model.add(Flatten())
-    model.add(Dense(128, activation='relu'))
+    model.add(Dense(128, activation="relu"))
    model.add(Dropout(0.5))
-    model.add(Dense(num_classes, activation='softmax'))
+    model.add(Dense(num_classes, activation="softmax"))
+
+    model.compile(
+        loss=keras.losses.categorical_crossentropy,
+        optimizer=keras.optimizers.Adadelta(),
+        metrics=["accuracy"],
+    )
 
-    model.compile(loss=keras.losses.categorical_crossentropy,
-                  optimizer=keras.optimizers.Adadelta(),
-                  metrics=['accuracy'])
-    
-    return x_train, x_test, y_train, y_test, model
+    return x_train, x_test, y_train, y_test, model

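get_options decides method-versus-attribute by probing for "__call__" in dir(...); the callable() builtin expresses the same test directly (my suggestion, same behavior):

member = getattr(getattr(slack, d), dd)
key = "functions" if callable(member) else "attributes"
command_dict[key][d].append(dd)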
File diff suppressed because it is too large
+ 831 - 526
stocker/stocker.py


+ 77 - 56
time_features/time_features_utils.py

@@ -5,14 +5,20 @@ from tqdm import tqdm_notebook
 
 
 def cyclical_encoding(series, period):
-    features = pd.concat([np.sin((2 * np.pi * series / period)),
-                          np.cos((2 * np.pi * series / period))], axis=1)
-    features.columns = [f'sin_{series.name}', f'cos_{series.name}']
+    features = pd.concat(
+        [np.sin((2 * np.pi * series / period)), np.cos((2 * np.pi * series / period))],
+        axis=1,
+    )
+    features.columns = [f"sin_{series.name}", f"cos_{series.name}"]
     return features
 
 
 def create_time_features(
-    fld, keep_frac_only=False, include_additional=False, cyc_encode=False, timezone=None,
+    fld,
+    keep_frac_only=False,
+    include_additional=False,
+    cyc_encode=False,
+    timezone=None,
 ):
     """
     Create features out of a series of datetimes.
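cyclical_encoding maps a periodic value t onto the unit circle as (sin(2*pi*t/period), cos(2*pi*t/period)), so hour 23 lands next to hour 0 instead of 23 steps away. A quick check (illustrative):

hours = pd.Series([0, 23], name="hour")
print(cyclical_encoding(hours, 24).round(2))
#    sin_hour  cos_hour
# 0      0.00      1.00
# 1     -0.26      0.97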
@@ -42,8 +48,17 @@ def create_time_features(
         df["local"] = fld
 
     # Basic attributes
-    attr = ["second", "minute", "hour", "year", "month",
-            "week", "day", "dayofweek", "dayofyear"]
+    attr = [
+        "second",
+        "minute",
+        "hour",
+        "year",
+        "month",
+        "week",
+        "day",
+        "dayofweek",
+        "dayofyear",
+    ]
 
     if include_additional:
         # Additional attributes to extract
@@ -69,14 +84,10 @@ def create_time_features(
     ) / 24
 
     # Add fractional time of week
-    df[prefix + "fracweek"] = (
-        df[prefix + "dayofweek"] + df[prefix + "fracday"]
-    ) / 7
+    df[prefix + "fracweek"] = (df[prefix + "dayofweek"] + df[prefix + "fracday"]) / 7
 
     # Add fractional time of month
-    df[prefix + "fracmonth"] = (
-        (df[prefix + "day"] - 1) + df[prefix + "fracday"]
-    ) / (
+    df[prefix + "fracmonth"] = ((df[prefix + "day"] - 1) + df[prefix + "fracday"]) / (
         fld.dt.days_in_month
     )  # Use fld days_in_month in case this is not
     # one of the attributes specified
@@ -84,7 +95,7 @@ def create_time_features(
     # Calculate days in year (accounting for leap year rules)
     days_in_year = np.where(
         (df[prefix + "year"] % 4 == 0)
-        & ( ( df[prefix + "year"] % 100 != 0) | (df[prefix + "year"] % 400 == 0)),
+        & ((df[prefix + "year"] % 100 != 0) | (df[prefix + "year"] % 400 == 0)),
         366,
         365,
     )
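That np.where condition is the full Gregorian rule — divisible by 4, except centuries unless divisible by 400 — applied element-wise. Worked out on a few years:

years = np.array([1900, 2000, 2016, 2019])
print(np.where((years % 4 == 0) & ((years % 100 != 0) | (years % 400 == 0)), 366, 365))
# [365 366 366 365]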
@@ -95,15 +106,13 @@ def create_time_features(
     ) / days_in_year
     ) / days_in_year
 
 
     if cyc_encode:
     if cyc_encode:
-        df = pd.concat([df, cyclical_encoding(
-            df[prefix + 'hour'], 24)], axis=1)
-        df = pd.concat([df, cyclical_encoding(
-            df[prefix + 'dayofweek'], 6)], axis=1)
-        df = pd.concat([df, cyclical_encoding(df[prefix + 'day'], 31)], axis=1)
-        df = pd.concat([df, cyclical_encoding(
-            df[prefix + 'month'], 12)], axis=1)
-        df = pd.concat([df] + [cyclical_encoding(df[c], 1)
-                               for c in df if 'frac' in c], axis=1)
+        df = pd.concat([df, cyclical_encoding(df[prefix + "hour"], 24)], axis=1)
+        df = pd.concat([df, cyclical_encoding(df[prefix + "dayofweek"], 6)], axis=1)
+        df = pd.concat([df, cyclical_encoding(df[prefix + "day"], 31)], axis=1)
+        df = pd.concat([df, cyclical_encoding(df[prefix + "month"], 12)], axis=1)
+        df = pd.concat(
+            [df] + [cyclical_encoding(df[c], 1) for c in df if "frac" in c], axis=1
+        )
 
 
     if keep_frac_only:
     if keep_frac_only:
         df = df.drop(
         df = df.drop(
@@ -133,7 +142,7 @@ def monthly_validation(data, model, track=False):
     train_stops = np.unique(data.index[data.index.is_month_end].date)
     train_stops = np.unique(data.index[data.index.is_month_end].date)
 
 
     X = data.copy()
     X = data.copy()
-    y = X.pop('energy')
+    y = X.pop("energy")
     weighted_score = 0
     weighted_score = 0
     total_possible = 0
     total_possible = 0
     train_points = []
     train_points = []
@@ -153,7 +162,8 @@ def monthly_validation(data, model, track=False):
 
 
         if track:
         if track:
             print(
             print(
-                f'Accuracy: {score:.2f}% testing from {test_start} to {test_end} ({n_days} days).')
+                f"Accuracy: {score:.2f}% testing from {test_start} to {test_end} ({n_days} days)."
+            )
         weighted_score += score * len(X_test)
         weighted_score += score * len(X_test)
         total_possible += 100 * len(X_test)
         total_possible += 100 * len(X_test)
         train_points.append(len(X_train))
         train_points.append(len(X_train))
@@ -163,12 +173,14 @@ def monthly_validation(data, model, track=False):
     model.fit(X, y)
     model.fit(X, y)
 
 
     importance_df = None
     importance_df = None
-    if hasattr(model, 'feature_importances_'):
+    if hasattr(model, "feature_importances_"):
         importance_df = pd.DataFrame(
         importance_df = pd.DataFrame(
-            dict(features=X.columns, importance=model.feature_importances_))
+            dict(features=X.columns, importance=model.feature_importances_)
+        )
     final_score = weighted_score / total_possible
     final_score = weighted_score / total_possible
     results_df = pd.DataFrame(
     results_df = pd.DataFrame(
-        dict(train_points=train_points, test_points=test_points, score=scores))
+        dict(train_points=train_points, test_points=test_points, score=scores)
+    )
     return dict(results=results_df, importances=importance_df, score=final_score)
     return dict(results=results_df, importances=importance_df, score=final_score)
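The returned score is an accuracy weighted by test-set size. For instance, a 90% month with 300 test points and an 80% month with 100 test points combine as:

weighted_score = 90 * 300 + 80 * 100  # 35000
total_possible = 100 * (300 + 100)    # 40000
final_score = weighted_score / total_possible  # 0.875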
 
 
 
 
@@ -177,21 +189,21 @@ def mape(y_true, y_pred):
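The body of mape sits outside this hunk. The conventional definition (an assumption here, not shown in the diff) would be:

import numpy as np

def mape(y_true, y_pred):
    # mean absolute percentage error; assumes y_true contains no zeros
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100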
 
 
 
 
 def data_reading(filename):
 def data_reading(filename):
-    data = pd.read_csv(filename, parse_dates=['timestamp'])
-    data = data.dropna(subset=['energy'])
-    freq_counts = data['timestamp'].diff(1).value_counts()
+    data = pd.read_csv(filename, parse_dates=["timestamp"])
+    data = data.dropna(subset=["energy"])
+    freq_counts = data["timestamp"].diff(1).value_counts()
     freq = round(freq_counts.idxmax().total_seconds() / 60)
     freq = round(freq_counts.idxmax().total_seconds() / 60)
-    data = data.set_index('timestamp').sort_index()
+    data = data.set_index("timestamp").sort_index()
     return data, freq, len(data)
     return data, freq, len(data)
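data_reading infers the sampling interval as the most common gap between consecutive timestamps; the same idiom on fabricated 15-minute data:

import pandas as pd

stamps = pd.Series(pd.date_range("2018-01-01", periods=5, freq="15T"))
gaps = stamps.diff(1).value_counts()
freq = round(gaps.idxmax().total_seconds() / 60)  # 15 (minutes)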
 
 
 
 
 def data_testing(filename, model):
 def data_testing(filename, model):
-    building_id = filename.split('_')[-1].split('.csv')[0]
+    building_id = filename.split("_")[-1].split(".csv")[0]
     data, freq, dpoints = data_reading(filename)
     data, freq, dpoints = data_reading(filename)
     results = test_time_features(data, model)
     results = test_time_features(data, model)
-    results['freq'] = freq
-    results['dpoints'] = dpoints
-    results['building_id'] = building_id
+    results["freq"] = freq
+    results["dpoints"] = dpoints
+    results["building_id"] = building_id
     return results
     return results
 
 
 
 
@@ -202,17 +214,27 @@ def test_time_features(data, model):
     scores = []
     scores = []
     methods = []
     methods = []
 
 
-    y = data.pop('energy')
-
-    normal_features = ['timestamp_' + t for t in ['hour',
-                                                  'dayofweek', 'month', 'dayofyear', 'year']]
-    normal_cyc_features = ['sin_' + t for t in normal_features if t not in ['timestamp_dayofyear', 'timestamp_year']
-                           ] + ['cos_' + t for t in normal_features if t not in ['timestamp_dayofyear', 'timestamp_year']]
-
-    frac_features = ['timestamp_' +
-                     t for t in ['fracday', 'fracweek', 'fracmonth', 'fracyear']]
-    frac_cyc_features = ['sin_' + t for t in frac_features] + \
-        ['cos_' + t for t in frac_features]
+    y = data.pop("energy")
+
+    normal_features = [
+        "timestamp_" + t for t in ["hour", "dayofweek", "month", "dayofyear", "year"]
+    ]
+    normal_cyc_features = [
+        "sin_" + t
+        for t in normal_features
+        if t not in ["timestamp_dayofyear", "timestamp_year"]
+    ] + [
+        "cos_" + t
+        for t in normal_features
+        if t not in ["timestamp_dayofyear", "timestamp_year"]
+    ]
+
+    frac_features = [
+        "timestamp_" + t for t in ["fracday", "fracweek", "fracmonth", "fracyear"]
+    ]
+    frac_cyc_features = ["sin_" + t for t in frac_features] + [
+        "cos_" + t for t in frac_features
+    ]
 
 
     data_normal = data[normal_features].copy()
     data_normal = data[normal_features].copy()
     data_normal_cyc = data[normal_cyc_features].copy()
     data_normal_cyc = data[normal_cyc_features].copy()
@@ -220,22 +242,21 @@ def test_time_features(data, model):
     data_frac_cyc = data[frac_cyc_features].copy()
     data_frac_cyc = data[frac_cyc_features].copy()
 
 
     results = {}
     results = {}
-    dataset_names = ['normal', 'normal_cyc', 'frac', 'frac_cyc']
+    dataset_names = ["normal", "normal_cyc", "frac", "frac_cyc"]
 
 
-    for dataset, name in zip([data_normal,
-                              data_normal_cyc,
-                              data_frac,
-                              data_frac_cyc],
-                             dataset_names):
+    for dataset, name in zip(
+        [data_normal, data_normal_cyc, data_frac, data_frac_cyc], dataset_names
+    ):
 
 
-        to_drop = dataset.columns[(dataset.nunique() == 1)
-                                  | (dataset.nunique() == len(dataset))]
+        to_drop = dataset.columns[
+            (dataset.nunique() == 1) | (dataset.nunique() == len(dataset))
+        ]
 
 
         dataset = dataset.drop(columns=to_drop)
         dataset = dataset.drop(columns=to_drop)
-        dataset['energy'] = y.copy()
+        dataset["energy"] = y.copy()
         try:
         try:
             data_results = monthly_validation(dataset, model)
             data_results = monthly_validation(dataset, model)
-            scores.append(data_results['score'])
+            scores.append(data_results["score"])
             methods.append(name)
             methods.append(name)
         except Exception as e:
         except Exception as e:
             print(e, name)
             print(e, name)
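The nunique filter above drops columns a model cannot learn from: constants and columns unique on every row (for example, a year column within a single year of data). A minimal illustration:

import pandas as pd

df = pd.DataFrame(
    {"year": [2018] * 4, "dayofyear": [1, 2, 3, 4], "hour": [0, 6, 12, 6]}
)
to_drop = df.columns[(df.nunique() == 1) | (df.nunique() == len(df))]
df = df.drop(columns=to_drop)  # only 'hour' survives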

+ 99 - 96
web_automation/canvas_upload.py

@@ -1,4 +1,3 @@
-
 # selenium for web driving
 # selenium for web driving
 import selenium
 import selenium
 from selenium import webdriver
 from selenium import webdriver
@@ -14,130 +13,134 @@ import os
 
 
 
 
 def submit_assignment(file_tup):
 def submit_assignment(file_tup):
-	# Using Chrome to access web
-	driver = webdriver.Chrome()
-
-	time.sleep(5)
+    # Using Chrome to access web
+    driver = webdriver.Chrome()
 
 
-	# Open the website
-	driver.get('https://canvas.case.edu')
+    time.sleep(5)
+
+    # Open the website
+    driver.get("https://canvas.case.edu")
+
+    # Password for Canvas
+    with open("C:/Users/Will Koehrsen/Desktop/cp.txt", "r") as f:
+        cp = f.read()
 
 
-	# Password for Canvas
-	with open('C:/Users/Will Koehrsen/Desktop/cp.txt', 'r') as f:
-	    cp = f.read()
+    # Locate id and password
+    id_box = driver.find_element_by_name("username")
+    pass_box = driver.find_element_by_name("password")
 
 
+    # Send login information
+    id_box.send_keys("wjk68")
+    pass_box.send_keys(cp)
 
 
-	# Locate id and password
-	id_box = driver.find_element_by_name('username')
-	pass_box = driver.find_element_by_name('password')
+    # Click login
+    login_button = driver.find_element_by_name("submit")
+    login_button.click()
 
 
-	# Send login information
-	id_box.send_keys('wjk68')
-	pass_box.send_keys(cp)
+    # Find and click on list of courses
+    courses_button = driver.find_element_by_id("global_nav_courses_link")
+    courses_button.click()
 
 
-	# Click login
-	login_button = driver.find_element_by_name('submit')
-	login_button.click()
+    # Wait for the page to load
+    time.sleep(2)
 
 
-	# Find and click on list of courses
-	courses_button = driver.find_element_by_id('global_nav_courses_link')
-	courses_button.click()
+    # Get the name of the folder
+    folder = file_tup[0]
 
 
+    # Class to select depends on folder
+    if folder == "DSCI451":
+        class_select = driver.find_element_by_link_text(
+            "Applied Data Science Research (100/5047)"
+        )
+    elif folder == "DCSI453":
+        class_select = driver.find_element_by_link_text(
+            "Data Science: Statistical Learning, Modeling and Prediction (100/5046)"
+        )
+    elif folder == "EECS491":
+        class_select = driver.find_element_by_link_text(
+            "Artificial Intelligence: Probabilistic Graphical Models (100/10039)"
+        )
+    elif folder == "EECS531":
+        class_select = driver.find_element_by_link_text("Computer Vision (100/10040)")
 
 
-	# Wait for the page to load
-	time.sleep(2)
+    # Click on the specific class
+    class_select.click()
 
 
-	# Get the name of the folder
-	folder = file_tup[0]
-	    
-	# Class to select depends on folder
-	if folder == 'DSCI451':
-	    class_select = driver.find_element_by_link_text('Applied Data Science Research (100/5047)')
-	elif folder == 'DCSI453':
-	    class_select = driver.find_element_by_link_text('Data Science: Statistical Learning, Modeling and Prediction (100/5046)')
-	elif folder == 'EECS491':
-	    class_select = driver.find_element_by_link_text('Artificial Intelligence: Probabilistic Graphical Models (100/10039)')
-	elif folder == 'EECS531':
-	    class_select = driver.find_element_by_link_text('Computer Vision (100/10040)')
+    assignment_button = driver.find_element_by_link_text("Assignments")
+    assignment_button.click()
 
 
-	# Click on the specific class
-	class_select.click()
+    # Wait for the page to load
+    time.sleep(2)
 
 
-	assignment_button = driver.find_element_by_link_text('Assignments')
-	assignment_button.click()
+    # Locate the specific assignment
+    file_name = file_tup[1]
+    file_locator = file_name.split(".")[0]
 
 
-	# Wait for the page to load 
-	time.sleep(2)
+    specific_assignment = driver.find_element_by_link_text(file_locator)
+    specific_assignment.click()
 
 
-	# Locate the specific assignment
-	file_name = file_tup[1]
-	file_locator = file_name.split('.')[0]
-	 
-	specific_assigment = driver.find_element_by_link_text(file_locator)
-	specific_assigment.click()
+    # Click on the button to submit an assignment
+    try:
+        submit_assignment_button = driver.find_element_by_link_text("Submit Assignment")
+    # If assignment has already been submitted
+    except:
+        print("Assignment already submitted, re-submitting")
+        submit_assignment_button = driver.find_element_by_link_text(
+            "Re-submit Assignment"
+        )
 
 
-	# Click on the button to submit an assignment
-	try:
-	    submit_assignment_button = driver.find_element_by_link_text('Submit Assignment')
-	# If assignment has already been submitted
-	except:
-	    print('Assignment already submitted, re-submitting')
-	    submit_assignment_button = driver.find_element_by_link_text('Re-submit Assignment')
+    submit_assignment_button.click()
 
 
-	submit_assignment_button.click()
+    # Wait for the page to load
+    time.sleep(2)
 
 
-	# Wait for the page to load
-	time.sleep(2)
+    # Choose file button
+    choose_file = driver.find_element_by_name("attachments[0][uploaded_data]")
 
 
-	# Choose file button
-	choose_file = driver.find_element_by_name('attachments[0][uploaded_data]')
+    # Send the name of the file to the button
+    file_location = os.path.join(submission_dir, folder, file_name)
+    choose_file.send_keys(file_location)
 
 
-	# Send the name of the file to the button
-	file_location = os.path.join(submission_dir, folder, file_name)
-	choose_file.send_keys(file_location)
+    submit_assignment = driver.find_element_by_id("submit_file_button")
+    submit_assignment.click()
 
 
-	submit_assignment = driver.find_element_by_id('submit_file_button')
-	submit_assignment.click()
+    # Wait for the page
+    time.sleep(2)
 
 
-	# Wait for the page
-	time.sleep(2)
+    # Move the file to the submitted folder
+    submitted_dir = "C:/Users/Will Koehrsen/Desktop/submitted_assignments"
+    submitted_dir = os.path.join(submitted_dir, folder)
+    submitted_file_name = "Submitted " + file_name
 
 
-	# Move the file to the submitted folder
-	submitted_dir = 'C:/Users/Will Koehrsen/Desktop/submitted_assignments'
-	submitted_dir = os.path.join(submitted_dir, folder)
-	submitted_file_name = 'Submitted ' + file_name
+    submitted_file_location = os.path.join(submitted_dir, submitted_file_name)
+    # os.rename(file_location, submitted_file_location)
 
 
-	submitted_file_location = os.path.join(submitted_dir, submitted_file_name)
-	# os.rename(file_location, submitted_file_location)
+    print(
+        "{} Assignment for Class {} successfully submitted at {}.".format(
+            file_name, folder, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        )
+    )
 
 
-	print('{} Assignment for Class {} successfully submitted at {}.'.format(
-		file_name, folder, datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
+    print("Submitted assignment available at {}.".format(submitted_file_location))
 
 
-	print('Submitted assignment available at {}.'.format(submitted_file_location))
+    return
 
 
-	return
 
 
 if __name__ == "__main__":
 if __name__ == "__main__":
 
 
-	# Build tuple of (folder, file) to turn in
-	submission_dir = 'C:/Users/Will Koehrsen/Desktop/completed_assignments'
-	dir_list = list(os.listdir(submission_dir))
-
-	for directory in dir_list:
-	    file_list = list(os.listdir(os.path.join(submission_dir, directory)))
-	    if len(file_list) != 0:
-	        file_tup = (directory, file_list[0])
-
-	if len(file_tup) == 0:
-		print('No files to submit')
-
-	else:
-		print('Assignment "{}" for "{}" found.'.format(file_tup[1], file_tup[0]))
-		input('Press enter to proceed: ')
-		submit_assignment(file_tup)
-
-
-
+    # Build tuple of (folder, file) to turn in
+    submission_dir = "C:/Users/Will Koehrsen/Desktop/completed_assignments"
+    dir_list = list(os.listdir(submission_dir))
 
 
+    # Initialize so the check below works when no assignments are found
+    file_tup = ()
+
+    for directory in dir_list:
+        file_list = list(os.listdir(os.path.join(submission_dir, directory)))
+        if len(file_list) != 0:
+            file_tup = (directory, file_list[0])
 
 
+    if len(file_tup) == 0:
+        print("No files to submit")
 
 
+    else:
+        print('Assignment "{}" for "{}" found.'.format(file_tup[1], file_tup[0]))
+        input("Press enter to proceed: ")
+        submit_assignment(file_tup)
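The fixed time.sleep calls above are fragile on slow page loads. An explicit wait (a sketch, not part of this commit; driver is the webdriver created in submit_assignment) blocks only as long as needed:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# wait up to 10 seconds for the courses link instead of sleeping unconditionally
wait = WebDriverWait(driver, 10)
courses_button = wait.until(
    EC.element_to_be_clickable((By.ID, "global_nav_courses_link"))
)
courses_button.click()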

+ 38 - 41
weighter/run_weighter.py

@@ -1,4 +1,3 @@
-
 # pandas and numpy for data manipulation
 # pandas and numpy for data manipulation
 import pandas as pd
 import pandas as pd
 import numpy as np
 import numpy as np
@@ -18,7 +17,7 @@ from oauth2client.service_account import ServiceAccountCredentials
 # os for deleting images
 # os for deleting images
 import os
 import os
 
 
-# matplotlib for plotting 
+# matplotlib for plotting
 import matplotlib.pyplot as plt
 import matplotlib.pyplot as plt
 import matplotlib.patches as mpatches
 import matplotlib.patches as mpatches
 import matplotlib
 import matplotlib
@@ -28,55 +27,53 @@ from weighter import Weighter
 
 
 if __name__ == "__main__":
 if __name__ == "__main__":
 
 
-	# google sheets access
-	scope = ['https://spreadsheets.google.com/feeds']
-
-	# Use local stored credentials in json file
-	# make sure to first share the sheet with the email in the json file
-	credentials = ServiceAccountCredentials.from_json_keyfile_name('C:/Users/Will Koehrsen/Desktop/weighter-2038ffb4e5a6.json', scope)
-
-	# Authorize access
-	gc = gspread.authorize(credentials);
-
-	# Slack api key is stored as text file
-	with open('C:/Users/Will Koehrsen/Desktop/slack_api.txt', 'r') as f:
-	    slack_api_key = f.read()
+    # google sheets access
+    scope = ["https://spreadsheets.google.com/feeds"]
 
 
-	slack = Slacker(slack_api_key)
+    # Use local stored credentials in json file
+    # make sure to first share the sheet with the email in the json file
+    credentials = ServiceAccountCredentials.from_json_keyfile_name(
+        "C:/Users/Will Koehrsen/Desktop/weighter-2038ffb4e5a6.json", scope
+    )
 
 
-	# Open the sheet, need to share the sheet with email specified in json file
-	gsheet = gc.open('Auto Weight Challenge').sheet1
+    # Authorize access
+    gc = gspread.authorize(credentials)
 
 
-	# List of lists with each row in the sheet as a list
-	weight_lists = gsheet.get_all_values()
+    # Slack api key is stored as text file
+    with open("C:/Users/Will Koehrsen/Desktop/slack_api.txt", "r") as f:
+        slack_api_key = f.read()
 
 
-	# Headers are the first list
-	# Pop returns the element (list in this case) and removes it from the list
-	headers = weight_lists.pop(0)
+    slack = Slacker(slack_api_key)
 
 
-	# Convert list of lists to a dataframe with specified column header
-	weights = pd.DataFrame(weight_lists, columns=headers)
+    # Open the sheet, need to share the sheet with email specified in json file
+    gsheet = gc.open("Auto Weight Challenge").sheet1
 
 
-	# Record column should be a boolean
-	weights['Record'] = weights['Record'].astype(bool)
+    # List of lists with each row in the sheet as a list
+    weight_lists = gsheet.get_all_values()
 
 
-	# Name column is a string
-	weights['Name'] = weights['Name'].astype(str)
+    # Headers are the first list
+    # Pop returns the element (list in this case) and removes it from the list
+    headers = weight_lists.pop(0)
 
 
-	# Convert dates to datetime, then set as index, then set the time zone
-	weights['Date'] = pd.to_datetime(weights['Date'], unit='s')
-	weights  = weights.set_index('Date', drop = True).tz_localize(tz='US/Eastern')
+    # Convert list of lists to a dataframe with specified column header
+    weights = pd.DataFrame(weight_lists, columns=headers)
 
 
-	# Drop any extra entries
-	weights = weights.drop('NaT')
+    # Record column should be a boolean
+    weights["Record"] = weights["Record"].astype(bool)
 
 
-	# If there are new entries create the weighter object
-	if len(weights) > np.count_nonzero(weights['Record']):
-		# Initialize with dataframe of weights, google sheet, and slack object
-    	 weighter = Weighter(weights, gsheet, slack)
-    	 weighter.process_entries()
-    	 print('Success')
+    # Name column is a string
+    weights["Name"] = weights["Name"].astype(str)
 
 
+    # Convert dates to datetime, then set as index, then set the time zone
+    weights["Date"] = pd.to_datetime(weights["Date"], unit="s")
+    weights = weights.set_index("Date", drop=True).tz_localize(tz="US/Eastern")
 
 
-	
+    # Drop any extra entries
+    weights = weights.drop("NaT")
 
 
+    # If there are new entries create the weighter object
+    if len(weights) > np.count_nonzero(weights["Record"]):
+        # Initialize with dataframe of weights, google sheet, and slack object
+        weighter = Weighter(weights, gsheet, slack)
+        weighter.process_entries()
+        print("Success")

+ 424 - 281
weighter/weighter.py

@@ -17,169 +17,181 @@ from oauth2client.service_account import ServiceAccountCredentials
 # os for deleting images
 # os for deleting images
 import os
 import os
 
 
-# matplotlib for plotting 
+# matplotlib for plotting
 import matplotlib.pyplot as plt
 import matplotlib.pyplot as plt
 import matplotlib.patches as mpatches
 import matplotlib.patches as mpatches
 import matplotlib
 import matplotlib
 
 
 
 
-class Weighter():
-    
+class Weighter:
+
     """
     """
     When weighter is initialized, we need to convert the usernames,
     When weighter is initialized, we need to convert the usernames,
     get a dictionary of the unrecorded entries, construct a dictionary
     get a dictionary of the unrecorded entries, construct a dictionary
     of the actions to take, and make sure all data is formatted correctly
     of the actions to take, and make sure all data is formatted correctly
     """
     """
-    
+
     def __init__(self, weights, gsheet, slack):
     def __init__(self, weights, gsheet, slack):
-        
+
         # Weights is a dataframe
         # Weights is a dataframe
         self.weights = weights.copy()
         self.weights = weights.copy()
 
 
         self.gsheet = gsheet
         self.gsheet = gsheet
         self.slack = slack
         self.slack = slack
 
 
-        
         # Users is a list of the unique users in the data
         # Users is a list of the unique users in the data
-        self.users = list(set(self.weights['Name']))
-        
+        self.users = list(set(self.weights["Name"]))
+
         correct_names = []
         correct_names = []
-        
+
         # Name Changes
         # Name Changes
-        for user in self.weights['Name']:
-            
+        for user in self.weights["Name"]:
+
             # Have to hardcode in name changes
             # Have to hardcode in name changes
-            if user == 'koehrcl':
-                correct_names.append('Craig')
-            elif user == 'willkoehrsen':
-                correct_names.append('Will')
-            elif user == 'fletcher':
-                correct_names.append('Fletcher')
-            
+            if user == "koehrcl":
+                correct_names.append("Craig")
+            elif user == "willkoehrsen":
+                correct_names.append("Will")
+            elif user == "fletcher":
+                correct_names.append("Fletcher")
+
             # Currently do not handle new users
             # Currently do not handle new users
             else:
             else:
-                print('New User Detected')
+                print("New User Detected")
                 return
                 return
-            
-        self.weights['Name'] = correct_names
-        
+
+        self.weights["Name"] = correct_names
+
         # Users is a list of the unique users in the data
         # Users is a list of the unique users in the data
-        self.users = list(set(self.weights['Name']))
-        
+        self.users = list(set(self.weights["Name"]))
+
         # Create a dataframe of the unrecorded entries
         # Create a dataframe of the unrecorded entries
-        self.unrecorded = self.weights[self.weights['Record'] != True]
-        
+        self.unrecorded = self.weights[self.weights["Record"] != True]
+
         # Process the unrecorded entries
         # Process the unrecorded entries
         self.process_unrecorded()
         self.process_unrecorded()
-        
+
         # The remaining entries will all be weights
         # The remaining entries will all be weights
-        self.weights['Entry'] = [float(weight) for weight in self.weights['Entry']]
-        
+        self.weights["Entry"] = [float(weight) for weight in self.weights["Entry"]]
+
         # Build the user dictionary
         # Build the user dictionary
         self.build_user_dict()
         self.build_user_dict()
-        
+
         # Calculate the change and percentage change columns
         # Calculate the change and percentage change columns
         self.calculate_columns()
         self.calculate_columns()
-        
+
     """ 
     """ 
     Constructs a dictionary for each user with critical information
     Constructs a dictionary for each user with critical information
     This forms the basis for the summarize function
     This forms the basis for the summarize function
     """
     """
-    
+
     def build_user_dict(self):
     def build_user_dict(self):
-        
+
         user_dict = {}
         user_dict = {}
-        
-        user_goals = {'Craig': 215.0, 'Fletcher': 200.0, 'Will': 155.0}
-        user_colors = {'Craig': 'forestgreen', 'Fletcher': 'navy', 'Will': 'darkred'}
-        
+
+        user_goals = {"Craig": 215.0, "Fletcher": 200.0, "Will": 155.0}
+        user_colors = {"Craig": "forestgreen", "Fletcher": "navy", "Will": "darkred"}
+
         for i, user in enumerate(self.users):
         for i, user in enumerate(self.users):
-            
-            user_weights = self.weights[self.weights['Name'] == user]
+
+            user_weights = self.weights[self.weights["Name"] == user]
             goal = user_goals.get(user)
             goal = user_goals.get(user)
 
 
-            start_weight = user_weights.ix[min(user_weights.index), 'Entry']   
+            start_weight = user_weights.ix[min(user_weights.index), "Entry"]
             start_date = min(user_weights.index)
             start_date = min(user_weights.index)
-            
+
             # Find minimum weight and date on which it occurs
             # Find minimum weight and date on which it occurs
-            min_weight =  min(user_weights['Entry'])
-            min_weight_date = ((user_weights[user_weights['Entry'] == min_weight].index)[0])
-            
+            min_weight = min(user_weights["Entry"])
+            min_weight_date = (user_weights[user_weights["Entry"] == min_weight].index)[
+                0
+            ]
+
             # Find maximum weight and date on which it occurs
             # Find maximum weight and date on which it occurs
-            max_weight = max(user_weights['Entry'])
-            max_weight_date = ((user_weights[user_weights['Entry'] == max_weight].index)[0])
-            
-            most_recent_weight = user_weights.ix[max(user_weights.index), 'Entry']
-            
+            max_weight = max(user_weights["Entry"])
+            max_weight_date = (user_weights[user_weights["Entry"] == max_weight].index)[
+                0
+            ]
+
+            most_recent_weight = user_weights.ix[max(user_weights.index), "Entry"]
+
             if goal < start_weight:
             if goal < start_weight:
                 change = start_weight - most_recent_weight
                 change = start_weight - most_recent_weight
-                obj = 'lose'
+                obj = "lose"
             elif goal > start_weight:
             elif goal > start_weight:
                 change = most_recent_weight - start_weight
                 change = most_recent_weight - start_weight
-                obj = 'gain'
-                
+                obj = "gain"
+
             pct_change = 100 * change / start_weight
             pct_change = 100 * change / start_weight
-            
-            pct_to_goal = 100 * (change / abs(start_weight - goal) )
-            
+
+            pct_to_goal = 100 * (change / abs(start_weight - goal))
+
             # Color for plotting
             # Color for plotting
             user_color = user_colors[user]
             user_color = user_colors[user]
-            
-            user_dict[user] = {'min_weight': min_weight, 'max_weight': max_weight,
-                               'min_date': min_weight_date, 'max_date': max_weight_date,
-                               'recent': most_recent_weight, 'abs_change': change,
-                               'pct_change': pct_change, 'pct_towards_goal': pct_to_goal,
-                               'start_weight': start_weight, 'start_date': start_date,
-                               'goal_weight': goal, 'objective': obj, 'color': user_color}
-       
+
+            user_dict[user] = {
+                "min_weight": min_weight,
+                "max_weight": max_weight,
+                "min_date": min_weight_date,
+                "max_date": max_weight_date,
+                "recent": most_recent_weight,
+                "abs_change": change,
+                "pct_change": pct_change,
+                "pct_towards_goal": pct_to_goal,
+                "start_weight": start_weight,
+                "start_date": start_date,
+                "goal_weight": goal,
+                "objective": obj,
+                "color": user_color,
+            }
+
         self.user_dict = user_dict
         self.user_dict = user_dict
-             
+
     """
     """
     Builds a dictionary of unrecorded entries where each key is the user
     Builds a dictionary of unrecorded entries where each key is the user
     and the value is a list of weights and methods called for by the user.
     and the value is a list of weights and methods called for by the user.
     This dictionary is saved as the entries attribute of the class.
     This dictionary is saved as the entries attribute of the class.
     Removes the non-weight entries from the data and from the google sheet.
     Removes the non-weight entries from the data and from the google sheet.
     """
     """
-    
+
     def process_unrecorded(self):
     def process_unrecorded(self):
-        
-        entries = {name:[] for name in self.users}
+
+        entries = {name: [] for name in self.users}
         drop = []
         drop = []
-        
+
         location = {}
         location = {}
-        
+
         for index in self.unrecorded.index:
         for index in self.unrecorded.index:
 
 
-            entry = self.unrecorded.ix[index, 'Entry']
-            user = str(self.unrecorded.ix[index, 'Name'])
-            
+            entry = self.unrecorded.ix[index, "Entry"]
+            user = str(self.unrecorded.ix[index, "Name"])
+
             # Try and except does not seem like the best way to handle this
             # Try and except does not seem like the best way to handle this
             try:
             try:
                 entry = float(entry)
                 entry = float(entry)
                 entries[user].append(entry)
                 entries[user].append(entry)
                 location[index] = True
                 location[index] = True
-                
-            except:  
+
+            except:
                 entry = str(entry)
                 entry = str(entry)
                 entries[user].append(entry.strip())
                 entries[user].append(entry.strip())
-                location[index] = 'remove'
-                
+                location[index] = "remove"
+
                 drop.append(index)
                 drop.append(index)
-                
-            self.weights.ix[index, 'Record'] = True
-           
+
+            self.weights.ix[index, "Record"] = True
+
         # Indexes of new entries
         # Indexes of new entries
         self.location = location
         self.location = location
-        
+
         # Update the Google Sheet before dropping
         # Update the Google Sheet before dropping
         self.update_sheet()
         self.update_sheet()
-        
+
         # Drop the rows which do not contain a weight
         # Drop the rows which do not contain a weight
         self.weights.drop(drop, axis=0, inplace=True)
         self.weights.drop(drop, axis=0, inplace=True)
 
 
         # Entries is all of the new entries
         # Entries is all of the new entries
         self.entries = entries
         self.entries = entries
-        
+
     """ 
     """ 
     Update the Google Spreadsheet. This involves removing the rows without weight
     Update the Google Spreadsheet. This involves removing the rows without weight
     entries and putting a True in the record column for all weights. 
     entries and putting a True in the record column for all weights. 
@@ -187,161 +199,198 @@ class Weighter():
 
 
     def update_sheet(self):
     def update_sheet(self):
         delete_count = 0
         delete_count = 0
-        
+
         # Iterate through the locations and update as appropriate
         # Iterate through the locations and update as appropriate
         for index, action in self.location.items():
         for index, action in self.location.items():
             cell_row = (np.where(self.weights.index == index))[0][0] + 2 - delete_count
             cell_row = (np.where(self.weights.index == index))[0][0] + 2 - delete_count
-            if action == 'remove':
-                self.gsheet.delete_row(index = cell_row)
+            if action == "remove":
+                self.gsheet.delete_row(index=cell_row)
                 delete_count += 1
                 delete_count += 1
             elif action:
             elif action:
-                self.gsheet.update_acell(label='D%d' % cell_row, val = 'True')
-           
+                self.gsheet.update_acell(label="D%d" % cell_row, val="True")
+
     """ 
     """ 
     Iterates through the unrecorded entries and delegates 
     Iterates through the unrecorded entries and delegates 
     each one to the appropriate method.
     each one to the appropriate method.
     Updates the record cell in the google sheet 
     Updates the record cell in the google sheet 
     """
     """
+
     def process_entries(self):
     def process_entries(self):
         for user, user_entries in self.entries.items():
         for user, user_entries in self.entries.items():
             for entry in user_entries:
             for entry in user_entries:
-                
+
                 # If a weight, display the basic message
                 # If a weight, display the basic message
                 if type(entry) == float:
                 if type(entry) == float:
                     self.basic_message(user)
                     self.basic_message(user)
-                    
+
                 # If the message is a string hand off to the appropriate function
                 # If the message is a string hand off to the appropriate function
                 else:
                 else:
-                    
+
                     # Require at least 8 days of data
                     # Require at least 8 days of data
-                    if len(self.weights[self.weights['Name'] == user]) < 8:
-                        message = "\nAt least 8 days of data required for detailed analysis."
-                        self.slack.chat.post_message(channel='#weight_tracker', text = message, username = "Data Analyst", icon_emoji=":calendar:")
-                
-                    elif entry.lower() == 'summary':
+                    if len(self.weights[self.weights["Name"] == user]) < 8:
+                        message = (
+                            "\nAt least 8 days of data required for detailed analysis."
+                        )
+                        self.slack.chat.post_message(
+                            channel="#weight_tracker",
+                            text=message,
+                            username="Data Analyst",
+                            icon_emoji=":calendar:",
+                        )
+
+                    elif entry.lower() == "summary":
                         self.summary(user)
                         self.summary(user)
 
 
-                    elif entry.lower() == 'percent':
+                    elif entry.lower() == "percent":
                         self.percentage_plot()
                         self.percentage_plot()
 
 
-                    elif entry.lower() == 'history':
+                    elif entry.lower() == "history":
                         self.history_plot(user)
                         self.history_plot(user)
 
 
-                    elif entry.lower() == 'future':
+                    elif entry.lower() == "future":
                         self.future_plot(user)
                         self.future_plot(user)
 
 
-                    elif entry.lower() == 'analysis':
+                    elif entry.lower() == "analysis":
                         self.analyze(user)
                         self.analyze(user)
-    
+
                     # Display a help message if the string is not valid
                     # Display a help message if the string is not valid
                     else:
                     else:
-                        message = ("\nPlease enter a valid message:\n\n"
-                                   "Your weight\n"
-                                   "'Summary' to see a personal summary\n"
-                                   "'Percent' to see a plot of all users percentage changes\n"
-                                   "'History' to see a plot of your personal history\n"
-                                   "'Future' to see your predictions for the next thirty days\n"
-                                   "'Analysis' to view personalized advice\n"
-                                   "For more help, contact @koehrsen_will on Twitter.\n")
-
-                        self.slack.chat.post_message(channel='#weight_tracker', text = message, username = "Help", 
-                        	icon_emoji=":interrobang:")
-                    
-            
+                        message = (
+                            "\nPlease enter a valid message:\n\n"
+                            "Your weight\n"
+                            "'Summary' to see a personal summary\n"
+                            "'Percent' to see a plot of all users' percentage changes\n"
+                            "'History' to see a plot of your personal history\n"
+                            "'Future' to see your predictions for the next thirty days\n"
+                            "'Analysis' to view personalized advice\n"
+                            "For more help, contact @koehrsen_will on Twitter.\n"
+                        )
+
+                        self.slack.chat.post_message(
+                            channel="#weight_tracker",
+                            text=message,
+                            username="Help",
+                            icon_emoji=":interrobang:",
+                        )
+
     """ 
     """ 
     Adds the change and percentage change columns to the self.weights df
     Adds the change and percentage change columns to the self.weights df
     """
     """
+
     def calculate_columns(self):
     def calculate_columns(self):
-        
-        self.weights = self.weights.sort_values('Name')
-        self.weights['change'] = 0
-        self.weights['pct_change'] = 0
-        self.weights.reset_index(level=0, inplace = True)
-        
+
+        self.weights = self.weights.sort_values("Name")
+        self.weights["change"] = 0
+        self.weights["pct_change"] = 0
+        self.weights.reset_index(level=0, inplace=True)
+
         for index in self.weights.index:
         for index in self.weights.index:
-            user = self.weights.ix[index, 'Name']
-            weight = self.weights.ix[index, 'Entry']
-            start_weight = self.user_dict[user]['start_weight']
-            objective = self.user_dict[user]['objective']
-            
-            if objective == 'lose':
-                
-                self.weights.ix[index, 'change'] = start_weight - weight
-                self.weights.ix[index, 'pct_change'] = 100 * (start_weight - weight) / start_weight
-                
-            elif objective == 'gain':
-                self.weights.ix[index, 'change'] = weight - start_weight
-                self.weights.ix[index, 'pct_change'] = 100 * (weight - start_weight) / start_weight
-
-        self.weights.set_index('Date', drop=True, inplace=True)
-        
-                
+            user = self.weights.ix[index, "Name"]
+            weight = self.weights.ix[index, "Entry"]
+            start_weight = self.user_dict[user]["start_weight"]
+            objective = self.user_dict[user]["objective"]
+
+            if objective == "lose":
+
+                self.weights.ix[index, "change"] = start_weight - weight
+                self.weights.ix[index, "pct_change"] = (
+                    100 * (start_weight - weight) / start_weight
+                )
+
+            elif objective == "gain":
+                self.weights.ix[index, "change"] = weight - start_weight
+                self.weights.ix[index, "pct_change"] = (
+                    100 * (weight - start_weight) / start_weight
+                )
+
+        self.weights.set_index("Date", drop=True, inplace=True)
+
     """ 
     """ 
     This method is automatically run for each new weight
     This method is automatically run for each new weight
     """
     """
+
     def basic_message(self, user):
     def basic_message(self, user):
-    
+
         # Find information for user, construct message, post message to Slack
         # Find information for user, construct message, post message to Slack
         user_info = self.user_dict.get(user)
         user_info = self.user_dict.get(user)
 
 
-        message = ("\n{}: Total Weight Change = {:.2f} lbs.\n\n"
-                    "Percentage Weight Change = {:.2f}%\n").format(user, user_info['abs_change'],
-                                                     user_info['pct_change'])
+        message = (
+            "\n{}: Total Weight Change = {:.2f} lbs.\n\n"
+            "Percentage Weight Change = {:.2f}%\n"
+        ).format(user, user_info["abs_change"], user_info["pct_change"])
+
+        self.slack.chat.post_message(
+            "#weight_tracker", text=message, username="Update", icon_emoji=":scales:"
+        )
 
 
-        self.slack.chat.post_message('#weight_tracker', text=message, username='Update', icon_emoji=':scales:')
-                        
     """ 
     """ 
     Displays comprehensive stats about the user
     Displays comprehensive stats about the user
     """
     """
-    
+
     def summary(self, user):
     def summary(self, user):
         user_info = self.user_dict.get(user)
         user_info = self.user_dict.get(user)
-        message = ("\n{}, your most recent weight was {:.2f} lbs.\n\n"
-                   "Absolute weight change = {:.2f} lbs, percentage weight change = {:.2f}%.\n\n"
-                   "Minimum weight = {:.2f} lbs on {} and maximum weight = {:.2f} lbs on {}.\n\n"
-                   "Your goal weight = {:.2f} lbs. and you are {:.2f}% of the way there.\n\n"
-                   "You started at {:.2f} lbs on {}. Congratulations on the progress!\n").format(user, 
-                     user_info['recent'], user_info['abs_change'], user_info['pct_change'], 
-                     user_info['min_weight'], str(user_info['min_date'].date()),
-                     user_info['max_weight'], str(user_info['max_date'].date()),
-                     user_info['goal_weight'], user_info['pct_towards_goal'],                                                       
-                     user_info['start_weight'], str(user_info['start_date'].date()))
-        
-        self.slack.chat.post_message('#weight_tracker', text=message, username='Summary', icon_emoji=":earth_africa:")
-   
+        message = (
+            "\n{}, your most recent weight was {:.2f} lbs.\n\n"
+            "Absolute weight change = {:.2f} lbs, percentage weight change = {:.2f}%.\n\n"
+            "Minimum weight = {:.2f} lbs on {} and maximum weight = {:.2f} lbs on {}.\n\n"
+            "Your goal weight = {:.2f} lbs. and you are {:.2f}% of the way there.\n\n"
+            "You started at {:.2f} lbs on {}. Congratulations on the progress!\n"
+        ).format(
+            user,
+            user_info["recent"],
+            user_info["abs_change"],
+            user_info["pct_change"],
+            user_info["min_weight"],
+            str(user_info["min_date"].date()),
+            user_info["max_weight"],
+            str(user_info["max_date"].date()),
+            user_info["goal_weight"],
+            user_info["pct_towards_goal"],
+            user_info["start_weight"],
+            str(user_info["start_date"].date()),
+        )
+
+        self.slack.chat.post_message(
+            "#weight_tracker",
+            text=message,
+            username="Summary",
+            icon_emoji=":earth_africa:",
+        )
+
     """
     """
     Reset the plot and institute basic parameters
     Reset the plot and institute basic parameters
     """
     """
+
     @staticmethod
     @staticmethod
     def reset_plot():
     def reset_plot():
         matplotlib.rcParams.update(matplotlib.rcParamsDefault)
         matplotlib.rcParams.update(matplotlib.rcParamsDefault)
-        matplotlib.rcParams['text.color'] = 'k'
-        
+        matplotlib.rcParams["text.color"] = "k"
+
     """
     """
     Plot of all users percentage changes.
     Plot of all users percentage changes.
     Includes polynomial fits (degree may need to be adjusted).
     Includes polynomial fits (degree may need to be adjusted).
     """
     """
-    
+
     def percentage_plot(self):
     def percentage_plot(self):
-        
+
         self.reset_plot()
         self.reset_plot()
-        
-        plt.style.use('fivethirtyeight')
-        plt.figure(figsize=(10,8))
+
+        plt.style.use("fivethirtyeight")
+        plt.figure(figsize=(10, 8))
 
 
         for i, user in enumerate(self.users):
         for i, user in enumerate(self.users):
-            
-            user_color = self.user_dict[user]['color']
+
+            user_color = self.user_dict[user]["color"]
 
 
             # Select the user and order dataframe by date
             # Select the user and order dataframe by date
-            df = self.weights[self.weights['Name'] == user]
+            df = self.weights[self.weights["Name"] == user]
             df.sort_index(inplace=True)
             df.sort_index(inplace=True)
-            
+
             # List is used for fitting polynomial
             # List is used for fitting polynomial
             xvalues = list(range(len(df)))
             xvalues = list(range(len(df)))
 
 
             # Create a polynomial fit
             # Create a polynomial fit
-            z = np.polyfit(xvalues, df['pct_change'], deg=6)
+            z = np.polyfit(xvalues, df["pct_change"], deg=6)
 
 
             # Create a function from the fit
             # Create a function from the fit
             p = np.poly1d(z)
             p = np.poly1d(z)
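np.polyfit returns coefficients (highest power first) and np.poly1d wraps them in a callable polynomial; a degree-6 fit needs at least 7 observations, which is plausibly why the class requires 8 days of data. A toy run on a noiseless quadratic:

import numpy as np

x = list(range(10))
y = [xi ** 2 + 1 for xi in x]
z = np.polyfit(x, y, deg=2)  # approximately [1, 0, 1]
p = np.poly1d(z)             # callable polynomial
p(10)                        # approximately 101.0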
@@ -350,40 +399,62 @@ class Weighter():
             fit_data = p(xvalues)
             fit_data = p(xvalues)
 
 
             # Plot the actual points and the fit
             # Plot the actual points and the fit
-            plt.plot(df.index, df['pct_change'], 'o', color = user_color, label = '%s Observations' % user)
-            plt.plot(df.index, fit_data, '-', color = user_color, linewidth = 5, label = '%s Smooth Fit' % user)
-
+            plt.plot(
+                df.index,
+                df["pct_change"],
+                "o",
+                color=user_color,
+                label="%s Observations" % user,
+            )
+            plt.plot(
+                df.index,
+                fit_data,
+                "-",
+                color=user_color,
+                linewidth=5,
+                label="%s Smooth Fit" % user,
+            )
 
 
         # Plot formatting
         # Plot formatting
-        plt.xlabel('Date'); plt.ylabel('% Change from Start')
-        plt.title('Percentage Changes')
-        plt.grid(color='k', alpha=0.4)
-        plt.legend(prop={'size':14})
-        plt.savefig('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\percentage_plot.png')
-        
-        self.slack.files.upload('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\percentage_plot.png', channels='#weight_tracker', title="Percent Plot")
-        
-        os.remove('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\percentage_plot.png')
-        
+        plt.xlabel("Date")
+        plt.ylabel("% Change from Start")
+        plt.title("Percentage Changes")
+        plt.grid(color="k", alpha=0.4)
+        plt.legend(prop={"size": 14})
+        plt.savefig(
+            "C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\percentage_plot.png"
+        )
+
+        self.slack.files.upload(
+            "C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\percentage_plot.png",
+            channels="#weight_tracker",
+            title="Percent Plot",
+        )
+
+        os.remove(
+            "C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\percentage_plot.png"
+        )
+
     """ 
     """ 
     Plot of a single user's history.
     Plot of a single user's history.
     Also plot a polynomial fit on the observations.
     Also plot a polynomial fit on the observations.
     """
     """
+
     def history_plot(self, user):
     def history_plot(self, user):
-        
+
         self.reset_plot()
         self.reset_plot()
-        plt.style.use('fivethirtyeight')
+        plt.style.use("fivethirtyeight")
         plt.figure(figsize=(10, 8))
         plt.figure(figsize=(10, 8))
-        
-        df = self.weights[self.weights['Name'] == user]
-        df.sort_index(inplace=True) 
-        user_color = self.user_dict[user]['color']
-        
+
+        df = self.weights[self.weights["Name"] == user]
+        df.sort_index(inplace=True)
+        user_color = self.user_dict[user]["color"]
+
         # List is used for fitting polynomial
         # List is used for fitting polynomial
         xvalues = list(range(len(df)))
         xvalues = list(range(len(df)))
 
 
         # Create a polynomial fit
         # Create a polynomial fit
-        z = np.polyfit(xvalues, df['Entry'], deg=6)
+        z = np.polyfit(xvalues, df["Entry"], deg=6)
 
 
         # Create a function from the fit
         # Create a function from the fit
         p = np.poly1d(z)
         p = np.poly1d(z)
@@ -392,150 +463,222 @@ class Weighter():
         fit_data = p(xvalues)
         fit_data = p(xvalues)
 
 
         # Make a simple plot and upload to slack
         # Make a simple plot and upload to slack
-        plt.plot(df.index, df['Entry'], 'ko', ms = 8, label = 'Observed')
-        plt.plot(df.index, fit_data, '-', color = user_color, linewidth = 5, label = 'Smooth Fit')
-        plt.xlabel('Date'); plt.ylabel('Weight (lbs)'); plt.title('%s Weight History' % user)
-        plt.legend(prop={'size': 14});
-        
-        plt.savefig(fname='C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\history_plot.png')
-        self.slack.files.upload('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\history_plot.png', channels='#weight_tracker', title="%s History" % user)
-        
+        plt.plot(df.index, df["Entry"], "ko", ms=8, label="Observed")
+        plt.plot(
+            df.index, fit_data, "-", color=user_color, linewidth=5, label="Smooth Fit"
+        )
+        plt.xlabel("Date")
+        plt.ylabel("Weight (lbs)")
+        plt.title("%s Weight History" % user)
+        plt.legend(prop={"size": 14})
+
+        plt.savefig(
+            fname="C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\history_plot.png"
+        )
+        self.slack.files.upload(
+            "C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\history_plot.png",
+            channels="#weight_tracker",
+            title="%s History" % user,
+        )
+
         # Remove the plot from local storage
         # Remove the plot from local storage
-        os.remove('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\history_plot.png')
-   
+        os.remove(
+            "C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\history_plot.png"
+        )
+
     """ 
     """ 
     Create a prophet model for forecasting and trend analysis.
     Create a prophet model for forecasting and trend analysis.
     Might need to adjust model hyperparameters.
     Might need to adjust model hyperparameters.
     """
     """
-    
+
     def prophet_model(self):
     def prophet_model(self):
         model = fbprophet.Prophet(daily_seasonality=False, yearly_seasonality=False)
         model = fbprophet.Prophet(daily_seasonality=False, yearly_seasonality=False)
         return model
         return model
-        
+
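prophet_model disables the daily and yearly seasonal components and leaves the weekly one at its default. The forecast pattern used by future_plot below is, in outline (the toy df here is fabricated; Prophet expects 'ds' and 'y' columns):

import fbprophet
import pandas as pd

df = pd.DataFrame({"ds": pd.date_range("2018-01-01", periods=60), "y": range(60)})
model = fbprophet.Prophet(daily_seasonality=False, yearly_seasonality=False)
model.fit(df)
future = model.make_future_dataframe(periods=30, freq="D")
forecast = model.predict(future)
forecast[["ds", "yhat", "yhat_lower", "yhat_upper"]].tail()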
     """ 
     """ 
     Plot the prophet forecast for the next thirty days
     Plot the prophet forecast for the next thirty days
     Print the expected weight at the end of the forecast
     Print the expected weight at the end of the forecast
     """
     """
+
     def future_plot(self, user):
     def future_plot(self, user):
         self.reset_plot()
         self.reset_plot()
-        
-        df = self.weights[self.weights['Name'] == user]
+
+        df = self.weights[self.weights["Name"] == user]
         dates = [date.date() for date in df.index]
         dates = [date.date() for date in df.index]
-        df['ds'] = dates
-        df['y'] = df['Entry']
-        
+        df["ds"] = dates
+        df["y"] = df["Entry"]
+
         df.sort_index(inplace=True)
         df.sort_index(inplace=True)
 
 
         # Prophet model
         # Prophet model
         model = self.prophet_model()
         model = self.prophet_model()
         model.fit(df)
         model.fit(df)
-        
+
         # Future dataframe for predictions
         # Future dataframe for predictions
-        future = model.make_future_dataframe(periods=30, freq='D')
+        future = model.make_future_dataframe(periods=30, freq="D")
         future = model.predict(future)
         future = model.predict(future)
-    
-        color = self.user_dict[user]['color']
-        
+
+        color = self.user_dict[user]["color"]
+
         # Write a message and post to slack
         # Write a message and post to slack
-        message = ('{} Your predicted weight on {} = {:.2f} lbs.'.format(
-            user, max(future['ds']).date(), future.ix[len(future) - 1, 'yhat']))
-        
-        self.slack.chat.post_message(channel="#weight_tracker", text=message, username = 'The Future', icon_emoji=":city_sunrise:")
-        
+        message = "{} Your predicted weight on {} = {:.2f} lbs.".format(
+            user, max(future["ds"]).date(), future.ix[len(future) - 1, "yhat"]
+        )
+
+        self.slack.chat.post_message(
+            channel="#weight_tracker",
+            text=message,
+            username="The Future",
+            icon_emoji=":city_sunrise:",
+        )
+
         # Create the plot and upload to slack
         # Create the plot and upload to slack
         fig, ax = plt.subplots(1, 1, figsize=(10, 8))
         fig, ax = plt.subplots(1, 1, figsize=(10, 8))
-        ax.plot(df['ds'], df['y'], 'o', color = 'k', ms = 8, label = 'observations')
-        ax.plot(future['ds'], future['yhat'], '-', color = color, label = 'modeled')
-        ax.fill_between(future['ds'].dt.to_pydatetime(), future['yhat_upper'], future['yhat_lower'], facecolor = color, 
-                alpha = 0.4, edgecolor = 'k', linewidth  = 1.8, label = 'confidence interval')
-        plt.xlabel('Date'); plt.ylabel('Weight (lbs)'); plt.title('%s 30 Day Prediction' % user)
+        ax.plot(df["ds"], df["y"], "o", color="k", ms=8, label="observations")
+        ax.plot(future["ds"], future["yhat"], "-", color=color, label="modeled")
+        ax.fill_between(
+            future["ds"].dt.to_pydatetime(),
+            future["yhat_upper"],
+            future["yhat_lower"],
+            facecolor=color,
+            alpha=0.4,
+            edgecolor="k",
+            linewidth=1.8,
+            label="confidence interval",
+        )
+        plt.xlabel("Date")
+        plt.ylabel("Weight (lbs)")
+        plt.title("%s 30 Day Prediction" % user)
         plt.legend()
         plt.legend()
-        plt.savefig('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\future_plot.png')
-        
-        self.slack.files.upload('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\future_plot.png', channels="#weight_tracker", title="%s Future Predictions" % user)
-        
-        os.remove('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\future_plot.png')
-        
+        plt.savefig(
+            "C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\future_plot.png"
+        )
+
+        self.slack.files.upload(
+            "C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\future_plot.png",
+            channels="#weight_tracker",
+            title="%s Future Predictions" % user,
+        )
+
+        os.remove(
+            "C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\future_plot.png"
+        )
+
     """ 
     """ 
     Analyze user trends and provide recommendations. 
     Analyze user trends and provide recommendations. 
     Determine if the user is on track to meet their goal.
     Determine if the user is on track to meet their goal.
     """
     """
-    
+
     def analyze(self, user):
-        
+
         self.reset_plot()
-        
+
         # Get user info and sort dataframe by date
         info = self.user_dict.get(user)
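+        # user_dict entries carry the per-user stats used below: goal_weight, recent, abs_change, objective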
-        goal_weight = info['goal_weight']
-        df = self.weights[self.weights['Name'] == user]
+        goal_weight = info["goal_weight"]
+        df = self.weights[self.weights["Name"] == user]
         df = df.sort_index()
-        df['ds'] = [date.date() for date in df.index]
-        df['y'] = df['Entry']
-        
+        df["ds"] = [date.date() for date in df.index]
+        df["y"] = df["Entry"]
+
         model = self.prophet_model()
         model.fit(df)
-        
+
         prediction_days = 2 * len(df)
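+        # Forecast horizon: one day per recorded entry, twice over (freq="D" below)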
-        
-        future = model.make_future_dataframe(periods = prediction_days, freq = 'D')
+
+        future = model.make_future_dataframe(periods=prediction_days, freq="D")
         future = model.predict(future)
-        
-        # lbs change per day 
-        change_per_day = info['abs_change'] / (max(df['ds']) - min(df['ds'])).days
-        
-        days_to_goal = abs(int((info['recent'] - goal_weight) / change_per_day))
-        date_for_goal = max(df['ds']) + pd.DateOffset(days=days_to_goal)
-        
+
+        # lbs change per day
+        change_per_day = info["abs_change"] / (max(df["ds"]) - min(df["ds"])).days
+
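+        # Linear extrapolation: days to goal = remaining weight change / average daily change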
+        days_to_goal = abs(int((info["recent"] - goal_weight) / change_per_day))
+        date_for_goal = max(df["ds"]) + pd.DateOffset(days=days_to_goal)
+
         # Future dataframe rows where the predicted weight is below the goal
-        goal_future = future[future['yhat'] < goal_weight]
-        
+        goal_future = future[future["yhat"] < goal_weight]
+
         # The additive model predicts the user will meet their goal
         if len(goal_future) > 0:
-            model_goal_date = min(goal_future['ds'])
-            message = ("\n{} Your average weight change per day is {:.2f} lbs\n"
-                       "Extrapolating the average loss per day, you will reach your goal of {} lbs in {} days on {}.\n\n"
-                       "The additive model predicts you will reach your goal on {}\n".format(
-                       user, change_per_day, goal_weight, days_to_goal, date_for_goal.date(), model_goal_date.date()))
-        
+            model_goal_date = min(goal_future["ds"])
+            message = (
+                "\n{} Your average weight change per day is {:.2f} lbs\n"
+                "Extrapolating the average loss per day, you will reach your goal of {} lbs in {} days on {}.\n\n"
+                "The additive model predicts you will reach your goal on {}\n".format(
+                    user,
+                    change_per_day,
+                    goal_weight,
+                    days_to_goal,
+                    date_for_goal.date(),
+                    model_goal_date.date(),
+                )
+            )
+
         # The additive model does not predict the user will meet their goal
         else:
-            final_future_date = max(future['ds'])
-            message = ("\n{} Your average weight change per day is {:.2f} lbs\n\n"
-                       "Extrapolating the average loss per day, you will reach your goal of {} lbs in {} days on {}.\n\n"
-                       "The additive model does not forecast you reaching your goal by {}.\n".format(
-                           user, change_per_day, goal_weight, days_to_goal, date_for_goal.date(), final_future_date))
-        
-        
-        
-        self.slack.chat.post_message(channel="#weight_tracker", text=message, username="Analysis", icon_emoji=":bar_chart:")
+            final_future_date = max(future["ds"])
+            message = (
+                "\n{} Your average weight change per day is {:.2f} lbs\n\n"
+                "Extrapolating the average loss per day, you will reach your goal of {} lbs in {} days on {}.\n\n"
+                "The additive model does not forecast you reaching your goal by {}.\n".format(
+                    user,
+                    change_per_day,
+                    goal_weight,
+                    days_to_goal,
+                    date_for_goal.date(),
+                    final_future_date,
+                )
+            )
+
+        self.slack.chat.post_message(
+            channel="#weight_tracker",
+            text=message,
+            username="Analysis",
+            icon_emoji=":bar_chart:",
+        )
 
         # Identify Weekly Trends
-        future['weekday'] = [date.weekday() for date in future['ds']]
-        future_weekly = future.groupby('weekday').mean()
-        future_weekly.index = ['Mon', 'Tues', 'Wed', 'Thurs', 'Fri', 'Sat', 'Sun']
-        
+        future["weekday"] = [date.weekday() for date in future["ds"]]
+        future_weekly = future.groupby("weekday").mean()
+        future_weekly.index = ["Mon", "Tues", "Wed", "Thurs", "Fri", "Sat", "Sun"]
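+        # Averaging by weekday isolates Prophet's "weekly" seasonality component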
+
         # Color labels based on the user's objective
-        colors = ['red' if ( ((weight > 0) & (info['objective'] == 'lose')) | ((weight < 0) & (info['objective'] == 'gain'))) else 'green' for weight in future_weekly['weekly']]
+        colors = [
+            "red"
+            if (
+                ((weight > 0) & (info["objective"] == "lose"))
+                | ((weight < 0) & (info["objective"] == "gain"))
+            )
+            else "green"
+            for weight in future_weekly["weekly"]
+        ]
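+        # Red flags weekdays trending against the user's objective; green marks days on track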
 
         self.reset_plot()
-        
+
         # Create a bar plot with labels for positive and negative changes
         plt.figure(figsize=(10, 8))
         xvalues = list(range(len(future_weekly)))
-        plt.bar(xvalues, future_weekly['weekly'], color = colors, edgecolor = 'k', linewidth = 2)
+        plt.bar(
+            xvalues, future_weekly["weekly"], color=colors, edgecolor="k", linewidth=2
+        )
         plt.xticks(xvalues, list(future_weekly.index))
-        red_patch = mpatches.Patch(color='red',  linewidth = 2, label='Needs Work')
-        green_patch = mpatches.Patch(color='green', linewidth = 2, label='Solid')
+        red_patch = mpatches.Patch(color="red", linewidth=2, label="Needs Work")
+        green_patch = mpatches.Patch(color="green", linewidth=2, label="Solid")
         plt.legend(handles=[red_patch, green_patch])
-        plt.xlabel('Day of Week')
-        plt.ylabel('Trend (lbs)')
-        plt.title('%s Weekly Trends' % user)
-        plt.savefig('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\weekly_plot.png')
-        
-        # Upload the image to slack and delete local file
-        self.slack.files.upload('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\weekly_plot.png', channels = '#weight_tracker', title="%s Weekly Trends" % user)
-
-        os.remove('C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\weekly_plot.png')
+        plt.xlabel("Day of Week")
+        plt.ylabel("Trend (lbs)")
+        plt.title("%s Weekly Trends" % user)
+        plt.savefig(
+            "C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\weekly_plot.png"
+        )
 
-        
+        # Upload the image to slack and delete local file
+        self.slack.files.upload(
+            "C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\weekly_plot.png",
+            channels="#weight_tracker",
+            title="%s Weekly Trends" % user,
+        )
+
+        os.remove(
+            "C:\\Users\\Will Koehrsen\\Documents\\Data-Analysis\\weighter\\images\\weekly_plot.png"
+        )