6 tahun lalu · 108460edf0
--- a/03_Page_views_wrangling.ipynb
+++ b/03_Page_views_wrangling.ipynb
@@ -0,0 +1,53 @@
 
																+{
															
 
																+ "cells": [
															
 
																+  {
															
 
																+   "cell_type": "code",
															
 
																+   "execution_count": null,
															
 
																+   "metadata": {},
															
 
																+   "outputs": [],
															
 
																+   "source": [
															
 
																+    "import os\n",
															
 
																+    "import pandas\n",
															
 
																+    "\n",
															
 
																+    "DATA_DIR = os.path.join('data', 'pandas_website')"
															
 
																+   ]
															
 
																+  },
															
 
																+  {
															
 
																+   "cell_type": "code",
															
 
																+   "execution_count": null,
															
 
																+   "metadata": {},
															
 
																+   "outputs": [],
															
 
																+   "source": [
															
 
																+    "%load solutions/page_views_1.py"
															
 
																+   ]
															
 
																+  },
															
 
																+  {
															
 
																+   "cell_type": "code",
															
 
																+   "execution_count": null,
															
 
																+   "metadata": {},
															
 
																+   "outputs": [],
															
 
																+   "source": []
															
 
																+  }
															
 
																+ ],
															
 
																+ "metadata": {
															
 
																+  "kernelspec": {
															
 
																+   "display_name": "Python 3",
															
 
																+   "language": "python",
															
 
																+   "name": "python3"
															
 
																+  },
															
 
																+  "language_info": {
															
 
																+   "codemirror_mode": {
															
 
																+    "name": "ipython",
															
 
																+    "version": 3
															
 
																+   },
															
 
																+   "file_extension": ".py",
															
 
																+   "mimetype": "text/x-python",
															
 
																+   "name": "python",
															
 
																+   "nbconvert_exporter": "python",
															
 
																+   "pygments_lexer": "ipython3",
															
 
																+   "version": "3.7.3"
															
 
																+  }
															
 
																+ },
															
 
																+ "nbformat": 4,
															
 
																+ "nbformat_minor": 2
															
 
																+}
															
--- a/04_Page_views_eda.ipynb
+++ b/04_Page_views_eda.ipynb
@@ -0,0 +1,46 @@
 
																+{
															
 
																+ "cells": [
															
 
																+  {
															
 
																+   "cell_type": "code",
															
 
																+   "execution_count": null,
															
 
																+   "metadata": {},
															
 
																+   "outputs": [],
															
 
																+   "source": [
															
 
																+    "import os\n",
															
 
																+    "import pandas\n",
															
 
																+    "\n",
															
 
																+    "DATA_DIR = os.path.join('data', 'pandas_website')\n",
															
 
																+    "\n",
															
 
																+    "df = pandas.read_parquet(os.path.join(DATA_DIR, 'pandas_website_views_2018.parquet'))"
															
 
																+   ]
															
 
																+  },
															
 
																+  {
															
 
																+   "cell_type": "code",
															
 
																+   "execution_count": null,
															
 
																+   "metadata": {},
															
 
																+   "outputs": [],
															
 
																+   "source": []
															
 
																+  }
															
 
																+ ],
															
 
																+ "metadata": {
															
 
																+  "kernelspec": {
															
 
																+   "display_name": "Python 3",
															
 
																+   "language": "python",
															
 
																+   "name": "python3"
															
 
																+  },
															
 
																+  "language_info": {
															
 
																+   "codemirror_mode": {
															
 
																+    "name": "ipython",
															
 
																+    "version": 3
															
 
																+   },
															
 
																+   "file_extension": ".py",
															
 
																+   "mimetype": "text/x-python",
															
 
																+   "name": "python",
															
 
																+   "nbconvert_exporter": "python",
															
 
																+   "pygments_lexer": "ipython3",
															
 
																+   "version": "3.7.3"
															
 
																+  }
															
 
																+ },
															
 
																+ "nbformat": 4,
															
 
																+ "nbformat_minor": 2
															
 
																+}
															
--- a/data/pandas_website/pandas_website_views_2018.parquet
+++ b/data/pandas_website/pandas_website_views_2018.parquet
--- a/environment.yml
+++ b/environment.yml
@@ -4,6 +4,7 @@ channels:
 
																   - defaults
															
 
																 dependencies:
															
 
																   - python=3.7
															
 
																+  - jupyter=1.0
															
 
																   - pandas=0.25
															
 
																+  - pyarrow=0.14.1
															
 
																   - matplotlib=2.2
															
 
																-  - jupyter=1.0
															
--- a/solutions/page_views_1.py
+++ b/solutions/page_views_1.py
@@ -0,0 +1,23 @@
 
																+import locale
															
 
																+import glob
															
 
																+
															
 
																+
															
 
																# locale.atoi below relies on the en_US numeric conventions to parse the
																# count columns (presumably formatted with thousands separators such as
																# "1,234" -- confirm against the raw exports).
																# NOTE: this changes the locale of the whole process and raises
																# locale.Error when 'en_US.UTF-8' is not installed on the machine.
																locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')


																# Read every gzipped CSV export found in DATA_DIR, keep the first 5,000 rows
																# of each, clean the columns and write the combined table to a single
																# parquet file. DATA_DIR, os and pandas are expected to be defined already
																# by the notebook cell that loads this script.
																(pandas.concat(pandas.read_csv(fname,
																                               comment='#',
																                               # Every column later passed to locale.atoi must
																                               # arrive as text: atoi fails on values pandas
																                               # has already inferred as integers.
																                               dtype={'Pageviews': str,
																                                      'Unique Pageviews': str,
																                                      'Entrances': str})
																                     .head(5_000)
																               # glob.glob returns paths in arbitrary order; sorting
																               # makes the row order of the output reproducible.
																               for fname in sorted(glob.glob(os.path.join(DATA_DIR, '*.csv.gz'))))
																       .set_index('Page')
																       # Rows with a missing value in any column are discarded; this runs
																       # before 'Page Value' is dropped, so that column counts as well.
																       .dropna()
																       .drop(columns='Page Value')
																       .assign(**{'Pageviews': lambda df: df['Pageviews'].apply(locale.atoi),
																                  'Unique Pageviews': lambda df: df['Unique Pageviews'].apply(locale.atoi),
																                  # Durations may carry a leading '<', which is stripped before
																                  # parsing. total_seconds() is used instead of .dt.seconds
																                  # because the latter is only the seconds component
																                  # (0..86399) and silently drops whole days.
																                  'Avg. Time on Page': lambda df: (pandas.to_timedelta(df['Avg. Time on Page'].str.lstrip('<'))
																                                                         .dt.total_seconds()
																                                                         .astype('int64')),
																                  'Entrances': lambda df: df['Entrances'].apply(locale.atoi),
																                  # Percentages are stored as plain floats (e.g. '12.5%' -> 12.5).
																                  'Bounce Rate': lambda df: df['Bounce Rate'].str.rstrip('%').astype(float),
																                  '% Exit': lambda df: df['% Exit'].str.rstrip('%').astype(float)})
																       .to_parquet(os.path.join(DATA_DIR, 'pandas_website_views_2018.parquet'),
																                   engine='pyarrow'))