|
@@ -0,0 +1,17 @@
|
|
|
+import operator
|
|
|
+import urllib.parse
|
|
|
+import pandas
|
|
|
+
|
|
|
+
|
|
|
+page_views = pandas.read_parquet(PAGE_VIEWS_FNAME)  # index labels are parsed as URLs below
+page_views.index = (page_views.index
+                    .to_series()
+                    .apply(urllib.parse.urlparse)
+                    .apply(operator.attrgetter('path'))
+                    .str.split('/')
+                    .str[-1]  # keep only the last path segment
+                    .str.replace(r'\.html$', '', regex=True))  # not rstrip: that strips a char set
|
|
|
+
|
|
|
+
|
|
|
+docstring_errors = pandas.read_hdf(DOCSTRING_ERRORS_FNAME).join(  # join() defaults to a left join
+    page_views.groupby('Page')['Pageviews'].sum())  # summed 'Pageviews' per 'Page' group
|