123456789101112131415161718 |
import operator
import urllib.parse

import pandas

# Load the page-view statistics. The index values are treated as URLs below;
# the 'Page' key and the 'Pageviews' column are used in the aggregation.
page_views = pandas.read_parquet(PAGE_VIEWS_FNAME)

# Reduce every URL in the index to the bare name of the page it points at:
# keep only the URL path, take its last '/'-separated segment and drop a
# trailing '.html' extension.
page_views.index = (page_views.index
                    .to_series()
                    .apply(urllib.parse.urlparse)
                    .apply(operator.attrgetter('path'))
                    .str.split('/')
                    .str[-1]
                    # Remove the literal '.html' suffix only.
                    # str.rstrip('.html') treats its argument as a set of
                    # characters and would also eat trailing 'h', 't', 'm',
                    # 'l' and '.' from the name itself
                    # (e.g. 'DataFrame.at.html' -> 'DataFrame.a').
                    .str.replace(r'\.html$', '', regex=True))

# Sum the page views per page (different URLs can collapse to the same name
# after the clean-up above) and attach the totals to the docstring errors.
# NOTE(review): the join aligns on the index of the docstring-errors table,
# which is presumably keyed by the same page names -- confirm.
docstring_errors = (pandas.read_hdf(DOCSTRING_ERRORS_FNAME)
                    .join(page_views.groupby('Page')['Pageviews'].sum()))
|