123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213 |
- # Data science imports
- from multiprocessing import Pool
- import requests
- import re
- from bs4 import BeautifulSoup
- from itertools import chain
- from collections import Counter, defaultdict
- from timeit import default_timer as timer
- import pandas as pd
- from scipy import stats
- # Interactive plotting
- import plotly.plotly as py
- import plotly.graph_objs as go
- from plotly.offline import iplot
- import cufflinks
- cufflinks.go_offline()
def make_update_menu(base_title, article_annotations=None, response_annotations=None):
    """
    Make an updatemenu for an interactive plot.

    The menu holds three buttons ('both', 'articles', 'responses'). Each
    button uses the 'update' method: its first argument toggles trace
    visibility and its second argument sets the plot title and the
    annotations to display.

    :param base_title: string for title of plot
    :param article_annotations: optional annotation dict to show with the
        articles; omitted from the buttons when None
    :param response_annotations: optional annotation dict to show with the
        responses; omitted from the buttons when None
    :return updatemenus: a list with one updatemenu dict for adding to a layout
    """

    def _supplied(*annotations):
        # Annotations are optional; drop the missing ones so the layout
        # update never receives a literal None inside its annotations list.
        return [annotation for annotation in annotations
                if annotation is not None]

    buttons = [
        dict(
            label='both',
            method='update',
            args=[dict(visible=[True, True]),
                  dict(title=base_title,
                       annotations=_supplied(article_annotations,
                                             response_annotations))]),
        dict(
            label='articles',
            method='update',
            args=[dict(visible=[True, False]),
                  dict(title='Article ' + base_title,
                       annotations=_supplied(article_annotations))]),
        dict(
            label='responses',
            method='update',
            args=[dict(visible=[False, True]),
                  dict(title='Response ' + base_title,
                       annotations=_supplied(response_annotations))]),
    ]

    updatemenus = [dict(buttons=buttons)]
    return updatemenus
def _axis_style(column):
    """Return the shared axis settings, titled with the title-cased column name."""
    return dict(
        title=column.title(),
        tickfont=dict(size=14),
        titlefont=dict(size=16))


def _linear_fit(df, x, y, trace_name, color, eq_pos, font):
    """
    Fit a least-squares line of df[y] on df[x].

    :return: (trace, annotation) -- a dashed line trace for the fit and an
        annotation dict carrying the fit equation
    """
    # numpy is not imported at module level in this file, so bring it into
    # scope here to keep this block self-contained.
    import numpy as np

    regression = stats.linregress(x=df[x], y=df[y])
    slope = regression.slope
    intercept = regression.intercept
    rvalue = regression.rvalue

    # Integer grid spanning the observed x range. The upper bound is
    # inclusive so the line reaches the last observation and the grid is
    # never empty when the x range is narrower than one unit.
    xi = np.arange(int(df[x].min()), int(df[x].max()) + 1)
    line = xi * slope + intercept

    trace = go.Scatter(
        x=xi,
        y=line,
        mode='lines',
        marker=dict(color=color),
        line=dict(width=4, dash='longdash'),
        name=trace_name)

    # Place the equation at a fraction of the x/y extent. linregress reports
    # the correlation coefficient r, so it is squared for the R^2 label.
    annotation = dict(
        x=xi[-1] * eq_pos[0],
        y=df[y].max() * eq_pos[1],
        showarrow=False,
        text=f'$R^2 = {rvalue ** 2:.2f}; Y = {slope:.2f}X + {intercept:.2f}$',
        font=font)

    return trace, annotation


def make_iplot(data, x, y, base_title, time=False, eq_pos=(0.75, 0.25)):
    """
    Make an interactive plot. Adds a dropdown to separate articles from
    responses if there are responses in the data; otherwise all rows are
    drawn as a single series. Unless `time` is set, a linear regression
    line and its equation are added for each series.

    :param data: dataframe of entry data; must have 'response' and 'title'
        columns in addition to the `x` and `y` columns
    :param x: string for xaxis of plot
    :param y: string for yaxis of plot
    :param base_title: string for title of plot
    :param time: boolean for whether the xaxis is a time axis; when True the
        regression is skipped and a range selector and slider are added
    :param eq_pos: position of equation for linear regression, as fractions
        of the largest fitted x value and the largest y value
    :return figure: an interactive plotly object for display
    """
    # Extract the relevant data
    responses = data[data['response'] == 'response'].copy()
    articles = data[data['response'] == 'article'].copy()

    # Layout settings shared by both kinds of plot
    layout_options = dict(
        height=600,
        width=900,
        title=base_title,
        xaxis=_axis_style(x),
        yaxis=_axis_style(y))

    if not responses.empty:
        # Create scatterplot data, articles must be first for menu selection
        plot_data = [
            go.Scatter(
                x=articles[x],
                y=articles[y],
                mode='markers',
                name='articles',
                text=articles['title'],
                marker=dict(color='blue', size=12)),
            go.Scatter(
                x=responses[x],
                y=responses[y],
                mode='markers',
                name='responses',
                marker=dict(color='green', size=12))
        ]

        # Stays empty on a time axis, where no regression is fitted; it must
        # still exist because the layout below reads it.
        annotations = {}
        if not time:
            series = ((articles, 'articles', 'blue'),
                      (responses, 'responses', 'green'))
            for df, name, color in series:
                trace, annotations[name] = _linear_fit(
                    df, x, y,
                    trace_name=f'{name} linear fit',
                    color=color,
                    eq_pos=eq_pos,
                    font=dict(size=16, color=color))
                plot_data.append(trace)

        # Make a layout with update menus
        layout = go.Layout(
            annotations=list(annotations.values()),
            updatemenus=make_update_menu(base_title,
                                         annotations.get('articles'),
                                         annotations.get('responses')),
            **layout_options)

    # If there are no responses, plot every row as one series
    else:
        plot_data = [
            go.Scatter(
                x=data[x],
                y=data[y],
                mode='markers',
                name='observations',
                text=data['title'],
                marker=dict(color='blue', size=12))
        ]

        annotations = []
        # As in the branch above, no regression is fitted on a time axis
        if not time:
            trace, annotation = _linear_fit(
                data, x, y,
                trace_name='linear fit',
                color='red',
                eq_pos=eq_pos,
                font=dict(size=16))
            plot_data.append(trace)
            annotations.append(annotation)

        layout = go.Layout(annotations=annotations, **layout_options)

    # Add a rangeselector and rangeslider for a date xaxis
    if time:
        layout['xaxis']['rangeselector'] = dict(
            buttons=[
                dict(count=1, label='1m', step='month', stepmode='backward'),
                dict(count=6, label='6m', step='month', stepmode='backward'),
                dict(count=1, label='YTD', step='year', stepmode='todate'),
                dict(count=1, label='1y', step='year', stepmode='backward'),
                dict(step='all')
            ])
        layout['xaxis']['rangeslider'] = dict(visible=True)

    # Return the figure
    figure = go.Figure(data=plot_data, layout=layout)
    return figure
|