# visuals.py
# Standard library
from collections import Counter, defaultdict
from itertools import chain
from multiprocessing import Pool
import re
from timeit import default_timer as timer

# Data science imports
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import requests
from scipy import stats

# Interactive plotting
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot
import cufflinks

cufflinks.go_offline()
  17. def make_update_menu(base_title, article_annotations=None, response_annotations=None):
  18. """
  19. Make an updatemenu for interative plot
  20. :param base_title: string for title of plot
  21. :return updatemenus: a updatemenus object for adding to a layout
  22. """
  23. updatemenus = list([
  24. dict(
  25. buttons=list([
  26. dict(
  27. label='both', method='update',
  28. args=[dict(visible=[True, True]), dict(title=base_title,
  29. annotations=[article_annotations, response_annotations])]),
  30. dict(
  31. label='articles',
  32. method='update',
  33. args=[dict(visible=[True, False]), dict(title='Article ' + base_title,
  34. annotations=[article_annotations])]),
  35. dict(
  36. label='responses',
  37. method='update',
  38. args=[dict(visible=[False, True]), dict(title='Response ' + base_title,
  39. annotations=[response_annotations])]),
  40. ]))
  41. ])
  42. return updatemenus
  43. def make_iplot(data, x, y, base_title, time=False, eq_pos=(0.75, 0.25)):
  44. """
  45. Make an interactive plot. Adds a dropdown to separate articles from responses
  46. if there are responses in the data. If there is only articles (or only responses)
  47. adds a linear regression line.
  48. :param data: dataframe of entry data
  49. :param x: string for xaxis of plot
  50. :param y: sring for yaxis of plot
  51. :param base_title: string for title of plot
  52. :param time: boolean for whether the xaxis is a plot
  53. :param eq_pos: position of equation for linear regression
  54. :return figure: an interactive plotly object for display
  55. """
  56. # Extract the relevant data
  57. responses = data[data['response'] == 'response'].copy()
  58. articles = data[data['response'] == 'article'].copy()
  59. if not responses.empty:
  60. # Create scatterplot data, articles must be first for menu selection
  61. plot_data = [
  62. go.Scatter(
  63. x=articles[x],
  64. y=articles[y],
  65. mode='markers',
  66. name='articles',
  67. text=articles['title'],
  68. marker=dict(color='blue', size=12)),
  69. go.Scatter(
  70. x=responses[x],
  71. y=responses[y],
  72. mode='markers',
  73. name='responses',
  74. marker=dict(color='green', size=12))
  75. ]
  76. if not time:
  77. annotations = {}
  78. for df, name in zip([articles, responses],
  79. ['articles', 'responses']):
  80. regression = stats.linregress(x=df[x], y=df[y])
  81. slope = regression.slope
  82. intercept = regression.intercept
  83. rvalue = regression.rvalue
  84. xi = np.array(range(int(df[x].min()), int(df[x].max())))
  85. line = xi * slope + intercept
  86. trace = go.Scatter(
  87. x=xi,
  88. y=line,
  89. mode='lines',
  90. marker=dict(color='blue' if name ==
  91. 'articles' else 'green'),
  92. line=dict(width=4, dash='longdash'),
  93. name=f'{name} linear fit'
  94. )
  95. annotations[name] = dict(
  96. x=max(xi) * eq_pos[0],
  97. y=df[y].max() * eq_pos[1],
  98. showarrow=False,
  99. text=f'$R^2 = {rvalue:.2f}; Y = {slope:.2f}X + {intercept:.2f}$',
  100. font=dict(size=16, color='blue' if name ==
  101. 'articles' else 'green')
  102. )
  103. plot_data.append(trace)
  104. # Make a layout with update menus
  105. layout = go.Layout(annotations=list(annotations.values()),
  106. height=600,
  107. width=900,
  108. title=base_title,
  109. xaxis=dict(
  110. title=x.title(),
  111. tickfont=dict(size=14),
  112. titlefont=dict(size=16)),
  113. yaxis=dict(
  114. title=y.title(),
  115. tickfont=dict(size=14),
  116. titlefont=dict(size=16)),
  117. updatemenus=make_update_menu(base_title, annotations['articles'], annotations['responses']))
  118. # If there are only articles
  119. else:
  120. plot_data = [
  121. go.Scatter(
  122. x=data[x],
  123. y=data[y],
  124. mode='markers',
  125. name='observations',
  126. text=data['title'],
  127. marker=dict(color='blue', size=12))
  128. ]
  129. regression = stats.linregress(x=data[x], y=data[y])
  130. slope = regression.slope
  131. intercept = regression.intercept
  132. rvalue = regression.rvalue
  133. xi = np.array(range(int(data[x].min()), int(data[x].max())))
  134. line = xi * slope + intercept
  135. trace = go.Scatter(
  136. x=xi,
  137. y=line,
  138. mode='lines',
  139. marker=dict(color='red'),
  140. line=dict(width=4, dash='longdash'),
  141. name='linear fit'
  142. )
  143. annotations = [dict(
  144. x=max(xi) * eq_pos[0],
  145. y=data[y].max() * eq_pos[1],
  146. showarrow=False,
  147. text=f'$R^2 = {rvalue:.2f}; Y = {slope:.2f}X + {intercept:.2f}$',
  148. font=dict(size=16)
  149. )]
  150. plot_data.append(trace)
  151. layout = go.Layout(annotations=annotations,
  152. height=600,
  153. width=900,
  154. title=base_title,
  155. xaxis=dict(
  156. title=x.title(),
  157. tickfont=dict(size=14),
  158. titlefont=dict(size=16)),
  159. yaxis=dict(
  160. title=y.title(),
  161. tickfont=dict(size=14),
  162. titlefont=dict(size=16)))
  163. # Add a rangeselector and rangeslider for a data xaxis
  164. if time:
  165. rangeselector = dict(
  166. buttons=list([
  167. dict(count=1, label='1m', step='month', stepmode='backward'),
  168. dict(count=6, label='6m', step='month', stepmode='backward'),
  169. dict(count=1, label='YTD', step='year', stepmode='todate'),
  170. dict(count=1, label='1y', step='year', stepmode='backward'),
  171. dict(step='all')
  172. ]))
  173. rangeslider = dict(visible=True)
  174. layout['xaxis']['rangeselector'] = rangeselector
  175. layout['xaxis']['rangeslider'] = rangeslider
  176. figure = go.Figure(data=plot_data, layout=layout)
  177. return figure
  178. # Return the figure
  179. figure = go.Figure(data=plot_data, layout=layout)
  180. return figure