sentiment.py 3.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. from datetime import datetime, timedelta
  2. import numpy
  3. from labours.plotting import apply_plot_style, deploy_plot, get_plot_path, import_pyplot
  4. from labours.utils import parse_date
  5. def show_sentiment_stats(args, name, resample, start_date, data):
  6. from scipy.signal import convolve, slepian
  7. matplotlib, pyplot = import_pyplot(args.backend, args.style)
  8. start_date = datetime.fromtimestamp(start_date)
  9. data = sorted(data.items())
  10. mood = numpy.zeros(data[-1][0] + 1, dtype=numpy.float32)
  11. timeline = numpy.array([start_date + timedelta(days=i) for i in range(mood.shape[0])])
  12. for d, val in data:
  13. mood[d] = (0.5 - val.Value) * 2
  14. resolution = 32
  15. window = slepian(len(timeline) // resolution, 0.5)
  16. window /= window.sum()
  17. mood_smooth = convolve(mood, window, "same")
  18. pos = mood_smooth.copy()
  19. pos[pos < 0] = 0
  20. neg = mood_smooth.copy()
  21. neg[neg >= 0] = 0
  22. resolution = 4
  23. window = numpy.ones(len(timeline) // resolution)
  24. window /= window.sum()
  25. avg = convolve(mood, window, "same")
  26. pyplot.fill_between(timeline, pos, color="#8DB843", label="Positive")
  27. pyplot.fill_between(timeline, neg, color="#E14C35", label="Negative")
  28. pyplot.plot(timeline, avg, color="grey", label="Average", linewidth=5)
  29. legend = pyplot.legend(loc=1, fontsize=args.font_size)
  30. pyplot.ylabel("Comment sentiment")
  31. pyplot.xlabel("Time")
  32. apply_plot_style(pyplot.gcf(), pyplot.gca(), legend, args.background,
  33. args.font_size, args.size)
  34. pyplot.xlim(parse_date(args.start_date, timeline[0]), parse_date(args.end_date, timeline[-1]))
  35. locator = pyplot.gca().xaxis.get_major_locator()
  36. # set the optimal xticks locator
  37. if "M" not in resample:
  38. pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
  39. locs = pyplot.gca().get_xticks().tolist()
  40. if len(locs) >= 16:
  41. pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
  42. locs = pyplot.gca().get_xticks().tolist()
  43. if len(locs) >= 16:
  44. pyplot.gca().xaxis.set_major_locator(locator)
  45. if locs[0] < pyplot.xlim()[0]:
  46. del locs[0]
  47. endindex = -1
  48. if len(locs) >= 2 and pyplot.xlim()[1] - locs[-1] > (locs[-1] - locs[-2]) / 2:
  49. locs.append(pyplot.xlim()[1])
  50. endindex = len(locs) - 1
  51. startindex = -1
  52. if len(locs) >= 2 and locs[0] - pyplot.xlim()[0] > (locs[1] - locs[0]) / 2:
  53. locs.append(pyplot.xlim()[0])
  54. startindex = len(locs) - 1
  55. pyplot.gca().set_xticks(locs)
  56. # hacking time!
  57. labels = pyplot.gca().get_xticklabels()
  58. if startindex >= 0:
  59. labels[startindex].set_text(timeline[0].date())
  60. labels[startindex].set_text = lambda _: None
  61. labels[startindex].set_rotation(30)
  62. labels[startindex].set_ha("right")
  63. if endindex >= 0:
  64. labels[endindex].set_text(timeline[-1].date())
  65. labels[endindex].set_text = lambda _: None
  66. labels[endindex].set_rotation(30)
  67. labels[endindex].set_ha("right")
  68. overall_pos = sum(2 * (0.5 - d[1].Value) for d in data if d[1].Value < 0.5)
  69. overall_neg = sum(2 * (d[1].Value - 0.5) for d in data if d[1].Value > 0.5)
  70. title = "%s sentiment +%.1f -%.1f δ=%.1f" % (
  71. name, overall_pos, overall_neg, overall_pos - overall_neg)
  72. if args.mode == "all" and args.output:
  73. output = get_plot_path(args.output, "sentiment")
  74. else:
  75. output = args.output
  76. deploy_plot(title, output, args.background)