sentiment.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. from datetime import datetime, timedelta
  2. import numpy
  3. from labours.plotting import apply_plot_style, deploy_plot, get_plot_path, import_pyplot
  4. from labours.utils import parse_date
  def _slepian_kernel(size, width=0.5):
      """Build a Slepian smoothing kernel of ``size`` samples that sums to 1.

      ``scipy.signal.slepian`` is used when the installed SciPy still provides
      it, so existing environments behave exactly as before; otherwise the
      kernel comes from ``scipy.signal.windows.dpss``.

      :param size: Number of samples in the kernel; callers pass at least 1.
      :param width: Bandwidth argument in the sense of ``slepian(M, width)``.
      :return: 1-D numpy array whose elements sum to 1.
      """
      try:
          from scipy.signal import slepian
      except ImportError:
          from scipy.signal.windows import dpss

          # NOTE(review): dpss() takes the standardized half bandwidth NW rather
          # than slepian()'s ``width``; NW = size * width / 4 is believed to
          # reproduce the old window up to a scale factor, which the
          # normalisation below cancels -- confirm against the SciPy docs.
          window = dpss(size, size * width / 4)
      else:
          window = slepian(size, width)
      return window / window.sum()


  def _pin_tick_label(label, moment):
      """Make ``label`` show the calendar date of ``moment``, rotated 30 degrees."""
      label.set_text(moment.date())
      # Replace the setter on this instance with a no-op so that any later
      # set_text() call leaves the text assigned above in place.
      label.set_text = lambda _: None
      label.set_rotation(30)
      label.set_ha("right")


  def show_sentiment_stats(args, name, resample, start_date, data):
      """Plot the comment sentiment over time and hand the figure to deploy_plot.

      Each entry of ``data`` is turned into a mood score ``(0.5 - Value) * 2``,
      so values below 0.5 count as positive and values above 0.5 as negative.
      The daily scores are smoothed twice: a Slepian kernel spanning 1/32 of the
      timeline feeds the filled "Positive"/"Negative" areas, and a flat kernel
      spanning 1/4 of the timeline feeds the grey "Average" line.

      :param args: Options object; the attributes read here are ``backend``,
                   ``style``, ``font_size``, ``background``, ``size``,
                   ``start_date``, ``end_date``, ``mode`` and ``output``.
      :param name: Text placed at the beginning of the plot title.
      :param resample: Resampling spec; yearly ticks are used unless it
                       contains "M".
      :param start_date: Timestamp accepted by ``datetime.fromtimestamp``; it is
                         the date of day 0.
      :param data: Mapping from an integer day offset to an object exposing a
                   numeric ``Value`` attribute (presumably within [0, 1] --
                   TODO confirm with the caller).
      :raises IndexError: if ``data`` is empty.
      """
      from scipy.signal import convolve

      matplotlib, pyplot = import_pyplot(args.backend, args.style)

      start_date = datetime.fromtimestamp(start_date)
      data = sorted(data.items())
      # The largest day offset determines how many days the timeline covers.
      mood = numpy.zeros(data[-1][0] + 1, dtype=numpy.float32)
      timeline = numpy.array(
          [start_date + timedelta(days=i) for i in range(mood.shape[0])]
      )
      for day, val in data:
          mood[day] = (0.5 - val.Value) * 2

      # Kernel lengths are clamped to 1: without that, a timeline shorter than
      # the divisor yields an empty kernel and the convolution fails.
      smooth_size = max(1, len(timeline) // 32)
      mood_smooth = convolve(mood, _slepian_kernel(smooth_size), "same")
      pos = mood_smooth.copy()
      pos[pos < 0] = 0
      neg = mood_smooth.copy()
      neg[neg >= 0] = 0

      average_size = max(1, len(timeline) // 4)
      average_kernel = numpy.ones(average_size)
      average_kernel /= average_kernel.sum()
      avg = convolve(mood, average_kernel, "same")

      pyplot.fill_between(timeline, pos, color="#8DB843", label="Positive")
      pyplot.fill_between(timeline, neg, color="#E14C35", label="Negative")
      pyplot.plot(timeline, avg, color="grey", label="Average", linewidth=5)
      legend = pyplot.legend(loc=1, fontsize=args.font_size)
      pyplot.ylabel("Comment sentiment")
      pyplot.xlabel("Time")
      apply_plot_style(
          pyplot.gcf(), pyplot.gca(), legend, args.background, args.font_size, args.size
      )
      pyplot.xlim(
          parse_date(args.start_date, timeline[0]),
          parse_date(args.end_date, timeline[-1]),
      )

      axes = pyplot.gca()
      # Remember the locator matplotlib picked so it can be restored below.
      locator = axes.xaxis.get_major_locator()
      # set the optimal xticks locator
      if "M" not in resample:
          axes.xaxis.set_major_locator(matplotlib.dates.YearLocator())
      locs = axes.get_xticks().tolist()
      if len(locs) >= 16:
          axes.xaxis.set_major_locator(matplotlib.dates.YearLocator())
      locs = axes.get_xticks().tolist()
      if len(locs) >= 16:
          # Still too many ticks: fall back to the locator saved above.
          axes.xaxis.set_major_locator(locator)
      # Drop a leading tick that lies left of the visible range; the emptiness
      # check avoids an IndexError when the locator produced no ticks.
      if locs and locs[0] < pyplot.xlim()[0]:
          del locs[0]
      # Add a tick at either x limit when the nearest regular tick is more than
      # half a tick interval away. Both extras are appended at the END of
      # ``locs``; the stored indexes are used to find their labels afterwards.
      endindex = -1
      if len(locs) >= 2 and pyplot.xlim()[1] - locs[-1] > (locs[-1] - locs[-2]) / 2:
          locs.append(pyplot.xlim()[1])
          endindex = len(locs) - 1
      startindex = -1
      if len(locs) >= 2 and locs[0] - pyplot.xlim()[0] > (locs[1] - locs[0]) / 2:
          locs.append(pyplot.xlim()[0])
          startindex = len(locs) - 1
      axes.set_xticks(locs)
      # hacking time!
      labels = axes.get_xticklabels()
      if startindex >= 0:
          _pin_tick_label(labels[startindex], timeline[0])
      if endindex >= 0:
          _pin_tick_label(labels[endindex], timeline[-1])

      # Totals for the title, on the same scale as the mood scores above.
      overall_pos = sum(2 * (0.5 - d[1].Value) for d in data if d[1].Value < 0.5)
      overall_neg = sum(2 * (d[1].Value - 0.5) for d in data if d[1].Value > 0.5)
      title = "%s sentiment +%.1f -%.1f δ=%.1f" % (
          name,
          overall_pos,
          overall_neg,
          overall_pos - overall_neg,
      )
      if args.mode == "all" and args.output:
          output = get_plot_path(args.output, "sentiment")
      else:
          output = args.output
      deploy_plot(title, output, args.background)