# streaming.py 5.0 KB
  1. from __future__ import division
  2. import math
  3. from collections import OrderedDict
  4. from bokeh.io import curdoc
  5. from bokeh.plotting import Figure
  6. from bokeh.models import ColumnDataSource, CustomJS
  7. from bokeh.tile_providers import STAMEN_TONER
  8. from bokeh.models import VBox, HBox, Paragraph, Select
  9. from bokeh.palettes import BuGn9
  10. import pandas as pd
  11. import datashader as ds
  12. import datashader.transfer_functions as tf
  13. def bin_data():
  14. global time_period, grouped, group_count, counter, times, groups
  15. grouped = df.groupby([times.hour, times.minute // time_period])
  16. groups = sorted(grouped.groups.keys(), key=lambda r: (r[0], r[1]))
  17. group_count = len(groups)
  18. counter = 0
  19. def on_time_select_change(attr, old, new):
  20. global time_period, counter, time_select_options
  21. time_period = time_select_options[new]
  22. counter = 0
  23. bin_data()
  24. counter = 0
  25. def update_data():
  26. global dims, grouped, group_count, counter, time_text, time_period
  27. dims_data = dims.data
  28. if not dims_data['width'] or not dims_data['height']:
  29. return
  30. group_num = counter % group_count
  31. group = groups[group_num]
  32. grouped_df = grouped.get_group(group)
  33. update_image(grouped_df)
  34. # update time text
  35. num_minute_groups = 60 // time_period
  36. mins = group[1] * time_period
  37. hr = group[0]
  38. end_mins = ((group[1] + 1) % num_minute_groups) * time_period
  39. end_hr = hr if end_mins > 0 else (hr + 1) % 24
  40. time_text.text = 'Time Period: {}:{} - {}:{}'.format(str(hr).zfill(2),
  41. str(mins).zfill(2),
  42. str(end_hr).zfill(2),
  43. str(end_mins).zfill(2))
  44. counter += 1
  45. def update_image(dataframe):
  46. global dims
  47. dims_data = dims.data
  48. if not dims_data['width'] or not dims_data['height']:
  49. return
  50. plot_width = int(math.ceil(dims_data['width'][0]))
  51. plot_height = int(math.ceil(dims_data['height'][0]))
  52. x_range = (dims_data['xmin'][0], dims_data['xmax'][0])
  53. y_range = (dims_data['ymin'][0], dims_data['ymax'][0])
  54. canvas = ds.Canvas(plot_width=plot_width,
  55. plot_height=plot_height,
  56. x_range=x_range,
  57. y_range=y_range)
  58. agg = canvas.points(dataframe, 'dropoff_x', 'dropoff_y',
  59. ds.count('trip_distance'))
  60. img = tf.shade(agg, cmap=BuGn9, how='log')
  61. new_data = {}
  62. new_data['image'] = [img.data]
  63. new_data['x'] = [x_range[0]]
  64. new_data['y'] = [y_range[0]]
  65. new_data['dh'] = [y_range[1] - y_range[0]]
  66. new_data['dw'] = [x_range[1] - x_range[0]]
  67. image_source.stream(new_data, 1)
  68. time_select_options = OrderedDict()
  69. time_select_options['1 Hour'] = 60
  70. time_select_options['30 Minutes'] = 30
  71. time_select_options['15 Minutes'] = 15
  72. time_period = list(time_select_options.values())[0]
  73. time_select = Select.create(name="Time Period", options=time_select_options)
  74. time_select.on_change('value', on_time_select_change)
  75. time_text = Paragraph(text='Time Period')
  76. # load nyc taxi data
  77. path = './data/nyc_taxi.csv'
  78. datetime_field = 'tpep_dropoff_datetime'
  79. cols = ['dropoff_x', 'dropoff_y', 'trip_distance', datetime_field]
  80. df = pd.read_csv(path, usecols=cols, parse_dates=[datetime_field]).dropna(axis=0)
  81. times = pd.DatetimeIndex(df[datetime_field])
  82. group_count = grouped = groups = None
  83. bin_data()
  84. # manage client-side dimensions
  85. dims = ColumnDataSource(data=dict(width=[], height=[], xmin=[], xmax=[], ymin=[], ymax=[]))
  86. dims_jscode = """
  87. var update_dims = function () {
  88. var new_data = {
  89. height: [plot.frame.height],
  90. width: [plot.frame.width],
  91. xmin: [plot.x_range.start],
  92. ymin: [plot.y_range.start],
  93. xmax: [plot.x_range.end],
  94. ymax: [plot.y_range.end]
  95. };
  96. dims.data = new_data;
  97. };
  98. if (typeof throttle != 'undefined' && throttle != null) {
  99. clearTimeout(throttle);
  100. }
  101. throttle = setTimeout(update_dims, 100, "replace");
  102. """
  103. # Create plot -------------------------------
  104. xmin = -8240227.037
  105. ymin = 4974203.152
  106. xmax = -8231283.905
  107. ymax = 4979238.441
  108. fig = Figure(x_range=(xmin, xmax),
  109. y_range=(ymin, ymax),
  110. plot_height=600,
  111. plot_width=900,
  112. tools='pan,wheel_zoom')
  113. fig.background_fill_color = 'black'
  114. fig.add_tile(STAMEN_TONER, alpha=.3)
  115. fig.x_range.callback = CustomJS(code=dims_jscode, args=dict(plot=fig, dims=dims))
  116. fig.y_range.callback = CustomJS(code=dims_jscode, args=dict(plot=fig, dims=dims))
  117. fig.axis.visible = False
  118. fig.grid.grid_line_alpha = 0
  119. fig.min_border_left = 0
  120. fig.min_border_right = 0
  121. fig.min_border_top = 0
  122. fig.min_border_bottom = 0
  123. image_source = ColumnDataSource(dict(image=[], x=[], y=[], dw=[], dh=[]))
  124. fig.image_rgba(source=image_source, image='image', x='x', y='y', dw='dw', dh='dh', dilate=False)
  125. time_text = Paragraph(text='Time Period: 00:00 - 00:00')
  126. controls = HBox(children=[time_text, time_select], width=fig.plot_width)
  127. layout = VBox(children=[fig, controls])
  128. curdoc().add_root(layout)
  129. curdoc().add_periodic_callback(update_data, 1000)