Browse Source

Updated gitignore

WillKoehrsen 6 years ago
parent
commit
30a91c0dc6
87 changed files with 226871 additions and 676 deletions
  1. 6 0
      .gitignore
  2. 0 6
      .vscode/settings.json
  3. 3 0
      datashader-work/air-quality_real_time.csv
  4. 87 0
      datashader-work/datashader-examples/FAQ.ipynb
  5. 80 0
      datashader-work/datashader-examples/README.md
  6. Binary
      datashader-work/datashader-examples/assets/images/airport_connections.png
  7. Binary
      datashader-work/datashader-examples/assets/images/chesapeake_farout.png
  8. Binary
      datashader-work/datashader-examples/assets/images/chesbay_detail.png
  9. Binary
      datashader-work/datashader-examples/assets/images/dashboard.png
  10. Binary
      datashader-work/datashader-examples/assets/images/ds_hv_bokeh.png
  11. Binary
      datashader-work/datashader-examples/assets/images/houston_district29.png
  12. Binary
      datashader-work/datashader-examples/assets/images/landsat.png
  13. Binary
      datashader-work/datashader-examples/assets/images/nyc_pickups_vs_dropoffs.jpg
  14. Binary
      datashader-work/datashader-examples/assets/images/nyc_races.jpg
  15. Binary
      datashader-work/datashader-examples/assets/images/nyc_taxi-paramnb.png
  16. Binary
      datashader-work/datashader-examples/assets/images/nyc_taxi_100k.png
  17. Binary
      datashader-work/datashader-examples/assets/images/parambokeh.png
  18. Binary
      datashader-work/datashader-examples/assets/images/pcap.png
  19. Binary
      datashader-work/datashader-examples/assets/images/pipeline.png
  20. Binary
      datashader-work/datashader-examples/assets/images/pipeline2.png
  21. Binary
      datashader-work/datashader-examples/assets/images/sym_attractors.jpg
  22. Binary
      datashader-work/datashader-examples/assets/images/uk_researchers.png
  23. Binary
      datashader-work/datashader-examples/assets/images/usa_census.jpg
  24. 248 0
      datashader-work/datashader-examples/dashboard.ipynb
  25. 149 0
      datashader-work/datashader-examples/dashboard.yml
  26. 3 0
      datashader-work/datashader-examples/data/nyc_crime.csv
  27. 75 0
      datashader-work/datashader-examples/datasets.yml
  28. 57 0
      datashader-work/datashader-examples/environment.yml
  29. 357 0
      datashader-work/datashader-examples/filetimes.py
  30. 41 0
      datashader-work/datashader-examples/filetimes.sh
  31. 24 0
      datashader-work/datashader-examples/filetimes.yml
  32. 7 0
      datashader-work/datashader-examples/get_raster_data.sh
  33. 100 0
      datashader-work/datashader-examples/getting_started/1_Introduction.ipynb
  34. 529 0
      datashader-work/datashader-examples/getting_started/2_Pipeline.ipynb
  35. 322 0
      datashader-work/datashader-examples/getting_started/3_Interactivity.ipynb
  36. 35 0
      datashader-work/datashader-examples/getting_started/index.ipynb
  37. 96 0
      datashader-work/datashader-examples/index.ipynb
  38. 260 0
      datashader-work/datashader-examples/nyc_taxi-nongeo.ipynb
  39. 103 0
      datashader-work/datashader-examples/pcap_to_parquet.py
  40. 107 0
      datashader-work/datashader-examples/raster.py
  41. 14 0
      datashader-work/datashader-examples/small.yml
  42. 517 0
      datashader-work/datashader-examples/streaming-aggregation.ipynb
  43. 163 0
      datashader-work/datashader-examples/streaming.py
  44. 76 0
      datashader-work/datashader-examples/taxi_preprocessing_example.py
  45. 375 0
      datashader-work/datashader-examples/tiling.ipynb
  46. 75 0
      datashader-work/datashader-examples/topics/attractors.yml
  47. 203 0
      datashader-work/datashader-examples/topics/bay_trimesh.ipynb
  48. 637 0
      datashader-work/datashader-examples/topics/census.ipynb
  49. 200 0
      datashader-work/datashader-examples/topics/gerrymandering.ipynb
  50. 68 0
      datashader-work/datashader-examples/topics/index.ipynb
  51. 295 0
      datashader-work/datashader-examples/topics/landsat.ipynb
  52. 345 0
      datashader-work/datashader-examples/topics/network_packets.ipynb
  53. 474 0
      datashader-work/datashader-examples/topics/nyc_taxi.ipynb
  54. 408 0
      datashader-work/datashader-examples/topics/opensky.ipynb
  55. 194 0
      datashader-work/datashader-examples/topics/osm-1billion.ipynb
  56. 132 0
      datashader-work/datashader-examples/topics/osm.ipynb
  57. 500 0
      datashader-work/datashader-examples/topics/strange_attractors.ipynb
  58. 170 0
      datashader-work/datashader-examples/topics/uk_researchers.ipynb
  59. 90 0
      datashader-work/datashader-examples/user_guide/10_Performance.ipynb
  60. 503 0
      datashader-work/datashader-examples/user_guide/1_Plotting_Pitfalls.ipynb
  61. 19 0
      datashader-work/datashader-examples/user_guide/2_Points.ipynb
  62. 461 0
      datashader-work/datashader-examples/user_guide/3_Timeseries.ipynb
  63. 179 0
      datashader-work/datashader-examples/user_guide/4_Trajectories.ipynb
  64. 271 0
      datashader-work/datashader-examples/user_guide/5_Rasters.ipynb
  65. 427 0
      datashader-work/datashader-examples/user_guide/6_Trimesh.ipynb
  66. 462 0
      datashader-work/datashader-examples/user_guide/7_Networks.ipynb
  67. 330 0
      datashader-work/datashader-examples/user_guide/8_Geography.ipynb
  68. 54 0
      datashader-work/datashader-examples/user_guide/9_Extending.ipynb
  69. 58 0
      datashader-work/datashader-examples/user_guide/index.ipynb
  70. 868 0
      datashader-work/datashader-tryout.ipynb
  71. 7976 0
      datashader-work/fishing_watch.ipynb
  72. 39 0
      datashader-work/formatting_data.py
  73. 2454 0
      datashader-work/geographic-plotting.ipynb
  74. 7469 0
      datashader-work/holoviews-0-2.ipynb
  75. 5898 0
      datashader-work/holoviews-3-4.ipynb
  76. 5252 0
      datashader-work/holoviews-5-6.ipynb
  77. 2779 0
      datashader-work/holoviews-geographic-data.ipynb
  78. 3234 0
      datashader-work/holoviews-large-data.ipynb
  79. 2789 0
      datashader-work/holoviews-pipelines.ipynb
  80. 46664 0
      datashader-work/solar-power-potential.ipynb
  81. 3 0
      datashader-work/solar-power_solar_potential_by_census_tract.csv
  82. 3 0
      datashader-work/solar-power_solar_potential_by_postal_code.csv
  83. Binary
      datashader-work/solar_potential_by_postal_code_formatted.parquet
  84. 2695 0
      plotly/military-data.ipynb
  85. 3 0
      plotly/military_data.csv
  86. 128356 0
      plotly/plotly-express.ipynb
  87. 0 670
      testing-exercises.ipynb

+ 6 - 0
.gitignore

@@ -8,3 +8,9 @@ medium/data/*_files
 *time_features/data_raw
 datashader-work/datashader-examples/*
 datashader-work/pyviz-examples/*
+
+*pyviz-examples*
+*geoview-examples*
+*datashader-examples*
+
+*.vscode*

+ 0 - 6
.vscode/settings.json

@@ -1,6 +0,0 @@
-{
-    "python.unitTest.promptToConfigure": false,
-    "python.unitTest.pyTestEnabled": false,
-    "python.unitTest.unittestEnabled": false,
-    "python.unitTest.nosetestsEnabled": false
-}

+ 3 - 0
datashader-work/air-quality_real_time.csv

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:592e5523e0fcadcb5525871374e71c932467d3069b21528cb458ec6cab1103a8
+size 2524336

+ 87 - 0
datashader-work/datashader-examples/FAQ.ipynb

@@ -0,0 +1,87 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### When should I use datashader?\n",
+    "\n",
+    "Datashader is designed for working with large datasets, for\n",
+    "cases where it is most crucial to faithfully represent the\n",
+    "*distribution* of your data.  datashader can work easily with\n",
+    "extremely large datasets, generating a fixed-size data structure\n",
+    "(regardless of the original number of records) that gets transferred to\n",
+    "your local browser for display.  If you ever find yourself subsampling\n",
+    "your data just so that you can plot it feasibly, or if you are forced\n",
+    "for practical reasons to iterate over chunks of it rather than looking\n",
+    "at all of it at once, then datashader can probably help you."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### When should I *not* use datashader?\n",
+    "\n",
+    "If you have a very small number of data points (in the hundreds\n",
+    "or thousands) or curves (in the tens or several tens, each with\n",
+    "hundreds or thousands of points), then conventional plotting packages\n",
+    "like [Bokeh](https://bokeh.pydata.org) may be more suitable.  With conventional browser-based\n",
+    "packages, all of the data points are passed directly to the browser for\n",
+    "display, allowing specific interaction with each curve or point,\n",
+    "including display of metadata, linking to sources, etc.  This approach\n",
+    "offers the most flexibility *per point* or *per curve*, but rapidly\n",
+    "runs into limitations on how much data can be processed by the browser,\n",
+    "and how much can be displayed on screen and resolved by the human\n",
+    "visual system.  If you are not having such problems, i.e., your data is\n",
+    "easily handled by your plotting infrastructure and you can easily see\n",
+    "and work with all your data onscreen already, then you probably don't\n",
+    "need datashader."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Is datashader part of Bokeh?\n",
+    "\n",
+    "Datashader is an independent project, focusing on generating\n",
+    "aggregate arrays and representations of them as images.  Bokeh is a\n",
+    "complementary project, focusing on building browser-based\n",
+    "visualizations and dashboards.  Bokeh (along with other plotting\n",
+    "packages) can display images rendered by datashader, providing axes,\n",
+    "interactive zooming and panning, selection, legends, hover\n",
+    "information, and so on.  Sample bokeh-based plotting code is provided\n",
+    "with datashader, but viewers for maptlotlib are already under\n",
+    "development, and similar code could be developed for any other\n",
+    "plotting package that can display images.  The library can also be\n",
+    "used separately, without any external plotting packages, generating\n",
+    "images that can be displayed directly or saved to disk, or generating\n",
+    "aggregate arrays suitable for further analysis."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### What's the easiest way to use datashader interactively?\n",
+    "\n",
+    "[HoloViews](https://holoviews.org). HoloViews uses Bokeh behind\n",
+    "the scenes, but it offers a higher level API that is well suited to\n",
+    "the sorts of magic that allow interactive use of Datashader. For a\n",
+    "given dataset, HoloViews can easily construct either a raw Bokeh plot\n",
+    "or a Bokeh plot with server-side rendering from Datashader, hiding\n",
+    "nearly all of the complexity involved.\n",
+    "\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
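
A minimal sketch of the interactive route this FAQ describes, using HoloViews' `datashade` operation; the synthetic DataFrame and its column names below are illustrative, not part of this commit:

```python
# Sketch: server-side Datashader rendering inside a Bokeh plot via HoloViews.
# The data below is synthetic; any DataFrame with numeric columns works.
import numpy as np
import pandas as pd
import holoviews as hv
from holoviews.operation.datashader import datashade

hv.extension('bokeh')

# A million points is already far too many to ship to the browser raw.
df = pd.DataFrame(np.random.standard_normal((1_000_000, 2)), columns=['x', 'y'])

# datashade() re-aggregates on every zoom/pan, so the browser only ever
# receives a fixed-size rendered image rather than the raw points.
datashade(hv.Points(df, kdims=['x', 'y']))
```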

+ 80 - 0
datashader-work/datashader-examples/README.md

@@ -0,0 +1,80 @@
+# Datashader Examples
+
+The best way to understand how Datashader works is to try out our
+extensive set of examples. [Datashader.org](http://datashader.org)
+includes static versions of the 
+[getting started guide](http://datashader.org/getting-started), 
+[user manual](http://datashader.org/user-guide), and
+[topic examples](http://datashader.org/topics), but for the full
+experience with dynamic updating you will need to install them on a
+live server. 
+
+These instructions assume you are using 
+[conda](https://conda.io/docs/install/quick.html), but they can be 
+adapted as needed to use [pip](https://pip.pypa.io/en/stable/installing/) 
+and [virtualenv](https://virtualenv.pypa.io) if desired.
+
+To get started, first go to your home directory and
+download the current list of everything needed for the examples:
+
+- Download the [conda ds environment file](https://raw.githubusercontent.com/bokeh/datashader/master/examples/environment.yml) and save it as `environment.yml`.
+
+Then run the following commands in your terminal (command) prompt, from wherever you saved `environment.yml`:
+
+```bash
+1. conda env create --file environment.yml
+2. conda activate ds
+3. datashader examples
+4. cd datashader-examples
+```
+
+Step 1 will read `environment.yml`, create a new Conda environment
+named `ds`, and install all of the libraries needed into that environment
+(including datashader itself). It will use Python 3.6 by default, but
+you can edit that file to specify a different Python version if you
+prefer (which may require changing some of the dependencies in some
+cases).
+
+Step 2 will activate the `ds` environment, using it for all subsequent
+commands. You will need to re-run step 2 after closing your terminal or
+rebooting your machine, if you want to use anything in the `ds` environment.
+For older versions of conda, you may instead need to do `source activate ds`
+(mac/linux) or `activate ds` (windows).
+
+Step 3 will copy the datashader examples from wherever Conda placed
+them into a subdirectory `datashader-examples`, and will then download
+the sample data required for the examples.  (`datashader examples` is
+a shorthand for `datashader copy-examples --path datashader-examples
+&& datashader fetch-data --path datashader-examples`.)
+
+The total download size is currently about 4GB to transfer, requiring
+about 10GB on disk when unpacked, which can take some time depending on
+the speed of your connection.  The files involved are specified in the
+text file `datasets.yml` in the `datashader-examples` directory, and
+you are welcome to edit that file or to download the individual files
+specified therein manually if you prefer, as long as you put them into
+a subdirectory `data/` so the examples can find them.  Once these
+steps have completed, you will be ready to run any of the examples
+listed on [datashader.org](http://datashader.org).
+
+
+## Notebooks
+
+Most of the examples are in the form of runnable Jupyter
+notebooks. Once you have obtained the notebooks and the data they
+require, you can run them on your own system using Jupyter:
+
+```
+cd datashader-examples
+jupyter notebook
+```
+
+If you want the generated notebooks to work without an internet connection or
+with an unreliable connection (e.g. if you see `Loading BokehJS ...` but never
+`BokehJS successfully loaded`), then restart the Jupyter notebook server using:
+
+```
+BOKEH_RESOURCES=inline jupyter notebook --NotebookApp.iopub_data_rate_limit=100000000
+```
+
+See dashboard.ipynb in this directory for a Datashader dashboard for viewing data.

Binary
datashader-work/datashader-examples/assets/images/airport_connections.png


Binary
datashader-work/datashader-examples/assets/images/chesapeake_farout.png


Binary
datashader-work/datashader-examples/assets/images/chesbay_detail.png


Binary
datashader-work/datashader-examples/assets/images/dashboard.png


Binary
datashader-work/datashader-examples/assets/images/ds_hv_bokeh.png


Binary
datashader-work/datashader-examples/assets/images/houston_district29.png


Binary
datashader-work/datashader-examples/assets/images/landsat.png


Binary
datashader-work/datashader-examples/assets/images/nyc_pickups_vs_dropoffs.jpg


Binary
datashader-work/datashader-examples/assets/images/nyc_races.jpg


Binary
datashader-work/datashader-examples/assets/images/nyc_taxi-paramnb.png


Binary
datashader-work/datashader-examples/assets/images/nyc_taxi_100k.png


Binary
datashader-work/datashader-examples/assets/images/parambokeh.png


Binary
datashader-work/datashader-examples/assets/images/pcap.png


Binary
datashader-work/datashader-examples/assets/images/pipeline.png


Binary
datashader-work/datashader-examples/assets/images/pipeline2.png


Binary
datashader-work/datashader-examples/assets/images/sym_attractors.jpg


Binary
datashader-work/datashader-examples/assets/images/uk_researchers.png


Binary
datashader-work/datashader-examples/assets/images/usa_census.jpg


File diff suppressed because it is too large
+ 248 - 0
datashader-work/datashader-examples/dashboard.ipynb


+ 149 - 0
datashader-work/datashader-examples/dashboard.yml

@@ -0,0 +1,149 @@
+sources:
+  osm-1b:
+    description: 1-billion-point OpenStreetMap GPS dataset
+    driver: parquet
+    args:
+      urlpath: '{{ CATALOG_DIR }}/data/osm-1billion.snappy.parq'
+      columns: ['x','y']
+    metadata:
+      fields:
+        counts:
+          label: GPS coordinates
+      plot:
+        xlim: !!python/tuple [-8240227, -8231284]
+        ylim: !!python/tuple [ 4974203,  4979238]
+        kind: points
+      plots:
+        counts:
+          label: 1 billion OpenStreetMap GPS locations
+          x: x
+          y: y
+          
+  nyc_taxi:
+    description: Large version of nyc taxi dataset
+    driver: parquet
+    args:
+      urlpath: '{{ CATALOG_DIR }}/data/nyc_taxi_wide.parq'
+      columns: ['dropoff_x','dropoff_y','pickup_x','pickup_y',
+                'dropoff_hour','pickup_hour','passenger_count']
+    metadata:
+      fields:
+        counts:
+          label: Ride counts
+        passenger_count:
+          label: Passenger Count
+        dropoff_hour:
+          label: Drop-off Hour
+        pickup_hour:
+          label: Pick-up Hour
+      plot:
+        xlim: !!python/tuple [-8240227.037, -8231283.905]
+        ylim: !!python/tuple [4974203.152, 4979238.441]
+        kind: points
+        hover_cols: ['dropoff_hour', 'pickup_hour', 'passenger_count']
+      plots:
+        dropoff:
+          label: NYC Taxi Dropoffs
+          x: dropoff_x
+          y: dropoff_y
+        pickup:
+          label: NYC Taxi Pickups
+          x: pickup_x
+          y: pickup_y
+          
+  nyc_taxi_50k:
+    description: Small version of nyc taxi dataset
+    driver: parquet
+    args:
+      urlpath: '{{ CATALOG_DIR }}/data/nyc_taxi_50k.parq'
+      columns: ['dropoff_x','dropoff_y','pickup_x','pickup_y',
+                'dropoff_hour','pickup_hour','passenger_count']
+    metadata:
+      fields:
+        counts:
+          label: Ride counts
+        passenger_count:
+          label: Passenger Count
+        dropoff_hour:
+          label: Drop-off Hour
+        pickup_hour:
+          label: Pick-up Hour
+      plot:
+        xlim: !!python/tuple [-8240227.037, -8231283.905]
+        ylim: !!python/tuple [4974203.152, 4979238.441]
+        kind: points
+        hover_cols: ['dropoff_hour', 'pickup_hour', 'passenger_count']
+      plots:
+        dropoff:
+          label: NYC Taxi Dropoffs
+          x: dropoff_x
+          y: dropoff_y
+        pickup:
+          label: NYC Taxi Pickups
+          x: pickup_x
+          y: pickup_y
+          
+  census:
+    description: US Census Synthetic Data
+    driver: parquet
+    args:
+      urlpath: '{{ CATALOG_DIR }}/data/census.snappy.parq'
+    metadata:
+      fields:
+        counts:
+          label: Counts
+        race:
+          label: Race
+# this doesn't work, but I think something like it might. 
+#           labels:
+#             w: White
+#             b: Black
+#             a: Asian
+#             h: Hispanic
+#             o: Other
+      plot:
+        x: easting
+        y: northing
+        xlim: !!python/tuple [-13914936, -7235767]
+        ylim: !!python/tuple [2632019, 6446276]
+        kind: points  
+      plots:
+        people:
+          label: US Census Synthetic people
+        race:
+          label: US Census Synthetic race
+          c: race
+          cmap:
+            w: blue
+            b: green
+            a: red
+            h: orange
+            o: saddlebrown
+  
+  opensky:
+    description: OpenSky Flight Paths
+    driver: netcdf
+    args:
+      urlpath: '{{ CATALOG_DIR }}/data/opensky.h5'
+      chunks: {}
+    metadata:
+      fields:
+        counts:
+          label: Counts
+        ascending:
+          label: Ascending vs. Descending
+          cat_colors:
+            True: blue
+            False: red
+          cat_names:
+            True: Ascending
+            False: Descending
+      plot:
+        xlim: !!python/tuple [-2000000, 2500000]
+        ylim: !!python/tuple [4100000, 7800000]
+        kind: points
+        x: longitude
+        y: latitude
+      plots:
+        flight_paths:
+          label: OpenSky Flight Paths
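
This `dashboard.yml` follows the Intake catalog layout (`sources` with `driver`/`args`/`metadata`), which the accompanying dashboard loads. A hedged sketch of reading one of these sources directly, assuming Intake is installed and the referenced data files have been fetched:

```python
# Sketch: load one catalog source as a pandas DataFrame with Intake.
import intake

cat = intake.open_catalog('dashboard.yml')
print(list(cat))              # source names: 'osm-1b', 'nyc_taxi', ...

# The small taxi sample; .read() materializes it as a DataFrame.
df = cat.nyc_taxi_50k.read()
print(df.columns.tolist())
```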

+ 3 - 0
datashader-work/datashader-examples/data/nyc_crime.csv

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0426495b7e21a0b3d3e41e6743baa635821e085b9cdf06039dd632150cbfa9d7
+size 216578377

+ 75 - 0
datashader-work/datashader-examples/datasets.yml

@@ -0,0 +1,75 @@
+---
+
+data:
+
+  - url: http://s3.amazonaws.com/datashader-data/austin_dem.zip
+    title: 'Austin, TX Elevation Data'
+    files:
+      - austin_dem.tif
+
+  - url: http://s3.amazonaws.com/datashader-data/nyc_crime.zip
+    title: 'NYC Crime Data'
+    files:
+      - nyc_crime.csv
+
+  - url: http://s3.amazonaws.com/datashader-data/nyc_taxi.zip
+    title: 'NYC Taxi Data'
+    files:
+      - nyc_taxi.csv
+
+  - url: http://s3.amazonaws.com/datashader-data/calvert_uk_research2017.snappy.parq.zip
+    title: 'Graph for Edge Bundling (Calvert UK Research 2017)'
+    files:
+      - calvert_uk_research2017_nodes.snappy.parq
+      - calvert_uk_research2017_edges.snappy.parq
+
+  - url: http://s3.amazonaws.com/datashader-data/calvert_uk_research2017_nodes.zip
+    title: 'Institutions for Edge Bundling (Calvert UK Research 2017)'
+    files:
+      - calvert_uk_research2017_nodes.csv
+
+  - url: http://s3.amazonaws.com/datashader-data/census.snappy.parq.zip
+    title: 'Census Synthetic People'
+    files:
+      - census.snappy.parq
+
+  - url: http://s3.amazonaws.com/datashader-data/opensky.parq
+    title: 'OpenSky flights, Sept 5-13 2016'
+    files:
+      - opensky.parq
+
+  - url: http://s3.amazonaws.com/datashader-data/mobile_landsat8.zip
+    title: 'Mobile, AL Landsat8 Data'
+    files:
+      - MERCATOR_LC80210392016114LGN00_B1.TIF
+      - MERCATOR_LC80210392016114LGN00_B2.TIF
+      - MERCATOR_LC80210392016114LGN00_B3.TIF
+      - MERCATOR_LC80210392016114LGN00_B4.TIF
+      - MERCATOR_LC80210392016114LGN00_B5.TIF
+      - MERCATOR_LC80210392016114LGN00_B6.TIF
+      - MERCATOR_LC80210392016114LGN00_B7.TIF
+      - MERCATOR_LC80210392016114LGN00_B8.TIF
+      - MERCATOR_LC80210392016114LGN00_B9.TIF
+      - MERCATOR_LC80210392016114LGN00_B10.TIF
+      - MERCATOR_LC80210392016114LGN00_B11.TIF
+      - MERCATOR_LC80210392016114LGN00_BQA.TIF
+
+  - url: http://s3.amazonaws.com/datashader-data/maccdc2012_graph.zip
+    title: 'National CyberWatch Mid-Atlantic Collegiate Cyber Defense Competition'
+    files:
+      - maccdc2012_nodes.parq
+      - maccdc2012_edges.parq
+      - maccdc2012_full_nodes.parq
+      - maccdc2012_full_edges.parq
+
+# Original url: https://www2.census.gov/geo/tiger/GENZ2015/shp/cb_2015_us_cd114_5m.zip
+  - url: http://s3.amazonaws.com/datashader-data/cb_2015_us_cd114_5m.zip
+    title: '2015 Congressional districts'
+    files:
+      - cb_2015_us_cd114_5m.shp
+
+  - url: http://s3.amazonaws.com/datashader-data/Chesapeake_and_Delaware_Bays.zip
+    title: 'Depth data for the Chesapeake and Delaware Bay region of the USA'
+    files:
+      - Chesapeake_and_Delaware_Bays.3dm
+
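
`datashader fetch-data` consumes this file; each entry is just a download URL plus the filenames expected after unpacking. A rough sketch of fetching the entries by hand, using only the fields shown above (the unzip step that `fetch-data` performs is omitted):

```python
# Sketch: manually download the archives listed in datasets.yml.
import os
import requests
import yaml

with open('datasets.yml') as f:
    spec = yaml.safe_load(f)

os.makedirs('data', exist_ok=True)
for entry in spec['data']:
    # Skip entries whose unpacked files are already in data/.
    if all(os.path.exists(os.path.join('data', name)) for name in entry['files']):
        continue
    print('Downloading', entry['title'])
    archive = os.path.join('data', os.path.basename(entry['url']))
    with open(archive, 'wb') as out:
        out.write(requests.get(entry['url']).content)
```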

+ 57 - 0
datashader-work/datashader-examples/environment.yml

@@ -0,0 +1,57 @@
+name: ds
+channels:
+ - bokeh
+ - conda-forge
+ - ioam
+
+dependencies:
+ - attrs
+ - beautifulsoup4
+ - bokeh
+ - cartopy
+ - colorcet
+ - conda-forge::graphviz
+ - conda-forge::pytest
+ - conda-forge::pytest-benchmark
+ - conda-forge::python-graphviz
+ - dask>=0.15.4
+ - datashader
+ - dill
+ - distributed
+ - fastparquet
+ - flake8
+ - geoviews
+ - ioam::holoviews>=1.8.3
+ - ipython
+ - iris
+ - jupyter
+ - jupyter_dashboards
+ - krb5
+ - matplotlib
+ - nbconvert
+ - nbformat
+ - networkx>=2.0
+ - numba
+ - numpy
+ - pandas
+ - param>=1.5.1
+ - paramnb
+ - pyproj
+ - pytables
+ - python-snappy
+ - python=3.6
+ - rasterio
+ - requests
+ - scikit-image
+ - scipy
+ - shapely
+ - snappy
+ - statsmodels
+ - tblib
+ - xarray
+ - yaml
+ - pip:
+   - cachey
+   - streamz==0.2.0
+   - webargs
+

+ 357 - 0
datashader-work/datashader-examples/filetimes.py

@@ -0,0 +1,357 @@
+#!/usr/bin/env python3
+
+"""
+Simple test of read and write times for columnar data formats:
+  python filetimes.py <filepath> [pandas|dask [hdf5base [xcolumn [ycolumn] [categories...]]]]
+
+Test files may be generated starting from any file format supported by Pandas:
+  python -c "import filetimes ; filetimes.base='<hdf5base>' ; filetimes.categories=['<cat1>','<cat2>']; filetimes.timed_write('<file>')"
+"""
+
+from __future__ import print_function
+
+import time
+global_start = time.time()
+
+import os, os.path, sys, glob, argparse, resource, multiprocessing
+import pandas as pd
+import dask.dataframe as dd
+import numpy as np
+import datashader as ds
+import bcolz
+import feather
+import fastparquet as fp
+
+from datashader.utils import export_image
+from datashader import transfer_functions as tf
+from collections import OrderedDict as odict
+
+#from multiprocessing.pool import ThreadPool
+#dask.set_options(pool=ThreadPool(3)) # select a specific number of threads
+from dask import distributed
+
+# Toggled by command-line arguments
+DEBUG = False
+DD_FORCE_LOAD = False
+DASK_CLIENT = None
+
+class Parameters(object):
+    base,x,y='data','x','y'
+    dftype='pandas'
+    categories=[]
+    chunksize=76668751
+    cat_width=1 # Size of fixed-width string for representing categories
+    columns=None
+    cachesize=9e9
+    parq_opts=dict(file_scheme='hive', has_nulls=False, write_index=False)
+    n_workers=multiprocessing.cpu_count()
+
+
+p=Parameters()
+
+filetypes_storing_categories = {'parq'}
+
+
+class Kwargs(odict):
+    """Used to distinguish between dictionary argument values, and
+    keyword-arguments.
+    """
+    pass
+
+def benchmark(fn, args, filetype=None):
+    """Benchmark when "fn" function gets called on "args" tuple.
+    "args" may have a Kwargs instance at the end.
+    If "filetype" is provided, it may be used to convert columns to
+    categorical dtypes after reading (the "loading" is assumed).
+    """
+    posargs = list(args)
+    kwargs = {}
+    # Remove Kwargs instance at end of posargs list, if one exists
+    if posargs and isinstance(posargs[-1], Kwargs):
+        lastarg = posargs.pop()
+        kwargs.update(lastarg)
+
+    if DEBUG:
+        printable_posargs = ', '.join([str(posarg.head()) if hasattr(posarg, 'head') else str(posarg) for posarg in posargs])
+        printable_kwargs = ', '.join(['{}={}'.format(k, v) for k,v in kwargs.items()])
+        print('DEBUG: {}({}{})'.format(fn.__name__, printable_posargs, ', '+printable_kwargs if printable_kwargs else ''), flush=True)
+
+    # Benchmark fn when run on posargs and kwargs
+    start = time.time()
+    res = fn(*posargs, **kwargs)
+
+    # If we're loading data
+    if filetype is not None:
+        if filetype not in filetypes_storing_categories:
+            opts=odict()
+            if p.dftype == 'pandas':
+                opts['copy']=False
+            for c in p.categories:
+                res[c]=res[c].astype('category',**opts)
+
+        # Force loading (--cache=persist was provided)
+        if p.dftype == 'dask' and DD_FORCE_LOAD:
+            if DASK_CLIENT is not None:
+                # 2017-04-28: This combination leads to a large drop in
+                #   aggregation performance (both --distributed and
+                #   --cache=persist were provided)
+                res = DASK_CLIENT.persist(res)
+                distributed.wait(res)
+            else:
+                if DEBUG:
+                    print("DEBUG: Force-loading Dask dataframe", flush=True)
+                res = res.persist()
+
+    end = time.time()
+
+    return end-start, res
+    
+
+
+read = odict([(f,odict()) for f in ["parq","snappy.parq","gz.parq","bcolz","feather","h5","csv"]])
+
+def read_csv_dask(filepath, usecols=None):
+    # Pandas writes CSV files out as a single file
+    if os.path.isfile(filepath):
+        return dd.read_csv(filepath, usecols=usecols)
+    # Dask may have written out CSV files in partitions
+    filepath_expr = filepath.replace('.csv', '*.csv')
+    return dd.read_csv(filepath_expr, usecols=usecols)
+read["csv"]          ["dask"]   = lambda filepath,p,filetype:  benchmark(read_csv_dask, (filepath, Kwargs(usecols=p.columns)), filetype)
+read["h5"]           ["dask"]   = lambda filepath,p,filetype:  benchmark(dd.read_hdf, (filepath, p.base, Kwargs(chunksize=p.chunksize, columns=p.columns)), filetype)
+def read_feather_dask(filepath):
+    df = feather.read_dataframe(filepath, columns=p.columns)
+    return dd.from_pandas(df, npartitions=p.n_workers)
+read["feather"]      ["dask"] = lambda filepath,p,filetype:  benchmark(read_feather_dask, (filepath,), filetype)
+read["bcolz"]        ["dask"]   = lambda filepath,p,filetype:  benchmark(dd.from_bcolz, (filepath, Kwargs(chunksize=1000000)), filetype)
+read["parq"]         ["dask"]   = lambda filepath,p,filetype:  benchmark(dd.read_parquet, (filepath, Kwargs(index=False, columns=p.columns)), filetype)
+read["gz.parq"]      ["dask"]   = lambda filepath,p,filetype:  benchmark(dd.read_parquet, (filepath, Kwargs(index=False, columns=p.columns)), filetype)
+read["snappy.parq"]  ["dask"]   = lambda filepath,p,filetype:  benchmark(dd.read_parquet, (filepath, Kwargs(index=False, columns=p.columns)), filetype)
+def read_csv_pandas(filepath, usecols=None):
+    # Pandas writes CSV files out as a single file
+    if os.path.isfile(filepath):
+        return pd.read_csv(filepath, usecols=usecols)
+    # Dask may have written out CSV files in partitions
+    filepath_expr = filepath.replace('.csv', '*.csv')
+    filepaths = glob.glob(filepath_expr)
+    return pd.concat((pd.read_csv(f, usecols=usecols) for f in filepaths))
+read["csv"]         ["pandas"] = lambda filepath,p,filetype:  benchmark(read_csv_pandas, (filepath, Kwargs(usecols=p.columns)), filetype)
+read["h5"]          ["pandas"] = lambda filepath,p,filetype:  benchmark(pd.read_hdf, (filepath, p.base, Kwargs(columns=p.columns)), filetype)
+read["feather"]     ["pandas"] = lambda filepath,p,filetype:  benchmark(feather.read_dataframe, (filepath,), filetype)
+def read_bcolz_pandas(filepath, chunksize=None):
+    return bcolz.ctable(rootdir=filepath).todataframe(columns=p.columns)
+read["bcolz"]       ["pandas"]   = lambda filepath,p,filetype:  benchmark(read_bcolz_pandas, (filepath, Kwargs(chunksize=1000000)), filetype)
+def read_parq_pandas(filepath):
+    return fp.ParquetFile(filepath).to_pandas()
+read["parq"]        ["pandas"] = lambda filepath,p,filetype:  benchmark(read_parq_pandas, (filepath,), filetype)
+read["gz.parq"]     ["pandas"] = lambda filepath,p,filetype:  benchmark(read_parq_pandas, (filepath,), filetype)
+read["snappy.parq"] ["pandas"] = lambda filepath,p,filetype:  benchmark(read_parq_pandas, (filepath,), filetype)
+
+
+write = odict([(f,odict()) for f in ["parq","snappy.parq","gz.parq","bcolz","feather","h5","csv"]])
+
+write["csv"]          ["dask"]   = lambda df,filepath,p:  benchmark(df.to_csv, (filepath.replace(".csv","*.csv"), Kwargs(index=False)))
+write["h5"]           ["dask"]   = lambda df,filepath,p:  benchmark(df.to_hdf, (filepath, p.base))
+def write_bcolz_dask(filepath, df):
+    return bcolz.ctable.fromdataframe(df.compute(), rootdir=filepath)
+write["bcolz"]        ["dask"] = lambda df,filepath,p:  benchmark(write_bcolz_dask, (filepath, df))
+def write_feather_dask(filepath, df):
+    return feather.write_dataframe(df.compute(), filepath)
+write["feather"]      ["dask"] = lambda df,filepath,p:  benchmark(write_feather_dask, (filepath, df))
+write["parq"]         ["dask"]   = lambda df,filepath,p:  benchmark(dd.to_parquet, (filepath, df)) # **p.parq_opts
+write["snappy.parq"]  ["dask"]   = lambda df,filepath,p:  benchmark(dd.to_parquet, (filepath, df, Kwargs(compression='SNAPPY'))) ## **p.parq_opts
+write["gz.parq"]      ["dask"]   = lambda df,filepath,p:  benchmark(dd.to_parquet, (filepath, df, Kwargs(compression='GZIP')))
+
+write["csv"]          ["pandas"] = lambda df,filepath,p:  benchmark(df.to_csv, (filepath, Kwargs(index=False)))
+write["h5"]           ["pandas"] = lambda df,filepath,p:  benchmark(df.to_hdf, (filepath, Kwargs(key=p.base, format='table')))
+write["bcolz"]        ["pandas"] = lambda df,filepath,p:  benchmark(bcolz.ctable.fromdataframe, (df, Kwargs(rootdir=filepath)))
+write["feather"]      ["pandas"] = lambda df,filepath,p:  benchmark(feather.write_dataframe, (df, filepath))
+write["parq"]         ["pandas"] = lambda df,filepath,p:  benchmark(fp.write, (filepath, df, Kwargs(**p.parq_opts)))
+write["gz.parq"]      ["pandas"] = lambda df,filepath,p:  benchmark(fp.write, (filepath, df, Kwargs(compression='GZIP', **p.parq_opts)))
+write["snappy.parq"]  ["pandas"] = lambda df,filepath,p:  benchmark(fp.write, (filepath, df, Kwargs(compression='SNAPPY', **p.parq_opts)))
+
+
+def timed_write(filepath,dftype,fsize='double',output_directory="times"):
+    """Accepts any file with a dataframe readable by the given dataframe type, and writes it out as a variety of file types"""
+    assert fsize in ('single', 'double')
+
+    p.dftype = dftype # This function may get called from outside main()
+    df,duration=timed_read(filepath,dftype)
+
+    for ext in write.keys():
+        directory,filename = os.path.split(filepath)
+        basename, extension = os.path.splitext(filename)
+        fname = output_directory+os.path.sep+basename+"."+ext
+        if os.path.exists(fname):
+            print("{:28} (keeping existing)".format(fname), flush=True)
+        else:
+            filetype=ext.split(".")[-1]
+            if not filetype in filetypes_storing_categories:
+                for c in p.categories:
+                    if filetype == 'parq' and df[c].dtype == 'object':
+                        df[c]=df[c].str.encode('utf8')
+                    else:
+                        df[c]=df[c].astype(str)
+
+            # Convert doubles to floats when writing out datasets
+            if fsize == 'single':
+                for colname in df.columns:
+                    if df[colname].dtype == 'float64':
+                        df[colname] = df[colname].astype(np.float32)
+
+            code = write[ext].get(dftype,None)
+
+            if code is None:
+                print("{:28} {:7} Operation not supported".format(fname,dftype), flush=True)
+            else:
+                duration, res = code(df,fname,p)
+                print("{:28} {:7} {:05.2f}".format(fname,dftype,duration), flush=True)
+
+            if not filetype in filetypes_storing_categories:
+                for c in p.categories:
+                    df[c]=df[c].astype('category')
+
+        
+def timed_read(filepath,dftype):
+    basename, extension = os.path.splitext(filepath)
+    extension = extension[1:]
+    filetype=extension.split(".")[-1]
+    code = read[extension].get(dftype,None)
+
+    if code is None:
+        return (None, -1)
+
+    p.columns=[p.x]+[p.y]+p.categories
+    
+    duration, df = code(filepath,p,filetype)
+    
+    return df, duration
+
+
+CACHED_RANGES = (None, None)
+def timed_agg(df, filepath, plot_width=int(900), plot_height=int(900*7.0/12), cache_ranges=True):
+    global CACHED_RANGES
+    start = time.time()
+    cvs = ds.Canvas(plot_width, plot_height, x_range=CACHED_RANGES[0], y_range=CACHED_RANGES[1])
+    agg = cvs.points(df, p.x, p.y)
+    end = time.time()
+    if cache_ranges:
+        CACHED_RANGES = (cvs.x_range, cvs.y_range)
+    img = export_image(tf.shade(agg),filepath,export_path=".")
+    return img, end-start
+
+
+def get_size(path):
+    total = 0
+
+    # CSV files are broken up by dask when they're written out
+    if os.path.isfile(path):
+        return os.path.getsize(path)
+    elif path.endswith('csv'):
+        for csv_fpath in glob.glob(path.replace('.csv', '*.csv')):
+            total += os.path.getsize(csv_fpath)
+        return total
+
+    # If path is a directory (such as parquet), sum all files in directory
+    for dirpath, dirnames, filenames in os.walk(path):
+        for f in filenames:
+            fp = os.path.join(dirpath, f)
+            total += os.path.getsize(fp)
+
+    return total
+
+
+def get_proc_mem():
+    return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1e6
+
+
+def main(argv):
+    global DEBUG, DD_FORCE_LOAD, DASK_CLIENT
+
+    parser = argparse.ArgumentParser(epilog=__doc__, formatter_class=argparse.RawTextHelpFormatter)
+    parser.add_argument('filepath')
+    parser.add_argument('dftype')
+    parser.add_argument('base')
+    parser.add_argument('x')
+    parser.add_argument('y')
+    parser.add_argument('categories', nargs='+')
+    parser.add_argument('--debug', action='store_true', help='Enable increased verbosity and DEBUG messages')
+    parser.add_argument('--cache', choices=('persist', 'cachey'), default=None, help='Enable caching: "persist" causes Dask dataframes to force loading into memory; "cachey" uses dask.cache.Cache with a cachesize of {}. Caching is disabled by default'.format(int(p.cachesize)))
+    parser.add_argument('--distributed', action='store_true', help='Enable the distributed scheduler instead of the threaded, which is the default.')
+    parser.add_argument('--recalc-ranges', action='store_true', help='Tell datashader to recalculate the ranges on each aggregation, instead of caching them (by default).')
+    args = parser.parse_args(argv[1:])
+
+    if args.cache is None:
+        if args.debug:
+            print("DEBUG: Cache disabled", flush=True)
+    else:
+        if args.cache == 'cachey':
+            from dask.cache import Cache
+            cache = Cache(p.cachesize)
+            cache.register()
+        elif args.cache == 'persist':
+            DD_FORCE_LOAD = True
+
+        if args.debug:
+            print('DEBUG: Cache "{}" mode enabled'.format(args.cache), flush=True)
+
+    if args.dftype == 'dask' and args.distributed:
+        local_cluster = distributed.LocalCluster(n_workers=p.n_workers, threads_per_worker=1)
+        DASK_CLIENT = distributed.Client(local_cluster)
+        if args.debug:
+            print('DEBUG: "distributed" scheduler is enabled')
+    else:
+        if args.dftype != 'dask' and args.distributed:
+            raise ValueError('--distributed argument is only available with the dask dataframe type (not pandas)')
+        if args.debug:
+            print('DEBUG: "threaded" scheduler is enabled')
+
+    filepath = args.filepath
+    basename, extension = os.path.splitext(filepath)
+    p.dftype      = args.dftype
+    p.base        = args.base
+    p.x           = args.x
+    p.y           = args.y
+    p.categories  = args.categories
+    DEBUG = args.debug
+
+    if DEBUG:
+        print('DEBUG: Memory usage (before read):\t{} MB'.format(get_proc_mem()), flush=True)
+    df,loadtime = timed_read(filepath, p.dftype)
+
+    if df is None:
+        if loadtime == -1:
+            print("{:28} {:6}  Operation not supported".format(filepath, p.dftype), flush=True)
+        return 1
+
+    if DEBUG:
+        print('DEBUG: Memory usage (after read):\t{} MB'.format(get_proc_mem()), flush=True)
+
+    img,aggtime1 = timed_agg(df,filepath,5,5,cache_ranges=(not args.recalc_ranges))
+    if DEBUG:
+        mem_usage = df.memory_usage(deep=True)
+        if p.dftype == 'dask':
+            mem_usage = mem_usage.compute()
+        print('DEBUG:', mem_usage, flush=True)
+        mem_usage_total = mem_usage.sum()
+        print('DEBUG: DataFrame size:\t\t\t{} MB'.format(mem_usage_total / 1e6), flush=True)
+        for colname in df.columns:
+            print('DEBUG: column "{}" dtype: {}'.format(colname, df[colname].dtype))
+        print('DEBUG: Memory usage (after agg1):\t{} MB'.format(get_proc_mem()), flush=True)
+
+    img,aggtime2 = timed_agg(df,filepath,cache_ranges=(not args.recalc_ranges))
+    if DEBUG:
+        print('DEBUG: Memory usage (after agg2):\t{} MB'.format(get_proc_mem()), flush=True)
+    
+    in_size  = get_size(filepath)
+    out_size = get_size(filepath+".png")
+    
+    global_end = time.time()
+    print("{:28} {:6}  Aggregate1:{:06.2f} ({:06.2f}+{:06.2f})  Aggregate2:{:06.2f}  In:{:011d}  Out:{:011d}  Total:{:06.2f}"\
+          .format(filepath, p.dftype, loadtime+aggtime1, loadtime, aggtime1, aggtime2, in_size, out_size, global_end-global_start), flush=True)
+
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv))

+ 41 - 0
datashader-work/datashader-examples/filetimes.sh

@@ -0,0 +1,41 @@
+#!/bin/sh
+
+# Usage:
+#    conda env create -f filetimes.yml
+#    source activate filetimes
+#    mkdir times
+#    python -c "import filetimes as ft ; ft.p.base='census' ; ft.p.x='easting' ; ft.p.y='northing' ; ft.p.categories=['race']; ft.DD_FORCE_LOAD=True; ft.DEBUG=True; ft.timed_write('data/tinycensus.csv',dftype='pandas',fsize='double')"
+#    # (dftype can also be 'dask', fsize can also be 'single')
+#    ./filetimes.sh times/tinycensus
+#    # (add a second argument to filetimes.sh to set the caching mode)
+#    # (add a third argument to filetimes.sh to set the ft.DEBUG variable)
+#
+#    More examples of filetimes.sh:
+#      1) Use no caching, but enable DEBUG messages:
+#             ./filetimes.sh times/tinycensus '' debug
+#      2) Use "persist" caching mode:
+#             ./filetimes.sh times/tinycensus persist
+#      3) Use "cachey" caching mode (force-loads dask dataframes), enable DEBUG messages:
+#             ./filetimes.sh times/tinycensus cachey debug
+
+timer=/usr/bin/time
+timer="" # External timing disabled to avoid unhelpful "Command terminated abnormally" messages
+
+# Display each command if a third argument is provided
+test -n "$3" && set -x
+
+${timer} python filetimes.py ${1}.parq         dask    census easting northing race ${3:+--debug} ${2:+--cache=$2}
+${timer} python filetimes.py ${1}.snappy.parq  dask    census easting northing race ${3:+--debug} ${2:+--cache=$2}
+${timer} python filetimes.py ${1}.gz.parq      dask    census easting northing race ${3:+--debug} ${2:+--cache=$2}
+${timer} python filetimes.py ${1}.bcolz        dask    census easting northing race ${3:+--debug} ${2:+--cache=$2}
+${timer} python filetimes.py ${1}.h5           dask    census easting northing race ${3:+--debug} ${2:+--cache=$2}
+${timer} python filetimes.py ${1}.csv          dask    census easting northing race ${3:+--debug} ${2:+--cache=$2}
+${timer} python filetimes.py ${1}.feather      dask    census easting northing race ${3:+--debug} ${2:+--cache=$2}
+
+${timer} python filetimes.py ${1}.parq         pandas  census easting northing race ${3:+--debug} ${2:+--cache=$2}
+${timer} python filetimes.py ${1}.snappy.parq  pandas  census easting northing race ${3:+--debug} ${2:+--cache=$2}
+${timer} python filetimes.py ${1}.gz.parq      pandas  census easting northing race ${3:+--debug} ${2:+--cache=$2}
+${timer} python filetimes.py ${1}.bcolz        pandas  census easting northing race ${3:+--debug} ${2:+--cache=$2}
+${timer} python filetimes.py ${1}.h5           pandas  census easting northing race ${3:+--debug} ${2:+--cache=$2}
+${timer} python filetimes.py ${1}.csv          pandas  census easting northing race ${3:+--debug} ${2:+--cache=$2}
+${timer} python filetimes.py ${1}.feather      pandas  census easting northing race ${3:+--debug} ${2:+--cache=$2}

+ 24 - 0
datashader-work/datashader-examples/filetimes.yml

@@ -0,0 +1,24 @@
+name: filetimes
+dependencies:
+- bokeh
+- matplotlib
+- jupyter
+- bcolz=1.1.2=np112py35_0
+- bokeh::datashader=0.4.0=py35_0
+- conda-forge::feather-format=0.3.1=py35_1
+- dask=0.14.3=py35_0
+- numba::numba=0.33.0=np112py35_0
+- numexpr=2.6.2=np112py35_0
+- numpy=1.12.1=py35_0
+- pandas=0.19.2=np112py35_1
+- pytest
+- python=3.5.2=0
+- conda-forge::python-snappy=0.5.1=py35_0
+- snappy=1.1.4=1
+- conda-forge::fastparquet=0.0.6=py35_1
+- bccp::cachey==0.1.1
+- bloscpack==0.10.0
+- blosc==1.9.2
+- pytables==3.4.2
+- pip:
+  - castra==0.1.7

+ 7 - 0
datashader-work/datashader-examples/get_raster_data.sh

@@ -0,0 +1,7 @@
+mkdir -p data
+cd data
+wget http://s3.amazonaws.com/bokeh_data/geotiff_example.zip
+unzip geotiff_example.zip
+rm geotiff_example.zip
+cd ..
+

File diff suppressed because it is too large
+ 100 - 0
datashader-work/datashader-examples/getting_started/1_Introduction.ipynb


File diff suppressed because it is too large
+ 529 - 0
datashader-work/datashader-examples/getting_started/2_Pipeline.ipynb


File diff suppressed because it is too large
+ 322 - 0
datashader-work/datashader-examples/getting_started/3_Interactivity.ipynb


+ 35 - 0
datashader-work/datashader-examples/getting_started/index.ipynb

@@ -0,0 +1,35 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This 'Getting Started' guide aims to get you using Datashader productively as quickly as possible.\n",
+    "\n",
+    "Detailed documentation is contained in the [User Guide](../user_guide/index.html); we link to the user guide from the\n",
+    "appropriate sections of this guide.\n",
+    "\n",
+    "To see examples of what can be done with Datashader, see [Topics](../topics/index.html).\n",
+    "\n",
+    "We recommend you proceed through the following in order; it should take around 1 hour in total.\n",
+    "\n",
+    "* [1. Introduction](1_Introduction.ipynb)\n",
+    "  Simple self-contained example to show how Datashader works.\n",
+    "\n",
+    "* [2. Pipeline](2_Pipeline.ipynb)\n",
+    "  Detailed step-by-step explanation how Datashader turns your data into an image.\n",
+    "\n",
+    "* [3. Interactivity](3_Interactivity.html)\n",
+    "  Embedding images into rich, interactive plots in a web browser."
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

+ 96 - 0
datashader-work/datashader-examples/index.ipynb

@@ -0,0 +1,96 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Datashader is a graphics pipeline system for creating meaningful\n",
+    "representations of large datasets quickly and flexibly. Datashader\n",
+    "breaks the creation of images into a series of explicit steps that\n",
+    "allow computations to be done on intermediate representations.  This\n",
+    "approach allows accurate and effective visualizations to be produced\n",
+    "automatically without trial-and-error parameter tuning, and also makes\n",
+    "it simple for data scientists to focus on particular data and\n",
+    "relationships of interest in a principled way.\n",
+    "\n",
+    "The computation-intensive steps in this process are written in Python\n",
+    "but transparently compiled to machine code using [Numba](http://numba.pydata.org) and flexibly\n",
+    "distributed across cores and processors using [Dask](http://dask.pydata.org), providing a\n",
+    "highly optimized rendering pipeline that makes it practical to work\n",
+    "with extremely large datasets even on standard hardware.\n",
+    "\n",
+    "\n",
+    "To make it concrete, here's an example of what datashader code looks like:\n",
+    "\n",
+    "```python\n",
+    "    >>> import datashader as ds\n",
+    "    >>> import datashader.transfer_functions as tf\n",
+    "    >>> import pandas as pd\n",
+    "    >>> df = pd.read_csv('user_data.csv')\n",
+    "\n",
+    "    >>> cvs = ds.Canvas(plot_width=400, plot_height=400)\n",
+    "    >>> agg = cvs.points(df, 'x_col', 'y_col', ds.mean('z_col'))\n",
+    "    >>> img = tf.shade(agg, cmap=['lightblue', 'darkblue'], how='log')\n",
+    "```\n",
+    "\n",
+    "This code reads a data file into a Pandas dataframe `df`, and then\n",
+    "projects the fields `x_col` and `y_col` onto the x and y dimensions of\n",
+    "400x400 grid, aggregating it by the mean value of the `z_col` of each\n",
+    "datapoint. The results are rendered into an image where the minimum\n",
+    "count will be plotted in `lightblue`, the maximum in `darkblue`, and\n",
+    "ranging logarithmically in between.\n",
+    "\n",
+    "And here are some sample outputs for 300 million points of data (one\n",
+    "per person in the USA) from the 2010 census, each constructed using\n",
+    "code like the above:\n",
+    "\n",
+    "<img src=\"assets/images/usa_census.jpg\" alt=\"image\" width=\"1000\" />\n",
+    "\n",
+    "<img src=\"assets/images/nyc_races.jpg\" alt=\"image\" width=\"1000\" />\n",
+    "\n",
+    "<img src=\"assets/images/sym_attractors.jpg\" alt=\"image\" width=\"1000\" />\n",
+    "\n",
+    "           \n",
+    "## Installation\n",
+    "\n",
+    "Please follow the instructions on the [Github repo](https://github.com/bokeh/datashader/tree/master/examples)\n",
+    "if you want to reproduce the specific examples on this website, or the ones at [PyViz.org](http://pyviz.org) if you want to try out Datashader together with related plotting tools.\n",
+    "\n",
+    "\n",
+    "\n",
+    "## Other resources\n",
+    "\n",
+    "You can watch a short talk about datashader on YouTube:\n",
+    "[Datashader: Revealing the Structure of Genuinely Big Data](https://www.youtube.com/watch?v=6m3CFbKmK_c).\n",
+    "The video [Visualizing Billions of Points of Data](http://go2.continuum.io/JN12XH0g0W0Rb300CZ00000) (and its [slides](http://go2.continuum.io/V0Nc000C300W100X20HZhR0))\n",
+    "from a February 2016 one-hour talk first introducing Datashader are also\n",
+    "available, but do not cover more recent extensions to the library.\n",
+    "\n",
+    "Some of the original ideas for datashader were developed under the\n",
+    "name Abstract Rendering, which is described in a [2014 SPIE VDA paper](http://spie.org/Publications/Proceedings/Paper/10.1117/12.2041200).\n",
+    "\n",
+    "The source code for datashader is maintained at our [Github site,](https://github.com/bokeh/datashader) and\n",
+    "is documented using the API link on this page.\n",
+    "\n",
+    "We recommend the [Getting Started Guide](getting_started) to learn\n",
+    "the basic concepts and start using Datashader as quickly as possible.\n",
+    "\n",
+    "The [User Guide](user_guide) covers specific topics in more detail.\n",
+    "\n",
+    "The [API](api) is the definitive guide to each part of\n",
+    "Datashader, but the same information is available more conveniently via\n",
+    "the `help()` command as needed when using each component.\n",
+    "\n",
+    "Please feel free to report [issues](https://github.com/bokeh/datashader/issues) or [contribute code](https://help.github.com/articles/about-pull-requests). You are also welcome to chat with the developers on [gitter](https://gitter.im/ioam/holoviews).\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

+ 260 - 0
datashader-work/datashader-examples/nyc_taxi-nongeo.ipynb

@@ -0,0 +1,260 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Plotting non-geographic data\n",
+    "\n",
+    "Most of the datashader examples use geographic data, because it is so easily interpreted, but datashading will help exploration of any data dimensions.  Here let's start by plotting `trip_distance` versus `fare_amount` for the 12-million-point NYC taxi dataset from nyc_taxi.ipynb. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Load NYC Taxi data\n",
+    "\n",
+    "(takes a dozen seconds or so...)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "df = pd.read_csv('data/nyc_taxi.csv',usecols=['trip_distance','fare_amount','tip_amount','passenger_count'])\n",
+    "\n",
+    "df.tail()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Define a simple plot"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from bokeh.plotting import figure, output_notebook, show\n",
+    "\n",
+    "output_notebook()\n",
+    "\n",
+    "def base_plot():\n",
+    "    p = figure(\n",
+    "        x_range=(0, 20),\n",
+    "        y_range=(0, 40),\n",
+    "        tools='pan,wheel_zoom,box_zoom,reset', \n",
+    "        plot_width=800, \n",
+    "        plot_height=500,\n",
+    "    )\n",
+    "    p.xgrid.grid_line_color = None\n",
+    "    p.ygrid.grid_line_color = None\n",
+    "    p.xaxis.axis_label = \"Distance, miles\"\n",
+    "    p.yaxis.axis_label = \"Fare, $\"\n",
+    "    p.xaxis.axis_label_text_font_size = '12pt'\n",
+    "    p.yaxis.axis_label_text_font_size = '12pt'\n",
+    "    return p\n",
+    "    \n",
+    "options = dict(line_color=None, fill_color='blue', size=5)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1000 points reveals the expected linear relationship"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "samples = df.sample(n=1000)\n",
+    "p = base_plot()\n",
+    "p.circle(x=samples['trip_distance'], y=samples['fare_amount'], **options)\n",
+    "show(p)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 10,000 points show more detailed, systematic patterns in fares and times\n",
+    "  \n",
+    "Perhaps there are different metering options, along with granularity in how times and fares are counted; in any case, the times and fares do not uniformly populate any region of this space:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "options = dict(line_color='blue', fill_color='blue', size=1, alpha=0.05)\n",
+    "samples = df.sample(n=10000)\n",
+    "p = base_plot()\n",
+    "p.circle(x=samples['trip_distance'], y=samples['fare_amount'], **options)\n",
+    "show(p)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Datashader reveals additional detail, especially when zooming in\n",
+    "\n",
+    "You can now see that there are a lot of points below the linear boundary, representing long trips for very little cost (presumably GPS errors?)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import datashader as ds\n",
+    "from datashader.bokeh_ext import InteractiveImage"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "p = base_plot()\n",
+    "pipeline = ds.Pipeline(df, ds.Point(\"trip_distance\", \"fare_amount\"))\n",
+    "InteractiveImage(p, pipeline)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here we're using the default histogram-equalized color mapping function to reveal density differences across this space.  If we used a linear mapping, we can mainly see that there are a lot of values near the origin, but all the rest are colored the same minimum (defaulting to light blue) color:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datashader import transfer_functions as tf\n",
+    "import functools as ft\n",
+    "color_fn = ft.partial(tf.shade,how='linear')\n",
+    "\n",
+    "p = base_plot()\n",
+    "pipeline = ds.Pipeline(df, ds.Point(\"trip_distance\", \"fare_amount\"), color_fn=color_fn)\n",
+    "InteractiveImage(p, pipeline)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Fares are discretized to the nearest 50 cents, making patterns less visible, but there is both an upward trend in tips as fares increase (as expected), but also a large number of tips higher than the fare itself, which is surprising:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "p = base_plot()\n",
+    "p.xaxis.axis_label = \"Fare, $\"\n",
+    "p.yaxis.axis_label = \"Tip, $\"\n",
+    "pipeline = ds.Pipeline(df, ds.Point(\"fare_amount\", \"tip_amount\"))\n",
+    "InteractiveImage(p, pipeline)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Interestingly, tips go down when the number of passengers is greater than 1:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import datashader as ds\n",
+    "from datashader.bokeh_ext import InteractiveImage\n",
+    "from bokeh.models import Range1d\n",
+    "\n",
+    "p = base_plot()\n",
+    "p.xaxis.axis_label = \"Passengers\"\n",
+    "p.yaxis.axis_label = \"Tip, $\"\n",
+    "p.x_range = Range1d(-0.5, 6.5)\n",
+    "p.y_range = Range1d(0, 60)\n",
+    "\n",
+    "pipeline = ds.Pipeline(df, ds.Point(\"passenger_count\", \"tip_amount\"), width_scale=0.035)\n",
+    "InteractiveImage(p, pipeline)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here we've reduced the resolution along the x axis so that instead of getting isolated points for this inherently discrete data, you can see more-visible horizontal line segments.\n",
+    "\n",
+    "The above plots all use Bokeh directly, but a much wider range of interactive plots can be built easily using the separate [HoloViews](http://holoviews.org) library, which builds Bokeh and Matplotlib plots from high-level specifications.  For instance, Datashader currently only provides 2D aggregates, but you can easily make a zoomable one-dimensional histogram using HoloViews to dynamically collapse across a second dimension:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "result=None\n",
+    "try:\n",
+    "    import numpy as np\n",
+    "    import holoviews as hv\n",
+    "    from holoviews.operation.datashader import aggregate\n",
+    "    hv.notebook_extension('bokeh')\n",
+    "\n",
+    "    %opts Curve [width=800]\n",
+    "    \n",
+    "    dataset = hv.Dataset(df, kdims=['fare_amount', 'trip_distance'], vdims=[]).select(fare_amount=(0,60))\n",
+    "    agg = aggregate(dataset, aggregator=ds.count(), streams=[hv.streams.RangeX()], x_sampling=0.5, width=500, height=2)\n",
+    "    result = agg.map(lambda x: x.reduce(trip_distance=np.sum), hv.Image)\n",
+    "    \n",
+    "except ImportError: pass\n",
+    "result"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here datashader is aggregating over both fare_amount and trip_distance, but trip_distance was specified to have only a height of 2, because it will be further collapsed to create the histogram being displayed.  You can now use the wheel zoom tool when hovering over the x axis, and the plot will zoom in or out, dynamically resampling at the given location to make a new histogram (as long as there is a live Python server running). \n",
+    "\n",
+    "In this particular plot, there is a very wide range of fare amounts, with an implausibly high maximum fare of over 4000 dollars, but you can easily zoom in to the bulk of the data to show that nearly all fares are between 4 and 20 dollars, following something like a gamma distribution, and they are discretized to the nearest 50 cents in this dataset."
+   ]
+  },
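+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a quick, hypothetical sanity check of those claims (assuming `df` is still loaded), we can summarize the fare column with pandas and measure what fraction of fares fall exactly on a 50-cent grid:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(df['fare_amount'].describe())  # implausibly large max; bulk well under $20\n",
+    "cents = (df['fare_amount'].dropna() * 100).round().astype(int)\n",
+    "print((cents % 50 == 0).mean())  # fraction of fares on an exact 50-cent grid"
+   ]
+  }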
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}

+ 103 - 0
datashader-work/datashader-examples/pcap_to_parquet.py

@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+
+"""
+Convert PCAP output to undirected graph and save in Parquet format.
+"""
+
+from __future__ import print_function
+
+import re
+import socket
+import struct
+import sys
+
+import fastparquet as fp
+import numpy as np
+import pandas as pd
+
+
+def ip_to_integer(s):
+    return struct.unpack("!I", socket.inet_aton(s))[0]
+
+
+def get_ip_protocol(s):
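+    # tcpdump's quiet output prints "tcp" in lowercase but UDP/EIGRP/ICMP
+    # in uppercase, hence the mixed-case matching below.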
+    if "tcp" in s:
+        return "tcp"
+    if "UDP" in s:
+        return "udp"
+    if "EIGRP" in s:
+        return "eigrp"
+    if "ICMP" in s:
+        return "icmp"
+    return None
+
+
+def to_parquet(filename, prefix="maccdc2012"):
+    with open(filename) as f:
+        traffic = {}
+        nodes = set()
+
+        for line in f.readlines():
+            if "unreachable" in line:
+                continue
+            fields = line.split()
+            if not fields:
+                continue
+            if fields[1] != "IP":
+                continue
+            protocol = get_ip_protocol(line)
+            if protocol not in ("tcp", "udp", "eigrp", "icmp"):
+                continue
+            try:
+                addresses = []
+
+                # Extract source IP address and convert to integer
+                m = re.match(r'(?P<address>\d+\.\d+\.\d+\.\d+)', fields[2])
+                if not m:
+                    continue
+                addresses.append(ip_to_integer(m.group('address')))
+
+                # Extract target IP address and convert to integer
+                m = re.match(r'(?P<address>\d+\.\d+\.\d+\.\d+)', fields[4])
+                if not m:
+                    continue
+                addresses.append(ip_to_integer(m.group('address')))
+
+                nodes = nodes.union(addresses)
+                src, dst = sorted(addresses)
+                key = (protocol, src, dst)
+
+                # Extract packet size
+                nbytes = int(fields[-1])
+
+                if key in traffic:
+                    traffic[key] += nbytes
+                else:
+                    traffic[key] = nbytes
+            except Exception:
+                # skip malformed lines rather than aborting the whole file
+                pass
+
+        nodes = dict([(node, i) for i, node in enumerate(sorted(nodes))])
+
+        edges = []
+        for key in traffic:
+            edge = [nodes[key[1]], nodes[key[2]], key[0], traffic[key]]
+            edges.append(edge)
+
+        nodes_df = pd.DataFrame(np.arange(len(nodes)), columns=['id'])
+        nodes_df = nodes_df.set_index('id')
+
+        edges_df = pd.DataFrame(np.array(edges), columns=['source', 'target', 'protocol', 'weight'])
+        edges_df['source'] = pd.to_numeric(edges_df['source'])
+        edges_df['target'] = pd.to_numeric(edges_df['target'])
+        edges_df['weight'] = pd.to_numeric(edges_df['weight'])
+        edges_df['protocol'] = edges_df['protocol'].astype('category')
+
+        fp.write('{}_nodes.parq'.format(prefix), nodes_df)
+        fp.write('{}_edges.parq'.format(prefix), edges_df)
+
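+# Usage: python pcap_to_parquet.py <tcpdump_text_file> [output_prefix]
+# The input is a tcpdump text dump such as the one produced in the
+# network_packets notebook via `tcpdump -qns 0 -r ... | grep tcp`.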
+if __name__ == '__main__':
+    if len(sys.argv) > 2:
+        to_parquet(sys.argv[1], prefix=sys.argv[2])
+    else:
+        to_parquet(sys.argv[1])

+ 107 - 0
datashader-work/datashader-examples/raster.py

@@ -0,0 +1,107 @@
+from __future__ import division
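+
+# Bokeh server app: typically launched with `bokeh serve --show raster.py`
+# (assumes a Web-Mercator-projected GeoTIFF at ./data/projected.tif).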
+
+if __name__ == "__main__":
+    from bokeh.io import curdoc
+    from bokeh.plotting import Figure
+    from bokeh.models import ColumnDataSource, CustomJS
+    from bokeh.tile_providers import STAMEN_TONER
+    
+    import rasterio as rio
+    import datashader as ds
+    import datashader.transfer_functions as tf
+    from datashader.colors import Hot
+    
+    def on_dims_change(attr, old, new):
+        update_image()
+    
+    def update_image():
+    
+        global dims, raster_data
+    
+        dims_data = dims.data
+    
+        if not dims_data['width'] or not dims_data['height']:
+            return
+    
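+        # Clamp the requested view to the raster's bounds so the Canvas
+        # never samples outside the source image.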
+        xmin = max(dims_data['xmin'][0], raster_data.bounds.left)
+        ymin = max(dims_data['ymin'][0], raster_data.bounds.bottom)
+        xmax = min(dims_data['xmax'][0], raster_data.bounds.right)
+        ymax = min(dims_data['ymax'][0], raster_data.bounds.top)
+    
+        canvas = ds.Canvas(plot_width=dims_data['width'][0],
+                           plot_height=dims_data['height'][0],
+                           x_range=(xmin, xmax),
+                           y_range=(ymin, ymax))
+    
+        agg = canvas.raster(raster_data)
+        img = tf.shade(agg, cmap=Hot, how='linear')
+    
+        new_data = {}
+        new_data['image'] = [img.data]
+        new_data['x'] = [xmin]
+        new_data['y'] = [ymin]
+        new_data['dh'] = [ymax - ymin]
+        new_data['dw'] = [xmax - xmin]
+        image_source.stream(new_data, 1)
+    
+    # load projected raster data
+    path = './data/projected.tif'
+    raster_data = rio.open(path)
+    
+    # manage client-side dimensions
+    dims = ColumnDataSource(data=dict(width=[], height=[], xmin=[], xmax=[], ymin=[], ymax=[]))
+    dims.on_change('data', on_dims_change)
+    dims_jscode = """
+    var update_dims = function () {
+        var new_data = {
+            height: [plot.frame.height],
+            width: [plot.frame.width],
+            xmin: [plot.x_range.start],
+            ymin: [plot.y_range.start],
+            xmax: [plot.x_range.end],
+            ymax: [plot.y_range.end]
+        };
+        dims.data = new_data;
+    };
+    
+    if (typeof throttle != 'undefined' && throttle != null) {
+        clearTimeout(throttle);
+    }
+    
+    throttle = setTimeout(update_dims, 100, "replace");
+    """
+    
+    # Create plot -------------------------------
+    xmin = -8240227.037
+    ymin = 4974203.152
+    xmax = -8231283.905
+    ymax = 4979238.441
+    
+    fig = Figure(x_range=(xmin, xmax),
+                 y_range=(ymin, ymax),
+                 plot_height=600,
+                 plot_width=900,
+                 tools='pan,wheel_zoom')
+    fig.background_fill_color = 'black'
+    fig.add_tile(STAMEN_TONER, alpha=0) # used to set axis ranges
+    fig.x_range.callback = CustomJS(code=dims_jscode, args=dict(plot=fig, dims=dims))
+    fig.y_range.callback = CustomJS(code=dims_jscode, args=dict(plot=fig, dims=dims))
+    fig.axis.visible = False
+    fig.grid.grid_line_alpha = 0
+    fig.min_border_left = 0
+    fig.min_border_right = 0
+    fig.min_border_top = 0
+    fig.min_border_bottom = 0
+    
+    image_source = ColumnDataSource(dict(image=[], x=[], y=[], dw=[], dh=[]))
+    fig.image_rgba(source=image_source,
+                   image='image',
+                   x='x',
+                   y='y',
+                   dw='dw',
+                   dh='dh',
+                   dilate=False)
+    
+    curdoc().add_root(fig)

+ 14 - 0
datashader-work/datashader-examples/small.yml

@@ -0,0 +1,14 @@
+---
+
+data:
+
+  - url: http://s3.amazonaws.com/datashader-data/calvert_uk_research2017.snappy.parq.zip
+    title: 'Graph for Edge Bundling (Calvert UK Research 2017)'
+    files:
+      - calvert_uk_research2017_nodes.snappy.parq
+      - calvert_uk_research2017_edges.snappy.parq
+
+  - url: http://s3.amazonaws.com/datashader-data/calvert_uk_research2017_nodes.zip
+    title: 'Institutions for Edge Bundling (Calvert UK Research 2017)'
+    files:
+      - calvert_uk_research2017_nodes.csv

+ 517 - 0
datashader-work/datashader-examples/streaming-aggregation.ipynb

@@ -0,0 +1,517 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from collections import deque\n",
+    "from functools import partial\n",
+    "from itertools import cycle\n",
+    "\n",
+    "import pandas as pd\n",
+    "\n",
+    "import datashader as ds\n",
+    "import datashader.transfer_functions as tf\n",
+    "from datashader.colors import viridis\n",
+    "\n",
+    "from streamz import Stream"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def taxi_trips_stream(source='data/nyc_taxi.csv', frequency='T'):\n",
+    "    \"\"\"Generate dataframes grouped by given frequency\"\"\"\n",
+    "    def get_group(resampler, key):\n",
+    "        try:\n",
+    "            df = resampler.get_group(key)\n",
+    "            df.reset_index(drop=True)\n",
+    "        except KeyError:\n",
+    "            df = pd.DataFrame()\n",
+    "        return df\n",
+    "\n",
+    "    df = pd.read_csv(source,\n",
+    "                     infer_datetime_format=True,\n",
+    "                     parse_dates=['tpep_pickup_datetime', 'tpep_pickup_datetime'])\n",
+    "    df = df.set_index('tpep_pickup_datetime', drop=True)\n",
+    "    df = df.sort_index()\n",
+    "    r = df.resample(frequency)\n",
+    "    chunks = [get_group(r, g) for g in sorted(r.groups)]\n",
+    "    indices = cycle(range(len(chunks)))\n",
+    "    while True:\n",
+    "        yield chunks[next(indices)]"
+   ]
+  },
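+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a quick sketch of what this generator yields (assuming `data/nyc_taxi.csv` is present), each chunk is a dataframe covering one resampling period:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips = taxi_trips_stream(frequency='D')\n",
+    "chunk = next(trips)\n",
+    "chunk.index.min(), chunk.index.max(), len(chunk)"
+   ]
+  },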
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Create streams"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Given a stream of dataframes representing NYC taxi data, we create four streams: two streams are sliding window aggregations over some time period, while two other streams track the cumulative average for a particular value. The pipeline visualization below shows each step that makes up each stream.\n",
+    "\n",
+    "For each aggregation stream, the steps are 1) aggregate each dataframe using a Datashader reduction, 2) keep sliding window of those aggregations, and 3) combine sliding window collection into image. The first stream creates a two-day sliding window aggregation, while the second stream creates a 1-week sliding window aggregation.\n",
+    "\n",
+    "For each cumulative average stream, we track the cumulative sum of each value along with the number of cumulative data points.\n",
+    "\n",
+    "We use the primitives given in the `streamz` library to accomplish this. `aggregated_sliding_window_image_queue` creates each aggregation stream. `cumulative_mean_queue` creates each cumulative average stream, but this will likely be replaced by a native `streamz.StreamingDataFrame` container when ready. Each stream will place its final result into a double-ended queue, which is used to keep a history of previous results. By default, we only keep the most recent."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def aggregate_df(df, cvs, x, y, agg=None):\n",
+    "    return df.index.min(), df.index.max(), cvs.points(df, x, y, agg)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def aggregate_images(iterable, cmap):\n",
+    "    name = \"{:.10} - {:.10}\".format(str(iterable[0][0]), str(iterable[-1][1]))\n",
+    "    total = sum([item[2] for item in iterable])\n",
+    "    return tf.shade(total, cmap=cmap, name=name)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def aggregated_sliding_window_image_queue(source, agg1, agg2, window=1, history=1):\n",
+    "    q = deque(maxlen=history)\n",
+    "    s = source.map(agg1).sliding_window(window)\n",
+    "    s.map(agg2).sink(q.append)\n",
+    "    return q"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def cumulative_mean_queue(source, column, history=1):\n",
+    "    def accumulator(acc, df):\n",
+    "        n, total, oldest = acc\n",
+    "        if not oldest:\n",
+    "            oldest = df.index.min()\n",
+    "        return n + 1, total + df[column].sum(), oldest, df.index.max()\n",
+    "    \n",
+    "    def merge(value):\n",
+    "        n, total, oldest, latest = value\n",
+    "        return oldest, latest, total / n\n",
+    "\n",
+    "    q = deque(maxlen=history)\n",
+    "    source.accumulate(accumulator, start=(0, 0, None)).map(merge).sink(q.append)\n",
+    "    return q"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def show_queue(q, column):\n",
+    "    pd.options.display.float_format = '{:.2f}'.format\n",
+    "    return pd.DataFrame(list(q), columns=['start', 'end', column])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x_range = (-8243204.0, -8226511.0)\n",
+    "y_range = (4968192.0, 4982886.0)\n",
+    "cvs = ds.Canvas(plot_width=800, plot_height=600, x_range=x_range, y_range=y_range)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Helper functions for useful aggregations\n",
+    "min_amount     = partial(aggregate_df, cvs, x='pickup_x', y='pickup_y', agg=ds.min('total_amount'))\n",
+    "max_amount     = partial(aggregate_df, cvs, x='pickup_x', y='pickup_y', agg=ds.max('total_amount'))\n",
+    "mean_amount    = partial(aggregate_df, cvs, x='pickup_x', y='pickup_y', agg=ds.mean('total_amount'))\n",
+    "sum_amount     = partial(aggregate_df, cvs, x='pickup_x', y='pickup_y', agg=ds.sum('total_amount'))\n",
+    "max_passengers = partial(aggregate_df, cvs, x='pickup_x', y='pickup_y', agg=ds.max('passenger_count'))\n",
+    "sum_passengers = partial(aggregate_df, cvs, x='pickup_x', y='pickup_y', agg=ds.sum('passenger_count'))\n",
+    "sum_pickups    = partial(aggregate_df, cvs, x='pickup_x', y='pickup_y', agg=ds.count())\n",
+    "\n",
+    "reduce_viridis = partial(aggregate_images, cmap=viridis)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "source = Stream()\n",
+    "q_days = aggregated_sliding_window_image_queue(source, window=2, history=6, agg1=max_amount, agg2=reduce_viridis)\n",
+    "q_week = aggregated_sliding_window_image_queue(source, window=7, agg1=max_amount, agg2=reduce_viridis)\n",
+    "\n",
+    "q_avg_passengers = cumulative_mean_queue(source, 'passenger_count', history=7)\n",
+    "q_avg_amount     = cumulative_mean_queue(source, 'total_amount', history=7)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "source.visualize()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Simplifying stream creation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As you can see in the previous section, there are a few areas to improve upon:\n",
+    "\n",
+    "- less code/boilerplate\n",
+    "- hide individual steps seen in stream diagram\n",
+    "- encapsulate separate stream construction methods into helper classes\n",
+    "- separate stream creation and stream sink\n",
+    "- allow for partial results from sliding windows (not currently supported by `streamz`)\n",
+    "- output results into other collections besides queues\n",
+    "\n",
+    "By subclassing `streamz.Stream`, we've accomplished the above without sacrificing readability."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class SlidingWindowImageAggregate(Stream):\n",
+    "    def __init__(self, source, canvas, x, y, agg, n=7, cmap=None, bgcolor='black'):\n",
+    "        # Set internal streamz instance variables to control names in diagram\n",
+    "        self.n = n\n",
+    "        \n",
+    "        def aggregate_df(df):\n",
+    "            return df.index.min(), df.index.max(), canvas.points(df, x, y, agg)\n",
+    "\n",
+    "        def aggregate_images(iterable):\n",
+    "            name = \"{:.10} - {:.10}\".format(str(iterable[0][0]), str(iterable[-1][1]))\n",
+    "            total = sum([item[2] for item in iterable])\n",
+    "            return tf.set_background(tf.shade(total, cmap, name=name), color=bgcolor)\n",
+    "        \n",
+    "        self.cache = deque(maxlen=n)\n",
+    "        self.agg1 = aggregate_df\n",
+    "        self.agg2 = aggregate_images\n",
+    "        \n",
+    "        Stream.__init__(self, source)\n",
+    "        \n",
+    "    def update(self, x, who=None):\n",
+    "        self.cache.append(self.agg1(x))\n",
+    "        return self.emit(self.agg2(tuple(self.cache)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class CumulativeMean(Stream):\n",
+    "    def __init__(self, source, column):\n",
+    "        # Set internal streamz instance variables to control names in diagram\n",
+    "        self.str_list = ['column']\n",
+    "        self.column = column\n",
+    "\n",
+    "        self.count = 0\n",
+    "        self.total = 0\n",
+    "        self.oldest = None\n",
+    "\n",
+    "        Stream.__init__(self, source)\n",
+    "\n",
+    "    def update(self, x, who=None):\n",
+    "        if not self.oldest:\n",
+    "            self.oldest = x.index.min()\n",
+    "        self.count, self.total = self.count + 1, self.total + x[self.column].sum()\n",
+    "        return self.emit((self.oldest, x.index.max(), self.total / self.count))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "source = Stream()\n",
+    "\n",
+    "cvs = ds.Canvas(plot_width=800, plot_height=600, x_range=x_range, y_range=y_range)\n",
+    "\n",
+    "q_days = deque(maxlen=6)\n",
+    "s_days = SlidingWindowImageAggregate(source, cvs, 'pickup_x', 'pickup_y', ds.max('total_amount'), n=2, cmap=viridis)\n",
+    "s_days.sink(q_days.append)\n",
+    "\n",
+    "q_week = deque(maxlen=1)\n",
+    "s_week = SlidingWindowImageAggregate(source, cvs, 'pickup_x', 'pickup_y', ds.max('total_amount'), n=7, cmap=viridis)\n",
+    "s_week.sink(q_week.append)\n",
+    "\n",
+    "q_avg_passengers = deque(maxlen=7)\n",
+    "s_avg_passengers = CumulativeMean(source, 'passenger_count')\n",
+    "s_avg_passengers.sink(q_avg_passengers.append)\n",
+    "\n",
+    "q_avg_amount = deque(maxlen=7)\n",
+    "s_avg_amount = CumulativeMean(source, 'total_amount')\n",
+    "s_avg_amount.sink(q_avg_amount.append)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "source.visualize()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Push data through streams"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We initially push 3 days worth of dataframes through the streams to view partial results."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips_per_day = taxi_trips_stream(frequency='D')\n",
+    "for i in range(3):\n",
+    "    source.emit(next(trips_per_day))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tf.Images(*list(q_week))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for i in range(4):\n",
+    "    source.emit(next(trips_per_day))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tf.Images(*list(q_week))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Cumulative average of passengers (ordered by oldest first)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "show_queue(q_avg_passengers, 'cumulative average passengers')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Cumulative average of total fare (ordered by oldest first)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "show_queue(q_avg_amount, 'cumulative average total fare')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### History of 2-day aggregations (ordered by oldest first)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tf.Images(*list(q_days))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Current 1-week aggregation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tf.Images(*list(q_week))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we get the next day's worth of data and see how the streams have updated."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "source.emit(next(trips_per_day))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Cumulative average of passengers (ordered by oldest first)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "show_queue(q_avg_passengers, 'cumulative average passengers')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Cumulative average of total fare (ordered by oldest first)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "show_queue(q_avg_amount, 'cumulative average total fare')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### History of 2-day aggregations (ordered by oldest first)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tf.Images(*list(q_days))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Current 1-week aggregation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tf.Images(*list(q_week))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

+ 163 - 0
datashader-work/datashader-examples/streaming.py

@@ -0,0 +1,163 @@
+from __future__ import division
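+
+# Bokeh server app: typically launched with `bokeh serve --show streaming.py`
+# (assumes ./data/nyc_taxi.csv as produced by taxi_preprocessing_example.py).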
+
+import math
+
+from collections import OrderedDict
+
+from bokeh.io import curdoc
+from bokeh.plotting import Figure
+from bokeh.models import ColumnDataSource, CustomJS
+from bokeh.tile_providers import STAMEN_TONER
+from bokeh.models import VBox, HBox, Paragraph, Select
+from bokeh.palettes import BuGn9
+
+import pandas as pd
+
+import datashader as ds
+import datashader.transfer_functions as tf
+
+def bin_data():
+    global time_period, grouped, group_count, counter, times, groups
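+    # Bin trips by (hour, minute-block): e.g. time_period=15 gives four
+    # bins per hour covering minutes 0-14, 15-29, 30-44 and 45-59.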
+    grouped = df.groupby([times.hour, times.minute // time_period])
+    groups = sorted(grouped.groups.keys(), key=lambda r: (r[0], r[1]))
+    group_count = len(groups)
+    counter = 0
+
+def on_time_select_change(attr, old, new):
+    global time_period, counter, time_select_options
+    time_period = time_select_options[new]
+    counter = 0
+    bin_data()
+
+counter = 0
+def update_data():
+    global dims, grouped, group_count, counter, time_text, time_period
+
+    dims_data = dims.data
+
+    if not dims_data['width'] or not dims_data['height']:
+        return
+
+    group_num = counter % group_count
+    group = groups[group_num]
+    grouped_df = grouped.get_group(group)
+    update_image(grouped_df)
+
+    # update time text
+    num_minute_groups = 60 // time_period
+    mins = group[1] * time_period
+    hr = group[0]
+    end_mins = ((group[1] + 1) % num_minute_groups) * time_period
+    end_hr = hr if end_mins > 0 else (hr + 1) % 24
+    time_text.text = 'Time Period: {}:{} - {}:{}'.format(str(hr).zfill(2),
+                                                         str(mins).zfill(2),
+                                                         str(end_hr).zfill(2),
+                                                         str(end_mins).zfill(2))
+    counter += 1
+
+def update_image(dataframe):
+    global dims
+    dims_data = dims.data
+
+    if not dims_data['width'] or not dims_data['height']:
+        return
+
+    plot_width = int(math.ceil(dims_data['width'][0]))
+    plot_height = int(math.ceil(dims_data['height'][0]))
+    x_range = (dims_data['xmin'][0], dims_data['xmax'][0])
+    y_range = (dims_data['ymin'][0], dims_data['ymax'][0])
+
+    canvas = ds.Canvas(plot_width=plot_width,
+                       plot_height=plot_height,
+                       x_range=x_range,
+                       y_range=y_range)
+
+    agg = canvas.points(dataframe, 'dropoff_x', 'dropoff_y',
+                        ds.count('trip_distance'))
+
+    img = tf.shade(agg, cmap=BuGn9, how='log')
+
+    new_data = {}
+    new_data['image'] = [img.data]
+    new_data['x'] = [x_range[0]]
+    new_data['y'] = [y_range[0]]
+    new_data['dh'] = [y_range[1] - y_range[0]]
+    new_data['dw'] = [x_range[1] - x_range[0]]
+
+    image_source.stream(new_data, 1)
+
+
+time_select_options = OrderedDict()
+time_select_options['1 Hour'] = 60
+time_select_options['30 Minutes'] = 30
+time_select_options['15 Minutes'] = 15
+time_period = list(time_select_options.values())[0]
+
+time_select = Select.create(name="Time Period", options=time_select_options)
+time_select.on_change('value', on_time_select_change)
+
+# load nyc taxi data
+path = './data/nyc_taxi.csv'
+datetime_field = 'tpep_dropoff_datetime'
+cols = ['dropoff_x', 'dropoff_y', 'trip_distance', datetime_field]
+
+df = pd.read_csv(path, usecols=cols, parse_dates=[datetime_field]).dropna(axis=0)
+times = pd.DatetimeIndex(df[datetime_field])
+group_count = grouped = groups = None
+bin_data()
+
+# manage client-side dimensions
+dims = ColumnDataSource(data=dict(width=[], height=[], xmin=[], xmax=[], ymin=[], ymax=[]))
+dims_jscode = """
+var update_dims = function () {
+    var new_data = {
+        height: [plot.frame.height],
+        width: [plot.frame.width],
+        xmin: [plot.x_range.start],
+        ymin: [plot.y_range.start],
+        xmax: [plot.x_range.end],
+        ymax: [plot.y_range.end]
+    };
+    dims.data = new_data;
+};
+
+if (typeof throttle != 'undefined' && throttle != null) {
+    clearTimeout(throttle);
+}
+
+throttle = setTimeout(update_dims, 100, "replace");
+"""
+
+# Create plot -------------------------------
+xmin = -8240227.037
+ymin = 4974203.152
+xmax = -8231283.905
+ymax = 4979238.441
+
+fig = Figure(x_range=(xmin, xmax),
+             y_range=(ymin, ymax),
+             plot_height=600,
+             plot_width=900,
+             tools='pan,wheel_zoom')
+fig.background_fill_color = 'black'
+fig.add_tile(STAMEN_TONER, alpha=.3)
+fig.x_range.callback = CustomJS(code=dims_jscode, args=dict(plot=fig, dims=dims))
+fig.y_range.callback = CustomJS(code=dims_jscode, args=dict(plot=fig, dims=dims))
+fig.axis.visible = False
+fig.grid.grid_line_alpha = 0
+fig.min_border_left = 0
+fig.min_border_right = 0
+fig.min_border_top = 0
+fig.min_border_bottom = 0
+
+image_source = ColumnDataSource(dict(image=[], x=[], y=[], dw=[], dh=[]))
+fig.image_rgba(source=image_source, image='image', x='x', y='y', dw='dw', dh='dh', dilate=False)
+
+time_text = Paragraph(text='Time Period: 00:00 - 00:00')
+controls = HBox(children=[time_text, time_select], width=fig.plot_width)
+layout = VBox(children=[fig, controls])
+
+curdoc().add_root(layout)
+curdoc().add_periodic_callback(update_data, 1000)

+ 76 - 0
datashader-work/datashader-examples/taxi_preprocessing_example.py

@@ -0,0 +1,76 @@
+"""Download data needed for the examples"""
+
+from __future__ import print_function
+
+if __name__ == "__main__":
+
+    from os import path, makedirs, remove
+    from download_sample_data import bar as progressbar
+    
+    import pandas as pd
+    import numpy as np
+    import sys
+    
+    try:
+        import requests
+    except ImportError:
+        print('Download script requires the requests package: conda install requests')
+        sys.exit(1)
+    
+    def _download_dataset(url):
+        r = requests.get(url, stream=True)
+        output_path = path.split(url)[1]
+        with open(output_path, 'wb') as f:
+            total_length = int(r.headers.get('content-length'))
+            for chunk in progressbar(r.iter_content(chunk_size=1024), expected_size=(total_length/1024) + 1):
+                if chunk:
+                    f.write(chunk)
+                    f.flush()
+    
+    examples_dir = path.dirname(path.realpath(__file__))
+    data_dir = path.join(examples_dir, 'data')
+    if not path.exists(data_dir):
+        makedirs(data_dir)
+    
+    # Taxi data
+    def latlng_to_meters(df, lat_name, lng_name):
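+        # Project WGS84 lon/lat (degrees) to Web Mercator meters in place,
+        # using a spherical Earth of radius 6378137 m.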
+        lat = df[lat_name]
+        lng = df[lng_name]
+        origin_shift = 2 * np.pi * 6378137 / 2.0
+        mx = lng * origin_shift / 180.0
+        my = np.log(np.tan((90 + lat) * np.pi / 360.0)) / (np.pi / 180.0)
+        my = my * origin_shift / 180.0
+        df.loc[:, lng_name] = mx
+        df.loc[:, lat_name] = my
+    
+    taxi_path = path.join(data_dir, 'nyc_taxi.csv')
+    if not path.exists(taxi_path):
+        print("Downloading Taxi Data...")
+        url = ('https://storage.googleapis.com/tlc-trip-data/2015/'
+               'yellow_tripdata_2015-01.csv')
+    
+        _download_dataset(url)
+        df = pd.read_csv('yellow_tripdata_2015-01.csv')
+    
+        print('Filtering Taxi Data')
+        df = df.loc[(df.pickup_longitude < -73.75) &
+                    (df.pickup_longitude > -74.15) &
+                    (df.dropoff_longitude < -73.75) &
+                    (df.dropoff_longitude > -74.15) &
+                    (df.pickup_latitude > 40.68) &
+                    (df.pickup_latitude < 40.84) &
+                    (df.dropoff_latitude > 40.68) &
+                    (df.dropoff_latitude < 40.84)].copy()
+    
+        print('Reprojecting Taxi Data')
+        latlng_to_meters(df, 'pickup_latitude', 'pickup_longitude')
+        latlng_to_meters(df, 'dropoff_latitude', 'dropoff_longitude')
+        df.rename(columns={'pickup_longitude': 'pickup_x', 'dropoff_longitude': 'dropoff_x',
+                           'pickup_latitude': 'pickup_y', 'dropoff_latitude': 'dropoff_y'},
+                  inplace=True)
+        df.to_csv(taxi_path, index=False)
+        remove('yellow_tripdata_2015-01.csv')
+        
+    
+    print("\nAll data downloaded.")
+    

+ 375 - 0
datashader-work/datashader-examples/tiling.ipynb

@@ -0,0 +1,375 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Building Tilesets using Datashader"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Datashader provides `render_tiles` which is a utility function for creating tilesets from arbitrary datashader pipelines."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from datashader.tiles import render_tiles"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A couple of notes about the tiling process:\n",
+    "    \n",
+    "- By default, uses a simple `Web Mercator Tiling Scheme (EPSG:3857)`\n",
+    "- call `render_tiles` with the following arguments:\n",
+    "\n",
+    "```python\n",
+    "extent_of_area_i_want_to_tile = (-500000, -500000, 500000, 500000)  # xmin, ymin, xmax, ymax\n",
+    "render_tiles(extent_of_data_i_want_to_handle,\n",
+    "             tile_levels=range(6),\n",
+    "             output_path='example_tileset_output_directory',\n",
+    "             load_data_func=function_which_returns_dataframe,\n",
+    "             rasterize_func=function_which_creates_xarray_aggregate,\n",
+    "             shader_func=function_which_renders_aggregate_to_datashader_image,\n",
+    "             post_render_func=function_which_post_processes_image)\n",
+    "```\n",
+    "\n",
+    "- data representing x / y coordinates is assumed to be represented in meters (m) based on the Web Mercator coordinate system.\n",
+    "- the tiling extent is subdivided into `supertiles` generally of size `4096 x 4096`\n",
+    "- the `load_data_func` returns a dataframe-like object and contains your data access specific code.\n",
+    "- the `rasterize_func` returns a `xr.DataArray` and contains your xarray specific code.\n",
+    "- the `shader_func` returns a `ds.Image` and contains your datashader specific code.\n",
+    "- the `post_render_func` is called once for each final tile (`default 256 x 256`) and contains PIL (Python Imaging Library) specific code.  This is the hook for adding additional filters, text, watermarks, etc. to output tiles."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Creating Tile Component Functions"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Create `load_data_func`\n",
+    "- accepts `x_range` and `y_range` arguments which correspond to the ranges of the supertile being rendered.\n",
+    "- returns a dataframe-like object (pd.Dataframe / dask.Dataframe)\n",
+    "- this example `load_data_func` creates a pandas dataframe with `x` and `y` fields sampled from a wald distribution "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "df = None\n",
+    "def load_data_func(x_range, y_range):\n",
+    "    global df\n",
+    "    if df is None:\n",
+    "        xoffsets = [-1, 1, -1, 1]\n",
+    "        yoffsets = [-1, 1, 1, -1]\n",
+    "        xs = np.concatenate([np.random.wald(10000000, 10000000, size=10000000) * offset for offset in xoffsets])\n",
+    "        ys = np.concatenate([np.random.wald(10000000, 10000000, size=10000000) * offset for offset in yoffsets])\n",
+    "        df = pd.DataFrame(dict(x=xs, y=ys))\n",
+    "    \n",
+    "    return df.loc[df['x'].between(*x_range) & df['y'].between(*y_range)]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Create `rasterize_func`\n",
+    "- accepts `df`, `x_range`, `y_range`, `height`, `width` arguments which correspond to the data, ranges, and plot dimensions of the supertile being rendered.\n",
+    "- returns an `xr.DataArray` object representing the aggregate."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import datashader as ds\n",
+    "\n",
+    "def rasterize_func(df, x_range, y_range, height, width):\n",
+    "    # aggregate\n",
+    "    cvs = ds.Canvas(x_range=x_range, y_range=y_range,\n",
+    "                    plot_height=height, plot_width=width)\n",
+    "    agg = cvs.points(df, 'x', 'y')\n",
+    "    return agg"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Create `shader_func`\n",
+    "- accepts `agg (xr.DataArray)`, `span (tuple(min, max))`.  The span argument can be used to control color mapping / auto-ranging across supertiles.\n",
+    "- returns an `ds.Image` object representing the shaded image."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import datashader.transfer_functions as tf\n",
+    "from datashader.colors import viridis\n",
+    "\n",
+    "def shader_func(agg, span=None):\n",
+    "    img = tf.shade(agg, cmap=reversed(viridis), span=span, how='log')\n",
+    "    img = tf.set_background(img, 'black')\n",
+    "    return img"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Create `post_render_func`\n",
+    "- accepts `img `, `extras` arguments which correspond to the output PIL.Image before it is write to disk (or S3), and addtional image properties.\n",
+    "- returns image `(PIL.Image)`\n",
+    "- this is a good place to run any non-datashader-specific logic on each output tile."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from PIL import ImageDraw\n",
+    "\n",
+    "def post_render_func(img, **kwargs):\n",
+    "    info = \"x={},y={},z={}\".format(kwargs['x'], kwargs['y'], kwargs['z'])\n",
+    "    draw = ImageDraw.Draw(img)\n",
+    "    draw.text((5, 5), info, fill='rgb(255, 255, 255)')\n",
+    "    return img"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Render tiles to local filesystem"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "full_extent_of_data = (-500000, -500000, 500000, 500000)\n",
+    "output_path = 'tiles_output_directory/wald_tiles'\n",
+    "results = render_tiles(full_extent_of_data,\n",
+    "                       range(3),\n",
+    "                       load_data_func=load_data_func,\n",
+    "                       rasterize_func=rasterize_func,\n",
+    "                       shader_func=shader_func,\n",
+    "                       post_render_func=post_render_func,\n",
+    "                       output_path=output_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Preview the tileset using Bokeh\n",
+    "- Browse to the tile output directory and start an http server:\n",
+    "\n",
+    "```bash\n",
+    "$> cd test_tiles_output\n",
+    "$> python -m http.server\n",
+    "\n",
+    "Starting up http-server, serving ./\n",
+    "Available on:\n",
+    "  http://127.0.0.1:8080\n",
+    "  http://192.168.1.7:8080\n",
+    "Hit CTRL-C to stop the server\n",
+    "```\n",
+    "\n",
+    "- build a `bokeh.plotting.Figure`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from bokeh.plotting import figure\n",
+    "from bokeh.models.tiles import WMTSTileSource\n",
+    "from bokeh.io import show\n",
+    "from bokeh.io import output_notebook\n",
+    "\n",
+    "output_notebook()\n",
+    "\n",
+    "xmin, ymin, xmax, ymax = full_extent_of_data\n",
+    "\n",
+    "p = figure(width=800, height=800, \n",
+    "           x_range=(int(-20e6), int(20e6)),\n",
+    "           y_range=(int(-20e6), int(20e6)),\n",
+    "           tools=\"pan,wheel_zoom,reset\")\n",
+    "\n",
+    "p.background_fill_color = 'black'\n",
+    "p.grid.grid_line_alpha = 0\n",
+    "p.axis.visible = False\n",
+    "p.add_tile(WMTSTileSource(url=\"http://localhost:8080/{Z}/{X}/{Y}.png\"),\n",
+    "          render_parents=False)\n",
+    "show(p)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Render tiles to Amazon Simple Storage Service (S3)\n",
+    "\n",
+    "To render tiles directly to S3, you only need to use the `s3://` protocol in your `output_path` argument\n",
+    "\n",
+    "- Requires AWS Access / Secret Keys with appropriate IAM permissions for uploading to S3.\n",
+    "- Requires extra `boto3` dependency:\n",
+    "```bash\n",
+    "conda install boto3\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Configuring credentials\n",
+    "\n",
+    "- Quoting [`boto3 documentation regarding credential handling`](https://boto3.readthedocs.io/en/latest/guide/configuration.html):\n",
+    "\n",
+    "> The mechanism in which boto3 looks for credentials is to search through a list of possible locations and stop as soon as it finds credentials. The order in which Boto3 searches for credentials is:\n",
+    "1. ~~Passing credentials as parameters in the boto.client() method~~\n",
+    "- ~~Passing credentials as parameters when creating a Session object~~\n",
+    "- **Environment variables**\n",
+    "- **Shared credential file (~/.aws/credentials)**\n",
+    "- **AWS config file (~/.aws/config)**\n",
+    "- **Assume Role provider**\n",
+    "- **Boto2 config file (/etc/boto.cfg and ~/.boto)**\n",
+    "- **Instance metadata service on an Amazon EC2 instance that has an IAM role configured**.\n",
+    "\n",
+    "- Datashader's `render_tiles` function supports only credential search locations highlighted in bold above\n",
+    "- **NOTE**:  all tiles written to S3 are marked with `public-read` ACL settings."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Setup tile bucket using AWS CLI\n",
+    "\n",
+    "```bash\n",
+    "$> aws s3 mb s3://datashader-tiles-testing/\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "full_extent_of_data = (int(-20e6), int(-20e6), int(20e6), int(20e6))\n",
+    "output_path = 's3://datashader-tiles-testing/wald_tiles/'\n",
+    "try:\n",
+    "    results = render_tiles(full_extent_of_data,\n",
+    "                           range(3),\n",
+    "                           load_data_func=load_data_func,\n",
+    "                           rasterize_func=rasterize_func,\n",
+    "                           shader_func=shader_func,\n",
+    "                           post_render_func=post_render_func,\n",
+    "                           output_path=output_path)\n",
+    "except ImportError:\n",
+    "    print('you must install boto3 to save tiles to Amazon S3')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Preview S3 Tiles"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "xmin, ymin, xmax, ymax = full_extent_of_data\n",
+    "\n",
+    "p = figure(width=800, height=800, \n",
+    "           x_range=(int(-20e6), int(20e6)),\n",
+    "           y_range=(int(-20e6), int(20e6)),\n",
+    "           tools=\"pan,wheel_zoom,reset\")\n",
+    "p.axis.visible = False\n",
+    "p.background_fill_color = 'black'\n",
+    "p.grid.grid_line_alpha = 0\n",
+    "p.add_tile(WMTSTileSource(url=\"https://datashader-tiles-testing.s3.amazonaws.com/wald_tiles/{Z}/{X}/{Y}.png\"),\n",
+    "           render_parents=False)\n",
+    "show(p)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.4.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

+ 75 - 0
datashader-work/datashader-examples/topics/attractors.yml

@@ -0,0 +1,75 @@
+- [Clifford, bgy, 0, 0, -1.3, -1.3, -1.8, -1.9]
+- [Clifford, bmw, 0, 0, -1.4, 1.6, 1.0, 0.7]
+- [Clifford, bgyw, 0, 0, 1.7, 1.7, 0.6, 1.2]
+- [Clifford, bmy, 0, 0, 1.7, 0.7, 1.4, 2.0]
+- [Clifford, fire, 0, 0, -1.7, 1.8, -1.9, -0.4]
+- [Clifford, gray, 0, 0, 1.1, -1.32, -1.03, 1.54]
+- [Clifford, kgy, 0, 0, 0.77, 1.99, -1.31, -1.45]
+- [Clifford, kbc, 0, 0, -1.9, -1.9, -1.9, -1.0]
+- [Clifford, viridis, 0, 0, 0.75, 1.34, -1.93, 1.0]
+- [Clifford, inferno, 0, 0, -1.32, -1.65, 0.74, 1.81]
+- [Clifford, bgy, 0, 0, -1.6, 1.6, 0.7, -1.0]
+- [Clifford, bmw, 0, 0, -1.7, 1.5, -0.5, 0.7]
+- [De_Jong, kbc, 0, 0, -1.244, -1.251, -1.815, -1.908]
+- [De_Jong, viridis, 0, 0, 1.7, 1.7, 0.6, 1.2]
+- [De_Jong, inferno, 0, 0, 1.4, -2.3, 2.4, -2.1]
+- [De_Jong, bgy, 0, 0, -2.7, -0.09, -0.86, -2.2]
+- [De_Jong, bmw, 0, 0, -0.827, -1.637, 1.659, -0.943]
+- [De_Jong, bgyw, 0, 0, -2.24, 0.43, -0.65, -2.43]
+- [De_Jong, bmy, 0, 0, 2.01, -2.53, 1.61, -0.33]
+- [De_Jong, fire, 0, 0, 1.4, 1.56, 1.4, -6.56]
+- [Svensson, bmw, 0, 0, 1.5, -1.8, 1.6, 0.9]
+- [Svensson, bgyw, 0, 0, -1.78, 1.29, -0.09, -1.18]
+- [Svensson, bmy, 0, 0, -0.91, -1.29, -1.97, -1.56]
+- [Svensson, fire, 0, 0, 1.4, 1.56, 1.4, -6.56]
+- [Bedhead, kgy, 1, 1, -0.81, -0.92]
+- [Bedhead, kbc, 1, 1, -0.64, 0.76]
+- [Bedhead, viridis, 1, 1, 0.06, 0.98]
+- [Bedhead, inferno, 1, 1, -0.67, 0.83]
+- [Fractal_Dream, bgy, 0.1, 0.1, -0.966918, 2.879879, 0.765145, 0.744728]
+- [Fractal_Dream, bmw, 0.1, 0.1, -2.8276, 1.2813, 1.9655, 0.597]
+- [Fractal_Dream, bgyw, 0.1, 0.1, -1.1554, -2.3419, -1.9799, 2.1828]
+- [Fractal_Dream, bmy, 0.1, 0.1, -1.9956, -1.4528, -2.6206, 0.8517]
+- [Hopalong1, bgy, 0, 0, 2.0, 1.0, 0.0]
+- [Hopalong1, bmw, 0, 0, -11.0, 0.05, 0.5]
+- [Hopalong1, bgyw, 0, 0, 2.0, 0.05, 2.0]
+- [Hopalong1, bmy, 0, 0, 1.1, 0.5, 1.0]
+- [Hopalong2, gray, 0, 0, 7.17, 8.44, 2.56]
+- [Hopalong2, kgy, 0, 0, 7.8, 0.13, 8.15]
+- [Hopalong2, kbc, 0, 0, 9.7, 1.6, 7.9]
+- [Gumowski_Mira, bgy, 0.1, 0.1, 0.0, 0.5, -0.75]
+- [Gumowski_Mira, bmw, 0, 1, 0.008, 0.05, -0.496]
+- [Gumowski_Mira, bgyw, 0.1, 0.1, 0.0, 0.5, -0.7509]
+- [Gumowski_Mira, bmy, 0, 1, 0.0, 0.5, -0.22]
+- [Gumowski_Mira, fire, 0, 1, 0.008, 0.05, -0.9]
+- [Gumowski_Mira, gray, 0, 1, 0.008, 0.05, -0.45]
+- [Gumowski_Mira, kgy, 0.1, 0.1, 0.008, 0.05, 0.16]
+- [Gumowski_Mira, kbc, 0, 0.5, 0.008, 0.05, -0.7]
+- [Gumowski_Mira, viridis, 0.5, 0, 0.0, 0.05, -0.2]
+- [Gumowski_Mira, inferno, 0.5, 0.5, 0.0, 0.05, -0.22]
+- [Gumowski_Mira, bgy, 0, 0.5, 0.0, 0.05, -0.31]
+- [Gumowski_Mira, bmw, 0, 0.5, 0.0, 0.05, -0.55]
+- [Gumowski_Mira, bgyw, 0.5, 0.5, 0.0, 0.05, -0.23]
+- [Gumowski_Mira, bmy, 0.5, 0.5, 0.009, 0.05, 0.32]
+- [Gumowski_Mira, fire, 0.1, 0.1, 0.0, 0.5, -0.65]
+- [Gumowski_Mira, gray, 0.0, 0.5, 0.0, 0, -0.578]
+- [Gumowski_Mira, kgy, 0.0, 0.5, 0.0, 0, -0.604]
+- [Gumowski_Mira, kbc, 0.0, 0.5, 0.0, 0, 0.228]
+- [Gumowski_Mira, viridis, 0.0, 0.5, 0.0, 0, -0.002]
+- [Gumowski_Mira, inferno, 0.0, 0.5, 0.0, 0, -0.623]
+- [Symmetric_Icon, viridis, 0.01, 0.01, 1.8, 0.0, 1.0, 0.1, -1.93, 5]
+- [Symmetric_Icon, inferno, 0.01, 0.01, 5.0, -1.0, 1.0, 0.188, -2.5, 5]
+- [Symmetric_Icon, bgy, 0.01, 0.01, -1.0, 0.1, -0.82, 0.12, 1.56, 3]
+- [Symmetric_Icon, bmw, 0.01, 0.01, 1.806, 0.0, 1.0, 0.0, -1.806, 5]
+- [Symmetric_Icon, bgyw, 0.01, 0.01, 10.0, -12.0, 1.0, 0.0, -2.195, 3]
+- [Symmetric_Icon, bmy, 0.01, 0.01, -2.5, 0.0, 0.9, 0.0, 2.5, 3]
+- [Symmetric_Icon, fire, 0.01, 0.01, 3.0, -16.79, 1.0, 0.0, -2.05, 9]
+- [Symmetric_Icon, gray, 0.01, 0.01, 5.0, 1.5, 1.0, 0.0, -2.7, 6]
+- [Symmetric_Icon, kgy, 0.01, 0.01, 1.0, -0.1, 0.167, 0.0, -2.08, 7]
+- [Symmetric_Icon, kbc, 0.01, 0.01, 2.32, 0.0, 0.75, 0.0, -2.32, 5]
+- [Symmetric_Icon, viridis, 0.01, 0.01, -2.0, 0.0, -0.5, 0.0, 2.6, 5]
+- [Symmetric_Icon, inferno, 0.01, 0.01, 2.0, 0.2, 0.1, 0.0, -2.34, 5]
+- [Symmetric_Icon, bgy, 0.01, 0.01, 2.0, 0.0, 1.0, 0.1, -1.86, 4]
+- [Symmetric_Icon, bmw, 0.01, 0.01, -1.0, 0.1, -0.82, 0.0, 1.56, 3]
+- [Symmetric_Icon, bgyw, 0.01, 0.01, -1.0, 0.03, -0.8, 0.0, 1.455, 3]
+- [Symmetric_Icon, bmy, 0.01, 0.01, -2.5, -0.1, 0.9, -0.15, 2.39, 16]

File diff suppressed because it is too large
+ 203 - 0
datashader-work/datashader-examples/topics/bay_trimesh.ipynb


File diff suppressed because it is too large
+ 637 - 0
datashader-work/datashader-examples/topics/census.ipynb


File diff suppressed because it is too large
+ 200 - 0
datashader-work/datashader-examples/topics/gerrymandering.ipynb


+ 68 - 0
datashader-work/datashader-examples/topics/index.ipynb

@@ -0,0 +1,68 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Examples of what can be done with Datashader.\n",
+    "\n",
+    "To learn how to use Datashader, see the [Getting Started Guide](../getting_started/index.ipynb).\n",
+    "\n",
+    "Detailed documentation is contained in the [User Guide](../user_guide/index.ipynb).\n",
+    "\n",
+    "Contents:\n",
+    "\n",
+    "* [census](census.ipynb)  \n",
+    "   Population and racial data from the 2010 US Census visualized as scatter points.\n",
+    "\n",
+    "* [gerrymandering](gerrymandering.ipynb)  \n",
+    "   US Congressional boundaries overlaid on 2010 US Census data.\n",
+    "   \n",
+    "* [landsat](landsat.ipynb)  \n",
+    "   Hyperspectral satellite imaging data from Landsat8 visualized as rasters.\n",
+    "\n",
+    "* [lidar](lidar.ipynb)  \n",
+    "   LIDAR imaging data visualized as points.\n",
+    "  \n",
+    "* [network packets](network_packets.ipynb)  \n",
+    "   PCAP network traffic data visualized as network graphs.\n",
+    "\n",
+    "* [nyc taxi](nyc_taxi.ipynb)  \n",
+    "   NYC Taxi data visualized as points, curves, and bars.  Uses trip data originally from the [NYC Taxi dataset](http://www.nyc.gov/html/tlc/html/about/trip_record_data.shtml)\n",
+    "but preprocessed using `taxi_preprocessing_example.py` for convenience.\n",
+    "   \n",
+    "* [opensky](opensky.ipynb)  \n",
+    "   OpenSky airplane trajectories visualized as points.\n",
+    "\n",
+    "* [osm-1billion](osm-1billion.ipynb)  \n",
+    "   1-billion-point OpenStreetMap dataset visualized as points on a laptop, in memory.\n",
+    "\n",
+    "* [osm](osm.ipynb)  \n",
+    "   2.7-billion-point OpenStreetMap dataset visualized as points on a laptop, out of core.\n",
+    "\n",
+    "* [param dashboard](param_dashboard.ipynb)  \n",
+    "   30-line dashboard for datashader plots based on the Param library.\n",
+    "\n",
+    "* [bay trimesh](bay_trimesh.ipynb)  \n",
+    "   Rendering a triangular mesh of water depths in the Chesapeake and Delaware bay areas.\n",
+    "\n",
+    "* [Amazon.com center distance](https://anaconda.org/defusco/amz_centers/notebook)  \n",
+    "   Cities in the USA colored by their distance to the nearest Amazon.com distribution center.\n",
+    "\n",
+    "* [solar](solar.ipynb)  \n",
+    "   Solar radiation data.\n",
+    "\n",
+    "* [uk_researchers](uk_researchers.ipynb)  \n",
+    "   Network graph of collaborations between UK researchers.\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

File diff suppressed because it is too large
+ 295 - 0
datashader-work/datashader-examples/topics/landsat.ipynb


+ 345 - 0
datashader-work/datashader-examples/topics/network_packets.ipynb

@@ -0,0 +1,345 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Graphing network packets"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This notebook currently relies on HoloViews 1.9 or above. Run `conda install -c ioam/label/dev holoviews` to install it."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Preparing data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The data source comes from a publicly available network forensics repository: http://www.netresec.com/?page=PcapFiles. The selected file is https://download.netresec.com/pcap/maccdc-2012/maccdc2012_00000.pcap.gz.\n",
+    "\n",
+    "```\n",
+    "tcpdump -qns 0 -r maccdc2012_00000.pcap | grep tcp > maccdc2012_00000.txt\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For example, here is a snapshot of the resulting output:"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "```\n",
+    "09:30:07.780000 IP 192.168.202.68.8080 > 192.168.24.100.1038: tcp 1380\n",
+    "09:30:07.780000 IP 192.168.24.100.1038 > 192.168.202.68.8080: tcp 0\n",
+    "09:30:07.780000 IP 192.168.202.68.8080 > 192.168.24.100.1038: tcp 1380\n",
+    "09:30:07.780000 IP 192.168.202.68.8080 > 192.168.24.100.1038: tcp 1380\n",
+    "09:30:07.780000 IP 192.168.27.100.37877 > 192.168.204.45.41936: tcp 0\n",
+    "09:30:07.780000 IP 192.168.24.100.1038 > 192.168.202.68.8080: tcp 0\n",
+    "09:30:07.780000 IP 192.168.202.68.8080 > 192.168.24.100.1038: tcp 1380\n",
+    "09:30:07.780000 IP 192.168.202.68.8080 > 192.168.24.100.1038: tcp 1380\n",
+    "09:30:07.780000 IP 192.168.202.68.8080 > 192.168.24.100.1038: tcp 1380\n",
+    "09:30:07.780000 IP 192.168.202.68.8080 > 192.168.24.100.1038: tcp 1380\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Given the directional nature of network traffic and the numerous ports per node, we will simplify the graph by treating traffic between nodes as undirected and ignorning the distinction between ports. The graph edges will have weights represented by the total number of bytes across both nodes in either direction."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "```\n",
+    "python pcap_to_parquet.py maccdc2012_00000.txt\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The resulting output will be two Parquet dataframes, `maccdc2012_nodes.parq` and `maccdc2012_edges.parq`."
+   ]
+  },
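+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For reference, the core of that aggregation can be sketched in a few lines of pandas. This is only an illustrative sketch: the toy data and the `src`/`dst`/`bytes` column names are assumptions, and `pcap_to_parquet.py` remains the real implementation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "# Hypothetical flows parsed from the tcpdump text output\n",
+    "flows = pd.DataFrame({\n",
+    "    'src':   ['192.168.202.68', '192.168.24.100', '192.168.202.68'],\n",
+    "    'dst':   ['192.168.24.100', '192.168.202.68', '192.168.24.100'],\n",
+    "    'bytes': [1380, 0, 1380],\n",
+    "})\n",
+    "\n",
+    "# Drop direction by ordering each node pair, then sum bytes per undirected pair\n",
+    "a, b = flows['src'], flows['dst']\n",
+    "undirected = pd.DataFrame({\n",
+    "    'source': a.where(a <= b, b),\n",
+    "    'target': b.where(a <= b, a),\n",
+    "    'bytes': flows['bytes'],\n",
+    "})\n",
+    "undirected.groupby(['source', 'target'])['bytes'].sum().rename('weight').reset_index()"
+   ]
+  },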
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Loading data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import holoviews as hv\n",
+    "from holoviews import opts, dim\n",
+    "import networkx as nx\n",
+    "import dask.dataframe as dd\n",
+    "\n",
+    "from holoviews.operation.datashader import (\n",
+    "    datashade, dynspread, directly_connect_edges, bundle_graph, stack\n",
+    ")\n",
+    "from holoviews.element.graphs import layout_nodes\n",
+    "from datashader.layout import random_layout\n",
+    "from colorcet import fire\n",
+    "\n",
+    "hv.extension('bokeh')\n",
+    "\n",
+    "keywords = dict(bgcolor='black', width=800, height=800, xaxis=None, yaxis=None)\n",
+    "opts.defaults(opts.Graph(**keywords), opts.Nodes(**keywords), opts.RGB(**keywords))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "edges_df = dd.read_parquet('../data/maccdc2012_full_edges.parq').compute()\n",
+    "edges_df = edges_df.reset_index(drop=True)\n",
+    "graph = hv.Graph(edges_df)\n",
+    "len(edges_df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Edge bundling & layouts"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Datashader and HoloViews support a number of different graph layouts, including circular, force-atlas, and random layouts. Since large graphs with thousands of edges can become quite messy when plotted, datashader also provides functionality to bundle the edges."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Circular layout"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "By default the HoloViews Graph object lays out nodes using a circular layout. Once we have declared the ``Graph`` object we can simply apply the ``bundle_graph`` operation. We also overlay the datashaded graph with the nodes, letting us identify each node by hovering."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "opts.defaults(opts.Nodes(size=5, padding=0.1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "circular = bundle_graph(graph)\n",
+    "datashade(circular, width=800, height=800) * circular.nodes"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Force Atlas 2 layout"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For other graph layouts you can use the ``layout_nodes`` operation, supplying a datashader or NetworkX layout function. Here we will use the ``nx.spring_layout`` function, based on the [Fruchterman-Reingold](https://en.wikipedia.org/wiki/Force-directed_graph_drawing) algorithm. Instead of bundling the edges we may also use the ``directly_connect_edges`` function:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "forceatlas = directly_connect_edges(layout_nodes(graph, layout=nx.spring_layout))\n",
+    "datashade(forceatlas, width=800, height=800) * forceatlas.nodes"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Random layout"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Datashader also provides a number of layout functions in case you don't want to depend on NetworkX:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "random = bundle_graph(layout_nodes(graph, layout=random_layout))\n",
+    "datashade(random, width=800, height=800) * random.nodes"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Showing nodes with active traffic"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To show only nodes with active traffic, we apply ``select`` on the Graph to keep just those edges with a weight of more than 10,000. By overlaying this sub-graph of high-traffic edges on the full datashaded graph, we can take advantage of the interactive hover and tap features that bokeh provides while still revealing the full graph in the background."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "overlay = datashade(circular, width=800, height=800) * circular.select(weight=(10000, None))\n",
+    "overlay.opts(\n",
+    "    opts.Graph(edge_line_color='white', edge_hover_line_color='blue', padding=0.1))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Highlight TCP and UDP traffic"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Using the same selection features, we can highlight TCP and UDP connections separately, again overlaying them on top of the full datashaded graph. Hovering reveals the edges over the highlighted nodes, and by setting an alpha level we can show nodes carrying both TCP (blue) and UDP (red) traffic in purple."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "udp_opts = opts.Graph(edge_hover_line_color='red', node_size=20, \n",
+    "                      node_fill_color='red', edge_selection_line_color='red')\n",
+    "tcp_opts = opts.Graph(edge_hover_line_color='blue', \n",
+    "                      node_fill_color='blue', edge_selection_line_color='blue')\n",
+    "\n",
+    "udp = forceatlas.select(protocol='udp', weight=(10000, None)).opts(udp_opts)\n",
+    "tcp = forceatlas.select(protocol='tcp', weight=(10000, None)).opts(tcp_opts)\n",
+    "layout = datashade(forceatlas, width=800, height=800, normalization='log', cmap=['black', 'white']) * tcp * udp\n",
+    "\n",
+    "layout.opts(\n",
+    "    opts.Graph(edge_alpha=0, edge_hover_alpha=0.5, edge_nonselection_alpha=0, inspection_policy='edges',\n",
+    "               node_size=8, node_alpha=0.5, edge_color=dim('weight')))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Coloring by protocol"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As we have already seen, we can easily apply selections to ``Graph`` objects. Here we use this functionality to select by protocol, datashade the sub-graph for each protocol with a different colormap, and finally stack the resulting datashaded layers:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from bokeh.palettes import Blues9, Reds9, Greens9\n",
+    "ranges = dict(x_range=(-.5, 1.6), y_range=(-.5, 1.6), width=800, height=800)\n",
+    "protocols = [('tcp', Blues9), ('udp', Reds9), ('icmp', Greens9)]\n",
+    "shaded = hv.Overlay([datashade(forceatlas.select(protocol=p), cmap=cmap, **ranges)\n",
+    "                     for p, cmap in protocols]).collate()\n",
+    "stack(shaded * dynspread(datashade(forceatlas.nodes, cmap=['white'], **ranges)), link_inputs=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Selecting the highest targets"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "With a bit of help from pandas we can also extract the twenty most targeted nodes and overlay them on top of the datashaded plot:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "target_counts = list(edges_df.groupby('target').count().sort_values('weight').iloc[-20:].index.values)\n",
+    "overlay = (datashade(forceatlas, cmap=fire[128:]) * \n",
+    "           datashade(forceatlas.nodes, cmap=['cyan']) *\n",
+    "           forceatlas.nodes.select(index=target_counts))\n",
+    "\n",
+    "overlay.opts(opts.Nodes(size=8), opts.RGB(width=800, height=800))"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

File diff too large to display
+ 474 - 0
datashader-work/datashader-examples/topics/nyc_taxi.ipynb


File diff too large to display
+ 408 - 0
datashader-work/datashader-examples/topics/opensky.ipynb


File diff too large to display
+ 194 - 0
datashader-work/datashader-examples/topics/osm-1billion.ipynb


File diff too large to display
+ 132 - 0
datashader-work/datashader-examples/topics/osm.ipynb


File diff too large to display
+ 500 - 0
datashader-work/datashader-examples/topics/strange_attractors.ipynb


File diff too large to display
+ 170 - 0
datashader-work/datashader-examples/topics/uk_researchers.ipynb


+ 90 - 0
datashader-work/datashader-examples/user_guide/10_Performance.ipynb

@@ -0,0 +1,90 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Datashader is designed to make it simple to work with even very large\n",
+    "datasets. To get good performance, it is essential that each step in the\n",
+    "overall processing pipeline be set up appropriately. Below we share some\n",
+    "of our suggestions based on our own [Benchmarking] and optimization\n",
+    "experience, which should help you obtain suitable performance in your\n",
+    "own work.\n",
+    "\n",
+    "File formats\n",
+    "------------\n",
+    "\n",
+    "Based on our [testing with various file formats], we recommend storing\n",
+    "any large columnar datasets in the [Apache Parquet] format when\n",
+    "possible, using the [fastparquet] library with \"Snappy\" compression:\n",
+    "\n",
+    "```\n",
+    ">>> import dask.dataframe as dd\n",
+    ">>> dd.to_parquet(df, filename, compression=\"SNAPPY\")\n",
+    "```\n",
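+    "\n",
+    "Reading the data back in later is then a single call to dask's `read_parquet`:\n",
+    "\n",
+    "```\n",
+    ">>> df = dd.read_parquet(filename)\n",
+    "```\n",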
+    "\n",
+    "If your data includes categorical values that take on a limited, fixed\n",
+    "number of possible values (e.g. \"Male\", \"Female\"), convert those columns\n",
+    "to pandas' categorical dtype before saving: it uses a more memory-efficient\n",
+    "representation, is optimized for common operations such as sorting and\n",
+    "finding uniques, and is preserved in the Parquet file:\n",
+    "\n",
+    "```\n",
+    ">>> df[colname] = df[colname].astype('category')\n",
+    "```\n",
+    "\n",
+    "By default, numerical datasets typically use 64-bit floats, but many\n",
+    "applications do not require 64-bit precision when aggregating over a\n",
+    "very large number of datapoints to show a distribution. Using 32-bit\n",
+    "floats cuts storage and memory requirements in half and also\n",
+    "typically greatly speeds up computations because only half as much data\n",
+    "needs to be accessed in memory. If applicable to your particular\n",
+    "situation, just convert the data type before generating the file:\n",
+    "\n",
+    "```\n",
+    ">>> df[colname] = df[colname].astype(numpy.float32)\n",
+    "```\n",
+    "\n",
+    "Single machine\n",
+    "--------------\n",
+    "\n",
+    "Datashader supports both Pandas and Dask dataframes, but Dask dataframes\n",
+    "typically give higher performance even on a single machine, because Dask\n",
+    "makes good use of all available cores and also supports out-of-core\n",
+    "operation for datasets larger than memory.\n",
+    "\n",
+    "Dask works on one chunk of the data at a time, called a partition.\n",
+    "With dask on a single machine, a rule of thumb for the number of\n",
+    "partitions to use is `multiprocessing.cpu_count()`, which allows Dask to\n",
+    "use one thread per core for parallelizing computations.\n",
+    "\n",
+    "When the entire dataset fits into memory at once, you can persist the\n",
+    "data as a Dask dataframe prior to passing it into datashader, to ensure\n",
+    "that data only needs to be loaded once:\n",
+    "\n",
+    "```\n",
+    ">>> from dask import dataframe as dd\n",
+    ">>> import multiprocessing as mp\n",
+    ">>> dask_df = dd.from_pandas(df, npartitions=mp.cpu_count())\n",
+    ">>> dask_df = dask_df.persist()\n",
+    "...\n",
+    ">>> cvs = datashader.Canvas(...)\n",
+    ">>> agg = cvs.points(dask_df, ...)\n",
+    "```\n",
+    "\n",
+    "  [Benchmarking]: https://github.com/bokeh/datashader/issues/313\n",
+    "  [testing with various file formats]: https://github.com/bokeh/datashader/issues/129\n",
+    "  [Apache Parquet]: https://parquet.apache.org/\n",
+    "  [fastparquet]: https://github.com/dask/fastparquet"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}

File diff too large to display
+ 503 - 0
datashader-work/datashader-examples/user_guide/1_Plotting_Pitfalls.ipynb


+ 19 - 0
datashader-work/datashader-examples/user_guide/2_Points.ipynb

@@ -0,0 +1,19 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This notebook is under construction; please see the [nyc_taxi](../topics/nyc_taxi.ipynb) example for a tutorial covering Points."
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

File diff too large to display
+ 461 - 0
datashader-work/datashader-examples/user_guide/3_Timeseries.ipynb


File diff too large to display
+ 179 - 0
datashader-work/datashader-examples/user_guide/4_Trajectories.ipynb


File diff too large to display
+ 271 - 0
datashader-work/datashader-examples/user_guide/5_Rasters.ipynb


File diff too large to display
+ 427 - 0
datashader-work/datashader-examples/user_guide/6_Trimesh.ipynb


File diff too large to display
+ 462 - 0
datashader-work/datashader-examples/user_guide/7_Networks.ipynb


+ 330 - 0
datashader-work/datashader-examples/user_guide/8_Geography.ipynb

@@ -0,0 +1,330 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 8. Geography\n",
+    "\n",
+    "Datashader's `geo` module provides helper functions that should be familiar to the geospatial community.\n",
+    "\n",
+    "Some of the functions available include\n",
+    "* [Generate Terrain](#ds.geo---generate-terrain)\n",
+    "* [Hillshade](#ds.geo---hillshade-function)\n",
+    "* [Slope](#ds.geo---slope-function)\n",
+    "* [Aspect](#ds.geo---aspect-function)\n",
+    "* [Bump](#ds.geo---bump-function)\n",
+    "* [NDVI](#ds.geo---ndvi-function)\n",
+    "* [Mean](#ds.geo---mean-function)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datashader.transfer_functions import shade, stack\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Generate Terrain Data\n",
+    "\n",
+    "To demonstrate using these functions, let's generate some fake terrain..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datashader import Canvas\n",
+    "from datashader.geo import generate_terrain\n",
+    "from datashader.colors import Elevation\n",
+    "\n",
+    "W = 1000\n",
+    "H = 750\n",
+    "\n",
+    "canvas = Canvas(plot_width=W, plot_height=H, x_range=(-20e6, 20e6), y_range=(-20e6, 20e6))\n",
+    "terrain = generate_terrain(canvas)\n",
+    "\n",
+    "shade(terrain, cmap=['black', 'white'], how='linear')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The grayscale value above shows the elevation linearly in intensity (with the large black areas indicating low elevation), but it will look more like a landscape if we map the lowest values to colors representing water, and the highest to colors representing mountaintops:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "shade(terrain, cmap=Elevation, how='linear')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Hillshade\n",
+    "\n",
+    "[Hillshade](https://en.wikipedia.org/wiki/Terrain_cartography) is a technique used to visualize terrain as shaded relief, illuminating it with a hypothetical light source. The illumination value for each cell is determined by its orientation to the light source, which is based on slope and aspect."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datashader.geo import hillshade\n",
+    "\n",
+    "illuminated = hillshade(terrain)\n",
+    "\n",
+    "shade(illuminated, cmap=['gray', 'white'], alpha=255, how='linear')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can combine hillshading with elevation colormapping to indicate terrain types:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stack(shade(illuminated, cmap=['gray', 'white'], alpha=255, how='linear'),\n",
+    "      shade(terrain, cmap=Elevation, how='linear', alpha=128))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Slope\n",
+    "[Slope](https://en.wikipedia.org/wiki/Slope) is the inclination of a surface. \n",
+    "In geography, *slope* is the amount of change in elevation of a terrain relative to its surroundings.\n",
+    "\n",
+    "Datashader's slope function returns slope in degrees.  Below we highlight terrain with slopes between 25 and 50 degrees, bracketing the [avalanche-prone slopes around 38 degrees](http://wenatcheeoutdoors.org/2016/04/07/avalanche-abcs-for-snowshoers/)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datashader.geo import slope\n",
+    "\n",
+    "avalanche_slope_risk = slope(terrain)\n",
+    "avalanche_slope_risk.data = np.where(np.logical_and(avalanche_slope_risk.data > 25, \n",
+    "                                     avalanche_slope_risk.data < 50),\n",
+    "                                     1, np.nan)\n",
+    "\n",
+    "stack(\n",
+    "    shade(terrain, cmap=['black', 'white'], how='linear'),\n",
+    "    shade(illuminated, cmap=['black', 'white'], alpha=128, how='linear'),\n",
+    "    shade(avalanche_slope_risk, cmap='red', alpha=100), \n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Aspect\n",
+    "\n",
+    "[Aspect](https://en.wikipedia.org/wiki/Aspect_(geography)) is the orientation of slope, measured clockwise in degrees from 0 to 360, where 0 is north-facing, 90 is east-facing, 180 is south-facing, and 270 is west-facing.\n",
+    "\n",
+    "Below, we look to find slopes which face close to north."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datashader.geo import aspect\n",
+    "\n",
+    "north_faces = aspect(terrain)\n",
+    "north_faces.data = np.where(np.logical_or(north_faces.data > 350 ,\n",
+    "                                          north_faces.data < 10), 1, np.nan)\n",
+    "stack(\n",
+    "    shade(terrain, cmap=['black', 'white'], how='linear'),\n",
+    "    shade(illuminated, cmap=['black', 'white'], alpha=128, how='linear'),\n",
+    "    shade(north_faces, cmap=['aqua'], alpha=50), \n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## NDVI\n",
+    "\n",
+    "The Normalized Difference Vegetation Index (NDVI) quantifies vegetation by measuring the difference between near-infrared (which vegetation strongly reflects) and red light (which vegetation absorbs).\n",
+    "\n",
+    "Negative values most likely indicate water; values close to +1 suggest dense green foliage; and values near zero indicate little or no vegetation, possibly an urbanized area."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The output of *NDVI* ranges over [-1, +1], where `-1` means relatively more \"Red\" radiation and `+1` relatively more \"NIR\" radiation.\n",
+    "\n",
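+    "As a standard definition in terms of the two bands:\n",
+    "\n",
+    "$$NDVI = \\frac{NIR - Red}{NIR + Red}$$\n",
+    "\n",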
+    "Below, we simulate the red and near-infrared bands using `perlin` random noise from `datashader.geo`, with different seeds and frequencies.  Green areas should be those > 0, where higher NDVI values would indicate vegetation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datashader.geo import ndvi\n",
+    "from datashader.geo import perlin\n",
+    "\n",
+    "near_infrared_band = perlin(W, H, freq=(4, 3), seed=1)\n",
+    "red_band = perlin(W, H, freq=(32, 32), seed=2)\n",
+    "vegetation_index = ndvi(near_infrared_band, red_band)\n",
+    "shade(vegetation_index, cmap=['purple','black','green'], how='linear')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Bump\n",
+    "Bump mapping is a cartographic technique often used to create the appearance of trees or other land features.\n",
+    "\n",
+    "The `datashader.geo.bump` function produces a bump aggregate that can then be used to add detail to the terrain.  In this case, we will pretend the bumps are trees and shade them with green."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datashader.geo import bump\n",
+    "from functools import partial\n",
+    "\n",
+    "def tree_heights(locations, min_val, max_val, height):\n",
+    "    out = np.zeros(len(locations))\n",
+    "    for i, (x, y) in enumerate(locations):\n",
+    "        val = terrain.data[y, x]\n",
+    "        if val > min_val and val < max_val:\n",
+    "            out[i] = height\n",
+    "        else:\n",
+    "            out[i] = 0\n",
+    "    return out\n",
+    "\n",
+    "TREE_COUNT = 200000\n",
+    "\n",
+    "trees = bump(W, H, count=TREE_COUNT // 3,\n",
+    "             height_func=partial(tree_heights, min_val=50, max_val=500, height=10))\n",
+    "\n",
+    "trees += bump(W, H, count=TREE_COUNT,\n",
+    "             height_func=partial(tree_heights, min_val=500, max_val=2000, height=20))\n",
+    "\n",
+    "trees += bump(W, H, count=TREE_COUNT // 3,\n",
+    "             height_func=partial(tree_heights, min_val=2000, max_val=3000, height=10))\n",
+    "\n",
+    "tree_colorize = trees.copy()\n",
+    "tree_colorize.data[tree_colorize.data == 0] = np.nan\n",
+    "\n",
+    "stack(shade(terrain + trees, cmap=['black', 'white'], how='linear'),\n",
+    "      shade(hillshade(terrain + trees), cmap=['black', 'white'], alpha=128, how='linear'),\n",
+    "      shade(tree_colorize, cmap='limegreen', how='linear'))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Mean\n",
+    "The `datashader.geo.mean` function smooths a given aggregate using a 3x3 mean convolution filter. Optional parameters include `passes`, which runs the mean filter multiple times, and `excludes`, a list of values the filter will leave unmodified.\n",
+    "\n",
+    "Just for fun, let's add a coastal vignette to give our terrain scene a bit more character. Notice that the water below now has a coastal gradient, which adds some realism to the scene."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datashader.geo import mean\n",
+    "\n",
+    "LAND_CONSTANT = 50.\n",
+    "\n",
+    "water = terrain.copy()\n",
+    "water.data = np.where(water.data > 0, LAND_CONSTANT, 0)\n",
+    "water = mean(water, passes=10, excludes=[LAND_CONSTANT])\n",
+    "water.data[water.data == LAND_CONSTANT] = np.nan\n",
+    "\n",
+    "stack(\n",
+    "    shade(terrain, cmap=['black', 'white'], how='linear'),\n",
+    "    shade(water, cmap=['aqua','white']),\n",
+    "    shade(hillshade(terrain), cmap=['black', 'white'], alpha=128, how='linear'),\n",
+    ")"
+   ]
+  },
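+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To make the 3x3 mean filter concrete, here is a minimal NumPy sketch of what a single pass does over the interior of an array. This is an illustration of the idea, not datashader's actual implementation, which also handles `passes`, `excludes`, and array edges."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def mean3x3(a):\n",
+    "    \"\"\"One pass of a 3x3 mean filter over the interior cells of a 2D array.\"\"\"\n",
+    "    out = a.astype(float).copy()\n",
+    "    h, w = a.shape\n",
+    "    # Average each interior cell with its eight neighbours\n",
+    "    out[1:-1, 1:-1] = sum(a[1 + dy:h - 1 + dy, 1 + dx:w - 1 + dx]\n",
+    "                          for dy in (-1, 0, 1) for dx in (-1, 0, 1)) / 9.0\n",
+    "    return out\n",
+    "\n",
+    "mean3x3(np.arange(25).reshape(5, 5))"
+   ]
+  },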
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Conclusion\n",
+    "\n",
+    "We've now seen a bunch of datashader's `geo` helper functions.\n",
+    "\n",
+    "Let's make our final archipelago scene by stacking `terrain`, `water`, `hillshade`, and `tree_colorize` together into one output image:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stack(shade(terrain + trees, cmap=Elevation, how='linear'),\n",
+    "      shade(water, cmap=['aqua','white']),\n",
+    "      shade(hillshade(terrain + trees), cmap=['black', 'white'], alpha=128, how='linear'),\n",
+    "      shade(tree_colorize, cmap='limegreen', how='linear'))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### References\n",
+    "- Burrough, P. A., and McDonell, R. A., 1998. Principles of Geographical Information Systems (Oxford University Press, New York), 406 pp.\n",
+    "- Making Maps with Noise Functions: https://www.redblobgames.com/maps/terrain-from-noise/\n",
+    "- How Aspect Works: http://desktop.arcgis.com/en/arcmap/10.3/tools/spatial-analyst-toolbox/how-aspect-works.htm#ESRI_SECTION1_4198691F8852475A9F4BC71246579FAA"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

File diff too large to display
+ 54 - 0
datashader-work/datashader-examples/user_guide/9_Extending.ipynb


+ 58 - 0
datashader-work/datashader-examples/user_guide/index.ipynb

@@ -0,0 +1,58 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The User Guide explains key concepts in detail.\n",
+    "\n",
+    "New users may prefer to start with the introduction in our [Getting Started](../getting_started/index.ipynb) guide.\n",
+    "\n",
+    "To see examples of what can be done with Datashader, see [Topics](../topics/index.ipynb).\n",
+    "\n",
+    "Contents:\n",
+    "\n",
+    "-   [1. Plotting Pitfalls](1_Plotting_Pitfalls.ipynb)  \n",
+    "    Explains how Datashader avoids pitfalls encountered when plotting big datasets\n",
+    "    using techniques designed for small ones.\n",
+    "\n",
+    "-   [2. Points](../topics/nyc_taxi.ipynb)  \n",
+    "    (Under construction; meanwhile points to the [nyc_taxi](../topics/nyc_taxi.ipynb) notebook.)\n",
+    "\n",
+    "-   [3. Timeseries](3_Timeseries.ipynb)  \n",
+    "    Plotting timeseries and other curves.\n",
+    "\n",
+    "-   [4. Trajectories](4_Trajectories.ipynb)  \n",
+    "    Plotting trajectories (e.g. connected GPS points) in a plane.\n",
+    "\n",
+    "-   [5. Rasters](5_Rasters.ipynb)  \n",
+    "    Plotting gridded (raster) data, from regularly sampled 2D points in\n",
+    "    a plane.\n",
+    "\n",
+    "-   [6. Trimesh](6_Trimesh.ipynb)  \n",
+    "    Plotting large irregular triangular grids (meshes).\n",
+    "\n",
+    "-   [7. Networks](7_Networks.ipynb)  \n",
+    "    Plotting large network graphs.\n",
+    "\n",
+    "-   [8. Geography](8_Geography.ipynb)  \n",
+    "    Using Datashader for geographic applications.\n",
+    "\n",
+    "-   [9. Extending](9_Extending.ipynb)  \n",
+    "    Extending datashader with new components and functionality.\n",
+    "\n",
+    "-   [10. Performance](10_Performance.ipynb)  \n",
+    "    Hints for getting good performance out of Datashader in your\n",
+    "    applications."
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

+ 868 - 0
datashader-work/datashader-tryout.ipynb

@@ -9,6 +9,874 @@
    ]
   },
   {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-03-26T00:34:45.649960Z",
+     "start_time": "2019-03-26T00:34:44.141982Z"
+    }
+   },
+   "outputs": [
+    [HTML table output elided: a pandas preview of the pairwise correlation matrix across World Bank development indicators (access to electricity, school enrollment, adjusted net income, unemployment, etc.); truncated in the diff]
+       "      <td>-0.210799</td>\n",
+       "      <td>-0.352545</td>\n",
+       "      <td>-0.059250</td>\n",
+       "      <td>0.035615</td>\n",
+       "      <td>0.190948</td>\n",
+       "      <td>-0.731950</td>\n",
+       "      <td>-0.726983</td>\n",
+       "      <td>0.508373</td>\n",
+       "      <td>-0.732711</td>\n",
+       "      <td>-0.065738</td>\n",
+       "      <td>-0.029544</td>\n",
+       "      <td>0.215352</td>\n",
+       "      <td>0.359209</td>\n",
+       "      <td>-0.257617</td>\n",
+       "      <td>-0.015972</td>\n",
+       "      <td>-0.298082</td>\n",
+       "      <td>0.003919</td>\n",
+       "      <td>-0.402923</td>\n",
+       "      <td>-0.118220</td>\n",
+       "      <td>-0.687455</td>\n",
+       "      <td>-0.071067</td>\n",
+       "      <td>0.052911</td>\n",
+       "      <td>0.043272</td>\n",
+       "      <td>0.039616</td>\n",
+       "      <td>0.099436</td>\n",
+       "      <td>0.302401</td>\n",
+       "      <td>0.168348</td>\n",
+       "      <td>0.174634</td>\n",
+       "      <td>0.168330</td>\n",
+       "      <td>0.336953</td>\n",
+       "      <td>0.118123</td>\n",
+       "      <td>0.123931</td>\n",
+       "      <td>-0.039366</td>\n",
+       "      <td>0.077602</td>\n",
+       "      <td>-0.018578</td>\n",
+       "      <td>0.035569</td>\n",
+       "      <td>-0.061950</td>\n",
+       "      <td>-0.046145</td>\n",
+       "      <td>-0.299525</td>\n",
+       "      <td>0.284926</td>\n",
+       "      <td>...</td>\n",
+       "      <td>-0.649772</td>\n",
+       "      <td>-0.639764</td>\n",
+       "      <td>-0.649406</td>\n",
+       "      <td>0.160246</td>\n",
+       "      <td>0.156929</td>\n",
+       "      <td>0.482209</td>\n",
+       "      <td>-0.048860</td>\n",
+       "      <td>0.145967</td>\n",
+       "      <td>0.062230</td>\n",
+       "      <td>0.057727</td>\n",
+       "      <td>0.166750</td>\n",
+       "      <td>0.396028</td>\n",
+       "      <td>-0.082077</td>\n",
+       "      <td>0.244498</td>\n",
+       "      <td>-0.046021</td>\n",
+       "      <td>-0.038213</td>\n",
+       "      <td>0.819346</td>\n",
+       "      <td>0.748711</td>\n",
+       "      <td>0.066098</td>\n",
+       "      <td>0.058439</td>\n",
+       "      <td>0.202800</td>\n",
+       "      <td>0.009741</td>\n",
+       "      <td>0.014190</td>\n",
+       "      <td>0.032908</td>\n",
+       "      <td>0.107950</td>\n",
+       "      <td>0.237039</td>\n",
+       "      <td>0.112658</td>\n",
+       "      <td>0.083325</td>\n",
+       "      <td>-0.255822</td>\n",
+       "      <td>0.137900</td>\n",
+       "      <td>0.167890</td>\n",
+       "      <td>0.058591</td>\n",
+       "      <td>0.155298</td>\n",
+       "      <td>0.230228</td>\n",
+       "      <td>0.138319</td>\n",
+       "      <td>0.116490</td>\n",
+       "      <td>0.112381</td>\n",
+       "      <td>0.120923</td>\n",
+       "      <td>-0.111071</td>\n",
+       "      <td>-0.436075</td>\n",
+       "      <td>0.001679</td>\n",
+       "      <td>-0.324034</td>\n",
+       "      <td>0.004212</td>\n",
+       "      <td>0.256274</td>\n",
+       "      <td>0.064690</td>\n",
+       "      <td>0.299591</td>\n",
+       "      <td>0.098413</td>\n",
+       "      <td>0.102713</td>\n",
+       "      <td>0.102086</td>\n",
+       "      <td>0.038924</td>\n",
+       "      <td>0.082295</td>\n",
+       "      <td>0.062243</td>\n",
+       "      <td>0.165724</td>\n",
+       "      <td>0.188684</td>\n",
+       "      <td>0.182778</td>\n",
+       "      <td>0.084568</td>\n",
+       "      <td>0.608055</td>\n",
+       "      <td>-0.597672</td>\n",
+       "      <td>0.129998</td>\n",
+       "      <td>-0.651126</td>\n",
+       "      <td>-0.645804</td>\n",
+       "      <td>-0.652258</td>\n",
+       "      <td>0.649772</td>\n",
+       "      <td>0.639764</td>\n",
+       "      <td>0.649406</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                      indicator_name                                       ...                                         Wage and salaried workers, total (% of total employment) (modeled ILO estimate)\n",
+       "0            Access to electricity (% of population)                                       ...                                                                                  0.785969                              \n",
+       "1  Access to electricity, rural (% of rural popul...                                       ...                                                                                  0.779741                              \n",
+       "2  Access to electricity, urban (% of urban popul...                                       ...                                                                                  0.634385                              \n",
+       "3  Adjusted net enrollment rate, primary (% of pr...                                       ...                                                                                  0.612722                              \n",
+       "4  Adjusted net enrollment rate, primary, female ...                                       ...                                                                                  0.649406                              \n",
+       "\n",
+       "[5 rows x 838 columns]"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "df = pd.read_csv('C:/Users/willk/Downloads/corrs.csv')\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
    "cell_type": "markdown",
    "metadata": {},
    "source": [

File diff is too large to display
+ 7976 - 0
datashader-work/fishing_watch.ipynb


+ 39 - 0
datashader-work/formatting_data.py

@@ -0,0 +1,39 @@
+
+
+
+import geoviews as gv
+import geoviews.feature as gf
+import xarray as xr
+from cartopy import crs
+
+import pandas as pd
+import numpy as np
+
+gv.extension('bokeh', 'matplotlib')
+
+xr_ensemble = xr.open_dataset('Data-Analysis/datashader-work/geoviews-examples/data/ensemble.nc').load()
+
+from sqlalchemy import create_engine
+engine = create_engine('postgresql://localhost:5432/global_fishing_watch')
+print(engine.table_names())  # confirm the connection by listing tables
+df = pd.read_sql("""SELECT * FROM fishing_effort LIMIT 10000""",
+                 engine, parse_dates=['date'])
+
+df['flag'] = df['flag'].astype('category')
+df['geartype'] = df['geartype'].astype('category')
+df['lat'] = df['lat_bin'] / 100
+df['lon'] = df['lon_bin'] / 100
+df.info()
+
+
+
+def format_df(df, n=10_000):
+    df = df.iloc[:n]
+    df = df.drop_duplicates(subset=['lat', 'lon', 'date'])
+    df = df.sort_values(['lat', 'lon', 'date'])
+    index = pd.MultiIndex.from_arrays([df['lat'], df['lon'], df['date']])
+    df.index = index
+    latitudes = df.index.levels[0]
+    longitudes = df.index.levels[1]
+    times = df.index.levels[2]
+    return latitudes, longitudes, times, df
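
A possible follow-on step (editorial sketch, not part of the commit): with the (lat, lon, date) MultiIndex in place, pandas can pivot the frame straight into a gridded xarray Dataset for GeoViews/Datashader. The fishing_hours column name is an assumption about the fishing_effort schema, not something shown in this diff.

# Editorial sketch -- pivot the MultiIndexed frame into an xarray Dataset.
# 'fishing_hours' is an assumed column name; substitute a real one.
latitudes, longitudes, times, formatted = format_df(df)
fishing_ds = formatted[['fishing_hours']].to_xarray()
print(fishing_ds)  # dims: lat, lon, date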

File diff is too large to display
+ 2454 - 0
datashader-work/geographic-plotting.ipynb


File diff is too large to display
+ 7469 - 0
datashader-work/holoviews-0-2.ipynb


File diff is too large to display
+ 5898 - 0
datashader-work/holoviews-3-4.ipynb


File diff is too large to display
+ 5252 - 0
datashader-work/holoviews-5-6.ipynb


File diff is too large to display
+ 2779 - 0
datashader-work/holoviews-geographic-data.ipynb


File diff is too large to display
+ 3234 - 0
datashader-work/holoviews-large-data.ipynb


File diff is too large to display
+ 2789 - 0
datashader-work/holoviews-pipelines.ipynb


File diff is too large to display
+ 46664 - 0
datashader-work/solar-power-potential.ipynb


+ 3 - 0
datashader-work/solar-power_solar_potential_by_census_tract.csv

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e28299c2d304ffa499df0b87fa108cf790bad1ba9a6dd109cb8a4310ee150e75
+size 38263522
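
The three-line files above are Git LFS pointers: the real CSVs live in LFS storage, and the checked-in stub records only the spec version, a sha256 object id, and the byte size. A tiny reader (editorial sketch, not part of the commit) can pull those fields out, for example to verify the expected download size before fetching:

# Editorial sketch: parse a Git LFS pointer file into a dict of its
# key/value lines (version, oid, size).
def parse_lfs_pointer(path):
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(' ')
            fields[key] = value
    return fields

info = parse_lfs_pointer('datashader-work/solar-power_solar_potential_by_census_tract.csv')
print(int(info['size']))  # 38263522 bytes once fetched with git lfs pull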

+ 3 - 0
datashader-work/solar-power_solar_potential_by_postal_code.csv

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19feb11bafdd2ccf1412b6d042905b3c7b27fe89a246d7ec93af0cd618f955ad
+size 10377291

Binary
datashader-work/solar_potential_by_postal_code_formatted.parquet
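
The Parquet file is committed as a binary blob; assuming it matches the path above, it can be loaded directly (editorial sketch; requires pyarrow or fastparquet):

import pandas as pd

# Editorial sketch: load the formatted solar-potential data.
solar = pd.read_parquet('datashader-work/solar_potential_by_postal_code_formatted.parquet')
print(solar.head())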


File diff is too large to display
+ 2695 - 0
plotly/military-data.ipynb


+ 3 - 0
plotly/military_data.csv

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c30ba6d1467c293683f3f6c64da3f064cb75b6ae6c98d69cd1fa9e4e55999e07
+size 9134

File diff is too large to display
+ 128356 - 0
plotly/plotly-express.ipynb


+ 0 - 670
testing-exercises.ipynb

@@ -1,670 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-23T16:38:38.465675Z",
-     "start_time": "2019-02-23T16:38:38.422180Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The autoreload extension is already loaded. To reload it, use:\n",
-      "  %reload_ext autoreload\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script><script type=\"text/javascript\">if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script><script>requirejs.config({paths: { 'plotly': ['https://cdn.plot.ly/plotly-latest.min']},});if(!window._Plotly) {require(['plotly'],function(plotly) {window._Plotly=plotly;});}</script>"
-      ],
-      "text/vnd.plotly.v1+html": [
-       "<script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script><script type=\"text/javascript\">if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script><script>requirejs.config({paths: { 'plotly': ['https://cdn.plot.ly/plotly-latest.min']},});if(!window._Plotly) {require(['plotly'],function(plotly) {window._Plotly=plotly;});}</script>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script><script type=\"text/javascript\">if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script><script>requirejs.config({paths: { 'plotly': ['https://cdn.plot.ly/plotly-latest.min']},});if(!window._Plotly) {require(['plotly'],function(plotly) {window._Plotly=plotly;});}</script>"
-      ],
-      "text/vnd.plotly.v1+html": [
-       "<script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script><script type=\"text/javascript\">if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script><script>requirejs.config({paths: { 'plotly': ['https://cdn.plot.ly/plotly-latest.min']},});if(!window._Plotly) {require(['plotly'],function(plotly) {window._Plotly=plotly;});}</script>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "import pandas as pd\n",
-    "import numpy as np\n",
-    "\n",
-    "%load_ext autoreload\n",
-    "%autoreload 2\n",
-    "\n",
-    "import sys\n",
-    "sys.path.append('../..')\n",
-    "\n",
-    "# Options for pandas\n",
-    "pd.options.display.max_columns = 20\n",
-    "pd.options.display.max_rows = 10\n",
-    "\n",
-    "# Display all cell outputs\n",
-    "from IPython.core.interactiveshell import InteractiveShell\n",
-    "InteractiveShell.ast_node_interactivity = 'all'\n",
-    "\n",
-    "import plotly.plotly as py\n",
-    "import plotly.graph_objs as go\n",
-    "from plotly.offline import iplot, init_notebook_mode\n",
-    "init_notebook_mode(connected=True)\n",
-    "\n",
-    "import cufflinks\n",
-    "cf.go_offline(connected=True)\n",
-    "cf.set_config_file(theme='pearl')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-23T16:39:00.607978Z",
-     "start_time": "2019-02-23T16:39:00.567876Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(1000, 100)"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df = pd.DataFrame(np.random.randn(1000, 100))\n",
-    "df.shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-23T16:39:16.845392Z",
-     "start_time": "2019-02-23T16:39:16.774748Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(100, 100)"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "corrs = df.corr()\n",
-    "corrs.shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-23T16:42:26.032480Z",
-     "start_time": "2019-02-23T16:42:25.998079Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(array([ 3,  8, 44, 45, 54, 96]), array([54, 96, 45, 44,  3,  8]))"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "threshold = -0.1\n",
-    "direction = 'less'\n",
-    "\n",
-    "if direction == 'greater':\n",
-    "    values_index = np.where(corrs > threshold)\n",
-    "elif direction == 'less':\n",
-    "    values_index = np.where(corrs < threshold)\n",
-    "    \n",
-    "values_index"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-23T16:47:56.940313Z",
-     "start_time": "2019-02-23T16:47:56.909882Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "rows_index = values_index[0]\n",
-    "columns_index = values_index[1]\n",
-    "\n",
-    "pairs = list(map(tuple, set([frozenset((x, y)) for x, y in zip(rows_index, columns_index)])))\n",
-    "\n",
-    "from collections import Counter\n",
-    "\n",
-    "# Counter(pairs)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 36,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-23T16:47:57.429941Z",
-     "start_time": "2019-02-23T16:47:57.397928Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[(3, 54), (8, 96), (44, 45)]"
-      ]
-     },
-     "execution_count": 36,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "pairs"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 49,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-23T17:04:56.074717Z",
-     "start_time": "2019-02-23T17:04:56.041811Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "subset_df = pd.DataFrame(dict(value=corrs.values[values_index], var1=corrs.index[values_index[0]],\n",
-    "                         var2=corrs.columns[values_index[1]]))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 58,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-24T20:36:52.221603Z",
-     "start_time": "2019-02-24T20:36:52.182531Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>value</th>\n",
-       "      <th>var1</th>\n",
-       "      <th>var2</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>-0.111172</td>\n",
-       "      <td>3</td>\n",
-       "      <td>54</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>-0.117402</td>\n",
-       "      <td>8</td>\n",
-       "      <td>96</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>-0.104640</td>\n",
-       "      <td>44</td>\n",
-       "      <td>45</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "      value  var1  var2\n",
-       "0 -0.111172     3    54\n",
-       "1 -0.117402     8    96\n",
-       "2 -0.104640    44    45"
-      ]
-     },
-     "execution_count": 58,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "subset_df.iloc[:int(len(subset_df)/2)]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 53,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-23T17:05:53.369263Z",
-     "start_time": "2019-02-23T17:05:53.337720Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "var1  var2\n",
-       "3     54      1\n",
-       "8     96      1\n",
-       "44    45      1\n",
-       "45    44      1\n",
-       "54    3       1\n",
-       "96    8       1\n",
-       "dtype: int64"
-      ]
-     },
-     "execution_count": 53,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "subset_df.groupby(['var1', 'var2']).size()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 55,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-24T14:57:30.059725Z",
-     "start_time": "2019-02-24T14:57:30.027029Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Index(['value', 'variable1', 'var2'], dtype='object')"
-      ]
-     },
-     "execution_count": 55,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "subset_df.columns.str.replace('var1', 'variable1')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 40,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-23T16:50:24.548345Z",
-     "start_time": "2019-02-23T16:50:24.513301Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "values = []; indices = []; columns = []\n",
-    "\n",
-    "for pair in pairs:\n",
-    "    indices.append(corrs.index[pair[0]])\n",
-    "    columns.append(corrs.columns[pair[1]])\n",
-    "    values.append(corrs.values[pair])\n",
-    "    \n",
-    "subset_df = pd.DataFrame(dict(value=values, var1=indices, var2=columns))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 41,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-23T16:50:25.388032Z",
-     "start_time": "2019-02-23T16:50:25.352969Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>value</th>\n",
-       "      <th>var1</th>\n",
-       "      <th>var2</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>-0.111172</td>\n",
-       "      <td>3</td>\n",
-       "      <td>54</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>-0.117402</td>\n",
-       "      <td>8</td>\n",
-       "      <td>96</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>-0.104640</td>\n",
-       "      <td>44</td>\n",
-       "      <td>45</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "      value  var1  var2\n",
-       "0 -0.111172     3    54\n",
-       "1 -0.117402     8    96\n",
-       "2 -0.104640    44    45"
-      ]
-     },
-     "execution_count": 41,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "subset_df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 42,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-23T16:50:35.252569Z",
-     "start_time": "2019-02-23T16:50:35.217497Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "-0.11117190190235929"
-      ]
-     },
-     "execution_count": 42,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "corrs.loc[3, 54]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 43,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-23T16:50:55.128251Z",
-     "start_time": "2019-02-23T16:50:55.096675Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "-0.11117190190235929"
-      ]
-     },
-     "execution_count": 43,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "corrs.loc[54, 3]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 44,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-23T16:51:03.603309Z",
-     "start_time": "2019-02-23T16:51:03.569575Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "-0.11740191658722447"
-      ]
-     },
-     "execution_count": 44,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "corrs.loc[96, 8]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 45,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-23T16:51:12.504120Z",
-     "start_time": "2019-02-23T16:51:12.472562Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "-0.10463995106844964"
-      ]
-     },
-     "execution_count": 45,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "corrs.loc[44, 45]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-23T16:47:20.066448Z",
-     "start_time": "2019-02-23T16:47:20.032246Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "-0.11117190190235929"
-      ]
-     },
-     "execution_count": 32,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "corrs.values[(3, 54)]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2019-02-23T16:43:58.225875Z",
-     "start_time": "2019-02-23T16:43:58.190963Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[(3, 54), (8, 96), (44, 45)]"
-      ]
-     },
-     "execution_count": 14,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "[tuple(x) for x in set(map(frozenset, pairs))]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "hide_input": false,
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.5"
-  },
-  "toc": {
-   "base_numbering": 1,
-   "nav_menu": {},
-   "number_sections": true,
-   "sideBar": true,
-   "skip_h1_title": false,
-   "title_cell": "Table of Contents",
-   "title_sidebar": "Contents",
-   "toc_cell": false,
-   "toc_position": {},
-   "toc_section_display": true,
-   "toc_window_display": false
-  },
-  "varInspector": {
-   "cols": {
-    "lenName": 16,
-    "lenType": 16,
-    "lenVar": 40
-   },
-   "kernels_config": {
-    "python": {
-     "delete_cmd_postfix": "",
-     "delete_cmd_prefix": "del ",
-     "library": "var_list.py",
-     "varRefreshCmd": "print(var_dic_list())"
-    },
-    "r": {
-     "delete_cmd_postfix": ") ",
-     "delete_cmd_prefix": "rm(",
-     "library": "var_list.r",
-     "varRefreshCmd": "cat(var_dic_list()) "
-    }
-   },
-   "types_to_exclude": [
-    "module",
-    "function",
-    "builtin_function_or_method",
-    "instance",
-    "_Feature"
-   ],
-   "window_display": false
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
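
Taken together, the cells of the deleted testing-exercises.ipynb prototype one task: find the pairs of columns whose correlation crosses a threshold, deduplicating the symmetric (i, j)/(j, i) hits. A compact, self-contained version of that logic (editorial sketch, not part of the commit; sorted tuples stand in for the notebook's frozensets):

import numpy as np
import pandas as pd

def correlation_pairs(corrs, threshold=-0.1, direction='less'):
    """Return one row per column pair whose correlation crosses threshold."""
    mask = corrs < threshold if direction == 'less' else corrs > threshold
    rows, cols = np.where(mask)
    # Correlation matrices are symmetric, so every hit appears as both
    # (i, j) and (j, i); sorted tuples collapse the duplicates.
    pairs = sorted({tuple(sorted((i, j))) for i, j in zip(rows, cols) if i != j})
    return pd.DataFrame(
        [(corrs.index[i], corrs.columns[j], corrs.values[i, j]) for i, j in pairs],
        columns=['var1', 'var2', 'value'])

df = pd.DataFrame(np.random.randn(1000, 100))
print(correlation_pairs(df.corr()))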