formatting_data.py 1.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940
  1. import geoviews as gv
  2. import geoviews.feature as gf
  3. import xarray as xr
  4. from cartopy import crs
  5. import pandas as pd
  6. import numpy as np
  7. gv.extension("bokeh", "matplotlib")
  8. xr_ensemble = xr.open_dataset(
  9. "Data-Analysis/datashader-work/geoviews-examples/data/ensemble.nc"
  10. ).load()
  11. from sqlalchemy import create_engine
  12. engine = create_engine("postgres://localhost:5432/global_fishing_watch")
  13. engine.table_names()
  14. df = pd.read_sql(
  15. """SELECT * FROM fishing_effort LIMIT 10000""", engine, parse_dates=["date"]
  16. )
  17. df["flag"] = df["flag"].astype("category")
  18. df["geartype"] = df["geartype"].astype("category")
  19. df["lat"] = df["lat_bin"] / 100
  20. df["lon"] = df["lon_bin"] / 100
  21. df.info()
  22. def format_df(df, n=10_000):
  23. df = df.iloc[:n]
  24. df = df.drop_duplicates(subset=["lat", "lon", "date"])
  25. df = df.sort_values(["lat", "lon", "date"])
  26. index = pd.MultiIndex.from_arrays([df["lat"], df["lon"], df["date"]])
  27. df.index = index
  28. latitudes = df.index.levels[0]
  29. longitudes = df.index.levels[1]
  30. times = df.index.levels[2]
  31. return latitudes, longitudes, times, df