Przeglądaj źródła

grass.pygrass: Use unique tmp mapset names in GridModule (#1967)

- Base the temporary mapset name on process ID and node name instead of just module name.
- Use more robust legalize_vector_name function instead of just removing dots.
- Allow for underscores in user-provided mapset name prefix (and in processed module name) by using stored prefix to delete temporary mapsets.
- This by allows multiple GridModules to run on one machine (including HPC) from different processes without the need to specify unique mapset prefix.
- The previous code required no underscore in the mapset name, but the documentation did not state that. This removes the requirement and avoid possible removal of mapsets with similar name.
- Documentation now comments on how unique the temporary name is and states when a custom prefix needs to be supplied (multiple GridModules from one process).
- Adds basic tests for GridModule (developed with the original version).
- Adds LD_LIBRARY_PATH setting for pytest (and removes broken LD_LIBRARY_PATH setting there).
- Fixes Pylint check done by pytest so that no actual tests are executed.
Vaclav Petras 3 lat temu
rodzic
commit
c8106cde3a

+ 1 - 1
.github/workflows/pylint.yml

@@ -85,4 +85,4 @@ jobs:
         run: |
           pip install pytest pytest-pylint
           export PYTHONPATH=`grass --config python_path`:$PYTHONPATH
-          pytest --pylint --pylint-rcfile=.pylintrc --pylint-jobs=$(nproc) --pylint-ignore-patterns="python/.*,gui/wxpython/.*,doc/.*,man/.*,utils/.*,locale/.*,raster/.*,imagery/.*,scripts/r.in.wms/wms_drv.py,scripts/g.extension/g.extension.py,temporal/t.rast.accdetect/t.rast.accdetect.py,temporal/t.rast.accumulate/t.rast.accumulate.py,scripts/d.rast.edit/d.rast.edit.py"
+          pytest --pylint -m pylint --pylint-rcfile=.pylintrc --pylint-jobs=$(nproc) --pylint-ignore-patterns="python/.*,gui/wxpython/.*,doc/.*,man/.*,utils/.*,locale/.*,raster/.*,imagery/.*,scripts/r.in.wms/wms_drv.py,scripts/g.extension/g.extension.py,temporal/t.rast.accdetect/t.rast.accdetect.py,temporal/t.rast.accumulate/t.rast.accumulate.py,scripts/d.rast.edit/d.rast.edit.py"

+ 1 - 4
.github/workflows/pytest.yml

@@ -47,10 +47,6 @@ jobs:
         run: |
           echo "MAKEFLAGS=-j$(nproc)" >> $GITHUB_ENV
 
-      - name: Set LD_LIBRARY_PATH for compilation
-        run: |
-          echo "LD_LIBRARY_PATH=$HOME/install/lib" >> $GITHUB_ENV
-
       - name: Build
         run: .github/workflows/build_${{ matrix.os }}.sh $HOME/install
 
@@ -64,6 +60,7 @@ jobs:
       - name: Run pytest
         run: |
           export PYTHONPATH=`grass --config python_path`:$PYTHONPATH
+          export LD_LIBRARY_PATH=$HOME/install/grass81/lib:$LD_LIBRARY_PATH
           pytest .
 
       - name: Print installed versions

+ 10 - 3
python/grass/pygrass/modules/grid/grid.py

@@ -14,6 +14,7 @@ import subprocess as sub
 import shutil as sht
 
 from grass.script.setup import write_gisrc
+from grass.script import append_node_pid, legalize_vector_name
 
 from grass.pygrass.gis import Mapset, Location
 from grass.pygrass.gis.region import Region
@@ -434,6 +435,11 @@ class GridModule(object):
     ...                  elevation='elevation',
     ...                  slope='slope', aspect='aspect', overwrite=True)
     >>> grd.run()
+
+    Temporary mapsets created start with a generated prefix which is unique for each
+    process (includes PID and node name). If more instances of this class are used in
+    parallel from one process with the same module, a custom *mapset_prefix* needs to
+    be provided.
     """
 
     def __init__(
@@ -490,9 +496,10 @@ class GridModule(object):
             region=region, width=width, height=height, overlap=overlap
         )
         if mapset_prefix:
-            self.msetstr = mapset_prefix + "_%03d_%03d"
+            self.mapset_prefix = mapset_prefix
         else:
-            self.msetstr = cmd.replace(".", "") + "_%03d_%03d"
+            self.mapset_prefix = append_node_pid("grid_" + legalize_vector_name(cmd))
+        self.msetstr = self.mapset_prefix + "_%03d_%03d"
         self.inlist = None
         if split:
             self.split()
@@ -514,7 +521,7 @@ class GridModule(object):
                 self.n_mset.current()
             location = Location()
 
-        mapsets = location.mapsets(self.msetstr.split("_")[0] + "_*")
+        mapsets = location.mapsets(self.mapset_prefix + "_*")
         for mset in mapsets:
             Mapset(mset).delete()
         if self.n_mset and self.n_mset.is_current():

+ 173 - 0
python/grass/pygrass/modules/tests/grass_pygrass_grid_test.py

@@ -0,0 +1,173 @@
+"""Test main functions of PyGRASS GridModule"""
+
+import multiprocessing
+
+import pytest
+
+import grass.script as gs
+import grass.script.setup as grass_setup
+
+
+def max_processes():
+    """Get max useful number of parallel processes to run"""
+    return min(multiprocessing.cpu_count(), 4)
+
+
+# GridModule uses C libraries which can easily initialize only once
+# and thus can't easily change location/mapset, so we use a subprocess
+# to separate individual GridModule calls.
+def run_in_subprocess(function):
+    """Run function in a separate process"""
+    process = multiprocessing.Process(target=function)
+    process.start()
+    process.join()
+
+
+@pytest.mark.parametrize("processes", list(range(1, max_processes() + 1)) + [None])
+def test_processes(tmp_path, processes):
+    """Check that running with multiple processes works"""
+    location = "test"
+    gs.core._create_location_xy(tmp_path, location)  # pylint: disable=protected-access
+    with grass_setup.init(tmp_path / location):
+        gs.run_command("g.region", s=0, n=50, w=0, e=50, res=1)
+
+        surface = "surface"
+        gs.run_command("r.surf.fractal", output=surface)
+
+        def run_grid_module():
+            # modules/shortcuts calls get_commands which requires GISBASE.
+            # pylint: disable=import-outside-toplevel
+            from grass.pygrass.modules.grid import GridModule
+
+            grid = GridModule(
+                "r.slope.aspect",
+                width=10,
+                height=5,
+                overlap=2,
+                processes=processes,
+                elevation=surface,
+                slope="slope",
+                aspect="aspect",
+            )
+            grid.run()
+
+        run_in_subprocess(run_grid_module)
+
+        info = gs.raster_info("slope")
+        assert info["min"] > 0
+
+
+# @pytest.mark.parametrize("split", [False])  # True does not work.
+
+
+@pytest.mark.parametrize("width", [5, 10, 50])  # None does not work.
+@pytest.mark.parametrize("height", [5, 10, 50])
+def test_tiling_schemes(tmp_path, width, height):
+    """Check that different shapes of tiles work"""
+    location = "test"
+    gs.core._create_location_xy(tmp_path, location)  # pylint: disable=protected-access
+    with grass_setup.init(tmp_path / location):
+        gs.run_command("g.region", s=0, n=50, w=0, e=50, res=1)
+
+        surface = "surface"
+        gs.run_command("r.surf.fractal", output=surface)
+
+        def run_grid_module():
+            # modules/shortcuts calls get_commands which requires GISBASE.
+            # pylint: disable=import-outside-toplevel
+            from grass.pygrass.modules.grid import GridModule
+
+            grid = GridModule(
+                "r.slope.aspect",
+                width=width,
+                height=height,
+                overlap=2,
+                processes=max_processes(),
+                elevation=surface,
+                slope="slope",
+                aspect="aspect",
+            )
+            grid.run()
+
+        run_in_subprocess(run_grid_module)
+
+        info = gs.raster_info("slope")
+        assert info["min"] > 0
+
+
+@pytest.mark.parametrize("overlap", [0, 1, 2, 5])
+def test_overlaps(tmp_path, overlap):
+    """Check that overlap accepts different values"""
+    location = "test"
+    gs.core._create_location_xy(tmp_path, location)  # pylint: disable=protected-access
+    with grass_setup.init(tmp_path / location):
+        gs.run_command("g.region", s=0, n=50, w=0, e=50, res=1)
+        surface = "surface"
+        gs.run_command("r.surf.fractal", output=surface)
+
+        def run_grid_module():
+            # modules/shortcuts calls get_commands which requires GISBASE.
+            # pylint: disable=import-outside-toplevel
+            from grass.pygrass.modules.grid import GridModule
+
+            grid = GridModule(
+                "r.slope.aspect",
+                width=10,
+                height=5,
+                overlap=overlap,
+                processes=max_processes(),
+                elevation=surface,
+                slope="slope",
+                aspect="aspect",
+            )
+            grid.run()
+
+        run_in_subprocess(run_grid_module)
+
+        info = gs.raster_info("slope")
+        assert info["min"] > 0
+
+
+@pytest.mark.parametrize("clean", [True, False])
+def test_cleans(tmp_path, clean):
+    """Check that temporary mapsets are cleaned when appropriate"""
+    location = "test"
+    mapset_prefix = "abc"
+    gs.core._create_location_xy(tmp_path, location)  # pylint: disable=protected-access
+    with grass_setup.init(tmp_path / location):
+        gs.run_command("g.region", s=0, n=50, w=0, e=50, res=1)
+        surface = "surface"
+        gs.run_command("r.surf.fractal", output=surface)
+
+        def run_grid_module():
+            # modules/shortcuts calls get_commands which requires GISBASE.
+            # pylint: disable=import-outside-toplevel
+            from grass.pygrass.modules.grid import GridModule
+
+            grid = GridModule(
+                "r.slope.aspect",
+                width=10,
+                height=5,
+                overlap=0,
+                processes=max_processes(),
+                elevation=surface,
+                slope="slope",
+                aspect="aspect",
+                mapset_prefix=mapset_prefix,
+            )
+            grid.run(clean=clean)
+
+        run_in_subprocess(run_grid_module)
+
+        path = tmp_path / location
+        prefixed = 0
+        for item in path.iterdir():
+            if item.is_dir():
+                if clean:
+                    # We know right away something is wrong.
+                    assert not item.name.startswith(mapset_prefix), "Mapset not cleaned"
+                else:
+                    # We need to see if there is at least one prefixed mapset.
+                    prefixed += int(item.name.startswith(mapset_prefix))
+        if not clean:
+            assert prefixed, "Not even one prefixed mapset"