
libpython: Add helper library for benchmarking (#1670)

An experimental collection of simple functions to help with benchmarking
and to reduce code duplication between benchmarks.

The design ideas are: be minimalist and pragmatic with no API promises,
but provide convenient functions for writing benchmarking scripts.
The functions are meant for developers tracking the latest development version.

For convenience, the plotting functions can be imported even when their
dependencies are missing, but actually running them requires Matplotlib.
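
For example, a benchmark script may combine a runner with a plotting
function like this (a minimal sketch; the module, map, and resolutions
are placeholders taken from the included testsuite):

    from grass.benchmark import benchmark_resolutions, num_cells_plot
    from grass.pygrass.modules import Module

    # run_=False so the benchmark function controls execution
    module = Module("r.univar", map="elevation", run_=False)
    result = benchmark_resolutions(
        module=module, resolutions=[300, 200, 100], label="r.univar"
    )
    num_cells_plot([result], filename="r_univar_cells.png")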

Co-authored-by: Aaron Saw Min Sern <aaronsms@u.nus.edu>

+ 1 - 1
python/grass/Makefile

@@ -5,7 +5,7 @@ include $(MODULE_TOPDIR)/include/Make/Python.make
 
 PYDIR = $(ETC)/python/grass
 
-SUBDIRS = app exceptions script ctypes grassdb temporal pygrass pydispatch imaging gunittest bandref jupyter
+SUBDIRS = app benchmark exceptions script ctypes grassdb temporal pygrass pydispatch imaging gunittest bandref jupyter
 
 default: $(PYDIR)/__init__.py
 	$(MAKE) subdirs

+ 19 - 0
python/grass/benchmark/Makefile

@@ -0,0 +1,19 @@
+MODULE_TOPDIR = ../../..
+
+include $(MODULE_TOPDIR)/include/Make/Other.make
+include $(MODULE_TOPDIR)/include/Make/Python.make
+
+DSTDIR = $(ETC)/python/grass/benchmark
+
+MODULES = runners plots
+
+PYFILES := $(patsubst %,$(DSTDIR)/%.py,$(MODULES) __init__)
+PYCFILES := $(patsubst %,$(DSTDIR)/%.pyc,$(MODULES) __init__)
+
+default: $(PYFILES) $(PYCFILES)
+
+$(DSTDIR):
+	$(MKDIR) $@
+
+$(DSTDIR)/%: % | $(DSTDIR)
+	$(INSTALL_DATA) $< $@

+ 9 - 0
python/grass/benchmark/__init__.py

@@ -0,0 +1,9 @@
+"""Benchmarking for GRASS GIS modules
+
+This subpackage of the grass package is experimental and the API can change at any time.
+The API of the package is defined by what is imported in the top-level ``__init__.py``
+file of the subpackage.
+"""
+
+from .plots import nprocs_plot, num_cells_plot
+from .runners import benchmark_nprocs, benchmark_resolutions
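
Because matplotlib is imported lazily (see get_pyplot in plots.py below),
importing the public API works even on installations without matplotlib;
for example:

    # No matplotlib needed at import time; it is required only once
    # a plotting function is actually called.
    from grass.benchmark import benchmark_resolutions, num_cells_plot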

+ 122 - 0
python/grass/benchmark/plots.py

@@ -0,0 +1,122 @@
+# MODULE:    grass.benchmark
+#
+# AUTHOR(S): Vaclav Petras <wenzeslaus gmail com>
+#
+# PURPOSE:   Benchmarking for GRASS GIS modules
+#
+# COPYRIGHT: (C) 2021 Vaclav Petras, and by the GRASS Development Team
+#
+#            This program is free software under the GNU General Public
+#            License (>=v2). Read the file COPYING that comes with GRASS
+#            for details.
+
+
+"""Plotting functionality for benchmark results"""
+
+
+def get_pyplot(to_file):
+    """Get pyplot from matplotlib
+
+    The import is done lazily so that code importing this function can run
+    on limited installations; only an actual call to this function requires matplotlib.
+
+    The *to_file* parameter can be set to True to avoid the tkinter dependency
+    when the interactive show method is not needed.
+    """
+    import matplotlib  # pylint: disable=import-outside-toplevel
+
+    if to_file:
+        backend = "agg"
+    else:
+        backend = None
+    if backend:
+        matplotlib.use(backend)
+
+    import matplotlib.pyplot as plt  # pylint: disable=import-outside-toplevel
+
+    return plt
+
+
+def nprocs_plot(results, filename=None):
+    """Plot results from a multiple nprocs (thread) benchmarks.
+
+    *results* is a list of individual results from separate benchmarks.
+    Each result is required to have the attributes *nprocs*, *times*, and *label*.
+    The *nprocs* attribute is a list of all processing elements
+    (cores, threads, processes) used in the benchmark.
+    The *times* attribute is a list of corresponding times for each value
+    from the *nprocs* list.
+    The *label* attribute identifies the benchmark in the legend.
+
+    Optionally, a result can have an *all_times* attribute, a list of lists,
+    where each sublist contains all times recorded for one value of nprocs.
+
+    Each result can come with a different list of nprocs, i.e., benchmarks
+    which used different values for nprocs can be combined in one plot.
+    """
+    plt = get_pyplot(to_file=bool(filename))
+    axes = plt.gca()
+
+    x_ticks = set()  # gather x values
+    for result in results:
+        x = result.nprocs
+        x_ticks.update(x)
+        plt.plot(x, result.times, label=result.label)
+        if hasattr(result, "all_times"):
+            mins = [min(i) for i in result.all_times]
+            maxes = [max(i) for i in result.all_times]
+            plt.fill_between(x, mins, maxes, color="gray", alpha=0.3)
+    plt.legend()
+    axes.set(xticks=sorted(x_ticks))
+    plt.xlabel("Number of cores (threads, processes)")
+    plt.ylabel("Time [s]")
+    if filename:
+        plt.savefig(filename)
+    else:
+        plt.show()
+
+
+def num_cells_plot(results, filename=None, show_resolution=False):
+    """Plot results from a multiple raster grid size benchmarks.
+
+    *results* is a list of individual results from separate benchmarks
+    with each result structured as described for the :func:`nprocs_plot` function.
+    The result is required to have *times* and *label* attributes
+    and may have an *all_times* attribute.
+    Further, it is required to have a *cells* attribute, or,
+    when ``show_resolution=True``, a *resolutions* attribute.
+
+    Each result can come with a different list of cells or resolutions, i.e.,
+    benchmarks which used different grid sizes can be combined in one plot.
+    """
+    plt = get_pyplot(to_file=bool(filename))
+    axes = plt.gca()
+    if show_resolution:
+        axes.invert_xaxis()
+
+    x_ticks = set()
+    for result in results:
+        if show_resolution:
+            x = result.resolutions
+        else:
+            x = result.cells
+        x_ticks.update(x)
+        plt.plot(x, result.times, label=result.label)
+        if hasattr(result, "all_times"):
+            mins = [min(i) for i in result.all_times]
+            maxes = [max(i) for i in result.all_times]
+            plt.fill_between(x, mins, maxes, color="gray", alpha=0.3)
+
+    plt.legend()
+    axes.set(xticks=sorted(x_ticks))
+    if not show_resolution:
+        axes.ticklabel_format(axis="x", style="scientific", scilimits=(0, 0))
+    if show_resolution:
+        plt.xlabel("Resolution [map units]")
+    else:
+        plt.xlabel("Number of cells")
+    plt.ylabel("Time [s]")
+    if filename:
+        plt.savefig(filename)
+    else:
+        plt.show()
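
For illustration, the plotting functions accept any objects carrying the
documented attributes, e.g., types.SimpleNamespace; a sketch with made-up
timings:

    from types import SimpleNamespace

    from grass.benchmark import nprocs_plot

    result = SimpleNamespace(
        nprocs=[1, 2, 4],
        times=[10.0, 6.0, 4.0],
        all_times=[[11.0, 9.0], [6.5, 5.5], [4.2, 3.8]],
        label="example module",
    )
    # With filename set, the agg backend is used and no display is needed.
    nprocs_plot([result], filename="nprocs.png")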

+ 129 - 0
python/grass/benchmark/runners.py

@@ -0,0 +1,129 @@
+# MODULE:    grass.benchmark
+#
+# AUTHOR(S): Aaron Saw Min Sern <aaronsms u nus edu>
+#            Vaclav Petras <wenzeslaus gmail com>
+#
+# PURPOSE:   Benchmarking for GRASS GIS modules
+#
+# COPYRIGHT: (C) 2021 Vaclav Petras, and by the GRASS Development Team
+#
+#            This program is free software under the GNU General Public
+#            License (>=v2). Read the file COPYING that comes with GRASS
+#            for details.
+
+
+"""Basic functions for benchmarking modules"""
+
+import shutil
+from types import SimpleNamespace
+
+import grass.script as gs
+
+
+def benchmark_nprocs(module, label, max_nprocs, repeat):
+    """Benchmark module using values of nprocs up to *max_nprocs*.
+
+    *module* is an instance of PyGRASS Module class.
+    The module is executed for each value of nprocs from 1 up to *max_nprocs*.
+    *repeat* sets how many times each run is repeated.
+    So, the module will run ``max_nprocs * repeat`` times.
+
+    *label* is a text to add to the result (for user-facing display).
+    The *nprocs* parameter of the module is set to the tested value for each run.
+
+    Returns an object with attributes *times* (list of average execution times),
+    *all_times* (list of lists of measured execution times), *nprocs*
+    (list of *nprocs* values used), and *label* (the provided parameter as is).
+    """
+    term_size = shutil.get_terminal_size()
+    print(module.get_bash())
+
+    min_avg = float("inf")
+    min_nprocs = 1
+    avg_times = []
+    all_times = []
+    nprocs_list = list(range(1, max_nprocs + 1))
+    for nprocs in nprocs_list:
+        print("\u2500" * term_size.columns)
+        print(f"Benchmark with {nprocs} thread(s)...\n")
+        time_sum = 0
+        measured_times = []
+        for _ in range(repeat):
+            module(nprocs=nprocs).run()
+            print(f"{module.time}s")
+            time_sum += module.time
+            measured_times.append(module.time)
+
+        avg = time_sum / repeat
+        avg_times.append(avg)
+        all_times.append(measured_times)
+        if nprocs == 1:
+            serial_avg = avg
+        if avg < min_avg:
+            min_avg = avg
+            min_nprocs = nprocs
+        print(f"\nResult - {avg}s")
+
+    print("\u2500" * term_size.columns)
+    print(f"\nSerial average time - {serial_avg}s")
+    print(f"Best average time - {min_avg}s ({min_time} threads)\n")
+
+    return SimpleNamespace(
+        all_times=all_times,
+        times=avg_times,
+        nprocs=nprocs_list,
+        label=label,
+    )
+
+
+def benchmark_resolutions(module, resolutions, label, repeat=5, nprocs=None):
+    """Benchmark module using different resolutions.
+
+    *module* is an instance of PyGRASS Module class.
+    *resolutions* is a list of resolutions to set (the current region is used
+    and modified, but that may change in the future).
+    *repeat* sets how many times each run is repeated.
+    So, the module will run ``len(resolutions) * repeat`` times.
+
+    *label* is a text to add to the result (for user-facing display).
+    Optional *nprocs* is passed to the module if present.
+
+    Returns an object with attributes *times* (list of average execution times),
+    *all_times* (list of lists of measured execution times), *resolutions*
+    (the provided parameter as is), *cells* (number of cells in the region),
+    and *label* (the provided parameter as is).
+    """
+    term_size = shutil.get_terminal_size()
+    print(module.get_bash())
+
+    avg_times = []
+    all_times = []
+    n_cells = []
+    for resolution in resolutions:
+        gs.run_command("g.region", res=resolution)
+        region = gs.region()
+        n_cells.append(region["cells"])
+        print("\u2500" * term_size.columns)
+        print(f"Benchmark with {resolution} resolution...\n")
+        time_sum = 0
+        measured_times = []
+        for _ in range(repeat):
+            if nprocs:
+                # Set nprocs; the module is expected to be created with
+                # run_=False, so this call only updates parameters.
+                module(nprocs=nprocs)
+            module.run()
+            print(f"{module.time}s")
+            time_sum += module.time
+            measured_times.append(module.time)
+
+        avg = time_sum / repeat
+        avg_times.append(avg)
+        all_times.append(measured_times)
+        print(f"\nResult - {avg}s")
+
+    return SimpleNamespace(
+        all_times=all_times,
+        times=avg_times,
+        resolutions=resolutions,
+        cells=n_cells,
+        label=label,
+    )
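
A typical script would pair benchmark_nprocs with nprocs_plot. A minimal
sketch; it assumes the benchmarked module exposes an *nprocs* option
(r.neighbors and its parameters are only illustrative placeholders here):

    from grass.benchmark import benchmark_nprocs, nprocs_plot
    from grass.pygrass.modules import Module

    # run_=False so the benchmark function controls when the module runs.
    module = Module(
        "r.neighbors", input="elevation", output="smoothed", size=15, run_=False
    )
    result = benchmark_nprocs(module, label="r.neighbors", max_nprocs=4, repeat=3)
    nprocs_plot([result], filename="r_neighbors_nprocs.png")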

+ 56 - 0
python/grass/benchmark/testsuite/test_benchmark.py

@@ -0,0 +1,56 @@
+# MODULE:    Test of grass.benchmark
+#
+# AUTHOR(S): Vaclav Petras <wenzeslaus gmail com>
+#
+# PURPOSE:   Benchmarking for GRASS GIS modules
+#
+# COPYRIGHT: (C) 2021 Vaclav Petras, and by the GRASS Development Team
+#
+#            This program is free software under the GNU General Public
+#            License (>=v2). Read the file COPYING that comes with GRASS
+#            for details.
+
+"""Basic tests of grass.benchmark"""
+
+from pathlib import Path
+from subprocess import DEVNULL
+
+from grass.benchmark import benchmark_resolutions, num_cells_plot
+from grass.gunittest.case import TestCase
+from grass.gunittest.main import test
+from grass.pygrass.modules import Module
+
+
+class TestBenchmarksRun(TestCase):
+    """Tests that functions for benchmarking can run"""
+
+    def test_resolutions(self):
+        """Test that resolution tests runs without nprocs and plots to file"""
+        benchmarks = [
+            dict(
+                module=Module("r.univar", map="elevation", stdout_=DEVNULL, run_=False),
+                label="Standard output",
+            ),
+            dict(
+                module=Module(
+                    "r.univar", map="elevation", flags="g", stdout_=DEVNULL, run_=False
+                ),
+                label="Standard output",
+            ),
+        ]
+        resolutions = [300, 200, 100]
+        results = []
+        for benchmark in benchmarks:
+            results.append(
+                benchmark_resolutions(
+                    **benchmark,
+                    resolutions=resolutions,
+                )
+            )
+        plot_file = "test_res_plot.png"
+        num_cells_plot(results, filename=plot_file)
+        self.assertTrue(Path(plot_file).is_file())
+
+
+if __name__ == "__main__":
+    test()