Преглед изворни кода

r.patch: implement parallelization with OpenMP (#1782)

Aaron пре 3 година
родитељ
комит
cb68b54db0

+ 2 - 1
raster/r.patch/Makefile

@@ -2,8 +2,9 @@ MODULE_TOPDIR = ../..
 
 PGM = r.patch
 
-LIBES = $(RASTERLIB) $(GISLIB)
+LIBES = $(RASTERLIB) $(GISLIB) $(OMPLIB)
 DEPENDENCIES = $(RASTERDEP) $(GISDEP)
+EXTRA_CFLAGS = $(OMPCFLAGS)
 
 include $(MODULE_TOPDIR)/include/Make/Module.make
 

+ 82 - 0
raster/r.patch/benchmark/benchmark_r_patch.py

@@ -0,0 +1,82 @@
+"""Benchmarking of r.patch
+
+@author Aaron Saw Min Sern
+"""
+
+from grass.exceptions import CalledModuleError
+from grass.pygrass.modules import Module
+from subprocess import DEVNULL
+
+import grass.benchmark as bm
+
+
+def main():
+    results = []
+
+    # Users can add more or modify existing reference maps
+    benchmark(7071, "r.patch_50M", results)
+    benchmark(14142, "r.patch_200M", results)
+    benchmark(10000, "r.patch_100M", results)
+    benchmark(20000, "r.patch_400M", results)
+
+    bm.nprocs_plot(results, filename="rpatch_benchmark.svg")
+
+
+def benchmark(size, label, results):
+    references = ["r_patch_reference_map_{}".format(i) for i in range(4)]
+    output = "benchmark_r_patch"
+
+    generate_map(rows=size, cols=size, fname=references)
+    module = Module(
+        "r.patch",
+        input=references,
+        output=output,
+        run_=False,
+        stdout_=DEVNULL,
+        overwrite=True,
+    )
+    results.append(bm.benchmark_nprocs(module, label=label, max_nprocs=24, repeat=3))
+    for r in references:
+        Module("g.remove", quiet=True, flags="f", type="raster", name=r)
+    Module("g.remove", quiet=True, flags="f", type="raster", name=output)
+
+
+def generate_map(rows, cols, fname):
+    temp = "r_patch_reference_tmp"
+
+    Module("g.region", flags="p", s=0, n=rows, w=0, e=cols, res=1)
+    # Generate using r.random.surface if r.surf.fractal fails
+    try:
+        print("Generating reference map using r.surf.fractal...")
+        for r in fname:
+            Module(
+                "r.surf.fractal",
+                output=temp,
+                overwrite=True,
+            )
+            # Make approximate half of the reference map to be null
+            Module(
+                "r.mapcalc",
+                expression=f"{r} = if({temp}, {temp}, 0, null())",
+                overwrite=True,
+            )
+    except CalledModuleError:
+        print("r.surf.fractal fails, using r.random.surface instead...")
+        for r in fname:
+            Module(
+                "r.random.surface",
+                maximum=255,
+                output=temp,
+                overwrite=True,
+            )
+            Module(
+                "r.mapcalc",
+                expression=f"{r} = if({temp} - 128, {temp}, 0, null())",
+                overwrite=True,
+            )
+
+    Module("g.remove", quiet=True, flags="f", type="raster", name=temp)
+
+
+if __name__ == "__main__":
+    main()

+ 84 - 0
raster/r.patch/benchmark/benchmark_r_patch_memory.py

@@ -0,0 +1,84 @@
+"""Benchmarking of r.patch memory parameter
+
+@author Aaron Saw Min Sern
+@author Anna Petrasova
+"""
+
+from grass.exceptions import CalledModuleError
+from grass.pygrass.modules import Module
+from subprocess import DEVNULL
+
+import grass.benchmark as bm
+
+
+def main():
+    results = []
+
+    references = ["r_patch_reference_map_{}".format(i) for i in range(4)]
+    generate_map(rows=5000, cols=5000, fname=references)
+    # Users can add more or modify existing reference maps
+    benchmark(0, "r.patch_0MB", references, results)
+    benchmark(5, "r.patch_5MB", references, results)
+    benchmark(10, "r.patch_10MB", references, results)
+    benchmark(100, "r.patch_100MB", references, results)
+    benchmark(300, "r.patch_300MB", references, results)
+    for r in references:
+        Module("g.remove", quiet=True, flags="f", type="raster", name=r)
+    bm.nprocs_plot(results, filename="rpatch_benchmark_memory.svg")
+
+
+def benchmark(memory, label, references, results):
+    output = "benchmark_r_patch"
+
+    module = Module(
+        "r.patch",
+        input=references,
+        output=output,
+        memory=memory,
+        run_=False,
+        stdout_=DEVNULL,
+        overwrite=True,
+    )
+    results.append(bm.benchmark_nprocs(module, label=label, max_nprocs=24, repeat=10))
+    Module("g.remove", quiet=True, flags="f", type="raster", name=output)
+
+
+def generate_map(rows, cols, fname):
+    temp = "r_patch_reference_tmp"
+
+    Module("g.region", flags="p", s=0, n=rows, w=0, e=cols, res=1)
+    # Generate using r.random.surface if r.surf.fractal fails
+    try:
+        print("Generating reference map using r.surf.fractal...")
+        for r in fname:
+            Module(
+                "r.surf.fractal",
+                output=temp,
+                overwrite=True,
+            )
+            # Make approximate half of the reference map to be null
+            Module(
+                "r.mapcalc",
+                expression=f"{r} = if({temp}, {temp}, 0, null())",
+                overwrite=True,
+            )
+    except CalledModuleError:
+        print("r.surf.fractal fails, using r.random.surface instead...")
+        for r in fname:
+            Module(
+                "r.random.surface",
+                maximum=255,
+                output=temp,
+                overwrite=True,
+            )
+            Module(
+                "r.mapcalc",
+                expression=f"{r} = if({temp} - 128, {temp}, 0, null())",
+                overwrite=True,
+            )
+
+    Module("g.remove", quiet=True, flags="f", type="raster", name=temp)
+
+
+if __name__ == "__main__":
+    main()

+ 127 - 37
raster/r.patch/main.c

@@ -17,7 +17,11 @@
  *               for details.
  *
  *****************************************************************************/
+#if defined(_OPENMP)
+#include <omp.h>
+#endif
 #include <stdlib.h>
+#include <string.h>
 #include <unistd.h>
 
 #include <grass/gis.h>
@@ -28,7 +32,7 @@
 
 int main(int argc, char *argv[])
 {
-    int *infd;
+    int **infd;
     struct Categories cats;
     struct Cell_stats *statf;
     struct Colors colr;
@@ -38,10 +42,13 @@ int main(int argc, char *argv[])
     RASTER_MAP_TYPE out_type, map_type;
     size_t out_cell_size;
     struct History history;
-    void *presult, *patch;
+    void **presult, **patch;
+    void *outbuf;
+    int bufrows;
     int nfiles;
     char *rname;
-    int i;
+    int i, t;
+    int nprocs;
     int row, nrows, ncols;
     int use_zero, no_support;
     char *new_name;
@@ -52,7 +59,7 @@ int main(int argc, char *argv[])
 
     struct GModule *module;
     struct Flag *zeroflag, *nosupportflag;
-    struct Option *opt1, *opt2;
+    struct Option *opt1, *opt2, *threads, *memory;
 
     G_gisinit(argv[0]);
 
@@ -64,6 +71,7 @@ int main(int argc, char *argv[])
     G_add_keyword(_("patching"));
     G_add_keyword(_("aggregation"));
     G_add_keyword(_("series"));
+    G_add_keyword(_("parallel"));
     module->description =
         _("Creates a composite raster map layer by using "
           "known category values from one (or more) map layer(s) "
@@ -77,6 +85,9 @@ int main(int argc, char *argv[])
     opt2 = G_define_standard_option(G_OPT_R_OUTPUT);
     opt2->description = _("Name for resultant raster map");
 
+    threads = G_define_standard_option(G_OPT_M_NPROCS);
+    memory = G_define_standard_option(G_OPT_MEMORYMB);
+
     /* Define the different flags */
 
     zeroflag = G_define_flag();
@@ -90,6 +101,18 @@ int main(int argc, char *argv[])
     if (G_parser(argc, argv))
         exit(EXIT_FAILURE);
 
+    sscanf(threads->answer, "%d", &nprocs);
+    if (nprocs < 1)
+        G_fatal_error(_("<%d> is not valid number of nprocs."), nprocs);
+#if defined(_OPENMP)
+    omp_set_num_threads(nprocs);
+#else
+    if (nprocs != 1)
+        G_warning(_("GRASS is compiled without OpenMP support. Ignoring "
+                    "threads setting."));
+    nprocs = 1;
+#endif
+
     use_zero = (zeroflag->answer);
     no_support = (nosupportflag->answer);
 
@@ -102,7 +125,9 @@ int main(int argc, char *argv[])
     if (nfiles < 2)
         G_fatal_error(_("The minimum number of input raster maps is two"));
 
-    infd = G_malloc(nfiles * sizeof(int));
+    infd = G_malloc(nprocs * sizeof(int*));
+    for (t = 0; t < nprocs; t++)
+        infd[t] = G_malloc(nfiles * sizeof(int));
     statf = G_malloc(nfiles * sizeof(struct Cell_stats));
     cellhd = G_malloc(nfiles * sizeof(struct Cell_head));
 
@@ -110,9 +135,11 @@ int main(int argc, char *argv[])
         const char *name = names[i];
         int fd;
 
-        fd = Rast_open_old(name, "");
+        for (t = 0; t < nprocs; t++) {
+            infd[t][i] = Rast_open_old(name, "");
+        }
 
-        infd[i] = fd;
+        fd = infd[0][i];
 
         map_type = Rast_get_map_type(fd);
         if (map_type == FCELL_TYPE && out_type == CELL_TYPE)
@@ -130,46 +157,109 @@ int main(int argc, char *argv[])
     rname = opt2->answer;
     outfd = Rast_open_new(new_name = rname, out_type);
 
-    presult = Rast_allocate_buf(out_type);
-    patch = Rast_allocate_buf(out_type);
+    presult = G_malloc(nprocs * sizeof *presult);
+    patch = G_malloc(nprocs * sizeof *patch);
+    for (t = 0; t < nprocs; t++) {
+        presult[t] = Rast_allocate_buf(out_type);
+    }
+    for (t = 0; t < nprocs; t++) {
+        patch[t] = Rast_allocate_buf(out_type);
+    }
 
     Rast_get_window(&window);
     nrows = Rast_window_rows();
     ncols = Rast_window_cols();
 
+    bufrows = atoi(memory->answer) * (((1 << 20) / out_cell_size) / ncols);
+    /* set the output buffer rows to be at most covering the entire map */
+    if (bufrows > nrows) {
+        bufrows = nrows;
+    }
+    /* but at least the number of threads */
+    if (bufrows < nprocs) {
+        bufrows = nprocs;
+    }
+
+    outbuf = G_malloc(out_cell_size * ncols * bufrows);
+
     G_verbose_message(_("Percent complete..."));
-    for (row = 0; row < nrows; row++) {
-        double north_edge, south_edge;
-
-        G_percent(row, nrows, 2);
-        Rast_get_row(infd[0], presult, row, out_type);
-
-        north_edge = Rast_row_to_northing(row, &window);
-        south_edge = north_edge - window.ns_res;
-
-        if (out_type == CELL_TYPE)
-            Rast_update_cell_stats((CELL *) presult, ncols, &statf[0]);
-        for (i = 1; i < nfiles; i++) {
-            /* check if raster i overlaps with the current row */
-            if (south_edge >= cellhd[i].north ||
-                north_edge <= cellhd[i].south ||
-                window.west >= cellhd[i].east ||
-                window.east <= cellhd[i].west)
-                continue;
-
-            Rast_get_row(infd[i], patch, row, out_type);
-            if (!do_patch(presult, patch, &statf[i], ncols, out_type,
-                          out_cell_size, use_zero))
-                break;
+
+    int computed = 0;
+    int written = 0;
+
+    while (written < nrows) {
+        int start = written;
+        int end = start + bufrows;
+        if (end > nrows)
+            end = nrows;
+
+#pragma omp parallel private(i) if(nprocs > 1) 
+        {
+            int t_id = 0;
+#if defined(_OPENMP)
+            t_id = omp_get_thread_num();
+#endif
+            void *local_presult = presult[t_id];
+            void *local_patch = patch[t_id];
+            int *local_infd = infd[t_id];
+
+#pragma omp for schedule(static)
+            for (row = start; row < end; row++) {
+                double north_edge, south_edge;
+
+                G_percent(computed, nrows, 2);
+                Rast_get_row(local_infd[0], local_presult, row, out_type);
+
+                north_edge = Rast_row_to_northing(row, &window);
+                south_edge = north_edge - window.ns_res;
+
+                if (out_type == CELL_TYPE)
+                    Rast_update_cell_stats((CELL *) local_presult, ncols,
+                                           &statf[0]);
+                for (i = 1; i < nfiles; i++) {
+                    /* check if raster i overlaps with the current row */
+                    if (south_edge >= cellhd[i].north ||
+                        north_edge <= cellhd[i].south ||
+                        window.west >= cellhd[i].east ||
+                        window.east <= cellhd[i].west)
+                        continue;
+
+                    Rast_get_row(local_infd[i], local_patch, row, out_type);
+                    if (!do_patch(local_presult, local_patch, &statf[i], ncols,
+                                  out_type, out_cell_size, use_zero))
+                        break;
+                }
+                void *p = G_incr_void_ptr(outbuf, out_cell_size *
+                                                      (row - start) * ncols);
+                memcpy(p, local_presult, out_cell_size * ncols);
+
+#pragma omp atomic update
+                computed++;
+            }
+
+        } /* end parallel region */
+
+        for (row = start; row < end; row++) {
+            void *p =
+                G_incr_void_ptr(outbuf, out_cell_size * (row - start) * ncols);
+            Rast_put_row(outfd, p, out_type);
         }
-        Rast_put_row(outfd, presult, out_type);
-    }
-    G_percent(row, nrows, 2);
 
+        written = end;
+
+    } /* end while loop */
+    G_percent(nrows, nrows, 2);
+
+    for (t = 0; t < nprocs; t++) {
+        G_free(patch[t]);
+        G_free(presult[t]);
+    }
     G_free(patch);
     G_free(presult);
-    for (i = 0; i < nfiles; i++)
-        Rast_close(infd[i]);
+
+    for (t = 0; t < nprocs; t++)
+        for (i = 0; i < nfiles; i++) 
+            Rast_close(infd[t][i]);
 
     if (!no_support) {
         /*

+ 20 - 1
raster/r.patch/r.patch.html

@@ -161,6 +161,23 @@ using the option <b>input</b>. In that case,
 <em><a href="r.series.html">r.series</a></em> can be used instead of
 <em>r.patch</em>.
 
+<h3>PERFORMANCE</h3>
+<p>By specifying the number of parallel processes with <b>nprocs</b> option,
+<em>r.patch</em> can run significantly faster, see benchmarks below.
+
+<div align="center" style="margin: 10px">
+     <img src="r_patch_benchmark_size.png" alt="benchmark for number of cells" border="0">
+     <img src="r_patch_benchmark_memory.png" alt="benchmark for memory size" border="0">
+     <br>
+     <i>Figure: Benchmark on the left shows execution time for different
+     number of cells, benchmark on the right shows execution time
+     for different memory size for 5000x5000 raster. See benchmark scripts in source code.
+     (Intel Core i9-10940X CPU @ 3.30GHz x 28) </i>
+     </div>
+<p>To reduce the memory requirements to minimum, set option <b>memory</b> to zero.
+To take advantage of the parallelization, GRASS GIS
+needs to compiled with OpenMP enabled.
+
 <h2>EXAMPLES</h2>
 
 <h3>Example with three maps</h3>
@@ -209,4 +226,6 @@ r.patch input=$MAPS output=maps_mosaic
 Michael Shapiro, 
 U.S. Army Construction Engineering Research Laboratory
 <br>
--z flag and performance improvement by Huidae Cho
+Huidae Cho (-z flag and performance improvement)
+<br>
+Aaron Saw Min Sern (OpenMP support).

BIN
raster/r.patch/r_patch_benchmark_memory.png


BIN
raster/r.patch/r_patch_benchmark_size.png


+ 37 - 0
raster/r.patch/testsuite/test_rpatch_artificial.py

@@ -128,6 +128,7 @@ class TestSmallDataNoOverlap(TestCase):
     cell_1 = "rpatch_small_test_cell_1"
     cell_2 = "rpatch_small_test_cell_2"
     cell_patched = "rpatch_small_test_cell_patched"
+    cell_patched_threaded = "rpatch_small_test_cell_patched_threaded"
     cell_patched_ref = "rpatch_small_test_cell_patched_ref"
 
     @classmethod
@@ -158,7 +159,14 @@ class TestSmallDataNoOverlap(TestCase):
         self.assertModule(
             "r.patch", input=(self.cell_1, self.cell_2), output=self.cell_patched
         )
+        self.assertModule(
+            "r.patch",
+            input=(self.cell_1, self.cell_2),
+            nprocs=8,
+            output=self.cell_patched_threaded,
+        )
         self.to_remove.append(self.cell_patched)
+        self.to_remove.append(self.cell_patched_threaded)
         self.runModule(
             "r.in.ascii",
             input="-",
@@ -169,6 +177,9 @@ class TestSmallDataNoOverlap(TestCase):
         self.assertRastersNoDifference(
             self.cell_patched, self.cell_patched_ref, precision=0
         )
+        self.assertRastersNoDifference(
+            self.cell_patched_threaded, self.cell_patched_ref, precision=0
+        )
 
 
 class TestSmallDataOverlap(TestCase):
@@ -180,7 +191,9 @@ class TestSmallDataOverlap(TestCase):
     cell_ab = "rpatch_small_test_cell_ab_reference"
     cell_ba = "rpatch_small_test_cell_ba_reference"
     cell_ab_result = "rpatch_small_test_cell_ab_result"
+    cell_ab_result_threaded = "rpatch_small_test_cell_ab_result_threaded"
     cell_ba_result = "rpatch_small_test_cell_ba_result"
+    cell_ba_result_threaded = "rpatch_small_test_cell_ba_result_threaded"
 
     @classmethod
     def setUpClass(cls):
@@ -221,9 +234,21 @@ class TestSmallDataOverlap(TestCase):
             output=self.cell_ab_result,
             flags="z",
         )
+        self.assertModule(
+            "r.patch",
+            input=(self.cell_a, self.cell_b),
+            output=self.cell_ab_result_threaded,
+            flags="z",
+            nprocs=8,
+        )
         self.assertRasterExists(self.cell_ab_result)
+        self.assertRasterExists(self.cell_ab_result_threaded)
         self.to_remove.append(self.cell_ab_result)
+        self.to_remove.append(self.cell_ab_result_threaded)
         self.assertRastersNoDifference(self.cell_ab_result, self.cell_ab, precision=0)
+        self.assertRastersNoDifference(
+            self.cell_ab_result_threaded, self.cell_ab, precision=0
+        )
 
     def test_patch_oder_ba_cell(self):
         """Test patching two overlapping CELL raster maps (watching order)"""
@@ -233,9 +258,21 @@ class TestSmallDataOverlap(TestCase):
             output=self.cell_ba_result,
             flags="z",
         )
+        self.assertModule(
+            "r.patch",
+            input=(self.cell_b, self.cell_a),
+            flags="z",
+            output=self.cell_ba_result_threaded,
+            nprocs=8,
+        )
         self.assertRasterExists(self.cell_ba_result)
+        self.assertRasterExists(self.cell_ba_result_threaded)
         self.to_remove.append(self.cell_ba_result)
+        self.to_remove.append(self.cell_ba_result_threaded)
         self.assertRastersNoDifference(self.cell_ba_result, self.cell_ba, precision=0)
+        self.assertRastersNoDifference(
+            self.cell_ba_result_threaded, self.cell_ba, precision=0
+        )
 
 
 if __name__ == "__main__":