소스 검색

libpython: Avoid race condition when reading region in use_temp_region() (#638)

When g.region is called without -u flag, it always adjusts and writes back the
computational region. When a parallel process is reading it too, it hits an empty
(or rarely corrupted) file. region() and other region functions in core.py are already using it.

In the provided test, this happens when reading the WIND file. In the test, return codes
of background processes are not propagated (in Bash), so raster maps are created to track processes. These are created at the end of each Python script run.
The Bash script is parametrized to make it reusable also for a potential nesting region correctness test.

See also:
https://trac.osgeo.org/grass/ticket/2230 'g.region -p' writes new WIND file...
#250 r.import: ...WIND file disappears...
#630 unpredictable errors in the results with temp region (nesting suspected)
Vaclav Petras 5 년 전
부모
커밋
71c8a1f1d0

+ 1 - 1
lib/python/script/core.py

@@ -1278,7 +1278,7 @@ def use_temp_region():
     handler to delete the temporary region upon termination.
     """
     name = "tmp.%s.%d" % (os.path.basename(sys.argv[0]), os.getpid())
-    run_command("g.region", save=name, overwrite=True)
+    run_command("g.region", flags="u", save=name, overwrite=True)
     os.environ['WIND_OVERRIDE'] = name
     atexit.register(del_temp_region)
 

+ 78 - 0
lib/python/script/testsuite/data/script_using_temporary_region.py

@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+
+import subprocess
+import sys
+import os
+import platform
+
+import grass.script as gs
+
+
+def call_use_temp_region(script, size, remaining, nesting, map_name=None):
+    pid = os.getpid()
+    node = platform.node()
+    gs.message(
+        "PID {pid} ({node}):"
+        " Using {size}x{size} for temporary region on level {nesting}".format(
+            **locals()
+        )
+    )
+    # The use_temp_region() function is using program name to generate an
+    # identifiable name, so uncomment the following line to trick it and
+    # give us even more unique names. (This won't fully test the
+    # actual behavior, but it may be useful for debugging).
+    # sys.argv[0] = "{script}_nesting_{nesting}".format(**locals())
+    gs.use_temp_region()
+    gs.run_command("g.region", rows=size, cols=size)
+    if remaining:
+        nesting += 1
+        call = [sys.executable, script, remaining, str(nesting)]
+        if map_name:
+            call.append(map_name)
+        subprocess.check_call(call)
+    if map_name:
+        gs.run_command("r.mapcalc", expression="{map_name}_size_{size}_nesting_{nesting} = 1".format(**locals()))
+
+
+def main():
+    this_file = sys.argv[0]
+    if len(sys.argv) == 1:
+        # Some resonable defaults for a trivial test to allow calling without
+        # any parameters.
+        size = 100
+        remaining = None
+        nesting = 0
+        map_name = None
+    elif len(sys.argv) not in [3, 4]:
+        gs.fatal(
+            "Usage: <script name> <size[,size,...]> <nesting level> [<map name>]\n"
+            "  <script name>      The name of this file ({name})\n"
+            "  <size[,size,...]>  Remaining region sizes\n"
+            "  <nesting level>    Nesting level of this call\n"
+            "  <map name>         Base name of maps to generate (optional)\n"
+            "Examples:\n"
+            "  {name} 100 0\n"
+            "  {name} 100 1\n"
+            "  {name} 100,200 0\n"
+            "  {name} 100,200 0 test_raster".format(name=this_file)
+        )
+    else:
+        argument = sys.argv[1]
+        sizes = argument.split(",", 1)
+        size = sizes[0]
+        if len(sizes) > 1:
+            remaining = sizes[1]
+        else:
+            remaining = None
+        nesting = int(sys.argv[2])
+        if len(sys.argv) == 4:
+            map_name = sys.argv[3]
+        else:
+            map_name = None
+    call_use_temp_region(
+        script=this_file, size=size, remaining=remaining, nesting=nesting, map_name=map_name
+    )
+
+
+if __name__ == "__main__":
+    main()

+ 133 - 0
lib/python/script/testsuite/test_parallel_temp_region.sh

@@ -0,0 +1,133 @@
+#!/usr/bin/env bash
+
+# Test usage of temporary region in Python in parallel and when nested.
+# Even with the nesting the parallelization happens only on the first
+# level the nested processes don't create only one child.
+# Calls a Python script to do the actual job.
+# Script can be executed without parameters which is meant primarily for
+# automated testing.
+# See the command line parameters handling below for further details.
+
+# Undefined variables are errors.
+set -u
+# More set commands later on.
+
+if [ $# -eq 0 ]
+then
+    # No arguments supplied, use default which is small enough to run
+    # well on small machines and does not overwhelm a normal system.
+    MAP_PARALLEL=parallel_tmp_region_parallel
+    MAP_NEST=parallel_tmp_region_nest
+    NUM_SEQUNTIALS=10
+    NUM_PARALLELS=50
+    SIMPLE_SIZE=150
+    NEST_SIZES=100,200
+    NUM_NEST=2
+elif [ $# -eq 7 ]
+then
+    # Allow user to set a large number of processes.
+    MAP_PARALLEL=$1
+    MAP_NEST=$2
+    NUM_SEQUNTIALS=$3
+    NUM_PARALLELS=$4
+    SIMPLE_SIZE=$5
+    NEST_SIZES=$6
+    NUM_NEST=$7
+else
+    >&2 echo "Usage:"
+    >&2 echo "  $0"
+    >&2 echo "  $0 <par name> <nest name> <nproc seq> <nproc par> <simple size> <nest sizes> <num nest>"
+    >&2 echo "Example:"
+    >&2 echo "  $0 parallel nest 5 50 250 100,200,300,400 4"
+    >&2 echo "Notes:"
+    >&2 echo "  Runs are done in these combinations and order:"
+    >&2 echo "    Serial and simple (i.e., sequence, not nested)"
+    >&2 echo "    Parallel and simple (i.e., not nested)"
+    >&2 echo "    Serial and nested"
+    >&2 echo "    Parallel and nested"
+    >&2 echo "  The <num nest> should be number of elements in <nest sizes>."
+    >&2 echo "  Nesting numbering in output raster maps starts with 0."
+    >&2 echo "  Return codes are checked only for serial runs."
+    >&2 echo "  Raster maps are produced only for parallel runs."
+    >&2 echo "Use zero or seven parameters, not $#."
+    exit 1
+fi
+
+MAP_PARALLEL_PATTERN="${MAP_PARALLEL}_*_size_*_nesting_*"
+MAP_NEST_PATTERN="${MAP_NEST}_*_size_*_nesting_*"
+
+# Remove maps at exit.
+cleanup () {
+    EXIT_CODE=$?
+    g.remove type=raster pattern="${MAP_PARALLEL_PATTERN}" -f --quiet
+    g.remove type=raster pattern="${MAP_NEST_PATTERN}" -f --quiet
+    exit $EXIT_CODE
+}
+
+trap cleanup EXIT
+
+SCRIPT="./data/script_using_temporary_region.py"
+
+# First, test if the script is in its place.
+# (This needs to be tested explicitly because command_not_found_handle
+# implementations may return 0 when command/file is not found.)
+command -v "${SCRIPT}"
+if [ $? -ne 0 ]
+then
+    >&2 echo "Script ${SCRIPT} not found"
+    exit 1
+fi
+
+# Fail fast and show commands
+set -e
+set -x
+
+# Serial/Sequential
+
+for i in `seq 1 $NUM_SEQUNTIALS`
+do
+    "${SCRIPT}" "${SIMPLE_SIZE}" 0
+done
+
+# Parallel
+# Because it is much harder to check return codes of parallel processes,
+# we go a step further and generate data and check their presence.
+
+for i in `seq 1 $NUM_PARALLELS`
+do
+    "${SCRIPT}" "${SIMPLE_SIZE}" 0 "${MAP_PARALLEL}_$i" &
+done
+
+wait
+
+EXPECTED=$NUM_PARALLELS
+NUM=$(g.list type=raster pattern="${MAP_PARALLEL_PATTERN}" mapset=. | wc -l)
+
+if [ ${NUM} -ne ${EXPECTED} ]
+then
+    echo "Pure parallel test: Got ${NUM} but expected ${EXPECTED} maps"
+    exit 1
+fi
+
+# With nesting
+
+for i in `seq 1 $NUM_SEQUNTIALS`
+do
+    "${SCRIPT}" "${NEST_SIZES}" 0
+done
+
+for i in `seq 1 $NUM_PARALLELS`
+do
+    "${SCRIPT}" "${NEST_SIZES}" 0 "${MAP_NEST}_$i" &
+done
+
+wait
+
+EXPECTED=$(( $NUM_PARALLELS * $NUM_NEST ))
+NUM=$(g.list type=raster pattern="${MAP_NEST_PATTERN}" mapset=. | wc -l)
+
+if [ ${NUM} -ne ${EXPECTED} ]
+then
+    echo "Parallel with nesting: Got ${NUM} but expected ${EXPECTED} maps"
+    exit 1
+fi