Explorar el Código

db.univar: Ignore rows with NULLs with -e (#1341)

When value in a row is NULL, basic statistics ignore the rows
(e.g., not counted in N), however,
prior to this PR, the extended stats (-e) were processing all rows.
This was causing two issues:
1. db.univar/v.db.univar failed with 'could not convert string to float', or
2. wrong results when number of NULLs happened to be less than N/4.

Now the NULL values (represented as empty strings) are always skipped in
the same was as for the basic stats,
i.e., not being counted towards the quartiles anymore (which is what was
causing the wrong results when number of NULL values was low).
With where conditions such as 'IS NOT NULL' neither of these issues emerged.
Now a similar condition is in the code for extended stats too.

This PR refactors the rstrip calls, so they are in the code only once per loop.

The test now tests a run with both basic and extended stats with data
which contains NULL values. Actual values are not tested as before,
but setup and teardown steps are cleaned up (e.g. not trying to delete
elevation raster).
Vaclav Petras hace 4 años
padre
commit
49471321fa
Se han modificado 2 ficheros con 39 adiciones y 30 borrados
  1. 11 7
      scripts/db.univar/db.univar.py
  2. 28 23
      scripts/db.univar/testsuite/test_db_univar.py

+ 11 - 7
scripts/db.univar/db.univar.py

@@ -161,9 +161,10 @@ def main():
 
     tmpf = open(tmp)
     for line in tmpf:
-        if len(line.rstrip("\r\n")) == 0:
+        line = line.rstrip("\r\n")
+        if len(line) == 0:
             continue
-        x = float(line.rstrip("\r\n"))
+        x = float(line)
         N += 1
         sum += x
         sum2 += x * x
@@ -247,17 +248,20 @@ def main():
     inf = open(tmp + ".sort")
     l = 1
     for line in inf:
+        line = line.rstrip("\r\n")
+        if len(line) == 0:
+            continue
         if l == q25pos:
-            q25 = float(line.rstrip("\r\n"))
+            q25 = float(line)
         if l == q50apos:
-            q50a = float(line.rstrip("\r\n"))
+            q50a = float(line)
         if l == q50bpos:
-            q50b = float(line.rstrip("\r\n"))
+            q50b = float(line)
         if l == q75pos:
-            q75 = float(line.rstrip("\r\n"))
+            q75 = float(line)
         for i in range(len(ppos)):
             if l == ppos[i]:
-                pval[i] = float(line.rstrip("\r\n"))
+                pval[i] = float(line)
         l += 1
 
     q50 = (q50a + q50b) / 2

+ 28 - 23
scripts/db.univar/testsuite/test_db_univar.py

@@ -2,47 +2,52 @@
 Created on Sun Jun 07 21:01:34 2018
 
 @author: Sanjeet Bhatti
+@author: Vaclav Petras
 """
 
 from grass.gunittest.case import TestCase
 from grass.gunittest.main import test
 from grass.gunittest.gmodules import SimpleModule
 
-from grass.script.core import run_command
-
 
 class TestDbUnivar(TestCase):
-    """Test db.univar script"""
+    """Test running db.univar script with NULL values and extended stats"""
 
-    columnName = "heights"
-    mapName = "samples"
+    column_name = "heights"
+    map_name = "samples"
 
     @classmethod
-    def setUpClass(cls):
-        """Use temp region"""
+    def setUpClass(cls):  # pylint: disable=invalid-name
+        """Generate vector points in extend larger than raster with values"""
         cls.use_temp_region()
-        cls.runModule("g.region", raster="elevation", flags="p")
+        cls.runModule("g.region", raster="elevation")
+        cls.runModule("g.region", e="e+5000", w="w-5000", s="s-5000", n="n+5000")
+        cls.runModule("v.random", output=cls.map_name, npoints=100, seed=42)
+        cls.runModule(
+            "v.db.addtable",
+            map=cls.map_name,
+            columns="{} double precision".format(cls.column_name),
+        )
+        cls.runModule(
+            "v.what.rast", map=cls.map_name, raster="elevation", column=cls.column_name
+        )
 
     @classmethod
-    def tearDownClass(cls):
-        """Remove temporary region"""
-        cls.runModule("g.remove", flags="f", type="raster", name="elevation")
+    def tearDownClass(cls):  # pylint: disable=invalid-name
+        """Remove temporary region and vector"""
         cls.del_temp_region()
-
-        run_command("v.db.droptable", map="samples", flags="f")
+        cls.runModule("g.remove", flags="f", type="vector", name=cls.map_name)
 
     def test_calculate(self):
-        """run db.univar"""
-        run_command("v.random", output=self.mapName, n=100, overwrite="True")
-        run_command(
-            "v.db.addtable", map=self.mapName, column="heights double precision"
-        )
-        run_command(
-            "v.what.rast", map=self.mapName, raster="elevation", column=self.columnName
-        )
-        run_command("v.db.select", map=self.mapName)
+        """Check that db.univar runs"""
+        module = SimpleModule("db.univar", table=self.map_name, column=self.column_name)
+        self.assertModule(module)
 
-        module = SimpleModule("db.univar", table=self.mapName, column=self.columnName)
+    def test_calculate_extended(self):
+        """Check that db.univar -e runs"""
+        module = SimpleModule(
+            "db.univar", table=self.map_name, flags="e", column=self.column_name
+        )
         self.assertModule(module)