瀏覽代碼

HPCC-17078 Dedup ALL in child query could lose rows if not fully read

If a hash dedup in a child query was only partially read, e.g. because
it was upstream of a CHOOSEN or EXISTS, then the hash tables might not
be cleared before the next iteration.
As a result, records in the next child query iteration could be
wrongly treated as duplicates and removed.

Signed-off-by: Jake Smith <jake.smith@lexisnexisrisk.com>
Jake Smith 8 年之前
父節點
當前提交
c3ad026e90

+ 0 - 2
roxie/ccd/ccdserver.cpp

@@ -7311,12 +7311,10 @@ public:
 
     virtual void reset()
     {
-#ifdef _DEBUG
         while (survivors.isItem(survivorIndex))
         {
             ReleaseRoxieRow(survivors.item(survivorIndex++));
         }
-#endif
         survivors.kill();
         eof = false;
         first = true;

+ 53 - 0
testing/regress/ecl/dedupchild.ecl

@@ -0,0 +1,53 @@
+cRec := RECORD // child row layout: a single 4-byte unsigned id
+ unsigned4 cid;
+END;
+
+pRec := RECORD // parent row layout: an id plus a nested child dataset
+ unsigned4 id;
+ DATASET(cRec) kids;
+END;
+
+pSetSize := 2000; // number of parent rows generated for pSet
+
+cRec makeC(unsigned4 c) := TRANSFORM // builds one child row with cid = c
+  SELF.cid := c;
+END;
+
+pRec makeP(unsigned4 c) := TRANSFORM // builds parent c together with its child dataset
+  SELF.id := c;
+  SELF.kids := DATASET(1+(c%10), makeC(COUNTER%2)); // 1..10 children per parent; cid alternates 1,0,1,...
+END;
+
+pSet := DATASET(pSetSize, makeP(COUNTER), DISTRIBUTED); // pSetSize parents, id taken from COUNTER
+
+kids  := pSet.kids; // not referenced again in this file
+
+outRec := RECORD // one result row per parent; val1..val5 are assigned in doTrans
+ unsigned val1;
+ unsigned val2;
+ unsigned val3;
+ unsigned val4;
+ unsigned val5;
+END;
+
+outRec doTrans(pRec l) := TRANSFORM // applies each dedup variant to this parent's l.kids
+ SortedKids := SORT(l.kids, cid);
+ DedupKids1 := DEDUP(SortedKids, cid); // dedup without ALL, over the sorted children
+ DedupKids2 := DEDUP(l.kids, cid, ALL); // dedup ALL, consumed in full by the SUM below
+ DedupKids3 := DEDUP(l.kids(cid<99999), cid, ALL); // filter to prevent CSE of dedup
+ DedupKids4 := DEDUP(l.kids(cid<99998), cid, ALL); // filter to prevent CSE of dedup
+ 
+ SELF.val1 := SUM(DedupKids1, cid); // sum of distinct cids via the non-ALL dedup
+ SELF.val2 := SUM(DedupKids2, cid); // sum of distinct cids via dedup ALL
+ SELF.val3 := IF(EXISTS(DedupKids1), 1, 0); // EXISTS over the non-ALL dedup
+ SELF.val4 := IF(EXISTS(DedupKids3), 1, 0); // EXISTS over a dedup ALL
+ SELF.val5 := COUNT(CHOOSEN(DedupKids4, 2)); // CHOOSEN limits the dedup ALL output to at most 2 rows
+END;
+
+p := PROJECT(pSet, doTrans(LEFT)); // one outRec per parent row
+
+DATASET([{'SumDedupVals', SUM(p, val1)}, // each row totals one val field across all parents
+         {'SumDedupAllVals', SUM(p, val2)},
+         {'ExistsDedupTotal', SUM(p, val3)},
+         {'ExistsDedupAllTotal', SUM(p, val4)},
+         {'ChoosenDedupAllTotal', SUM(p, val5)} ], {string type, unsigned8 val});

+ 7 - 0
testing/regress/ecl/key/dedupchild.xml

@@ -0,0 +1,7 @@
+<Dataset name='Result 1'>
+ <Row><type>SumDedupVals</type><val>2000</val></Row>
+ <Row><type>SumDedupAllVals</type><val>2000</val></Row>
+ <Row><type>ExistsDedupTotal</type><val>2000</val></Row>
+ <Row><type>ExistsDedupAllTotal</type><val>2000</val></Row>
+ <Row><type>ChoosenDedupAllTotal</type><val>3800</val></Row>
+</Dataset>

+ 4 - 0
thorlcr/activities/hashdistrib/thhashdistribslave.cpp

@@ -3005,7 +3005,11 @@ void CHashTableRowTable::init(rowidx_t sz)
     // reinitialize if need bigger or if requested size is much smaller than existing
     rowidx_t newMaxRows = activity.queryRowManager()->getExpectedCapacity(sz * sizeof(rowidx_t *), activity.allocFlags) / sizeof(rowidx_t *);
     if (newMaxRows <= maxRows && ((maxRows-newMaxRows) <= HASHDEDUP_HT_INC_SIZE))
+    {
+        clear();
         return;
+    }
+    clearRows();
     ReleaseThorRow(rows);
     OwnedConstThorRow newRows = allocateRowTable(sz);
     if (!newRows)