Kaynağa Gözat

HPCC-17078 Dedup ALL in child query could lose rows if not fully read

If a hash dedup in a child query was only partially read, e.g. because
it was upstream from a CHOOSEN or EXISTS, then the hash tables might
not be cleared before the next iteration.
That led to records being wrongly deduped on the next child query
iteration.

Signed-off-by: Jake Smith <jake.smith@lexisnexisrisk.com>
Jake Smith 8 yıl önce
ebeveyn
işleme
c3ad026e90

+ 0 - 2
roxie/ccd/ccdserver.cpp

@@ -7311,12 +7311,10 @@ public:
 
     virtual void reset()
     {
-#ifdef _DEBUG
         while (survivors.isItem(survivorIndex))
         {
             ReleaseRoxieRow(survivors.item(survivorIndex++));
         }
-#endif
         survivors.kill();
         eof = false;
         first = true;

+ 53 - 0
testing/regress/ecl/dedupchild.ecl

@@ -0,0 +1,53 @@
+cRec := RECORD
+ unsigned4 cid;
+END;
+
+pRec := RECORD
+ unsigned4 id;
+ DATASET(cRec) kids;
+END;
+
+pSetSize := 2000;
+
+cRec makeC(unsigned4 c) := TRANSFORM
+  SELF.cid := c;
+END;
+
+pRec makeP(unsigned4 c) := TRANSFORM
+  SELF.id := c;
+  SELF.kids := DATASET(1+(c%10), makeC(COUNTER%2)); // 1 to 10 kids per parent; every cid is COUNTER%2, i.e. only 0 or 1
+END;
+
+pSet := DATASET(pSetSize, makeP(COUNTER), DISTRIBUTED);
+
+kids  := pSet.kids;
+
+outRec := RECORD
+ unsigned val1;
+ unsigned val2;
+ unsigned val3;
+ unsigned val4;
+ unsigned val5;
+END;
+
+outRec doTrans(pRec l) := TRANSFORM
+ SortedKids := SORT(l.kids, cid);
+ DedupKids1 := DEDUP(SortedKids, cid); // non-ALL dedup over the sorted kids
+ DedupKids2 := DEDUP(l.kids, cid, ALL);
+ DedupKids3 := DEDUP(l.kids(cid<99999), cid, ALL); // always-true filter (cid is only 0 or 1) makes this expression distinct, preventing CSE (common subexpression elimination) of the dedup
+ DedupKids4 := DEDUP(l.kids(cid<99998), cid, ALL); // always-true filter with a different constant, again to prevent CSE of the dedup
+ 
+ SELF.val1 := SUM(DedupKids1, cid);
+ SELF.val2 := SUM(DedupKids2, cid);
+ SELF.val3 := IF(EXISTS(DedupKids1), 1, 0);
+ SELF.val4 := IF(EXISTS(DedupKids3), 1, 0); // EXISTS over a DEDUP ALL: the dedup output may be only partially read
+ SELF.val5 := COUNT(CHOOSEN(DedupKids4, 2)); // CHOOSEN over a DEDUP ALL: the dedup output may be only partially read
+END;
+
+p := PROJECT(pSet, doTrans(LEFT)); // doTrans is applied to each of the pSetSize parent rows
+
+DATASET([{'SumDedupVals', SUM(p, val1)},
+         {'SumDedupAllVals', SUM(p, val2)},
+         {'ExistsDedupTotal', SUM(p, val3)},
+         {'ExistsDedupAllTotal', SUM(p, val4)},
+         {'ChoosenDedupAllTotal', SUM(p, val5)} ], {string type, unsigned8 val});

+ 7 - 0
testing/regress/ecl/key/dedupchild.xml

@@ -0,0 +1,7 @@
+<Dataset name='Result 1'>
+ <Row><type>SumDedupVals</type><val>2000</val></Row>
+ <Row><type>SumDedupAllVals</type><val>2000</val></Row>
+ <Row><type>ExistsDedupTotal</type><val>2000</val></Row>
+ <Row><type>ExistsDedupAllTotal</type><val>2000</val></Row>
+ <Row><type>ChoosenDedupAllTotal</type><val>3800</val></Row>
+</Dataset>

+ 4 - 0
thorlcr/activities/hashdistrib/thhashdistribslave.cpp

@@ -3005,7 +3005,11 @@ void CHashTableRowTable::init(rowidx_t sz)
     // reinitialize if need bigger or if requested size is much smaller than existing
     rowidx_t newMaxRows = activity.queryRowManager()->getExpectedCapacity(sz * sizeof(rowidx_t *), activity.allocFlags) / sizeof(rowidx_t *);
     if (newMaxRows <= maxRows && ((maxRows-newMaxRows) <= HASHDEDUP_HT_INC_SIZE))
+    {
+        clear();
         return;
+    }
+    clearRows();
     ReleaseThorRow(rows);
     OwnedConstThorRow newRows = allocateRowTable(sz);
     if (!newRows)