فهرست منبع

HPCC-8413 Fix problems with STEPPED JOIN accessing non existant results

A stepped join tries to prevent the inputs from creating splitters - since that
will default the smart stepping on indexes.  However if the inputs were not
indexesw then it could generate an internal error since one part of the graph
assumed a spill would be created, but none ever was.

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 12 سال پیش
والد
کامیت
e71ce4015a
3فایلهای تغییر یافته به همراه107 افزوده شده و 3 حذف شده
  1. 65 3
      ecl/hqlcpp/hqlresource.cpp
  2. 1 0
      ecl/hqlcpp/hqlresource.ipp
  3. 41 0
      ecl/regress/bug8413.ecl

+ 65 - 3
ecl/hqlcpp/hqlresource.cpp

@@ -1735,6 +1735,7 @@ EclResourcer::EclResourcer(IErrorReceiver * _errors, IConstWorkUnit * _wu, Clust
     targetClusterType = _targetClusterType; 
     clusterSize = _clusterSize ? _clusterSize : FIXED_CLUSTER_SIZE;
     insideNeverSplit = false;
+    insideSteppedNeverSplit = false;
     options.mangleSpillNameWithWuid = false;
     options.minimizeSpillSize = _translatorOptions.minimizeSpillSize;
 
@@ -2554,14 +2555,71 @@ protected:
 };
 
 
+static bool isPotentialCompoundSteppedIndexRead(IHqlExpression * expr)
+{
+    loop
+    {
+        switch (expr->getOperator())
+        {
+        case no_compound_diskread:
+        case no_compound_disknormalize:
+        case no_compound_diskaggregate:
+        case no_compound_diskcount:
+        case no_compound_diskgroupaggregate:
+        case no_compound_childread:
+        case no_compound_childnormalize:
+        case no_compound_childaggregate:
+        case no_compound_childcount:
+        case no_compound_childgroupaggregate:
+        case no_compound_selectnew:
+        case no_compound_inline:
+            return false;
+        case no_compound_indexread:
+        case no_newkeyindex:
+            return true;
+        case no_keyedlimit:
+        case no_preload:
+        case no_filter:
+        case no_hqlproject:
+        case no_newusertable:
+        case no_limit:
+        case no_sorted:
+        case no_preservemeta:
+        case no_distributed:
+        case no_grouped:
+        case no_stepped:
+        case no_section:
+        case no_sectioninput:
+        case no_dataset_alias:
+            break;
+        case no_choosen:
+            {
+                IHqlExpression * arg2 = expr->queryChild(2);
+                if (arg2 && !arg2->isPure())
+                    return false;
+                break;
+            }
+        default:
+            return false;
+        }
+        expr = expr->queryChild(0);
+    }
+}
 
 bool EclResourcer::findSplitPoints(IHqlExpression * expr)
 {
     ResourcerInfo * info = queryResourceInfo(expr);
     bool savedInsideNeverSplit = insideNeverSplit;
+    bool savedInsideSteppedNeverSplit = insideSteppedNeverSplit;
+    if (insideSteppedNeverSplit && info)
+    {
+        if (!isPotentialCompoundSteppedIndexRead(expr) && (expr->getOperator() != no_datasetlist))
+            insideSteppedNeverSplit = false;
+    }
+
     if (info && info->numUses)
     {
-        if (insideNeverSplit)
+        if (insideNeverSplit || insideSteppedNeverSplit)
             info->neverSplit = true;
         if (info->isAlreadyInScope && (info->numUses == 0) && expr->isDatarow())
         {
@@ -2585,7 +2643,7 @@ bool EclResourcer::findSplitPoints(IHqlExpression * expr)
     {
         info = queryCreateResourceInfo(expr);
         info->numUses++;
-        if (insideNeverSplit)
+        if (insideNeverSplit || insideSteppedNeverSplit)
             info->neverSplit = true;
 
         bool isActivity = true;
@@ -2657,7 +2715,7 @@ bool EclResourcer::findSplitPoints(IHqlExpression * expr)
         case no_mergejoin:
         case no_nwayjoin:
             if (options.preventSteppedSplit)
-                insideNeverSplit = true;
+                insideSteppedNeverSplit = true;
             break;
         case no_compound_diskread:
         case no_compound_disknormalize:
@@ -2684,6 +2742,7 @@ bool EclResourcer::findSplitPoints(IHqlExpression * expr)
         if (!type || type->isScalar())
         {
             insideNeverSplit = savedInsideNeverSplit;
+            insideSteppedNeverSplit = savedInsideSteppedNeverSplit;
             return false;
         }
 
@@ -2716,12 +2775,14 @@ bool EclResourcer::findSplitPoints(IHqlExpression * expr)
                     findSplitPoints(cur);
                 }
                 insideNeverSplit = savedInsideNeverSplit;
+                insideSteppedNeverSplit = savedInsideSteppedNeverSplit;
                 gatherChildSplitPoints(expr, alwaysHoistChild, info, first, last);
                 break;
             }
         }
 
         insideNeverSplit = false;
+        insideSteppedNeverSplit = false;
         ForEachItemIn(i2, info->childDependents)
         {
             IHqlExpression & cur = info->childDependents.item(i2);
@@ -2738,6 +2799,7 @@ bool EclResourcer::findSplitPoints(IHqlExpression * expr)
     }
 
     insideNeverSplit = savedInsideNeverSplit;
+    insideSteppedNeverSplit = savedInsideSteppedNeverSplit;
     return info->containsActivity;
 }
 

+ 1 - 0
ecl/hqlcpp/hqlresource.ipp

@@ -422,6 +422,7 @@ protected:
     bool spillMultiCondition;
     bool spotThroughAggregate;
     bool insideNeverSplit;
+    bool insideSteppedNeverSplit;
     CResourceOptions options;
     HqlExprArray rootConditions;
     HqlExprCopyArray activeSelectors;

+ 41 - 0
ecl/regress/bug8413.ecl

@@ -0,0 +1,41 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+d := dataset('in', { string15 name, unsigned value }, thor);
+
+i := index(d, { name, value } , {}, '\\home\\person.name_first.key');
+
+f := i(name != 'Gavin');
+
+r1 := RECORD
+string15 name;
+unsigned value;
+unsigned seq;
+  END;
+  
+p := PROJECT(f, TRANSFORM(r1, SELF.seq := RANDOM(), SELF := LEFT));
+s1 := SORT(p, value);
+
+output(s1(name != 'Jim'));
+
+p2 := DEDUP(s1, name);
+
+output(p2);
+
+f2 := JOIN([s1,p2,p2], LEFT.value = RIGHT.value, TRANSFORM(LEFT), SORTED(value));
+
+output(f2);