浏览代码

Use a spill result for a dataset hoisted from a loop

A child query that accesses a dataset that is invarient within the child
query has that dataset executed outside of the child query.  However
because the child query only executes on a single node it uses a workunit
result instead of a spill to cope hold the temporary result.

In thor, a loop that isn't in a child query contains global operations.
So any loop invariant datasets that are extracted should still spill to
a disk spill rather than using a workunit result.

A future better fix is probably to always write to graph results for
any values hoisted within graphs (for hthor, roxie and thor).  However
that will require work in the engines, so not for 3.8.

Fixes #2081.

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 13 年之前
父节点
当前提交
d756289160
共有 5 个文件被更改,包括 184 次插入16 次删除
  1. 49 16
      ecl/hqlcpp/hqlresource.cpp
  2. 1 0
      ecl/hqlcpp/hqlresource.ipp
  3. 44 0
      ecl/regress/loophoist.ecl
  4. 45 0
      ecl/regress/loophoist2.ecl
  5. 45 0
      ecl/regress/loophoist3.ecl

+ 49 - 16
ecl/hqlcpp/hqlresource.cpp

@@ -1964,9 +1964,16 @@ static HqlTransformerInfo eclHoistLocatorInfo("EclHoistLocator");
 class EclHoistLocator : public NewHqlTransformer
 {
 public:
-    EclHoistLocator(HqlExprCopyArray & _originalMatches, HqlExprArray & _matches, BoolArray & _alwaysHoistMatches) 
-        : NewHqlTransformer(eclHoistLocatorInfo), originalMatched(_originalMatches), matched(_matches), alwaysHoistMatches(_alwaysHoistMatches)
-    { 
+    EclHoistLocator(HqlExprCopyArray & _originalMatches, HqlExprArray & _matches, BoolArray & _singleNode, BoolArray & _alwaysHoistMatches)
+        : NewHqlTransformer(eclHoistLocatorInfo), originalMatched(_originalMatches), matched(_matches), singleNode(_singleNode), alwaysHoistMatches(_alwaysHoistMatches)
+    {
+        alwaysSingle = true;
+    }
+
+    void analyseChild(IHqlExpression * expr, bool _alwaysSingle)
+    {
+        alwaysSingle = _alwaysSingle;
+        analyse(expr, 0);
     }
 
     void noteDataset(IHqlExpression * expr, IHqlExpression * hoisted, bool alwaysHoist)
@@ -1980,9 +1987,15 @@ public:
             originalMatched.append(*expr);
             matched.append(*LINK(hoisted));
             alwaysHoistMatches.append(alwaysHoist);
+            singleNode.append(alwaysSingle);
+        }
+        else
+        {
+            if (alwaysHoist && !alwaysHoistMatches.item(match))
+                alwaysHoistMatches.replace(true, match);
+            if (alwaysSingle && !singleNode.item(match))
+                singleNode.replace(true, match);
         }
-        else if (alwaysHoist && !alwaysHoistMatches.item(match))
-            alwaysHoistMatches.replace(true, match);
     }
 
     void noteScalar(IHqlExpression * expr, IHqlExpression * value)
@@ -2047,13 +2060,16 @@ public:
             originalMatched.append(*expr);
             matched.append(*hoisted.getClear());
             alwaysHoistMatches.append(true);
+            singleNode.append(true);
         }
     }
 
 protected:
     HqlExprCopyArray & originalMatched;
     HqlExprArray & matched;
+    BoolArray & singleNode;
     BoolArray & alwaysHoistMatches;
+    bool alwaysSingle;
 };
 
 
@@ -2061,8 +2077,8 @@ protected:
 class EclChildSplitPointLocator : public EclHoistLocator
 {
 public:
-    EclChildSplitPointLocator(IHqlExpression * _original, HqlExprCopyArray & _selectors, HqlExprCopyArray & _originalMatches, HqlExprArray & _matches, BoolArray & _alwaysHoistMatches, bool _groupedChildIterators, bool _supportsChildQueries) 
-    : EclHoistLocator(_originalMatches, _matches, _alwaysHoistMatches), selectors(_selectors), groupedChildIterators(_groupedChildIterators), supportsChildQueries(_supportsChildQueries)
+    EclChildSplitPointLocator(IHqlExpression * _original, HqlExprCopyArray & _selectors, HqlExprCopyArray & _originalMatches, HqlExprArray & _matches, BoolArray & _singleNode, BoolArray & _alwaysHoistMatches, bool _groupedChildIterators, bool _supportsChildQueries)
+    : EclHoistLocator(_originalMatches, _matches, _singleNode, _alwaysHoistMatches), selectors(_selectors), groupedChildIterators(_groupedChildIterators), supportsChildQueries(_supportsChildQueries)
     { 
         original = _original;
         okToSelect = false; 
@@ -2079,14 +2095,16 @@ public:
         }
     }
 
-    void findSplitPoints(IHqlExpression * expr, unsigned from, unsigned to, bool _executedOnce)
+    void findSplitPoints(IHqlExpression * expr, unsigned from, unsigned to, bool _alwaysSingle, bool _executedOnce)
     {
+        alwaysSingle = _alwaysSingle;
         for (unsigned i=from; i < to; i++)
         {
             IHqlExpression * cur = expr->queryChild(i);
             executedOnce = _executedOnce || cur->isAttribute();     // assume attributes are only executed once.
             findSplitPoints(cur);
         }
+        alwaysSingle = false;
     }
 
 protected:
@@ -2445,7 +2463,7 @@ protected:
 void EclResourcer::gatherChildSplitPoints(IHqlExpression * expr, BoolArray & alwaysHoistChild, ResourcerInfo * info, unsigned first, unsigned last)
 {
     //NB: Don't call member functions to ensure correct nesting of transform mutexes.
-    EclChildSplitPointLocator locator(expr, activeSelectors, info->originalChildDependents, info->childDependents, alwaysHoistChild, options.groupedChildIterators, options.supportsChildQueries);
+    EclChildSplitPointLocator locator(expr, activeSelectors, info->originalChildDependents, info->childDependents, info->childSingleNode, alwaysHoistChild, options.groupedChildIterators, options.supportsChildQueries);
     unsigned max = expr->numChildren();
 
     //If child queries are supported then don't hoist the expressions if they might only be evaluated once
@@ -2455,18 +2473,32 @@ void EclResourcer::gatherChildSplitPoints(IHqlExpression * expr, BoolArray & alw
     case no_setresult:
     case no_selectnth:
         //set results, only done once=>don't hoist conditionals
-        locator.findSplitPoints(expr, last, max, true);
+        locator.findSplitPoints(expr, last, max, true, true);
         return;
+    case no_loop:
+        if ((options.targetClusterType == ThorLCRCluster) && !options.isChildQuery)
+        {
+            //This is ugly!  The body is executed in parallel, so don't force that as a work unit result
+            //It means some child query expressions within loops don't get forced into work unit writes
+            //but that just means that the generated code will be not as good as it could be.
+            const unsigned bodyArg = 4;
+            locator.findSplitPoints(expr, 1, bodyArg, true, false);
+            locator.findSplitPoints(expr, bodyArg, bodyArg+1, false, false);
+            locator.findSplitPoints(expr, bodyArg+1, max, true, false);
+            return;
+        }
+        break;
     }
-    locator.findSplitPoints(expr, 0, first, true);          // IF() conditions only evaluated once... => don't force
-    locator.findSplitPoints(expr, last, max, false);
+    locator.findSplitPoints(expr, 0, first, true, true);          // IF() conditions only evaluated once... => don't force
+    locator.findSplitPoints(expr, last, max, true, false);
 }
 
 
 class EclThisNodeLocator : public EclHoistLocator
 {
 public:
-    EclThisNodeLocator(HqlExprCopyArray & _originalMatches, HqlExprArray & _matches, BoolArray & _alwaysHoistMatches) : EclHoistLocator(_originalMatches, _matches, _alwaysHoistMatches)
+    EclThisNodeLocator(HqlExprCopyArray & _originalMatches, HqlExprArray & _matches, BoolArray & _singleNode, BoolArray & _alwaysHoistMatches)
+        : EclHoistLocator(_originalMatches, _matches, _singleNode, _alwaysHoistMatches)
     { 
         allNodesDepth = 0;
     }
@@ -2657,8 +2689,8 @@ bool EclResourcer::findSplitPoints(IHqlExpression * expr)
         case no_allnodes:
             {
                 //MORE: This needs to recursively walk and lift any contained no_selfnode, but don't go past another nested no_allnodes;
-                EclThisNodeLocator locator(info->originalChildDependents, info->childDependents, alwaysHoistChild);
-                locator.analyse(expr->queryChild(0), 0);
+                EclThisNodeLocator locator(info->originalChildDependents, info->childDependents, info->childSingleNode, alwaysHoistChild);
+                locator.analyseChild(expr->queryChild(0), true);
                 break;
             }
         default:
@@ -3483,7 +3515,8 @@ void EclResourcer::addDependencies(IHqlExpression * expr, ResourceGraphInfo * gr
         {
             addDependencies(&cur, NULL, NULL);
             ResourcerInfo * sourceInfo = queryResourceInfo(&cur);
-            sourceInfo->noteUsedFromChild();
+            if (info->childSingleNode.item(i))
+                sourceInfo->noteUsedFromChild();
             ResourceGraphLink * link = new ResourceGraphDependencyLink(sourceInfo->graph, &cur, graph, expr);
             graph->dependsOn.append(*link);
             links.append(*link);

+ 1 - 0
ecl/hqlcpp/hqlresource.ipp

@@ -277,6 +277,7 @@ public:
     HqlExprArray conditions;
     HqlExprArray childDependents;
     HqlExprCopyArray originalChildDependents;
+    BoolArray childSingleNode;
     HqlExprAttr spilledDataset;
     HqlExprAttr splitterOutput;
 

+ 44 - 0
ecl/regress/loophoist.ecl

@@ -0,0 +1,44 @@
+/*##############################################################################
+
+    Copyright (C) 2011 HPCC Systems.
+
+    All rights reserved. This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as
+    published by the Free Software Foundation, either version 3 of the
+    License, or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+############################################################################## */
+
+
+namesRecord :=
+            RECORD
+string20        surname;
+string10        forename;
+integer2        age := 25;
+            END;
+
+namesTable := dataset('x',namesRecord,FLAT);
+
+otherTable := dataset('other',namesRecord,FLAT);
+
+
+processLoop(dataset(namesRecord) in) := FUNCTION
+
+    x := otherTable(LENGTH(TRIM(surname)) > 1);
+    x2 := dedup(x, surname, all);
+
+    y := JOIN(in, x2, LEFT.surname = RIGHT.surname);
+
+    RETURN y;
+END;
+
+
+ds1 := LOOP(namesTable, 100, processLoop(ROWS(LEFT)));
+output(ds1);

+ 45 - 0
ecl/regress/loophoist2.ecl

@@ -0,0 +1,45 @@
+/*##############################################################################
+
+    Copyright (C) 2011 HPCC Systems.
+
+    All rights reserved. This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as
+    published by the Free Software Foundation, either version 3 of the
+    License, or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+############################################################################## */
+
+
+namesRecord :=
+            RECORD
+string20        surname;
+string10        forename;
+integer2        age := 25;
+            END;
+
+namesTable := dataset('x',namesRecord,FLAT);
+
+otherTable := dataset('other',namesRecord,FLAT);
+
+
+processLoop(dataset(namesRecord) in, unsigned c) := FUNCTION
+
+    x := otherTable(LENGTH(TRIM(surname)) > 1);
+    x2 := dedup(x, surname, all);
+
+    //Use x2 from a child query - so it IS force to a single node
+    y := JOIN(in, x2, LEFT.surname = RIGHT.surname and LEFT.surname != x2[c].surname);
+
+    RETURN y;
+END;
+
+
+ds1 := LOOP(namesTable, 100, processLoop(ROWS(LEFT), COUNTER));
+output(ds1);

+ 45 - 0
ecl/regress/loophoist3.ecl

@@ -0,0 +1,45 @@
+/*##############################################################################
+
+    Copyright (C) 2011 HPCC Systems.
+
+    All rights reserved. This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as
+    published by the Free Software Foundation, either version 3 of the
+    License, or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+############################################################################## */
+
+
+namesRecord :=
+            RECORD
+string20        surname;
+string10        forename;
+integer2        age := 25;
+            END;
+
+namesTable := dataset('x',namesRecord,FLAT);
+
+otherTable := dataset('other',namesRecord,FLAT);
+
+    x := otherTable(LENGTH(TRIM(surname)) > 1);
+    x2 := dedup(x, surname, all);
+
+processLoop(dataset(namesRecord) in, unsigned c) := FUNCTION
+
+
+    //Use x2 from a child query - so it IS force to a single node
+    y := JOIN(in, x2, LEFT.surname = RIGHT.surname);
+
+    RETURN y;
+END;
+
+
+ds1 := LOOP(namesTable, 100, LEFT.surname != x2[COUNTER].surname, processLoop(ROWS(LEFT), COUNTER));
+output(ds1);