فهرست منبع

HPCC-9848 Optimize fields after resourcing child graphs

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 11 سال پیش
والد
کامیت
0f8346123f
7 فایلهای تغییر یافته به همراه 74 افزوده شده و 67 حذف شده
  1. 0 1
      ecl/hqlcpp/hqlcpp.cpp
  2. 1 2
      ecl/hqlcpp/hqlcpp.ipp
  3. 5 29
      ecl/hqlcpp/hqlcppds.cpp
  4. 6 5
      ecl/hqlcpp/hqlhtcpp.cpp
  5. 61 27
      ecl/hqlcpp/hqlresource.cpp
  6. 1 1
      ecl/hqlcpp/hqlresource.hpp
  7. 0 2
      ecl/hqlcpp/hqlresource.ipp

+ 0 - 1
ecl/hqlcpp/hqlcpp.cpp

@@ -1608,7 +1608,6 @@ void HqlCppTranslator::cacheOptions()
         DebugOption(options.finalizeAllRows, "finalizeAllRows", false),
         DebugOption(options.maxLocalRowSize , "maxLocalRowSize", MAX_LOCAL_ROW_SIZE),
         DebugOption(options.optimizeGraph,"optimizeGraph", true),
-        DebugOption(options.optimizeChildGraph,"optimizeChildGraph", true),
         DebugOption(options.orderDiskFunnel,"orderDiskFunnel", true),
         DebugOption(options.alwaysAllowAllNodes,"alwaysAllowAllNodes", false),
         DebugOption(options.slidingJoins,"slidingJoins", false),

+ 1 - 2
ecl/hqlcpp/hqlcpp.ipp

@@ -626,7 +626,6 @@ struct HqlCppOptions
     bool                foldFilter;
     bool                finalizeAllRows;
     bool                optimizeGraph;
-    bool                optimizeChildGraph ;
     bool                orderDiskFunnel;
     bool                alwaysAllowAllNodes;
     bool                slidingJoins;
@@ -1849,7 +1848,7 @@ protected:
     IHqlExpression * getResourcedGraph(IHqlExpression * expr, IHqlExpression * graphIdExpr);
     IHqlExpression * getResourcedChildGraph(BuildCtx & ctx, IHqlExpression * childQuery, unsigned numResults, node_operator graphKind);
     IHqlExpression * optimizeCompoundSource(IHqlExpression * expr, unsigned flags);
-    IHqlExpression * optimizeGraphPostResource(IHqlExpression * expr, unsigned csfFlags);
+    IHqlExpression * optimizeGraphPostResource(IHqlExpression * expr, unsigned csfFlags, bool projectBeforeSpill);
     bool isInlineOk();
     GraphLocalisation getGraphLocalisation(IHqlExpression * expr, bool isInsideChildQuery);
     bool isAlwaysCoLocal();

+ 5 - 29
ecl/hqlcpp/hqlcppds.cpp

@@ -1685,7 +1685,7 @@ IHqlExpression * HqlCppTranslator::getResourcedChildGraph(BuildCtx & ctx, IHqlEx
         updateTimer("workunit;tree transform: optimize disk read", msTick()-time);
     }
 
-    if (options.optimizeChildGraph)
+    if (options.optimizeGraph)
     {
         unsigned time = msTick();
         traceExpression("BeforeOptimizeSub", resourced);
@@ -1710,35 +1710,11 @@ IHqlExpression * HqlCppTranslator::getResourcedChildGraph(BuildCtx & ctx, IHqlEx
     checkNormalized(ctx, resourced);
     traceExpression("AfterResourcing Child", resourced);
     
-    //Convert queries on preloaded into compound activities - before resourcing so keyed gets done correctly
-    // Second attempt to spot compound disk reads - this time of spill files.  Since resourcing has removed
-    // any sharing we don't need to bother about sharing.
-    if (options.optimizeResourcedProjects)
+    resourced.setown(optimizeGraphPostResource(resourced, csfFlags, false));
+    if (options.optimizeSpillProject)
     {
-        cycle_t time = msTick();
-        OwnedHqlExpr optimized = insertImplicitProjects(*this, resourced.get(), options.optimizeSpillProject);
-        updateTimer("workunit;child.implicitprojects", msTick()-time);
-        traceExpression("AfterResourcedImplicit", optimized);
-        checkNormalized(ctx, optimized);
-        resourced.set(optimized);
-    }
-
-    {
-        unsigned time = msTick();
-
-        CompoundSourceTransformer transformer(*this, csfFlags);
-        resourced.setown(transformer.process(resourced));
-        updateTimer("workunit;tree transform: optimize disk read", msTick()-time);
-    }
-
-    //Now call the optimizer again - the main purpose is to move projects over limits and into compound index/disk reads
-    if (options.optimizeChildGraph)
-    {
-        unsigned time = msTick();
-        traceExpression("BeforeOptimize2", resourced);
-        resourced.setown(optimizeHqlExpression(resourced, getOptimizeFlags()|HOOcompoundproject));
-        traceExpression("AfterOptimize2", resourced);
-        updateTimer("workunit;optimize graph", msTick()-time);
+        resourced.setown(convertSpillsToActivities(resourced, true));
+        resourced.setown(optimizeGraphPostResource(resourced, csfFlags, false));
     }
 
     if (options.paranoidCheckNormalized || options.paranoidCheckDependencies)

+ 6 - 5
ecl/hqlcpp/hqlhtcpp.cpp

@@ -9006,7 +9006,7 @@ IHqlExpression * HqlCppTranslator::optimizeCompoundSource(IHqlExpression * expr,
     return ret.getClear();
 }
 
-IHqlExpression * HqlCppTranslator::optimizeGraphPostResource(IHqlExpression * expr, unsigned csfFlags)
+IHqlExpression * HqlCppTranslator::optimizeGraphPostResource(IHqlExpression * expr, unsigned csfFlags, bool projectBeforeSpill)
 {
     LinkedHqlExpr resourced = expr;
     // Second attempt to spot compound disk reads - this time of spill files for thor.
@@ -9016,7 +9016,7 @@ IHqlExpression * HqlCppTranslator::optimizeGraphPostResource(IHqlExpression * ex
     if (options.optimizeResourcedProjects)
     {
         cycle_t time = msTick();
-        OwnedHqlExpr optimized = insertImplicitProjects(*this, resourced.get(), options.optimizeSpillProject);
+        OwnedHqlExpr optimized = insertImplicitProjects(*this, resourced.get(), projectBeforeSpill);
         updateTimer("workunit;implicit projects", msTick()-time);
         traceExpression("AfterResourcedImplicit", resourced);
         checkNormalized(optimized);
@@ -9100,11 +9100,12 @@ IHqlExpression * HqlCppTranslator::getResourcedGraph(IHqlExpression * expr, IHql
 
     checkNormalized(resourced);
 
-    resourced.setown(optimizeGraphPostResource(resourced, csfFlags));
+    bool createGraphResults = (outputLibraryId != 0);
+    resourced.setown(optimizeGraphPostResource(resourced, csfFlags, options.optimizeSpillProject && !createGraphResults));
     if (options.optimizeSpillProject)
     {
-        resourced.setown(convertSpillsToActivities(resourced));
-        resourced.setown(optimizeGraphPostResource(resourced, csfFlags));
+        resourced.setown(convertSpillsToActivities(resourced, createGraphResults));
+        resourced.setown(optimizeGraphPostResource(resourced, csfFlags, false));
     }
 
     checkNormalized(resourced);

+ 61 - 27
ecl/hqlcpp/hqlresource.cpp

@@ -1252,7 +1252,10 @@ IHqlExpression * ResourcerInfo::createSpilledRead(IHqlExpression * spillReason)
     bool loseDistribution = true;
     if (useGraphResult())
     {
-        args.append(*LINK(record));
+        if (spilledDataset)
+            args.append(*LINK(spilledDataset));
+        else
+            args.append(*LINK(record));
         args.append(*LINK(options->graphIdExpr));
         args.append(*createSpillName());
         if (isGrouped(original))
@@ -1265,10 +1268,12 @@ IHqlExpression * ResourcerInfo::createSpilledRead(IHqlExpression * spillReason)
             args.append(*LINK(recordCountAttr));
         if (options->targetThor() && original->isDataset() && !options->isChildQuery)
             args.append(*createAttribute(_distributed_Atom));
+
+        node_operator readOp = spilledDataset ? no_readspill : no_getgraphresult;
         if (original->isDictionary())
-            dataset.setown(createDictionary(no_getgraphresult, args));
+            dataset.setown(createDictionary(readOp, args));
         else
-            dataset.setown(createDataset(no_getgraphresult, args));
+            dataset.setown(createDataset(readOp, args));
         loseDistribution = false;
     }
     else if (useGlobalResult())
@@ -1333,10 +1338,23 @@ IHqlExpression * ResourcerInfo::createSpilledWrite(IHqlExpression * transformed)
     if (useGraphResult())
     {
         assertex(options->graphIdExpr);
-        args.append(*LINK(transformed));
+
+        if (options->createSpillAsDataset && !linkedFromChild)
+        {
+            IHqlExpression * value = LINK(transformed);
+            if (value->isDatarow())
+                value = createDatasetFromRow(value);
+            spilledDataset.setown(createDataset(no_commonspill, value));
+            args.append(*LINK(spilledDataset));
+        }
+        else
+            args.append(*LINK(transformed));
+
         args.append(*LINK(options->graphIdExpr));
         args.append(*createSpillName());
         args.append(*createAttribute(_spill_Atom));
+        if (spilledDataset)
+            return createValue(no_writespill, makeVoidType(), args);
         return createValue(no_setgraphresult, makeVoidType(), args);
     }
     else if (useGlobalResult())
@@ -1891,8 +1909,6 @@ EclResourcer::~EclResourcer()
 void EclResourcer::setChildQuery(bool value) 
 { 
     options.isChildQuery = value; 
-    if (value)
-        options.createSpillAsDataset = false;
 }
 
 void EclResourcer::setNewChildQuery(IHqlExpression * graphIdExpr, unsigned numResults) 
@@ -5247,7 +5263,7 @@ a single splitter.
 class SpillActivityTransformer : public NewHqlTransformer
 {
 public:
-    SpillActivityTransformer();
+    SpillActivityTransformer(bool _createGraphResults);
 
 protected:
     virtual void analyseExpr(IHqlExpression * expr);
@@ -5263,11 +5279,13 @@ protected:
         ANewTransformInfo * info = queryTransformExtra(body);
         info->spareByte1 = true;
     }
+protected:
+    bool createGraphResults;
 };
 
 static HqlTransformerInfo spillActivityTransformerInfo("SpillActivityTransformer");
-SpillActivityTransformer::SpillActivityTransformer() 
-: NewHqlTransformer(spillActivityTransformerInfo)
+SpillActivityTransformer::SpillActivityTransformer(bool _createGraphResults)
+: NewHqlTransformer(spillActivityTransformerInfo), createGraphResults(_createGraphResults)
 { 
 }
 
@@ -5276,21 +5294,23 @@ void SpillActivityTransformer::analyseExpr(IHqlExpression * expr)
     IHqlExpression * body = expr->queryBody();
     if (alreadyVisited(body))
         return;
-    if (body->getOperator() == no_split)
+
+    //If splitters are commoned up ensure unbalanced splitters stay unbalanced.
+    if ((body->getOperator() == no_split) && !body->hasAttribute(balancedAtom))
     {
-        IHqlExpression * input = body->queryChild(0);
-        if (input->getOperator() == no_split)
+        IHqlExpression * splitter = NULL;
+        IHqlExpression * cur = body->queryChild(0);
+        loop
         {
-            loop
-            {
-                IHqlExpression * cur = input->queryChild(0);
-                if (cur->getOperator() != no_split)
-                    break;
-                input = cur;
-            }
-            if (!body->hasAttribute(balancedAtom))
-                setUnbalanced(input->queryBody());
+            node_operator op = cur->getOperator();
+            if (op == no_split)
+                splitter = cur;
+            else if (op != no_commonspill)
+                break;
+            cur = cur->queryChild(0);
         }
+        if (splitter)
+            setUnbalanced(splitter->queryBody());
     }
     NewHqlTransformer::analyseExpr(expr);
 }
@@ -5305,9 +5325,9 @@ IHqlExpression * SpillActivityTransformer::createTransformed(IHqlExpression * ex
     {
     case no_split:
         {
-            IHqlExpression * input = expr->queryChild(0);
+            OwnedHqlExpr input = transform(expr->queryChild(0));
             if (input->getOperator() == no_split)
-                return transform(input);
+                return input.getClear();
             OwnedHqlExpr transformed = NewHqlTransformer::createTransformed(expr);
             if (transformed->hasAttribute(balancedAtom) && isUnbalanced(expr))
                 return removeAttribute(transformed, balancedAtom);
@@ -5317,6 +5337,8 @@ IHqlExpression * SpillActivityTransformer::createTransformed(IHqlExpression * ex
         {
             HqlExprArray args;
             transformChildren(expr, args);
+            if (createGraphResults)
+                return createValue(no_setgraphresult, makeVoidType(), args);
             return createValue(no_output, makeVoidType(), args);
         }
     case no_commonspill:
@@ -5324,9 +5346,14 @@ IHqlExpression * SpillActivityTransformer::createTransformed(IHqlExpression * ex
     case no_readspill:
         {
             OwnedHqlExpr ds = transform(expr->queryChild(0));
+            node_operator readOp = createGraphResults ? no_getgraphresult : no_table;
+
             HqlExprArray args;
-            args.append(*transform(expr->queryChild(1)));
+            if (!createGraphResults)
+                args.append(*transform(expr->queryChild(1)));
             args.append(*LINK(ds->queryRecord()));
+            if (createGraphResults)
+                args.append(*transform(expr->queryChild(1)));
             ForEachChildFrom(i, expr, 2)
             {
                 IHqlExpression * cur = expr->queryChild(i);
@@ -5335,15 +5362,22 @@ IHqlExpression * SpillActivityTransformer::createTransformed(IHqlExpression * ex
             IHqlExpression * recordCountAttr = queryRecordCountInfo(expr);
             if (recordCountAttr)
                 args.append(*LINK(recordCountAttr));
-            return createDataset(no_table, args);
+
+            OwnedHqlExpr ret;
+            if (ds->isDictionary())
+                ret.setown(createDictionary(readOp, args));
+            else
+                ret.setown(createDataset(readOp, args));
+            const bool loseDistribution = false;
+            return preserveTableInfo(ret, ds, loseDistribution, NULL);
         }
     }
     return NewHqlTransformer::createTransformed(expr);
 }
 
-IHqlExpression * convertSpillsToActivities(IHqlExpression * expr)
+IHqlExpression * convertSpillsToActivities(IHqlExpression * expr, bool createGraphResults)
 {
-    SpillActivityTransformer transformer;
+    SpillActivityTransformer transformer(createGraphResults);
     transformer.analyse(expr, 0);
     return transformer.transformRoot(expr);
 }

+ 1 - 1
ecl/hqlcpp/hqlresource.hpp

@@ -27,6 +27,6 @@ IHqlExpression * resourceLoopGraph(HqlCppTranslator & translator, HqlExprCopyArr
 IHqlExpression * resourceNewChildGraph(HqlCppTranslator & translator, HqlExprCopyArray & activeRows, IHqlExpression * expr, ClusterType targetClusterType, IHqlExpression * graphIdExpr, unsigned numResults);
 IHqlExpression * resourceRemoteGraph(HqlCppTranslator & translator, IHqlExpression * expr, ClusterType targetClusterType, unsigned clusterSize);
 
-IHqlExpression * convertSpillsToActivities(IHqlExpression * expr);
+IHqlExpression * convertSpillsToActivities(IHqlExpression * expr, bool createGraphResults);
 
 #endif

+ 0 - 2
ecl/hqlcpp/hqlresource.ipp

@@ -360,8 +360,6 @@ public:
     void setUseGraphResults(bool _useGraphResults) 
     { 
         options.useGraphResults = _useGraphResults; 
-        if (_useGraphResults)
-            options.createSpillAsDataset = false; 
     }
     void tagActiveCursors(HqlExprCopyArray & activeRows);
     inline unsigned numGraphResults() { return options.nextResult; }