فهرست منبع

Merge pull request #5485 from ghalliday/issue10942

HPCC-10942 Refactor some of the graph transformations

Reviewed-By: Jamie Noss <james.noss@lexisnexis.com>
Reviewed-By: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 11 سال پیش
والد
کامیت
2d84b928e7
7 فایلهای تغییر یافته به همراه 173 افزوده شده و 130 حذف شده
  1. 11 0
      ecl/hql/hqlexpr.cpp
  2. 1 0
      ecl/hql/hqlexpr.hpp
  3. 4 0
      ecl/hql/hqlfold.cpp
  4. 3 3
      ecl/hqlcpp/hqlcpp.cpp
  5. 3 0
      ecl/hqlcpp/hqlcpp.ipp
  6. 13 8
      ecl/hqlcpp/hqlresource.cpp
  7. 138 119
      ecl/hqlcpp/hqlttcpp.cpp

+ 11 - 0
ecl/hql/hqlexpr.cpp

@@ -12233,6 +12233,17 @@ bool isDummySerializeDeserialize(IHqlExpression * expr)
 }
 
 
+bool isRedundantGlobalScope(IHqlExpression * expr)   // Returns true when a no_globalscope node directly wraps another no_globalscope node and can therefore be dropped.
+{
+    assertex(expr->getOperator() == no_globalscope);   // precondition: callers must pass a no_globalscope node
+    IHqlExpression * child = expr->queryChild(0);
+    if (child->getOperator() != no_globalscope)        // the wrapped expression is not itself a global scope, so the outer node is not redundant
+        return false;
+    if (expr->hasAttribute(optAtom) && !child->hasAttribute(optAtom))   // outer node carries the opt attribute but the inner one does not, so the outer node is kept
+        return false;
+    return true;
+}
+
 bool isIndependentOfScope(IHqlExpression * expr)
 {
     return expr->isIndependentOfScope();

+ 1 - 0
ecl/hql/hqlexpr.hpp

@@ -1513,6 +1513,7 @@ extern HQL_API IHqlExpression * expandBetween(IHqlExpression * expr);
 extern HQL_API bool isAlwaysActiveRow(IHqlExpression * expr);
 extern HQL_API bool isAlwaysNewRow(IHqlExpression * expr);
 extern HQL_API IHqlExpression * ensureActiveRow(IHqlExpression * expr);
+extern HQL_API bool isRedundantGlobalScope(IHqlExpression * expr);
 extern HQL_API bool isIndependentOfScope(IHqlExpression * expr);
 extern HQL_API bool isActivityIndependentOfScope(IHqlExpression * expr);
 extern HQL_API bool exprReferencesDataset(IHqlExpression * expr, IHqlExpression * dataset);

+ 4 - 0
ecl/hql/hqlfold.cpp

@@ -3936,6 +3936,10 @@ IHqlExpression * NullFolderMixin::foldNullDataset(IHqlExpression * expr)
                 return removeParentNode(expr);
             break;
         }
+    case no_globalscope:
+        if (isRedundantGlobalScope(expr))
+            return removeParentNode(expr);
+        break;
     }
     return NULL;
 }

+ 3 - 3
ecl/hqlcpp/hqlcpp.cpp

@@ -3228,7 +3228,7 @@ void HqlCppTranslator::buildExpr(BuildCtx & ctx, IHqlExpression * expr, CHqlBoun
         tgt.expr.set(expr);
         return;
     case no_globalscope:
-        if (options.regressionTest)
+        if (options.regressionTest && !ctx.queryMatchExpr(globalContextMarkerExpr))
             throwUnexpected();
         buildExpr(ctx, expr->queryChild(0), tgt);
         return;
@@ -8038,8 +8038,8 @@ void HqlCppTranslator::expandSimpleOrder(IHqlExpression * left, IHqlExpression *
     {
         IHqlExpression * record = left->queryRecord();
         assertex(right->isDatarow() && (record == right->queryRecord()));
-        expandRowOrder(left, record, leftValues, !isActiveRow(left));
-        expandRowOrder(right, record, rightValues, !isActiveRow(right));
+        expandRowOrder(left, record, leftValues, !isActiveRow(left) && (left->getOperator() != no_select));
+        expandRowOrder(right, record, rightValues, !isActiveRow(right) && (right->getOperator() != no_select));
     }
     else
     {

+ 3 - 0
ecl/hqlcpp/hqlcpp.ipp

@@ -1834,6 +1834,9 @@ protected:
 
 
     void markThorBoundaries(WorkflowItem & curWorkflow);
+    void normalizeGraphForGeneration(HqlExprArray & exprs, HqlQueryContext & query);
+    void applyGlobalOptimizations(HqlExprArray & exprs);
+    void transformWorkflowItem(WorkflowItem & curWorkflow);
     bool transformGraphForGeneration(HqlQueryContext & query, WorkflowArray & exprs);
     void processEmbeddedLibraries(HqlExprArray & exprs, HqlExprArray & internalLibraries, bool isLibrary);
     void pickBestEngine(WorkflowArray & array);

+ 13 - 8
ecl/hqlcpp/hqlresource.cpp

@@ -2056,6 +2056,16 @@ inline bool projectSelectorDatasetToField(IHqlExpression * row)
     return ((row->getOperator() == no_selectnth) && getFieldCount(row->queryRecord()) > 1);
 }
 
+static IHqlExpression * skipScalarWrappers(IHqlExpression * value)   // Walks down child(0) past any no_globalscope/no_thisnode/no_evalonce nodes and returns the first expression that is none of them.
+{
+    loop   // presumably a project macro for an unconditional loop (the only exit is the return below) - TODO confirm
+    {
+        node_operator op = value->getOperator();
+        if ((op != no_globalscope) && (op != no_thisnode) && (op != no_evalonce))
+            return value;                 // not a wrapper: this is the underlying expression
+        value = value->queryChild(0);     // step inside the wrapper and test again
+    }
+}
 
 static HqlTransformerInfo eclHoistLocatorInfo("EclHoistLocator");
 class EclHoistLocator : public NewHqlTransformer
@@ -2098,6 +2108,8 @@ public:
         unsigned match = matched.findOriginal(expr);
         if (match == NotFound)
         {
+            value = skipScalarWrappers(value);
+
             OwnedHqlExpr hoisted;
             IHqlExpression * projected = NULL;
             if (value->getOperator() == no_select)
@@ -4541,15 +4553,8 @@ protected:
 
 static IHqlExpression * getScalarReplacement(CChildDependent & cur, ResourcerInfo * hoistedInfo, IHqlExpression * replacement)
 {
-    IHqlExpression * value = cur.original;
     //First skip any wrappers which are there to cause things to be hoisted.
-    loop
-    {
-        node_operator op = value->getOperator();
-        if ((op != no_globalscope) && (op != no_thisnode) && (op != no_evalonce))
-            break;
-        value = value->queryChild(0);
-    }
+    IHqlExpression * value = skipScalarWrappers(cur.original);
 
     //Now modify the spilled result depending on how the spilled result was created (see EclHoistLocator::noteScalar() above)
     if (value->getOperator() == no_select)

+ 138 - 119
ecl/hqlcpp/hqlttcpp.cpp

@@ -7706,6 +7706,10 @@ IHqlExpression * NewScopeMigrateTransformer::createTransformed(IHqlExpression *
             }
             break;
         }
+    case no_globalscope:
+        if (isRedundantGlobalScope(transformed))
+            return LINK(transformed->queryChild(0));
+        break;
     }
     return transformed.getClear();
 }
@@ -12772,10 +12776,8 @@ IHqlExpression * HqlCppTranslator::separateLibraries(IHqlExpression * query, Hql
     return createComma(exprs);
 }
 
-
-bool HqlCppTranslator::transformGraphForGeneration(HqlQueryContext & query, WorkflowArray & workflow)
+void HqlCppTranslator::normalizeGraphForGeneration(HqlExprArray & exprs, HqlQueryContext & query)
 {
-    HqlExprArray exprs;
     if (isLibraryScope(query.expr))
         outputLibrary->mapLogicalToImplementation(exprs, query.expr);
     else
@@ -12810,15 +12812,11 @@ bool HqlCppTranslator::transformGraphForGeneration(HqlQueryContext & query, Work
 
     traceExpressions("allocate Sequence", exprs);
     checkNormalized(exprs);
+}
 
-    if (options.generateLogicalGraph || options.generateLogicalGraphOnly)
-    {
-        LogicalGraphCreator creator(wu());
-        creator.createLogicalGraph(exprs);
-        if (options.generateLogicalGraphOnly)
-            return false;
-        curActivityId = creator.queryMaxActivityId();
-    }
+
+void HqlCppTranslator::applyGlobalOptimizations(HqlExprArray & exprs)
+{
     traceExpressions("begin transformGraphForGeneration", exprs);
     checkNormalized(exprs);
 
@@ -12859,13 +12857,140 @@ bool HqlCppTranslator::transformGraphForGeneration(HqlQueryContext & query, Work
     traceExpressions("alloc", exprs);
     checkNormalized(exprs);
     modifyOutputLocations(exprs);
+}
+
+void HqlCppTranslator::transformWorkflowItem(WorkflowItem & curWorkflow)   // Applies the per-workflow-item tree transformations, in a fixed order, to the expressions held by curWorkflow (modified in place).
+{
+#ifdef USE_SELSEQ_UID
+    if (options.normalizeSelectorSequence)
+    {
+        unsigned time = msTick();
+        LeftRightTransformer normalizer;
+        normalizer.process(curWorkflow.queryExprs());
+        updateTimer("workunit;tree transform: left right", msTick()-time);
+        //traceExpressions("after implicit alias", workflow);
+    }
+#endif
+
+    if (queryOptions().createImplicitAliases)
+    {
+        unsigned time = msTick();
+        ImplicitAliasTransformer normalizer;
+        normalizer.process(curWorkflow.queryExprs());
+        updateTimer("workunit;tree transform: implicit alias", msTick()-time);
+        //traceExpressions("after implicit alias", workflow);
+    }
+
+    {
+        unsigned startTime = msTick();
+        hoistNestedCompound(*this, curWorkflow.queryExprs());
+        updateTimer("workunit;tree transform: hoist nested compound", msTick()-startTime);
+    }
+
+    if (options.optimizeNestedConditional)
+    {
+        cycle_t time = msTick();   // NOTE(review): the other timers in this function hold msTick() in an unsigned - confirm cycle_t is intended here
+        optimizeNestedConditional(curWorkflow.queryExprs());
+        updateTimer("workunit;optimize nested conditional", msTick()-time);
+        traceExpressions("nested", curWorkflow);
+        checkNormalized(curWorkflow);
+    }
+
+    checkNormalized(curWorkflow);   // unconditional check; repeats the one inside the block above when that block ran
+    //sort(x)[n] -> topn(x, n)[n], count(x)>n -> count(choosen(x,n+1)) > n and possibly others
+    {
+        unsigned startTime = msTick();
+        optimizeActivities(curWorkflow.queryExprs(), !targetThor(), options.optimizeNonEmpty);
+        updateTimer("workunit;tree transform: optimize activities", msTick()-startTime);
+    }
+    checkNormalized(curWorkflow);
+
+    //----------------------------- Transformations below this mark may have created globals so be very careful with hoisting ---------------------
+
+    unsigned time5 = msTick();
+    migrateExprToNaturalLevel(curWorkflow, wu(), *this);       // Ensure expressions are evaluated at the best level - e.g., counts moved to most appropriate level.
+    updateTimer("workunit;tree transform: migrate", msTick()-time5);
+    //transformToAliases(exprs);
+    traceExpressions("migrate", curWorkflow);
+    checkNormalized(curWorkflow);
+
+    unsigned time2 = msTick();
+    markThorBoundaries(curWorkflow);                                               // work out which engine is going to perform which operation.
+    updateTimer("workunit;tree transform: thor hole", msTick()-time2);
+    traceExpressions("boundary", curWorkflow);
+    checkNormalized(curWorkflow);
+
+    if (options.optimizeGlobalProjects)
+    {
+        cycle_t time = msTick();   // NOTE(review): same cycle_t vs unsigned inconsistency as in the nested-conditional block above
+        insertImplicitProjects(*this, curWorkflow.queryExprs());
+        updateTimer("workunit;global implicit projects", msTick()-time);
+        traceExpressions("implicit", curWorkflow);
+        checkNormalized(curWorkflow);
+    }
+
+    unsigned time3 = msTick();
+    normalizeResultFormat(curWorkflow, options);
+    updateTimer("workunit;tree transform: normalize result", msTick()-time3);
+    traceExpressions("results", curWorkflow);
+    checkNormalized(curWorkflow);
+
+    optimizePersists(curWorkflow.queryExprs());
+
+    traceExpressions("per", curWorkflow);
+    checkNormalized(curWorkflow);
+//  flattenDatasets(workflow);
+//  traceExpressions("flatten", workflow);
+
+    {
+        unsigned startTime = msTick();
+        mergeThorGraphs(curWorkflow, options.resourceConditionalActions, options.resourceSequential);          // reduces number of graphs sent to thor
+        updateTimer("workunit;tree transform: merge thor", msTick()-startTime);
+    }
+
+    traceExpressions("merged", curWorkflow);
+    checkNormalized(curWorkflow);
+
+    if (queryOptions().normalizeLocations)
+        normalizeAnnotations(*this, curWorkflow.queryExprs());
+
+    spotGlobalCSE(curWorkflow);                                                        // spot CSE within those graphs, and create some more
+    checkNormalized(curWorkflow);
+
+    //expandGlobalDatasets(workflow, wu(), *this);
+
+    {
+        unsigned startTime = msTick();
+        mergeThorGraphs(curWorkflow, options.resourceConditionalActions, options.resourceSequential);   // second merge pass, run after spotGlobalCSE above; reported under the same timer name as the first
+        updateTimer("workunit;tree transform: merge thor", msTick()-startTime);
+    }
+    checkNormalized(curWorkflow);
+
+    removeTrivialGraphs(curWorkflow);
+    checkNormalized(curWorkflow);
+}
+
+bool HqlCppTranslator::transformGraphForGeneration(HqlQueryContext & query, WorkflowArray & workflow)
+{
+    HqlExprArray exprs;
+    normalizeGraphForGeneration(exprs, query);
+
+    if (options.generateLogicalGraph || options.generateLogicalGraphOnly)
+    {
+        LogicalGraphCreator creator(wu());
+        creator.createLogicalGraph(exprs);
+        if (options.generateLogicalGraphOnly)
+            return false;
+        curActivityId = creator.queryMaxActivityId();
+    }
+
+    applyGlobalOptimizations(exprs);
     if (exprs.ordinality() == 0)
         return false;   // No action needed
 
     unsigned time4 = msTick();
     ::extractWorkflow(*this, exprs, workflow);
 
-
     traceExpressions("workflow", workflow);
     checkNormalized(workflow);
     updateTimer("workunit;tree transform: stored results", msTick()-time4);
@@ -12889,115 +13014,9 @@ bool HqlCppTranslator::transformGraphForGeneration(HqlQueryContext & query, Work
     ForEachItemIn(i, workflow)
     {
         WorkflowItem & curWorkflow = workflow.item(i);
-
-#ifdef USE_SELSEQ_UID
-        if (options.normalizeSelectorSequence)
-        {
-            unsigned time = msTick();
-            LeftRightTransformer normalizer;
-            normalizer.process(curWorkflow.queryExprs());
-            updateTimer("workunit;tree transform: left right", msTick()-time);
-            //traceExpressions("after implicit alias", workflow);
-        }
-#endif
-
-        if (queryOptions().createImplicitAliases)
-        {
-            unsigned time = msTick();
-            ImplicitAliasTransformer normalizer;
-            normalizer.process(curWorkflow.queryExprs());
-            updateTimer("workunit;tree transform: implicit alias", msTick()-time);
-            //traceExpressions("after implicit alias", workflow);
-        }
-
-        {
-            unsigned startTime = msTick();
-            hoistNestedCompound(*this, curWorkflow.queryExprs());
-            updateTimer("workunit;tree transform: hoist nested compound", msTick()-startTime);
-        }
-
-        if (options.optimizeNestedConditional)
-        {
-            cycle_t time = msTick();
-            optimizeNestedConditional(curWorkflow.queryExprs());
-            updateTimer("workunit;optimize nested conditional", msTick()-time);
-            traceExpressions("nested", curWorkflow);
-            checkNormalized(curWorkflow);
-        }
-
-        checkNormalized(curWorkflow);
-        //sort(x)[n] -> topn(x, n)[]n, count(x)>n -> count(choosen(x,n+1)) > n and possibly others
-        {
-            unsigned startTime = msTick();
-            optimizeActivities(curWorkflow.queryExprs(), !targetThor(), options.optimizeNonEmpty);
-            updateTimer("workunit;tree transform: optimize activities", msTick()-startTime);
-        }
-        checkNormalized(curWorkflow);
-
-        unsigned time5 = msTick();
-        migrateExprToNaturalLevel(curWorkflow, wu(), *this);       // Ensure expressions are evaluated at the best level - e.g., counts moved to most appropriate level.
-        updateTimer("workunit;tree transform: migrate", msTick()-time5);
-        //transformToAliases(exprs);
-        traceExpressions("migrate", curWorkflow);
-        checkNormalized(curWorkflow);
-
-        unsigned time2 = msTick();
-        markThorBoundaries(curWorkflow);                                               // work out which engine is going to perform which operation.
-        updateTimer("workunit;tree transform: thor hole", msTick()-time2);
-        traceExpressions("boundary", curWorkflow);
-        checkNormalized(curWorkflow);
-
-        if (options.optimizeGlobalProjects)
-        {
-            cycle_t time = msTick();
-            insertImplicitProjects(*this, curWorkflow.queryExprs());
-            updateTimer("workunit;global implicit projects", msTick()-time);
-            traceExpressions("implicit", curWorkflow);
-            checkNormalized(curWorkflow);
-        }
-
-        unsigned time3 = msTick();
-        normalizeResultFormat(curWorkflow, options);
-        updateTimer("workunit;tree transform: normalize result", msTick()-time3);
-        traceExpressions("results", curWorkflow);
-        checkNormalized(curWorkflow);
-
-        optimizePersists(curWorkflow.queryExprs());
-
-        traceExpressions("per", curWorkflow);
-        checkNormalized(curWorkflow);
-    //  flattenDatasets(workflow);
-    //  traceExpressions("flatten", workflow);
-
-        {
-            unsigned startTime = msTick();
-            mergeThorGraphs(curWorkflow, options.resourceConditionalActions, options.resourceSequential);          // reduces number of graphs sent to thor
-            updateTimer("workunit;tree transform: merge thor", msTick()-startTime);
-        }
-
-        traceExpressions("merged", curWorkflow);
-        checkNormalized(curWorkflow);
-
-        if (queryOptions().normalizeLocations)
-            normalizeAnnotations(*this, curWorkflow.queryExprs());
-
-        spotGlobalCSE(curWorkflow);                                                        // spot CSE within those graphs, and create some more
-        checkNormalized(curWorkflow);
-
-        //expandGlobalDatasets(workflow, wu(), *this);
-
-        {
-            unsigned startTime = msTick();
-            mergeThorGraphs(curWorkflow, options.resourceConditionalActions, options.resourceSequential);
-            updateTimer("workunit;tree transform: merge thor", msTick()-startTime);
-        }
-        checkNormalized(curWorkflow);
-
-        removeTrivialGraphs(curWorkflow);
-        checkNormalized(curWorkflow);
+        transformWorkflowItem(curWorkflow);
     }
 
-
 #ifndef PICK_ENGINE_EARLY
     if (options.pickBestEngine)
         pickBestEngine(workflow);