Sfoglia il codice sorgente

Merge pull request #5210 from ghalliday/issue10534

HPCC-10534 Optimize IF(PROJECT, PROJECT)

Reviewed-By: Jamie Noss <james.noss@lexisnexis.com>
Reviewed-By: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 11 anni fa
parent
commit
d1337e8174

+ 2 - 0
ecl/hql/hqlatoms.cpp

@@ -85,6 +85,7 @@ IAtom * assertConstAtom;
 IAtom * atmostAtom;
 IAtom * aveAtom;
 IAtom * backupAtom;
+IAtom * balancedAtom;
 IAtom * bcdAtom;
 IAtom * beforeAtom;
 IAtom * bestAtom;
@@ -496,6 +497,7 @@ MODULE_INIT(INIT_PRIORITY_HQLATOM)
     MAKEATOM(atmost);
     MAKEATOM(ave);
     MAKEATOM(backup);
+    MAKEATOM(balanced);
     MAKEATOM(bcd);
     MAKEATOM(before);
     MAKEATOM(best);

+ 1 - 0
ecl/hql/hqlatoms.hpp

@@ -87,6 +87,7 @@ extern HQL_API IAtom * assertConstAtom;
 extern HQL_API IAtom * atmostAtom;
 extern HQL_API IAtom * aveAtom;
 extern HQL_API IAtom * backupAtom;
+extern HQL_API IAtom * balancedAtom;
 extern HQL_API IAtom * bcdAtom;
 extern HQL_API IAtom * beforeAtom;
 extern HQL_API IAtom * bestAtom;

+ 63 - 1
ecl/hql/hqlopt.cpp

@@ -758,6 +758,54 @@ IHqlExpression * CTreeOptimizer::optimizeDatasetIf(IHqlExpression * transformed)
     return LINK(transformed);
 }
 
+//Decide whether the two branches of an IF can be treated as the same expression, so the IF can be
+//replaced by one of its branches.
+//
+//  options - HOOxxx optimizer flags; HOOexpensive enables the costlier transform comparison below.
+//  left    - the first branch.
+//  right   - the second branch.
+//
+//Returns true if the bodies are identical, or if both branches are no_hqlproject/no_newusertable nodes
+//whose children only differ by their selector sequence attribute.
+static bool branchesMatch(unsigned options, IHqlExpression * left, IHqlExpression * right)
+{
+    if (left->queryBody() == right->queryBody())
+        return true;
+
+    node_operator leftOp = left->getOperator();
+    if (leftOp != right->getOperator())
+        return false;
+
+    //Only projects and user tables are candidates for the relaxed comparison
+    switch (leftOp)
+    {
+    case no_hqlproject:
+    case no_newusertable:
+        break;
+    default:
+        return false;
+    }
+    if (left->numChildren() != right->numChildren())
+        return false;
+
+    //Check for the situation where the only difference between two projects is the selector sequence
+    ForEachChild(i, left)
+    {
+        IHqlExpression * curLeft = left->queryChild(i);
+        if (curLeft->isAttribute() && (curLeft->queryName() == _selectorSequence_Atom))
+            continue;
+        IHqlExpression * curRight = right->queryChild(i);
+        if (curLeft->queryBody() == curRight->queryBody())
+            continue;
+
+        //The following code allows LEFT to be referred to within the transform.  It is only enabled when
+        //expensive optimizations are requested because of the potential cost of replacing the selseq
+        //within the transform.
+        if (!(options & HOOexpensive))
+            return false;
+
+        if ((leftOp != no_hqlproject) || !curLeft->isTransform())
+            return false;
+        if (!recordTypesMatch(curLeft,curRight))
+            return false;
+
+        //Substitute the right branch's selector sequence for the left's, and compare the result again
+        OwnedHqlExpr newTransform = replaceExpression(curLeft, querySelSeq(left), querySelSeq(right));
+        if (newTransform->queryBody() != curRight->queryBody())
+            return false;
+
+        //The transforms match once the selector sequence is aligned - carry on with the remaining children.
+        //(Previously control fell through to an unconditional "return false", so this path could never succeed.)
+    }
+    return true;
+}
+
+
 IHqlExpression * CTreeOptimizer::optimizeIf(IHqlExpression * expr)
 {
     IHqlExpression * trueExpr = expr->queryChild(1);
@@ -766,7 +814,7 @@ IHqlExpression * CTreeOptimizer::optimizeIf(IHqlExpression * expr)
     if (!falseExpr)
         return NULL;
 
-    if (trueExpr->queryBody() == falseExpr->queryBody())
+    if (branchesMatch(options, trueExpr, falseExpr))
     {
         noteUnused(trueExpr);       // inherit usage() will increase the usage again
         noteUnused(falseExpr);
@@ -2538,6 +2586,20 @@ IHqlExpression * CTreeOptimizer::doCreateTransformed(IHqlExpression * transforme
             return createDataset(no_hqlproject, args);
         }
         break;
+    case no_split:
+        node_operator childOp = child->getOperator();
+        if (childOp == no_split)
+        {
+            //Don't convert an unbalanced splitter into a balanced splitter
+            //- best would be to set unbalanced on the child, but that would require more complication.
+            if (transformed->hasAttribute(balancedAtom) || !child->hasAttribute(balancedAtom))
+                return removeParentNode(transformed);
+        }
+
+        //This would remove splits only used once, but dangerous if we ever get the usage counting wrong...
+        //if (queryBodyExtra(transformed)->useCount == 1)
+        //    return removeParentNode(transformed);
+        break;
     }
 
     bool shared = childrenAreShared(transformed);

+ 1 - 0
ecl/hql/hqlopt.hpp

@@ -31,6 +31,7 @@ enum
     HOOhascompoundaggregate     = 0x0040,
     HOOfoldconstantdatasets     = 0x0080,
     HOOalwayslocal              = 0x0100,
+    HOOexpensive                = 0x0200,   // include potentially expensive optimizations
 };
 
 extern HQL_API IHqlExpression * optimizeHqlExpression(IHqlExpression * expr, unsigned options);

+ 0 - 2
ecl/hqlcpp/hqlcatom.cpp

@@ -32,7 +32,6 @@ IAtom * activeMatchUtf8Atom;
 IAtom * activeProductionMarkerAtom;
 IAtom * activeValidateMarkerAtom;
 IAtom * activityIdMarkerAtom;
-IAtom * balancedAtom;
 IAtom * bitfieldOffsetAtom;
 IAtom * blobHelperAtom;
 IAtom * branchAtom;
@@ -1408,7 +1407,6 @@ MODULE_INIT(INIT_PRIORITY_HQLATOM-1)
     MAKEATOM(activeProductionMarker);
     MAKEATOM(activeValidateMarker);
     MAKEATOM(activityIdMarker);
-    MAKEATOM(balanced);
     MAKEATOM(bitfieldOffset);
     MAKEATOM(blobHelper);
     MAKEATOM(branch);

+ 0 - 1
ecl/hqlcpp/hqlcatom.hpp

@@ -32,7 +32,6 @@ extern IAtom * activeMatchUtf8Atom;
 extern IAtom * activeProductionMarkerAtom;
 extern IAtom * activeValidateMarkerAtom;
 extern IAtom * activityIdMarkerAtom;
-extern IAtom * balancedAtom;
 extern IAtom * bitfieldOffsetAtom;
 extern IAtom * blobHelperAtom;
 extern IAtom * branchAtom;

+ 11 - 0
ecl/hqlcpp/hqlcpp.cpp

@@ -1735,6 +1735,7 @@ void HqlCppTranslator::cacheOptions()
         DebugOption(options.expandPersistInputDependencies,"expandPersistInputDependencies",true),
         DebugOption(options.multiplePersistInstances,"multiplePersistInstances",true),
         DebugOption(options.defaultNumPersistInstances,"defaultNumPersistInstances",-1),
+        DebugOption(options.optimizeMax,"optimizeMax",false),
     };
 
     //get options values from workunit
@@ -1807,6 +1808,14 @@ void HqlCppTranslator::cacheOptions()
 
 void HqlCppTranslator::postProcessOptions()
 {
+    if (options.optimizeMax)
+    {
+        //Enable any extra potentially expensive optimization options here...
+        options.foldConstantDatasets = true;
+        options.percolateConstants = true;
+        options.percolateFilters = true;
+    }
+
 //Any post processing - e.g., dependent flags goes here...
     options.optimizeDiskFlag = 0;
     if (options.optimizeInlineSource) 
@@ -1857,6 +1866,8 @@ unsigned HqlCppTranslator::getOptimizeFlags() const
         optFlags |= HOOhascompoundaggregate;
     if (options.foldConstantDatasets)
         optFlags |= HOOfoldconstantdatasets;
+    if (options.optimizeMax)
+        optFlags |= HOOexpensive;
     return optFlags;
 }
 

+ 1 - 0
ecl/hqlcpp/hqlcpp.ipp

@@ -729,6 +729,7 @@ struct HqlCppOptions
     bool                expandPersistInputDependencies;
     bool                expirePersists;
     bool                actionLinkInNewGraph;
+    bool                optimizeMax;
 };
 
 //Any information gathered while processing the query should be moved into here, rather than cluttering up the translator class

+ 2 - 0
ecl/hqlcpp/hqlttcpp.cpp

@@ -12886,8 +12886,10 @@ bool HqlCppTranslator::transformGraphForGeneration(HqlQueryContext & query, Work
         if (options.foldConstantDatasets) foldOptions |= HFOconstantdatasets;
         if (options.percolateConstants) foldOptions |= HFOpercolateconstants;
         if (options.percolateFilters) foldOptions |= HFOpercolatefilters;
+        if (options.optimizeMax) foldOptions |= HFOx_op_not_x;
         if (options.globalFoldOptions != (unsigned)-1)
             foldOptions = options.globalFoldOptions;
+
         foldHqlExpression(folded, exprs, foldOptions);
         replaceArray(exprs, folded);
         updateTimer("workunit;tree transform: global fold", msTick()-startTime);