Explorar o código

HPCC-12877 Use spilling sort by default when index building

We know these sorts are likely to be big...

We should still provide ways of setting the sort algorithm for other internal
joins, or for all sorts in a job.

Also fix a memory corruption (double free) in the spilling sort code.

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman hai 10 anos
pai
achega
48c65873c8

+ 4 - 3
ecl/hql/hqlmeta.cpp

@@ -1558,7 +1558,7 @@ IHqlExpression * getSubSort(IHqlExpression * dataset, IHqlExpression * order, bo
 
 //--------------------------------------------------------------------------------------------------------------------
 
-IHqlExpression * ensureSorted(IHqlExpression * dataset, IHqlExpression * order, bool isLocal, bool ignoreGrouping, bool alwaysLocal, bool allowSubSort)
+IHqlExpression * ensureSorted(IHqlExpression * dataset, IHqlExpression * order, bool isLocal, bool ignoreGrouping, bool alwaysLocal, bool allowSubSort, bool requestSpilling)
 {
     if (isAlreadySorted(dataset, order, isLocal||alwaysLocal, ignoreGrouping))
         return LINK(dataset);
@@ -1573,8 +1573,9 @@ IHqlExpression * ensureSorted(IHqlExpression * dataset, IHqlExpression * order,
         }
     }
 
-    IHqlExpression * attr = isLocal ? createLocalAttribute() : (isGrouped(dataset) && ignoreGrouping) ? createAttribute(globalAtom) : NULL;
-    return createDatasetF(no_sort, LINK(dataset), LINK(order), attr, NULL);
+    IHqlExpression * attr1 = isLocal ? createLocalAttribute() : (isGrouped(dataset) && ignoreGrouping) ? createAttribute(globalAtom) : NULL;
+    IHqlExpression * attr2 = requestSpilling ? createAttribute(internalAtom) : NULL;
+    return createDatasetF(no_sort, LINK(dataset), LINK(order), attr1 ? attr1 : attr2, attr1 ? attr2 : NULL, NULL);
 }
 
 //-------------------------------

+ 1 - 1
ecl/hql/hqlmeta.hpp

@@ -110,7 +110,7 @@ extern HQL_API bool matchesAnyDistribution(IHqlExpression * distn);
 extern HQL_API bool appearsToBeSorted(IHqlExpression * dataset, bool isLocal, bool ignoreGrouping);
 extern HQL_API bool isAlreadySorted(IHqlExpression * dataset, const HqlExprArray & newSort, bool isLocal, bool ignoreGrouping);
 extern HQL_API bool isAlreadySorted(IHqlExpression * dataset, IHqlExpression * newSort, bool isLocal, bool ignoreGrouping);
-extern HQL_API IHqlExpression * ensureSorted(IHqlExpression * dataset, IHqlExpression * order, bool isLocal, bool ignoreGrouping, bool alwaysLocal, bool allowSubSort);
+extern HQL_API IHqlExpression * ensureSorted(IHqlExpression * dataset, IHqlExpression * order, bool isLocal, bool ignoreGrouping, bool alwaysLocal, bool allowSubSort, bool requestSpilling);
 
 extern HQL_API bool isWorthShuffling(IHqlExpression * dataset, IHqlExpression * order, bool isLocal, bool ignoreGrouping);
 extern HQL_API bool isWorthShuffling(IHqlExpression * dataset, const HqlExprArray & newSort, bool isLocal, bool ignoreGrouping);

+ 5 - 1
ecl/hqlcpp/hqlhtcpp.cpp

@@ -16368,6 +16368,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivitySort(BuildCtx & ctx, IHqlExpre
 
     IHqlExpression * stable = expr->queryAttribute(stableAtom);
     IHqlExpression * unstable = expr->queryAttribute(unstableAtom);
+    IHqlExpression * internal = expr->queryAttribute(internalAtom);
     IHqlExpression * method = NULL;
     StringBuffer flags;
     if (stable)
@@ -16380,7 +16381,10 @@ ABoundActivity * HqlCppTranslator::doBuildActivitySort(BuildCtx & ctx, IHqlExpre
         flags.append("|TAFunstable");
         method = unstable->queryChild(0);
     }
-
+    if (internal)
+    {
+        flags.append("|TAFspill");
+    }
     if (!method || method->isConstant())
         flags.append("|TAFconstant");
 

+ 3 - 3
ecl/hqlcpp/hqlttcpp.cpp

@@ -1920,7 +1920,7 @@ static IHqlExpression * normalizeIndexBuild(IHqlExpression * expr, bool sortInde
             }
         }
 
-        OwnedHqlExpr sorted = ensureSorted(dataset, newsort, expr->hasAttribute(localAtom), true, alwaysLocal, allowImplicitSubSort);
+        OwnedHqlExpr sorted = ensureSorted(dataset, newsort, expr->hasAttribute(localAtom), true, alwaysLocal, allowImplicitSubSort, true);
         if (sorted == dataset)
             return NULL;
 
@@ -2523,7 +2523,7 @@ IHqlExpression * ThorHqlTransformer::normalizeCoGroup(IHqlExpression * expr)
         {
             IHqlExpression & cur = inputs.item(i);
             OwnedHqlExpr mappedOrder = replaceSelector(bestSortOrder, queryActiveTableSelector(), &cur);
-            sortedInputs.append(*ensureSorted(&cur, mappedOrder, true, true, alwaysLocal, options.implicitSubSort));
+            sortedInputs.append(*ensureSorted(&cur, mappedOrder, true, true, alwaysLocal, options.implicitSubSort, false));
         }
         HqlExprArray sortedArgs;
         unwindChildren(sortedArgs, bestSortOrder);
@@ -2565,7 +2565,7 @@ static IHqlExpression * getNonThorSortedJoinInput(IHqlExpression * joinExpr, IHq
     groupOrder.setown(replaceSelector(groupOrder, queryActiveTableSelector(), expr->queryNormalizedSelector()));
 
     //not used for thor, so sort can be local
-    OwnedHqlExpr table = ensureSorted(expr, groupOrder, false, true, true, implicitSubSort);
+    OwnedHqlExpr table = ensureSorted(expr, groupOrder, false, true, true, implicitSubSort, false);
     if (table != expr)
         table.setown(cloneInheritedAnnotations(joinExpr, table));
 

+ 4 - 2
roxie/ccd/ccdserver.cpp

@@ -7446,7 +7446,7 @@ class CSpillingQuickSortAlgorithm : implements CInterfaceOf<ISortAlgorithm>, imp
     IRoxieSlaveContext * ctx;
     Owned<IDiskMerger> diskMerger;
     Owned<IRowStream> diskReader;
-    Owned<IOutputMetaData> rowMeta;
+    IOutputMetaData *rowMeta;
     unsigned activityId;
 
 public:
@@ -8128,7 +8128,9 @@ public:
         if (sortMethod)
         {
             sortFlags = sortMethod->getAlgorithmFlags();
-            if (sortFlags & TAFunstable)
+            if (sortFlags & TAFspill)
+                sortAlgorithm = spillingQuickSort;
+            else if (sortFlags & TAFunstable)
                 sortAlgorithm = quickSort;
             if (!(sortFlags & TAFconstant))
                 sortAlgorithm = unknownSort;

+ 1 - 0
rtl/include/eclhelper.hpp

@@ -1573,6 +1573,7 @@ enum
 
     TAFstable           = 0x0002,
     TAFunstable         = 0x0004,
+    TAFspill            = 0x0008,
 };
 
 struct IHThorSortArg : public IHThorArg