Forráskód Böngészése

HPCC-13017 Mark sorts that include all fields as potentially unstable

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 10 éve
szülő
commit
9639337e29

+ 7 - 0
ecl/hql/hqlexpr.cpp

@@ -14777,6 +14777,13 @@ bool preservesValue(ITypeInfo * after, IHqlExpression * expr)
     return (recastValue->compare(value) == 0);
 }
 
+bool castPreservesValue(IHqlExpression * expr) //expr must be a cast; result is preservesValue() applied to the cast's own type and its operand
+{
+    dbgassertex(isCast(expr)); //callers are required to pass a cast expression
+    return preservesValue(expr->queryType(), expr->queryChild(0)); //child 0 is the value being cast, the expression's type is the type after the cast
+}
+
+
 static const unsigned UNLIMITED_REPEAT = (unsigned)-1;
 
 unsigned getRepeatMin(IHqlExpression * expr)

+ 1 - 0
ecl/hql/hqlexpr.hpp

@@ -1538,6 +1538,7 @@ extern HQL_API unsigned getRepeatMin(IHqlExpression * expr);
 extern HQL_API bool isStandardRepeat(IHqlExpression * expr);
 extern HQL_API bool transformContainsCounter(IHqlExpression * transform, IHqlExpression * counter);
 extern HQL_API bool preservesValue(ITypeInfo * after, IHqlExpression * expr);
+extern HQL_API bool castPreservesValue(IHqlExpression * expr); //expr must be a cast expression (asserted by the definition)
 extern HQL_API IHqlExpression * getActiveTableSelector();
 extern HQL_API IHqlExpression * queryActiveTableSelector();
 extern HQL_API IHqlExpression * getSelf(IHqlExpression * ds);

+ 56 - 0
ecl/hql/hqlmeta.cpp

@@ -3391,3 +3391,59 @@ extern HQL_API bool hasKnownSortGroupDistribution(IHqlExpression * expr, bool is
 {
     return queryMetaProperty(expr)->meta.hasKnownSortGroupDistribution(isLocal);
 }
+
+//---------------------------------------------------------------------------------------------------------------------
+
+//Mark all selectors that are fully included in the sort criteria. Err on the side of caution.
+void markValidSelectors(IHqlExpression * expr, IHqlExpression * dsSelector) //recursive walk over expr; tags qualifying selects with setTransformExtra()
+{
+    switch (expr->getOperator())
+    {
+    case no_sortlist:
+        break; //leave the switch so that each child of the sort list is walked below
+    case no_cast:
+    case no_implicitcast:
+        if (!castPreservesValue(expr))
+            return; //a cast that may change the value does not fully cover its operand, so nothing beneath it is tagged
+        break;
+    case no_typetransfer:
+        //Special case the transfer to a variable length data type that is done for a dataset in an index build
+        //(it will always preserve the value of any argument)
+        {
+            ITypeInfo * type = expr->queryType();
+            if ((type->getTypeCode() != type_data) || !isUnknownSize(type))
+                return; //every other kind of type transfer is ignored
+            break;
+        }
+    case no_select:
+        {
+            bool isNew = false;
+            IHqlExpression * root = querySelectorDataset(expr, isNew);
+            if (!isNew && (root == dsSelector))
+                expr->setTransformExtra(expr); //any non-null extra is the tag that allFieldsAreSorted() tests for
+            return;
+        }
+    default:
+        return; //any other operator is conservatively ignored, together with everything beneath it
+    }
+
+    ForEachChild(i, expr)
+        markValidSelectors(expr->queryChild(i), dsSelector);
+}
+
+extern HQL_API bool allFieldsAreSorted(IHqlExpression * record, IHqlExpression * sortOrder, IHqlExpression * dsSelector) //true only if every selector expanded from record was tagged while walking sortOrder
+{
+    TransformMutexBlock block; //NOTE(review): presumably scopes the transform-extra tags used below to this call - confirm
+
+    //First walk the sort order expression, tagging valid sorted selectors
+    markValidSelectors(sortOrder, dsSelector);
+
+    //Now expand all the selectors from the record, and check that they have been tagged
+    RecordSelectIterator iter(record, dsSelector);
+    ForEach(iter)
+    {
+        if (!iter.query()->queryTransformExtra())
+            return false; //this selector was never tagged, so the sort order does not cover it
+    }
+    return true;
+}

+ 2 - 0
ecl/hql/hqlmeta.hpp

@@ -129,6 +129,8 @@ extern HQL_API CHqlMetaProperty * querySimpleDatasetMeta(IHqlExpression * expr);
 extern HQL_API bool hasSameSortGroupDistribution(IHqlExpression * expr, IHqlExpression * other);
 extern HQL_API bool hasKnownSortGroupDistribution(IHqlExpression * expr, bool isLocal);
 
+extern HQL_API bool allFieldsAreSorted(IHqlExpression * record, IHqlExpression * sortOrder, IHqlExpression * dsSelector); //true only if every field of record, selected from dsSelector, is covered by sortOrder
+
 inline IHqlExpression * queryRemoveOmitted(IHqlExpression * expr)
 {
     if (expr &&  expr->isAttribute() && (expr->queryName() == _omitted_Atom))

+ 1 - 0
ecl/hqlcpp/hqlcpp.cpp

@@ -1754,6 +1754,7 @@ void HqlCppTranslator::cacheOptions()
         DebugOption(options.newBalancedSpotter,"newBalancedSpotter",true),
         DebugOption(options.keyedJoinPreservesOrder,"keyedJoinPreservesOrder",true),
         DebugOption(options.expandSelectCreateRow,"expandSelectCreateRow",false),
+        DebugOption(options.optimizeSortAllFields,"optimizeSortAllFields",true),
     };
 
     //get options values from workunit

+ 1 - 0
ecl/hqlcpp/hqlcpp.ipp

@@ -744,6 +744,7 @@ struct HqlCppOptions
     bool                newBalancedSpotter;
     bool                keyedJoinPreservesOrder;
     bool                expandSelectCreateRow;
+    bool                optimizeSortAllFields;
 };
 
 //Any information gathered while processing the query should be moved into here, rather than cluttering up the translator class

+ 10 - 0
ecl/hqlcpp/hqlhtcpp.cpp

@@ -16391,6 +16391,16 @@ ABoundActivity * HqlCppTranslator::doBuildActivitySort(BuildCtx & ctx, IHqlExpre
         flags.append("|TAFunstable");
         method = unstable->queryChild(0);
     }
+    else
+    {
+        //If a dataset is sorted by all fields then it is impossible to determine if the original order
+        //was preserved - so mark the sort as potentially unstable (to reduce memory usage at runtime)
+        if (options.optimizeSortAllFields &&
+            allFieldsAreSorted(expr->queryRecord(), sortlist, dataset->queryNormalizedSelector()))
+
+        flags.append("|TAFunstable");
+    }
+
     if (spill)
         flags.append("|TAFspill");
     if (!method || method->isConstant())

+ 14 - 0
ecl/hqlcpp/hqlttcpp.cpp

@@ -1778,6 +1778,20 @@ static IHqlExpression * simplifySortlistComplexity(IHqlExpression * sortlist)
                     appendComponent(cpts, invert, &concats.item(idxc));
             }
         }
+        else if (cur->getOperator() == no_trim)
+        {
+            //TRIM(fixed-length-string) can just sort by the string instead.  (Don't match LEFT/RIGHT versions.)
+            if (!cur->queryChild(1))
+            {
+                IHqlExpression * arg = cur->queryChild(0);
+                ITypeInfo * argType = arg->queryType();
+                if (isFixedSize(argType) && (cur->queryType()->getTypeCode() == argType->getTypeCode()))
+                {
+                    expand = true;
+                    appendComponent(cpts, invert, arg);
+                }
+            }
+        }
         else
         {
 #if 0

+ 73 - 0
testing/regress/ecl/key/sort3.xml

@@ -0,0 +1,73 @@
+<Dataset name='Result 1'>
+ <Row><uv>1</uv><sv>GAVIN     </sv></Row>
+ <Row><uv>1</uv><sv>CHARLIE   </sv></Row>
+ <Row><uv>513</uv><sv>Jim       </sv></Row>
+ <Row><uv>769</uv><sv>JAMES     </sv></Row>
+ <Row><uv>769</uv><sv>ABSALOM   </sv></Row>
+ <Row><uv>769</uv><sv>BARNEY    </sv></Row>
+ <Row><uv>769</uv><sv>CHARLIE   </sv></Row>
+ <Row><uv>769</uv><sv>JETHROW   </sv></Row>
+ <Row><uv>769</uv><sv>DANIEL    </sv></Row>
+</Dataset>
+<Dataset name='Result 2'>
+ <Row><uv>1</uv><sv>GAVIN     </sv></Row>
+ <Row><uv>769</uv><sv>JAMES     </sv></Row>
+ <Row><uv>769</uv><sv>ABSALOM   </sv></Row>
+ <Row><uv>769</uv><sv>BARNEY    </sv></Row>
+ <Row><uv>769</uv><sv>CHARLIE   </sv></Row>
+ <Row><uv>769</uv><sv>JETHROW   </sv></Row>
+ <Row><uv>1</uv><sv>CHARLIE   </sv></Row>
+ <Row><uv>769</uv><sv>DANIEL    </sv></Row>
+ <Row><uv>513</uv><sv>Jim       </sv></Row>
+</Dataset>
+<Dataset name='Result 3'>
+ <Row><uv>769</uv><sv>ABSALOM   </sv></Row>
+ <Row><uv>769</uv><sv>BARNEY    </sv></Row>
+ <Row><uv>769</uv><sv>CHARLIE   </sv></Row>
+ <Row><uv>1</uv><sv>CHARLIE   </sv></Row>
+ <Row><uv>769</uv><sv>DANIEL    </sv></Row>
+ <Row><uv>1</uv><sv>GAVIN     </sv></Row>
+ <Row><uv>769</uv><sv>JAMES     </sv></Row>
+ <Row><uv>769</uv><sv>JETHROW   </sv></Row>
+ <Row><uv>513</uv><sv>Jim       </sv></Row>
+</Dataset>
+<Dataset name='Result 4'>
+ <Row><uv>769</uv><sv>ABSALOM   </sv></Row>
+ <Row><uv>769</uv><sv>BARNEY    </sv></Row>
+ <Row><uv>1</uv><sv>CHARLIE   </sv></Row>
+ <Row><uv>769</uv><sv>CHARLIE   </sv></Row>
+ <Row><uv>769</uv><sv>DANIEL    </sv></Row>
+ <Row><uv>1</uv><sv>GAVIN     </sv></Row>
+ <Row><uv>769</uv><sv>JAMES     </sv></Row>
+ <Row><uv>769</uv><sv>JETHROW   </sv></Row>
+ <Row><uv>513</uv><sv>Jim       </sv></Row>
+</Dataset>
+<Dataset name='Result 5'>
+ <Row><uv>769</uv><sv>ABSALOM   </sv></Row>
+ <Row><uv>769</uv><sv>BARNEY    </sv></Row>
+ <Row><uv>1</uv><sv>CHARLIE   </sv></Row>
+ <Row><uv>769</uv><sv>CHARLIE   </sv></Row>
+ <Row><uv>769</uv><sv>DANIEL    </sv></Row>
+ <Row><uv>1</uv><sv>GAVIN     </sv></Row>
+ <Row><uv>769</uv><sv>JAMES     </sv></Row>
+ <Row><uv>769</uv><sv>JETHROW   </sv></Row>
+ <Row><uv>513</uv><sv>Jim       </sv></Row>
+</Dataset>
+<Dataset name='Result 6'>
+ <Row><uv>769</uv><sv>ABSALOM   </sv></Row>
+ <Row><uv>769</uv><sv>BARNEY    </sv></Row>
+ <Row><uv>1</uv><sv>CHARLIE   </sv></Row>
+ <Row><uv>769</uv><sv>CHARLIE   </sv></Row>
+ <Row><uv>769</uv><sv>DANIEL    </sv></Row>
+ <Row><uv>1</uv><sv>GAVIN     </sv></Row>
+ <Row><uv>769</uv><sv>JAMES     </sv></Row>
+ <Row><uv>769</uv><sv>JETHROW   </sv></Row>
+ <Row><uv>513</uv><sv>Jim       </sv></Row>
+</Dataset>
+<Dataset name='Result 7'>
+</Dataset>
+<Dataset name='Result 8'>
+</Dataset>
+<Dataset name='Result 9'>
+ <Row><Result_9>done</Result_9></Row>
+</Dataset>

+ 52 - 0
testing/regress/ecl/sort3.ecl

@@ -0,0 +1,52 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2013 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+#onwarning(1036, ignore); // warning 1036 is suppressed for this test
+
+r1 := { unsigned id };
+MyRec := RECORD
+    UNSIGNED2 uv;
+    STRING10   sv;
+END;
+MyRec2 := RECORD(myRec)
+    DATASET(r1) child;
+END;
+
+SomeFile := DATASET([{0x001,'GAVIN'}, // the low byte of every uv below is 0x01, so (unsigned1)uv presumably compares equal for all rows
+                     {0x301,'JAMES'},
+                     {0x301,'ABSALOM'},
+                     {0x301,'BARNEY'},
+                     {0x301,'CHARLIE'},
+                     {0x301,'JETHROW'},
+                     {0x001,'CHARLIE'}, // same sv as an earlier row but a different uv
+                     {0x301,'DANIEL'},
+                     {0x201,'Jim'}
+                    ],MyRec);
+
+p := PROJECT(SomeFile, TRANSFORM(myRec2, SELF := LEFT; SELF := [])); // SomeFile widened to MyRec2; the remaining field (child) is assigned from []
+
+sequential(
+    output(SORT(SomeFile, uv)), // needs to be stable: sv is not part of the sort order
+    output(SORT(SomeFile, (unsigned1)uv)), // needs to be stable: narrowing cast; the expected result keeps the input order
+    output(SORT(SomeFile, (unsigned1)uv,sv)), // needs to be stable: sv is covered, but uv only appears through a narrowing cast
+    output(SORT(SomeFile, (unsigned1)uv,sv,uv)), // can be unstable: uv and sv both appear uncast
+    output(SORT(SomeFile, trim(sv),uv)), // can be unstable: presumably relies on TRIM of a fixed-length string being sorted as the string itself
+    output(SORT(SomeFile, (string20)sv,(unsigned4)uv)), // can be unstable: both casts widen the field, so the values are preserved
+    buildindex(NOFOLD(SomeFile), { uv }, { SomeFile }, 'REGRESS:dummyIndex1',overwrite); // presumably every field of SomeFile is in the index - confirm
+    buildindex(NOFOLD(p), { uv }, { p }, 'REGRESS:dummyIndex2',overwrite); // as above, but the record also has a child dataset
+    output('done')
+);