Browse Source

Merge pull request #10900 from ghalliday/issue18917

HPCC-18917 Add KEYED substring filters to disk files

Reviewed-By: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 7 years ago
parent
commit
b5270a7e09

+ 3 - 0
ecl/hql/hqlexpr.cpp

@@ -1933,6 +1933,9 @@ node_operator getReverseOp(node_operator op)
     case no_eq:
     case no_ne:
         return op;
+    case no_in:
+    case no_notin:
+        return no_none;
     default:
         assertex(!"Should not be called");
     }

+ 85 - 9
ecl/hql/hqlfilter.cpp

@@ -93,6 +93,41 @@ static node_operator getModifiedOp(node_operator op, bool duplicate)
     }
 }
 
+static bool isSubString(IHqlExpression * expr)
+{
+    for(;;)
+    {
+        switch (expr->getOperator())
+        {
+        case no_substring:
+            return true;
+        case no_cast:
+        case no_implicitcast:
+            break;
+        default:
+            return false;
+        }
+        expr = expr->queryChild(0);
+    }
+}
+
+static IHqlExpression * querySubStringRange(IHqlExpression * expr)
+{
+    for(;;)
+    {
+        switch (expr->getOperator())
+        {
+        case no_substring:
+            return expr->queryChild(1);
+        case no_cast:
+        case no_implicitcast:
+            break;
+        default:
+            return nullptr;
+        }
+        expr = expr->queryChild(0);
+    }
+}
 
 void KeyFailureInfo::merge(const KeyFailureInfo & other)
 {
@@ -497,7 +532,7 @@ void FilterExtractor::expandSelects(IHqlExpression * expr, IHqlSimpleScope * exp
                 OwnedHqlExpr keySelected = createSelectExpr(LINK(keySelector), LINK(expr));
                 OwnedHqlExpr expandedSelected = createSelectExpr(LINK(expandedSelector), LINK(match));
                 IHqlExpression * record = expr->queryRecord();
-                if (record)
+                if (expr->isDatarow())
                     expandSelects(record, match->queryRecord()->querySimpleScope(), keySelected, expandedSelected);
                 else
                 {
@@ -674,8 +709,11 @@ IHqlExpression * FilterExtractor::invertTransforms(IHqlExpression * left, IHqlEx
             assertex(isKeySelect(left));
             ITypeInfo * leftType = left->queryType();
             ITypeInfo * rightType = right->queryType();
-            if (leftType == rightType || !castLosesInformation(leftType, rightType))
-                return LINK(right);
+            if (!createValueSets)
+            {
+                if (leftType == rightType || !castLosesInformation(leftType, rightType))
+                    return LINK(right);
+            }
             return ensureExprType(right, leftType);
         }
 
@@ -728,6 +766,14 @@ IHqlExpression * FilterExtractor::isKeyableFilter(IHqlExpression * left, IHqlExp
             IHqlExpression * uncast = left->queryChild(0);
             ITypeInfo * castType = left->queryType();
             ITypeInfo * uncastType = uncast->queryType();
+
+            //Keyed filters on alien datatypes do not work, and can trigger an internal error in ensureExprType()
+            if (uncastType->getTypeCode() == type_alien)
+            {
+                reason.set(KFRtoocomplex, left);
+                return nullptr;
+            }
+
             //(ty)x = y.            E.g., (int1)string2field = int1value
             //if more than one value of x[uncastType] corresponds to a single value in y[castType] then we can't sensibly create
             //the key segment monitor.  Because we will get false negatives.  If it is an inverse then duplicate (see below)
@@ -786,15 +832,15 @@ IHqlExpression * FilterExtractor::isKeyableFilter(IHqlExpression * left, IHqlExp
             if (range->getOperator() == no_rangeto)
             {
                 IValue *end = range->queryChild(0)->queryValue();
-                if (!end)
+                if (!createValueSets && !end)
                     break;
                 return isKeyableFilter(left->queryChild(0), right, duplicate, compareOp, reason, keyedKind);
             }
             else if (range->getOperator() == no_range)
             {
-                IValue *start = range->queryChild(0)->queryValue();
-                IValue *end = range->queryChild(1)->queryValue();
-                if (!start || !end || start->getIntValue() != 1)
+                if (!matchesConstantValue(range->queryChild(0), 1))
+                    break;
+                if (!createValueSets && !range->queryChild(1)->queryValue())
                     break;
                 return isKeyableFilter(left->queryChild(0), right, duplicate, compareOp, reason, keyedKind);
             }
@@ -1006,6 +1052,13 @@ IHqlExpression * FilterExtractor::getRangeLimit(ITypeInfo * fieldType, IHqlExpre
 IHqlExpression * FilterExtractor::createRangeCompare(IHqlExpression * selector, IHqlExpression * value, IHqlExpression * lengthExpr, bool compareEqual)
 {
     OwnedHqlExpr foldedValue = foldHqlExpression(value);
+    if (createValueSets)
+    {
+        OwnedHqlExpr rangeExpr = createValue(no_rangeto, makeNullType(), LINK(lengthExpr));
+        OwnedHqlExpr substr = createValue(no_substring, getStretchedType(UNKNOWN_LENGTH, selector->queryType()), LINK(selector), rangeExpr.getClear());
+        return createValue(compareEqual ? no_eq : no_ne, makeBoolType(), LINK(selector), foldedValue.getClear());
+    }
+
     ITypeInfo * fieldType = selector->queryType();
     OwnedHqlExpr lowExpr = getRangeLimit(fieldType, lengthExpr, foldedValue, -1);
     OwnedHqlExpr highExpr = getRangeLimit(fieldType, lengthExpr, foldedValue, +1);
@@ -1025,6 +1078,10 @@ bool FilterExtractor::matchSubstringFilter(KeyConditionInfo & matches, node_oper
     OwnedHqlExpr guard;
     ITypeInfo * guardCastType = NULL;
 
+    //Do not match implicit substring filters by default - because only a single substring range is supported
+    if (createValueSets && (keyedKind == KeyedNo))
+        return false;
+
     if ((left->getOperator() == no_cast) || (left->getOperator() == no_implicitcast))
     {
         //code is extracted and simplified from isKeyableFilter() above - should be commoned up.
@@ -1089,7 +1146,7 @@ bool FilterExtractor::matchSubstringFilter(KeyConditionInfo & matches, node_oper
         return false;
     ITypeInfo * fieldType = selector->queryType();
     unsigned fieldLength = fieldType->getStringLen();
-    if (fieldLength == UNKNOWN_LENGTH)
+    if (!createValueSets && (fieldLength == UNKNOWN_LENGTH))
         return false;
 
     OwnedHqlExpr range = foldHqlExpression(left->queryChild(1));
@@ -1129,7 +1186,9 @@ bool FilterExtractor::matchSubstringFilter(KeyConditionInfo & matches, node_oper
         newTest.setown(createBalanced(combineOp, boolType, compares));
     }
 
-    matches.appendCondition(*new KeyCondition(selector, newTest, keyedKind));
+    KeyCondition * entry = new KeyCondition(selector, newTest, keyedKind);
+    entry->subrange.set(left->queryChild(1));
+    matches.appendCondition(*entry);
     if (guard)
         matches.appendPreFilter(guard);
     return true;
@@ -1142,18 +1201,27 @@ bool FilterExtractor::extractSimpleCompareFilter(KeyConditionInfo & matches, IHq
     IHqlExpression * l = promoted->queryChild(0);
     IHqlExpression * r = promoted->queryChild(1);
     bool duplicate = false;
+
     KeyFailureInfo reasonl, reasonr;
     node_operator op = expr->getOperator();
     IHqlExpression * matchedSelector = isKeyableFilter(l, r, duplicate, op, reasonl, keyedKind);
     Owned<KeyCondition> result;
     if (matchedSelector)
     {
+        //Do not match implicit substring filters by default - because only a single substring range is supported
+        if (createValueSets && isSubString(l) && (keyedKind == KeyedNo))
+        {
+            matches.appendPostFilter(expr);
+            return false;
+        }
+
         node_operator newOp = getModifiedOp(op, duplicate);
 
         if (newOp != no_none)
         {
             OwnedHqlExpr newFilter = createValue(newOp, expr->getType(), LINK(l), LINK(r));
             result.setown(new KeyCondition(matchedSelector, newFilter, keyedKind));
+            result->subrange.set(querySubStringRange(l));
         }
     }
     else
@@ -1162,11 +1230,19 @@ bool FilterExtractor::extractSimpleCompareFilter(KeyConditionInfo & matches, IHq
         matchedSelector = isKeyableFilter(r, l, duplicate, op, reasonr, keyedKind);
         if (matchedSelector)
         {
+            //Do not match implicit substring filters by default - because only a single substring range is supported
+            if (createValueSets && isSubString(r) && (keyedKind == KeyedNo))
+            {
+                matches.appendPostFilter(expr);
+                return false;
+            }
+
             node_operator newOp = getModifiedOp(getReverseOp(op), duplicate);
             if (newOp != no_none)
             {
                 OwnedHqlExpr newFilter = createValue(newOp, expr->getType(), LINK(r), LINK(l));
                 result.setown(new KeyCondition(matchedSelector, newFilter, keyedKind));
+                result->subrange.set(querySubStringRange(r));
             }
         }
     }

+ 4 - 1
ecl/hql/hqlfilter.hpp

@@ -28,6 +28,7 @@ public:
     bool isKeyed()          { return (keyedKind != KeyedNo); }
 
     HqlExprAttr     selector;
+    HqlExprAttr     subrange;
     HqlExprAttr     expr;
     KeyedKind       keyedKind;
     bool            isWild;
@@ -79,10 +80,11 @@ enum MonitorFilterKind { NoMonitorFilter, MonitorFilterSkipEmpty, MonitorFilterS
 struct HQL_API KeySelectorInfo
 {
 public:
-    KeySelectorInfo(KeyedKind _keyedKind, IHqlExpression * _selector, IHqlExpression * _expandedSelector, unsigned _fieldIdx, size32_t _offset, size32_t _size)
+    KeySelectorInfo(KeyedKind _keyedKind, IHqlExpression * _selector, IHqlExpression * _subrange, IHqlExpression * _expandedSelector, unsigned _fieldIdx, size32_t _offset, size32_t _size)
     {
         keyedKind = _keyedKind;
         selector = _selector;
+        subrange = _subrange;
         expandedSelector = _expandedSelector;
         fieldIdx = _fieldIdx;
         offset = _offset;
@@ -92,6 +94,7 @@ public:
     const char * getFFOptions();
 
     IHqlExpression * selector;
+    IHqlExpression * subrange;
     IHqlExpression * expandedSelector;
     unsigned fieldIdx;
     size32_t offset;

+ 3 - 1
ecl/hqlcpp/hqlcerrors.hpp

@@ -219,6 +219,7 @@
 #define HQLERR_HashStoredDuplication            4207
 #define HQLERR_DatafileRequiresSigned           4208
 #define HQLERR_ExpectedFileLhsFetch             4209
+#define HQLERR_IncompatibleKeyedSubString       4210
 
 //Warnings....
 #define HQLWRN_PersistDataNotLikely             4500
@@ -386,7 +387,7 @@
 #define HQLERR_SetUnknownLength_Text            "Sets of items of unknown length are not yet supported!"
 #define HQLERR_HashStoredTypeMismatch_Text      "#STORED (%s) type mismatch (was '%s' replacement '%s')"
 #define HQLERR_CountAllSet_Text                 "Cannot count number of elements in ALL"
-#define HQLERR_OnlyKeyFixedField_Text           "Can only key fixed fields at fixed offsets"
+#define HQLERR_OnlyKeyFixedField_Text           "Cannot key field %s - only fixed fields at fixed offsets"
 #define HQLERR_DuplicateStoredDiffType_Text     "Duplicate definition of %s with different type (use #stored to override default value)"
 #define HQLERR_RegexFeatureNotSupport_Text      "Features are not supported by regex - did you mean repeat() instead of {}?"
 #define HQLERR_UnsupportedAttribute_Text        "Option %s not yet supported on child datasets"
@@ -519,6 +520,7 @@
 #define HQLERR_HashStoredDuplication_Text       "Inconsistent #%s(%s, %s) and #%s(%s, %s)"
 #define HQLERR_DatafileRequiresSigned_Text      "Insufficient access rights to use datafiles"
 #define HQLERR_ExpectedFileLhsFetch_Text        "The first argument of FETCH must be a disk file (had %s)"
+#define HQLERR_IncompatibleKeyedSubString_Text  "Cannot use two different KEYED substring filters for field %s in key %s"
 
 //Warnings.
 #define HQLWRN_CannotRecreateDistribution_Text  "Cannot recreate the distribution for a persistent dataset"

+ 53 - 3
ecl/hqlcpp/hqlcfilter.cpp

@@ -279,6 +279,10 @@ void CppFilterExtractor::extractCompareInformation(BuildCtx & ctx, IHqlExpressio
 
 void CppFilterExtractor::extractCompareInformation(BuildCtx & ctx, IHqlExpression * lhs, IHqlExpression * value, SharedHqlExpr & compare, SharedHqlExpr & normalized, IHqlExpression * expandedSelector)
 {
+    //For substring matching the set of values should match the type of the underlying field.
+    if (createValueSets && (lhs->getOperator() == no_substring))
+        lhs = lhs->queryChild(0);
+
     LinkedHqlExpr compareValue = value->queryBody();
     OwnedHqlExpr recastValue;
     if ((lhs->getOperator() != no_select) || (lhs->queryType() != compareValue->queryType()))
@@ -584,7 +588,38 @@ void CppFilterExtractor::buildKeySegmentExpr(BuildFilterState & buildState, KeyS
     if (targetSet && !requiredSet)
     {
         if (createValueSets)
-            createMonitorText.appendf("createFieldFilter(%u, %s)", selectorInfo.fieldIdx, targetSet);
+        {
+            IHqlExpression * lhs = thisKey.queryChild(0);
+            if (selectorInfo.subrange)
+            {
+                IHqlExpression * range = selectorInfo.subrange;
+                IHqlExpression * limit;
+                switch (range->getOperator())
+                {
+                case no_rangeto:
+                    limit = range->queryChild(0);
+                    break;
+                case no_range:
+                    assertex(matchesConstValue(range->queryChild(0), 1));
+                    limit = range->queryChild(1);
+                    break;
+                case no_constant:
+                    limit = range;
+                    assertex(matchesConstValue(range, 1));
+                    break;
+                default:
+                    throwUnexpected();
+                }
+                CHqlBoundExpr boundLimit;
+                translator.buildExpr(ctx, limit, boundLimit);
+                StringBuffer limitText;
+                translator.generateExprCpp(limitText, boundLimit.expr);
+
+                createMonitorText.appendf("createSubStringFieldFilter(%u, %s, %s)", selectorInfo.fieldIdx, limitText.str(), targetSet);
+            }
+            else
+                createMonitorText.appendf("createFieldFilter(%u, %s)", selectorInfo.fieldIdx, targetSet);
+        }
         else
             createMonitorText.appendf("createKeySegmentMonitor(%s, %s.getClear(), %u, %u, %u)",
                                       boolToText(selectorInfo.keyedKind != KeyedYes), targetSet, selectorInfo.fieldIdx, selectorInfo.offset, selectorInfo.size);
@@ -661,6 +696,9 @@ IHqlExpression * CppFilterExtractor::getMonitorValueAddress(BuildCtx & ctx, IHql
 
 bool CppFilterExtractor::buildSingleKeyMonitor(StringBuffer & createMonitorText, KeySelectorInfo & selectorInfo, BuildCtx & ctx, IHqlExpression & thisKey)
 {
+    if (selectorInfo.subrange)
+        return false;
+
     BuildCtx subctx(ctx);
     OwnedHqlExpr compare, normalized;
 
@@ -757,6 +795,7 @@ void CppFilterExtractor::buildKeySegment(BuildFilterState & buildState, BuildCtx
     bool isImplicit = true;
     bool prevWildWasKeyed = buildState.wildWasKeyed;
     buildState.wildWasKeyed = false;
+    IHqlExpression * subrange = nullptr;
     ForEachItemIn(cond, keyed.conditions)
     {
         KeyCondition & cur = keyed.conditions.item(cond);
@@ -776,6 +815,17 @@ void CppFilterExtractor::buildKeySegment(BuildFilterState & buildState, BuildCtx
             }
             else
             {
+                if (createValueSets)
+                {
+                    if (matches.empty())
+                        subrange = cur.subrange;
+                    else if (subrange != cur.subrange)
+                    {
+                        StringBuffer s, keyname;
+                        translator.throwError2(HQLERR_IncompatibleKeyedSubString, getExprECL(field, s).str(), queryKeyName(keyname));
+                    }
+                }
+
                 matches.append(OLINK(cur));
                 if (buildState.implicitWildField && !ignoreUnkeyed)
                 {
@@ -801,7 +851,7 @@ void CppFilterExtractor::buildKeySegment(BuildFilterState & buildState, BuildCtx
     KeyedKind keyedKind = getKeyedKind(translator, matches);
     if (whichField >= firstOffsetField)
         translator.throwError1(HQLERR_KeyedNotKeyed, getExprECL(field, s).str());
-    KeySelectorInfo selectorInfo(keyedKind, selector, expandedSelector, buildState.curFieldIdx, buildState.curOffset, curSize);
+    KeySelectorInfo selectorInfo(keyedKind, selector, subrange, expandedSelector, buildState.curFieldIdx, buildState.curOffset, curSize);
 
     bool ignoreKeyedExtend = false;
     if ((keyedKind == KeyedExtend) && buildState.wildPending() && !ignoreUnkeyed)
@@ -940,7 +990,7 @@ void CppFilterExtractor::buildSegments(BuildCtx & ctx, const char * listName, bo
     {
         KeyCondition & cur = keyed.conditions.item(cond);
         if (!cur.generated)
-            translator.throwError(HQLERR_OnlyKeyFixedField);
+            translator.throwError1(HQLERR_OnlyKeyFixedField, str(cur.selector->queryChild(1)->queryId()));
     }
 }
 

+ 2 - 1
ecl/hqlcpp/hqlcpp.cpp

@@ -1809,6 +1809,7 @@ void HqlCppTranslator::cacheOptions()
         DebugOption(options.reportDFSinfo,"reportDFSinfo", 0),
         DebugOption(options.useGlobalCompareClass,"useGlobalCompareClass", false),
         DebugOption(options.createValueSets,"createValueSets", false),
+        DebugOption(options.implicitKeyedDiskFilter,"implicitKeyedDiskFilter", false),
     };
 
     //get options values from workunit
@@ -4776,7 +4777,7 @@ void HqlCppTranslator::ensureHasAddress(BuildCtx & ctx, CHqlBoundExpr & tgt)
     case no_variable:
         break;
     default:
-        if (!isTypePassedByAddress(expr->queryType()))
+        if (!isTypePassedByAddress(expr->queryType()) || (expr->getOperator() == no_decimalstack))
         {
             OwnedHqlExpr bound = tgt.getTranslatedExpr();
             buildTempExpr(ctx, bound, tgt);

+ 1 - 0
ecl/hqlcpp/hqlcpp.ipp

@@ -794,6 +794,7 @@ struct HqlCppOptions
     bool                timeTransforms;
     bool                useGlobalCompareClass;
     bool                createValueSets;
+    bool                implicitKeyedDiskFilter;
 };
 
 //Any information gathered while processing the query should be moved into here, rather than cluttering up the translator class

+ 41 - 21
ecl/hqlcpp/hqlsource.cpp

@@ -2709,12 +2709,12 @@ void DiskReadBuilderBase::buildMembers(IHqlExpression * expr)
     StringBuffer s;
 
     //Process any KEYED() information
-    if (monitors.isFiltered())
+    if (monitors.isKeyed())
     {
         MemberFunction func(translator, instance->startctx, "virtual void createSegmentMonitors(IIndexReadContext *irc) override");
         monitors.buildSegments(func.ctx, "irc", true);
     }
-    instance->addAttributeBool("_isKeyed", monitors.isFiltered());
+    instance->addAttributeBool("_isKeyed", monitors.isKeyed());
 
     //---- virtual unsigned getFlags()
     instance->addAttributeBool("preload", isPreloaded);
@@ -2778,7 +2778,7 @@ void DiskReadBuilderBase::buildFlagsMember(IHqlExpression * expr)
     if (tableExpr->hasAttribute(optAtom)) flags.append("|TDRoptional");
     if (tableExpr->hasAttribute(_workflowPersist_Atom)) flags.append("|TDXupdateaccessed");
     if (isPreloaded) flags.append("|TDRpreload");
-    if (monitors.isFiltered()) flags.append("|TDRkeyed");
+    if (monitors.isKeyed()) flags.append("|TDRkeyed");
     if (limitExpr)
     {
         if (limitExpr->hasAttribute(onFailAtom))
@@ -2819,34 +2819,54 @@ void DiskReadBuilderBase::buildTransformFpos(BuildCtx & transformCtx)
 
 void DiskReadBuilderBase::extractMonitors(IHqlExpression * ds, SharedHqlExpr & unkeyedFilter, HqlExprArray & conds)
 {
-    ForEachItemIn(i, conds)
+    HqlExprAttr mode = tableExpr->queryChild(2);
+    //KEYED filters can only currently be implemented for binary files - not csv, xml or pipe....
+    if (queryTableMode(tableExpr) == no_flat)
     {
-        IHqlExpression * filter = &conds.item(i);
-        if (isSourceInvariant(ds, filter))                  // more actually isSourceInvariant.
-            extendConditionOwn(globalGuard, no_and, LINK(filter));
-        else
+        OwnedHqlExpr implicitFilter;
+        ForEachItemIn(i, conds)
         {
-            node_operator op = filter->getOperator();
-            switch (op)
+            IHqlExpression * filter = &conds.item(i);
+            if (isSourceInvariant(ds, filter))                  // more actually isSourceInvariant.
+                extendConditionOwn(globalGuard, no_and, LINK(filter));
+            else
             {
-            case no_assertkeyed:
-            case no_assertwild:
+                node_operator op = filter->getOperator();
+                switch (op)
                 {
-                    //MORE: This needs to test that the fields are at fixed offsets, fixed length, and collatable.
-                    OwnedHqlExpr extraFilter;
-                    monitors.extractFilters(filter, extraFilter);
+                case no_assertkeyed:
+                case no_assertwild:
+                    {
+                        //MORE: This needs to test that the fields are at fixed offsets, fixed length, and collatable.
+                        OwnedHqlExpr extraFilter;
+                        monitors.extractFilters(filter, extraFilter);
 
-                    //NB: Even if it is keyed then (part of) the test condition might be duplicated.
-                    appendFilter(unkeyedFilter, extraFilter);
+                        //NB: Even if it is keyed then (part of) the test condition might be duplicated.
+                        appendFilter(unkeyedFilter, extraFilter);
+                        break;
+                    }
+                default:
+                    // Add this condition to the catchall filter
+                    appendFilter(implicitFilter, filter);
                     break;
                 }
-            default:
-                // Add this condition to the catchall filter
-                appendFilter(unkeyedFilter, filter);
-                break;
             }
         }
+
+        if (implicitFilter)
+        {
+            if (translator.queryOptions().implicitKeyedDiskFilter && !monitors.isKeyed())
+            {
+                OwnedHqlExpr extraFilter;
+                monitors.extractFilters(implicitFilter.get(), extraFilter);
+                appendFilter(unkeyedFilter, extraFilter);
+            }
+            else
+                appendFilter(unkeyedFilter, implicitFilter);
+        }
     }
+    else
+        SourceBuilder::extractMonitors(ds, unkeyedFilter, conds);
 }
 
 //---------------------------------------------------------------------------

+ 2 - 0
rtl/eclrtl/rtlkey.hpp

@@ -318,6 +318,8 @@ extern ECLRTL_API IFieldFilter * createFieldFilter(unsigned fieldId, const RtlTy
 extern ECLRTL_API IFieldFilter * createFieldFilter(unsigned fieldId, IValueSet * values);
 extern ECLRTL_API IFieldFilter * createWildFieldFilter(unsigned fieldId, const RtlTypeInfo & type);
 
+extern ECLRTL_API IFieldFilter * createSubStringFieldFilter(unsigned fieldId, size32_t subLength, IValueSet * values);
+
 extern ECLRTL_API IFieldFilter * deserializeFieldFilter(unsigned fieldId, const RtlTypeInfo & type, const char * src);
 extern ECLRTL_API IFieldFilter * deserializeFieldFilter(unsigned fieldId, const RtlTypeInfo & type, MemoryBuffer & in);
 

+ 5 - 2
rtl/eclrtl/rtlnewkey.cpp

@@ -370,9 +370,9 @@ public:
             // x >= 'ab' becomes x > 'a' => clear the equal item
             // x < 'ab' becomes x <= 'a' => set the equal item
             if (newMask & CMPlt)
-                newMask &= ~CMPeq;
-            else if (newMask & CMPgt)
                 newMask |= CMPeq;
+            else if (newMask & CMPgt)
+                newMask &= ~CMPeq;
         }
         return new ValueTransition(newMask, newType, resized.toByteArray());
     }
@@ -2829,6 +2829,9 @@ protected:
         testFilter("extra:0=['XX']", { false, false, false, false, false, false });
         testFilter("extra:0=['']", { true, true, true, true, true, true });
         testFilter("name:1=['A']", { false, true, false, true, false, false });
+        //Check substring ranges are correctly truncated
+        testFilter("extra:3=['MARK','MAST']", { false, false, false, false, true, true });
+        testFilter("name:1=['AB','JA']", { true, false, true, false, true, true });
     }
 
 

+ 3 - 0
testing/regress/ecl/aggds3.ecl

@@ -20,14 +20,17 @@
 
 //version multiPart=true
 //version multiPart=false,useSequential=true
+//version keyedFilters=true
 
 import ^ as root;
 multiPart := #IFDEFINED(root.multiPart, false);
 useSequential := #IFDEFINED(root.useSequential, false);
+keyedFilters := #IFDEFINED(root.keyedFilters, false);
 
 //--- end of version configuration ---
 
 #onwarning (2168, ignore);
+#option ('implicitKeyedDiskFilter', keyedFilters);
 
 import $.setup;
 sq := setup.sq(multiPart);

+ 201 - 0
testing/regress/ecl/key/sqfiltsubstring.xml

@@ -0,0 +1,201 @@
+<Dataset name='Result 1'>
+</Dataset>
+<Dataset name='Result 2'>
+ <Row><postcode>SW1A0AA   </postcode></Row>
+ <Row><postcode>WC1       </postcode></Row>
+ <Row><postcode>          </postcode></Row>
+ <Row><postcode>AG1 6QT   </postcode></Row>
+ <Row><postcode>SG8 1S2   </postcode></Row>
+</Dataset>
+<Dataset name='Result 3'>
+ <Row><postcode>SW1A0AA   </postcode></Row>
+ <Row><postcode>SG8 1S2   </postcode></Row>
+</Dataset>
+<Dataset name='Result 4'>
+ <Row><postcode>SW1A0AA   </postcode></Row>
+ <Row><postcode>SG8 1S2   </postcode></Row>
+</Dataset>
+<Dataset name='Result 5'>
+ <Row><postcode>AG1 6QT   </postcode></Row>
+</Dataset>
+<Dataset name='Result 6'>
+ <Row><postcode>SW1A0AA   </postcode></Row>
+</Dataset>
+<Dataset name='Result 7'>
+ <Row><postcode>SW1A0AA   </postcode></Row>
+</Dataset>
+<Dataset name='Result 8'>
+ <Row><postcode>SW1A0AA   </postcode></Row>
+ <Row><postcode>SG8 1S2   </postcode></Row>
+</Dataset>
+<Dataset name='Result 9'>
+ <Row><postcode>SW1A0AA   </postcode></Row>
+ <Row><postcode>SG8 1S2   </postcode></Row>
+</Dataset>
+<Dataset name='Result 10'>
+ <Row><postcode>SW1A0AA   </postcode></Row>
+ <Row><postcode>WC1       </postcode></Row>
+ <Row><postcode>SG8 1S2   </postcode></Row>
+</Dataset>
+<Dataset name='Result 11'>
+ <Row><postcode>SW1A0AA   </postcode></Row>
+ <Row><postcode>WC1       </postcode></Row>
+</Dataset>
+<Dataset name='Result 12'>
+ <Row><postcode>SW1A0AA   </postcode></Row>
+ <Row><postcode>WC1       </postcode></Row>
+</Dataset>
+<Dataset name='Result 13'>
+</Dataset>
+<Dataset name='Result 14'>
+ <Row><postcode>SW1A0AA   </postcode></Row>
+ <Row><postcode>WC1       </postcode></Row>
+ <Row><postcode>          </postcode></Row>
+ <Row><postcode>AG1 6QT   </postcode></Row>
+ <Row><postcode>SG8 1S2   </postcode></Row>
+</Dataset>
+<Dataset name='Result 15'>
+ <Row><postcode>SW1A0AA   </postcode></Row>
+ <Row><postcode>SG8 1S2   </postcode></Row>
+</Dataset>
+<Dataset name='Result 16'>
+ <Row><postcode>SW1A0AA   </postcode></Row>
+ <Row><postcode>SG8 1S2   </postcode></Row>
+</Dataset>
+<Dataset name='Result 17'>
+ <Row><postcode>AG1 6QT   </postcode></Row>
+</Dataset>
+<Dataset name='Result 18'>
+ <Row><postcode>SW1A0AA   </postcode></Row>
+</Dataset>
+<Dataset name='Result 19'>
+ <Row><postcode>SW1A0AA   </postcode></Row>
+</Dataset>
+<Dataset name='Result 20'>
+ <Row><postcode>SW1A0AA   </postcode></Row>
+ <Row><postcode>SG8 1S2   </postcode></Row>
+</Dataset>
+<Dataset name='Result 21'>
+ <Row><postcode>SW1A0AA   </postcode></Row>
+ <Row><postcode>SG8 1S2   </postcode></Row>
+</Dataset>
+<Dataset name='Result 22'>
+ <Row><postcode>SW1A0AA   </postcode></Row>
+ <Row><postcode>WC1       </postcode></Row>
+ <Row><postcode>SG8 1S2   </postcode></Row>
+</Dataset>
+<Dataset name='Result 23'>
+ <Row><postcode>SW1A0AA   </postcode></Row>
+ <Row><postcode>WC1       </postcode></Row>
+</Dataset>
+<Dataset name='Result 24'>
+ <Row><postcode>SW1A0AA   </postcode></Row>
+ <Row><postcode>WC1       </postcode></Row>
+</Dataset>
+<Dataset name='Result 25'>
+ <Row><forename>Gavin</forename></Row>
+ <Row><forename>Abigail</forename></Row>
+ <Row><forename>Liz</forename></Row>
+ <Row><forename>Elizabeth</forename></Row>
+ <Row><forename>Philip</forename></Row>
+ <Row><forename>Fred</forename></Row>
+ <Row><forename>Wilma</forename></Row>
+ <Row><forename>John</forename></Row>
+ <Row><forename>Brian</forename></Row>
+ <Row><forename>Julia</forename></Row>
+</Dataset>
+<Dataset name='Result 26'>
+</Dataset>
+<Dataset name='Result 27'>
+ <Row><forename>John</forename></Row>
+ <Row><forename>Julia</forename></Row>
+</Dataset>
+<Dataset name='Result 28'>
+ <Row><forename>John</forename></Row>
+ <Row><forename>Julia</forename></Row>
+</Dataset>
+<Dataset name='Result 29'>
+ <Row><forename>Wilma</forename></Row>
+</Dataset>
+<Dataset name='Result 30'>
+ <Row><forename>Liz</forename></Row>
+</Dataset>
+<Dataset name='Result 31'>
+ <Row><forename>Liz</forename></Row>
+</Dataset>
+<Dataset name='Result 32'>
+ <Row><forename>Liz</forename></Row>
+</Dataset>
+<Dataset name='Result 33'>
+ <Row><forename>Gavin</forename></Row>
+ <Row><forename>Fred</forename></Row>
+ <Row><forename>John</forename></Row>
+</Dataset>
+<Dataset name='Result 34'>
+ <Row><forename>Gavin</forename></Row>
+ <Row><forename>Fred</forename></Row>
+ <Row><forename>John</forename></Row>
+</Dataset>
+<Dataset name='Result 35'>
+ <Row><forename>Gavin</forename></Row>
+ <Row><forename>John</forename></Row>
+</Dataset>
+<Dataset name='Result 36'>
+ <Row><forename>Gavin</forename></Row>
+ <Row><forename>John</forename></Row>
+</Dataset>
+<Dataset name='Result 37'>
+ <Row><forename>Gavin</forename></Row>
+ <Row><forename>Abigail</forename></Row>
+ <Row><forename>Liz</forename></Row>
+ <Row><forename>Elizabeth</forename></Row>
+ <Row><forename>Philip</forename></Row>
+ <Row><forename>Fred</forename></Row>
+ <Row><forename>Wilma</forename></Row>
+ <Row><forename>John</forename></Row>
+ <Row><forename>Brian</forename></Row>
+ <Row><forename>Julia</forename></Row>
+</Dataset>
+<Dataset name='Result 38'>
+</Dataset>
+<Dataset name='Result 39'>
+ <Row><forename>John</forename></Row>
+ <Row><forename>Julia</forename></Row>
+</Dataset>
+<Dataset name='Result 40'>
+ <Row><forename>John</forename></Row>
+ <Row><forename>Julia</forename></Row>
+</Dataset>
+<Dataset name='Result 41'>
+ <Row><forename>Wilma</forename></Row>
+</Dataset>
+<Dataset name='Result 42'>
+ <Row><forename>Liz</forename></Row>
+</Dataset>
+<Dataset name='Result 43'>
+ <Row><forename>Liz</forename></Row>
+</Dataset>
+<Dataset name='Result 44'>
+ <Row><forename>Liz</forename></Row>
+</Dataset>
+<Dataset name='Result 45'>
+ <Row><forename>Gavin</forename></Row>
+ <Row><forename>Fred</forename></Row>
+ <Row><forename>John</forename></Row>
+</Dataset>
+<Dataset name='Result 46'>
+ <Row><forename>Gavin</forename></Row>
+ <Row><forename>Fred</forename></Row>
+ <Row><forename>John</forename></Row>
+</Dataset>
+<Dataset name='Result 47'>
+ <Row><forename>Gavin</forename></Row>
+ <Row><forename>John</forename></Row>
+</Dataset>
+<Dataset name='Result 48'>
+ <Row><forename>Gavin</forename></Row>
+ <Row><forename>John</forename></Row>
+</Dataset>
+<Dataset name='Result 49'>
+ <Row><Result_49>Done</Result_49></Row>
+</Dataset>

+ 2 - 2
testing/regress/ecl/setup/setupsq.ecl

@@ -253,7 +253,7 @@ personBooks := normalize(final, left.persons, extractPersonBook(LEFT, RIGHT));
 personOut := project(personBooks, extractPerson(LEFT));
 bookOut := normalize(personBooks, count(left.books), extractBook(LEFT.books[COUNTER], LEFT.id));
 
-simplePersonBooks := project(personBooks, transform(sq.SimplePersonBookRec, SELF := LEFT, SELF.limit.booklimit := LEFT.booklimit));
+simplePersonBooks := project(personBooks, transform(sq.SimplePersonBookRec, SELF.surname := (STRING)LEFT.surname; SELF := LEFT, SELF.limit.booklimit := LEFT.booklimit));
 
 output(final,, sq.HousePersonBookName,overwrite);
 output(personBooks,, sq.PersonBookName,overwrite);
@@ -290,7 +290,7 @@ fileServices.AddFileRelationship( __nameof__(sq.BookExDs), sq.BookIndexName+'ID'
 
 //Some more conventional indexes - some requiring a double lookup to resolve the payload
 buildindex(sq.HouseExDs, { string40 addr := sq.HouseExDs.addr, postcode }, { filepos }, sq.HouseIndexName, overwrite);
-buildindex(sq.PersonExDs, { string40 forename := sq.PersonExDs.forename, string40 surname := sq.PersonExDs.surname }, { id }, sq.PersonIndexName, overwrite);
+buildindex(sq.PersonExDs, { string40 forename := sq.PersonExDs.forename, string40 surname := (string)sq.PersonExDs.surname }, { id }, sq.PersonIndexName, overwrite);
 buildindex(sq.BookExDs, { string40 name := sq.BookExDs.name, string40 author := sq.BookExDs.author }, { id }, sq.BookIndexName, overwrite);
 
 fileServices.AddFileRelationship( __nameof__(sq.HouseExDs), sq.HouseIndexName, '', '', 'view', '1:1', false);

+ 1 - 1
testing/regress/ecl/setup/sq.ecl

@@ -19,7 +19,7 @@ unsigned2       yearBuilt := 0;
 EXPORT PersonRec :=
             record
 string          forename;
-string          surname;
+utf8            surname;
 udecimal8       dob;
 udecimal8       booklimit := 0;
 unsigned2       aage := 0;

+ 99 - 0
testing/regress/ecl/sqfiltsubstring.ecl

@@ -0,0 +1,99 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2017 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+//version multiPart=false
+
+import ^ as root;
+multiPart := #IFDEFINED(root.multiPart, false);
+
+//--- end of version configuration ---
+
+import $.setup;
+sq := setup.sq(multiPart);
+
+// Test filtering at different levels, making sure parent fields are available in the child query.
+// Also tests scoping of sub expressions using within.
+
+zero := 0;
+one := 1 : stored('one');
+two := 2 : stored('two');
+three := 3 : stored('three');
+four := 4 : stored('four');
+thirty := 30 : stored('thirty');
+
+xkeyed(boolean x) := keyed(x);
+unkeyed(boolean x) := (x);
+
+//Fixed length substring
+sequential(
+output(sq.houseDs(xkeyed(postcode[1..0] = 'X')), { postcode} );
+output(sq.houseDs(xkeyed(postcode[1..0] = '    ')), { postcode} );
+output(sq.houseDs(xkeyed(postcode[1..1] = 'S')), { postcode} );
+output(sq.houseDs(xkeyed(postcode[1] = 'S')), { postcode} );
+output(sq.houseDs(xkeyed(postcode[1..1] = 'A')), { postcode} );
+output(sq.houseDs(xkeyed(postcode[1..2] = 'SW')), { postcode} );
+output(sq.houseDs(xkeyed(postcode[..2] = 'SW')), { postcode} );
+output(sq.houseDs(xkeyed(postcode[1..2] IN ['SA', 'SB', 'SW', 'SG'])), { postcode} );
+output(sq.houseDs(xkeyed(postcode[1..2] IN ['SA', 'SB', 'SW', 'SG'])), { postcode} );
+output(sq.houseDs(xkeyed(postcode[1..2] > 'SA')), { postcode} );
+//Check behaviour of out of range truncation.
+output(sq.houseDs(xkeyed(postcode[1..2] > 'SG8' and postcode[1..2] < 'WC1')), { postcode} );
+output(sq.houseDs(xkeyed(postcode[1..2] >= 'SG8' and postcode[1..2] <= 'WC1')), { postcode} );
+
+//Check that the code to dynamically restrict the datasets works as expected.
+output(sq.houseDs(xkeyed(postcode[1..zero] = 'X')), { postcode} );
+output(sq.houseDs(xkeyed(postcode[1..zero] = '    ')), { postcode} );
+output(sq.houseDs(xkeyed(postcode[1..one] = 'S')), { postcode} );
+output(sq.houseDs(xkeyed(postcode[1] = 'S')), { postcode} );
+output(sq.houseDs(xkeyed(postcode[1..one] = 'A')), { postcode} );
+output(sq.houseDs(xkeyed(postcode[1..two] = 'SW')), { postcode} );
+output(sq.houseDs(xkeyed(postcode[..two] = 'SW')), { postcode} );
+output(sq.houseDs(xkeyed(postcode[1..two] IN ['SA', 'SB', 'SW', 'SG'])), { postcode} );
+output(sq.houseDs(xkeyed(postcode[1..two] IN ['SA', 'SB', 'SW', 'SG'])), { postcode} );
+output(sq.houseDs(xkeyed(postcode[1..two] > 'SA')), { postcode} );
+output(sq.houseDs(xkeyed(postcode[1..two] > 'SG8' and postcode[1..two] < 'WC1')), { postcode} );
+output(sq.houseDs(xkeyed(postcode[1..two] >= 'SG8' and postcode[1..two] <= 'WC1')), { postcode} );
+
+//Variable length substring
+output(sq.personDs(xkeyed(forename[1..0] = '   ')), { forename });
+output(sq.personDs(xkeyed(forename[1..0] = 'x  ')), { forename });
+output(sq.personDs(xkeyed(forename[1..1] = 'J')), { forename });
+output(sq.personDs(xkeyed(forename[1] = 'J')), { forename });
+output(sq.personDs(xkeyed(forename[1..4] = 'Wilm')), { forename });
+output(sq.personDs(xkeyed(forename[1..4] = 'Liz ')), { forename });
+output(sq.personDs(xkeyed(forename[..4] = 'Liz ')), { forename });
+output(sq.personDs(xkeyed(forename[1..30] = 'Liz ')), { forename });
+output(sq.personDs(xkeyed(forename[1..3] between 'Fre' and 'Joh')), { forename });
+output(sq.personDs(xkeyed(forename[..3] between 'Fre' and 'Joh')), { forename });
+output(sq.personDs(xkeyed(forename[..3] between 'Fred' and 'John')), { forename });
+output(sq.personDs(xkeyed(forename[1..3] between 'Fred' and 'John')), { forename });
+
+output(sq.personDs(xkeyed(forename[1..zero] = '   ')), { forename });
+output(sq.personDs(xkeyed(forename[1..zero] = 'x  ')), { forename });
+output(sq.personDs(xkeyed(forename[1..one] = 'J')), { forename });
+output(sq.personDs(xkeyed(forename[1] = 'J')), { forename });
+output(sq.personDs(xkeyed(forename[1..four] = 'Wilm')), { forename });
+output(sq.personDs(xkeyed(forename[1..four] = 'Liz ')), { forename });
+output(sq.personDs(xkeyed(forename[..four] = 'Liz ')), { forename });
+output(sq.personDs(xkeyed(forename[1..thirty] = 'Liz ')), { forename });
+output(sq.personDs(xkeyed(forename[1..three] between 'Fre' and 'Joh')), { forename });
+output(sq.personDs(xkeyed(forename[..three] between 'Fre' and 'Joh')), { forename });
+output(sq.personDs(xkeyed(forename[..three] between 'Fred' and 'John')), { forename });
+output(sq.personDs(xkeyed(forename[1..three] between 'Fred' and 'John')), { forename });
+
+output('Done')
+);

+ 1 - 1
testing/regress/ecl/sqsimple.ecl

@@ -40,5 +40,5 @@ output(sq.HousePersonBookDs, {count(persons(id != skipId, exists(books(id != ski
 output(sq.HousePersonBookDs, {count(persons(id != sq.HousePersonBookDs.id, exists(books(id != skipId)))), count(persons) });
 
 // cse
-string fullname := trim(persons.surname) + ', ' + trim(persons.forename);
+string fullname := (string)trim(persons.surname) + ', ' + trim(persons.forename);
 output(sq.HousePersonBookDs, {count(persons(fullname != '', exists(books(author != fullname)))), count(persons) });