Browse Source

Merge pull request #11192 from ghalliday/issue19721

HPCC-19721 Split canMatch() from transform() for disk and index read

Reviewed-By: Shamser Ahmed <shamser.ahmed@lexisnexis.co.uk>
Reviewed-By: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 7 years ago
parent
commit
59e614d989

+ 6 - 1
ecl/hql/hqlpmap.cpp

@@ -990,7 +990,11 @@ static bool isTrivialTransform(IHqlExpression * expr, IHqlExpression * selector,
                         return false;
                     IHqlExpression * rightField = rhs->queryChild(1);
                     if (leftField != rightField)
-                        return false;
+                    {
+                        if (!allowDeserialize)
+                            return false;
+                        UNIMPLEMENTED; // Implement when projected fields can be in-memory form (HPCC-19743)
+                    }
                     break;
                 }
                 case no_createrow:
@@ -1006,6 +1010,7 @@ static bool isTrivialTransform(IHqlExpression * expr, IHqlExpression * selector,
                         return false;
                     break;
                 }
+                //MORE: no_hqlproject?
                 default:
                     return false;
                 }

+ 133 - 7
ecl/hqlcpp/hqlsource.cpp

@@ -218,6 +218,7 @@ bool isSimpleSource(IHqlExpression * expr)
 
 static bool isSimpleProjectingDiskRead(IHqlExpression * expr)
 {
+    bool projected = false;
     for (;;)
     {
         switch (expr->getOperator())
@@ -226,8 +227,13 @@ static bool isSimpleProjectingDiskRead(IHqlExpression * expr)
             return true;
         case no_hqlproject:
         case no_newusertable:
+            if (projected)
+                return false;
             //MORE: HPCC-18469 Check if the transform only assigns fields with the same name from source to the target
-            return false;
+            if (!isSimpleProject(expr))
+                return false;
+            projected = true;
+            break;
         default:
             return false;
         }
@@ -755,6 +761,9 @@ public:
     virtual void buildTransform(IHqlExpression * expr) = 0;
     virtual void analyse(IHqlExpression * expr);
 
+    void buildCanMatch(IHqlExpression * expr);
+    void buildMatchFilter(BuildCtx & ctx, IHqlExpression * expr);
+
     void buildFilenameMember();
     void appendFilter(SharedHqlExpr & unkeyedFilter, IHqlExpression * expr);
     void buildKeyedLimitHelper(IHqlExpression * expr);
@@ -770,7 +779,7 @@ public:
     bool recordHasVirtualsOrDeserialize()                               { return fieldInfo.hasVirtualsOrDeserialize(); }
     bool isSourceInvariant(IHqlExpression * dataset, IHqlExpression * expr);
     bool hasExistChoosenLimit() { return (choosenValue && getIntValue(choosenValue) == 1); }
-
+    bool isRootSelector(IHqlExpression * expr) const;
 
 protected:
     void assignLocalExtract(BuildCtx & ctx, ParentExtract * extractBuilder, IHqlExpression * dataset, const char * argName);
@@ -814,6 +823,7 @@ public:
     HqlExprAttr     keyedLimitExpr;
     HqlExprAttr     choosenValue;
     HqlExprAttr     preloadSize;
+    HqlExprAttr     firstTransformer;
     HqlExprAttr     lastTransformer;
     HqlExprAttr     alreadyDoneFlag;
     HqlExprArray    originalFilters;
@@ -847,6 +857,7 @@ public:
     bool            isVirtualLogicalFilenameUsed;
     bool            requiresOrderedMerge;
     bool            newDiskReadMapping;
+    bool            extractCanMatch = false;
 protected:
     HqlCppTranslator & translator;
 };
@@ -896,6 +907,13 @@ bool SourceBuilder::isSourceInvariant(IHqlExpression * dataset, IHqlExpression *
     }
 }
 
+bool SourceBuilder::isRootSelector(IHqlExpression * expr) const
+{
+    if (!tableExpr)
+        return false;
+    return (expr->queryNormalizedSelector() == tableExpr->queryNormalizedSelector());
+}
+
 void SourceBuilder::analyse(IHqlExpression * expr)
 {
     IHqlExpression * body = expr->queryBody(true);
@@ -982,7 +1000,8 @@ void SourceBuilder::analyse(IHqlExpression * expr)
 
             if (unkeyedFilter)
             {
-                transformCanFilter = true;
+                if (!extractCanMatch || !isRootSelector(expr))
+                    transformCanFilter = true;
                 originalFilters.append(*LINK(expr));
                 mappedFilters.append(*unkeyedFilter.getClear());
             }
@@ -1047,11 +1066,15 @@ void SourceBuilder::analyse(IHqlExpression * expr)
     case no_hqlproject:
         needToCallTransform = true;
         needDefaultTransform = false;
+        if (!firstTransformer)
+            firstTransformer.set(expr);
         lastTransformer.set(expr);
         break;
     case no_newusertable:
         needToCallTransform = true;
         needDefaultTransform = false;
+        if (!firstTransformer)
+            firstTransformer.set(expr);
         lastTransformer.set(expr);
         break;
     case no_aggregate:
@@ -1059,6 +1082,8 @@ void SourceBuilder::analyse(IHqlExpression * expr)
             needToCallTransform = true;
             needDefaultTransform = false;
             aggregation = true;
+            if (!firstTransformer)
+                firstTransformer.set(expr);
             lastTransformer.set(expr);
             break;
         }
@@ -1067,6 +1092,8 @@ void SourceBuilder::analyse(IHqlExpression * expr)
             needToCallTransform = true;
             needDefaultTransform = false;
             aggregation = true;
+            if (!firstTransformer)
+                firstTransformer.set(expr);
             lastTransformer.set(expr);
 
             IHqlExpression * transform = expr->queryChild(2);
@@ -1088,6 +1115,8 @@ void SourceBuilder::analyse(IHqlExpression * expr)
     case no_fetch:
         needToCallTransform = true;
         needDefaultTransform = false;
+        if (!firstTransformer)
+            firstTransformer.set(expr);
         lastTransformer.set(expr);
         break;
     case no_compound_diskread:
@@ -1434,7 +1463,7 @@ void SourceBuilder::buildTransformElements(BuildCtx & ctx, IHqlExpression * expr
         break;
     case no_filter:
         {
-            if (!ignoreFilters)
+            if (!ignoreFilters && (!extractCanMatch || !isRootSelector(expr)))
             {
                 LinkedHqlExpr cond;
                 if (useFilterMappings)
@@ -1696,6 +1725,71 @@ void SourceBuilder::buildTransformElements(BuildCtx & ctx, IHqlExpression * expr
 }
 
 
+void SourceBuilder::buildMatchFilter(BuildCtx & ctx, IHqlExpression * expr)
+{
+    expr = expr->queryBody();
+    node_operator op = expr->getOperator();
+
+    switch (op)
+    {
+    case no_cachealias:
+        buildMatchFilter(ctx, expr->queryChild(1));
+        return;
+    case no_newkeyindex:
+    case no_table:
+    case no_fetch:
+    case no_select:     // handled below
+    case no_null:
+    case no_anon:
+    case no_pseudods:
+    case no_workunit_dataset:
+    case no_getgraphresult:
+    case no_call:
+    case no_externalcall:
+    case no_rows:
+    case no_libraryinput:
+        return;
+    default:
+        buildMatchFilter(ctx, expr->queryChild(0));
+        break;
+    }
+
+    switch (op)
+    {
+    case no_filter:
+        {
+            LinkedHqlExpr cond;
+            if (useFilterMappings)
+            {
+                unsigned match = originalFilters.find(*expr);
+                if (match != NotFound)
+                    cond.set(&mappedFilters.item(match));
+            }
+            else
+            {
+                HqlExprArray args;
+                unwindRealChildren(args, expr, 1);
+                cond.setown(createBalanced(no_and, queryBoolType(), args));
+            }
+
+            if (cond)
+            {
+                IHqlExpression * ds = expr->queryChild(0);
+                OwnedHqlExpr test = getInverse(cond);
+                if (translator.queryOptions().foldFilter)
+                    test.setown(foldScopedHqlExpression(translator.queryErrorProcessor(), ds->queryNormalizedSelector(), test));
+
+                if (translator.options.spotCSE)
+                    test.setown(spotScalarCSE(test, ds, translator.queryOptions().spotCseInIfDatasetConditions));
+
+                translator.buildFilteredReturn(ctx, test, queryBoolExpr(false));
+            }
+        }
+        break;
+    }
+}
+
+
 void SourceBuilder::doBuildAggregateSelectIterator(BuildCtx & ctx, IHqlExpression * expr)
 {
     IHqlExpression * ds = expr->queryChild(0);
@@ -2072,6 +2166,7 @@ ABoundActivity * SourceBuilder::buildActivity(BuildCtx & ctx, IHqlExpression * e
     {
         buildMembers(expr);
         buildTransform(expr);
+        buildCanMatch(expr);
         buildFlagsMember(expr);
 
         if (tableExpr && (activityKind < TAKchildread || activityKind > TAKchildthroughnormalize))
@@ -2613,6 +2708,32 @@ void SourceBuilder::buildNormalizeHelpers(IHqlExpression * expr)
 }
 
 
+void SourceBuilder::buildCanMatch(IHqlExpression * expr)
+{
+    if (extractCanMatch)
+    {
+        MemberFunction func(translator, instance->startctx);
+        func.start("virtual bool canMatch(const void * _left) override");
+        func.ctx.addQuotedLiteral("unsigned char * left = (unsigned char *)_left;");
+        translator.bindTableCursor(func.ctx, tableExpr, "left");
+
+        //This will have no ill effect for disk read, and is used for blob lookup
+        translator.associateBlobHelper(func.ctx, tableExpr, "fpp");
+        buildTransformFpos(func.ctx);
+
+        unsigned mark = func.numStmts();
+        buildMatchFilter(func.ctx, firstTransformer ? firstTransformer->queryChild(0) : expr);
+        if (func.numStmts() != mark)
+        {
+            func.ctx.addReturn(queryBoolExpr(true));
+            translator.doBuildBoolFunction(instance->classctx, "hasMatchFilter", true);
+        }
+        else
+            func.setIncluded(false);
+    }
+}
+
+
 void SourceBuilder::buildGroupAggregateTransformBody(BuildCtx & transformCtx, IHqlExpression * expr, bool useExtract, bool bindInputRow)
 {
     buildTransformBody(transformCtx, expr, false, false, bindInputRow);
@@ -2985,7 +3106,8 @@ class DiskReadBuilder : public DiskReadBuilderBase
 public:
     DiskReadBuilder(HqlCppTranslator & _translator, IHqlExpression *_tableExpr, IHqlExpression *_nameExpr)
         : DiskReadBuilderBase(_translator, _tableExpr, _nameExpr)
-    { 
+    {
+        extractCanMatch = (modeOp == no_thor) || (modeOp == no_flat);
     }
 
     virtual void buildTransform(IHqlExpression * expr);
@@ -3119,7 +3241,10 @@ ABoundActivity * HqlCppTranslator::doBuildActivityDiskRead(BuildCtx & ctx, IHqlE
 
         //MORE: This shouldn't always need to be serialized - but engines crash at the moment if not
         const bool forceAllProjectedSerialized = true;
-        if ((!tableExpr->hasAttribute(_spill_Atom) && !isSimpleProjectingDiskRead(expr)) || forceAllProjectedSerialized)
+        //Reading from a spill file uses the in-memory format to optimize on-demand spilling.
+        bool optimizeInMemorySpill = !targetHThor() && tableExpr->hasAttribute(_spill_Atom);
+        bool useInMemoryFormat = optimizeInMemorySpill || isSimpleProjectingDiskRead(expr);
+        if (forceAllProjectedSerialized || !useInMemoryFormat)
         {
             //else if the the table isn't serialized, then map to a serialized table, and then project to the real format
             if (recordRequiresSerialization(tableExpr->queryRecord(), diskAtom))
@@ -3931,7 +4056,8 @@ class NewIndexReadBuilder : public IndexReadBuilderBase
 public:
     NewIndexReadBuilder(HqlCppTranslator & _translator, IHqlExpression *_tableExpr, IHqlExpression *_nameExpr)
         : IndexReadBuilderBase(_translator, _tableExpr, _nameExpr)
-    { 
+    {
+        extractCanMatch = true;
     }
 
     virtual void analyseGraph(IHqlExpression * expr)

+ 11 - 9
ecl/hthor/hthor.cpp

@@ -8594,8 +8594,8 @@ const void *CHThorDiskReadActivity::nextRow()
                     prefetcher->readAhead(prefetchBuffer);
                     const byte * next = prefetchBuffer.queryRow();
                     size32_t sizeRead = prefetchBuffer.queryRowSize();
-                    size32_t thisSize;
-                    if (segMonitorsMatch(next)) // NOTE - keyed fields are checked pre-translation
+                    size32_t thisSize = 0;
+                    if (likely(segMonitorsMatch(next))) // NOTE - keyed fields are checked pre-translation
                     {
                         MemoryBuffer translated;
                         if (translator)
@@ -8604,10 +8604,9 @@ const void *CHThorDiskReadActivity::nextRow()
                             translator->translate(aBuilder, *this, next);
                             next = reinterpret_cast<const byte *>(translated.toByteArray());
                         }
-                        thisSize = helper.transform(outBuilder.ensureRow(), next);
+                        if (likely(helper.canMatch(next)))
+                            thisSize = helper.transform(outBuilder.ensureRow(), next);
                     }
-                    else
-                        thisSize = 0;
                     bool eog = false;
                     if (grouped)
                         prefetchBuffer.read(sizeof(eog), &eog);
@@ -8645,21 +8644,24 @@ const void *CHThorDiskReadActivity::nextRow()
             {
                 queryUpdateProgress();
 
-                if (!inputstream->eos())
+                while (!inputstream->eos())
                 {
                     size32_t sizeRead = deserializer->deserialize(outBuilder.ensureRow(), deserializeSource);
                     //In this case size read from disk == size created in memory
                     localOffset += sizeRead;
+                    OwnedConstRoxieRow ret = outBuilder.finalizeRowClear(sizeRead);
                     if ((processed - initialProcessed)>=limit)
                     {
-                        outBuilder.clear();
                         if ( agent.queryCodeContext()->queryDebugContext())
                             agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
                         helper.onLimitExceeded();
                         return NULL;
                     }
-                    processed++;
-                    return outBuilder.finalizeRowClear(sizeRead);
+                    if (likely(helper.canMatch(ret)))
+                    {
+                        processed++;
+                        return ret.getClear();
+                    }
                 }
                 eofseen = !openNext();
             }

+ 86 - 68
ecl/hthor/hthorkey.cpp

@@ -882,54 +882,63 @@ const void *CHThorIndexReadActivity::nextRow()
             if ((keyedLimit != (unsigned __int64) -1) && keyedProcessed > keyedLimit)
                 helper.onKeyedLimitExceeded();
             byte const * keyRow = klManager->queryKeyBuffer();
-            if (needTransform)
+            if (likely(helper.canMatch(keyRow)))
             {
-                try
+                if (needTransform)
                 {
-                    size32_t recSize;
-                    RtlDynamicRowBuilder rowBuilder(rowAllocator);
-                    recSize = helper.transform(rowBuilder, keyRow);
-                    callback.finishedRow();
-                    if (recSize)
+                    try
                     {
-                        processed++;
-                        if ((processed-initialProcessed) > rowLimit)
+                        size32_t recSize;
+                        RtlDynamicRowBuilder rowBuilder(rowAllocator);
+                        recSize = helper.transform(rowBuilder, keyRow);
+                        callback.finishedRow();
+                        if (recSize)
+                        {
+                            processed++;
+                            if ((processed-initialProcessed) > rowLimit)
+                            {
+                                helper.onLimitExceeded();
+                                if ( agent.queryCodeContext()->queryDebugContext())
+                                    agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
+                            }
+                            return rowBuilder.finalizeRowClear(recSize);
+                        }
+                        else
                         {
-                            helper.onLimitExceeded();
-                            if ( agent.queryCodeContext()->queryDebugContext())
-                                agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
+                            postFiltered++;
                         }
-                        return rowBuilder.finalizeRowClear(recSize);
                     }
-                    else
+                    catch(IException * e)
                     {
-                        postFiltered++;
+                        throw makeWrappedException(e);
                     }
                 }
-                catch(IException * e)
+                else
                 {
-                    throw makeWrappedException(e);
+                    callback.finishedRow(); // since filter might have accessed a blob
+                    processed++;
+                    if ((processed-initialProcessed) > rowLimit)
+                    {
+                        helper.onLimitExceeded();
+                        if ( agent.queryCodeContext()->queryDebugContext())
+                            agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
+                    }
+                    try
+                    {
+                        RtlDynamicRowBuilder rowBuilder(rowAllocator);
+                        size32_t finalSize = cloneRow(rowBuilder, keyRow, outputMeta);
+                        return rowBuilder.finalizeRowClear(finalSize);
+                    }
+                    catch(IException * e)
+                    {
+                        throw makeWrappedException(e);
+                    }
                 }
             }
             else
             {
-                processed++;
-                if ((processed-initialProcessed) > rowLimit)
-                {
-                    helper.onLimitExceeded();
-                    if ( agent.queryCodeContext()->queryDebugContext())
-                        agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
-                }
-                try
-                {
-                    RtlDynamicRowBuilder rowBuilder(rowAllocator);
-                    size32_t finalSize = cloneRow(rowBuilder, keyRow, outputMeta);
-                    return rowBuilder.finalizeRowClear(finalSize);
-                }
-                catch(IException * e)
-                {
-                    throw makeWrappedException(e);
-                }
+                callback.finishedRow(); // since filter might have accessed a blob
+                postFiltered++;
             }
         }
         else if (!nextPart())
@@ -971,54 +980,63 @@ const void *CHThorIndexReadActivity::nextRowGE(const void * seek, unsigned numFi
             keyedProcessed++;
             if ((keyedLimit != (unsigned __int64) -1) && keyedProcessed > keyedLimit)
                 helper.onKeyedLimitExceeded();
-            if (needTransform)
+            if (likely(helper.canMatch(row)))
             {
-                try
+                if (needTransform)
                 {
-                    size32_t recSize;
-                    RtlDynamicRowBuilder rowBuilder(rowAllocator);
-                    recSize = helper.transform(rowBuilder, row);
-                    callback.finishedRow();
-                    if (recSize)
+                    try
                     {
-                        processed++;
-                        if ((processed-initialProcessed) > rowLimit)
+                        size32_t recSize;
+                        RtlDynamicRowBuilder rowBuilder(rowAllocator);
+                        recSize = helper.transform(rowBuilder, row);
+                        callback.finishedRow();
+                        if (recSize)
+                        {
+                            processed++;
+                            if ((processed-initialProcessed) > rowLimit)
+                            {
+                                helper.onLimitExceeded();
+                                if ( agent.queryCodeContext()->queryDebugContext())
+                                    agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
+                            }
+                            return rowBuilder.finalizeRowClear(recSize);
+                        }
+                        else
                         {
-                            helper.onLimitExceeded();
-                            if ( agent.queryCodeContext()->queryDebugContext())
-                                agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
+                            postFiltered++;
                         }
-                        return rowBuilder.finalizeRowClear(recSize);
                     }
-                    else
+                    catch(IException * e)
                     {
-                        postFiltered++;
+                        throw makeWrappedException(e);
                     }
                 }
-                catch(IException * e)
+                else
                 {
-                    throw makeWrappedException(e);
+                    callback.finishedRow(); // since filter might have accessed a blob
+                    processed++;
+                    if ((processed-initialProcessed) > rowLimit)
+                    {
+                        helper.onLimitExceeded();
+                        if ( agent.queryCodeContext()->queryDebugContext())
+                            agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
+                    }
+                    try
+                    {
+                        RtlDynamicRowBuilder rowBuilder(rowAllocator);
+                        size32_t finalSize = cloneRow(rowBuilder, row, outputMeta);
+                        return rowBuilder.finalizeRowClear(finalSize);
+                    }
+                    catch(IException * e)
+                    {
+                        throw makeWrappedException(e);
+                    }
                 }
             }
             else
             {
-                processed++;
-                if ((processed-initialProcessed) > rowLimit)
-                {
-                    helper.onLimitExceeded();
-                    if ( agent.queryCodeContext()->queryDebugContext())
-                        agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
-                }
-                try
-                {
-                    RtlDynamicRowBuilder rowBuilder(rowAllocator);
-                    size32_t finalSize = cloneRow(rowBuilder, row, outputMeta);
-                    return rowBuilder.finalizeRowClear(finalSize);
-                }
-                catch(IException * e)
-                {
-                    throw makeWrappedException(e);
-                }
+                callback.finishedRow(); // since filter might have accessed a blob
+                postFiltered++;
             }
         }
         else if (!nextPart())

+ 14 - 5
roxie/ccd/ccdactivities.cpp

@@ -1073,9 +1073,14 @@ public:
 
     size32_t doTransform(IMessagePacker * output, const void *src) const
     {
-        OptimizedRowBuilder rowBuilder(rowAllocator, meta, output, serializer);
-        unsigned transformedSize = helper->transform(rowBuilder, src);
-        return rowBuilder.writeToOutput(transformedSize, false);
+        if (likely(helper->canMatch(src)))
+        {
+            OptimizedRowBuilder rowBuilder(rowAllocator, meta, output, serializer);
+            unsigned transformedSize = helper->transform(rowBuilder, src);
+            return rowBuilder.writeToOutput(transformedSize, false);
+        }
+        else
+            return 0;
     }
 
 };
@@ -2728,7 +2733,7 @@ public:
 class CRoxieIndexReadActivity : public CRoxieIndexActivity, implements IIndexReadActivityInfo
 {
 protected:
-    IHThorCompoundReadExtra * readHelper;
+    IHThorIndexReadArg * readHelper;
 
 public:
     CRoxieIndexReadActivity(SlaveContextLogger &_logctx, IRoxieQueryPacket *_packet, HelperFactory *_hFactory, const CRoxieIndexActivityFactory *_aFactory, unsigned _steppingOffset)
@@ -2860,8 +2865,12 @@ public:
                                 assertex(!steppingRow);
                                 break;
                             }
+
                             rowBuilder.ensureRow();
-                            transformedSize = readHelper->transform(rowBuilder, keyRow);
+                            transformedSize = 0;
+                            if (likely(readHelper->canMatch(keyRow)))
+                                transformedSize = readHelper->transform(rowBuilder, keyRow);
+
                             callback.finishedRow();
                             if (transformedSize)
                             {

+ 112 - 89
roxie/ccd/ccdserver.cpp

@@ -11399,7 +11399,7 @@ public:
         {
             for (;;)
             {
-                const void *in = inputStream->nextRow();
+                OwnedConstRoxieRow in = inputStream->nextRow();
                 if (!in)
                 {
                     if (anyThisGroup)
@@ -11407,36 +11407,38 @@ public:
                         anyThisGroup = false;
                         return NULL;
                     }
-                    in = inputStream->nextRow();
+                    in.setown(inputStream->nextRow());
                     if (!in)
                     {
                         eof = true;
                         return NULL;                // eof...
                     }
                 }
-                unsigned outSize;
-                RtlDynamicRowBuilder rowBuilder(rowAllocator);
-                try
-                {
-                    outSize = helper.transform(rowBuilder, in);
-                    ReleaseRoxieRow(in);
-                }
-                catch (IException *E)
+                if (likely(helper.canMatch(in)))
                 {
-                    throw makeWrappedException(E);
-                }
+                    unsigned outSize;
+                    RtlDynamicRowBuilder rowBuilder(rowAllocator);
+                    try
+                    {
+                        outSize = helper.transform(rowBuilder, in);
+                    }
+                    catch (IException *E)
+                    {
+                        throw makeWrappedException(E);
+                    }
 
-                if (outSize)
-                {
-                    anyThisGroup = true;
-                    processed++;
-                    if (processed==rowLimit)
+                    if (outSize)
                     {
-                        if (traceLevel > 4)
-                            DBGLOG("activityid = %d  line = %d", activityId, __LINE__);
-                        helper.onLimitExceeded();
+                        anyThisGroup = true;
+                        processed++;
+                        if (processed==rowLimit)
+                        {
+                            if (traceLevel > 4)
+                                DBGLOG("activityid = %d  line = %d", activityId, __LINE__);
+                            helper.onLimitExceeded();
+                        }
+                        return rowBuilder.finalizeRowClear(outSize);
                     }
-                    return rowBuilder.finalizeRowClear(outSize);
                 }
             }
         }
@@ -22006,10 +22008,15 @@ public:
             if (nextRec)
             {
                 someInGroup = true;
-                transformedSize = readHelper->transform(rowBuilder, nextRec);
-                reader->finishedRow();
-                if (transformedSize)
-                    break;
+                if (likely(readHelper->canMatch(nextRec)))
+                {
+                    transformedSize = readHelper->transform(rowBuilder, nextRec);
+                    reader->finishedRow();
+                    if (transformedSize)
+                        break;
+                }
+                else
+                    reader->finishedRow();
             }
             else if (isGrouped)
             {
@@ -23266,33 +23273,41 @@ public:
                 size32_t transformedSize;
                 RtlDynamicRowBuilder rowBuilder(owner.rowAllocator);
                 byte const * keyRow = tlk->queryKeyBuffer();
-                try
+                if (likely(owner.readHelper.canMatch(keyRow)))
                 {
-                    transformedSize = owner.readHelper.transform(rowBuilder, keyRow);
-                    owner.callback.finishedRow();
-                }
-                catch (IException *E)
-                {
-                    throw owner.makeWrappedException(E);
-                }
-                if (transformedSize)
-                {
-                    OwnedConstRoxieRow result = rowBuilder.finalizeRowClear(transformedSize);
-                    matched++;
-                    if (matched > owner.rowLimit)
+                    try
                     {
-                        owner.onLimitExceeded(false); // Should throw exception
-                        throwUnexpected();
+                        transformedSize = owner.readHelper.transform(rowBuilder, keyRow);
+                        owner.callback.finishedRow();
                     }
-                    if (matched > owner.choosenLimit) // MORE - bit of a strange place to check
+                    catch (IException *E)
                     {
-                        break;
+                        throw owner.makeWrappedException(E);
                     }
-                    owner.accepted++;
-                    return result.getClear();
+                    if (transformedSize)
+                    {
+                        OwnedConstRoxieRow result = rowBuilder.finalizeRowClear(transformedSize);
+                        matched++;
+                        if (matched > owner.rowLimit)
+                        {
+                            owner.onLimitExceeded(false); // Should throw exception
+                            throwUnexpected();
+                        }
+                        if (matched > owner.choosenLimit) // MORE - bit of a strange place to check
+                        {
+                            break;
+                        }
+                        owner.accepted++;
+                        return result.getClear();
+                    }
+                    else
+                        owner.rejected++;
                 }
                 else
+                {
+                    owner.callback.finishedRow(); // since filter might have accessed a blob
                     owner.rejected++;
+                }
             }
             EOFseen = true;
             return NULL;
@@ -23776,64 +23791,72 @@ public:
             }
 
             byte const * keyRow = tlk->queryKeyBuffer();
+            if (likely(indexHelper.canMatch(keyRow)))
+            {
 #ifdef _DEBUG
-//          StringBuffer recstr;
-//          unsigned size = (tlk->queryRecordSize()<80)  ? tlk->queryRecordSize() : 80;
-//          for (unsigned i = 0; i < size; i++)
-//          {
-//              recstr.appendf("%02x ", ((unsigned char *) keyRow)[i]);
-//          }
-//          DBGLOG("nextRowGE Got %s", recstr.str());
-            if (originalRawSeek && memcmp(keyRow + seekGEOffset, originalRawSeek, seekSize) < 0)
-                assertex(!"smart seek failure");
+    //          StringBuffer recstr;
+    //          unsigned size = (tlk->queryRecordSize()<80)  ? tlk->queryRecordSize() : 80;
+    //          for (unsigned i = 0; i < size; i++)
+    //          {
+    //              recstr.appendf("%02x ", ((unsigned char *) keyRow)[i]);
+    //          }
+    //          DBGLOG("nextRowGE Got %s", recstr.str());
+                if (originalRawSeek && memcmp(keyRow + seekGEOffset, originalRawSeek, seekSize) < 0)
+                    assertex(!"smart seek failure");
 #endif
-            size32_t transformedSize;
-            rowBuilder.ensureRow();
-            try
-            {
-                transformedSize =indexHelper.transform(rowBuilder, keyRow);
-                //if the post filter causes a mismatch, and the stepping condition no longer matches
-                //then return a mismatch record - so the join code can start seeking on the other input.
-                if (transformedSize == 0 && optimizeSteppedPostFilter && stepExtra.returnMismatches())
+                size32_t transformedSize;
+                rowBuilder.ensureRow();
+                try
                 {
-                    if (memcmp(keyRow + seekGEOffset, originalRawSeek, seekSize) != 0)
+                    transformedSize =indexHelper.transform(rowBuilder, keyRow);
+                    //if the post filter causes a mismatch, and the stepping condition no longer matches
+                    //then return a mismatch record - so the join code can start seeking on the other input.
+                    if (transformedSize == 0 && optimizeSteppedPostFilter && stepExtra.returnMismatches())
                     {
-                        transformedSize = indexHelper.unfilteredTransform(rowBuilder, keyRow);
-                        if (transformedSize != 0)
-                            wasCompleteMatch = false;
+                        if (memcmp(keyRow + seekGEOffset, originalRawSeek, seekSize) != 0)
+                        {
+                            transformedSize = indexHelper.unfilteredTransform(rowBuilder, keyRow);
+                            if (transformedSize != 0)
+                                wasCompleteMatch = false;
+                        }
                     }
+                    callback.finishedRow();
                 }
-                callback.finishedRow();
-            }
-            catch (IException *E)
-            {
-                throw makeWrappedException(E);
-            }
-            if (transformedSize)
-            {
-                accepted++;
-                if (accepted > rowLimit)
+                catch (IException *E)
                 {
-                    if ((indexHelper.getFlags() & (TIRlimitskips|TIRlimitcreates)) != 0)
+                    throw makeWrappedException(E);
+                }
+                if (transformedSize)
+                {
+                    accepted++;
+                    if (accepted > rowLimit)
                     {
-                        throwUnexpected(); // should not have used simple variant if maySkip set...
+                        if ((indexHelper.getFlags() & (TIRlimitskips|TIRlimitcreates)) != 0)
+                        {
+                            throwUnexpected(); // should not have used simple variant if maySkip set...
+                        }
+                        if (traceLevel > 4)
+                            DBGLOG("activityid = %d  line = %d", activityId, __LINE__);
+                        indexHelper.onLimitExceeded();
+                        break;
                     }
-                    if (traceLevel > 4)
-                        DBGLOG("activityid = %d  line = %d", activityId, __LINE__);
-                    indexHelper.onLimitExceeded();
-                    break;
+                    processed++;
+    #ifdef _DEBUG
+    //              const byte *ret = (const byte *) out.get();
+    //              CommonXmlWriter xmlwrite(XWFnoindent|XWFtrim|XWFopt);
+    //              queryOutputMeta()->toXML(ret, xmlwrite);
+    //              DBGLOG("ROW: {%p} %s", ret, xmlwrite.str());
+    #endif
+                    return rowBuilder.finalizeRowClear(transformedSize);
                 }
-                processed++;
-#ifdef _DEBUG
-//              const byte *ret = (const byte *) out.get();
-//              CommonXmlWriter xmlwrite(XWFnoindent|XWFtrim|XWFopt);
-//              queryOutputMeta()->toXML(ret, xmlwrite);
-//              DBGLOG("ROW: {%p} %s", ret, xmlwrite.str());
-#endif
-                return rowBuilder.finalizeRowClear(transformedSize);
+                else
+                    rejected++;
             }
             else
+            {
+                callback.finishedRow(); // since filter might have accessed a blob
                 rejected++;
+            }
             rawSeek = NULL;
         }
         onEOF();

+ 4 - 2
rtl/include/eclhelper.hpp

@@ -44,8 +44,8 @@ typedef unsigned short UChar;
 
 //Should be incremented whenever the virtuals in the context or a helper are changed, so
 //that a work unit can't be rerun.  Try as hard as possible to retain compatibility.
-#define ACTIVITY_INTERFACE_VERSION      203
-#define MIN_ACTIVITY_INTERFACE_VERSION  203             //minimum value that is compatible with current interface
+#define ACTIVITY_INTERFACE_VERSION      204
+#define MIN_ACTIVITY_INTERFACE_VERSION  204             //minimum value that is compatible with current interface
 
 typedef unsigned char byte;
 
@@ -2332,6 +2332,8 @@ struct IHThorCompoundBaseArg : public IHThorArg
 {
     virtual bool canMatchAny()                              { return true; }
     virtual void createSegmentMonitors(IIndexReadContext *ctx) {}
+    virtual bool canMatch(const void * row)                 { return true; }
+    virtual bool hasMatchFilter()                           { return false; }
 };
 
 struct IHThorIndexReadBaseArg : extends IHThorCompoundBaseArg

+ 19 - 9
thorlcr/activities/diskread/thdiskreadslave.cpp

@@ -56,9 +56,10 @@ protected:
     IConstArrayOf<IFieldFilter> fieldFilters;
     bool grouped;
     bool isFixedDiskWidth;
+    bool needTransform = false;
+    bool hasMatchFilter = false;
     size32_t diskRowMinSz;
     unsigned numSegFieldsUsed = 0;
-    bool needTransform = false;
     rowcount_t totalProgress = 0;
     rowcount_t stopAfter = 0;
     rowcount_t remoteLimit = 0;
@@ -404,14 +405,19 @@ public:
                                 if (!row)
                                     break;
                             }
-                            // NB: rows from prefetch are filtered and translated
-                            size32_t sz = activity.helper->transform(outBuilder.ensureRow(), row);
-                            CDiskRecordPartHandler::prefetchDone();
-                            if (sz)
+                            if (likely(!activity.hasMatchFilter || activity.helper->canMatch(row)))
                             {
-                                firstInGroup = false;
-                                return outBuilder.finalizeRowClear(sz);  
+                                // NB: rows from prefetch are filtered and translated
+                                size32_t sz = activity.helper->transform(outBuilder.ensureRow(), row);
+                                CDiskRecordPartHandler::prefetchDone();
+                                if (sz)
+                                {
+                                    firstInGroup = false;
+                                    return outBuilder.finalizeRowClear(sz);
+                                }
                             }
+                            else
+                                CDiskRecordPartHandler::prefetchDone();
                         }
                     }
                     else
@@ -432,8 +438,11 @@ public:
                                 if (!row)
                                     break;
                             }
-                            firstInGroup = false;
-                            return row.getClear();
+                            if (likely(!activity.hasMatchFilter || activity.helper->canMatch(row)))
+                            {
+                                firstInGroup = false;
+                                return row.getClear();
+                            }
                         }
                     }
                 }
@@ -501,6 +510,7 @@ public:
         unsorted = 0 != (TDRunsorted & helper->getFlags());
         grouped = 0 != (TDXgrouped & helper->getFlags());
         needTransform = helper->needTransform() || (TDRkeyed & helper->getFlags());
+        hasMatchFilter = helper->hasMatchFilter();
         appendOutputLinked(this);
     }
     ~CDiskReadSlaveActivity()

+ 41 - 29
thorlcr/activities/indexread/thindexreadslave.cpp

@@ -484,21 +484,27 @@ class CIndexReadSlaveActivity : public CIndexReadSlaveBase
             const void *r = nextKey();
             if (!r)
                 break;
-            if (needTransform)
+            if (likely(helper->canMatch(r)))
             {
-                size32_t sz = helper->transform(ret, r);
-                if (sz)
+                if (needTransform)
                 {
-                    callback.finishedRow();
+                    size32_t sz = helper->transform(ret, r);
+                    if (sz)
+                    {
+                        callback.finishedRow();
+                        return ret.finalizeRowClear(sz);
+                    }
+                }
+                else
+                {
+                    callback.finishedRow(); // since filter might have accessed a blob
+                    size32_t sz = queryRowMetaData()->getRecordSize(r);
+                    memcpy(ret.ensureCapacity(sz, NULL), r, sz);
                     return ret.finalizeRowClear(sz);
                 }
             }
             else
-            {
-                size32_t sz = queryRowMetaData()->getRecordSize(r);
-                memcpy(ret.ensureCapacity(sz, NULL), r, sz);
-                return ret.finalizeRowClear(sz);
-            }
+                callback.finishedRow(); // since filter might have accessed a blob
         }
         return nullptr;
     }
@@ -549,37 +555,43 @@ class CIndexReadSlaveActivity : public CIndexReadSlaveBase
             if (seek && memcmp((byte *)r + seekGEOffset, seek, seekSize) < 0)
                 assertex(!"smart seek failure");
 #endif
-            if (needTransform)
+            if (likely(helper->canMatch(r)))
             {
-                size32_t sz = helper->transform(ret, r);
-                if (sz)
-                {
-                    callback.finishedRow();
-                    return ret.finalizeRowClear(sz);
-                }
-                else
+                if (needTransform)
                 {
-                    if (optimizeSteppedPostFilter && stepExtra.returnMismatches())
+                    size32_t sz = helper->transform(ret, r);
+                    if (sz)
                     {
-                        if (memcmp(ret.getSelf() + seekGEOffset, seek, seekSize) != 0)
+                        callback.finishedRow();
+                        return ret.finalizeRowClear(sz);
+                    }
+                    else
+                    {
+                        if (optimizeSteppedPostFilter && stepExtra.returnMismatches())
                         {
-                            size32_t sz = helper->unfilteredTransform(ret, r);
-                            if (sz)
+                            if (memcmp(ret.getSelf() + seekGEOffset, seek, seekSize) != 0)
                             {
-                                wasCompleteMatch = false;
-                                callback.finishedRow();
-                                return ret.finalizeRowClear(sz);
+                                size32_t sz = helper->unfilteredTransform(ret, r);
+                                if (sz)
+                                {
+                                    wasCompleteMatch = false;
+                                    callback.finishedRow();
+                                    return ret.finalizeRowClear(sz);
+                                }
                             }
                         }
                     }
                 }
+                else
+                {
+                    callback.finishedRow(); // since filter might have accessed a blob
+                    size32_t sz = queryRowMetaData()->getRecordSize(r);
+                    memcpy(ret.ensureCapacity(sz, NULL), r, sz);
+                    return ret.finalizeRowClear(sz);
+                }
             }
             else
-            {
-                size32_t sz = queryRowMetaData()->getRecordSize(r);
-                memcpy(ret.ensureCapacity(sz, NULL), r, sz);
-                return ret.finalizeRowClear(sz);
-            }
+                callback.finishedRow(); // since filter might have accessed a blob
         }
         return nullptr;
     }

+ 12 - 7
tools/dumpkey/dumpkey.cpp

@@ -278,16 +278,21 @@ int main(int argc, const char **argv)
                     }
                     else if (helper)
                     {
-                        MemoryBuffer buf;
-                        MemoryBufferBuilder aBuilder(buf, 0);
-                        if (helper->transform(aBuilder, (const byte *) buffer))
+                        if (helper->canMatch(buffer))
                         {
-                            outmeta->toXML((const byte *) buf.toByteArray(), *writer.get());
-                            printf("%s\n", writer->str());
-                            writer->clear();
+                            MemoryBuffer buf;
+                            MemoryBufferBuilder aBuilder(buf, 0);
+                            if (helper->transform(aBuilder, (const byte *) buffer))
+                            {
+                                outmeta->toXML((const byte *) buf.toByteArray(), *writer.get());
+                                printf("%s\n", writer->str());
+                                writer->clear();
+                            }
+                            else
+                                count++;  // Don't count this row as it was postfiltered
                         }
                         else
-                            count++;  // Don't count this row as it was postfiltered
+                            count++;
                     }
                     else
                         printf("%.*s  :%" I64F "u\n", size, buffer, seq);