Browse Source

Merge pull request #1988 from ghalliday/countexists

Various index count and aggregate fixes

Reviewed-By: Jake Smith <jake.smith@lexisnexis.com>
Reviewed-By: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 13 years ago
parent
commit
2ad30b87f5

+ 1 - 3
ecl/hql/hqlattr.cpp

@@ -228,7 +228,6 @@ unsigned getOperatorMetaFlags(node_operator op)
 //Aggregate operators
     case no_count:
     case no_exists:
-    case no_notexists:
     case no_max:
     case no_min:
     case no_sum:
@@ -238,7 +237,6 @@ unsigned getOperatorMetaFlags(node_operator op)
     case no_correlation:
     case no_countgroup:
     case no_existsgroup:
-    case no_notexistsgroup:
     case no_maxgroup:
     case no_mingroup:
     case no_sumgroup:
@@ -614,7 +612,7 @@ unsigned getOperatorMetaFlags(node_operator op)
     case no_unused30: case no_unused31: case no_unused32: case no_unused33: case no_unused34: case no_unused35: case no_unused36: case no_unused37: case no_unused38:
     case no_unused40: case no_unused41: case no_unused42: case no_unused43: case no_unused44: case no_unused45: case no_unused46: case no_unused47: case no_unused48: case no_unused49:
     case no_unused50: case no_unused52:
-    case no_unused80:
+    case no_unused80: case no_unused82: case no_unused83:
     case no_is_null:
     case no_position:
     case no_current_time:

+ 0 - 7
ecl/hql/hqlexpr.cpp

@@ -976,9 +976,7 @@ const char *getOpString(node_operator op)
     case no_mapto: return "=>";
     case no_constant: return "<constant>";
     case no_field: return "<field>";
-    case no_notexists: return "NOT EXISTS";
     case no_exists: case no_existslist: return "EXISTS";
-    case no_notexistsgroup: return "NOT EXISTS";
     case no_existsgroup: return "EXISTS";
     case no_select: return ".";
     case no_table: return "DATASET";
@@ -1478,8 +1476,6 @@ node_operator getInverseOp(node_operator op)
     case no_in: return no_notin;
     case no_notbetween: return no_between;
     case no_between: return no_notbetween;
-    case no_notexists: return no_exists;
-    case no_exists: return no_notexists;
 //  case no_notwithin: return no_within;
 //  case no_within: return no_notwithin;
     default:
@@ -14690,7 +14686,6 @@ IHqlExpression * convertToSimpleAggregate(IHqlExpression * expr)
     case no_maxgroup:       newop = no_max; numArgs = 1; break;
     case no_sumgroup:       newop = no_sum; numArgs = 1; break;
     case no_existsgroup:    newop = no_exists; break;
-    case no_notexistsgroup: newop = no_notexists; break;
     default: 
         return NULL;
     }
@@ -14729,7 +14724,6 @@ IHqlExpression * queryAggregateFilter(IHqlExpression * expr)
     {
     case no_countgroup:
     case no_existsgroup:
-    case no_notexistsgroup:
         return queryRealChild(expr, 0);
     case no_sumgroup:
     case no_vargroup:
@@ -14788,7 +14782,6 @@ node_operator querySingleAggregate(IHqlExpression * expr, bool canFilterArg, boo
             switch (curOp)
             {
             case no_existsgroup:
-            case no_notexistsgroup:
             case no_countgroup:
                 break;
             default:

+ 4 - 6
ecl/hql/hqlexpr.hpp

@@ -224,7 +224,7 @@ enum _node_operator {
         no_comma,
         no_count,
         no_countgroup,
-        no_notexists,
+    no_unused82,
         no_exists,
         no_within,
         no_notwithin,
@@ -535,7 +535,7 @@ enum _node_operator {
         no_outputscalar,
         no_matchunicode,
         no_pat_validate,
-        no_notexistsgroup,
+   no_unused83,
         no_existsgroup,
         no_pat_use,
         no_unused13,
@@ -1743,8 +1743,7 @@ extern HQL_API void gatherWarnings(IErrorReceiver * errs, IHqlExpression * expr)
     case no_max:            \
     case no_min:            \
     case no_ave:            \
-    case no_exists:         \
-    case no_notexists
+    case no_exists
 
 #define NO_AGGREGATEGROUP   \
          no_countgroup:         \
@@ -1755,8 +1754,7 @@ extern HQL_API void gatherWarnings(IErrorReceiver * errs, IHqlExpression * expr)
     case no_maxgroup:           \
     case no_mingroup:           \
     case no_avegroup:           \
-    case no_existsgroup:        \
-    case no_notexistsgroup
+    case no_existsgroup
 
 extern HQL_API ITypeInfo * getTypedefType(IHqlExpression * expr);
 

+ 2 - 7
ecl/hql/hqlfold.cpp

@@ -3786,9 +3786,6 @@ IHqlExpression * NullFolderMixin::queryOptimizeAggregateInline(IHqlExpression *
     case no_existsgroup:
         value.setown(createConstant(numRows != 0));
         break;
-    case no_notexistsgroup:
-        value.setown(createConstant(numRows == 0));
-        break;
     case no_countgroup:
         {
             ITypeInfo * type = assign->queryChild(0)->queryType();
@@ -4391,7 +4388,6 @@ IHqlExpression * CExprFolderTransformer::doFoldTransformed(IHqlExpression * unfo
             break;
         }
     case no_exists:
-    case no_notexists:
         {
             IHqlExpression * child = expr->queryChild(0);
             node_operator childOp = child->getOperator();
@@ -4401,7 +4397,7 @@ IHqlExpression * CExprFolderTransformer::doFoldTransformed(IHqlExpression * unfo
                 if (isPureInlineDataset(child))
                 {
                     bool hasChildren = (child->queryChild(0)->numChildren() != 0);
-                    return createConstant((op == no_exists) ? hasChildren : !hasChildren);
+                    return createConstant(hasChildren);
                 }
                 break;
 #if 0
@@ -4409,7 +4405,7 @@ IHqlExpression * CExprFolderTransformer::doFoldTransformed(IHqlExpression * unfo
                 {
                     OwnedHqlExpr lhs = replaceChild(expr, 0, child->queryChild(0));
                     OwnedHqlExpr rhs = replaceChild(expr, 0, child->queryChild(1));
-                    return createValue((op == no_exists) ? no_or : no_add, expr->getType(), LINK(lhs), LINK(rhs));
+                    return createValue(no_or, expr->getType(), LINK(lhs), LINK(rhs));
                 }
             case no_if:
                 {
@@ -5196,7 +5192,6 @@ IHqlExpression * CExprFolderTransformer::createTransformed(IHqlExpression * expr
             case no_min:
             case no_sum:
             case no_exists:
-            case no_notexists:
             case no_ave:
                 //Could implement this on a temp table, or at least count...
                 //not sufficient to just fix these, because functions of these also fail.

+ 0 - 1
ecl/hql/hqlgram2.cpp

@@ -7302,7 +7302,6 @@ void HqlGram::checkConditionalAggregates(_ATOM name, IHqlExpression * value, con
         break;
     case no_existsgroup:
     case no_countgroup:
-    case no_notexistsgroup:
         cond = queryRealChild(value, 0);
         break;
     case no_covargroup:

+ 1 - 0
ecl/hql/hqlopt.cpp

@@ -495,6 +495,7 @@ IHqlExpression * CTreeOptimizer::optimizeAggregateUnsharedDataset(IHqlExpression
         break;
     case no_compound_indexread:
     case no_compound_diskread:
+    case no_keyedlimit:
         break;
     case no_limit:
         if (expr->hasProperty(onFailAtom))

+ 0 - 1
ecl/hql/hqlutil.cpp

@@ -3928,7 +3928,6 @@ extern HQL_API IHqlExpression * convertScalarAggregateToDataset(IHqlExpression *
     case no_max:   newop = no_maxgroup; break;
     case no_sum:   newop = no_sumgroup; break;
     case no_exists:newop = no_existsgroup; break;
-    case no_notexists:  newop = no_notexistsgroup; break;
     case no_variance:   newop = no_vargroup; break;
     case no_covariance: newop = no_covargroup; break;
     case no_correlation:newop = no_corrgroup; break;

+ 0 - 2
ecl/hqlcpp/hqlcpp.cpp

@@ -2486,7 +2486,6 @@ void HqlCppTranslator::buildExprAssign(BuildCtx & ctx, const CHqlBoundTarget & t
     case no_min:
     case no_sum:
     case no_exists:
-    case no_notexists:
         doBuildAssignAggregate(ctx, target, expr);
         break;
     case no_getenv:
@@ -2799,7 +2798,6 @@ void HqlCppTranslator::buildExpr(BuildCtx & ctx, IHqlExpression * expr, CHqlBoun
             doBuildExprAggregate(ctx, expr, tgt);
         return;
     case no_exists:
-    case no_notexists:
         if (!(expr->isPure() && ctx.getMatchExpr(expr, tgt)))
             doBuildExprExists(ctx, expr, tgt);
         return;

+ 14 - 14
ecl/hqlcpp/hqlcppds.cpp

@@ -631,7 +631,6 @@ void HqlCppTranslator::doBuildAssignAggregateLoop(BuildCtx & ctx, const CHqlBoun
     switch (op)
     {
     case no_exists:
-    case no_notexists:
         {
             OwnedHqlExpr optimized = queryOptimizedExists(ctx, expr, dataset);
             if (optimized)
@@ -680,7 +679,15 @@ void HqlCppTranslator::doBuildAssignAggregateLoop(BuildCtx & ctx, const CHqlBoun
     //If no_if or no_addfiles has been optimized above then the selector for the argument will have changed => map it.
     if (arg && (dataset != oldDataset))
         arg.setown(replaceSelector(arg, oldDataset, dataset));
-    bool needToBreak = (op == no_exists || op == no_notexists);
+
+    bool needToBreak = (op == no_exists);
+    if (needToBreak)
+    {
+        //if it can have at most one row (fairly strange code!) then don't add a break
+        //unless it was deliberately a choosen to restrict the number of iterations.
+        if (hasNoMoreRowsThan(dataset, 1) && (dataset->getOperator() != no_choosen))
+            needToBreak = false;
+    }
 
     BuildCtx loopctx(ctx);
     buildDatasetIterate(loopctx, dataset, needToBreak);
@@ -688,9 +695,9 @@ void HqlCppTranslator::doBuildAssignAggregateLoop(BuildCtx & ctx, const CHqlBoun
     switch (op)
     {
     case no_exists:
-    case no_notexists:
-        buildExprAssign(loopctx, target, queryBoolExpr(op==no_exists));
-        loopctx.addBreak();
+        buildExprAssign(loopctx, target, queryBoolExpr(true));
+        if (needToBreak)
+            loopctx.addBreak();
         break;
     case no_count:
         {
@@ -748,7 +755,6 @@ bool assignAggregateDirect(const CHqlBoundTarget & target, IHqlExpression * expr
             break;
         //fall through
     case no_exists:
-    case no_notexists:
     case no_count:
         if (target.expr->getOperator() != no_variable)
             return false;
@@ -784,8 +790,7 @@ void HqlCppTranslator::doBuildAssignAggregate(BuildCtx & ctx, const CHqlBoundTar
         switch (op)
         {
         case no_exists:
-        case no_notexists:
-            buildExprAssign(ctx, target, queryBoolExpr(op==no_notexists));
+            buildExprAssign(ctx, target, queryBoolExpr(false));
             break;
         default:
             {
@@ -3404,9 +3409,6 @@ void HqlCppTranslator::doBuildRowAssignAggregateClear(BuildCtx & ctx, IReference
         case no_existsgroup:
             curTarget->buildClear(ctx, 0);
             break;
-        case no_notexistsgroup:
-            curTarget->set(ctx, queryBoolExpr(true));
-            break;
         default:
             if (src->isConstant())
                 curTarget->set(ctx, src);
@@ -3487,11 +3489,10 @@ void HqlCppTranslator::doBuildRowAssignAggregateNext(BuildCtx & ctx, IReferenceS
             }
             break;
         case no_existsgroup:
-        case no_notexistsgroup:
             assertex(!(arg && isVariableOffset));
             if (arg)
                 buildFilter(condctx, arg);
-            curTarget->set(condctx, queryBoolExpr(srcOp == no_existsgroup));
+            curTarget->set(condctx, queryBoolExpr(true));
             if (isSingleExists)
                 condctx.addBreak();
             break;
@@ -3538,7 +3539,6 @@ void HqlCppTranslator::doBuildRowAssignAggregate(BuildCtx & ctx, IReferenceSelec
             isSingleExists = false;
             break;
         case no_existsgroup:
-        case no_notexistsgroup:
             break;
         case no_mingroup:
             isSingleExists = false;

+ 0 - 2
ecl/hqlcpp/hqlcse.cpp

@@ -177,7 +177,6 @@ bool CseSpotterInfo::useInverseForAlias()
     case no_ne:
     case no_notin:
     case no_notbetween:
-    case no_notexists:
         return inverse->worthAliasingOnOwn();
     }
 
@@ -188,7 +187,6 @@ bool CseSpotterInfo::useInverseForAlias()
     case no_ne:
     case no_notin:
     case no_notbetween:
-    case no_notexists:
         return !worthAliasingOnOwn();
     }
     return op > invOp;

+ 1 - 12
ecl/hqlcpp/hqlhtcpp.cpp

@@ -11835,9 +11835,6 @@ void HqlCppTranslator::doBuildAggregateClearFunc(BuildCtx & ctx, IHqlExpression
         case no_existsgroup:
             buildClear(funcctx, target);
             break;
-        case no_notexistsgroup:
-            buildAssign(funcctx, target, queryBoolExpr(true));
-            break;
         default:
             if (src->isConstant())
                 buildAssign(funcctx, target, src);
@@ -11978,7 +11975,6 @@ void HqlCppTranslator::doBuildAggregateProcessTransform(BuildCtx & ctx, BoundRow
             }
             break;
         case no_existsgroup:
-        case no_notexistsgroup:
             assertex(!(arg && isVariableOffset));
             cond = arg;
             if (cond || !alwaysNextRow)
@@ -11986,7 +11982,7 @@ void HqlCppTranslator::doBuildAggregateProcessTransform(BuildCtx & ctx, BoundRow
                 //The assign is conditional because unconditionally it is done in the AggregateFirst
                 if (cond)
                     buildFilter(condctx, cond);
-                buildAssign(condctx, target, queryBoolExpr(srcOp == no_existsgroup));
+                buildAssign(condctx, target, queryBoolExpr(true));
             }
             break;
         default:
@@ -12077,13 +12073,6 @@ void HqlCppTranslator::doBuildAggregateMergeFunc(BuildCtx & ctx, IHqlExpression
                 buildAssign(condctx, target, queryBoolExpr(true));
                 break;
             }
-        case no_notexistsgroup:
-            {
-                BuildCtx condctx(funcctx);
-                buildFilter(condctx, target);
-                buildAssign(condctx, target, src);
-            }
-            break;
         default:
             //already filled in and wouldn't be legal to have an expression in this case anyway...
             break;

+ 59 - 56
ecl/hqlcpp/hqlsource.cpp

@@ -727,7 +727,7 @@ protected:
     void doBuildAggregateSelectIterator(BuildCtx & ctx, IHqlExpression * expr);
     void doBuildNormalizeIterators(BuildCtx & ctx, IHqlExpression * expr, bool isChildIterator);
     void buildAggregateHelpers(IHqlExpression * expr, bool needMerge);
-    void buildCountHelpers(IHqlExpression * expr);
+    void buildCountHelpers(IHqlExpression * expr, bool allowMultiple);
     virtual void buildFlagsMember(IHqlExpression * expr) {}
     void buildGlobalGroupAggregateHelpers(IHqlExpression * expr);
     void buildGroupAggregateHelpers(ParentExtract * extractBuilder, IHqlExpression * aggregate);
@@ -2277,7 +2277,7 @@ void SourceBuilder::buildAggregateHelpers(IHqlExpression * expr, bool needMerge)
 }
 
 
-void SourceBuilder::buildCountHelpers(IHqlExpression * expr)
+void SourceBuilder::buildCountHelpers(IHqlExpression * expr, bool allowMultiple)
 {
     StringBuffer s;
 
@@ -2285,62 +2285,65 @@ void SourceBuilder::buildCountHelpers(IHqlExpression * expr)
     if (transformCanFilter||isNormalize)
         translator.doBuildBoolFunction(instance->classctx, "hasFilter", true);
 
-    bool isExists = hasExistChoosenLimit();
-    OwnedHqlExpr one = getSizetConstant(1);
-
-    if (transformCanFilter||isNormalize)
+    if (allowMultiple)
     {
-        //virtual bool numValid(const void * src) = 0;
-        BuildCtx rowctx(instance->startctx);
-        rowctx.addQuotedCompound("virtual size32_t numValid(const void * src)");
-        rowctx.addQuoted("return valid((byte *)src);");
-
-        //virtual size32_t numValid(size32_t srcLen, const void * src);
-        BuildCtx rowsctx(instance->startctx);
-        rowsctx.addQuotedCompound("virtual size32_t numValid(size32_t srcLen, const void * _src)");
-        rowsctx.addQuoted("unsigned char * src = (unsigned char *)_src;");
-        OwnedHqlExpr ds = createVariable("src", makeReferenceModifier(tableExpr->getType()));
-        OwnedHqlExpr len = createVariable("srcLen", LINK(sizetType));
-        OwnedHqlExpr fullDs = createTranslated(ds, len);
-
-        if (isExists)
-        {
-            BuildCtx iterctx(rowsctx);
-            BoundRow * curRow = translator.buildDatasetIterate(iterctx, fullDs, false);
-            s.clear().append("if (valid(");
-            translator.generateExprCpp(s, curRow->queryBound());
-            s.append("))");
-            iterctx.addQuotedCompound(s);
-            iterctx.addReturn(one);
-            rowsctx.addQuoted("return 0;");
-        }
-        else
+        bool isExists = hasExistChoosenLimit();
+        OwnedHqlExpr one = getSizetConstant(1);
+
+        if (transformCanFilter||isNormalize)
         {
-            rowsctx.addQuoted("size32_t cnt = 0;");
-            BuildCtx iterctx(rowsctx);
-            BoundRow * curRow = translator.buildDatasetIterate(iterctx, fullDs, false);
-            s.clear().append("cnt += valid(");
-            translator.generateExprCpp(s, curRow->queryBound());
-            s.append(");");
-            iterctx.addQuoted(s);
-            rowsctx.addQuoted("return cnt;");
+            //virtual size32_t numValid(const void * src) = 0;
+            BuildCtx rowctx(instance->startctx);
+            rowctx.addQuotedCompound("virtual size32_t numValid(const void * src)");
+            rowctx.addQuoted("return valid((byte *)src);");
+
+            //virtual size32_t numValid(size32_t srcLen, const void * src);
+            BuildCtx rowsctx(instance->startctx);
+            rowsctx.addQuotedCompound("virtual size32_t numValid(size32_t srcLen, const void * _src)");
+            rowsctx.addQuoted("unsigned char * src = (unsigned char *)_src;");
+            OwnedHqlExpr ds = createVariable("src", makeReferenceModifier(tableExpr->getType()));
+            OwnedHqlExpr len = createVariable("srcLen", LINK(sizetType));
+            OwnedHqlExpr fullDs = createTranslated(ds, len);
+
+            if (isExists)
+            {
+                BuildCtx iterctx(rowsctx);
+                BoundRow * curRow = translator.buildDatasetIterate(iterctx, fullDs, false);
+                s.clear().append("if (valid(");
+                translator.generateExprCpp(s, curRow->queryBound());
+                s.append("))");
+                iterctx.addQuotedCompound(s);
+                iterctx.addReturn(one);
+                rowsctx.addQuoted("return 0;");
+            }
+            else
+            {
+                rowsctx.addQuoted("size32_t cnt = 0;");
+                BuildCtx iterctx(rowsctx);
+                BoundRow * curRow = translator.buildDatasetIterate(iterctx, fullDs, false);
+                s.clear().append("cnt += valid(");
+                translator.generateExprCpp(s, curRow->queryBound());
+                s.append(");");
+                iterctx.addQuoted(s);
+                rowsctx.addQuoted("return cnt;");
+            }
         }
-    }
-    else
-    {
-        //virtual size32_t numValid(size32_t srcLen, const void * src);
-        BuildCtx rowsctx(instance->startctx);
-        rowsctx.addQuotedCompound("virtual size32_t numValid(size32_t srcLen, const void * _src)");
-        if (isExists)
-            rowsctx.addReturn(one);
         else
         {
-            rowsctx.addQuoted("unsigned char * src = (unsigned char *)_src;");
-            CHqlBoundExpr bound;
-            bound.length.setown(createVariable("srcLen", LINK(sizetType)));
-            bound.expr.setown(createVariable("src", makeReferenceModifier(tableExpr->getType())));
-            OwnedHqlExpr count = translator.getBoundCount(bound);
-            rowsctx.addReturn(count);
+            //virtual size32_t numValid(size32_t srcLen, const void * src);
+            BuildCtx rowsctx(instance->startctx);
+            rowsctx.addQuotedCompound("virtual size32_t numValid(size32_t srcLen, const void * _src)");
+            if (isExists)
+                rowsctx.addReturn(one);
+            else
+            {
+                rowsctx.addQuoted("unsigned char * src = (unsigned char *)_src;");
+                CHqlBoundExpr bound;
+                bound.length.setown(createVariable("srcLen", LINK(sizetType)));
+                bound.expr.setown(createVariable("src", makeReferenceModifier(tableExpr->getType())));
+                OwnedHqlExpr count = translator.getBoundCount(bound);
+                rowsctx.addReturn(count);
+            }
         }
     }
 }
@@ -3005,7 +3008,7 @@ void DiskCountBuilder::buildMembers(IHqlExpression * expr)
     isUnfilteredCount = !(transformCanFilter||isNormalize);
     buildFilenameMember();
     DiskReadBuilderBase::buildMembers(expr);
-    buildCountHelpers(expr);
+    buildCountHelpers(expr, true);
 }
 
 
@@ -6520,7 +6523,7 @@ void IndexCountBuilder::buildMembers(IHqlExpression * expr)
 {
     buildFilenameMember();
     IndexReadBuilderBase::buildMembers(expr);
-    buildCountHelpers(expr);
+    buildCountHelpers(expr, false);
 }
 
 
@@ -6529,7 +6532,7 @@ void IndexCountBuilder::buildTransform(IHqlExpression * expr)
     if (transformCanFilter||isNormalize)
     {
         BuildCtx transformCtx(instance->startctx);
-        transformCtx.addQuotedCompound("size32_t valid(byte * _left)");
+        transformCtx.addQuotedCompound("virtual size32_t numValid(const void * _left)");
         transformCtx.addQuoted("unsigned char * left = (unsigned char *)_left;");
         translator.associateBlobHelper(transformCtx, tableExpr, "fpp");
         OwnedHqlExpr cnt;

+ 98 - 38
ecl/hqlcpp/hqlttcpp.cpp

@@ -1532,7 +1532,6 @@ IHqlExpression * evalNormalizeAggregateExpr(IHqlExpression * selector, IHqlExpre
     case no_ave:
     case no_select:
     case no_exists:
-    case no_notexists:
     case no_field:
         // a count on a child dataset or something else - add it as it is...
         //goes wrong for count(group)*
@@ -1542,7 +1541,6 @@ IHqlExpression * evalNormalizeAggregateExpr(IHqlExpression * selector, IHqlExpre
     case no_maxgroup:
     case no_mingroup:
     case no_existsgroup:
-    case no_notexistsgroup:
         {
             ForEachItemIn(idx, assigns)
             {
@@ -3144,9 +3142,6 @@ IHqlExpression * ThorHqlTransformer::getMergeTransform(IHqlExpression * dataset,
                 case no_existsgroup:
                     newRhs.setown(createValue(no_existsgroup, selected->getType(), LINK(selected)));
                     break;
-                case no_notexistsgroup:
-                    newRhs.setown(createValue(no_notexistsgroup, selected->getType(), getInverse(selected)));
-                    break;
                 case no_vargroup:
                 case no_covargroup:
                 case no_corrgroup:
@@ -3432,6 +3427,8 @@ void CompoundSourceInfo::reset()
     isBoundary = false;
     isPreloaded = false;
     isLimited = false;
+    hasChoosen = false;
+    hasSkipLimit = false;
     isCloned = false;
     isFiltered = false;
     isPostFiltered = false;
@@ -3439,38 +3436,47 @@ void CompoundSourceInfo::reset()
 }
 
 
-bool CompoundSourceInfo::canMergeLimit(IHqlExpression * expr, ClusterType targetClusterType)
+bool CompoundSourceInfo::canMergeLimit(IHqlExpression * expr, ClusterType targetClusterType) const
 {
-    if (!isLimited && !isAggregate() && !isChooseNAllLimit(expr->queryChild(1)) && isBinary())
+    if (isAggregate() || isChooseNAllLimit(expr->queryChild(1)) || !isBinary())
+        return false;
+
+    node_operator op = expr->getOperator();
+    switch (op)
     {
-        node_operator op = expr->getOperator();
-        switch (op)
+    case no_limit:
+        //Can't merge a limit into a choosen() because the limit will be applied first
+        if (isLimited || hasChoosen)
+            return false;
+
+        //Don't merge skip and onfail limits into activities that can't implement them completely
+        if (targetClusterType != RoxieCluster)
         {
-        case no_limit:
-            //Don't merge skip and onfail limits into activities that can't implement them completely
-            if (targetClusterType != RoxieCluster)
-            {
-                if (expr->hasProperty(skipAtom) || expr->hasProperty(onFailAtom))
-                    return false;
-            }
-            else
-            {
-                //Can always limit a count/aggregate with a skip limit - just resets count to 0
-                if (expr->hasProperty(skipAtom))
-                    return true;
-            }
-            break;
+            if (expr->hasProperty(skipAtom) || expr->hasProperty(onFailAtom))
+                return false;
         }
-
-        switch (sourceOp)
+        else
         {
-        case no_compound_diskread:
-        case no_compound_disknormalize:
-        case no_compound_indexread:
-        case no_compound_indexnormalize:
-            return true;
+            //Can always limit a count/aggregate with a skip limit - just resets count to 0
+            if (expr->hasProperty(skipAtom))
+                return true;
         }
+        break;
+    case no_choosen:
+        if (hasChoosen)
+            return false;
+        break;
     }
+
+    switch (sourceOp)
+    {
+    case no_compound_diskread:
+    case no_compound_disknormalize:
+    case no_compound_indexread:
+    case no_compound_indexnormalize:
+        return true;
+    }
+
     return false;
 }
 
@@ -3499,6 +3505,8 @@ void CompoundSourceInfo::ensureCompound()
 bool CompoundSourceInfo::inherit(const CompoundSourceInfo & other, node_operator newSourceOp)
 {
     isLimited = other.isLimited;
+    hasSkipLimit = other.hasSkipLimit;
+    hasChoosen = other.hasChoosen;
     isFiltered = other.isFiltered;
     isPostFiltered = other.isPostFiltered;
     isPreloaded = other.isPreloaded;
@@ -3520,7 +3528,7 @@ bool CompoundSourceInfo::inherit(const CompoundSourceInfo & other, node_operator
     return true;
 }
 
-bool CompoundSourceInfo::isAggregate()
+bool CompoundSourceInfo::isAggregate() const
 {
     switch (sourceOp)
     {
@@ -3628,7 +3636,7 @@ void CompoundSourceTransformer::analyseGatherInfo(IHqlExpression * expr)
         break;
     }
 
-    switch (expr->getOperator())
+    switch (op)
     {
     case no_newkeyindex:
         extra->sourceOp = no_compound_indexread;
@@ -3746,7 +3754,7 @@ void CompoundSourceTransformer::analyseGatherInfo(IHqlExpression * expr)
             {
                 IHqlExpression * dataset = expr->queryChild(0);
                 CompoundSourceInfo * parentExtra = queryBodyExtra(dataset);
-                if (!parentExtra->isAggregate() && !parentExtra->isLimited && parentExtra->isBinary())
+                if (!parentExtra->isAggregate() && !parentExtra->hasAnyLimit() && parentExtra->isBinary())
                 {
                     node_operator newOp = no_none;
                     switch (parentExtra->sourceOp)
@@ -3813,7 +3821,16 @@ void CompoundSourceTransformer::analyseGatherInfo(IHqlExpression * expr)
             {
                 if (extra->inherit(*parentExtra))
                 {
-                    extra->isLimited = true;
+                    if (op == no_choosen)
+                    {
+                        extra->hasChoosen = true;
+                    }
+                    else
+                    {
+                        extra->isLimited = true;
+                        if (expr->hasProperty(skipAtom))
+                            extra->hasSkipLimit = true;
+                    }
                     if (expr->hasProperty(onFailAtom))
                         extra->isCreateRowLimited = true;
                     extra->isCloned = cloneRequired;
@@ -3841,8 +3858,9 @@ void CompoundSourceTransformer::analyseGatherInfo(IHqlExpression * expr)
                 bool isSimpleCountExists = isSimpleCountExistsAggregate(expr, true, false);
                 if (parentExtra->isCreateRowLimited)
                     break;
-                if (parentExtra->isLimited && !isSimpleCountExists)
+                if (parentExtra->hasAnyLimit() && !isSimpleCountExists)
                     break;
+
                 node_operator newOp = no_none;
                 node_operator parentOp = parentExtra->sourceOp;
                 if (queryRealChild(expr, 3))
@@ -3890,7 +3908,9 @@ void CompoundSourceTransformer::analyseGatherInfo(IHqlExpression * expr)
                         break;
                     case no_compound_indexread:
                     case no_compound_indexnormalize:
-                        if (flags & CSFnewindex)
+                        //Don't create counts for (non-keyed) skip limits - little benefit, and could cause problems
+                        //correctly returning the counts - e.g. especially for exists()
+                        if ((flags & CSFnewindex) && !parentExtra->hasSkipLimit)
                         {
                             newOp = no_compound_indexaggregate;
                             //Force counts on indexes to become a new compound activity
@@ -3902,7 +3922,11 @@ void CompoundSourceTransformer::analyseGatherInfo(IHqlExpression * expr)
                             //That can be logged as a future enhancement.....
 //                          if (isSimpleCountExists && !parentExtra->isPostFiltered && (parentOp == no_compound_indexread))
                             if (isSimpleCountExists && (parentOp == no_compound_indexread))
-                                extra->forceCompound = true;
+                            {
+                                //A skip limit will require everything to be read anyway - so no point splitting in two
+                                if (!parentExtra->hasSkipLimit)
+                                    extra->forceCompound = true;
+                            }
                         }
                         break;
                     case no_compound_childread:
@@ -3936,7 +3960,7 @@ void CompoundSourceTransformer::analyseGatherInfo(IHqlExpression * expr)
         {
             IHqlExpression * dataset = expr->queryChild(0);
             CompoundSourceInfo * parentExtra = queryBodyExtra(dataset);
-            if (!parentExtra->isLimited && !parentExtra->isAggregate())
+            if (!parentExtra->hasAnyLimit() && !parentExtra->isAggregate())
             {
                 if (extra->inherit(*parentExtra))
                 {
@@ -4322,6 +4346,42 @@ IHqlExpression * OptimizeActivityTransformer::optimizeCompare(IHqlExpression * l
     if (looksLikeSimpleCount(rhs))
         return NULL;
 
+    //Convert count(x) >= 1 to exists(x)  (and other variants)
+    node_operator existOp = no_none;
+    switch (op)
+    {
+    case no_ne:
+    case no_gt:
+        if (matchesConstantValue(rhs, 0))
+            existOp = no_exists;
+        break;
+    case no_eq:
+    case no_le:
+        if (matchesConstantValue(rhs, 0))
+            existOp = no_not;
+        break;
+    case no_lt:
+        if (matchesConstantValue(rhs, 1))
+            existOp = no_not;
+        break;
+    case no_ge:
+        if (matchesConstantValue(rhs, 1))
+            existOp = no_exists;
+        break;
+    }
+
+    if (existOp != no_none)
+    {
+        if (lhs->getOperator() == no_count)
+        {
+            IHqlExpression * ds = lhs->queryChild(0);
+            OwnedHqlExpr ret = createValue(no_exists, makeBoolType(), LINK(ds));
+            if (existOp == no_not)
+                return createValue(no_not, makeBoolType(), ret.getClear());
+            return ret.getClear();
+        }
+    }
+
     unsigned choosenDelta =0;
     switch (op)
     {

+ 5 - 2
ecl/hqlcpp/hqlttcpp.ipp

@@ -226,9 +226,9 @@ class CompoundSourceInfo : public NewTransformInfo
 public:
     CompoundSourceInfo(IHqlExpression * _original); 
 
-    bool canMergeLimit(IHqlExpression * expr, ClusterType targetClusterType);
+    bool canMergeLimit(IHqlExpression * expr, ClusterType targetClusterType) const;
     void ensureCompound();
-    bool isAggregate();
+    bool isAggregate() const;
     inline bool isShared() { return splitCount > 1; }
     bool inherit(const CompoundSourceInfo & other, node_operator newSourceOp = no_none);
     inline bool isNoteUsageFirst() 
@@ -238,6 +238,7 @@ public:
     }
     inline void noteUsage() { if (splitCount < 10) splitCount++; }
     inline bool isBinary() const { return mode != no_csv && mode != no_xml; }
+    inline bool hasAnyLimit() const { return isLimited || hasChoosen; }
     void reset();
 
 public:
@@ -249,6 +250,8 @@ public:
     bool isBoundary:1;
     bool isCloned:1;
     bool isLimited:1;
+    bool hasChoosen:1;
+    bool hasSkipLimit:1;
     bool isPreloaded:1;
     bool isFiltered:1;
     bool isPostFiltered:1;

+ 11 - 7
ecl/hthor/hthor.cpp

@@ -3083,14 +3083,18 @@ const void * CHThorAggregateActivity::nextInGroup()
         helper.processFirst(rowBuilder, next);
         releaseHThorRow(next);
         
-        loop
+        bool abortEarly = (kind == TAKexistsaggregate) && !input->isGrouped();
+        if (!abortEarly)
         {
-            next = input->nextInGroup();
-            if (!next)
-                break;
-            
-            helper.processNext(rowBuilder, next);
-            releaseHThorRow(next);
+            loop
+            {
+                next = input->nextInGroup();
+                if (!next)
+                    break;
+
+                helper.processNext(rowBuilder, next);
+                releaseHThorRow(next);
+            }
         }
     }
     

+ 6 - 0
rtl/include/eclhelper_base.hpp

@@ -2617,6 +2617,12 @@ class CThorIndexCountArg : public CThorArg, implements IHThorIndexCountArg, impl
     virtual unsigned __int64 getKeyedLimit()                { return (unsigned __int64) -1; }
     virtual void onKeyedLimitExceeded()                     { }
 
+    virtual size32_t numValid(size32_t srcLen, const void * _src)
+    {
+        rtlFailUnexpected();
+        return 0;
+    }
+
 public:
     IThorIndexCallback * fpp;
 };