Browse Source

Various index count and aggregate fixes

Remove projects from count(keyed-limit(project(index)))
  A missing case from a switch statement meant projects weren't
  stripped when there was a keyed limit on the index read.

Reduce the generated code size for an index count/exists
  The numValid(size, ptr) member was never called for an index, so default
  it to throwing an exception in the base class, and clean up numValid(ptr)
  code.

CHOOSEN(LIMIT(index-read))
  The choosen was not being combined within the compound indexread if there
  was a limit.  It is however valid to do so since the choosen is done last.
  (A limit cannot be combined when there is an existing choosen.)
  This was unfortunate because it was frustrating another optimization:
  count(index-read) > n being converted to count(choosen(index-read,n+1)) > n
  (This is temporarily retained, but will be removed in a subsequent commit).

IF(count(x) > 0, x, y) optimization
  Currently the intention is to perform a count-index on x before reading x.
  However, if x contains a skip limit it is going to need to read the entire
  result, so don't perform the count separately.

Don't generate count(index) if there is a skip limit

Optimize count(x) > 0 to exists(x)
  Instead of (count(x) > 0) becoming count(choosen(x, 1)) > 0 it is now
  transformed to exists(x) - which is either as efficient as, or more
  efficient than, the count representation.

Remove no_notexists since never created

Terminate hthor exists aggregate activity early

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 13 years ago
parent
commit
bce0431db9

+ 1 - 3
ecl/hql/hqlattr.cpp

@@ -228,7 +228,6 @@ unsigned getOperatorMetaFlags(node_operator op)
 //Aggregate operators
     case no_count:
     case no_exists:
-    case no_notexists:
     case no_max:
     case no_min:
     case no_sum:
@@ -238,7 +237,6 @@ unsigned getOperatorMetaFlags(node_operator op)
     case no_correlation:
     case no_countgroup:
     case no_existsgroup:
-    case no_notexistsgroup:
     case no_maxgroup:
     case no_mingroup:
     case no_sumgroup:
@@ -614,7 +612,7 @@ unsigned getOperatorMetaFlags(node_operator op)
     case no_unused30: case no_unused31: case no_unused32: case no_unused33: case no_unused34: case no_unused35: case no_unused36: case no_unused37: case no_unused38:
     case no_unused40: case no_unused41: case no_unused42: case no_unused43: case no_unused44: case no_unused45: case no_unused46: case no_unused47: case no_unused48: case no_unused49:
     case no_unused50: case no_unused52:
-    case no_unused80:
+    case no_unused80: case no_unused82: case no_unused83:
     case no_is_null:
     case no_position:
     case no_current_time:

+ 0 - 7
ecl/hql/hqlexpr.cpp

@@ -976,9 +976,7 @@ const char *getOpString(node_operator op)
     case no_mapto: return "=>";
     case no_constant: return "<constant>";
     case no_field: return "<field>";
-    case no_notexists: return "NOT EXISTS";
     case no_exists: case no_existslist: return "EXISTS";
-    case no_notexistsgroup: return "NOT EXISTS";
     case no_existsgroup: return "EXISTS";
     case no_select: return ".";
     case no_table: return "DATASET";
@@ -1478,8 +1476,6 @@ node_operator getInverseOp(node_operator op)
     case no_in: return no_notin;
     case no_notbetween: return no_between;
     case no_between: return no_notbetween;
-    case no_notexists: return no_exists;
-    case no_exists: return no_notexists;
 //  case no_notwithin: return no_within;
 //  case no_within: return no_notwithin;
     default:
@@ -14676,7 +14672,6 @@ IHqlExpression * convertToSimpleAggregate(IHqlExpression * expr)
     case no_maxgroup:       newop = no_max; numArgs = 1; break;
     case no_sumgroup:       newop = no_sum; numArgs = 1; break;
     case no_existsgroup:    newop = no_exists; break;
-    case no_notexistsgroup: newop = no_notexists; break;
     default: 
         return NULL;
     }
@@ -14715,7 +14710,6 @@ IHqlExpression * queryAggregateFilter(IHqlExpression * expr)
     {
     case no_countgroup:
     case no_existsgroup:
-    case no_notexistsgroup:
         return queryRealChild(expr, 0);
     case no_sumgroup:
     case no_vargroup:
@@ -14774,7 +14768,6 @@ node_operator querySingleAggregate(IHqlExpression * expr, bool canFilterArg, boo
             switch (curOp)
             {
             case no_existsgroup:
-            case no_notexistsgroup:
             case no_countgroup:
                 break;
             default:

+ 4 - 6
ecl/hql/hqlexpr.hpp

@@ -224,7 +224,7 @@ enum _node_operator {
         no_comma,
         no_count,
         no_countgroup,
-        no_notexists,
+    no_unused82,
         no_exists,
         no_within,
         no_notwithin,
@@ -535,7 +535,7 @@ enum _node_operator {
         no_outputscalar,
         no_matchunicode,
         no_pat_validate,
-        no_notexistsgroup,
+   no_unused83,
         no_existsgroup,
         no_pat_use,
         no_unused13,
@@ -1743,8 +1743,7 @@ extern HQL_API void gatherWarnings(IErrorReceiver * errs, IHqlExpression * expr)
     case no_max:            \
     case no_min:            \
     case no_ave:            \
-    case no_exists:         \
-    case no_notexists
+    case no_exists
 
 #define NO_AGGREGATEGROUP   \
          no_countgroup:         \
@@ -1755,8 +1754,7 @@ extern HQL_API void gatherWarnings(IErrorReceiver * errs, IHqlExpression * expr)
     case no_maxgroup:           \
     case no_mingroup:           \
     case no_avegroup:           \
-    case no_existsgroup:        \
-    case no_notexistsgroup
+    case no_existsgroup
 
 extern HQL_API ITypeInfo * getTypedefType(IHqlExpression * expr);
 

+ 2 - 7
ecl/hql/hqlfold.cpp

@@ -3786,9 +3786,6 @@ IHqlExpression * NullFolderMixin::queryOptimizeAggregateInline(IHqlExpression *
     case no_existsgroup:
         value.setown(createConstant(numRows != 0));
         break;
-    case no_notexistsgroup:
-        value.setown(createConstant(numRows == 0));
-        break;
     case no_countgroup:
         {
             ITypeInfo * type = assign->queryChild(0)->queryType();
@@ -4382,7 +4379,6 @@ IHqlExpression * CExprFolderTransformer::doFoldTransformed(IHqlExpression * unfo
             break;
         }
     case no_exists:
-    case no_notexists:
         {
             IHqlExpression * child = expr->queryChild(0);
             node_operator childOp = child->getOperator();
@@ -4392,7 +4388,7 @@ IHqlExpression * CExprFolderTransformer::doFoldTransformed(IHqlExpression * unfo
                 if (isPureInlineDataset(child))
                 {
                     bool hasChildren = (child->queryChild(0)->numChildren() != 0);
-                    return createConstant((op == no_exists) ? hasChildren : !hasChildren);
+                    return createConstant(hasChildren);
                 }
                 break;
 #if 0
@@ -4400,7 +4396,7 @@ IHqlExpression * CExprFolderTransformer::doFoldTransformed(IHqlExpression * unfo
                 {
                     OwnedHqlExpr lhs = replaceChild(expr, 0, child->queryChild(0));
                     OwnedHqlExpr rhs = replaceChild(expr, 0, child->queryChild(1));
-                    return createValue((op == no_exists) ? no_or : no_add, expr->getType(), LINK(lhs), LINK(rhs));
+                    return createValue(no_or, expr->getType(), LINK(lhs), LINK(rhs));
                 }
             case no_if:
                 {
@@ -5186,7 +5182,6 @@ IHqlExpression * CExprFolderTransformer::createTransformed(IHqlExpression * expr
             case no_min:
             case no_sum:
             case no_exists:
-            case no_notexists:
             case no_ave:
                 //Could implement this on a temp table, or at least count...
                 //not sufficient to just fix these, because functions of these also fail.

+ 0 - 1
ecl/hql/hqlgram2.cpp

@@ -7302,7 +7302,6 @@ void HqlGram::checkConditionalAggregates(_ATOM name, IHqlExpression * value, con
         break;
     case no_existsgroup:
     case no_countgroup:
-    case no_notexistsgroup:
         cond = queryRealChild(value, 0);
         break;
     case no_covargroup:

+ 1 - 0
ecl/hql/hqlopt.cpp

@@ -495,6 +495,7 @@ IHqlExpression * CTreeOptimizer::optimizeAggregateUnsharedDataset(IHqlExpression
         break;
     case no_compound_indexread:
     case no_compound_diskread:
+    case no_keyedlimit:
         break;
     case no_limit:
         if (expr->hasProperty(onFailAtom))

+ 0 - 1
ecl/hql/hqlutil.cpp

@@ -3928,7 +3928,6 @@ extern HQL_API IHqlExpression * convertScalarAggregateToDataset(IHqlExpression *
     case no_max:   newop = no_maxgroup; break;
     case no_sum:   newop = no_sumgroup; break;
     case no_exists:newop = no_existsgroup; break;
-    case no_notexists:  newop = no_notexistsgroup; break;
     case no_variance:   newop = no_vargroup; break;
     case no_covariance: newop = no_covargroup; break;
     case no_correlation:newop = no_corrgroup; break;

+ 0 - 2
ecl/hqlcpp/hqlcpp.cpp

@@ -2486,7 +2486,6 @@ void HqlCppTranslator::buildExprAssign(BuildCtx & ctx, const CHqlBoundTarget & t
     case no_min:
     case no_sum:
     case no_exists:
-    case no_notexists:
         doBuildAssignAggregate(ctx, target, expr);
         break;
     case no_getenv:
@@ -2799,7 +2798,6 @@ void HqlCppTranslator::buildExpr(BuildCtx & ctx, IHqlExpression * expr, CHqlBoun
             doBuildExprAggregate(ctx, expr, tgt);
         return;
     case no_exists:
-    case no_notexists:
         if (!(expr->isPure() && ctx.getMatchExpr(expr, tgt)))
             doBuildExprExists(ctx, expr, tgt);
         return;

+ 14 - 14
ecl/hqlcpp/hqlcppds.cpp

@@ -631,7 +631,6 @@ void HqlCppTranslator::doBuildAssignAggregateLoop(BuildCtx & ctx, const CHqlBoun
     switch (op)
     {
     case no_exists:
-    case no_notexists:
         {
             OwnedHqlExpr optimized = queryOptimizedExists(ctx, expr, dataset);
             if (optimized)
@@ -680,7 +679,15 @@ void HqlCppTranslator::doBuildAssignAggregateLoop(BuildCtx & ctx, const CHqlBoun
     //If no_if or no_addfiles has been optimized above then the selector for the argument will have changed => map it.
     if (arg && (dataset != oldDataset))
         arg.setown(replaceSelector(arg, oldDataset, dataset));
-    bool needToBreak = (op == no_exists || op == no_notexists);
+
+    bool needToBreak = (op == no_exists);
+    if (needToBreak)
+    {
+        //if it can have at most one row (fairly strange code!) then don't add a break
+        //unless it was deliberately a choosen to restrict the number of iterations.
+        if (hasNoMoreRowsThan(dataset, 1) && (dataset->getOperator() != no_choosen))
+            needToBreak = false;
+    }
 
     BuildCtx loopctx(ctx);
     buildDatasetIterate(loopctx, dataset, needToBreak);
@@ -688,9 +695,9 @@ void HqlCppTranslator::doBuildAssignAggregateLoop(BuildCtx & ctx, const CHqlBoun
     switch (op)
     {
     case no_exists:
-    case no_notexists:
-        buildExprAssign(loopctx, target, queryBoolExpr(op==no_exists));
-        loopctx.addBreak();
+        buildExprAssign(loopctx, target, queryBoolExpr(true));
+        if (needToBreak)
+            loopctx.addBreak();
         break;
     case no_count:
         {
@@ -748,7 +755,6 @@ bool assignAggregateDirect(const CHqlBoundTarget & target, IHqlExpression * expr
             break;
         //fall through
     case no_exists:
-    case no_notexists:
     case no_count:
         if (target.expr->getOperator() != no_variable)
             return false;
@@ -784,8 +790,7 @@ void HqlCppTranslator::doBuildAssignAggregate(BuildCtx & ctx, const CHqlBoundTar
         switch (op)
         {
         case no_exists:
-        case no_notexists:
-            buildExprAssign(ctx, target, queryBoolExpr(op==no_notexists));
+            buildExprAssign(ctx, target, queryBoolExpr(false));
             break;
         default:
             {
@@ -3401,9 +3406,6 @@ void HqlCppTranslator::doBuildRowAssignAggregateClear(BuildCtx & ctx, IReference
         case no_existsgroup:
             curTarget->buildClear(ctx, 0);
             break;
-        case no_notexistsgroup:
-            curTarget->set(ctx, queryBoolExpr(true));
-            break;
         default:
             if (src->isConstant())
                 curTarget->set(ctx, src);
@@ -3484,11 +3486,10 @@ void HqlCppTranslator::doBuildRowAssignAggregateNext(BuildCtx & ctx, IReferenceS
             }
             break;
         case no_existsgroup:
-        case no_notexistsgroup:
             assertex(!(arg && isVariableOffset));
             if (arg)
                 buildFilter(condctx, arg);
-            curTarget->set(condctx, queryBoolExpr(srcOp == no_existsgroup));
+            curTarget->set(condctx, queryBoolExpr(true));
             if (isSingleExists)
                 condctx.addBreak();
             break;
@@ -3535,7 +3536,6 @@ void HqlCppTranslator::doBuildRowAssignAggregate(BuildCtx & ctx, IReferenceSelec
             isSingleExists = false;
             break;
         case no_existsgroup:
-        case no_notexistsgroup:
             break;
         case no_mingroup:
             isSingleExists = false;

+ 0 - 2
ecl/hqlcpp/hqlcse.cpp

@@ -177,7 +177,6 @@ bool CseSpotterInfo::useInverseForAlias()
     case no_ne:
     case no_notin:
     case no_notbetween:
-    case no_notexists:
         return inverse->worthAliasingOnOwn();
     }
 
@@ -188,7 +187,6 @@ bool CseSpotterInfo::useInverseForAlias()
     case no_ne:
     case no_notin:
     case no_notbetween:
-    case no_notexists:
         return !worthAliasingOnOwn();
     }
     return op > invOp;

+ 1 - 12
ecl/hqlcpp/hqlhtcpp.cpp

@@ -11841,9 +11841,6 @@ void HqlCppTranslator::doBuildAggregateClearFunc(BuildCtx & ctx, IHqlExpression
         case no_existsgroup:
             buildClear(funcctx, target);
             break;
-        case no_notexistsgroup:
-            buildAssign(funcctx, target, queryBoolExpr(true));
-            break;
         default:
             if (src->isConstant())
                 buildAssign(funcctx, target, src);
@@ -11984,7 +11981,6 @@ void HqlCppTranslator::doBuildAggregateProcessTransform(BuildCtx & ctx, BoundRow
             }
             break;
         case no_existsgroup:
-        case no_notexistsgroup:
             assertex(!(arg && isVariableOffset));
             cond = arg;
             if (cond || !alwaysNextRow)
@@ -11992,7 +11988,7 @@ void HqlCppTranslator::doBuildAggregateProcessTransform(BuildCtx & ctx, BoundRow
                 //The assign is conditional because unconditionally it is done in the AggregateFirst
                 if (cond)
                     buildFilter(condctx, cond);
-                buildAssign(condctx, target, queryBoolExpr(srcOp == no_existsgroup));
+                buildAssign(condctx, target, queryBoolExpr(true));
             }
             break;
         default:
@@ -12083,13 +12079,6 @@ void HqlCppTranslator::doBuildAggregateMergeFunc(BuildCtx & ctx, IHqlExpression
                 buildAssign(condctx, target, queryBoolExpr(true));
                 break;
             }
-        case no_notexistsgroup:
-            {
-                BuildCtx condctx(funcctx);
-                buildFilter(condctx, target);
-                buildAssign(condctx, target, src);
-            }
-            break;
         default:
             //already filled in and wouldn't be legal to have an expression in this case anyway...
             break;

+ 59 - 56
ecl/hqlcpp/hqlsource.cpp

@@ -727,7 +727,7 @@ protected:
     void doBuildAggregateSelectIterator(BuildCtx & ctx, IHqlExpression * expr);
     void doBuildNormalizeIterators(BuildCtx & ctx, IHqlExpression * expr, bool isChildIterator);
     void buildAggregateHelpers(IHqlExpression * expr, bool needMerge);
-    void buildCountHelpers(IHqlExpression * expr);
+    void buildCountHelpers(IHqlExpression * expr, bool allowMultiple);
     virtual void buildFlagsMember(IHqlExpression * expr) {}
     void buildGlobalGroupAggregateHelpers(IHqlExpression * expr);
     void buildGroupAggregateHelpers(ParentExtract * extractBuilder, IHqlExpression * aggregate);
@@ -2277,7 +2277,7 @@ void SourceBuilder::buildAggregateHelpers(IHqlExpression * expr, bool needMerge)
 }
 
 
-void SourceBuilder::buildCountHelpers(IHqlExpression * expr)
+void SourceBuilder::buildCountHelpers(IHqlExpression * expr, bool allowMultiple)
 {
     StringBuffer s;
 
@@ -2285,62 +2285,65 @@ void SourceBuilder::buildCountHelpers(IHqlExpression * expr)
     if (transformCanFilter||isNormalize)
         translator.doBuildBoolFunction(instance->classctx, "hasFilter", true);
 
-    bool isExists = hasExistChoosenLimit();
-    OwnedHqlExpr one = getSizetConstant(1);
-
-    if (transformCanFilter||isNormalize)
+    if (allowMultiple)
     {
-        //virtual bool numValid(const void * src) = 0;
-        BuildCtx rowctx(instance->startctx);
-        rowctx.addQuotedCompound("virtual size32_t numValid(const void * src)");
-        rowctx.addQuoted("return valid((byte *)src);");
-
-        //virtual size32_t numValid(size32_t srcLen, const void * src);
-        BuildCtx rowsctx(instance->startctx);
-        rowsctx.addQuotedCompound("virtual size32_t numValid(size32_t srcLen, const void * _src)");
-        rowsctx.addQuoted("unsigned char * src = (unsigned char *)_src;");
-        OwnedHqlExpr ds = createVariable("src", makeReferenceModifier(tableExpr->getType()));
-        OwnedHqlExpr len = createVariable("srcLen", LINK(sizetType));
-        OwnedHqlExpr fullDs = createTranslated(ds, len);
-
-        if (isExists)
-        {
-            BuildCtx iterctx(rowsctx);
-            BoundRow * curRow = translator.buildDatasetIterate(iterctx, fullDs, false);
-            s.clear().append("if (valid(");
-            translator.generateExprCpp(s, curRow->queryBound());
-            s.append("))");
-            iterctx.addQuotedCompound(s);
-            iterctx.addReturn(one);
-            rowsctx.addQuoted("return 0;");
-        }
-        else
+        bool isExists = hasExistChoosenLimit();
+        OwnedHqlExpr one = getSizetConstant(1);
+
+        if (transformCanFilter||isNormalize)
         {
-            rowsctx.addQuoted("size32_t cnt = 0;");
-            BuildCtx iterctx(rowsctx);
-            BoundRow * curRow = translator.buildDatasetIterate(iterctx, fullDs, false);
-            s.clear().append("cnt += valid(");
-            translator.generateExprCpp(s, curRow->queryBound());
-            s.append(");");
-            iterctx.addQuoted(s);
-            rowsctx.addQuoted("return cnt;");
+            //virtual size32_t numValid(const void * src) = 0;
+            BuildCtx rowctx(instance->startctx);
+            rowctx.addQuotedCompound("virtual size32_t numValid(const void * src)");
+            rowctx.addQuoted("return valid((byte *)src);");
+
+            //virtual size32_t numValid(size32_t srcLen, const void * src);
+            BuildCtx rowsctx(instance->startctx);
+            rowsctx.addQuotedCompound("virtual size32_t numValid(size32_t srcLen, const void * _src)");
+            rowsctx.addQuoted("unsigned char * src = (unsigned char *)_src;");
+            OwnedHqlExpr ds = createVariable("src", makeReferenceModifier(tableExpr->getType()));
+            OwnedHqlExpr len = createVariable("srcLen", LINK(sizetType));
+            OwnedHqlExpr fullDs = createTranslated(ds, len);
+
+            if (isExists)
+            {
+                BuildCtx iterctx(rowsctx);
+                BoundRow * curRow = translator.buildDatasetIterate(iterctx, fullDs, false);
+                s.clear().append("if (valid(");
+                translator.generateExprCpp(s, curRow->queryBound());
+                s.append("))");
+                iterctx.addQuotedCompound(s);
+                iterctx.addReturn(one);
+                rowsctx.addQuoted("return 0;");
+            }
+            else
+            {
+                rowsctx.addQuoted("size32_t cnt = 0;");
+                BuildCtx iterctx(rowsctx);
+                BoundRow * curRow = translator.buildDatasetIterate(iterctx, fullDs, false);
+                s.clear().append("cnt += valid(");
+                translator.generateExprCpp(s, curRow->queryBound());
+                s.append(");");
+                iterctx.addQuoted(s);
+                rowsctx.addQuoted("return cnt;");
+            }
         }
-    }
-    else
-    {
-        //virtual size32_t numValid(size32_t srcLen, const void * src);
-        BuildCtx rowsctx(instance->startctx);
-        rowsctx.addQuotedCompound("virtual size32_t numValid(size32_t srcLen, const void * _src)");
-        if (isExists)
-            rowsctx.addReturn(one);
         else
         {
-            rowsctx.addQuoted("unsigned char * src = (unsigned char *)_src;");
-            CHqlBoundExpr bound;
-            bound.length.setown(createVariable("srcLen", LINK(sizetType)));
-            bound.expr.setown(createVariable("src", makeReferenceModifier(tableExpr->getType())));
-            OwnedHqlExpr count = translator.getBoundCount(bound);
-            rowsctx.addReturn(count);
+            //virtual size32_t numValid(size32_t srcLen, const void * src);
+            BuildCtx rowsctx(instance->startctx);
+            rowsctx.addQuotedCompound("virtual size32_t numValid(size32_t srcLen, const void * _src)");
+            if (isExists)
+                rowsctx.addReturn(one);
+            else
+            {
+                rowsctx.addQuoted("unsigned char * src = (unsigned char *)_src;");
+                CHqlBoundExpr bound;
+                bound.length.setown(createVariable("srcLen", LINK(sizetType)));
+                bound.expr.setown(createVariable("src", makeReferenceModifier(tableExpr->getType())));
+                OwnedHqlExpr count = translator.getBoundCount(bound);
+                rowsctx.addReturn(count);
+            }
         }
     }
 }
@@ -3005,7 +3008,7 @@ void DiskCountBuilder::buildMembers(IHqlExpression * expr)
     isUnfilteredCount = !(transformCanFilter||isNormalize);
     buildFilenameMember();
     DiskReadBuilderBase::buildMembers(expr);
-    buildCountHelpers(expr);
+    buildCountHelpers(expr, true);
 }
 
 
@@ -6520,7 +6523,7 @@ void IndexCountBuilder::buildMembers(IHqlExpression * expr)
 {
     buildFilenameMember();
     IndexReadBuilderBase::buildMembers(expr);
-    buildCountHelpers(expr);
+    buildCountHelpers(expr, false);
 }
 
 
@@ -6529,7 +6532,7 @@ void IndexCountBuilder::buildTransform(IHqlExpression * expr)
     if (transformCanFilter||isNormalize)
     {
         BuildCtx transformCtx(instance->startctx);
-        transformCtx.addQuotedCompound("size32_t valid(byte * _left)");
+        transformCtx.addQuotedCompound("virtual size32_t numValid(const void * _left)");
         transformCtx.addQuoted("unsigned char * left = (unsigned char *)_left;");
         translator.associateBlobHelper(transformCtx, tableExpr, "fpp");
         OwnedHqlExpr cnt;

+ 98 - 38
ecl/hqlcpp/hqlttcpp.cpp

@@ -1532,7 +1532,6 @@ IHqlExpression * evalNormalizeAggregateExpr(IHqlExpression * selector, IHqlExpre
     case no_ave:
     case no_select:
     case no_exists:
-    case no_notexists:
     case no_field:
         // a count on a child dataset or something else - add it as it is...
         //goes wrong for count(group)*
@@ -1542,7 +1541,6 @@ IHqlExpression * evalNormalizeAggregateExpr(IHqlExpression * selector, IHqlExpre
     case no_maxgroup:
     case no_mingroup:
     case no_existsgroup:
-    case no_notexistsgroup:
         {
             ForEachItemIn(idx, assigns)
             {
@@ -3144,9 +3142,6 @@ IHqlExpression * ThorHqlTransformer::getMergeTransform(IHqlExpression * dataset,
                 case no_existsgroup:
                     newRhs.setown(createValue(no_existsgroup, selected->getType(), LINK(selected)));
                     break;
-                case no_notexistsgroup:
-                    newRhs.setown(createValue(no_notexistsgroup, selected->getType(), getInverse(selected)));
-                    break;
                 case no_vargroup:
                 case no_covargroup:
                 case no_corrgroup:
@@ -3432,6 +3427,8 @@ void CompoundSourceInfo::reset()
     isBoundary = false;
     isPreloaded = false;
     isLimited = false;
+    hasChoosen = false;
+    hasSkipLimit = false;
     isCloned = false;
     isFiltered = false;
     isPostFiltered = false;
@@ -3439,38 +3436,47 @@ void CompoundSourceInfo::reset()
 }
 
 
-bool CompoundSourceInfo::canMergeLimit(IHqlExpression * expr, ClusterType targetClusterType)
+bool CompoundSourceInfo::canMergeLimit(IHqlExpression * expr, ClusterType targetClusterType) const
 {
-    if (!isLimited && !isAggregate() && !isChooseNAllLimit(expr->queryChild(1)) && isBinary())
+    if (isAggregate() || isChooseNAllLimit(expr->queryChild(1)) || !isBinary())
+        return false;
+
+    node_operator op = expr->getOperator();
+    switch (op)
     {
-        node_operator op = expr->getOperator();
-        switch (op)
+    case no_limit:
+        //Can't merge a limit into a choosen() because the limit will be applied first
+        if (isLimited || hasChoosen)
+            return false;
+
+        //Don't merge skip and onfail limits into activities that can't implement them completely
+        if (targetClusterType != RoxieCluster)
         {
-        case no_limit:
-            //Don't merge skip and onfail limits into activities that can't implement them completely
-            if (targetClusterType != RoxieCluster)
-            {
-                if (expr->hasProperty(skipAtom) || expr->hasProperty(onFailAtom))
-                    return false;
-            }
-            else
-            {
-                //Can always limit a count/aggregate with a skip limit - just resets count to 0
-                if (expr->hasProperty(skipAtom))
-                    return true;
-            }
-            break;
+            if (expr->hasProperty(skipAtom) || expr->hasProperty(onFailAtom))
+                return false;
         }
-
-        switch (sourceOp)
+        else
         {
-        case no_compound_diskread:
-        case no_compound_disknormalize:
-        case no_compound_indexread:
-        case no_compound_indexnormalize:
-            return true;
+            //Can always limit a count/aggregate with a skip limit - just resets count to 0
+            if (expr->hasProperty(skipAtom))
+                return true;
         }
+        break;
+    case no_choosen:
+        if (hasChoosen)
+            return false;
+        break;
     }
+
+    switch (sourceOp)
+    {
+    case no_compound_diskread:
+    case no_compound_disknormalize:
+    case no_compound_indexread:
+    case no_compound_indexnormalize:
+        return true;
+    }
+
     return false;
 }
 
@@ -3499,6 +3505,8 @@ void CompoundSourceInfo::ensureCompound()
 bool CompoundSourceInfo::inherit(const CompoundSourceInfo & other, node_operator newSourceOp)
 {
     isLimited = other.isLimited;
+    hasSkipLimit = other.hasSkipLimit;
+    hasChoosen = other.hasChoosen;
     isFiltered = other.isFiltered;
     isPostFiltered = other.isPostFiltered;
     isPreloaded = other.isPreloaded;
@@ -3520,7 +3528,7 @@ bool CompoundSourceInfo::inherit(const CompoundSourceInfo & other, node_operator
     return true;
 }
 
-bool CompoundSourceInfo::isAggregate()
+bool CompoundSourceInfo::isAggregate() const
 {
     switch (sourceOp)
     {
@@ -3628,7 +3636,7 @@ void CompoundSourceTransformer::analyseGatherInfo(IHqlExpression * expr)
         break;
     }
 
-    switch (expr->getOperator())
+    switch (op)
     {
     case no_newkeyindex:
         extra->sourceOp = no_compound_indexread;
@@ -3746,7 +3754,7 @@ void CompoundSourceTransformer::analyseGatherInfo(IHqlExpression * expr)
             {
                 IHqlExpression * dataset = expr->queryChild(0);
                 CompoundSourceInfo * parentExtra = queryBodyExtra(dataset);
-                if (!parentExtra->isAggregate() && !parentExtra->isLimited && parentExtra->isBinary())
+                if (!parentExtra->isAggregate() && !parentExtra->hasAnyLimit() && parentExtra->isBinary())
                 {
                     node_operator newOp = no_none;
                     switch (parentExtra->sourceOp)
@@ -3813,7 +3821,16 @@ void CompoundSourceTransformer::analyseGatherInfo(IHqlExpression * expr)
             {
                 if (extra->inherit(*parentExtra))
                 {
-                    extra->isLimited = true;
+                    if (op == no_choosen)
+                    {
+                        extra->hasChoosen = true;
+                    }
+                    else
+                    {
+                        extra->isLimited = true;
+                        if (expr->hasProperty(skipAtom))
+                            extra->hasSkipLimit = true;
+                    }
                     if (expr->hasProperty(onFailAtom))
                         extra->isCreateRowLimited = true;
                     extra->isCloned = cloneRequired;
@@ -3841,8 +3858,9 @@ void CompoundSourceTransformer::analyseGatherInfo(IHqlExpression * expr)
                 bool isSimpleCountExists = isSimpleCountExistsAggregate(expr, true, false);
                 if (parentExtra->isCreateRowLimited)
                     break;
-                if (parentExtra->isLimited && !isSimpleCountExists)
+                if (parentExtra->hasAnyLimit() && !isSimpleCountExists)
                     break;
+
                 node_operator newOp = no_none;
                 node_operator parentOp = parentExtra->sourceOp;
                 if (queryRealChild(expr, 3))
@@ -3890,7 +3908,9 @@ void CompoundSourceTransformer::analyseGatherInfo(IHqlExpression * expr)
                         break;
                     case no_compound_indexread:
                     case no_compound_indexnormalize:
-                        if (flags & CSFnewindex)
+                        //Don't create counts for (non-keyed) skip limits - little benefit, and could cause problems
+                        //correctly returning the counts - e.g. especially for exists()
+                        if ((flags & CSFnewindex) && !parentExtra->hasSkipLimit)
                         {
                             newOp = no_compound_indexaggregate;
                             //Force counts on indexes to become a new compound activity
@@ -3902,7 +3922,11 @@ void CompoundSourceTransformer::analyseGatherInfo(IHqlExpression * expr)
                             //That can be logged as a future enhancement.....
 //                          if (isSimpleCountExists && !parentExtra->isPostFiltered && (parentOp == no_compound_indexread))
                             if (isSimpleCountExists && (parentOp == no_compound_indexread))
-                                extra->forceCompound = true;
+                            {
+                                //A skip limit will require everything to be read anyway - so no point splitting in two
+                                if (!parentExtra->hasSkipLimit)
+                                    extra->forceCompound = true;
+                            }
                         }
                         break;
                     case no_compound_childread:
@@ -3936,7 +3960,7 @@ void CompoundSourceTransformer::analyseGatherInfo(IHqlExpression * expr)
         {
             IHqlExpression * dataset = expr->queryChild(0);
             CompoundSourceInfo * parentExtra = queryBodyExtra(dataset);
-            if (!parentExtra->isLimited && !parentExtra->isAggregate())
+            if (!parentExtra->hasAnyLimit() && !parentExtra->isAggregate())
             {
                 if (extra->inherit(*parentExtra))
                 {
@@ -4322,6 +4346,42 @@ IHqlExpression * OptimizeActivityTransformer::optimizeCompare(IHqlExpression * l
     if (looksLikeSimpleCount(rhs))
         return NULL;
 
+    //Convert count(x) >= 1 to exists(x)  (and other variants)
+    node_operator existOp = no_none;
+    switch (op)
+    {
+    case no_ne:
+    case no_gt:
+        if (matchesConstantValue(rhs, 0))
+            existOp = no_exists;
+        break;
+    case no_eq:
+    case no_le:
+        if (matchesConstantValue(rhs, 0))
+            existOp = no_not;
+        break;
+    case no_lt:
+        if (matchesConstantValue(rhs, 1))
+            existOp = no_not;
+        break;
+    case no_ge:
+        if (matchesConstantValue(rhs, 1))
+            existOp = no_exists;
+        break;
+    }
+
+    if (existOp != no_none)
+    {
+        if (lhs->getOperator() == no_count)
+        {
+            IHqlExpression * ds = lhs->queryChild(0);
+            OwnedHqlExpr ret = createValue(no_exists, makeBoolType(), LINK(ds));
+            if (existOp == no_not)
+                return createValue(no_not, makeBoolType(), ret.getClear());
+            return ret.getClear();
+        }
+    }
+
     unsigned choosenDelta =0;
     switch (op)
     {

+ 5 - 2
ecl/hqlcpp/hqlttcpp.ipp

@@ -226,9 +226,9 @@ class CompoundSourceInfo : public NewTransformInfo
 public:
     CompoundSourceInfo(IHqlExpression * _original); 
 
-    bool canMergeLimit(IHqlExpression * expr, ClusterType targetClusterType);
+    bool canMergeLimit(IHqlExpression * expr, ClusterType targetClusterType) const;
     void ensureCompound();
-    bool isAggregate();
+    bool isAggregate() const;
     inline bool isShared() { return splitCount > 1; }
     bool inherit(const CompoundSourceInfo & other, node_operator newSourceOp = no_none);
     inline bool isNoteUsageFirst() 
@@ -238,6 +238,7 @@ public:
     }
     inline void noteUsage() { if (splitCount < 10) splitCount++; }
     inline bool isBinary() const { return mode != no_csv && mode != no_xml; }
+    inline bool hasAnyLimit() const { return isLimited || hasChoosen; }
     void reset();
 
 public:
@@ -249,6 +250,8 @@ public:
     bool isBoundary:1;
     bool isCloned:1;
     bool isLimited:1;
+    bool hasChoosen:1;
+    bool hasSkipLimit:1;
     bool isPreloaded:1;
     bool isFiltered:1;
     bool isPostFiltered:1;

+ 11 - 7
ecl/hthor/hthor.cpp

@@ -3083,14 +3083,18 @@ const void * CHThorAggregateActivity::nextInGroup()
         helper.processFirst(rowBuilder, next);
         releaseHThorRow(next);
         
-        loop
+        bool abortEarly = (kind == TAKexistsaggregate) && !input->isGrouped();
+        if (!abortEarly)
         {
-            next = input->nextInGroup();
-            if (!next)
-                break;
-            
-            helper.processNext(rowBuilder, next);
-            releaseHThorRow(next);
+            loop
+            {
+                next = input->nextInGroup();
+                if (!next)
+                    break;
+
+                helper.processNext(rowBuilder, next);
+                releaseHThorRow(next);
+            }
         }
     }
     

+ 6 - 0
rtl/include/eclhelper_base.hpp

@@ -2618,6 +2618,12 @@ class CThorIndexCountArg : public CThorArg, implements IHThorIndexCountArg, impl
     virtual unsigned __int64 getKeyedLimit()                { return (unsigned __int64) -1; }
     virtual void onKeyedLimitExceeded()                     { }
 
+    virtual size32_t numValid(size32_t srcLen, const void * _src)
+    {
+        rtlFailUnexpected();
+        return 0;
+    }
+
 public:
     IThorIndexCallback * fpp;
 };