Browse Source

Improve implicit field projection for count/exists

- Previously the output record from an activity was required to have at least one
  field, which meant that exists/count arbitrarily "used" the first field.  With
  this change that no longer happens - often reducing the number of fields used.

A record can have a _nonEmpty_ attribute associated with it to ensure
it has a non-zero size (to differentiate it from a skipped row).  The
transform clears the byte (to make memory consistent and ensure spilt
files compress properly).

Serialize/Deserialize use the default classes so they will still work.

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 13 years ago
parent
commit
9a758af5ee

+ 2 - 0
ecl/hql/hqlatoms.cpp

@@ -218,6 +218,7 @@ _ATOM noBoundCheckAtom;
 _ATOM noCaseAtom;
 _ATOM _noHoist_Atom;
 _ATOM noLocalAtom;
+_ATOM _nonEmpty_Atom;
 _ATOM noOverwriteAtom;
 _ATOM _normalized_Atom;
 _ATOM noRootAtom;
@@ -597,6 +598,7 @@ MODULE_INIT(INIT_PRIORITY_HQLATOM)
     MAKEATOM(noCase);
     MAKESYSATOM(noHoist);
     MAKEATOM(noLocal);
+    MAKESYSATOM(nonEmpty);
     MAKEATOM(noOverwrite);
     MAKESYSATOM(normalized);
     MAKEATOM(noRoot);

+ 1 - 0
ecl/hql/hqlatoms.hpp

@@ -222,6 +222,7 @@ extern HQL_API _ATOM noBoundCheckAtom;
 extern HQL_API _ATOM noCaseAtom;
 extern HQL_API _ATOM _noHoist_Atom;
 extern HQL_API _ATOM noLocalAtom;
+extern HQL_API _ATOM _nonEmpty_Atom;
 extern HQL_API _ATOM noOverwriteAtom;
 extern HQL_API _ATOM _normalized_Atom;
 extern HQL_API _ATOM noRootAtom;

+ 7 - 0
ecl/hql/hqlattr.cpp

@@ -1162,6 +1162,13 @@ static IHqlExpression * evaluateRecordAttrSize(IHqlExpression * expr)
         }
     }
 
+    if ((maximumSize == 0) && expr->hasProperty(_nonEmpty_Atom))
+    {
+        expectedSize = 1;
+        minimumSize = 1;
+        maximumSize = 1;
+    }
+
     if (maximumSize || !maximumSizeExpr)
     {
         OwnedHqlExpr maxExpr = getSizetConstant(truncMaxlength(maximumSize));

+ 11 - 1
ecl/hql/hqlexpr.cpp

@@ -116,6 +116,7 @@ static IHqlExpression * cachedSelfExpr;
 static IHqlExpression * cachedSelfReferenceExpr;
 static IHqlExpression * cachedNoBody;
 static IHqlExpression * cachedNullRecord;
+static IHqlExpression * cachedNullRowRecord;
 static IHqlExpression * cachedOne;
 static IHqlExpression * cachedLocalAttribute;
 static IHqlExpression * constantTrue;
@@ -171,6 +172,8 @@ MODULE_INIT(INIT_PRIORITY_HQLINTERNAL)
     cachedSelfExpr = createValue(no_self, makeRowType(NULL));
     cachedSelfReferenceExpr = createValue(no_selfref);
     cachedNullRecord = createRecord()->closeExpr();
+    OwnedHqlExpr nonEmptyAttr = createAttribute(_nonEmpty_Atom);
+    cachedNullRowRecord = createRecord(nonEmptyAttr);
     cachedOne = createConstant(1);
     cachedLocalAttribute = createAttribute(localAtom);
     constantTrue = createConstant(createBoolValue(true));
@@ -206,6 +209,7 @@ MODULE_EXIT()
     cachedActiveTableExpr->Release();
     cachedSelfReferenceExpr->Release();
     cachedSelfExpr->Release();
+    cachedNullRowRecord->Release();
     cachedNullRecord->Release();
 
     ClearTypeCache();
@@ -1565,6 +1569,7 @@ bool checkConstant(node_operator op)
     case no_counter:
     case no_loopcounter:
     case no_sequence:
+    case no_table:
         return false;
     // following are currently not implemented in the const folder - can enable if they are.
     case no_global:
@@ -14304,6 +14309,11 @@ IHqlExpression * queryNullRecord()
     return cachedNullRecord;
 }
 
+IHqlExpression * queryNullRowRecord()
+{
+    return cachedNullRowRecord;
+}
+
 IHqlExpression * createNullDataset()
 {
     return createDataset(no_null, LINK(queryNullRecord()));
@@ -14801,7 +14811,7 @@ bool isKeyedCountAggregate(IHqlExpression * aggregate)
 {
     IHqlExpression * transform = aggregate->queryChild(2);
     IHqlExpression * assign = transform->queryChild(0);
-    if (assign->getOperator() != no_assign)
+    if (!assign || assign->getOperator() != no_assign)
         return false;
     IHqlExpression * count = assign->queryChild(1);
     if (count->getOperator() != no_countgroup)

+ 8 - 1
ecl/hql/hqlexpr.hpp

@@ -1534,6 +1534,7 @@ extern HQL_API bool isSimpleCountExistsAggregate(IHqlExpression * aggregateExpr,
 extern HQL_API bool isKeyedCountAggregate(IHqlExpression * aggregate);
 extern HQL_API IHqlExpression * createNullDataset();
 extern HQL_API IHqlExpression * queryNullRecord();
+extern HQL_API IHqlExpression * queryNullRowRecord();
 extern HQL_API IHqlExpression * queryExpression(ITypeInfo * t);
 extern HQL_API IHqlExpression * queryExpression(IHqlDataset * ds);
 inline IHqlExpression * queryExpression(IHqlScope * scope) { return scope ? scope->queryExpression() : NULL; }
@@ -1607,7 +1608,7 @@ inline IHqlExpression * queryDistribution(IHqlExpression * expr)    { return que
 inline IHqlExpression * queryGlobalSortOrder(IHqlExpression * expr) { return queryGlobalSortOrder(expr->queryType()); }
 inline IHqlExpression * queryLocalUngroupedSortOrder(IHqlExpression * expr) { return queryLocalUngroupedSortOrder(expr->queryType()); }
 inline IHqlExpression * queryGroupSortOrder(IHqlExpression * expr)  { return queryGroupSortOrder(expr->queryType()); }
-inline bool isGrouped(ITypeInfo * type)                     { return type->queryGroupInfo() != NULL; }
+inline bool isGrouped(ITypeInfo * type)                     { return type && type->queryGroupInfo() != NULL; }
 inline bool isGrouped(IHqlExpression * expr)                { return isGrouped(expr->queryType()); }
 
 inline IFunctionTypeExtra * queryFunctionTypeExtra(ITypeInfo * type)    { return static_cast<IFunctionTypeExtra *>(queryUnqualifiedType(type)->queryModifierExtra()); }
@@ -1665,6 +1666,12 @@ inline bool isAbstractDataset(IHqlExpression * expr)
     IHqlExpression * record = expr->queryRecord();
     return record && record->hasProperty(abstractAtom);
 }
+inline IHqlExpression * queryRecord(IHqlExpression * expr)
+{
+    if (!expr)
+        return NULL;
+    return expr->queryRecord();
+}
 
 extern HQL_API bool isPureVirtual(IHqlExpression * cur);
 inline bool isForwardScope(IHqlScope * scope) { return scope && (queryExpression(scope)->getOperator() == no_forwardscope); }

+ 2 - 0
ecl/hql/hqlopt.cpp

@@ -576,6 +576,8 @@ IHqlExpression * CTreeOptimizer::optimizeAggregateDataset(IHqlExpression * trans
             if (ds->hasProperty(prefetchAtom))
                 break;
 
+            //MORE: If the record is empty then either remove the project if no SKIP, or convert the SKIP to a filter
+
             //Don't remove projects for the moment because they can make counts of disk reads much less
             //efficient.  Delete the following lines once we have a count-diskread activity
             if (!isScalarAggregate && !(options & (HOOcompoundproject|HOOinsidecompound)) && !ds->hasProperty(_countProject_Atom) )

+ 1 - 1
ecl/hql/hqlthql.cpp

@@ -1084,7 +1084,7 @@ void HqltHql::toECL(IHqlExpression *expr, StringBuffer &s, bool paren, bool inTy
             ForEachChild(i2, expr)
             {
                 IHqlExpression *child = queryChild(expr, i2);
-                if (child->getOperator() == no_record)
+                if (child && child->getOperator() == no_record)
                 {
                     s.append("(");
                     if (isEclAlias(child) || !m_recurse)

+ 1 - 0
ecl/hql/hqlutil.hpp

@@ -63,6 +63,7 @@ extern HQL_API IHqlExpression * createIf(IHqlExpression * cond, IHqlExpression *
 
 extern HQL_API void gatherIndexBuildSortOrder(HqlExprArray & sorts, IHqlExpression * expr, bool sortIndexPayload);
 extern HQL_API bool recordContainsBlobs(IHqlExpression * record);
+inline bool recordIsEmpty(IHqlExpression * record) { return queryLastField(record) == NULL; }
 extern HQL_API IHqlExpression * queryVirtualFileposField(IHqlExpression * record);
 
 extern HQL_API IHqlExpression * flattenListOwn(IHqlExpression * list);

+ 6 - 7
ecl/hqlcpp/hqlckey.cpp

@@ -409,8 +409,7 @@ void KeyedJoinInfo::buildExtractFetchFields(BuildCtx & ctx)
     }
 
     //virtual IOutputMetaData * queryFetchInputRecordSize() = 0;
-    OwnedHqlExpr null = createValue(no_null, makeVoidType());
-    translator.buildMetaMember(ctx, fileAccessDataset.get() ? fileAccessDataset.get() : null.get(), "queryFetchInputRecordSize");
+    translator.buildMetaMember(ctx, fileAccessDataset, false, "queryFetchInputRecordSize");
 }
 
 
@@ -430,7 +429,7 @@ void KeyedJoinInfo::buildExtractIndexReadFields(BuildCtx & ctx)
     }
 
     //virtual IOutputMetaData * queryIndexReadInputRecordSize() = 0;
-    translator.buildMetaMember(ctx, keyAccessDataset, "queryIndexReadInputRecordSize");
+    translator.buildMetaMember(ctx, keyAccessDataset, isGrouped(keyAccessDataset), "queryIndexReadInputRecordSize");    //->false
 }
 
 
@@ -465,7 +464,7 @@ void KeyedJoinInfo::buildExtractJoinFields(ActivityInstance & instance)
     }
 
     //virtual IOutputMetaData * queryJoinFieldsRecordSize() = 0;
-    translator.buildMetaMember(instance.classctx, extractJoinFieldsRecord, "queryJoinFieldsRecordSize");
+    translator.buildMetaMember(instance.classctx, extractJoinFieldsRecord, false, "queryJoinFieldsRecordSize");
 }
 
 void KeyedJoinInfo::buildFetchMatch(BuildCtx & ctx)
@@ -1136,7 +1135,7 @@ void HqlCppTranslator::buildKeyedJoinExtra(ActivityInstance & instance, IHqlExpr
 {
     //virtual IOutputMetaData * queryDiskRecordSize() = 0;  // Excluding fpos and sequence
     if (info->isFullJoin())
-        buildMetaMember(instance.classctx, info->queryRawRhs(), "queryDiskRecordSize");
+        buildMetaMember(instance.classctx, info->queryRawRhs(), false, "queryDiskRecordSize");
 
     //virtual unsigned __int64 extractPosition(const void * _right) = 0;  // Gets file position value from rhs row
     if (info->isFullJoin())
@@ -1194,7 +1193,7 @@ void HqlCppTranslator::buildKeyJoinIndexReadHelper(ActivityInstance & instance,
     buildFilenameFunction(instance, instance.startctx, "getIndexFileName", info->queryKeyFilename(), hasDynamicFilename(info->queryKey()));
 
     //virtual IOutputMetaData * queryIndexRecordSize() = 0; //Excluding fpos and sequence
-    buildMetaMember(instance.classctx, info->queryRawKey(), "queryIndexRecordSize");
+    buildMetaMember(instance.classctx, info->queryRawKey(), false, "queryIndexRecordSize");
 
     //virtual void createSegmentMonitors(IIndexReadContext *ctx, const void *lhs) = 0;
     info->buildMonitors(instance.startctx);
@@ -1436,7 +1435,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityKeyedDistribute(BuildCtx & ctx
     buildFilenameFunction(*instance, instance->startctx, "getIndexFileName", keyFilename, dynamic);
 
     //virtual IOutputMetaData * queryIndexRecordSize() = 0; //Excluding fpos and sequence
-    buildMetaMember(instance->classctx, info.queryRawKey(), "queryIndexRecordSize");
+    buildMetaMember(instance->classctx, info.queryRawKey(), false, "queryIndexRecordSize");
 
     //virtual void createSegmentMonitors(IIndexReadContext *ctx, const void *lhs) = 0;
     info.buildMonitors(instance->startctx);

+ 1 - 1
ecl/hqlcpp/hqlcpp.ipp

@@ -1608,7 +1608,7 @@ public:
 
     void buildSetXmlSerializer(StringBuffer & helper, ITypeInfo * valueType);
 
-    void buildMetaMember(BuildCtx & ctx, IHqlExpression * datasetOrRecord, const char * name);
+    void buildMetaMember(BuildCtx & ctx, IHqlExpression * datasetOrRecord, bool isGrouped, const char * name);
     void buildMetaSerializerClass(BuildCtx & ctx, IHqlExpression * record, const char * serializerName);
     void buildMetaDeserializerClass(BuildCtx & ctx, IHqlExpression * record, const char * deserializerName);
     bool buildMetaPrefetcherClass(BuildCtx & ctx, IHqlExpression * record, const char * prefetcherName);

+ 53 - 74
ecl/hqlcpp/hqlhtcpp.cpp

@@ -168,13 +168,11 @@ bool InternalResultTracker::noteUse(IHqlExpression * searchName, unsigned curGra
 }
 
 //---------------------------------------------------------------------------
-IHqlExpression * getMetaUniqueKey(IHqlExpression * dataset)
+IHqlExpression * getMetaUniqueKey(IHqlExpression * record, bool grouped)
 {
-    IHqlExpression * record = dataset->queryRecord();
     if (record) record = record->queryBody();
     LinkedHqlExpr search = record;
-    ITypeInfo * type = dataset->queryType();
-    if (type && type->queryGroupInfo() != NULL)
+    if (grouped)
         search.setown(createAttribute(groupedAtom, search.getClear()));
     if (!search)
         search.setown(createValue(no_null));
@@ -1105,9 +1103,14 @@ void TransformBuilder::doTransform(BuildCtx & ctx, IHqlExpression * transform, B
     }
 
     translator.filterExpandAssignments(ctx, this, assigns, transform);
-    buildTransformChildren(ctx, self->queryRecord(), self->querySelector());
+    IHqlExpression * selfRecord = self->queryRecord();
+    buildTransformChildren(ctx, selfRecord, self->querySelector());
     flush(ctx);
     checkAssigned();
+
+    //If this is a blank record with the size "fixed" to 1, clear the byte so consistent and disk writes compress well
+    if (isEmptyRecord(selfRecord) && selfRecord->hasProperty(_nonEmpty_Atom))
+        translator.buildClearRecord(ctx, self->querySelector(), selfRecord, 0);
 }
 
 
@@ -1734,23 +1737,20 @@ void GlobalFileTracker::writeToGraph()
 
 //---------------------------------------------------------------------------
 
-MetaInstance::MetaInstance(HqlCppTranslator & translator, IHqlExpression * _dataset)
-{
-    setDataset(translator, _dataset);
-}
-
-IHqlExpression * MetaInstance::queryRecord()
+MetaInstance::MetaInstance(HqlCppTranslator & translator, IHqlExpression * _record, bool _isGrouped)
 {
-    return dataset->queryRecord();
+    setMeta(translator, _record, _isGrouped);
 }
 
-void MetaInstance::setDataset(HqlCppTranslator & translator, IHqlExpression * _dataset)
+void MetaInstance::setMeta(HqlCppTranslator & translator, IHqlExpression * _record, bool _isGrouped)
 {
-    StringBuffer s,recordBase;
+    record = _record;
+    grouped = _isGrouped;
+    assertex(!record || record->getOperator() == no_record);
 
-    dataset = _dataset;
-    searchKey.setown(::getMetaUniqueKey(dataset));
+    searchKey.setown(::getMetaUniqueKey(record, grouped));
 
+    StringBuffer s,recordBase;
     appendUniqueId(recordBase, translator.getConsistentUID(searchKey));
 
     metaName.set(s.clear().append("mi").append(recordBase).str());
@@ -1819,7 +1819,8 @@ ActivityInstance::ActivityInstance(HqlCppTranslator & _translator, BuildCtx & ct
     if ((op == no_setgraphresult) && translator.queryOptions().minimizeActivityClasses)
         outputDataset = dataset->queryChild(0);
 
-    meta.setDataset(translator, outputDataset);
+    IHqlExpression * record = queryRecord(outputDataset);
+    meta.setMeta(translator, record, ::isGrouped(outputDataset));
 
     activityId = translator.nextActivityId();
 
@@ -2436,9 +2437,7 @@ void ActivityInstance::buildMetaMember()
 
     translator.buildMetaInfo(meta);
 
-    IHqlExpression * dataset = meta.dataset;
-    ITypeInfo * type = dataset->queryType();
-    if (type && type->getTypeCode() != type_void)
+    if (meta.queryRecord())
     {
         StringBuffer s;
         s.append("virtual IOutputMetaData * queryOutputMeta() { return &").append(meta.queryInstanceObject()).append("; }");
@@ -2451,8 +2450,7 @@ void ActivityInstance::addConstructorMetaParameter()
 {
     translator.buildMetaInfo(meta);
 
-    ITypeInfo * type = meta.dataset->queryType();
-    if (type && type->getTypeCode() != type_void)
+    if (meta.queryRecord())
     {
         StringBuffer s;
         s.append("&").append(meta.queryInstanceObject());
@@ -4011,7 +4009,6 @@ void HqlCppTranslator::buildMetaInfo(MetaInstance & instance)
 
     BuildCtx declarectx(*code, declareAtom);
 
-    IHqlExpression * dataset = instance.dataset;
     OwnedHqlExpr search = instance.getMetaUniqueKey();
 
     // stop duplicate classes being generated.
@@ -4031,13 +4028,12 @@ void HqlCppTranslator::buildMetaInfo(MetaInstance & instance)
     endText.append(" ").append(instance.instanceName).append(";");
     BuildCtx metactx(declarectx);
 
-    ITypeInfo * type = dataset->queryType();
-    IHqlExpression * record = dataset->queryRecord();
+    IHqlExpression * record = instance.queryRecord();
     ColumnToOffsetMap * map = queryRecordOffsetMap(record);
     
     unsigned flags = MDFhasserialize;       // we always generate a serialize since 
     bool useTypeForXML = false;
-    if (type && type->getTypeCode() == type_groupedtable)
+    if (instance.isGrouped())
         flags |= MDFgrouped;
     if (map)
         flags |= MDFhasxml;
@@ -4052,10 +4048,9 @@ void HqlCppTranslator::buildMetaInfo(MetaInstance & instance)
         useTypeForXML = true;
     }
 
-    if (type && type->getTypeCode() == type_groupedtable)
+    if (instance.isGrouped())
     {
-        OwnedHqlExpr ungrouped = createDataset(no_group, LINK(dataset));
-        MetaInstance ungroupedMeta(*this, ungrouped);
+        MetaInstance ungroupedMeta(*this, record, false);
         buildMetaInfo(ungroupedMeta);
 
         s.append("struct ").append(instance.metaName).append(" : public ").append(ungroupedMeta.metaName);
@@ -4133,16 +4128,12 @@ void HqlCppTranslator::buildMetaInfo(MetaInstance & instance)
                 getctx.addQuoted(s.str());
                 getctx.addQuoted("const unsigned char * left = (const unsigned char *)data;");
 
-                LinkedHqlExpr selfDs = dataset;
-                if (!selfDs->isDataset() || !selfDs->isDatarow())
-                    selfDs.setown(createDataset(no_null, LINK(dataset->queryRecord())));
-
+                LinkedHqlExpr selfDs = createDataset(no_null, LINK(instance.queryRecord()));
                 BoundRow * selfRow = bindTableCursorOrRow(getctx, selfDs, "left");
                 OwnedHqlExpr size = getRecordSize(selfRow->querySelector());
                 buildReturn(getctx, size);
-
             }
-            assertex(!(type && type->getTypeCode() == type_groupedtable));
+            assertex(!instance.isGrouped());
 
             StringBuffer typeName;
             unsigned recordTypeFlags = buildRtlType(typeName, record->queryType());
@@ -4151,7 +4142,7 @@ void HqlCppTranslator::buildMetaInfo(MetaInstance & instance)
 
             if (record->numChildren() != 0)
             {
-                OwnedHqlExpr anon = createDataset(no_anon, LINK(dataset->queryRecord()));
+                OwnedHqlExpr anon = createDataset(no_anon, LINK(instance.queryRecord()));
                 if (!useTypeForXML || (recordTypeFlags & (RFTMinvalidxml|RFTMhasxmlattr)))
                     buildXmlSerialize(metactx, anon, "toXML", true);
             }
@@ -4165,9 +4156,8 @@ void HqlCppTranslator::buildMetaInfo(MetaInstance & instance)
             if (flags & MDFneedserialize)
             {
                 OwnedHqlExpr serializedRecord = getSerializedForm(record);
-                OwnedHqlExpr serializedDataset = createDataset(no_anon, LINK(serializedRecord));
 
-                MetaInstance serializedMeta(*this, serializedDataset);
+                MetaInstance serializedMeta(*this, serializedRecord, false);
                 buildMetaInfo(serializedMeta);
                 StringBuffer s;
                 s.append("virtual IOutputMetaData * querySerializedMeta() { return &").append(serializedMeta.queryInstanceObject()).append("; }");
@@ -4203,7 +4193,7 @@ public:
 
     void callChildFunction(BuildCtx & ctx, IHqlExpression * selected)
     {
-        MetaInstance childMeta(translator, selected);
+        MetaInstance childMeta(translator, selected->queryRecord(), false);
         translator.buildMetaInfo(childMeta);
         callChildFunction(ctx, selected, childMeta);
     }
@@ -4252,7 +4242,7 @@ public:
                             if (cur->hasProperty(_linkCounted_Atom))
                             {
                                 //releaseRowset(ctx, count, rowset)
-                                MetaInstance childMeta(translator, selected);
+                                MetaInstance childMeta(translator, selected->queryRecord(), false);
                                 translator.buildMetaInfo(childMeta);
                                 processRowset(ctx, selected, childMeta);
                             }
@@ -4395,19 +4385,14 @@ void HqlCppTranslator::generateMetaRecordSerialize(BuildCtx & ctx, IHqlExpressio
 
 IHqlExpression * HqlCppTranslator::buildMetaParameter(IHqlExpression * arg)
 {
-    OwnedHqlExpr dataset = createDataset(no_anon, LINK(arg->queryRecord()));
-    MetaInstance meta(*this, dataset);
+    MetaInstance meta(*this, arg->queryRecord(), false);
     buildMetaInfo(meta);
     return createQuoted(meta.queryInstanceObject(), makeBoolType());
 }
 
-void HqlCppTranslator::buildMetaMember(BuildCtx & ctx, IHqlExpression * datasetOrRecord, const char * name)
+void HqlCppTranslator::buildMetaMember(BuildCtx & ctx, IHqlExpression * datasetOrRecord, bool grouped, const char * name)
 {
-    LinkedHqlExpr dataset = datasetOrRecord;
-    if (datasetOrRecord->getOperator() == no_record)
-        dataset.setown(createDataset(no_anon, LINK(datasetOrRecord)));
-
-    MetaInstance meta(*this, dataset);
+    MetaInstance meta(*this, ::queryRecord(datasetOrRecord), grouped);
     StringBuffer s;
 
     buildMetaInfo(meta);
@@ -4417,8 +4402,7 @@ void HqlCppTranslator::buildMetaMember(BuildCtx & ctx, IHqlExpression * datasetO
 
 void HqlCppTranslator::buildMetaForRecord(StringBuffer & name, IHqlExpression * record)
 {
-    OwnedHqlExpr dataset = createDataset(no_anon, LINK(record));
-    MetaInstance meta(*this, dataset);
+    MetaInstance meta(*this, record, false);
     buildMetaInfo(meta);
     name.append(meta.queryInstanceObject());
 }
@@ -4461,8 +4445,7 @@ void HqlCppTranslator::ensureRowSerializer(StringBuffer & serializerName, BuildC
         s.append("Owned<IOutputRowDeserializer> ").append(uid).append(";");
     declarectx->addQuoted(s);
 
-    OwnedHqlExpr ds = createDataset(no_anon, LINK(record));
-    MetaInstance meta(*this, ds);
+    MetaInstance meta(*this, record, false);
     buildMetaInfo(meta);
 
     s.clear().append(uid).append(".setown(").append(meta.queryInstanceObject());
@@ -4503,8 +4486,7 @@ void HqlCppTranslator::ensureRowPrefetcher(StringBuffer & prefetcherName, BuildC
     s.append("Owned<ISourceRowPrefetcher> ").append(uid).append(";");
     declarectx->addQuoted(s);
 
-    OwnedHqlExpr ds = createDataset(no_anon, LINK(record));
-    MetaInstance meta(*this, ds);
+    MetaInstance meta(*this, record, false);
     buildMetaInfo(meta);
 
     s.clear().append(uid).append(".setown(").append(meta.queryInstanceObject());
@@ -6455,7 +6437,7 @@ void HqlCppTranslator::buildRecordSerializeExtract(BuildCtx & ctx, IHqlExpressio
     OwnedHqlExpr serializedDataset = createDataset(no_null, LINK(serializedRecord));
     OwnedHqlExpr memoryDataset = createDataset(no_anon, LINK(memoryRecord));
 
-    MetaInstance meta(*this, memoryDataset);
+    MetaInstance meta(*this, memoryRecord, false);
     buildMetaInfo(meta);
 
     if (recordTypesMatch(memoryRecord, serializedRecord))
@@ -9467,7 +9449,7 @@ void HqlCppTranslator::doBuildIndexOutputTransform(BuildCtx & ctx, IHqlExpressio
 
     buildReturnRecordSize(subctx, selfCursor);
 
-    buildMetaMember(ctx, tgtDataset, "queryDiskRecordSize");
+    buildMetaMember(ctx, tgtDataset, false, "queryDiskRecordSize");
 }
 
 
@@ -9974,11 +9956,10 @@ ABoundActivity * HqlCppTranslator::doBuildActivityOutput(BuildCtx & ctx, IHqlExp
 
         buildFormatCrcFunction(instance->classctx, "getFormatCrc", dataset, NULL, 0);
 
-        LinkedHqlExpr diskDataset = dataset;
-        if (!expr->hasProperty(groupedAtom) && isGroupedActivity(dataset))
-            diskDataset.setown(createDataset(no_group, LINK(dataset), NULL));
-        if ((kind != TAKspill) || (diskDataset->queryType() != expr->queryType()))
-            buildMetaMember(instance->classctx, diskDataset, "queryDiskRecordSize");
+        bool grouped = isGrouped(dataset);
+        bool ignoreGrouped = !expr->hasProperty(groupedAtom);
+        if ((kind != TAKspill) || (dataset->queryType() != expr->queryType()) || (grouped && ignoreGrouped))
+            buildMetaMember(instance->classctx, dataset, grouped && !ignoreGrouped, "queryDiskRecordSize");
         buildClusterHelper(instance->classctx, expr);
 
         //Both csv write and pipe with csv/xml format
@@ -10160,7 +10141,7 @@ IWUResult * HqlCppTranslator::createDatasetResultSchema(IHqlExpression * sequenc
 
     OwnedHqlExpr serialRecord = getSerializedForm(record);
     OwnedHqlExpr ds = createDataset(no_anon, LINK(serialRecord));
-    MetaInstance meta(*this, ds);
+    MetaInstance meta(*this, serialRecord, false);
     buildMetaInfo(meta);
     result->setResultRecordSizeEntry(meta.metaFactoryName);
 
@@ -10275,8 +10256,7 @@ void HqlCppTranslator::buildXmlSerializeDataset(BuildCtx & ctx, IHqlExpression *
 
     StringBuffer boundRowText;
     generateExprCpp(boundRowText, sourceRow->queryBound());
-    OwnedHqlExpr ds = createDataset(no_null, LINK(field->queryRecord()));
-    buildXmlSerializeUsingMeta(subctx, ds, boundRowText.str());
+    buildXmlSerializeUsingMeta(subctx, field, boundRowText.str());
 
     buildXmlSerializeEndNested(subctx, rowName);
 
@@ -10477,7 +10457,7 @@ void HqlCppTranslator::buildXmlSerialize(BuildCtx & ctx, IHqlExpression * datase
 
 void HqlCppTranslator::buildXmlSerializeUsingMeta(BuildCtx & ctx, IHqlExpression * dataset, const char * self)
 {
-    MetaInstance meta(*this, dataset);
+    MetaInstance meta(*this, dataset->queryRecord(), false);
     buildMetaInfo(meta);
 
     StringBuffer s;
@@ -10963,7 +10943,7 @@ void HqlCppTranslator::generateSerializeKey(BuildCtx & nestedctx, node_operator
 
             generateSerializeFunction(classctx, "recordToKey", true, dataset, keyActiveRef, datasetSelects, keySelects);
             generateSerializeFunction(classctx, "keyToRecord", false, keyActiveRef, dataset, keySelects, datasetSelects);
-            buildMetaMember(classctx, keyDataset, "queryRecordSize");
+            buildMetaMember(classctx, keyRecord, false, "queryRecordSize");
 
             endNestedClass();
 
@@ -11723,7 +11703,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityProcess(BuildCtx & ctx, IHqlEx
 
     buildInstancePrefix(instance);
 
-    buildMetaMember(instance->classctx, right->queryRecord(), "queryRightRecordSize");
+    buildMetaMember(instance->classctx, right->queryRecord(), false, "queryRightRecordSize");
 
     {
         BuildCtx initialctx(instance->startctx);
@@ -12765,7 +12745,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityAggregate(BuildCtx & ctx, IHql
                 associateRemoteResult(*instance, sequence, name);
             }
         }
-        buildMetaMember(instance->classctx, resultDataset, "queryAggregateRecordSize");
+        buildMetaMember(instance->classctx, resultDataset, isGrouped(resultDataset), "queryAggregateRecordSize");
     }
 
     buildInstanceSuffix(instance);
@@ -13199,7 +13179,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityDedup(BuildCtx & ctx, IHqlExpr
         OwnedHqlExpr keyDataset = createDataset(no_anon, createRecordInheritMaxLength(fields, dataset));
 
         //virtual IOutputMetaData * queryKeySize()
-        buildMetaMember(instance->classctx, keyDataset, "queryKeySize");
+        buildMetaMember(instance->classctx, keyDataset, false, "queryKeySize");
 
         //virtual unsigned recordToKey(void * _key, const void * _record)
         buildDedupSerializeFunction(instance->startctx, "recordToKey", dataset, keyDataset, info.equalities, selects, selSeq);
@@ -13663,7 +13643,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityNormalizeChild(BuildCtx & ctx,
     buildInstancePrefix(instance);
 
     //Generate queryChildRecordSize();
-    buildMetaMember(instance->classctx, childDataset, "queryChildRecordSize");
+    buildMetaMember(instance->classctx, childDataset, isGrouped(childDataset), "queryChildRecordSize");
 
     // INormalizeChildIterator * queryIterator();
     { 
@@ -13691,7 +13671,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityNormalizeChild(BuildCtx & ctx,
             beginNestedClass(iterclassctx, memberName, "CNormalizeChildIterator");
             format = FormatBlockedDataset;
 
-            MetaInstance childmeta(*this, childDataset);
+            MetaInstance childmeta(*this, childDataset->queryRecord(), isGrouped(childDataset));
             buildMetaInfo(childmeta);
             s.clear().append(className).append("() : CNormalizeChildIterator(").append(childmeta.queryInstanceObject()).append(") {}");
             iterclassctx.addQuoted(s);
@@ -15446,7 +15426,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivitySort(BuildCtx & ctx, IHqlExpre
             generateExprCpp(s, source->queryChild(0)).append("; }");
             instance->startctx.addQuoted(s);
 
-            buildMetaMember(instance->classctx, cosortDataset, "querySortedRecordSize");
+            buildMetaMember(instance->classctx, cosortDataset, false, "querySortedRecordSize");
         }
         else
         {
@@ -17044,8 +17024,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityDistribution(BuildCtx & ctx, I
         HqlExprArray fields;
         fields.append(*createField(unnamedAtom, makeDataType(numFields*sizeof(void*)), NULL, NULL));
         OwnedHqlExpr tempRecord = createRecord(fields);
-        OwnedHqlExpr nullDataset = createDataset(no_anon, tempRecord.getLink());
-        buildMetaMember(instance->classctx, nullDataset, "queryInternalRecordSize");
+        buildMetaMember(instance->classctx, tempRecord, false, "queryInternalRecordSize");
     }
 
     //Generate the send Result method().

+ 9 - 6
ecl/hqlcpp/hqlhtcpp.ipp

@@ -40,20 +40,23 @@ class MetaInstance
 {
 public:
     //Shouldn't really need to pass translator, but it provides a place to have a per-query unique id. Anything else seems even messier.
-    MetaInstance()  { dataset = NULL; }
-    MetaInstance(HqlCppTranslator & translator, IHqlExpression * _dataset);
-    IHqlExpression * queryRecord();
-    void setDataset(HqlCppTranslator & translator, IHqlExpression * _dataset);
+    MetaInstance()  { record = NULL; grouped = false; }
+    MetaInstance(HqlCppTranslator & translator, IHqlExpression * _record, bool _isGrouped);
+    bool isGrouped() const { return grouped; }
+    IHqlExpression * queryRecord() const { return record; }
+    void setMeta(HqlCppTranslator & translator, IHqlExpression * _record, bool _isGrouped);
     IHqlExpression * getMetaUniqueKey()     { return searchKey.getLink(); }
     const char * queryInstanceObject()      { return instanceObject ? instanceObject : instanceName; }
 
 public:
-    IHqlExpression * dataset;
-    HqlExprAttr      searchKey;
     StringAttr       metaName;
     StringAttr       instanceName;
     StringAttr       metaFactoryName;
     StringAttr       instanceObject;
+private:
+    HqlExprAttr      searchKey;
+    IHqlExpression * record;
+    bool grouped;
 };
 
 //===========================================================================

+ 4 - 57
ecl/hqlcpp/hqliproj.cpp

@@ -498,55 +498,6 @@ int ComplexImplicitProjectInfo::docompare(const void * l,const void * r) const
 }
 
 
-void ComplexImplicitProjectInfo::ensureOutputNotEmpty()
-{
-    if (outputInfo && (outputFields.ordinality() == 0))
-    {
-        //MORE: Sometimes this can pull in other data from upstream activities - should pick one that is already required if
-        //there are any.  e.g., count(ds(x=0)) should pick field x.
-#if 1
-        IHqlExpression * best = &outputInfo->outputFields.item(0);
-#else
-        //Looks good, but in first field is more often used by something else. so disable...
-        //choose the smallest field at a fixed offset
-        IHqlExpression * best = NULL;
-        unsigned bestSize = UNKNOWN_LENGTH;
-        ForEachItemIn(i, outputInfo->outputFields)
-        {
-            IHqlExpression & cur = outputInfo->outputFields.item(i);
-            ITypeInfo * curType = cur.queryType();
-            type_t tc = curType->getTypeCode();
-            //try not to select record fields - they tend to have 0 length size
-            size32_t curSize = (tc == type_row) ? UNKNOWN_LENGTH-1 : curType->getSize();
-            if (!best)
-            {
-                best = &cur;
-                bestSize = curSize;
-            }
-            else if (bestSize > curSize)
-            {
-                switch (tc)
-                {
-                case type_bitfield:
-                case type_alien:
-                case type_row:
-                    //avoid these if at all possible....
-                    break;
-                default:
-                    best = &cur;
-                    bestSize = curSize;
-                    break;
-                }
-            }
-            if (curSize == UNKNOWN_LENGTH)
-                break;
-        }
-#endif
-
-        outputFields.append(OLINK(*best));
-    }
-}
-
 void ComplexImplicitProjectInfo::finalizeOutputRecord()
 {
     //MORE: Create them in the same order as the original record + don't change if numOutputFields = numOriginalOutputFields
@@ -563,10 +514,11 @@ void ComplexImplicitProjectInfo::finalizeOutputRecord()
             }
 
             outputFields.sort(*outputInfo);
-            assertex(outputFields.ordinality() != 0);
-
             outputFields.getFields(recordFields);
 
+            if (recordFields.ordinality() == 0)
+                recordFields.append(*createAttribute(_nonEmpty_Atom));
+
             //Ensure that maxSize is set on the new record - if necessary
             OwnedHqlExpr newRecord = createRecord(recordFields);
             //optionally? pack the record so that it is in the optimal alignment order
@@ -1428,6 +1380,7 @@ ProjectExprKind ImplicitProjectTransformer::getProjectExprKind(IHqlExpression *
     case no_newparse:
     case no_newxmlparse:
     case no_createrow:
+    case no_rollupgroup:
         return CreateRecordActivity;
     case no_inlinetable:
         return CreateRecordSourceActivity;
@@ -1530,7 +1483,6 @@ ProjectExprKind ImplicitProjectTransformer::getProjectExprKind(IHqlExpression *
         //MORE: Rethink these later:
     case no_combine:
     case no_combinegroup:
-    case no_rollupgroup:
     case no_regroup:
     case no_loop:
     case no_graphloop:
@@ -1674,11 +1626,6 @@ void ImplicitProjectTransformer::calculateFieldsUsed(IHqlExpression * expr)
 
         if (extra->outputFields.includeAll())
             assertex(extra->newOutputRecord != NULL);       //extra->newOutputRecord.set(expr->queryRecord());
-
-        //Ensure at least one field is required - otherwise meta goes wrong.  It really needs to be added here, rather than later,
-        //otherwise the field tracking for iterate/rollup etc. go wrong.  Could possibly improve later if we added code to project the
-        //dataset in front of a iterate/rollup
-        extra->ensureOutputNotEmpty();
     }
 
     switch (extra->activityKind())

+ 0 - 1
ecl/hqlcpp/hqliproj.ipp

@@ -226,7 +226,6 @@ public:
     void addAllOutputs();
     bool addOutputField(IHqlExpression * field);
     IHqlExpression * createOutputProject(IHqlExpression * ds);
-    void ensureOutputNotEmpty();
     void finalizeOutputRecord();
     void inheritRequiredFields(UsedFieldSet * requiredList);
     bool safeToReorderInput();

+ 1 - 1
ecl/hqlcpp/hqllib.cpp

@@ -587,7 +587,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityLibraryInstance(BuildCtx & ctx
         IHqlExpression & cur = library->outputs.item(iout);
         OwnedHqlExpr dataset = moduleScope->lookupSymbol(cur.queryName(), LSFpublic, dummyCtx);
         assertex(dataset && dataset->queryRecord());
-        MetaInstance meta(*this, dataset);
+        MetaInstance meta(*this, dataset->queryRecord(), isGrouped(dataset));
         buildMetaInfo(meta);
         switchctx.addQuoted(s.clear().append("case ").append(iout).append(": return &").append(meta.queryInstanceObject()).append(";"));
     }

+ 1 - 2
ecl/hqlcpp/hqlnlp.cpp

@@ -292,8 +292,7 @@ void NlpParseContext::buildProductions(HqlCppTranslator & translator, BuildCtx &
         ForEachItemIn(i, productions)
         {
             IHqlExpression & cur = productions.item(i);
-            OwnedHqlExpr dataset = createDataset(no_anon, LINK(cur.queryChild(1)->queryRecord()));
-            MetaInstance meta(translator, dataset);
+            MetaInstance meta(translator, cur.queryChild(1)->queryRecord(), false);
             translator.buildMetaInfo(meta);
 
             s.clear().append("case ").append(getIntValue(cur.queryChild(0)));

+ 6 - 7
ecl/hqlcpp/hqlsource.cpp

@@ -1869,10 +1869,10 @@ ABoundActivity * SourceBuilder::buildActivity(BuildCtx & ctx, IHqlExpression * e
             if (fieldInfo.hasVirtualsOrDeserialize())
             {
                 OwnedHqlExpr diskTable = createDataset(no_anon, LINK(physicalRecord));
-                translator.buildMetaMember(instance->classctx, diskTable, "queryDiskRecordSize");
+                translator.buildMetaMember(instance->classctx, diskTable, false, "queryDiskRecordSize");
             }
             else
-                translator.buildMetaMember(instance->classctx, tableExpr, "queryDiskRecordSize");
+                translator.buildMetaMember(instance->classctx, tableExpr, isGrouped(tableExpr), "queryDiskRecordSize");
 
         }
     }
@@ -6807,7 +6807,7 @@ void HqlCppTranslator::buildXmlReadTransform(IHqlExpression * dataset, StringBuf
 
     classctx.addQuoted("ICodeContext * ctx;");
     classctx.addQuoted("unsigned activityId;");
-    buildMetaMember(classctx, dataset, "queryRecordSize");
+    buildMetaMember(classctx, dataset, false, "queryRecordSize");
 
     transformClass->setIncomplete(false);
 
@@ -6894,7 +6894,7 @@ void HqlCppTranslator::buildCsvReadTransformer(IHqlExpression * dataset, StringB
     unsigned maxColumns = buildCsvReadTransform(classctx, dataset, false, optCsvAttr);
     doBuildUnsignedFunction(classctx, "getMaxColumns", maxColumns);
 
-    buildMetaMember(classctx, dataset, "queryRecordSize");
+    buildMetaMember(classctx, dataset, false, "queryRecordSize");
     buildCsvParameters(classctx, optCsvAttr, NULL, true);
 
     transformClass->setIncomplete(false);
@@ -6935,7 +6935,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityXmlRead(BuildCtx & ctx, IHqlEx
 
     doBuildVarStringFunction(instance->classctx, "queryIteratorPath", queryRealChild(mode, 0));
 
-    buildMetaMember(instance->classctx, tableExpr, "queryDiskRecordSize");  // A lie, but I don't care....
+    buildMetaMember(instance->classctx, tableExpr, false, "queryDiskRecordSize");  // A lie, but I don't care....
 
     //virtual unsigned getFlags() = 0;
     StringBuffer flags;
@@ -7075,8 +7075,7 @@ void FetchBuilder::buildMembers(IHqlExpression * expr)
         translator.buildRecordSerializeExtract(funcctx, memoryRhsRecord);
 
         StringBuffer s;
-        OwnedHqlExpr serializedRhs = createDataset(no_anon, LINK(serializedRhsRecord));
-        MetaInstance meta(translator, serializedRhs);
+        MetaInstance meta(translator, serializedRhsRecord, false);
         translator.buildMetaInfo(meta);
         instance->classctx.addQuoted(s.clear().append("virtual IOutputMetaData * queryExtractedSize() { return &").append(meta.queryInstanceObject()).append("; }"));
     }

+ 1 - 1
ecl/hqlcpp/hqlstep.cpp

@@ -945,7 +945,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityNWayMergeJoin(BuildCtx & ctx,
 
     //virtual IOutputMetaData * queryInputMeta()
     {
-        MetaInstance inputmeta(*this, dataset);
+        MetaInstance inputmeta(*this, dataset->queryRecord(), isGrouped(dataset));
         buildMetaInfo(inputmeta);
 
         StringBuffer s;

+ 23 - 1
ecl/hqlcpp/hqltcppc.cpp

@@ -330,6 +330,14 @@ void CContainerInfo::calcCachedSize(const SizeStruct & offset, SizeStruct & size
     }
     calcCachedChildrenOffsets(childOffset, cachedSize);
 
+    //Ensure that a record with no fields has a meta size > 0 (can be created by implicit project code)
+    if (cachedSize.isEmpty())
+    {
+        IHqlExpression * record = column->queryRecord();
+        if (record->hasProperty(_nonEmpty_Atom))
+            cachedSize.addFixed(1);
+    }
+
     if (cachedSize.isFixedSize())
         sizeSelf.set(cachedSize);
     else
@@ -869,6 +877,7 @@ CContainerInfo::CContainerInfo(CContainerInfo * _container, CMemberInfo * _prior
     CMemberInfo(_container, _prior, _column)
 {
     fixedSize = true;
+    isDynamic = false;
 }
 
 
@@ -884,6 +893,19 @@ void CContainerInfo::buildClear(HqlCppTranslator & translator, BuildCtx & ctx, I
     BuildCtx condctx(ctx);
     buildConditionFilter(translator, condctx, selector);
 
+    if (children.ordinality() == 0)
+    {
+        if (column->queryRecord()->hasProperty(_nonEmpty_Atom))
+        {
+            //Clearing an empty record that has the _nonEmpty_ attribute clears its implicit byte
+            Owned<ITypeInfo> dummyType = makeIntType(1, false);
+            OwnedHqlExpr address = getColumnAddress(translator, ctx, selector, dummyType);
+            OwnedHqlExpr dummyTarget = convertAddressToValue(address, dummyType);
+            translator.buildAssignToTemp(ctx, dummyTarget, queryZero());
+        }
+        return;
+    }
+
     ForEachItemIn(idx, children)
     {
         CMemberInfo & cur = children.item(idx);
@@ -3127,7 +3149,7 @@ CMemberInfo * ColumnToOffsetMap::expandRecord(IHqlExpression * record, CContaine
 
 DynamicColumnToOffsetMap::DynamicColumnToOffsetMap(unsigned _maxRecordSize) : ColumnToOffsetMap(queryNullRecord(), 0, _maxRecordSize, false)
 {
-    root.setFixedSize(false);
+    root.setDynamic();
     fixedSizeRecord = false;
 }
 

+ 4 - 1
ecl/hqlcpp/hqltcppc.ipp

@@ -45,6 +45,7 @@ public:
     unsigned getFixedSize() const                           { return fixedSize; }
     unsigned getMinimumSize()   const                       { return fixedSize+varMinSize; }
     IHqlExpression * getSizeExpr(BoundRow * cursor);
+    bool isEmpty() const                                    { return fixedSize == 0 && varSize == NULL; }
     bool isFixedSize() const                                { return varSize == NULL; }
     bool isWorthCommoning() const;
     IHqlExpression * queryVarSize() const                   { return varSize; }
@@ -188,7 +189,7 @@ public:
     virtual unsigned getTotalFixedSize();
     virtual unsigned getTotalMinimumSize();
     virtual bool isConditional();
-    virtual bool isFixedSize()              { return fixedSize; }
+    virtual bool isFixedSize()              { return fixedSize && !isDynamic; }
 
     void addTrailingFixed(SizeStruct & size, CMemberInfo * cur);
     void subLeadingFixed(SizeStruct & size, CMemberInfo * cur);
@@ -198,6 +199,7 @@ public:
 public:
     void addChild(CMemberInfo * child);
     void setFixedSize(bool _fixed)          { fixedSize = _fixed; }
+    void setDynamic()                       { isDynamic = true; }
 
 protected:
     virtual void registerChild(CMemberInfo * child);
@@ -208,6 +210,7 @@ protected:
 protected:
     CMemberInfoArray    children;
     bool                fixedSize;
+    bool                isDynamic;
 };
 
 

+ 1 - 0
rtl/include/eclhelper_base.hpp

@@ -3030,6 +3030,7 @@ class CThorLibraryCallArg : public CThorArg, implements IHThorLibraryCallArg
     virtual void Link() const { RtlCInterface::Link(); }
     virtual bool Release() const { return RtlCInterface::Release(); }
     virtual void onCreate(ICodeContext * _ctx, IHThorArg *, MemoryBuffer * in) { ctx = _ctx; }
+    virtual IOutputMetaData * queryOutputMeta()             { return NULL; }
 
     virtual IInterface * selectInterface(ActivityInterfaceEnum which)
     {