Explorar o código

Extend implicit row projection to nested rows

The previous version of the implicit project code only analysed the top
level of fields within a dataset.  If a dataset contained a nested
row field then it was either all included, or all removed.  This
commit extends the functionality so it also removes fields from nested
records if they are unused.

The main differences are
* Ensuring each set of fields knows the original record structure
* Keeping track of nested fields
* Replacing selectors now also ensures that fields of type datarow are
  mapped if they have been changed.

The advantage is often smaller generated code, and reduced data pulled
from datafiles and indexes.  It also improves the accuracy of an
upcoming feature for describing what data is actually used by a query.

The disadvantage is that this generates more unique row formats - which
increases the size of the metadata classes.  An issue is open to work
on reducing the size of those for the common cases.

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday hai 13 anos
pai
achega
15ad4decd0

+ 3 - 0
ecl/hql/hqlexpr.cpp

@@ -1899,6 +1899,8 @@ childDatasetType getChildDatasetType(IHqlExpression * expr)
         if (expr->isDataset())
             return childdataset_dataset_noscope;
         return childdataset_none;
+    case no_ensureresult:
+        return childdataset_dataset_noscope;
     case no_preservemeta:
         //Only has a single dataset - but fields are referenced via active selector, so use the many option
         return childdataset_many;
@@ -2131,6 +2133,7 @@ inline unsigned doGetNumChildTables(IHqlExpression * dataset)
     case no_normalizegroup:
     case no_owned_ds:
     case no_dataset_alias:
+    case no_ensureresult:
         return 1;
     case no_childdataset:
     case no_left:

+ 1 - 0
ecl/hql/hqlpmap.cpp

@@ -157,6 +157,7 @@ void replaceSelectors(HqlExprArray & out, IHqlExpression * expr, unsigned first,
         out.append(*transformer.transformRoot(expr->queryChild(iChild)));
 }
 
+//---------------------------------------------------------------------------------------------------------------------
 
 //NB: This can not be derived from NewHqlTransformer since it is called before the tree is normalised, and it creates
 //inconsistent expression trees.

+ 2 - 0
ecl/hql/hqlpmap.hpp

@@ -170,6 +170,8 @@ extern HQL_API TableProjectMapper * createProjectMapper(IHqlExpression * mapping
 //The tree must be scope tagged before it works correctly though...
 extern HQL_API IHqlExpression * replaceSelector(IHqlExpression * expr, IHqlExpression * oldDataset, IHqlExpression * newDataset);
 extern HQL_API void replaceSelectors(HqlExprArray & out, IHqlExpression * expr, unsigned first, IHqlExpression * oldDataset, IHqlExpression * newDataset);
+extern HQL_API IHqlExpression * updateChildSelectors(IHqlExpression * expr, IHqlExpression * oldSelector, IHqlExpression * newSelector, unsigned firstChild);
+extern HQL_API IHqlExpression * updateMappedFields(IHqlExpression * expr, IHqlExpression * oldRecord, IHqlExpression * newSelector, unsigned firstChild);
 extern HQL_API IHqlExpression * scopedReplaceSelector(IHqlExpression * expr, IHqlExpression * oldDataset, IHqlExpression * newDataset);
 extern HQL_API IHqlExpression * replaceSelfRefSelector(IHqlExpression * expr, IHqlExpression * newDataset);
 

+ 109 - 3
ecl/hql/hqltrans.cpp

@@ -2999,16 +2999,16 @@ void NewSelectorReplacingTransformer::initSelectorMapping(IHqlExpression * oldDa
     if (oldDataset->isDatarow() || op == no_activetable || op == no_self || op == no_selfref)
     {
         if (isAlwaysActiveRow(newDataset) || newDataset->isDatarow())
-            setMappingOnly(oldDataset, newDataset);         // A row, so Don't change any new references to the dataset
+            setRootMapping(oldDataset, newDataset);         // A row, so Don't change any new references to the dataset
         else
         {
             OwnedHqlExpr newActive = ensureActiveRow(newDataset);
-            setMappingOnly(oldDataset, newActive);          // A row, so Don't change any new references to the dataset
+            setRootMapping(oldDataset, newActive);          // A row, so Don't change any new references to the dataset
         }
     }
     else
     {
-        setMappingOnly(oldDataset, oldDataset);         // Don't change any new references to the dataset
+        setRootMapping(oldDataset, oldDataset);         // Don't change any new references to the dataset
     }
     setSelectorMapping(oldDataset, newSelector);
 
@@ -3017,6 +3017,52 @@ void NewSelectorReplacingTransformer::initSelectorMapping(IHqlExpression * oldDa
 }
 
 
+void NewSelectorReplacingTransformer::setNestedMapping(IHqlExpression * oldSel, IHqlExpression * newSel, IHqlSimpleScope * oldScope, IHqlExpression * newRecord)
+{ // Walks newRecord; for each field whose definition differs from the same-named field in oldScope, maps oldSel.<old field> onto newSel.<new field>.
+    ForEachChild(i, newRecord)
+    {
+        IHqlExpression * cur = newRecord->queryChild(i);
+        switch (cur->getOperator()) // children with any operator not listed below are skipped (there is no default case)
+        {
+        case no_record: // a record appearing as a child: walk its children against the same selectors and scope
+            setNestedMapping(oldSel, newSel, oldScope, cur);
+            break;
+        case no_ifblock: // child 1 of the ifblock is walked as the record
+            setNestedMapping(oldSel, newSel, oldScope, cur->queryChild(1));
+            break;
+        case no_field:
+            {
+                OwnedHqlExpr oldField = oldScope->lookupSymbol(cur->queryName()); // the old field is located by name
+                assertex(oldField); // every field of the new record must also exist in the old scope
+                if (cur != oldField) // an identical field expression needs no extra mapping
+                {
+                    OwnedHqlExpr oldSelected = createSelectExpr(LINK(oldSel), LINK(oldField));
+                    OwnedHqlExpr newSelected = createSelectExpr(LINK(newSel), LINK(cur));
+                    setRootMapping(oldSelected, newSelected, oldField->queryRecord()); // setRootMapping walks the field's own record again if it differs
+                }
+            }
+        }
+    }
+}
+
+void NewSelectorReplacingTransformer::setRootMapping(IHqlExpression * oldSel, IHqlExpression * newSel, IHqlExpression * oldRecord)
+{ // Maps oldSel to newSel and, when the record of newSel is not oldRecord, also maps the nested fields that differ.
+    setMappingOnly(oldSel, newSel);
+    IHqlExpression * newRecord = newSel->queryRecord();
+    if (oldRecord != newRecord) // same record on both sides: the single mapping above is sufficient
+    {
+        if (oldRecord != queryNullRecord() && newRecord != queryNullRecord()) // no field walk when either side is the null record
+        {
+            setNestedMapping(oldSel, newSel, oldRecord->querySimpleScope(), newRecord); // NOTE(review): oldRecord is dereferenced without a NULL check - confirm callers never pass NULL
+        }
+    }
+}
+
+void NewSelectorReplacingTransformer::setRootMapping(IHqlExpression * oldSel, IHqlExpression * newSel)
+{ // Convenience overload: forwards to the three-argument form using the record of oldSel as the old record.
+    setRootMapping(oldSel, newSel, oldSel->queryRecord());
+}
+
 IHqlExpression * NewSelectorReplacingTransformer::createTransformed(IHqlExpression * expr)
 {
     if (!isHidden && expr->queryNormalizedSelector() == savedNewDataset)
@@ -3076,6 +3122,66 @@ IHqlExpression * queryNewReplaceSelector(IHqlExpression * expr, IHqlExpression *
     return ret.getClear();
 }
 
+
+//---------------------------------------------------------------------------------------------------------------------
+
+IHqlExpression * updateChildSelectors(IHqlExpression * expr, IHqlExpression * oldSelector, IHqlExpression * newSelector, unsigned firstChild)
+{ // Transforms the children of expr from index firstChild onwards with an oldSelector -> newSelector mapping; earlier children are copied unchanged.
+    if (oldSelector == newSelector)
+        return LINK(expr); // identical selectors: nothing to replace
+
+    unsigned max = expr->numChildren();
+    unsigned i;
+    HqlExprArray args;
+    args.ensure(max);
+    for (i = 0; i < firstChild; i++) // NOTE(review): assumes firstChild <= numChildren() - confirm against callers
+        args.append(*LINK(expr->queryChild(i)));
+
+    NewSelectorReplacingTransformer transformer;
+    transformer.initSelectorMapping(oldSelector, newSelector);
+    bool same = true; // remains true while every transformed child is pointer-identical to its original
+    for (; i < max; i++) // i carries on from the value reached by the previous loop
+    {
+        IHqlExpression * cur = expr->queryChild(i);
+        IHqlExpression * transformed = transformer.transformRoot(cur);
+        args.append(*transformed);
+        if (cur != transformed)
+            same = false;
+    }
+    if (same)
+        return LINK(expr); // no child changed, so the original expression is returned
+    return expr->clone(args); // otherwise expr is cloned with the new child list
+}
+
+
+IHqlExpression * updateMappedFields(IHqlExpression * expr, IHqlExpression * oldRecord, IHqlExpression * newSelector, unsigned firstChild)
+{ // Transforms the children of expr from index firstChild onwards so nested fields that differ between oldRecord and newSelector's record are remapped.
+    if (oldRecord == newSelector->queryRecord())
+        return LINK(expr); // record unchanged: no field can need remapping
+
+    unsigned max = expr->numChildren();
+    unsigned i;
+    HqlExprArray args;
+    args.ensure(max);
+    for (i = 0; i < firstChild; i++) // NOTE(review): assumes firstChild <= numChildren() - confirm against callers
+        args.append(*LINK(expr->queryChild(i)));
+
+    NewSelectorReplacingTransformer transformer;
+    transformer.setRootMapping(newSelector, newSelector, oldRecord); // the selector maps to itself; only the nested field mappings derived from oldRecord differ
+    bool same = true; // remains true while every transformed child is pointer-identical to its original
+    for (; i < max; i++) // i carries on from the value reached by the previous loop
+    {
+        IHqlExpression * cur = expr->queryChild(i);
+        IHqlExpression * transformed = transformer.transformRoot(cur);
+        args.append(*transformed);
+        if (cur != transformed)
+            same = false;
+    }
+    if (same)
+        return LINK(expr); // no child changed, so the original expression is returned
+    return expr->clone(args); // otherwise expr is cloned with the new child list
+}
+
 //---------------------------------------------------------------------------
 
 /*

+ 6 - 0
ecl/hql/hqltrans.ipp

@@ -990,6 +990,12 @@ public:
 
     inline bool foundAmbiguity() const { return introducesAmbiguity; }
 
+    void setRootMapping(IHqlExpression * oldSel, IHqlExpression * newSel, IHqlExpression * record);
+
+protected:
+    void setNestedMapping(IHqlExpression * oldSel, IHqlExpression * newSel, IHqlSimpleScope * oldScope, IHqlExpression * newRecord);
+    void setRootMapping(IHqlExpression * oldSel, IHqlExpression * newSel);
+
 protected:
     OwnedHqlExpr oldSelector;
     bool introducesAmbiguity;

+ 49 - 0
ecl/hql/hqlutil.cpp

@@ -1504,6 +1504,7 @@ unsigned getNumActivityArguments(IHqlExpression * expr)
     case no_allnodes:
     case no_thisnode:
     case no_keydiff:
+    case no_keypatch:
         return 0;
     case no_setresult:
         if (expr->queryChild(0)->isAction())
@@ -7418,6 +7419,54 @@ StringBuffer & appendLocation(StringBuffer & s, IHqlExpression * location, const
 
 //---------------------------------------------------------------------------------------------------------------------
 
+static void createMappingAssigns(HqlExprArray & assigns, IHqlExpression * selfSelector, IHqlExpression * oldSelector, IHqlSimpleScope * oldScope, IHqlExpression * newRecord)
+{ // Appends to assigns one assignment per field of newRecord, taking the value from the same-named field selected from oldSelector.
+    ForEachChild(i, newRecord)
+    {
+        IHqlExpression * cur = newRecord->queryChild(i);
+        switch (cur->getOperator()) // children with any operator not listed below are skipped (there is no default case)
+        {
+        case no_record: // a record appearing as a child: its fields are assigned against the same selectors and scope
+            createMappingAssigns(assigns, selfSelector, oldSelector, oldScope, cur);
+            break;
+        case no_ifblock: // child 1 of the ifblock is walked as the record
+            createMappingAssigns(assigns, selfSelector, oldSelector, oldScope, cur->queryChild(1));
+            break;
+        case no_field:
+            {
+                OwnedHqlExpr oldField = oldScope->lookupSymbol(cur->queryName()); // the source field is located by name
+                assertex(oldField); // every target field must exist in the source scope
+                OwnedHqlExpr selfSelected = createSelectExpr(LINK(selfSelector), LINK(cur));
+                OwnedHqlExpr oldSelected = createSelectExpr(LINK(oldSelector), LINK(oldField));
+
+                if (selfSelected->queryRecord() != oldSelected->queryRecord()) // the field's record differs between target and source
+                {
+                    assertex(oldSelected->isDatarow()); // a differing record is only handled for datarow fields
+                    OwnedHqlExpr childSelf = getSelf(cur);
+                    OwnedHqlExpr childTransform = createMappingTransform(childSelf, oldSelected); // builds the mapping for the nested row recursively
+                    OwnedHqlExpr createRowExpr = createRow(no_createrow, childTransform.getClear());
+                    assigns.append(*createAssign(selfSelected.getClear(), createRowExpr.getClear()));
+                }
+                else
+                    assigns.append(*createAssign(selfSelected.getClear(), oldSelected.getClear())); // records match: assign the source field directly
+            }
+        }
+    }
+}
+
+IHqlExpression * createMappingTransform(IHqlExpression * selfSelector, IHqlExpression * inSelector)
+{ // Builds a no_transform for selfSelector's record whose assignments take each field from the same-named field of inSelector.
+    HqlExprArray assigns;
+    IHqlExpression * selfRecord = selfSelector->queryRecord();
+    IHqlExpression * inRecord = inSelector->queryRecord();
+    createMappingAssigns(assigns, selfSelector, inSelector, inRecord->querySimpleScope(), selfRecord); // fields are enumerated from the target record and looked up in the input record's scope
+    return createValue(no_transform, makeTransformType(selfRecord->getType()), assigns);
+
+}
+
+
+//---------------------------------------------------------------------------------------------------------------------
+
 static IHqlExpression * transformAttributeToQuery(IHqlExpression * expr, HqlLookupContext & ctx)
 {
     if (expr->isMacro())

+ 1 - 0
ecl/hql/hqlutil.hpp

@@ -133,6 +133,7 @@ extern HQL_API bool isLengthPreservingCast(IHqlExpression * expr);
 
 extern HQL_API IHqlExpression * createTransformFromRow(IHqlExpression * expr);
 extern HQL_API IHqlExpression * createNullTransform(IHqlExpression * record);
+extern HQL_API IHqlExpression * createMappingTransform(IHqlExpression * selfSelector, IHqlExpression * inSelector);
 
 extern HQL_API IHqlExpression * getFailCode(IHqlExpression * failExpr);
 extern HQL_API IHqlExpression * getFailMessage(IHqlExpression * failExpr, bool nullIfOmitted);

+ 2 - 5
ecl/hqlcpp/hqlckey.cpp

@@ -993,11 +993,8 @@ void KeyedJoinInfo::optimizeExtractJoinFields()
         {
             //A bit of a hack - Richard can't cope with zero length values being returned, so allocate
             //a single byte to keep him happy.
-            OwnedHqlExpr dummyField = createField(unnamedAtom, makeIntType(1, false), NULL, NULL);
-            extractJoinFieldsRecord.setown(createRecord(dummyField));
-            OwnedHqlExpr self = getSelf(extractJoinFieldsRecord);
-
-            assigns.append(*createAssign(createSelectExpr(LINK(self), LINK(dummyField)), getZero()));
+            OwnedHqlExpr nonEmptyAttr = createAttribute(_nonEmpty_Atom);
+            extractJoinFieldsRecord.setown(createRecord(nonEmptyAttr));
         }
 
         extractJoinFieldsTransform.setown(createValue(no_transform, makeTransformType(extractJoinFieldsRecord->getType()), assigns));

+ 1 - 0
ecl/hqlcpp/hqlcpp.cpp

@@ -1680,6 +1680,7 @@ void HqlCppTranslator::cacheOptions()
         DebugOption(options.implicitGroupHashAggregate,"implicitGroupHashAggregate",false),
         DebugOption(options.implicitGroupHashDedup,"implicitGroupHashDedup",false),
         DebugOption(options.shuffleLocalJoinConditions,"shuffleLocalJoinConditions",false),
+        DebugOption(options.projectNestedTables,"projectNestedTables",true),
     };
 
     //get options values from workunit

+ 1 - 0
ecl/hqlcpp/hqlcpp.ipp

@@ -709,6 +709,7 @@ struct HqlCppOptions
     bool                implicitGroupHashAggregate;  // convert aggreate(sort(x,a),{..},a,d) to aggregate(group(sort(x,a),a_,{},d))
     bool                implicitGroupHashDedup;
     bool                shuffleLocalJoinConditions;
+    bool                projectNestedTables;
 };
 
 //Any information gathered while processing the query should be moved into here, rather than cluttering up the translator class

A diferenza do arquivo foi suprimida porque é demasiado grande
+ 1033 - 477
ecl/hqlcpp/hqliproj.cpp


+ 140 - 70
ecl/hqlcpp/hqliproj.ipp

@@ -34,7 +34,7 @@ enum ProjectExprKind
     CreateRecordLRActivity,             // creates a record, can remove fields from output at will, has left and right input
     CompoundActivity,                   // a compound source, so inserting a project before it is assumed to have no cost
     CompoundableActivity,               // a source that could become a compound activity, so worth adding projects after
-    TransformRecordActivity,            // contains a transform, but input must match output
+    RollupTransformActivity,            // contains a transform, but input must match output
     FixedInputActivity,                 // can't change input to this activity.  E.g., pipe, output
                                         // or input/output record contains ifblocks.
     SourceActivity,                     // No inputs, so no need to do any calculations.
@@ -42,93 +42,60 @@ enum ProjectExprKind
     PassThroughActivity,                // input always equals output.
     ScalarSelectActivity,               // <someDataset>[n].field
     DenormalizeActivity,                // contains a transform, but left must match output
-    ActionSinkActivity,                 // a sink, but that doesn't necessarily use all input fields.
+    SinkActivity,                       // a sink, but that doesn't necessarily use all input fields.
     CreateRecordSourceActivity,         // a source activity containing a transform i.e., inline table
     ComplexNonActivity,                 
     AnyTypeActivity,                    // can be created any type.
 };
 
+//---------------------------------------------------------------------------------------------------------------------
 
-class UsedFieldSet
-{
-public:
-    UsedFieldSet() { all = false; }
-
-    void addUnique(IHqlExpression * field);
-    void append(IHqlExpression & field);
-    void clone(const UsedFieldSet & source);
-    void cloneFields(const UsedFieldSet & source);
-    int compareOrder(IHqlExpression * left, IHqlExpression * right) const;
-    bool contains(IHqlExpression & field) const;
-    void getFields(HqlExprArray & target) const;
-    void getText(StringBuffer & s) const;
-    void intersectFields(const UsedFieldSet & source);
-    void set(const UsedFieldSet & source);
-    void setAll() { all = true; }
-    void setAll(IHqlExpression * record);
-    void sort(ICompare & compare);
-
-    inline unsigned ordinality() const { return fields.ordinality(); }
-    inline bool includeAll() const { return all; }
-    inline IHqlExpression & item(unsigned i) const { return fields.item(i); }
-
-protected:
-    void kill();
-
-protected:
-#ifdef USE_IPROJECT_HASH
-    HqlExprHashTable hash;
-#endif
-    HqlExprArray fields;
-    bool all;
-};
-
-//Save memory allocation if only a single item in the list.  Could conceiv
+//Save memory allocation if only a single item in the list.
 class OptimizeSingleExprCopyArray : private HqlExprCopyArray
 {
 public:
     OptimizeSingleExprCopyArray() { singleValue = NULL; }
 
-    unsigned ordinality() const 
-    { 
-        return singleValue ? 1 : HqlExprCopyArray::ordinality(); 
+    unsigned ordinality() const
+    {
+        return singleValue ? 1 : HqlExprCopyArray::ordinality();
     }
-    IHqlExpression & item(unsigned i) const 
+    IHqlExpression & item(unsigned i) const
     {
         if (singleValue && i == 0)
             return *singleValue;
-        return HqlExprCopyArray::item(i); 
+        return HqlExprCopyArray::item(i);
     }
     void ensure(unsigned max)
     {
         if (max > 1)
-            HqlExprCopyArray::ensure(max); 
+            HqlExprCopyArray::ensure(max);
     }
-    unsigned find(IHqlExpression & cur) const 
-    { 
+    unsigned find(IHqlExpression & cur) const
+    {
         if (singleValue)
             return &cur == singleValue ? 0 : NotFound;
-        return HqlExprCopyArray::find(cur); 
+        return HqlExprCopyArray::find(cur);
     }
-    void remove(unsigned i) 
-    { 
+    void remove(unsigned i)
+    {
         if (singleValue && i == 0)
             singleValue = NULL;
         else
-            HqlExprCopyArray::remove(i); 
+            HqlExprCopyArray::remove(i);
     }
-    void append(IHqlExpression & cur) 
-    { 
+    void append(IHqlExpression & cur)
+    {
         if (singleValue)
         {
-            HqlExprCopyArray::append(*singleValue); 
-            HqlExprCopyArray::append(cur); 
+            HqlExprCopyArray::append(*singleValue);
+            HqlExprCopyArray::append(cur);
             singleValue = NULL;
         }
         else if (HqlExprCopyArray::ordinality() == 0)
             singleValue = &cur;
         else
-            HqlExprCopyArray::append(cur); 
+            HqlExprCopyArray::append(cur);
     }
 
 protected:
@@ -137,6 +104,112 @@ protected:
 
 typedef OptimizeSingleExprCopyArray SelectUsedArray;
 
+//---------------------------------------------------------------------------------------------------------------------
+
+class NestedField;
+
+//NB: Once all is set this structure should not be modified.  That allows the un-modified definitions to be shared
+//by other expressions.
+class UsedFieldSet // Set of fields, with per-field NestedField entries; member functions without inline bodies are defined outside this header.
+{
+public:
+    UsedFieldSet() { all = false; maxGathered = 0; originalFields = NULL; } // starts with no 'all' flag, nothing gathered and no original set attached
+
+    void addUnique(IHqlExpression * field);
+    NestedField * addNested(IHqlExpression * field);
+    bool allGathered() const;
+    void appendField(IHqlExpression & ownedField);
+    void appendNested(IHqlExpression & ownedField, NestedField * ownedNested);
+    void clone(const UsedFieldSet & source);
+    bool checkAllFieldsUsed();
+
+    int compareOrder(IHqlExpression * left, IHqlExpression * right) const;
+    void createDifference(const UsedFieldSet & left, const UsedFieldSet & right);
+    IHqlExpression * createFilteredTransform(IHqlExpression * transform, const UsedFieldSet * exceptions) const;
+    void calcFinalRecord(bool canPack, bool ignoreIfEmpty);
+    NestedField * findNested(IHqlExpression * field) const;
+    void gatherTransformValuesUsed(HqlExprArray * selfSelects, SelectUsedArray * parentSelects, HqlExprArray * values, IHqlExpression * selector, IHqlExpression * transform);
+    void getText(StringBuffer & s) const;
+    void intersectFields(const UsedFieldSet & source);
+    bool isEmpty() const;
+    void noteGatheredAll() { maxGathered = (unsigned)-1; } // records the largest unsigned value in maxGathered
+    void optimizeFieldsToBlank(const UsedFieldSet & allAssigned, IHqlExpression * transform);
+    bool requiresFewerFields(const UsedFieldSet & other) const;
+    void set(const UsedFieldSet & source);
+    void setAll();
+    void setRecord(IHqlExpression * record);
+    void unionFields(const UsedFieldSet & source);
+
+    inline unsigned numFields() const { return fields.ordinality(); } // number of entries in the fields array
+    inline void clear() { kill(); }
+    inline bool includeAll() const { return all; }
+    inline IHqlExpression * queryOriginalRecord() const { // final record of the attached original set; asserts that one has been attached
+        assertex(originalFields);
+        return originalFields->queryFinalRecord();
+    }
+    inline const UsedFieldSet * queryOriginal() const { return originalFields; }
+    inline IHqlExpression * queryFinalRecord() const { return finalRecord; }
+    inline void setAllIfAny() { if (originalFields) setAll(); } // calls setAll() only when an original set has been attached
+    inline void setOriginal(const UsedFieldSet * _originalFields) { originalFields = _originalFields; } // stores the pointer as given; the original set is not copied
+
+protected:
+    bool contains(IHqlExpression & field) const;
+    bool contains(_ATOM name) const; // debugging only
+    IHqlExpression * createFilteredAssign(IHqlExpression * field, IHqlExpression * value, IHqlExpression * newSelf, const UsedFieldSet * exceptions) const;
+    void createFilteredAssigns(HqlExprArray & assigns, IHqlExpression * transform, IHqlExpression * newSelf, const UsedFieldSet * exceptions) const;
+    IHqlExpression * createRowTransform(IHqlExpression * row, const UsedFieldSet * exceptions) const;
+    void kill();
+    void gatherExpandSelectsUsed(HqlExprArray * selfSelects, SelectUsedArray * parentSelects, IHqlExpression * selector, IHqlExpression * source);
+    unsigned getOriginalPosition(IHqlExpression * field) const;
+
+protected:
+    OwnedHqlExpr finalRecord; // returned by queryFinalRecord(); presumably filled in by calcFinalRecord() - confirm in hqliproj.cpp
+    const UsedFieldSet * originalFields; // set via setOriginal(); NULL until then
+#ifdef USE_IPROJECT_HASH
+    HqlExprHashTable hash;
+#endif
+    HqlExprArray fields;
+    CIArrayOf<NestedField> nested;
+    unsigned maxGathered; // 0 on construction, (unsigned)-1 after noteGatheredAll()
+    bool all; // reported by includeAll()
+};
+
+class RecordOrderComparer : public ICompare // ICompare implementation bound to one UsedFieldSet; docompare() is defined outside this header
+{
+public:
+    RecordOrderComparer(const UsedFieldSet & _fields) : fields(_fields) {} // keeps a reference, so the field set must outlive the comparer
+
+    virtual int docompare(const void * l,const void * r) const; // NOTE(review): presumably delegates to UsedFieldSet::compareOrder - confirm in hqliproj.cpp
+
+protected:
+    const UsedFieldSet & fields;
+};
+
+
+class NestedField : public CInterface // Pairs a field expression with the UsedFieldSet held for that field
+{
+public:
+    NestedField(IHqlExpression * _field, const UsedFieldSet * _original) : field(_field) { used.setOriginal(_original); } // field pointer is stored as given (no LINK); _original becomes the original set of 'used'
+
+    NestedField * clone() // returns a newly allocated NestedField for the same field and original set, with 'used' cloned from this one
+    {
+        //MORE: The following needs testing - for correctness and speed improvements.
+        //if (used.includeAll())
+        //    return LINK(this);
+        NestedField * ret = new NestedField(field, used.queryOriginal());
+        ret->used.clone(used);
+        return ret;
+    }
+    void clear() { used.clear(); }
+
+    inline bool isEmpty() const { return used.isEmpty(); }
+    inline bool includeAll() const { return used.includeAll(); }
+
+public:
+    IHqlExpression * field;
+    UsedFieldSet used;
+};
+
 struct ImplicitProjectOptions
 {
     unsigned insertProjectCostLevel;
@@ -146,6 +219,7 @@ struct ImplicitProjectOptions
     bool autoPackRecords;
     bool optimizeSpills;
     bool enableCompoundCsvRead;
+    bool projectNestedTables;
 };
 
 class ImplicitProjectInfo;
@@ -209,34 +283,35 @@ public:
     bool canReorderOutput:1;
     bool calcedReorderOutput:1;
     bool visitedAllowingActivity:1;
-
-    byte childDatasetType;
 };
 
 typedef ICopyArrayOf<ComplexImplicitProjectInfo> ProjectInfoArray;
-class ComplexImplicitProjectInfo : public ImplicitProjectInfo, public ICompare
+class ComplexImplicitProjectInfo : public ImplicitProjectInfo
 {
 public:
     ComplexImplicitProjectInfo(IHqlExpression * _original, ProjectExprKind _kind);
     IMPLEMENT_IINTERFACE
 
     virtual ComplexImplicitProjectInfo * queryComplexInfo() { return this; }
-    virtual int docompare(const void *,const void *) const;             // compare within output record
 
     void addAllOutputs();
-    bool addOutputField(IHqlExpression * field);
     IHqlExpression * createOutputProject(IHqlExpression * ds);
     void finalizeOutputRecord();
-    void inheritRequiredFields(UsedFieldSet * requiredList);
+    void inheritRequiredFields(const UsedFieldSet & requiredList);
     bool safeToReorderInput();
     bool safeToReorderOutput();
     void setMatchingOutput(ComplexImplicitProjectInfo * other);
     void setReorderOutput(bool ok)              { canReorderOutput = ok; calcedReorderOutput = true; }
+    void setOriginalRecord(ComplexImplicitProjectInfo * outputInfo) { outputFields.setOriginal(&outputInfo->outputFields); }
 
     void stopOptimizeCompound(bool cascade);
     void trace();
 
-    inline bool outputChanged() const                   { return newOutputRecord != original->queryRecord() && okToOptimize(); }
+    inline bool outputChanged() const
+    {
+        return (queryOutputRecord() != original->queryRecord()) && okToOptimize();
+    }
+    inline IHqlExpression * queryOutputRecord() const { return outputFields.queryFinalRecord(); }
 
     virtual void notifyRequiredFields(ComplexImplicitProjectInfo * whichInput);
 
@@ -251,9 +326,6 @@ protected:
 public:
     //later: create a derived class - if is activity or has child dataset
 
-    ComplexImplicitProjectInfo * outputInfo;
-    HqlExprAttr newOutputRecord;                // Once set it indicates it won't be changed again
-
     ProjectInfoArray inputs;
     ProjectInfoArray outputs;
     UsedFieldSet outputFields;
@@ -290,8 +362,6 @@ protected:
 
     void calculateFieldsUsed(IHqlExpression * expr);
     void connect(IHqlExpression * source, IHqlExpression * sink);
-    void createFilteredAssigns(HqlExprArray & assigns, IHqlExpression * transform, const UsedFieldSet & fields, IHqlExpression * newSelf, const UsedFieldSet * exceptions);
-    IHqlExpression * createFilteredTransform(IHqlExpression * transform, const UsedFieldSet & fields, IHqlExpression * record, const UsedFieldSet * exceptions = NULL);
     void finalizeFields();
     void finalizeFields(IHqlExpression * expr);
     void gatherFieldsUsed(IHqlExpression * expr, ImplicitProjectInfo * extra);
@@ -307,14 +377,14 @@ protected:
     const SelectUsedArray & querySelectsUsedForField(IHqlExpression * transform, IHqlExpression * field);
     void traceActivities();
     IHqlExpression * updateSelectors(IHqlExpression * newExpr, IHqlExpression * oldExpr);
-    IHqlExpression * updateChildSelectors(IHqlExpression * expr, IHqlExpression * oldSelector, IHqlExpression * newSelector, unsigned firstChild);
 
-    void processSelectsUsedForCreateRecord(ComplexImplicitProjectInfo * extra, SelectUsedArray const & selectsUsed, IHqlExpression * ds, IHqlExpression * leftSelect, IHqlExpression * rightSelect);
-    void processSelectsUsedForDenormalize(ComplexImplicitProjectInfo * extra, SelectUsedArray const & selectsUsed, IHqlExpression * leftSelect, IHqlExpression * rightSelect);
-    void processSelectsUsedForTransformRecord(ComplexImplicitProjectInfo * extra, SelectUsedArray const & selectsUsed, IHqlExpression * ds, IHqlExpression * leftSelect, IHqlExpression * rightSelect);
+    void processSelect(ComplexImplicitProjectInfo * extra, IHqlExpression * curSelect, IHqlExpression * ds, IHqlExpression * leftSelect, IHqlExpression * rightSelect);
+    void processSelects(ComplexImplicitProjectInfo * extra, SelectUsedArray const & selectsUsed, IHqlExpression * ds, IHqlExpression * leftSelect, IHqlExpression * rightSelect);
+    void processTransform(ComplexImplicitProjectInfo * extra, IHqlExpression * transform, IHqlExpression * ds, IHqlExpression * leftSelect, IHqlExpression * rightSelect);
 
 protected:
     const SelectUsedArray & querySelectsUsed(IHqlExpression * expr);
+    void setOriginal(UsedFieldSet & fields, IHqlExpression * ds) { fields.setOriginal(&queryBodyComplexExtra(ds->queryRecord())->outputFields); }
 
 protected:
     HqlCppTranslator &  translator;

+ 44 - 0
ecl/regress/countset.ecl

@@ -0,0 +1,44 @@
+/*##############################################################################
+
+    Copyright (C) 2011 HPCC Systems.
+
+    All rights reserved. This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as
+    published by the Free Software Foundation, either version 3 of the
+    License, or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+############################################################################## */
+
+strRec := { string value; };
+
+namesRecord :=
+            RECORD
+string20        surname;
+string10        forename;
+dataset(strRec) paths;
+integer2        age := 25;
+            END;
+
+namesTable1 := dataset('x1',namesRecord,FLAT);
+// getUniqueSet: set of the distinct values, or ['Default'] when COUNT of the distinct values is not below MaxPaths (100).
+getUniqueSet(dataset(strRec) values) := FUNCTION
+    unsigned MaxPaths := 100;
+    uniquePaths := DEDUP(values, value, ALL);
+    RETURN IF(COUNT(uniquePaths)<MaxPaths, SET(uniquePaths, value), ['Default']);
+END;
+// getUniqueSet2: same test, but chooses between datasets first and builds the set afterwards; not referenced by the output below.
+getUniqueSet2(dataset(strRec) values) := FUNCTION
+    unsigned MaxPaths := 100;
+    uniquePaths := DEDUP(values, value, ALL);
+    limited := IF(COUNT(uniquePaths)<MaxPaths, uniquePaths, DATASET(['Default'], strRec));
+    RETURN SET(limited, value);
+END;
+// Filters the table on surname not being in the set computed from each row's child dataset 'paths'.
+output(namesTable1(surname not in getUniqueSet(paths)));

+ 44 - 0
ecl/regress/nested10.ecl

@@ -0,0 +1,44 @@
+/*##############################################################################
+
+    Copyright (C) 2011 HPCC Systems.
+
+    All rights reserved. This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as
+    published by the Free Software Foundation, either version 3 of the
+    License, or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+############################################################################## */
+
+level1Rec := RECORD
+    UNSIGNED a;
+    UNSIGNED b;
+    UNSIGNED c;
+END;
+// level2Rec holds three level1Rec rows.
+level2Rec := RECORD
+    level1Rec a;
+    level1Rec b;
+    level1Rec c;
+END;
+// level3Rec holds an id plus three level2Rec rows, giving three levels of nesting.
+level3Rec := RECORD
+    UNSIGNED id;
+    level2Rec a;
+    level2Rec b;
+    level2Rec c;
+END;
+
+ds := DATASET('ds', level3Rec, thor);
+// Only a.b.c (filter), c.b.a (dedup) and the nested row b.a (output) are referenced below.
+f := ds(a.b.c != 10);
+
+//prevent a compound disk operation
+d := dedup(f, c.b.a);
+output(d, { b.a });

+ 60 - 0
ecl/regress/nested11.ecl

@@ -0,0 +1,60 @@
+/*##############################################################################
+
+    Copyright (C) 2011 HPCC Systems.
+
+    All rights reserved. This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as
+    published by the Free Software Foundation, either version 3 of the
+    License, or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+############################################################################## */
+
+level1Rec := RECORD
+    UNSIGNED a;
+    UNSIGNED b;
+    UNSIGNED c;
+END;
+// level2Rec holds three level1Rec rows.
+level2Rec := RECORD
+    level1Rec a;
+    level1Rec b;
+    level1Rec c;
+END;
+// level3Rec holds an id plus three level2Rec rows, giving three levels of nesting.
+level3Rec := RECORD
+    UNSIGNED id;
+    level2Rec a;
+    level2Rec b;
+    level2Rec c;
+END;
+// t1 builds a level1Rec from a single value: a-1, a, a+1.
+t1(unsigned a) := transform(level1Rec, self.a := a-1; self.b := a; self.c := a+1;);
+// t2 builds a level2Rec; each nested row is chosen by an IF on random().
+level2Rec t2() := transform
+    SELF.a := IF(random() = 1, row(t1(10)), row(t1(20)));
+    SELF.b := iF(random() = 3, row(t1(9)), row(t1(12)));
+    SELF.c := iF(random() = 8, row(t1(9)), row(t1(12)));
+END;
+
+
+ds := DATASET('ds', level3Rec, thor);
+// tx replaces nested row c with a row created by t2 and copies every other field from the input row.
+level3Rec tx(level3Rec l) := transform
+    self.c := row(t2());
+    self := l;
+END;
+
+p := project(ds, tx(LEFT));
+// Only a.b.c (filter), c.b.a (dedup) and the nested row b.a (output) are referenced below.
+f := p(a.b.c != 10);
+
+//prevent a compound disk operation
+d := dedup(f, c.b.a);
+output(d, { b.a });

+ 44 - 0
ecl/regress/nested9.ecl

@@ -0,0 +1,44 @@
+/*##############################################################################
+
+    Copyright (C) 2011 HPCC Systems.
+
+    All rights reserved. This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as
+    published by the Free Software Foundation, either version 3 of the
+    License, or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+############################################################################## */
+
+level1Rec := RECORD
+    UNSIGNED a;
+    UNSIGNED b;
+    UNSIGNED c;
+END;
+// level2Rec holds three level1Rec rows.
+level2Rec := RECORD
+    level1Rec a;
+    level1Rec b;
+    level1Rec c;
+END;
+// level3Rec holds an id plus three level2Rec rows, giving three levels of nesting.
+level3Rec := RECORD
+    UNSIGNED id;
+    level2Rec a;
+    level2Rec b;
+    level2Rec c;
+END;
+
+ds := DATASET('ds', level3Rec, thor);
+// Only a.b.c (filter) and c.b.a (dedup) are referenced below; the output is just a count.
+f := ds(a.b.c != 10);
+
+//prevent a compound disk operation
+d := dedup(f, c.b.a);
+output(count(d));