浏览代码

WIP: DICTIONARY support

Add DICTIONARY(dataset, record) support.
Relax record matching in several places to ignore the payload
attribute, so that rows can more often be linked rather than copied.

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 13 年之前
父节点
当前提交
5c687988fb

+ 3 - 1
ecl/hql/hqlattr.cpp

@@ -276,6 +276,8 @@ unsigned getOperatorMetaFlags(node_operator op)
     case no_temprow:
 
 //Dictionaries
+    case no_userdictionary:
+    case no_newuserdictionary:
     case no_inlinedictionary:
 
 //Datasets [see also selection operators]
@@ -615,7 +617,7 @@ unsigned getOperatorMetaFlags(node_operator op)
 
     case no_unused6:
     case no_unused13: case no_unused14: case no_unused15: case no_unused18: case no_unused19:
-    case no_unused20: case no_unused21: case no_unused22: case no_unused23: case no_unused24: case no_unused25: case no_unused26: case no_unused27: case no_unused28: case no_unused29:
+    case no_unused20: case no_unused21: case no_unused22: case no_unused23: case no_unused24: case no_unused25: case no_unused28: case no_unused29:
     case no_unused30: case no_unused31: case no_unused32: case no_unused33: case no_unused34: case no_unused35: case no_unused36: case no_unused37: case no_unused38:
     case no_unused40: case no_unused41: case no_unused42: case no_unused43: case no_unused44: case no_unused45: case no_unused46: case no_unused47: case no_unused48: case no_unused49:
     case no_unused50: case no_unused52:

+ 17 - 2
ecl/hql/hqlexpr.cpp

@@ -1447,11 +1447,11 @@ const char *getOpString(node_operator op)
     case no_debug_option_value: return "__DEBUG__";
     case no_dataset_alias: return "TABLE";
     case no_childquery: return "no_childquery";
-    case no_inlinedictionary: return "DICTIONARY";
+    case no_inlinedictionary: case no_userdictionary: case no_newuserdictionary: return "DICTIONARY";
 
     case no_unused6:
     case no_unused13: case no_unused14: case no_unused15: case no_unused18: case no_unused19:
-    case no_unused20: case no_unused21: case no_unused22: case no_unused23: case no_unused24: case no_unused25: case no_unused26: case no_unused27: case no_unused28: case no_unused29:
+    case no_unused20: case no_unused21: case no_unused22: case no_unused23: case no_unused24: case no_unused25: case no_unused28: case no_unused29:
     case no_unused30: case no_unused31: case no_unused32: case no_unused33: case no_unused34: case no_unused35: case no_unused36: case no_unused37: case no_unused38:
     case no_unused40: case no_unused41: case no_unused42: case no_unused43: case no_unused44: case no_unused45: case no_unused46: case no_unused47: case no_unused48: case no_unused49:
     case no_unused50: case no_unused52:
@@ -1816,6 +1816,8 @@ childDatasetType getChildDatasetType(IHqlExpression * expr)
     case no_transformascii:
     case no_selectfields:
     case no_newaggregate:
+    case no_userdictionary:
+    case no_newuserdictionary:
     case no_newusertable:
     case no_usertable:
     case no_alias_project:
@@ -2102,6 +2104,7 @@ inline unsigned doGetNumChildTables(IHqlExpression * dataset)
     case no_compound_inline:
     case no_transformascii:
     case no_transformebcdic:
+    case no_newuserdictionary:
     case no_newusertable:
     case no_aggregate:
     case no_usertable:
@@ -2606,6 +2609,7 @@ IHqlExpression * queryNewColumnProvider(IHqlExpression * expr)
     case no_createrow:
     case no_typetransfer:
         return expr->queryChild(0);
+    case no_userdictionary:
     case no_usertable:
     case no_selectfields:
     case no_transformebcdic:
@@ -2623,6 +2627,7 @@ IHqlExpression * queryNewColumnProvider(IHqlExpression * expr)
     case no_newkeyindex:
     case no_aggregate:
     case no_newaggregate:
+    case no_newuserdictionary:
     case no_newusertable:
     case no_normalize:
     case no_xmlparse:
@@ -9884,6 +9889,8 @@ IHqlExpression *createDictionary(node_operator op, HqlExprArray & parms)
 
     switch (op)
     {
+    case no_newuserdictionary:
+    case no_userdictionary:
     case no_inlinedictionary:
         type.setown(makeDictionaryType(makeRowType(createRecordType(&parms.item(1)))));
         break;
@@ -15241,6 +15248,14 @@ bool recordTypesMatch(IHqlExpression * left, IHqlExpression * right)
 }
 
 
+bool recordTypesMatchIgnorePayload(IHqlExpression *left, IHqlExpression *right)
+{
+    OwnedHqlExpr simpleLeft = removeProperty(left->queryRecord(), _payload_Atom);
+    OwnedHqlExpr simpleRight = removeProperty(right->queryRecord(), _payload_Atom);
+    return recordTypesMatch(simpleLeft->queryType(), simpleRight->queryType());
+}
+
+
 IHqlExpression * queryTransformSingleAssign(IHqlExpression * transform)
 {
     if (transform->numChildren() != 1)

+ 3 - 2
ecl/hql/hqlexpr.hpp

@@ -362,8 +362,8 @@ enum _node_operator {
         no_indict,
         no_countdict,
         no_any,
-    no_unused27,
-    no_unused26,
+        no_userdictionary,
+        no_newuserdictionary,
     no_unused25,
     no_unused28,  
     no_unused29,
@@ -1564,6 +1564,7 @@ extern HQL_API IHqlExpression * extractChildren(IHqlExpression * value);
 extern HQL_API IHqlExpression * queryOnlyField(IHqlExpression * record);
 extern HQL_API bool recordTypesMatch(ITypeInfo * left, ITypeInfo * right);
 extern HQL_API bool recordTypesMatch(IHqlExpression * left, IHqlExpression * right);
+extern HQL_API bool recordTypesMatchIgnorePayload(IHqlExpression *left, IHqlExpression *right);
 extern HQL_API IHqlExpression * queryOriginalRecord(IHqlExpression * expr);
 extern HQL_API IHqlExpression * queryOriginalRecord(ITypeInfo * type);
 extern HQL_API IHqlExpression * queryOriginalTypeExpression(ITypeInfo * type);

+ 2 - 0
ecl/hql/hqlfold.cpp

@@ -5584,6 +5584,8 @@ HqlConstantPercolator * CExprFolderTransformer::gatherConstants(IHqlExpression *
         //all bets are off.
         break;
 
+    case no_newuserdictionary:
+    case no_userdictionary:
     case no_inlinedictionary:
     case no_selectmap:
         // MORE - maybe should be something here?

+ 11 - 4
ecl/hql/hqlgram.y

@@ -7113,13 +7113,20 @@ simpleDictionary
                             $$.setExpr(createDictionary(no_nohoist, $3.getExpr(), NULL));
                             $$.setPosition($1);
                         }
-/*
-    | DICTIONARY '(' dataSet ',' recordDef ')'
+
+    | DICTIONARY '(' startTopFilter ',' recordDef ')' endTopFilter
                         {
-                            $$.setExpr($3.getExpr()); // MORE!
+                            OwnedHqlExpr dataset = $3.getExpr();
+                            parser->checkOutputRecord($5, false);
+                            OwnedHqlExpr record = $5.getExpr();
+                            HqlExprArray args;
+                            args.append(*LINK(dataset));
+                            args.append(*LINK(record));
+                            $$.setExpr(createDictionary(no_userdictionary, args));
+                            parser->checkProjectedFields($$.queryExpr(), $5);
                             $$.setPosition($1);
                         }
-*/
+
     | DICTIONARY '(' '[' ']' ',' recordDef ')'
                         {
                             HqlExprArray values;  // Empty list

+ 3 - 1
ecl/hql/hqlir.cpp

@@ -802,10 +802,12 @@ static const char * getOperatorText(node_operator op)
     DUMP_CASE(no,inlinedictionary);
     DUMP_CASE(no,indict);
     DUMP_CASE(no,countdict);
+    DUMP_CASE(no,userdictionary);
+    DUMP_CASE(no,newuserdictionary);
 
     case no_unused6:
     case no_unused13: case no_unused14: case no_unused15: case no_unused18: case no_unused19:
-    case no_unused20: case no_unused21: case no_unused22: case no_unused23: case no_unused24: case no_unused25: case no_unused26: case no_unused27: case no_unused28: case no_unused29:
+    case no_unused20: case no_unused21: case no_unused22: case no_unused23: case no_unused24: case no_unused25: case no_unused28: case no_unused29:
     case no_unused30: case no_unused31: case no_unused32: case no_unused33: case no_unused34: case no_unused35: case no_unused36: case no_unused37: case no_unused38:
     case no_unused40: case no_unused41: case no_unused42: case no_unused43: case no_unused44: case no_unused45: case no_unused46: case no_unused47: case no_unused48: case no_unused49:
     case no_unused50: case no_unused52:

+ 2 - 2
ecl/hql/hqlpmap.cpp

@@ -973,11 +973,10 @@ static bool isTrivialTransform(IHqlExpression * expr, IHqlExpression * selector)
     return true;
 }
 
-
 bool isNullProject(IHqlExpression * expr, bool canLoseFieldsFromEnd)
 {
     IHqlExpression * ds = expr->queryChild(0);
-    if (!recordTypesMatch(expr, ds))
+    if (!recordTypesMatchIgnorePayload(expr, ds))
     {
         if (canLoseFieldsFromEnd)
         {
@@ -1001,6 +1000,7 @@ bool isSimpleProject(IHqlExpression * expr)
     case no_projectrow:
         selector.setown(createSelector(no_left, ds, querySelSeq(expr)));
         break;
+    case no_newuserdictionary:
     case no_newusertable:
          if (isAggregateDataset(expr))
              return false;

+ 2 - 0
ecl/hql/hqltrans.cpp

@@ -3375,6 +3375,7 @@ void ScopedTransformer::analyseChildren(IHqlExpression * expr)
     case no_setgraphresult:
     case no_setgraphloopresult:
     case no_extractresult:
+    case no_newuserdictionary:
         {
             IHqlExpression * dataset = expr->queryChild(0);
             pushScope();
@@ -3752,6 +3753,7 @@ IHqlExpression * ScopedTransformer::createTransformed(IHqlExpression * expr)
     case no_setgraphresult:
     case no_setgraphloopresult:
     case no_extractresult:
+    case no_newuserdictionary:
         {
             IHqlExpression * dataset = expr->queryChild(0);
             pushScope();

+ 7 - 0
ecl/hqlcpp/hqlcppds.cpp

@@ -2665,6 +2665,11 @@ void HqlCppTranslator::buildDatasetAssignProject(BuildCtx & ctx, IHqlCppDatasetB
 
     if (sourceCursor)
     {
+        if (isNullProject(expr, false))
+        {
+            if (target->buildLinkRow(iterctx, sourceCursor))
+                return;
+        }
         BoundRow * targetRow = target->buildCreateRow(iterctx);
         HqlExprAssociation * skipAssociation = NULL;
         if (containsSkip)
@@ -2679,6 +2684,7 @@ void HqlCppTranslator::buildDatasetAssignProject(BuildCtx & ctx, IHqlCppDatasetB
         case no_hqlproject:
             doBuildRowAssignProject(iterctx, targetRef, expr);
             break;
+        case no_newuserdictionary:
         case no_newusertable:
             doBuildRowAssignUserTable(iterctx, targetRef, expr);
             break;
@@ -2814,6 +2820,7 @@ void HqlCppTranslator::buildDatasetAssign(BuildCtx & ctx, IHqlCppDatasetBuilder
         return;
     case no_hqlproject:
     case no_newusertable:
+    case no_newuserdictionary:
         buildDatasetAssignProject(subctx, target, expr);
         return;
     case no_compound_childread:

+ 4 - 2
ecl/hqlcpp/hqlcset.cpp

@@ -578,7 +578,7 @@ void InlineLinkedDatasetCursor::buildIterateClass(BuildCtx & ctx, StringBuffer &
     ctx.addQuoted(decl);
 }
 
-BoundRow * InlineLinkedDatasetCursor::buildIterateLoop(BuildCtx & ctx, bool needToBreak)
+BoundRow * InlineLinkedDatasetCursor::doBuildIterateLoop(BuildCtx & ctx, bool needToBreak, bool checkForNull)
 {
     StringBuffer rowName;
     OwnedHqlExpr row = createRow(ctx, "row", rowName, false);
@@ -620,6 +620,8 @@ BoundRow * InlineLinkedDatasetCursor::buildIterateLoop(BuildCtx & ctx, bool need
 
     ctx.addLoop(test, NULL, false);
     ctx.addQuoted(s.clear().append(rowName).append(" = *").append(cursorName).append("++;"));
+    if (checkForNull)
+        ctx.addQuoted(s.clear().append("if (!").append(rowName).append(") continue;"));
     BoundRow * cursor = translator.bindTableCursor(ctx, ds, row);
 
     return cursor;
@@ -1677,7 +1679,7 @@ void LinkedDatasetBuilderBase::buildFinish(BuildCtx & ctx, CHqlBoundExpr & bound
 bool LinkedDatasetBuilderBase::buildLinkRow(BuildCtx & ctx, BoundRow * sourceRow)
 {
     IHqlExpression * sourceRecord = sourceRow->queryRecord();
-    if (recordTypesMatch(sourceRecord, record) && sourceRow->isBinary())
+    if (recordTypesMatchIgnorePayload(sourceRecord, record) && sourceRow->isBinary())
     {
         OwnedHqlExpr source = getPointer(sourceRow->queryBound());
         BuildCtx subctx(ctx);

+ 5 - 2
ecl/hqlcpp/hqlcset.ipp

@@ -74,9 +74,12 @@ public:
 
     virtual void buildCount(BuildCtx & ctx, CHqlBoundExpr & tgt);
     virtual void buildExists(BuildCtx & ctx, CHqlBoundExpr & tgt);
-    virtual BoundRow * buildIterateLoop(BuildCtx & ctx, bool needToBreak);
+    virtual BoundRow * buildIterateLoop(BuildCtx & ctx, bool needToBreak) { return doBuildIterateLoop(ctx, needToBreak, false); }
     virtual BoundRow * buildSelectNth(BuildCtx & ctx, IHqlExpression * indexExpr);
     virtual void buildIterateClass(BuildCtx & ctx, StringBuffer & cursorName, BuildCtx * initctx);
+
+protected:
+    BoundRow * doBuildIterateLoop(BuildCtx & ctx, bool needToBreak, bool checkForNull);
 };
 
 class InlineLinkedDictionaryCursor : public InlineLinkedDatasetCursor
@@ -84,7 +87,7 @@ class InlineLinkedDictionaryCursor : public InlineLinkedDatasetCursor
 public:
     InlineLinkedDictionaryCursor(HqlCppTranslator & _translator, IHqlExpression * _ds, CHqlBoundExpr & _boundDs);
 
-    virtual BoundRow * buildIterateLoop(BuildCtx & ctx, bool needToBreak) { throwUnexpected(); }
+    virtual BoundRow * buildIterateLoop(BuildCtx & ctx, bool needToBreak) { return doBuildIterateLoop(ctx, needToBreak, true); }
     virtual BoundRow * buildSelectMap(BuildCtx & ctx, IHqlExpression * indexExpr);
     virtual void buildInDataset(BuildCtx & ctx, IHqlExpression * inExpr, CHqlBoundExpr & tgt);
     virtual void buildIterateClass(BuildCtx & ctx, StringBuffer & cursorName, BuildCtx * initctx) { throwUnexpected(); }

+ 3 - 0
ecl/hqlcpp/hqlinline.cpp

@@ -216,6 +216,9 @@ static unsigned calcInlineFlags(BuildCtx * ctx, IHqlExpression * expr)
         return 0;       // for the moment always do this out of line 
     case no_table:
         return 0;
+    case no_newuserdictionary:
+    case no_userdictionary:
+        return RETassign;
     case no_inlinedictionary:
         return RETassign;
     case no_owned_ds:

+ 3 - 2
ecl/hqlcpp/hqlttcpp.cpp

@@ -8694,7 +8694,7 @@ IHqlExpression * HqlScopeTagger::transformSelect(IHqlExpression * expr)
     IHqlExpression * cursor = queryDatasetCursor(ds);
     if (cursor->isDataset())
     {
-        if (expr->isDataset())
+        if (expr->isDataset() || expr->isDictionary())
         {
             if (!isValidNormalizeSelector(cursor))
             {
@@ -10201,7 +10201,7 @@ IHqlExpression * HqlTreeNormalizer::convertSelectToProject(IHqlExpression * newR
     unsigned numChildren = expr->numChildren();
     for (unsigned idx = 2; idx < numChildren; idx++)
         args.append(*transform(expr->queryChild(idx)));
-    OwnedHqlExpr project = createDataset(no_newusertable, args);
+    OwnedHqlExpr project = expr->isDictionary() ? createDictionary(no_newuserdictionary, args) : createDataset(no_newusertable, args);
     return expr->cloneAllAnnotations(project);
 }
 
@@ -11269,6 +11269,7 @@ IHqlExpression * HqlTreeNormalizer::createTransformedBody(IHqlExpression * expr)
             }
             return Parent::createTransformed(cleaned);
         }
+    case no_userdictionary:
     case no_usertable:
     case no_selectfields:
         {

+ 25 - 7
rtl/eclrtl/rtlds.cpp

@@ -344,11 +344,21 @@ void RtlLinkedDatasetBuilder::appendRows(size32_t num, byte * * rows)
 {
     if (num && (count < choosenLimit))
     {
-        unsigned numToAdd = (count + num < choosenLimit) ? num : choosenLimit - count;
-        ensure(count+numToAdd);
-        for (unsigned i=0; i < numToAdd; i++)
-            rowset[count+i] = (byte *)rowAllocator->linkRow(rows[i]);
-        count += numToAdd;
+        unsigned maxNumToAdd = (count + num < choosenLimit) ? num : choosenLimit - count;
+        unsigned numAdded = 0;
+        ensure(count+maxNumToAdd);
+        for (unsigned i=0; i < num; i++)
+        {
+            byte *row = rows[i];
+            if (row)
+            {
+                rowset[count+numAdded] = (byte *)rowAllocator->linkRow(row);
+                numAdded++;
+                if (numAdded == maxNumToAdd)
+                    break;
+            }
+        }
+        count += numAdded;
     }
 }
 
@@ -501,9 +511,17 @@ void appendRowsToRowset(size32_t & targetCount, byte * * & targetRowset, IEngine
     {
         size32_t prevCount = targetCount;
         byte * * expandedRowset = rowAllocator->reallocRows(targetRowset, prevCount, prevCount+extraCount);
+        unsigned numAdded = 0;
         for (unsigned i=0; i < extraCount; i++)
-            expandedRowset[prevCount+i] = (byte *)rowAllocator->linkRow(extraRows[i]);
-        targetCount = prevCount + extraCount;
+        {
+            byte *extraRow = extraRows[i];
+            if (extraRow)
+            {
+                expandedRowset[prevCount+numAdded] = (byte *)rowAllocator->linkRow(extraRow);
+                numAdded++;
+            }
+        }
+        targetCount = prevCount + numAdded;
         targetRowset = expandedRowset;
     }
 }

+ 3 - 0
testing/ecl/dict2.ecl

@@ -37,3 +37,6 @@ count(d2n) = 1;
 count(d3) = 0;
 5 not in d3n;
 count(d3n) = 0;
+
+ds6 := dataset([{5, 'Richard'}], { integer id, string name });
+d6 := DICTIONARY(ds6, { id => name });

+ 5 - 0
testing/ecl/dict3.ecl

@@ -0,0 +1,5 @@
+ds6 := nofold(dataset([{5, 'Richard'}], { integer id, string name }));
+d6 := DICTIONARY(ds6, { id => name });
+
+5 in d6;
+count(d6)=1;