浏览代码

HPCC-8429 Implement DATASET(dictionary)

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 12 年之前
父节点
当前提交
db35f673dc

+ 3 - 1
ecl/hql/hqlattr.cpp

@@ -366,6 +366,7 @@ unsigned getOperatorMetaFlags(node_operator op)
     case no_nwaymerge:
     case no_stepped:
     case no_datasetfromrow:
+    case no_datasetfromdictionary:
     case no_assert_ds:
     case no_combine:
     case no_rollupgroup:
@@ -614,7 +615,7 @@ unsigned getOperatorMetaFlags(node_operator op)
     case no_dataset_from_transform:
 
     case no_unused6:
-    case no_unused13: case no_unused14: case no_unused15: case no_unused19:
+    case no_unused13: case no_unused14: case no_unused15:
     case no_unused20: case no_unused21: case no_unused22: case no_unused23: case no_unused24: case no_unused25: case no_unused28: case no_unused29:
     case no_unused30: case no_unused31: case no_unused32: case no_unused33: case no_unused34: case no_unused35: case no_unused36: case no_unused37: case no_unused38:
     case no_unused40: case no_unused41: case no_unused42: case no_unused43: case no_unused44: case no_unused45: case no_unused46: case no_unused47: case no_unused48: case no_unused49:
@@ -2837,6 +2838,7 @@ IHqlExpression * calcRowInformation(IHqlExpression * expr)
     case no_process:
     case no_pipe:
     case no_translated:
+    case no_datasetfromdictionary:
         //MORE could improve each of these
         info.setUnknown(RCMdisk);
         break;

+ 8 - 1
ecl/hql/hqlexpr.cpp

@@ -1448,9 +1448,10 @@ const char *getOpString(node_operator op)
     case no_childquery: return "no_childquery";
     case no_createdictionary: return "DICTIONARY";
     case no_chooseds: return "CHOOSE";
+    case no_datasetfromdictionary: return "DICTIONARY";
 
     case no_unused6:
-    case no_unused13: case no_unused14: case no_unused15: case no_unused19:
+    case no_unused13: case no_unused14: case no_unused15:
     case no_unused20: case no_unused21: case no_unused22: case no_unused23: case no_unused24: case no_unused25: case no_unused28: case no_unused29:
     case no_unused30: case no_unused31: case no_unused32: case no_unused33: case no_unused34: case no_unused35: case no_unused36: case no_unused37: case no_unused38:
     case no_unused40: case no_unused41: case no_unused42: case no_unused43: case no_unused44: case no_unused45: case no_unused46: case no_unused47: case no_unused48: case no_unused49:
@@ -1785,6 +1786,7 @@ childDatasetType getChildDatasetType(IHqlExpression * expr)
     case no_id2blob:
     case no_cppbody:
     case no_datasetfromrow:
+    case no_datasetfromdictionary:
     case no_createrow:
     case no_param:
     case no_typetransfer:
@@ -2196,6 +2198,7 @@ inline unsigned doGetNumChildTables(IHqlExpression * dataset)
     case no_id2blob:
     case no_cppbody:
     case no_datasetfromrow:
+    case no_datasetfromdictionary:
     case no_param:
     case no_translated:
     case no_call:
@@ -2488,6 +2491,7 @@ bool definesColumnList(IHqlExpression * dataset)
     case no_externalcall:
     case no_projectrow:
     case no_datasetfromrow:
+    case no_datasetfromdictionary:
     case no_forcelocal:                 // for the moment this defines a table, otherwise the transforms get rather tricky.
     case no_forcenolocal:
     case no_allnodes:
@@ -5902,6 +5906,7 @@ void CHqlDataset::cacheParent()
     case no_inlinetable:
     case no_xmlproject:
     case no_datasetfromrow:
+    case no_datasetfromdictionary:
     case no_fail:
     case no_skip:
     case no_field:
@@ -10836,6 +10841,7 @@ IHqlExpression *createDataset(node_operator op, HqlExprArray & parms)
     case no_fail:
     case no_skip:
     case no_datasetfromrow:
+    case no_datasetfromdictionary:
     case no_if:
     case no_translated:
     case no_rows:
@@ -11759,6 +11765,7 @@ IHqlExpression *createDataset(node_operator op, HqlExprArray & parms)
         type.set(childType);
         break;
     case no_datasetfromrow:
+    case no_datasetfromdictionary:
         type.setown(makeTableType(makeRowType(createRecordType(&parms.item(0))), NULL, NULL, NULL));
         break;
     default:

+ 1 - 1
ecl/hql/hqlexpr.hpp

@@ -348,7 +348,7 @@ enum _node_operator {
         no_shuffle,
         no_chooseds,
         no_alias,
-    no_unused19,
+        no_datasetfromdictionary,
     no_unused20,
     no_unused21,
     no_unused22,

+ 1 - 0
ecl/hql/hqlfold.cpp

@@ -5568,6 +5568,7 @@ HqlConstantPercolator * CExprFolderTransformer::gatherConstants(IHqlExpression *
     case no_stepped:
     case no_cluster:
     case no_datasetfromrow:
+    case no_datasetfromdictionary:
     case no_filtergroup:
     case no_section:
     case no_sectioninput:

+ 5 - 0
ecl/hql/hqlgram.y

@@ -8932,6 +8932,11 @@ simpleDataSet
                             $$.setExpr(createDatasetFromRow(row));
                             $$.setPosition($1);
                         }
+    | DATASET '(' dictionary ')'
+                        {
+                            IHqlExpression * dictionary = $3.getExpr();
+                            $$.setExpr(createDataset(no_datasetfromdictionary, dictionary), $1);
+                        }
     | _EMPTY_ '(' recordDef ')'
                         {
                             IHqlExpression * record = $3.getExpr();

+ 1 - 1
ecl/hql/hqlir.cpp

@@ -820,7 +820,7 @@ static const char * getOperatorText(node_operator op)
     DUMP_CASE(no,chooseds);
 
     case no_unused6:
-    case no_unused13: case no_unused14: case no_unused15: case no_unused19:
+    case no_unused13: case no_unused14: case no_unused15:
     case no_unused20: case no_unused21: case no_unused22: case no_unused23: case no_unused24: case no_unused25: case no_unused28: case no_unused29:
     case no_unused30: case no_unused31: case no_unused32: case no_unused33: case no_unused34: case no_unused35: case no_unused36: case no_unused37: case no_unused38:
     case no_unused40: case no_unused41: case no_unused42: case no_unused43: case no_unused44: case no_unused45: case no_unused46: case no_unused47: case no_unused48: case no_unused49:

+ 3 - 0
ecl/hql/hqlutil.cpp

@@ -1523,6 +1523,7 @@ unsigned getNumActivityArguments(IHqlExpression * expr)
     case no_thisnode:
     case no_keydiff:
     case no_keypatch:
+    case no_datasetfromdictionary:
         return 0;
     case no_setresult:
         if (expr->queryChild(0)->isAction())
@@ -1665,6 +1666,7 @@ bool isDistributedSourceActivity(IHqlExpression * expr)
     case no_compound_selectnew:
     case no_compound_inline:
     case no_rows:
+    case no_datasetfromdictionary:
         return false;
     default:
         UNIMPLEMENTED;
@@ -1691,6 +1693,7 @@ bool isSourceActivity(IHqlExpression * expr, bool ignoreCompound)
     case no_rows:
     case no_allnodes:
     case no_thisnode:
+    case no_datasetfromdictionary:
         return true;
     case no_null:
         return expr->isDataset();

+ 1 - 0
ecl/hqlcpp/hqlcpp.ipp

@@ -1189,6 +1189,7 @@ public:
 
     BoundRow * buildDatasetIterateSelectN(BuildCtx & ctx, IHqlExpression * expr, bool needToBreak);
     BoundRow * buildDatasetIterateChoosen(BuildCtx & ctx, IHqlExpression * expr, bool needToBreak);
+    BoundRow * buildDatasetIterateFromDictionary(BuildCtx & ctx, IHqlExpression * expr, bool needToBreak);
     BoundRow * buildDatasetIterateLimit(BuildCtx & ctx, IHqlExpression * expr, bool needToBreak);
     BoundRow * buildDatasetIterateProject(BuildCtx & ctx, IHqlExpression * expr, bool needToBreak);
     BoundRow * buildDatasetIterateUserTable(BuildCtx & ctx, IHqlExpression * expr, bool needToBreak);

+ 11 - 0
ecl/hqlcpp/hqlcppds.cpp

@@ -3378,6 +3378,15 @@ BoundRow * HqlCppTranslator::buildDatasetIterateSpecialTempTable(BuildCtx & ctx,
 }
 
 
+BoundRow * HqlCppTranslator::buildDatasetIterateFromDictionary(BuildCtx & ctx, IHqlExpression * expr, bool needToBreak) // Builds the iteration for a no_datasetfromdictionary expression: iterates its operand (child 0) and returns that row rebound to expr.
+{
+    BoundRow * dictionaryRow = buildDatasetIterate(ctx, expr->queryChild(0), needToBreak); // iterate the operand; needToBreak is passed through unchanged
+    assertex(dictionaryRow->isConditional()); // the operand's cursor must have flagged its row as conditional - presumably because a dictionary slot can hold a NULL row (TODO confirm)
+    ctx.addFilter(dictionaryRow->queryBound()); // add a filter to ctx on the row's bound expression - presumably so later generated code only sees non-NULL rows (TODO confirm)
+    return rebindTableCursor(ctx, expr, dictionaryRow, no_none, NULL); // hand the operand's row back as the cursor for expr itself
+}
+
+
 BoundRow * HqlCppTranslator::buildDatasetIterateStreamedCall(BuildCtx & ctx, IHqlExpression * expr, bool needToBreak)
 {
     CHqlBoundExpr bound;
@@ -3523,6 +3532,8 @@ BoundRow * HqlCppTranslator::buildDatasetIterate(BuildCtx & ctx, IHqlExpression
             }
             break;
         }
+    case no_datasetfromdictionary:
+        return buildDatasetIterateFromDictionary(ctx, expr, needToBreak);
     case no_call:
     case no_externalcall:
         if (hasStreamedModifier(expr->queryType()))

+ 1 - 0
ecl/hqlcpp/hqlcse.cpp

@@ -560,6 +560,7 @@ bool CseSpotter::checkPotentialCSE(IHqlExpression * expr, CseSpotterInfo * extra
     case no_inlinetable:
     case no_xmlproject:
     case no_datasetfromrow:
+    case no_datasetfromdictionary:
     case no_preservemeta:
     case no_dataset_alias:
     case no_workunit_dataset:

+ 2 - 3
ecl/hqlcpp/hqlcset.cpp

@@ -619,10 +619,9 @@ BoundRow * InlineLinkedDatasetCursor::doBuildIterateLoop(BuildCtx & ctx, bool ne
 
     ctx.addLoop(test, NULL, false);
     ctx.addQuoted(s.clear().append(rowName).append(" = *").append(cursorName).append("++;"));
-    if (checkForNull)
-        ctx.addQuoted(s.clear().append("if (!").append(rowName).append(") continue;"));
     BoundRow * cursor = translator.bindTableCursor(ctx, ds, row);
-
+    if (checkForNull)
+        cursor->setConditional(true);
     return cursor;
 }
 

+ 4 - 0
ecl/hqlcpp/hqlhtcpp.cpp

@@ -400,6 +400,7 @@ public:
             //The expressions in the transform may contain datasets
         case no_addfiles:
         case no_datasetfromrow:
+        case no_datasetfromdictionary:
         case no_alias_scope:
             //child datasets may have something worth creating a graph for
         case no_if:
@@ -6126,6 +6127,9 @@ ABoundActivity * HqlCppTranslator::buildActivity(BuildCtx & ctx, IHqlExpression
                         result = buildCachedActivity(ctx, row);
                     break;
                 }
+            case no_datasetfromdictionary:
+                result = doBuildActivityChildDataset(ctx, expr);
+                break;
             case no_temptable:
                 result = doBuildActivityTempTable(ctx, expr);
                 break;

+ 4 - 0
ecl/hqlcpp/hqlinline.cpp

@@ -219,6 +219,8 @@ static unsigned calcInlineFlags(BuildCtx * ctx, IHqlExpression * expr)
         return 0;
     case no_createdictionary:
         return RETassign;
+    case no_datasetfromdictionary:
+        return RETiterate;
     case no_owned_ds:
         {
             unsigned childFlags = getInlineFlags(ctx, expr->queryChild(0));
@@ -624,6 +626,8 @@ GraphLocalisation queryActivityLocalisation(IHqlExpression * expr)
             }
             break;
         }
+    case no_datasetfromdictionary:
+        return GraphCoLocal;
     case no_createrow:
     case no_inlinetable:
         {

+ 1 - 0
ecl/hqlcpp/hqliproj.cpp

@@ -1960,6 +1960,7 @@ ProjectExprKind ImplicitProjectTransformer::getProjectExprKind(IHqlExpression *
     case no_nwayjoin:           // could probably project output of this one...
     case no_nwaymerge:
     case no_libraryselect:
+    case no_datasetfromdictionary:
         return SourceActivity;
     case no_setresult:
     case no_ensureresult:

+ 2 - 0
ecl/hqlcpp/hqlresource.cpp

@@ -560,6 +560,7 @@ bool queryAddUniqueToActivity(IHqlExpression * expr)
     case no_getgraphloopresult:
     case no_xmlproject:
     case no_datasetfromrow:
+    case no_datasetfromdictionary:
     case no_rows:
     case no_allnodes:
     case no_thisnode:
@@ -1565,6 +1566,7 @@ bool ResourcerInfo::expandRatherThanSpill(bool noteOtherSpills)
         case no_compound_childgroupaggregate:
         case no_compound_selectnew:
         case no_compound_inline:
+        case no_datasetfromdictionary:
             expr = expr->queryChild(0);
             break;
         case no_filter:

+ 1 - 0
ecl/hqlcpp/hqlsource.cpp

@@ -162,6 +162,7 @@ bool isSimpleSource(IHqlExpression * expr)
         case no_xmlproject:
         case no_null:
         case no_datasetfromrow:
+        case no_datasetfromdictionary:
         case no_getgraphresult:
         case no_getgraphloopresult:
         case no_rows:

+ 1 - 0
ecl/hqlcpp/hqlttcpp.cpp

@@ -69,6 +69,7 @@ static bool isWorthHoisting(IHqlExpression * expr, bool asSubQuery)
         case no_independent:
         case no_field:
         case no_datasetfromrow:
+        case no_datasetfromdictionary:
         case no_null:
         case no_workunit_dataset:
         case no_colon:

+ 3 - 1
testing/ecl/dict14.ecl

@@ -33,4 +33,6 @@ squareRoots := DICTIONARY(squares, { UNSIGNED value := square => UNSIGNED root :
 //Use a dictionary as a dataset
 values := NOFOLD(DATASET(squareRoots)(value = root * root));
 
-OUTPUT(values);
+OUTPUT(TABLE(values, { cnt := COUNT(GROUP)}));
+OUTPUT(CHOOSEN(SORT(values, value), 10));
+

+ 15 - 0
testing/ecl/key/dict14.xml

@@ -0,0 +1,15 @@
+<Dataset name='Result 1'>
+ <Row><cnt>100</cnt></Row>
+</Dataset>
+<Dataset name='Result 2'>
+ <Row><value>0</value><root>0</root></Row>
+ <Row><value>1</value><root>1</root></Row>
+ <Row><value>4</value><root>2</root></Row>
+ <Row><value>9</value><root>3</root></Row>
+ <Row><value>16</value><root>4</root></Row>
+ <Row><value>25</value><root>5</root></Row>
+ <Row><value>36</value><root>6</root></Row>
+ <Row><value>49</value><root>7</root></Row>
+ <Row><value>64</value><root>8</root></Row>
+ <Row><value>81</value><root>9</root></Row>
+</Dataset>