فهرست منبع

HPCC-8486 Optimize DICTIONARY(null-dataset)

Also adds support for EXISTS(dictionary) and optimizes the null case.

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 12 سال پیش
والد
کامیت
22a9717356

+ 2 - 1
ecl/hql/hqlattr.cpp

@@ -229,6 +229,7 @@ unsigned getOperatorMetaFlags(node_operator op)
 //Aggregate operators
     case no_count:
     case no_exists:
+    case no_existsdict:
     case no_max:
     case no_min:
     case no_sum:
@@ -621,7 +622,7 @@ unsigned getOperatorMetaFlags(node_operator op)
     case no_unused40: case no_unused41: case no_unused42: case no_unused43: case no_unused44: case no_unused45: case no_unused46: case no_unused47: case no_unused48: case no_unused49:
     case no_unused50: case no_unused52:
     case no_unused80: case no_unused83:
-    case no_unused100: case no_unused101:
+    case no_unused101:
     case no_is_null:
     case no_position:
     case no_current_time:

+ 1 - 2
ecl/hql/hqlexpr.cpp

@@ -994,7 +994,7 @@ const char *getOpString(node_operator op)
     case no_mapto: return "=>";
     case no_constant: return "<constant>";
     case no_field: return "<field>";
-    case no_exists: case no_existslist: return "EXISTS";
+    case no_exists: case no_existslist: case no_existsdict: return "EXISTS";
     case no_existsgroup: return "EXISTS";
     case no_select: return ".";
     case no_table: return "DATASET";
@@ -1460,7 +1460,6 @@ const char *getOpString(node_operator op)
     case no_unused80:
     case no_unused81:
     case no_unused83:
-    case no_unused100:
     case no_unused101:
         return "unused";
     /* if fail, use "hqltest -internal" to find out why. */

+ 1 - 1
ecl/hql/hqlexpr.hpp

@@ -361,7 +361,7 @@ enum _node_operator {
         no_indict,
         no_countdict,
         no_any,
-    no_unused100,
+        no_existsdict,
     no_unused101,
     no_unused25,
     no_unused28,  

+ 29 - 1
ecl/hql/hqlfold.cpp

@@ -1948,6 +1948,10 @@ IHqlExpression * foldConstantOperator(IHqlExpression * expr, unsigned foldOption
             }
             break;
         }
+    case no_indict:
+        if (isNull(expr->queryChild(1)))
+            return createConstant(false);
+        break;
     case no_in:
     case no_notin:
         {
@@ -3002,7 +3006,23 @@ IHqlExpression * foldConstantOperator(IHqlExpression * expr, unsigned foldOption
         {
             IHqlExpression * child = expr->queryChild(0);
             node_operator childOp = child->getOperator();
-            //MORE: Can't really optimize count of a dictionary since the input dataset may contain duplicates which will be removed.
+            // Can't optimize count of a dictionary in general, since the input dataset may contain duplicates which will be removed.
+            switch (child->getOperator())
+            {
+            case no_null:
+                return createConstant(0);
+            }
+            break;
+        }
+    case no_existsdict:
+        {
+            IHqlExpression * child = expr->queryChild(0);
+            node_operator childOp = child->getOperator();
+            switch (child->getOperator())
+            {
+            case no_null:
+                return createConstant(false);
+            }
             break;
         }
     case no_countlist:
@@ -3765,6 +3785,14 @@ IHqlExpression * NullFolderMixin::foldNullDataset(IHqlExpression * expr)
         if (isNull(child) && isNull(expr->queryChild(1)))
             return replaceWithNull(expr);
         break;
+    case no_createdictionary:
+        if (isNull(child))
+            return replaceWithNull(expr);
+        break;
+    case no_selectmap:
+        if (isNull(child))
+            return replaceWithNullRow(child);
+        break;
     case no_selectnth:
 //      if (isNull(child) || isZero(expr->queryChild(1)))
         if (isNull(child))

+ 4 - 0
ecl/hql/hqlgram.y

@@ -5459,6 +5459,10 @@ primexpr1
                             $$.setExpr(createBoolExpr(no_exists, $3.getExpr(), $4.getExpr()));
                             $$.setPosition($1);
                         }
+    | EXISTS '(' dictionary ')'
+                        {
+                            $$.setExpr(createValue(no_existsdict, makeBoolType(), $3.getExpr()));
+                        }
     | MAP '(' mapSpec ',' expression ')'
                         {
                             parser->normalizeExpression($5);

+ 1 - 1
ecl/hql/hqlir.cpp

@@ -281,7 +281,7 @@ const char * getOperatorIRText(node_operator op)
     EXPAND_CASE(no,indict);
     EXPAND_CASE(no,countdict);
     EXPAND_CASE(no,any);
-    EXPAND_CASE(no,unused100);
+    EXPAND_CASE(no,existsdict);
     EXPAND_CASE(no,unused101);
     EXPAND_CASE(no,unused25);
     EXPAND_CASE(no,unused28);

+ 2 - 0
ecl/hqlcpp/hqlcatom.cpp

@@ -242,6 +242,7 @@ _ATOM destroyRegexAtom;
 _ATOM destroyWRegexAtom;
 _ATOM destructMetaMemberAtom;
 _ATOM dictionaryCountAtom;
+_ATOM dictionaryExistsAtom;
 _ATOM dictionaryLookupAtom;
 _ATOM dictionaryLookupExistsAtom;
 _ATOM doNotifyAtom;
@@ -962,6 +963,7 @@ MODULE_INIT(INIT_PRIORITY_HQLATOM-1)
     MAKEATOM(destroyWRegex);
     MAKEATOM(destructMetaMember);
     MAKEATOM(dictionaryCount);
+    MAKEATOM(dictionaryExists);
     MAKEATOM(dictionaryLookup);
     MAKEATOM(dictionaryLookupExists);
     MAKEATOM(doNotify);

+ 1 - 0
ecl/hqlcpp/hqlcatom.hpp

@@ -242,6 +242,7 @@ extern _ATOM destroyRegexAtom;
 extern _ATOM destroyWRegexAtom;
 extern _ATOM destructMetaMemberAtom;
 extern _ATOM dictionaryCountAtom;
+extern _ATOM dictionaryExistsAtom;
 extern _ATOM dictionaryLookupAtom;
 extern _ATOM dictionaryLookupExistsAtom;
 extern _ATOM doNotifyAtom;

+ 10 - 0
ecl/hqlcpp/hqlcpp.cpp

@@ -2860,6 +2860,10 @@ void HqlCppTranslator::buildExpr(BuildCtx & ctx, IHqlExpression * expr, CHqlBoun
         if (!(expr->isPure() && ctx.getMatchExpr(expr, tgt)))
             doBuildExprCountDict(ctx, expr, tgt);
         return;
+    case no_existsdict:
+        if (!(expr->isPure() && ctx.getMatchExpr(expr, tgt)))
+            doBuildExprExistsDict(ctx, expr, tgt);
+        return;
     case no_existslist:
         doBuildAggregateList(ctx, NULL, expr, &tgt);
         return;
@@ -5183,6 +5187,12 @@ void HqlCppTranslator::doBuildExprCountDict(BuildCtx & ctx, IHqlExpression * exp
     cursor->buildCountDict(ctx, tgt); // not the same as buildCount - that is the size of the table, we want the number of populated entries
 }
 
+void HqlCppTranslator::doBuildExprExistsDict(BuildCtx & ctx, IHqlExpression * expr, CHqlBoundExpr & tgt)
+{   // Generates code for a no_existsdict expression, leaving the bound result in tgt.
+    IHqlExpression *dict = expr->queryChild(0);    // operand 0 is the dictionary being tested
+    Owned<IHqlCppDatasetCursor> cursor = createDatasetSelector(ctx, dict);
+    cursor->buildExistsDict(ctx, tgt);  // the cursor implementation decides how the test is emitted
+}
 
 //---------------------------------------------------------------------------
 

+ 2 - 0
ecl/hqlcpp/hqlcpp.ipp

@@ -177,6 +177,7 @@ interface IHqlCppDatasetCursor : public IInterface
     virtual void buildInDataset(BuildCtx & ctx, IHqlExpression * inExpr, CHqlBoundExpr & tgt) = 0;
     virtual void buildIterateMembers(BuildCtx & declarectx, BuildCtx & initctx) = 0;
     virtual void buildCountDict(BuildCtx & ctx, CHqlBoundExpr & tgt) = 0;
+    virtual void buildExistsDict(BuildCtx & ctx, CHqlBoundExpr & tgt) = 0;
 };
 
 interface IHqlCppSetCursor : public IInterface
@@ -1282,6 +1283,7 @@ public:
     void doBuildExprEmbedBody(BuildCtx & ctx, IHqlExpression * expr, CHqlBoundExpr * tgt);
     void doBuildExprEvaluate(BuildCtx & ctx, IHqlExpression * expr, CHqlBoundExpr & tgt);
     void doBuildExprExists(BuildCtx & ctx, IHqlExpression * expr, CHqlBoundExpr & tgt);
+    void doBuildExprExistsDict(BuildCtx & ctx, IHqlExpression * expr, CHqlBoundExpr & tgt);
     void doBuildExprFailCode(BuildCtx & ctx, IHqlExpression * expr, CHqlBoundExpr & tgt);
     void doBuildExprField(BuildCtx & ctx, IHqlExpression * expr, CHqlBoundExpr & tgt);
     void doBuildExprFileLogicalName(BuildCtx & ctx, IHqlExpression * expr, CHqlBoundExpr & tgt);

+ 6 - 2
ecl/hqlcpp/hqlcppds.cpp

@@ -2069,8 +2069,12 @@ void HqlCppTranslator::doBuildDataset(BuildCtx & ctx, IHqlExpression * expr, CHq
             tgt.count.setown(getSizetConstant(0));
             tgt.length.setown(getSizetConstant(0));
             IHqlExpression * record = expr->queryRecord();
-            Owned<ITypeInfo> type = makeTableType(makeRowType(record->getType()), NULL, NULL, NULL);
-            if ((format == FormatLinkedDataset) || (format == FormatArrayDataset))
+            Owned<ITypeInfo> type;
+            if (expr->isDictionary())
+                type.setown(makeDictionaryType(makeRowType(record->getType())));
+            else
+                type.setown(makeTableType(makeRowType(record->getType()), NULL, NULL, NULL));
+            if ((format == FormatLinkedDataset) || (format == FormatArrayDataset) || expr->isDictionary())
                 type.setown(setLinkCountedAttr(type, true));
             tgt.expr.setown(createValue(no_nullptr, makeReferenceModifier(type.getClear())));
             return;

+ 1 - 0
ecl/hqlcpp/hqlcppsys.ecl

@@ -820,6 +820,7 @@ const char * cppSystemText[]  = {
 
     // Dictionary support
     "    integer8 dictionaryCount(_linkcounted_ dictionary dict) : eclrtl,include,pure,entrypoint='rtlDictionaryCount';",
+    "    boolean dictionaryExists(_linkcounted_ dictionary dict) : eclrtl,include,pure,entrypoint='rtlDictionaryExists';",
     "   _linkcounted_ row(dummyRecord) dictionaryLookup(IHThorHashLookupInfo meta, _linkcounted_ dictionary dict, row key, _linkcounted_ row defaultrow) : eclrtl,include,pure,entrypoint='rtlDictionaryLookup';",
     "   _linkcounted_ row(dummyRecord) dictionaryLookupString(_linkcounted_ dictionary dict, const string key, _linkcounted_ row defaultrow) : eclrtl,include,pure,entrypoint='rtlDictionaryLookupString';",
     "   _linkcounted_ row(dummyRecord) dictionaryLookupStringN(_linkcounted_ dictionary dict, const unsigned4 size, const string key, _linkcounted_ row defaultrow) : eclrtl,include,pure,entrypoint='rtlDictionaryLookupStringN';",

+ 15 - 1
ecl/hqlcpp/hqlcset.cpp

@@ -139,6 +139,12 @@ void BaseDatasetCursor::buildCountDict(BuildCtx & ctx, CHqlBoundExpr & tgt)
     throwUnexpected();
 }
 
+void BaseDatasetCursor::buildExistsDict(BuildCtx & ctx, CHqlBoundExpr & tgt)
+{
+    // Should only be seen for dictionaries; InlineLinkedDictionaryCursor defines its own buildExistsDict, so reaching this version is treated as an internal error.
+    throwUnexpected();
+}
+
 void BaseDatasetCursor::buildInDataset(BuildCtx & ctx, IHqlExpression * inExpr, CHqlBoundExpr & tgt)
 {
     // Should only be seen for dictionaries, for now
@@ -845,7 +851,15 @@ void InlineLinkedDictionaryCursor::buildCountDict(BuildCtx & ctx, CHqlBoundExpr
 {
     HqlExprArray args;
     args.append(*LINK(ds));
-    OwnedHqlExpr call = translator.bindFunctionCall(dictionaryCountAtom, args, makeBoolType());
+    OwnedHqlExpr call = translator.bindFunctionCall(dictionaryCountAtom, args, makeIntType(8, false));
+    translator.buildExpr(ctx, call, tgt);
+}
+
+void InlineLinkedDictionaryCursor::buildExistsDict(BuildCtx & ctx, CHqlBoundExpr & tgt)
+{   // Emits EXISTS(dictionary) as a call to the dictionaryExists system function.
+    HqlExprArray args;
+    args.append(*LINK(ds));    // ds is passed as the function's single 'dict' argument
+    OwnedHqlExpr call = translator.bindFunctionCall(dictionaryExistsAtom, args, makeBoolType());   // hqlcppsys.ecl maps dictionaryExists to entrypoint rtlDictionaryExists
     translator.buildExpr(ctx, call, tgt);  // translate the bound call, leaving the result in tgt
 }
 

+ 2 - 0
ecl/hqlcpp/hqlcset.ipp

@@ -30,6 +30,7 @@ public:
     virtual void buildInDataset(BuildCtx & ctx, IHqlExpression * inExpr, CHqlBoundExpr & tgt);
     virtual void buildIterateMembers(BuildCtx & declarectx, BuildCtx & initctx);
     virtual void buildCountDict(BuildCtx & ctx, CHqlBoundExpr & tgt);
+    virtual void buildExistsDict(BuildCtx & ctx, CHqlBoundExpr & tgt);
 
 protected:
     virtual void buildIterateClass(BuildCtx & ctx, StringBuffer & cursorName, BuildCtx * initctx) = 0;
@@ -91,6 +92,7 @@ public:
     virtual void buildInDataset(BuildCtx & ctx, IHqlExpression * inExpr, CHqlBoundExpr & tgt);
     virtual void buildIterateClass(BuildCtx & ctx, StringBuffer & cursorName, BuildCtx * initctx) { throwUnexpected(); }
     virtual void buildCountDict(BuildCtx & ctx, CHqlBoundExpr & tgt);
+    virtual void buildExistsDict(BuildCtx & ctx, CHqlBoundExpr & tgt);
 };
 
 class MultiLevelDatasetCursor : public BaseDatasetCursor

+ 9 - 0
rtl/eclrtl/rtlds.cpp

@@ -662,6 +662,15 @@ extern ECLRTL_API unsigned __int64 rtlDictionaryCount(size32_t tableSize, byte *
     return ret;
 }
 
+extern ECLRTL_API bool rtlDictionaryExists(size32_t tableSize, byte **table)
+{
+    // Returns true if any of the tableSize slots in table is non-null, stopping at the first occupied slot.
+    for (size32_t i = 0; i < tableSize; i++)
+        if (table[i])
+            return true;
+    return false;
+}
+
 extern ECLRTL_API byte *rtlDictionaryLookup(IHThorHashLookupInfo &hashInfo, size32_t tableSize, byte **table, const byte *source, byte *defaultRow)
 {
     if (!tableSize)

+ 1 - 0
rtl/eclrtl/rtlds_imp.hpp

@@ -437,6 +437,7 @@ protected:
 };
 
 extern ECLRTL_API unsigned __int64 rtlDictionaryCount(size32_t tableSize, byte **table);
+extern ECLRTL_API bool rtlDictionaryExists(size32_t tableSize, byte **table);
 extern ECLRTL_API byte *rtlDictionaryLookup(IHThorHashLookupInfo &hashInfo, size32_t tableSize, byte **table, const byte *source, byte *defaultRow);
 extern ECLRTL_API byte *rtlDictionaryLookupString(size32_t tableSize, byte **table, size32_t len, const char *source, byte *defaultRow);
 extern ECLRTL_API byte *rtlDictionaryLookupStringN(size32_t tableSize, byte **table, size32_t N, size32_t len, const char *source, byte *defaultRow);

+ 22 - 0
testing/ecl/dict_null.ecl

@@ -0,0 +1,22 @@
+ds := dataset([], { string key, string value} );
+ds2 := dataset([], { string key, string value} ) : stored('mystery');  // NOTE(review): presumably STORED so that its emptiness is not known at compile time - confirm
+
+dict1 := dictionary(ds, {key => value});  // dictionary built from the inline empty dataset
+dict2 := dictionary([], {STRING key => STRING value { default('here')} });  // empty inline dictionary whose value field has a default
+dict3 := dictionary(ds2, {key => value});  // dictionary built from the stored dataset
+
+
+dict1['s'].value;  // Results 1-4 in key/dict_null.xml: blank, false, 0, false
+exists(dict1);
+count(dict1);
+'s' IN dict1;
+
+dict2['s'].value;  // Results 5-8 in key/dict_null.xml: 'here' (the field default), false, 0, false
+exists(dict2);
+count(dict2);
+'s' IN dict2;
+
+dict3['s'].value;  // Results 9-12 in key/dict_null.xml: blank, false, 0, false
+exists(dict3);
+count(dict3);
+'s' IN dict3;

+ 36 - 0
testing/ecl/key/dict_null.xml

@@ -0,0 +1,36 @@
+<Dataset name='Result 1'>
+ <Row><Result_1></Result_1></Row>
+</Dataset>
+<Dataset name='Result 2'>
+ <Row><Result_2>false</Result_2></Row>
+</Dataset>
+<Dataset name='Result 3'>
+ <Row><Result_3>0</Result_3></Row>
+</Dataset>
+<Dataset name='Result 4'>
+ <Row><Result_4>false</Result_4></Row>
+</Dataset>
+<Dataset name='Result 5'>
+ <Row><Result_5>here</Result_5></Row>
+</Dataset>
+<Dataset name='Result 6'>
+ <Row><Result_6>false</Result_6></Row>
+</Dataset>
+<Dataset name='Result 7'>
+ <Row><Result_7>0</Result_7></Row>
+</Dataset>
+<Dataset name='Result 8'>
+ <Row><Result_8>false</Result_8></Row>
+</Dataset>
+<Dataset name='Result 9'>
+ <Row><Result_9></Result_9></Row>
+</Dataset>
+<Dataset name='Result 10'>
+ <Row><Result_10>false</Result_10></Row>
+</Dataset>
+<Dataset name='Result 11'>
+ <Row><Result_11>0</Result_11></Row>
+</Dataset>
+<Dataset name='Result 12'>
+ <Row><Result_12>false</Result_12></Row>
+</Dataset>