Browse Source

HPCC-10068 Implement syntax for JOIN(,SMART)

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 11 years ago
parent
commit
ded28b3cdf

+ 3 - 1
ecl/hql/hqlatoms.cpp

@@ -350,11 +350,12 @@ IAtom * setAtom;
 IAtom * sharedAtom;
 IAtom * shutdownAtom;
 IAtom * _sideEffect_Atom;
+IAtom * singleAtom;
 IAtom * sizeAtom;
 IAtom * sizeofAtom;
 IAtom * skewAtom;
 IAtom * skipAtom;
-IAtom * singleAtom;
+IAtom * smartAtom;
 IAtom * snapshotAtom;
 IAtom * soapActionAtom;
 IAtom * httpHeaderAtom;
@@ -760,6 +761,7 @@ MODULE_INIT(INIT_PRIORITY_HQLATOM)
     MAKEATOM(sizeof);
     MAKEATOM(skew);
     MAKEATOM(skip);
+    MAKEATOM(smart);
     MAKEATOM(snapshot);
     MAKEATOM(soapAction);
     sort_AllAtom = createLowerCaseAtom("SORT ALL");

+ 1 - 0
ecl/hql/hqlatoms.hpp

@@ -358,6 +358,7 @@ extern HQL_API IAtom * sizeAtom;
 extern HQL_API IAtom * sizeofAtom;
 extern HQL_API IAtom * skewAtom;
 extern HQL_API IAtom * skipAtom;
+extern HQL_API IAtom * smartAtom;
 extern HQL_API IAtom * snapshotAtom;
 extern HQL_API IAtom * soapActionAtom;
 extern HQL_API IAtom * httpHeaderAtom;

+ 2 - 2
ecl/hql/hqlexpr.cpp

@@ -14305,7 +14305,7 @@ bool isKeyedJoin(IHqlExpression * expr)
     node_operator op = expr->getOperator();
     if ((op == no_join) || (op == no_joincount) || (op == no_denormalize) || (op == no_denormalizegroup))
     {
-        if (expr->hasAttribute(allAtom) || expr->hasAttribute(lookupAtom))
+        if (expr->hasAttribute(allAtom) || expr->hasAttribute(lookupAtom) || expr->hasAttribute(smartAtom))
             return false;
         if (expr->hasAttribute(keyedAtom) || containsAssertKeyed(expr->queryChild(2)))
             return true;
@@ -14385,7 +14385,7 @@ bool isSelfJoin(IHqlExpression * expr)
     if (datasetL->queryBody() != datasetR->queryBody())
         return false;
 
-    if (expr->hasAttribute(allAtom) || expr->hasAttribute(lookupAtom))
+    if (expr->hasAttribute(allAtom) || expr->hasAttribute(lookupAtom) || expr->hasAttribute(smartAtom))
         return false;
 
     if (expr->queryChild(2)->isConstant())

+ 2 - 0
ecl/hql/hqlgram.y

@@ -396,6 +396,7 @@ static void eclsyntaxerror(HqlGram * parser, const char * s, short yystate, int
   SIZEOF
   SKEW
   SKIP
+  SMART
   SOAPACTION
   SOAPCALL
   SORT
@@ -10040,6 +10041,7 @@ JoinFlag
     | GROUPED           {   $$.setExpr(createComma(createAttribute(lookupAtom), createAttribute(manyAtom), createAttribute(groupedAtom))); $$.setPosition($1); }
     | MANY              {   $$.setExpr(createAttribute(manyAtom)); $$.setPosition($1); }
     | LOOKUP            {   $$.setExpr(createAttribute(lookupAtom)); $$.setPosition($1); }
+    | SMART             {   $$.setExpr(createAttribute(smartAtom)); $$.setPosition($1); }
     | NOSORT            {   $$.setExpr(createAttribute(noSortAtom)); $$.setPosition($1); }
     | NOSORT '(' LEFT ')'
                         {   $$.setExpr(createAttribute(noSortAtom, createAttribute(leftAtom))); $$.setPosition($1); }

+ 5 - 4
ecl/hql/hqlgram2.cpp

@@ -7659,8 +7659,8 @@ void HqlGram::checkJoinFlags(const attribute &err, IHqlExpression * join)
     IHqlExpression * keyed = join->queryAttribute(keyedAtom);
     if (keyed)
     {
-        if (join->hasAttribute(allAtom) || join->hasAttribute(lookupAtom))
-            reportError(ERR_KEYEDINDEXINVALID, err, "LOOKUP/ALL not compatible with KEYED");
+        if (join->hasAttribute(allAtom) || join->hasAttribute(lookupAtom) || join->hasAttribute(smartAtom))
+            reportError(ERR_KEYEDINDEXINVALID, err, "LOOKUP/ALL/SMART not compatible with KEYED");
 
         IHqlExpression * index = keyed->queryChild(0);
         if (index)
@@ -7705,9 +7705,9 @@ void HqlGram::checkJoinFlags(const attribute &err, IHqlExpression * join)
             }
         }
     }
-    if (join->hasAttribute(lookupAtom))
+    if (join->hasAttribute(lookupAtom) || join->hasAttribute(smartAtom))
     {
-        bool isMany = join->hasAttribute(manyAtom);
+        bool isMany = join->hasAttribute(manyAtom) || join->hasAttribute(smartAtom);
         if (ro || fo)
             reportError(ERR_BADKIND_LOOKUPJOIN, err, "JOIN(LOOKUP) only supports INNER, LEFT OUTER, and LEFT ONLY joins");
         if (join->hasAttribute(partitionRightAtom))
@@ -10367,6 +10367,7 @@ static void getTokenText(StringBuffer & msg, int token)
     case SIZEOF: msg.append("SIZEOF"); break;
     case SKEW: msg.append("SKEW"); break;
     case SKIP: msg.append("SKIP"); break;
+    case SMART: msg.append("SMART"); break;
     case SOAPACTION: msg.append("SOAPACTION"); break;
     case __STAND_ALONE__: msg.append("__STAND_ALONE__"); break;
     case HTTPHEADER: msg.append("HTTPHEADER"); break;

+ 1 - 0
ecl/hql/hqllex.l

@@ -897,6 +897,7 @@ SINH                { RETURNSYM(SINH); }
 SIZEOF              { RETURNSYM(SIZEOF); }
 SKEW                { RETURNSYM(SKEW); }
 SKIP                { RETURNSYM(SKIP); }
+SMART               { RETURNSYM(SMART); }
 SOAPACTION          { RETURNSYM(SOAPACTION); }
 SOAPCALL            { RETURNSYM(SOAPCALL); }
 SORT                { RETURNSYM(SORT); }

+ 4 - 2
ecl/hql/hqlmeta.cpp

@@ -2153,7 +2153,8 @@ void calculateDatasetMeta(CHqlMetaInfo & meta, IHqlExpression * expr)
             bool isLookupJoin = expr->queryAttribute(lookupAtom) != NULL;
             bool isAllJoin = expr->queryAttribute(allAtom) != NULL;
             bool isHashJoin = expr->queryAttribute(hashAtom) != NULL;
-            bool isKeyedJoin = !isAllJoin && !isLookupJoin && (expr->queryAttribute(keyedAtom) || isKey(expr->queryChild(1)));
+            bool isSmartJoin = expr->queryAttribute(smartAtom) != NULL;
+            bool isKeyedJoin = !isAllJoin && !isLookupJoin && !isSmartJoin && (expr->queryAttribute(keyedAtom) || isKey(expr->queryChild(1)));
             bool isLocal = (expr->queryAttribute(localAtom) != NULL);
             bool fo = expr->queryAttribute(fullonlyAtom) || expr->queryAttribute(fullouterAtom);
             bool createDefaultLeft = fo || expr->queryAttribute(rightonlyAtom) || expr->queryAttribute(rightouterAtom);
@@ -3056,8 +3057,9 @@ ITypeInfo * calculateDatasetType(node_operator op, const HqlExprArray & parms)
     case no_joincount:
         {
             bool isLookupJoin = queryAttribute(lookupAtom, parms) != NULL;
+            bool isSmartJoin = queryAttribute(smartAtom, parms) != NULL;
             bool isAllJoin = queryAttribute(allAtom, parms) != NULL;
-            bool isKeyedJoin = !isAllJoin && !isLookupJoin && (queryAttribute(keyedAtom, parms) || isKey(&parms.item(1)));
+            bool isKeyedJoin = !isAllJoin && !isLookupJoin && !isSmartJoin && (queryAttribute(keyedAtom, parms) || isKey(&parms.item(1)));
 
             recordArg = 3;
             if (isKeyedJoin || isAllJoin || isLookupJoin)

+ 1 - 1
ecl/hql/hqlutil.cpp

@@ -1158,7 +1158,7 @@ IHqlExpression * getNormalizedFilename(IHqlExpression * filename)
 
 bool canBeSlidingJoin(IHqlExpression * expr)
 {
-    if (expr->hasAttribute(hashAtom) || expr->hasAttribute(lookupAtom) || expr->hasAttribute(allAtom))
+    if (expr->hasAttribute(hashAtom) || expr->hasAttribute(lookupAtom) || expr->hasAttribute(smartAtom)|| expr->hasAttribute(allAtom))
         return false;
     if (expr->hasAttribute(rightouterAtom) || expr->hasAttribute(fullouterAtom) ||
         expr->hasAttribute(leftonlyAtom) || expr->hasAttribute(rightonlyAtom) || expr->hasAttribute(fullonlyAtom))

+ 6 - 4
ecl/hqlcpp/hqlhtcpp.cpp

@@ -11506,6 +11506,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityJoinOrDenormalize(BuildCtx & c
         isAllJoin = true;
 
     bool isLookupJoin = expr->hasAttribute(lookupAtom);
+    bool isSmartJoin = expr->hasAttribute(smartAtom);
     bool isHashJoin = targetThor() && expr->hasAttribute(hashAtom);
     bool isLocalJoin = !isHashJoin && expr->hasAttribute(localAtom);
     bool joinToSelf = (op == no_selfjoin);
@@ -11628,7 +11629,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityJoinOrDenormalize(BuildCtx & c
             kind = TAKalljoin;
             argName = "AllJoin";
         }
-        else if (isLookupJoin)
+        else if (isLookupJoin || isSmartJoin)
         {
             kind = TAKlookupjoin;
             argName = "HashJoin";
@@ -11651,7 +11652,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityJoinOrDenormalize(BuildCtx & c
             kind = TAKalldenormalize;
             argName = "AllDenormalize";
         }
-        else if (isLookupJoin)
+        else if (isLookupJoin || isSmartJoin)
         {
             kind = TAKlookupdenormalize;
             argName = "HashDenormalize";
@@ -11674,7 +11675,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityJoinOrDenormalize(BuildCtx & c
             kind = TAKalldenormalizegroup;
             argName = "AllDenormalizeGroup";
         }
-        else if (isLookupJoin)
+        else if (isLookupJoin || isSmartJoin)
         {
             kind = TAKlookupdenormalizegroup;
             argName = "HashDenormalizeGroup";
@@ -11764,6 +11765,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityJoinOrDenormalize(BuildCtx & c
         flags.append("|JFleftSortedLocally");
     if (isAlreadySorted(dataset2, joinInfo.queryRightSort(), true, true) || userPreventsSort(noSortAttr, no_right))
         flags.append("|JFrightSortedLocally");
+    if (isSmartJoin) flags.append("|JFsmart|JFmanylookup");
 
     if (flags.length())
         doBuildUnsignedFunction(instance->classctx, "getJoinFlags", flags.str()+1);
@@ -11851,7 +11853,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityJoinOrDenormalize(BuildCtx & c
         buildSlidingMatchFunction(instance->nestedctx, joinInfo.queryLeftSort(), joinInfo.queryRightSort(), joinInfo.slidingMatches, "CompareLeftRightUpper", 2, lhsDsRef, rhsDsRef);
     }
 
-    if (isHashJoin||isLookupJoin)
+    if (isHashJoin||isLookupJoin||isSmartJoin) // hash, lookup and smart joins all need the HashLeft helper built below
     {
         OwnedHqlExpr leftList = createValueSafe(no_sortlist, makeSortListType(NULL), joinInfo.queryLeftReq());
         buildHashOfExprsClass(instance->nestedctx, "HashLeft", leftList, lhsDsRef, false);

+ 6 - 0
ecl/hqlcpp/hqlresource.cpp

@@ -89,6 +89,12 @@ void getResources(IHqlExpression * expr, CResources & resources, const CResource
                 resources.setManyToMasterSockets(1);
             }
         }
+        else if (expr->hasAttribute(smartAtom))
+        {
+            //GH->JCS what should we assume here?
+            resources.setHeavyweight();
+            setHashResources(expr, resources, options);
+        }
         else if (expr->hasAttribute(hashAtom))
         {
             resources.setHeavyweight();

+ 3 - 3
ecl/hqlcpp/hqlttcpp.cpp

@@ -2616,7 +2616,7 @@ IHqlExpression * ThorHqlTransformer::normalizeJoinOrDenormalize(IHqlExpression *
     }
 
     //Check to see if this join should be done as a keyed join...
-    if (!expr->hasAttribute(lookupAtom) && !expr->hasAttribute(allAtom))
+    if (!expr->hasAttribute(lookupAtom) && !expr->hasAttribute(smartAtom) && !expr->hasAttribute(allAtom))
     {
         if (rightDs->getOperator() == no_filter)
         {
@@ -2784,7 +2784,7 @@ IHqlExpression * ThorHqlTransformer::normalizeJoinOrDenormalize(IHqlExpression *
     }
 
     //Sort,Sort->join is O(NlnN) lookup join using a hash table is O(N) =>convert for hthor/roxie
-    if (!isThorCluster(targetClusterType) && !expr->hasAttribute(_normalized_Atom))
+    if (!isThorCluster(targetClusterType) && !expr->hasAttribute(_normalized_Atom) && !expr->hasAttribute(smartAtom))
     {
         bool createLookup = false;
         if ((op == no_join) && options.convertJoinToLookup)
@@ -2879,7 +2879,7 @@ IHqlExpression * ThorHqlTransformer::normalizeJoinOrDenormalize(IHqlExpression *
         return expr->clone(args);
     }
 
-    if (isThorCluster(targetClusterType) && isLocal && options.implicitJoinSubSort)
+    if (isThorCluster(targetClusterType) && isLocal && options.implicitJoinSubSort && !expr->hasAttribute(smartAtom))
     {
         IHqlExpression * noSortAttr = expr->queryAttribute(noSortAtom);
         OwnedHqlExpr newLeft;

+ 42 - 0
ecl/regress/issue10068.ecl

@@ -0,0 +1,42 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+iirec := RECORD
+    INTEGER4 i;
+    INTEGER4 j;
+END;
+
+iiset1 := DATASET([{1, 1}, {2, 1}, {2, 2}, {3, 1}, {3, 2}, {3, 3}, {4, 4}], iirec);
+
+iiset2 := DATASET([{1, 6}, {2, 6}, {2, 7}, {3, 6}, {3, 7}, {3, 8}], iirec);
+
+iiset2b := DATASET([{1, 6}, {3, 6}, {3, 7}], iirec);
+
+iiset1b := DATASET([{1, 1}, {3, 1}, {2, 9}, {9, 9}, {3, 2}, {4, 4}], iirec);
+
+iiset1c := DATASET([{1, 1}, {3, 1}, {8, 9}, {9, 9}, {3, 2}, {4, 4}], iirec);
+
+iirec xform(iirec l, iirec r) := TRANSFORM
+    SELF.i := l.i * 10 + r.i;
+    SELF.j := l.j * 10 + r.j;
+END;
+
+OUTPUT(JOIN(iiset1, iiset2, LEFT.i = RIGHT.i, xform(LEFT, RIGHT), SMART)); // 1+2+3 = 6 records output if only the first match per left row is kept; NOTE(review): SMART is flagged as a MANY lookup, which would give 1+4+9 = 14 - confirm
+
+OUTPUT(JOIN(iiset1, iiset2, LEFT.i = RIGHT.i, xform(LEFT, RIGHT), SMART, LEFT OUTER)); // 1+2+3+1 = 7 records output if only the first match per left row is kept; NOTE(review): with MANY semantics expect 1+4+9+1 = 15 - confirm
+
+OUTPUT(JOIN(GROUP(SORT(iiset1b, i), i), iiset2b, LEFT.i = RIGHT.i, xform(LEFT, RIGHT), SMART)); // 1+2=3 records output if only the first match per left row is kept; NOTE(review): with MANY semantics expect 1+4 = 5 - confirm

+ 1 - 0
rtl/include/eclhelper.hpp

@@ -1571,6 +1571,7 @@ enum {
     JFindexfromactivity          = 0x02000000,
     JFleftSortedLocally          = 0x04000000,
     JFrightSortedLocally         = 0x08000000,
+    JFsmart                      = 0x10000000,
 };
 
 // FetchFlags