
Merge branch 'candidate-7.4.x' into candidate-7.6.0

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman
parent commit 328761b281

+ 3 - 1
common/workunit/workunit.cpp

@@ -3085,7 +3085,9 @@ void WuScopeFilter::finishedFilter()
         if (!(properties & (PTattributes|PThints)))
             sourceFlags &= ~(SSFsearchGraph);
 
-        setDepth(2, 2);
+        //This should really be setDepth(2,2) but workunits prior to 7.4 did not have graph ids prefixed by the wfid
+        //Remove once 7.2 is a distant memory (see HPCC-22887)
+        setDepth(1, 2);
     }
     else if (matchOnly(SSTsubgraph))
     {

+ 4 - 4
docs/EN_US/ECLStandardLibraryReference/SLR-Mods/Crypto.xml

@@ -222,7 +222,7 @@ STD.Crypto.SupportedPublicKeyAlgorithms(); //returns SET of STRINGs</programlist
           <row>
             <entry><emphasis>myHashModule</emphasis></entry>
 
-            <entry>The name of the Hashing module structure </entry>
+            <entry>The name of the Hashing module structure</entry>
           </row>
 
           <row>
@@ -254,7 +254,7 @@ OUTPUT(hash1);
 OUTPUT(hash2);</programlisting>
   </sect1>
 
-  <sect1 id="hash">
+  <sect1 id="hash_crypto">
     <title>Hash</title>
 
     <para><emphasis>myHashModule</emphasis><emphasis
@@ -1184,7 +1184,7 @@ STRING privateKey := '-----BEGIN RSA PRIVATE KEY-----' + '\n' +
 '-----END RSA PRIVATE KEY-----';
 
 //PKE Encryption module definition
-myPKEModule := STD.Crypto.PublicKeyEncryption('RSA', publicKey, privateKey,''); 
+myPKEModule := STD.Crypto.PublicKeyEncryptionFromBuffer('RSA', publicKey, privateKey,''); 
 
 DATA signature := myPKEModule.Sign((DATA)'The quick brown fox jumps');
 OUTPUT(TRUE = myPKEModule.VerifySignature(signature, (DATA)'The quick brown fox jumps'));</programlisting>
@@ -1281,7 +1281,7 @@ STRING privateKey := '-----BEGIN RSA PRIVATE KEY-----' + '\n' +
 '-----END RSA PRIVATE KEY-----';
 
 //PKE Encryption module definition
-myPKEModule := STD.Crypto.PublicKeyEncryption('RSA', publicKey, privateKey,''); 
+myPKEModule := STD.Crypto.PublicKeyEncryptionFromBuffer('RSA', publicKey, privateKey,''); 
 
 DATA signature := myPKEModule.Sign((DATA)'The quick brown fox jumps');
 OUTPUT(TRUE = myPKEModule.VerifySignature(signature, (DATA)'The quick brown fox jumps'));</programlisting>

+ 2 - 0
ecl/hql/hqlatoms.cpp

@@ -210,6 +210,7 @@ IAtom * _graphLocal_Atom;
 IAtom * groupAtom;
 IAtom * groupedAtom;
 IAtom * hashAtom;
+IAtom * _hash_dollar_Atom;
 IAtom * headingAtom;
 IAtom * _hidden_Atom;
 IAtom * hintAtom;
@@ -686,6 +687,7 @@ MODULE_INIT(INIT_PRIORITY_HQLATOM)
     MAKEATOM(group);
     MAKEATOM(grouped);
     MAKEATOM(hash);
+    MAKESYSATOM(hash_dollar);
     MAKEATOM(heading);
     MAKESYSATOM(hidden);
     MAKEATOM(hint);

+ 1 - 0
ecl/hql/hqlatoms.hpp

@@ -214,6 +214,7 @@ extern HQL_API IAtom * _graphLocal_Atom;
 extern HQL_API IAtom * groupAtom;
 extern HQL_API IAtom * groupedAtom;
 extern HQL_API IAtom * hashAtom;
+extern HQL_API IAtom * _hash_dollar_Atom;
 extern HQL_API IAtom * headingAtom;
 extern HQL_API IAtom * _hidden_Atom;
 extern HQL_API IAtom * hintAtom;

+ 5 - 0
ecl/hql/hqlgram.hpp

@@ -417,6 +417,8 @@ public:
 
     void saveContext(HqlGramCtx & ctx, bool cloneScopes);
     IHqlScope * queryGlobalScope();
+    IHqlScope * queryMacroScope();
+    IAtom * queryGlobalScopeId();
 
     bool canFollowCurrentState(int tok, const short * yyps);
     int mapToken(int lexToken) const;
@@ -1125,6 +1127,8 @@ class HqlLex
         HqlLex* getParentLex() { return parentLex; }
         void setParentLex(HqlLex* pLex) { parentLex = pLex; }
         const char* getMacroName() { return (macroExpr) ? str(macroExpr->queryName()) : "<param>"; }
+        const char * queryMacroScopeName();
+
         IPropertyTree * getClearJavadoc();
         void doSlashSlashHash(attribute const & returnToken, const char * command);
 
@@ -1282,6 +1286,7 @@ private:
         IHqlExpression *macroExpr;
         Owned<IFileContents> forBody;
         Owned<IFileContents> forFilter;
+        IAtom * hashDollar = nullptr;
 
         IXmlScope *xmlScope;
 

+ 13 - 2
ecl/hql/hqlgram.y

@@ -584,6 +584,7 @@ static void eclsyntaxerror(HqlGram * parser, const char * s, short yystate, int
   HASH_LINK
   HASH_ONWARNING
   HASH_WEBSERVICE
+  HASH_DOLLAR
 
   INTERNAL_READ_NEXT_TOKEN
 
@@ -745,6 +746,9 @@ importId
     | '$'               {
                             $$.setExpr(createAttribute(selfAtom), $1);
                         }
+    | HASH_DOLLAR       {
+                            $$.setExpr(createAttribute(_hash_dollar_Atom), $1);
+                        }
     | '^'               {
                             $$.setExpr(createAttribute(_root_Atom), $1);
                         }
@@ -1053,7 +1057,8 @@ badObject
 macro
     : MACRO             {
                             Owned<IFileContents> contents = $1.getContents();
-                            IHqlExpression* expr = createUnknown(no_macro, makeBoolType(), macroAtom, LINK(contents));
+                            IAtom * globalId = parser->queryGlobalScopeId();
+                            IHqlExpression* expr = createUnknown(no_macro, makeBoolType(), globalId, LINK(contents));
 #if defined(TRACE_MACRO)
                             DBGLOG("MACRO>> verify: macro definition at %d:%d\n",yylval.startLine, yylval.startColumn);
 #endif
@@ -1068,7 +1073,8 @@ macro
     | COMPLEX_MACRO     {
                             Owned<IFileContents> contents = $1.getContents();
 
-                            IHqlExpression* expr = createUnknown(no_macro, makeVoidType(), macroAtom, LINK(contents));
+                            IAtom * globalId = parser->queryGlobalScopeId();
+                            IHqlExpression* expr = createUnknown(no_macro, makeVoidType(), globalId, LINK(contents));
 
 #if defined(TRACE_MACRO)
                             DBGLOG("MACRO>> verify: macro definition at %d:%d\n",yylval.startLine, yylval.startColumn);
@@ -7237,6 +7243,11 @@ abstractModule
                             IHqlExpression * scopeExpr = queryExpression(parser->globalScope);
                             $$.setExpr(LINK(scopeExpr), $1);
                         }
+    | HASH_DOLLAR
+                        {
+                            IHqlExpression * scopeExpr = queryExpression(parser->queryMacroScope());
+                            $$.setExpr(LINK(scopeExpr), $1);
+                        }
     | VALUE_MACRO abstractModule ENDMACRO
                         {
                             $$.setExpr($2.getExpr());

+ 29 - 2
ecl/hql/hqlgram2.cpp

@@ -419,6 +419,25 @@ IHqlScope * HqlGram::queryGlobalScope()
     return globalScope;
 }
 
+IHqlScope * HqlGram::queryMacroScope()
+{
+    const char * scopeName = lexObject->queryMacroScopeName();
+    if (scopeName)
+    {
+        OwnedHqlExpr matched = getResolveAttributeFullPath(scopeName, LSFpublic, lookupCtx);
+        if (matched && matched->queryScope())
+            return matched->queryScope();
+    }
+
+    return globalScope;
+}
+
+IAtom * HqlGram::queryGlobalScopeId()
+{
+    const char * globalName = globalScope->queryFullName();
+    return createAtom(globalName);
+}
+
 void HqlGram::init(IHqlScope * _globalScope, IHqlScope * _containerScope)
 {
     minimumScopeIndex = 0;
@@ -10899,6 +10918,11 @@ inline bool isDollarModule(IHqlExpression * expr)
     return expr->isAttribute() && (expr->queryName() == selfAtom);
 }
 
+inline bool isHashDollarModule(IHqlExpression * expr)
+{
+    return expr->isAttribute() && (expr->queryName() == _hash_dollar_Atom);
+}
+
 inline bool isRootModule(IHqlExpression * expr)
 {
     return expr->isAttribute() && (expr->queryName() == _root_Atom);
@@ -10908,6 +10932,8 @@ IHqlExpression * HqlGram::resolveImportModule(const attribute & errpos, IHqlExpr
 {
     if (isDollarModule(expr))
         return LINK(queryExpression(globalScope));
+    if (isHashDollarModule(expr))
+        return LINK(queryExpression(queryMacroScope()));
     if (isRootModule(expr))
         return LINK(queryExpression(lookupCtx.queryRepository()->queryRootScope()));
 
@@ -11557,6 +11583,7 @@ static void getTokenText(StringBuffer & msg, int token)
     case HASH_LINK: msg.append("#LINK"); break;
     case HASH_WORKUNIT: msg.append("#WORKUNIT"); break;
     case HASH_WEBSERVICE: msg.append("#WEBSERVICE"); break;
+    case HASH_DOLLAR: msg.append("#$"); break;
     case SIMPLE_TYPE: msg.append("type-name"); break;
 
     case EQ: msg.append("="); break;
@@ -11705,9 +11732,9 @@ void HqlGram::simplifyExpected(int *expected)
     simplify(expected, LIBRARY, LIBRARY, SCOPE_FUNCTION, STORED, PROJECT, INTERFACE, MODULE, 0);
     simplify(expected, MATCHROW, MATCHROW, LEFT, RIGHT, IF, IFF, ROW, HTTPCALL, SOAPCALL, PROJECT, GLOBAL, NOFOLD, NOHOIST, ALLNODES, THISNODE, SKIP, DATAROW_FUNCTION, TRANSFER, RIGHT_NN, FROMXML, FROMJSON, 0);
     simplify(expected, TRANSFORM_ID, TRANSFORM_FUNCTION, TRANSFORM, '@', 0);
-    simplify(expected, RECORD, RECORDOF, RECORD_ID, RECORD_FUNCTION, SCOPE_ID, VALUE_MACRO, '{', '@', 0);
+    simplify(expected, SCOPE_ID, '$', HASH_DOLLAR, '^', 0);
+    simplify(expected, RECORD, RECORDOF, RECORD_ID, RECORD_FUNCTION, VALUE_MACRO, '{', '@', '$', HASH_DOLLAR, IF, 0);
     simplify(expected, IFBLOCK, ANY, PACKED, BIG, LITTLE, 0);
-    simplify(expected, SCOPE_ID, '$', 0);
     simplify(expected, SIMPLE_TYPE, _ARRAY_, LINKCOUNTED, EMBEDDED, STREAMED, 0);
     simplify(expected, END, '}', 0);
 }

+ 4 - 0
ecl/hql/hqllex.l

@@ -634,6 +634,10 @@ __LINE__            {
                         lexer->reportError(returnToken, ERR_TMPLT_UNKNOWNCOMMAND, "%s", msg.str());
                         return INTERNAL_READ_NEXT_TOKEN;
                     }
+#\$                 {
+                        updatepos1;
+                        return HASH_DOLLAR;
+                    }
         
 ABS                 { RETURNSYM(ABS); }
 ACOS                { RETURNSYM(ACOS); }

+ 14 - 1
ecl/hql/hqlparse.cpp

@@ -429,7 +429,6 @@ void HqlLex::pushMacro(IHqlExpression *expr)
         child(1) = parameters
         child(2) = defaults for parameters
     */
-
     attribute nextToken;
     int tok = yyLex(nextToken, false, 0);
     if (tok != '(')
@@ -596,6 +595,7 @@ void HqlLex::pushMacro(IHqlExpression *expr)
         inmacro->yyColumn = macroBodyExpr->getStartColumn();
         inmacro->setParentLex(this);
         inmacro->macroParms.setown(macroParms.getClear());
+        inmacro->hashDollar = macroBodyExpr->queryBody()->queryName();
     }
 }
 
@@ -2386,6 +2386,19 @@ void HqlLex::loadXML(const attribute & errpos, const char *name, const char * ch
     }
 }
 
+const char * HqlLex::queryMacroScopeName()
+{
+    if (inmacro)
+    {
+        const char * scope = inmacro->queryMacroScopeName();
+        if (scope)
+            return scope;
+    }
+    if (hashDollar)
+        return str(hashDollar);
+    return nullptr;
+}
+
 IPropertyTree * HqlLex::getClearJavadoc()
 {
     if (javaDocComment.length() == 0)

+ 1 - 0
ecl/hql/reservedwords.cpp

@@ -108,6 +108,7 @@ static const char * eclReserved3[] = { //Template language
     "#warning",
     "#webservice",
     "#workunit",
+    "#$",
     "loadxml",
     NULL
 };
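
Taken together, the atom, lexer, grammar, and reserved-word changes above introduce a new token, #$, which resolves to the scope containing the macro definition, rather than the scope in which the macro is expanded (the behaviour of plain $). A minimal ECL sketch of the intended usage, modelled on the hashdollar regression tests added below (module name x1 and the attribute value are taken from those tests):

    // Definition x1.m1 - a macro that refers back to its defining module via #$
    EXPORT m1() := MACRO
        IMPORT #$;           // imports the module containing this macro definition (x1)
        OUTPUT(#$.value)     // resolves to x1.value, even when expanded from another module
    ENDMACRO;

    // Definition x1.value
    EXPORT value := 'Good';

When another module's attribute calls x1.m1(), #$ still refers to x1, so the expansion outputs 'Good' rather than the caller's own value.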

+ 45 - 0
ecl/regress/hashdollar1.eclxml

@@ -0,0 +1,45 @@
+<Archive useArchivePlugins="1">
+<!--
+
+    HPCC SYSTEMS software Copyright (C) 2019 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+ <Module name="x1">
+  <Attribute name="m1">
+export
+ m1() := MACRO
+
+   import $;
+
+   output($.value)
+ENDMACRO;
+  </Attribute>
+  <Attribute name="value">
+export value := 'Good';
+  </Attribute>
+ </Module>
+ <Module name="x2">
+  <Attribute name="f1">
+  import x1;
+  x1.m1();
+  </Attribute>
+  <Attribute name="value">
+export value := 'Bad';
+  </Attribute>
+ </Module>
+ <Query>
+    import x1,x2;
+    x2.f1;
+ </Query>
+</Archive>

+ 45 - 0
ecl/regress/hashdollar1b.eclxml

@@ -0,0 +1,45 @@
+<Archive useArchivePlugins="1">
+<!--
+
+    HPCC SYSTEMS software Copyright (C) 2019 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+ <Module name="x1">
+  <Attribute name="m1">
+export m1 := MODULE
+ export f1() := MACRO
+
+   import $;
+   output($.value)
+ENDMACRO;
+END;
+  </Attribute>
+  <Attribute name="value">
+export value := 'Good';
+  </Attribute>
+ </Module>
+ <Module name="x2">
+  <Attribute name="f1">
+  import x1;
+  x1.m1.f1();
+  </Attribute>
+  <Attribute name="value">
+export value := 'Bad';
+  </Attribute>
+ </Module>
+ <Query>
+    import x1,x2;
+    x2.f1;
+ </Query>
+</Archive>

+ 45 - 0
ecl/regress/hashdollar2.eclxml

@@ -0,0 +1,45 @@
+<Archive useArchivePlugins="1">
+<!--
+
+    HPCC SYSTEMS software Copyright (C) 2019 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+ <Module name="x1">
+  <Attribute name="m1">
+export
+ m1() := MACRO
+
+   import #$;
+
+   output(#$.value)
+ENDMACRO;
+  </Attribute>
+  <Attribute name="value">
+export value := 'Good';
+  </Attribute>
+ </Module>
+ <Module name="x2">
+  <Attribute name="f1">
+  import x1;
+  x1.m1();
+  </Attribute>
+  <Attribute name="value">
+export value := 'Bad';
+  </Attribute>
+ </Module>
+ <Query>
+    import x1,x2;
+    x2.f1;
+ </Query>
+</Archive>

+ 52 - 0
ecl/regress/hashdollar3.eclxml

@@ -0,0 +1,52 @@
+<Archive useArchivePlugins="1">
+<!--
+
+    HPCC SYSTEMS software Copyright (C) 2019 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+ <Module name="x1.y1">
+  <Attribute name="m1">
+export
+ m1() := MACRO
+
+   import #$;
+   import myMod from #$;
+
+   output(#$.value);
+   output(myMod.value)
+ENDMACRO;
+  </Attribute>
+  <Attribute name="value">
+export value := 'Good';
+  </Attribute>
+  <Attribute name="mymod">
+export mymod := MODULE
+    export value := 'Good II';
+END;
+  </Attribute>
+ </Module>
+ <Module name="x2">
+  <Attribute name="f1">
+  import x1;
+  x1.y1.m1();
+  </Attribute>
+  <Attribute name="value">
+export value := 'Bad';
+  </Attribute>
+ </Module>
+ <Query>
+    import x1,x2;
+    x2.f1;
+ </Query>
+</Archive>

+ 56 - 0
ecl/regress/hashdollar4.eclxml

@@ -0,0 +1,56 @@
+<Archive useArchivePlugins="1">
+<!--
+
+    HPCC SYSTEMS software Copyright (C) 2019 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+ <Module name="x1.y1">
+  <Attribute name="m1">
+export m1 := MODULE
+    export m2() := MACRO
+
+   import #$;
+   import #$.m1 as me;
+   import myMod from #$;
+
+   output(#$.value);
+   output(me.value);
+   output(myMod.value)
+ENDMACRO;
+  export value := 'x1.y1.m1.value';
+END;
+  </Attribute>
+  <Attribute name="value">
+export value := 'Good';
+  </Attribute>
+  <Attribute name="mymod">
+export mymod := MODULE
+    export value := 'Good II';
+END;
+  </Attribute>
+ </Module>
+ <Module name="x2">
+  <Attribute name="f1">
+  import x1;
+  x1.y1.m1.m2();
+  </Attribute>
+  <Attribute name="value">
+export value := 'Bad';
+  </Attribute>
+ </Module>
+ <Query>
+    import x1,x2;
+    x2.f1;
+ </Query>
+</Archive>

+ 74 - 0
ecl/regress/hashdollar5.eclxml

@@ -0,0 +1,74 @@
+<Archive useArchivePlugins="1">
+<!--
+
+    HPCC SYSTEMS software Copyright (C) 2019 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+ <Module name="x1.y1">
+  <Attribute name="m1">
+export
+ m1() := MACRO
+
+   import #$;
+   import myMod from #$;
+
+   output(#$.value);
+   output(myMod.value);
+   #$.z.m2()
+ENDMACRO;
+  </Attribute>
+  <Attribute name="value">
+export value := 'Good';
+  </Attribute>
+  <Attribute name="mymod">
+export mymod := MODULE
+    export value := 'Good II';
+END;
+  </Attribute>
+ </Module>
+ <Module name="x1.y1.z">
+  <Attribute name="m2">
+export
+ m2() := MACRO
+
+   import #$;
+   import myMod from #$ as myMod2;
+
+   output(#$.value);
+   output(myMod2.value)
+ENDMACRO;
+  </Attribute>
+  <Attribute name="value">
+export value := 'Good III';
+  </Attribute>
+  <Attribute name="mymod">
+export mymod := MODULE
+    export value := 'Good IV';
+END;
+  </Attribute>
+ </Module>
+ <Module name="x2">
+  <Attribute name="f1">
+  import x1;
+  x1.y1.m1();
+  </Attribute>
+  <Attribute name="value">
+export value := 'Bad';
+  </Attribute>
+ </Module>
+ <Query>
+    import x1,x2;
+    x2.f1;
+ </Query>
+</Archive>

+ 3 - 1
system/jlib/jstats.cpp

@@ -2999,7 +2999,9 @@ void ScopeFilter::finishedFilter()
             intersectDepth(1, 1);
             break;
         case SSTgraph:
-            intersectDepth(2, 2);
+            //This should really be intersectDepth(2,2) but workunits prior to 7.4 did not have graph ids prefixed by the wfid
+            //Remove once 7.2 is a distant memory (see HPCC-22887)
+            intersectDepth(1, 2);
             break;
         case SSTsubgraph:
             intersectDepth(3, UINT_MAX);

+ 8 - 1
testing/regress/ecl/dedupwithbest.ecl

@@ -15,7 +15,14 @@
     limitations under the License.
 ############################################################################## */
 
-#option ('testHashDedupSpillTimes',10);
+//version spillTimes=0
+//version spillTimes=10
+
+import ^ as root;
+spillTimes := #IFDEFINED(root.spillTimes, 0);
+
+#option ('testHashDedupSpillTimes', spillTimes);
+
 MyRec := RECORD
     INTEGER3 Id;
     STRING10 Field1;

+ 18 - 0
testing/regress/ecl/key/lightweightjoin.xml

@@ -0,0 +1,18 @@
+<Dataset name='Result 1'>
+ <Row><id>0</id></Row>
+ <Row><id>0</id></Row>
+ <Row><id>1</id></Row>
+ <Row><id>1</id></Row>
+ <Row><id>2</id></Row>
+ <Row><id>2</id></Row>
+ <Row><id>3</id></Row>
+ <Row><id>3</id></Row>
+ <Row><id>4</id></Row>
+ <Row><id>4</id></Row>
+ <Row><id>5</id></Row>
+ <Row><id>5</id></Row>
+ <Row><id>6</id></Row>
+ <Row><id>6</id></Row>
+ <Row><id>7</id></Row>
+ <Row><id>7</id></Row>
+</Dataset>

+ 26 - 0
testing/regress/ecl/lightweightjoin.ecl

@@ -0,0 +1,26 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2019 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+// Lightweight join is only implemented in Thor
+
+lhs := DATASET(8, TRANSFORM({unsigned id}, SELF.id := COUNTER-1));
+glhs := GROUP(SORTED(lhs, id), id, LOCAL) : INDEPENDENT;
+rhs := DATASET(16, TRANSFORM({unsigned id}, SELF.id := (COUNTER-1)/2));
+grhs := GROUP(SORTED(rhs, id), id, LOCAL) : INDEPENDENT;
+
+JOIN(glhs, grhs, LEFT.id=RIGHT.id, LOCAL);
+

+ 66 - 33
thorlcr/activities/hashdistrib/thhashdistribslave.cpp

@@ -2559,6 +2559,7 @@ class CHashTableRowTable : private CThorExpandingRowArray
                 ++pos;
                 if (row) return row;
             }
+            // JCSMORE - could clear parent table at this point, i.e. free up ptr table
             stopped = true;
             return nullptr;
         }
@@ -2741,8 +2742,8 @@ class CBucket : public CSimpleInterface
     bool keepBest;
     ICompare *keepBestCompare;
     void doSpillHashTable();
-    bool rowsInBucketDedupedAlready;
-    bool streamed = true;
+    bool completed = false;
+    bool streamed = false;
 
 public:
     CBucket(HashDedupSlaveActivityBase &_owner, IThorRowInterfaces *_rowIf, IThorRowInterfaces *_keyIf, IHash *_iRowHash, IHash *_iKeyHash, ICompare *_iCompare, bool _extractKey, unsigned _bucketN, CHashTableRowTable *_htRows);
@@ -2760,7 +2761,7 @@ public:
     bool spillHashTable(bool critical); // returns true if freed mem
     bool flush(bool critical);
     bool rehash();
-    void close()
+    void closeSpillStreams()
     {
         rowSpill.close();
         keySpill.close();
@@ -2785,14 +2786,14 @@ public:
             return htRows->queryRow(htPos);
         return nullptr;
     }
-    inline void setRowsInBucketDeduped()
+    inline void setCompleted()
     {
         dbgassertex(!isSpilt());
-        rowsInBucketDedupedAlready=true;
+        completed = true;
     }
-    inline bool areRowsInBucketDeduped() const
+    inline bool isCompleted() const
     {
-        return rowsInBucketDedupedAlready;
+        return completed;
     }
 };
 
@@ -2811,7 +2812,6 @@ class CBucketHandler : public CSimpleInterface, implements IInterface, implement
     mutable rowidx_t peakKeyCount;
     bool callbacksInstalled = false;
     unsigned nextBestBucket = 0;
-    bool bestReady = false;
     CriticalSection spillCrit;
 
     rowidx_t getTotalBucketCount() const
@@ -2906,7 +2906,12 @@ public:
                 // The one left, will be last bucket standing and grown to fill mem
                 // it is still useful to use as much as poss. of remaining bucket HT as filter
                 if (bucket->spillHashTable(critical))
+                {
+                    // If marked as done, then can close now (NB: must be closed before can be read by getNextBestRowStream())
+                    if (bucket->isCompleted())
+                        bucket->closeSpillStreams(); // close stream now, to flush rows out in write streams, so ready to be read
                     return true;
+                }
             }
         }
         while (nextToSpill != start);
@@ -2931,35 +2936,57 @@ public:
     {
         return spillBucket(critical);
     }
-    IRowStream * getNextBestRowStream()
+
+    void checkCompletedBuckets()
     {
         // NB: Called only once input has been read
         CriticalBlock b(spillCrit); // buckets can still be spilt
-        if (!bestReady)
-        {
-            // All non-spilled buckets in memory at this point in time are deduped
-            // -> set flag in all these buckets just in case they are spilled so that
-            //    when they need to be streamed back it's not necessary to dedup again
-            bestReady = true;
-            for (unsigned cur=0; cur<numBuckets; cur++)
-            {
-                if (!buckets[cur]->isSpilt())
-                    buckets[cur]->setRowsInBucketDeduped();
-            }
+        // All non-spilled buckets in memory at this point in time are fully deduped
+        // -> set flag in all these buckets just in case they are spilled so that
+        //    if they need to be streamed back it's not necessary to dedup again
+        for (unsigned cur=0; cur<numBuckets; cur++)
+        {
+            CBucket &bucket = *buckets[cur];
+            if (bucket.isSpilt())
+                bucket.closeSpillStreams(); // close stream now, to flush rows out in write streams, so ready to be read
+            else
+                bucket.setCompleted();
         }
+    }
+/*
+ * NB: getNextBestRowStream() is only used when BEST involved
+ * It is called once all input rows have been consumed.
+ *
+ * Returns: a stream of next available unspilt, or spilt buckets
+ * that were marked complete (setCompleted())
+ */
+    IRowStream * getNextBestRowStream()
+    {
+        // NB: Called only once input has been read
+        CriticalBlock b(spillCrit); // buckets can still be spilt
         while (nextBestBucket < numBuckets)
         {
             CBucket *bucket = buckets[nextBestBucket++];
+
+            /* JCSMORE - It would be better to prioritize non-spilt buckets first
+             * So that those memory consuming buckets are consumed 1st.
+             */
             if (bucket->isSpilt())
             {
-                if (bucket->areRowsInBucketDeduped())
+                if (bucket->isCompleted())
                 {
+                    /* NB: getSpillRowStream() will be empty, because this bucket completed
+                     * And key stream are whole rows because BEST does not extract key.
+                     */
                     rowcount_t count; // unused
-                    return bucket->getSpillRowStream(&count);
+                    return bucket->getSpillKeyStream(&count);
                 }
             }
             else
             {
+                /* JCSMORE - this should really create a stream that is spillable
+                 * As it is, getRowStream() marks the bucket as unspillable until the stream is consumed
+                 */
                 if (bucket->getKeyCount())
                     return bucket->getRowStream();
             }
@@ -3167,6 +3194,16 @@ public:
             }
             else
             {
+                // end of input from phase
+
+                // For testing only: spill one bucket. NB: this is in effect before finished reading input. i.e. the buckets are not yet marked as completed
+                if (testSpillTimes)
+                {
+                    bucketHandler->spillBucket(false);
+                    testSpillTimes--;
+                }
+
+                bucketHandler->checkCompletedBuckets();
                 // Keepbest has populated the hashtable with the best rows
                 // -> stream back best rows from hash table
                 if (keepBest)
@@ -3178,8 +3215,10 @@ public:
                         testSpillTimes--;
                     }
 
-                    /* Get next available best IRowStream, i.e. buckets that did not spill before end of input.
-                     * The bucket whose stream is returned is no longer be spillable.
+                    /* Get next available best IRowStream, i.e. buckets that did not spill before end of input or,
+                     * buckets which were marked complete, but have since spilt.
+                     * The bucket whose stream is returned is no longer spillable
+                     * (JCSMORE - but could use a spillable stream impl. so they were spillable)
                      * Other buckets continue to be, but are marked to be ignored by future handler stages.
                      */
                     bestRowStream.setown(bucketHandler->getNextBestRowStream());
@@ -3313,7 +3352,7 @@ void CHashTableRowTable::rehash(const void **newRows)
 
 CBucket::CBucket(HashDedupSlaveActivityBase &_owner, IThorRowInterfaces *_rowIf, IThorRowInterfaces *_keyIf, IHash *_iRowHash, IHash *_iKeyHash, ICompare *_iCompare, bool _extractKey, unsigned _bucketN, CHashTableRowTable *_htRows)
     : owner(_owner), rowIf(_rowIf), keyIf(_keyIf), iRowHash(_iRowHash), iKeyHash(_iKeyHash), iCompare(_iCompare), extractKey(_extractKey), bucketN(_bucketN), htRows(_htRows),
-      rowSpill(owner, _rowIf, "rows", _bucketN), keySpill(owner, _keyIf, "keys", _bucketN), rowsInBucketDedupedAlready(false)
+      rowSpill(owner, _rowIf, "rows", _bucketN), keySpill(owner, _keyIf, "keys", _bucketN)
 
 {
     spilt = false;
@@ -3561,13 +3600,7 @@ void CBucketHandler::flushBuckets()
 {
     clearCallbacks();
     for (unsigned i=0; i<numBuckets; i++)
-    {
-        CBucket &bucket = *buckets[i];
-        bucket.clear();
-        // close stream now, to flush rows out in write streams
-        if (bucket.isSpilt())
-            bucket.close();
-    }
+        buckets[i]->clear();
 }
 
 unsigned CBucketHandler::getBucketEstimateWithPrev(rowcount_t totalRows, rowidx_t prevPeakKeys, rowidx_t keyCount) const
@@ -3681,7 +3714,7 @@ CBucketHandler *CBucketHandler::getNextBucketHandler(Owned<IRowStream> &nextInpu
     while (currentBucket<numBuckets)
     {
         CBucket *bucket = buckets[currentBucket];
-        if (bucket->isSpilt() && !bucket->areRowsInBucketDeduped())
+        if (bucket->isSpilt() && !bucket->isCompleted())
         {
             rowcount_t keyCount, count;
             /* If each key and row stream were to use a unique allocator per target bucket
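
The hash-dedup changes above rename the bucket "deduped" state to "completed", close spill streams as soon as a completed bucket spills, and let getNextBestRowStream() read a spilt-but-completed bucket back from its key spill stream (whole rows, since BEST does not extract a key). A minimal ECL sketch of the kind of query that drives this keep-best hash dedup path, with illustrative field names (compare the dedupwithbest regression test above):

    MyRec := RECORD
        INTEGER3 id;
        STRING10 field1;
    END;

    ds := DATASET([{1, 'A'}, {1, 'B'}, {2, 'C'}, {2, 'A'}], MyRec);

    // HASH dedup with BEST keeps, per duplicate group, the row that sorts first
    // on field1; buckets that were fully deduped before spilling are streamed
    // back without being deduped a second time.
    OUTPUT(DEDUP(ds, id, HASH, BEST(field1)));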

+ 6 - 6
thorlcr/activities/join/thjoinslave.cpp

@@ -444,10 +444,10 @@ public:
             leftInput->getMetaInfo(info);
             if (info.totalRowsMax==0) 
                 isemptylhs = true;
-            if (rightpartition)
-                leftStream.set(leftInputStream); // already ungrouped
-            else
+            if (leftInput->isGrouped())
                 leftStream.setown(createUngroupStream(leftInputStream));
+            else
+                leftStream.set(leftInputStream);
         }
         else
         {
@@ -465,10 +465,10 @@ public:
         }
         else if (helper->isRightAlreadyLocallySorted())
         {
-            if (rightpartition)
-                rightStream.set(createUngroupStream(rightInputStream));
+            if (rightInput->isGrouped())
+                rightStream.setown(createUngroupStream(rightInputStream));
             else
-                rightStream.set(rightInputStream); // already ungrouped
+                rightStream.set(rightInputStream);
         }
         else
         {