Przeglądaj źródła

Introduce extra hash dedup helper functions

Introduce extra hash-dedup helper functions (getFlags, queryKeyHash and
queryRowKeyCompare) to enable Thor to optimize its implementation.  Also
indicate whether the entire row is being deduped, so that conversion of
the row to a key can be avoided.

See Issue #2700.

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 13 lat temu
rodzic
commit
1e9fc3e650

+ 23 - 0
ecl/hqlcpp/hqlhtcpp.cpp

@@ -13329,6 +13329,29 @@ ABoundActivity * HqlCppTranslator::doBuildActivityDedup(BuildCtx & ctx, IHqlExpr
             buildCompareMember(instance->nestedctx, "KeyCompare", keyOrder, DatasetReference(keyDataset, no_activetable, NULL));
         else
             instance->nestedctx.addQuoted("virtual ICompare * queryKeyCompare() { return &Compare; }");
+
+        //virtual unsigned getFlags() = 0;
+        {
+            StringBuffer flags;
+            if (recordTypesMatch(dataset, keyDataset)) flags.append("|HFDwholerecord");
+            if (flags.length())
+                doBuildUnsignedFunction(instance->classctx, "getFlags", flags.str()+1);
+        }
+
+        //virtual IHash    * queryKeyHash()=0;
+        if (reuseCompare)
+            instance->nestedctx.addQuoted("virtual IHash * queryKeyHash() { return &Hash; }");
+        else
+            buildHashOfExprsClass(instance->nestedctx, "KeyHash", keyOrder, DatasetReference(keyDataset, no_activetable, NULL), true);
+
+        //virtual ICompare * queryRowKeyCompare()=0; // lhs is a row, rhs is a key
+        if (!reuseCompare)
+        {
+            doCompareLeftRight(instance->nestedctx, "RowKeyCompare", DatasetReference(dataset), DatasetReference(keyDataset, no_activetable, NULL), info.equalities, selects);
+        }
+        else
+            instance->nestedctx.addQuoted("virtual ICompare * queryRowKeyCompare() { return &Compare; }");
+
     }
 
     buildInstanceSuffix(instance);

+ 35 - 0
ecl/regress/hashdedup.ecl

@@ -0,0 +1,35 @@
+/*##############################################################################
+
+    Copyright (C) 2011 HPCC Systems.
+
+    All rights reserved. This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as
+    published by the Free Software Foundation, either version 3 of the
+    License, or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+############################################################################## */
+
+
+namesRecord :=
+            RECORD
+string20        surname;
+string10        forename;
+integer2        age := 25;
+            END;
+
+namesTable := dataset('x',namesRecord,FLAT);
+
+//Computed key: dedup on an expression built from a surname prefix and the age
+d := dedup(namesTable,surname[1..5]+(string)age,hash);
+count(d);
+
+//Whole record: no dedup expression is given, so the entire row is the key
+d2 := dedup(namesTable, hash);
+count(d2);

+ 12 - 2
rtl/include/eclhelper.hpp

@@ -31,7 +31,7 @@ It should only contain pure interface definitions or inline functions.
 
 //Should be incremented whenever the virtuals in the context or a helper are changed, so
 //that a work unit can't be rerun.  Try as hard as possible to retain compatibility.
-#define ACTIVITY_INTERFACE_VERSION      139
+#define ACTIVITY_INTERFACE_VERSION      140
 #define MIN_ACTIVITY_INTERFACE_VERSION  138             //minimum value that is compatible with current interface - without using selectInterface
 
 typedef unsigned char byte;
@@ -958,6 +958,7 @@ enum ActivityInterfaceEnum
     TAIpipewritearg_2,
     TAIinlinetablearg_1,
     TAIshuffleextra_1,
+    TAIhashdeduparg_2,
 
 //Should remain as last of all meaningful tags, but before aliases
     TAImax,
@@ -1793,6 +1794,11 @@ struct IHThorHashDistributeArg : public IHThorArg
     virtual ICompare * queryMergeCompare()=0;       // iff TAKhasdistributemerge
 };
 
+enum    // Bit flags returned by IHThorHashDedupArg::getFlags()
+{
+    HFDwholerecord  = 0x0001,   // set by the code generator when the key record type matches the dataset record type (whole row is the key)
+};
+
 struct IHThorHashDedupArg : public IHThorArg
 {
     virtual ICompare * queryCompare()=0;
@@ -1800,6 +1806,10 @@ struct IHThorHashDedupArg : public IHThorArg
     virtual IOutputMetaData * queryKeySize() = 0;
     virtual size32_t recordToKey(ARowBuilder & rowBuilder, const void * _record) = 0;
     virtual ICompare * queryKeyCompare()=0;
+    //the following are only valid if selectInterface(TAIhashdeduparg_2) returns non-null
+    virtual unsigned getFlags() = 0;
+    virtual IHash    * queryKeyHash()=0;
+    virtual ICompare * queryRowKeyCompare()=0; // lhs is a row, rhs is a key
 };
 
 struct IHThorHashMinusArg : public IHThorArg
@@ -1906,7 +1916,7 @@ struct ICsvParameters
     virtual bool         queryEBCDIC() = 0;
     virtual const char * queryHeader()              { return NULL; }
     virtual unsigned     queryHeaderLen() = 0;
-    virtual size32_t         queryMaxSize() = 0;
+    virtual size32_t     queryMaxSize() = 0;
     virtual const char * queryQuote(unsigned idx) = 0;
     virtual const char * querySeparator(unsigned idx) = 0;
     virtual const char * queryTerminator(unsigned idx) = 0;

+ 3 - 0
rtl/include/eclhelper_base.hpp

@@ -1953,10 +1953,13 @@ class CThorHashDedupArg : public CThorArg, implements IHThorHashDedupArg
         {
         case TAIarg:
         case TAIhashdeduparg_1:
+        case TAIhashdeduparg_2:
             return static_cast<IHThorHashDedupArg *>(this);
         }
         return NULL;
     }
+
+    virtual unsigned getFlags() { return 0; }
 };
 
 class CThorHashMinusArg : public CThorArg, implements IHThorHashMinusArg