Jelajahi Sumber

HPCC-10507 Add new MAXLENGTH option onto INDEX and BUILD

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 11 tahun lalu
induk
melakukan
5ad5e848a3

+ 18 - 0
ecl/hql/hqlgram.y

@@ -2897,6 +2897,15 @@ buildFlag
                         {
                             $$.setExpr(createExprAttribute(filepositionAtom, $2.getExpr()), $1);
                         }
+    | MAXLENGTH
+                        {
+                            $$.setExpr(createExprAttribute(maxLengthAtom), $1);
+                        }
+    | MAXLENGTH '(' constExpression ')'
+                        {
+                            parser->normalizeExpression($3, type_numeric, false);
+                            $$.setExpr(createExprAttribute(maxLengthAtom, $3.getExpr()), $1);
+                        }
     ;
 
 localAttribute
@@ -3099,6 +3108,15 @@ indexFlag
                         {
                             $$.setExpr(createExprAttribute(filepositionAtom, $2.getExpr()), $1);
                         }
+    | MAXLENGTH
+                        {
+                            $$.setExpr(createExprAttribute(maxLengthAtom), $1);
+                        }
+    | MAXLENGTH '(' constExpression ')'
+                        {
+                            parser->normalizeExpression($3, type_numeric, false);
+                            $$.setExpr(createExprAttribute(maxLengthAtom, $3.getExpr()), $1);
+                        }
     | commonAttribute
     ;
 

+ 6 - 1
ecl/hql/hqlgram2.cpp

@@ -1036,12 +1036,12 @@ IHqlExpression * HqlGram::processIndexBuild(attribute & indexAttr, attribute * r
         checkIndexRecordType(dataset->queryRecord(), 1, false, indexAttr);
     }
 
-
     HqlExprArray args;
     args.append(*LINK(inputDataset));
     args.append(*filenameAttr.getExpr());
     if (flags)
         flags->unwindList(args, no_comma);
+
     checkDistributer(flagsAttr, args);
     return createValue(no_buildindex, makeVoidType(), args);
 }
@@ -6717,6 +6717,11 @@ IHqlExpression * HqlGram::createBuildIndexFromIndex(attribute & indexAttr, attri
                     distribution.setown(replaceSelector(cur, queryActiveTableSelector(), select));
                 args.append(*createLocalAttribute());
             }
+            else if (name == maxLengthAtom)
+            {
+                if (!queryAttribute(name, args))
+                    args.append(*LINK(cur));
+            }
         }
     }
     IHqlExpression * payload = index->queryAttribute(_payload_Atom);

+ 10 - 9
ecl/hqlcpp/CMakeLists.txt

@@ -105,15 +105,16 @@ set (    SRCS
 include_directories ( 
          ${CMAKE_BINARY_DIR}
          ${CMAKE_BINARY_DIR}/oss
-         ./../../common/remote 
-         ./../../common/workunit 
-         ./../../common/deftype 
-         ./../../system/include 
-         ./../../ecl/hql 
-         ./../../common/dllserver 
-         ./../../system/jlib 
-         ./../../common/thorhelper 
-         ./../../rtl/eclrtl 
+         ./../../common/remote
+         ./../../common/workunit
+         ./../../common/deftype
+         ./../../system/include
+         ./../../ecl/hql
+         ./../../common/dllserver
+         ./../../system/jlib
+         ./../../system/jhtree
+         ./../../common/thorhelper
+         ./../../rtl/eclrtl
          ./../../rtl/include
     )
 

+ 2 - 0
ecl/hqlcpp/hqlcerrors.hpp

@@ -210,6 +210,7 @@
 #define HQLERR_VariableRowMustBeLinked          4190
 #define HQLERR_UserCodeNotAllowed               4191
 #define HQLERR_StreamInputUsedDirectly          4192
+#define HQLERR_MaxlengthExceedsLimit            4193
 
 //Warnings....
 #define HQLWRN_PersistDataNotLikely             4500
@@ -546,6 +547,7 @@
 #define HQLERR_OnceCannotAccessStored_Text      "ONCE workflow items cannot be dependent on other workflow items (including ONCE)"
 #define HQLERR_ThorCombineOnlyLocal_Text        "Thor currently only supports the local version of COMBINE"
 #define HQLERR_SteppedNotImplemented_Text       "STEPPED could not be merged into an index read activity"
+#define HQLERR_MaxlengthExceedsLimit_Text       "MAXLENGTH(%u) for BUILD(index) exceeds the maximum of (%u)"
 
 #define HQLERR_NoClearOnLocalDataset_Text       "INTERNAL: Clear not supported on LOCAL datasets"
 #define HQLERR_NoCreateLocalDataset_Text        "INTERNAL: Local datasets cannot be created"

+ 1 - 1
ecl/hqlcpp/hqlcpp.ipp

@@ -1651,7 +1651,7 @@ public:
     void buildSkewThresholdMembers(BuildCtx & ctx, IHqlExpression * expr);
     void doCompareLeftRight(BuildCtx & ctx, const char * funcname, const DatasetReference & datasetLeft, const DatasetReference & datasetRight, const HqlExprArray & left, const HqlExprArray & right);
     void buildSlidingMatchFunction(BuildCtx & ctx, const HqlExprArray & leftEq, const HqlExprArray & rightEq, const HqlExprArray & slidingMatches, const char * funcname, unsigned childIndex, const DatasetReference & datasetL, const DatasetReference & datasetR);
-    void doBuildIndexOutputTransform(BuildCtx & ctx, IHqlExpression * record, SharedHqlExpr & rawRecord, bool hasFileposition);
+    void doBuildIndexOutputTransform(BuildCtx & ctx, IHqlExpression * record, SharedHqlExpr & rawRecord, bool hasFileposition, IHqlExpression * maxlength);
 
     void buildKeyedJoinExtra(ActivityInstance & instance, IHqlExpression * expr, KeyedJoinInfo * joinKey);
     void buildKeyJoinIndexReadHelper(ActivityInstance & instance, IHqlExpression * expr, KeyedJoinInfo * joinKey);

+ 22 - 2
ecl/hqlcpp/hqlhtcpp.cpp

@@ -64,6 +64,8 @@
 #include "rtlds_imp.hpp"
 #include "eclhelper_base.hpp"
 
+#include "ctfile.hpp"   // for KEYBUILD_MAXLENGTH
+
 #define MAX_ROWS_OUTPUT_TO_SDS              1000
 #define MAX_SAFE_RECORD_SIZE                10000000
 #define MAX_GRAPH_ECL_LENGTH                1000
@@ -9843,7 +9845,7 @@ static void createOutputIndexTransform(HqlExprArray & assigns, IHqlExpression *
 }
 
 
-void HqlCppTranslator::doBuildIndexOutputTransform(BuildCtx & ctx, IHqlExpression * record, SharedHqlExpr & rawRecord, bool hasFileposition)
+void HqlCppTranslator::doBuildIndexOutputTransform(BuildCtx & ctx, IHqlExpression * record, SharedHqlExpr & rawRecord, bool hasFileposition, IHqlExpression * maxlength)
 {
     OwnedHqlExpr srcDataset = createDataset(no_anon, LINK(record));
 
@@ -9882,6 +9884,23 @@ void HqlCppTranslator::doBuildIndexOutputTransform(BuildCtx & ctx, IHqlExpressio
     buildReturnRecordSize(subctx, selfCursor);
 
     buildMetaMember(ctx, tgtDataset, false, "queryDiskRecordSize");
+
+    size32_t maxRecordSize = 32767;
+    if (isVariableSizeRecord(newRecord))
+    {
+        if (maxlength)
+        {
+            maxRecordSize = getIntValue(maxlength->queryChild(0), 0);
+            if (maxRecordSize == 0)
+                maxRecordSize = getMaxRecordSize(newRecord);
+        }
+    }
+    else
+        maxRecordSize = getMinRecordSize(newRecord);
+
+    doBuildUnsignedFunction(ctx, "getMaxKeySize", maxRecordSize);
+    if (maxRecordSize > KEYBUILD_MAXLENGTH)
+        throwError2(HQLERR_MaxlengthExceedsLimit, maxRecordSize, KEYBUILD_MAXLENGTH);
 }
 
 
@@ -10050,6 +10069,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityOutputIndex(BuildCtx & ctx, IH
     if (updateAttr && !updateAttr->queryAttribute(alwaysAtom)) flags.append("|TIWupdate");
     if (!hasTLK && !singlePart)           flags.append("|TIWlocal");
     if (expr->hasAttribute(expireAtom))   flags.append("|TIWexpires");
+    if (expr->hasAttribute(maxLengthAtom))   flags.append("|TIWmaxlength");
 
     if (compressAttr)
     {
@@ -10116,7 +10136,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityOutputIndex(BuildCtx & ctx, IH
     }
 
     OwnedHqlExpr rawRecord;
-    doBuildIndexOutputTransform(instance->startctx, record, rawRecord, hasFileposition);
+    doBuildIndexOutputTransform(instance->startctx, record, rawRecord, hasFileposition, expr->queryAttribute(maxLengthAtom));
     buildFormatCrcFunction(instance->classctx, "getFormatCrc", rawRecord, expr, 0);
 
     if (compressAttr && compressAttr->hasAttribute(rowAtom))

+ 40 - 0
ecl/regress/issue10507.ecl

@@ -0,0 +1,40 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+
+namesRecord :=
+            RECORD
+string20        surname;
+string          forename{maxlength(12345)};
+            END;
+
+ds := dataset('ds', namesRecord, thor);
+o1 := BUILD(ds, {surname}, {forename },'i1',overwrite); // no MAXLENGTH: code generator uses its default maximum for variable-size rows
+o2 := BUILD(ds, {surname}, {forename },'i2',overwrite,maxlength); // bare MAXLENGTH: maximum is derived from the record definition
+o3 := BUILD(ds, {surname}, {forename },'i3',overwrite,maxlength(18000)); // explicit MAXLENGTH value
+
+i1 := index(ds, {surname}, {forename },'i1');
+i2 := index(ds, {surname}, {forename },'i1',maxlength); // bare MAXLENGTH on the INDEX definition
+i3 := index(ds, {surname}, {forename },'i1',maxlength(4321)); // explicit MAXLENGTH on the INDEX definition
+o4 := BUILD(i1, overwrite);
+o5 := BUILD(i2, overwrite); // MAXLENGTH on the index is carried over to the BUILD
+o6 := BUILD(i3, overwrite); // MAXLENGTH(4321) on the index is carried over to the BUILD
+
+o7 := BUILD(i2, overwrite, maxlength(23456.0)); // real-valued constant as the MAXLENGTH argument
+o8 := BUILD(i3, overwrite, maxlength(32000)); // NOTE(review): MAXLENGTH on BUILD is presumably expected to win over the index's 4321 - confirm
+
+SEQUENTIAL(o1, o2, o3, o4, o5, o6, o7, o8);

+ 26 - 0
ecl/regress/issue10507a.ecl

@@ -0,0 +1,26 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+
+namesRecord :=
+            RECORD
+string20        surname;
+string          forename{maxlength(65536),blob};
+            END;
+
+ds := dataset('ds', namesRecord, thor);
+BUILD(ds, {surname}, {forename },'i2',overwrite,maxlength); // bare MAXLENGTH is derived from the record; would be too big if the blob field were counted at its declared maxlength(65536)

+ 26 - 0
ecl/regress/issue10507b.ecl

@@ -0,0 +1,26 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+
+namesRecord :=
+            RECORD
+string32767     surname;
+unsigned8       extra;
+            END;
+
+ds := dataset('ds', namesRecord, thor);
+BUILD(ds, {surname}, {extra },'i2',overwrite,maxlength); // would be too big if the fileposition field were not processed correctly (presumably 'extra' is the fileposition here - compare issue10507e4.ecl)

+ 26 - 0
ecl/regress/issue10507e1.ecl

@@ -0,0 +1,26 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+
+namesRecord :=
+            RECORD
+string20        surname;
+string          forename{maxlength(32748)};
+            END;
+
+ds := dataset('ds', namesRecord, thor);
+BUILD(ds, {surname}, {forename },'i3',overwrite,maxlength(32768)); // expected error: explicit MAXLENGTH is too large (HQLERR_MaxlengthExceedsLimit)

+ 27 - 0
ecl/regress/issue10507e2.ecl

@@ -0,0 +1,27 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+
+namesRecord :=
+            RECORD
+string20        surname;
+string          forename{maxlength(32748)};
+            END;
+
+ds := dataset('ds', namesRecord, thor);
+i2 := index(ds, {surname}, {forename },'i1',maxlength); // bare MAXLENGTH on the index: the size derived from the record is too large
+BUILD(i2, overwrite); // expected error: the index's MAXLENGTH is carried over to this BUILD

+ 27 - 0
ecl/regress/issue10507e3.ecl

@@ -0,0 +1,27 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+
+namesRecord :=
+            RECORD
+string20        surname;
+string          forename{maxlength(32748)};
+            END;
+
+ds := dataset('ds', namesRecord, thor);
+i2 := index(ds, {surname}, {forename },'i1',maxlength(-1)); // negative MAXLENGTH: rejected as too large because the value is stored in an unsigned size32_t
+BUILD(i2, overwrite); // expected error: the index's MAXLENGTH is carried over to this BUILD

+ 26 - 0
ecl/regress/issue10507e4.ecl

@@ -0,0 +1,26 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+
+namesRecord :=
+            RECORD
+string32767     surname;
+unsigned8       extra;
+            END;
+
+ds := dataset('ds', namesRecord, thor);
+BUILD(ds, {surname}, {extra },'i2',overwrite,maxlength,fileposition(false)); // expected error: too big since with FILEPOSITION(FALSE) the trailing field is not special cased (presumably it then counts towards the row size - compare issue10507b.ecl)

+ 4 - 2
rtl/include/eclhelper.hpp

@@ -39,8 +39,8 @@ if the supplied pointer was not from the roxiemem heap. Usually an OwnedRoxieStr
 
 //Should be incremented whenever the virtuals in the context or a helper are changed, so
 //that a work unit can't be rerun.  Try as hard as possible to retain compatibility.
-#define ACTIVITY_INTERFACE_VERSION      153
-#define MIN_ACTIVITY_INTERFACE_VERSION  153             //minimum value that is compatible with current interface - without using selectInterface
+#define ACTIVITY_INTERFACE_VERSION      154
+#define MIN_ACTIVITY_INTERFACE_VERSION  154             //minimum value that is compatible with current interface - without using selectInterface
 
 typedef unsigned char byte;
 
@@ -1177,6 +1177,7 @@ enum
     TIWupdatecrc        = 0x0400,
     TIWhaswidth         = 0x0800,
     TIWexpires          = 0x1000,
+    TIWmaxlength        = 0x2000,       // explicit maxlength
 };
 
 //flags for thor dataset/temp tables
@@ -1205,6 +1206,7 @@ struct IHThorIndexWriteArg : public IHThorArg
     virtual bool getIndexMeta(size32_t & lenName, char * & name, size32_t & lenValue, char * & value, unsigned idx) = 0;
     virtual unsigned getWidth() = 0;                // only guaranteed present if TIWhaswidth defined
     virtual ICompare * queryCompare() = 0;          // only guaranteed present if TIWhaswidth defined
+    virtual unsigned getMaxKeySize() = 0;           // maximum size of a row in the index being built; generated for every index write (TIWmaxlength flags an explicit MAXLENGTH)
 };
 
 struct IHThorFirstNArg : public IHThorArg