Переглянути джерело

Merge pull request #13734 from dcamper/hpcc-24002-datapatterns-1.6.5

HPCC-24002 Update Std.DataPatterns to version 1.6.5

Reviewed-By: Gordon Smith <gordon.smith@lexisnexis.com>
Reviewed-By: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 5 років тому
батько
коміт
c3880c0153

+ 29 - 17
ecllibrary/std/DataPatterns/BestRecordStructure.ecl

@@ -279,10 +279,12 @@ EXPORT BestRecordStructure(inFile, sampling = 100, emitTransform = FALSE, textOu
     LOCAL __MakeRecDefinition(DATASET(RECORDOF(__fieldInfo20)) ds, STRING layoutName, BOOLEAN useBest = TRUE) := FUNCTION
         displayPrefix := IF(useBest, 'New', 'Old');
         displayedLayoutName := displayPrefix + layoutName;
-        RETURN DATASET([{displayedLayoutName + ' := RECORD'}], __LayoutItems)
-            & PROJECT
+        RETURN (+)
                 (
-                    DISTRIBUTE(SORT(ds, position), 0),
+                DATASET([{displayedLayoutName + ' := RECORD'}], __LayoutItems),
+                PROJECT
+                    (
+                        SORT(DISTRIBUTE(ds, 0), position, LOCAL),
                     TRANSFORM
                         (
                             __LayoutItems,
@@ -300,8 +302,10 @@ EXPORT BestRecordStructure(inFile, sampling = 100, emitTransform = FALSE, textOu
                                 ),
                             SELF := LEFT
                         )
-                )
-            & DATASET([{'END;'}], __LayoutItems);
+                    ),
+                DATASET([{'END;'}], __LayoutItems),
+                ORDERED(TRUE)
+            );
     END;
 
     // Iteratively process embedded records and child dataset definitions,
@@ -386,8 +390,10 @@ EXPORT BestRecordStructure(inFile, sampling = 100, emitTransform = FALSE, textOu
     // Creates an ECL TRANSFORM function based on the collected information
     // about a record definition
     LOCAL __MakeTransforms(__ChildRecLayout recInfo) := FUNCTION
-        RETURN DATASET(['New' + recInfo.layoutName + ' Make_New' + recInfo.layoutName + '(Old' + recInfo.layoutName + ' r) := TRANSFORM'], __StringRec)
-            & PROJECT
+        RETURN (+)
+            (
+                DATASET(['New' + recInfo.layoutName + ' Make_New' + recInfo.layoutName + '(Old' + recInfo.layoutName + ' r) := TRANSFORM'], __StringRec),
+                PROJECT
                 (
                     DISTRIBUTE(recInfo.items, 0),
                     TRANSFORM
@@ -396,9 +402,11 @@ EXPORT BestRecordStructure(inFile, sampling = 100, emitTransform = FALSE, textOu
                             assignment := LEFT.bestAssignment;
                             SELF.s := IF(assignment != '', assignment, SKIP)
                         )
-                )
-            & DATASET(['    SELF := r;'], __StringRec)
-            & DATASET(['END;'], __StringRec);
+                    ),
+                DATASET(['    SELF := r;'], __StringRec),
+                DATASET(['END;'], __StringRec),
+                ORDERED(TRUE)
+            );
     END;
 
     LOCAL __allTransforms := PROJECT
@@ -418,13 +426,17 @@ EXPORT BestRecordStructure(inFile, sampling = 100, emitTransform = FALSE, textOu
     // definitions, and a sample PROJECT for kicking it all off
     LOCAL __conditionalBR := #IF((BOOLEAN)textOutput) '<br/>' #ELSE '' #END;
 
-    LOCAL __oldRecDefsPlusTransforms := DATASET(['//----------' + __conditionalBR], __StringRec)
-        & PROJECT(__allOldRecDefs.items, __StringRec)
-        & DATASET(['//----------' + __conditionalBR], __StringRec)
-        & __allTransforms.lines
-        & DATASET(['//----------' + __conditionalBR], __StringRec)
-        & DATASET(['oldDS := DATASET([], OldLayout);' + __conditionalBR], __StringRec)
-        & DATASET(['newDS := PROJECT(oldDS, Make_NewLayout(LEFT));' + __conditionalBR], __StringRec);
+    LOCAL __oldRecDefsPlusTransforms := (+)
+        (
+            DATASET(['//----------' + __conditionalBR], __StringRec),
+            PROJECT(__allOldRecDefs.items, __StringRec),
+            DATASET(['//----------' + __conditionalBR], __StringRec),
+            __allTransforms.lines,
+            DATASET(['//----------' + __conditionalBR], __StringRec),
+            DATASET(['oldDS := DATASET([], OldLayout);' + __conditionalBR], __StringRec),
+            DATASET(['newDS := PROJECT(oldDS, Make_NewLayout(LEFT));' + __conditionalBR], __StringRec),
+            ORDERED(TRUE)
+        );
 
     // Combine old definitions and transforms conditionally
     LOCAL __conditionalOldStuff :=

+ 6 - 6
ecllibrary/std/DataPatterns/Profile.ecl

@@ -177,7 +177,7 @@
  *                          argument is 1-100; values outside of this range
  *                          will be clamped; OPTIONAL, defaults to 100 (which
  *                          indicates that the entire dataset will be analyzed)
- * @param   lcbLimit        A positive integer (<= 500) indicating the maximum
+ * @param   lcbLimit        A positive integer (<= 1000) indicating the maximum
  *                          cardinality allowed for an attribute in order to
  *                          emit a breakdown of the attribute's values; this
  *                          parameter will be ignored if cardinality_breakdown
@@ -219,9 +219,9 @@ EXPORT Profile(inFile,
     #UNIQUENAME(trimmedFieldList);
     LOCAL %trimmedFieldList% := TRIM(fieldListStr, ALL);
 
-    // Clamp lcbLimit to 0..500
+    // Clamp lcbLimit to 0..1000
     #UNIQUENAME(lowCardinalityThreshold);
-    LOCAL %lowCardinalityThreshold% := MIN(MAX(lcbLimit, 0), 500);
+    LOCAL %lowCardinalityThreshold% := MIN(MAX(lcbLimit, 0), 1000);
 
     // The maximum number of mode values to return
     #UNIQUENAME(MAX_MODES);
@@ -1177,7 +1177,7 @@ EXPORT Profile(inFile,
         #UNIQUENAME(dataPatternStats);
         LOCAL %dataPatternStats% := TABLE
             (
-                DISTRIBUTE(%dataPatternStats0%, HASH32(attribute)),
+                %dataPatternStats0%,
                 {
                     attribute,
                     data_pattern,
@@ -1185,10 +1185,10 @@ EXPORT Profile(inFile,
                     UNSIGNED4   rec_count := SUM(GROUP, value_count)
                 },
                 attribute, data_pattern,
-                LOCAL
+                MERGE
             ) : ONWARNING(2168, IGNORE);
         #UNIQUENAME(groupedDataPatterns);
-        LOCAL %groupedDataPatterns% := GROUP(SORT(%dataPatternStats%, attribute, LOCAL), attribute, LOCAL);
+        LOCAL %groupedDataPatterns% := GROUP(SORT(DISTRIBUTE(%dataPatternStats%, HASH32(attribute)), attribute, LOCAL), attribute, LOCAL);
         #UNIQUENAME(topDataPatterns);
         LOCAL %topDataPatterns% := UNGROUP(TOPN(%groupedDataPatterns%, (UNSIGNED)_maxPatterns, -rec_count, data_pattern));
         #UNIQUENAME(rareDataPatterns0);