Browse Source

HPCC-9132 Improve spotting lightweight denormalize

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 12 years ago
parent
commit
9ab749a664
5 changed files with 88 additions and 9 deletions
  1. 11 2
      ecl/hql/hqlexpr.cpp
  2. 5 4
      ecl/hqlcpp/hqlhtcpp.cpp
  3. 25 3
      ecl/hqlcpp/hqlttcpp.cpp
  4. 2 0
      ecl/hqlcpp/hqlttcpp.ipp
  5. 45 0
      ecl/regress/issue9132.ecl

+ 11 - 2
ecl/hql/hqlexpr.cpp

@@ -11151,12 +11151,18 @@ IHqlExpression *createDataset(node_operator op, HqlExprArray & parms)
                     TableProjectMapper mapper;
                     mapper.setMapping(mapTransform, leftSelect);
                     type.setown(getTypeProject(type, transform->queryRecord(), mapper));
+
+                    //For no_denormalize, information is only preserved if it is the same whether or not the transform was called.
+                    if (op == no_denormalize)
+                        type.setown(getTypeIntersection(type, datasetType));
                 }
                 else
                     type.setown(getTypeCannotProject(type, transform->queryRecord()));
             }
             else if (isLocal)
             {
+                OwnedITypeInfo ungroupedDatasetType = getTypeUngroup(datasetType);
+
                 //local operation so try and preserve the current distribution, no clue about the following sort order, 
                 //and result is never grouped.
                 if (queryProperty(_lightweight_Atom, parms) && !createDefaultLeft)
@@ -11166,8 +11172,7 @@ IHqlExpression *createDataset(node_operator op, HqlExprArray & parms)
                     TableProjectMapper mapper;
                     mapper.setMapping(transform, leftSelect);
 
-                    type.setown(getTypeUngroup(datasetType));
-                    type.setown(getTypeProject(type, transform->queryRecord(), mapper));
+                    type.setown(getTypeProject(ungroupedDatasetType, transform->queryRecord(), mapper));
                 }
                 else
                 {
@@ -11192,6 +11197,10 @@ IHqlExpression *createDataset(node_operator op, HqlExprArray & parms)
                         newDistributeInfo = getUnknownAttribute();
                     type.setown(makeTableType(makeRowType(LINK(transform->queryRecordType())), newDistributeInfo, NULL, NULL));
                 }
+
+                //For no_denormalize, information is only preserved if it is the same whether or not the transform was called.
+                if (op == no_denormalize)
+                    type.setown(getTypeIntersection(type, ungroupedDatasetType));
             }
             else if (isHashJoin)
             {

+ 5 - 4
ecl/hqlcpp/hqlhtcpp.cpp

@@ -11705,10 +11705,11 @@ ABoundActivity * HqlCppTranslator::doBuildActivityJoinOrDenormalize(BuildCtx & c
     Owned<ActivityInstance> instance = new ActivityInstance(*this, ctx, kind, expr, argName);
     if (isLightweight)
     {
-        if ((kind == TAKselfjoinlight) || (kind == TAKselfjoin))
-            instance->graphLabel.set("Lightweight Self Join");
-        else
-            instance->graphLabel.set("Lightweight Join");
+        StringBuffer graphLabel;
+        if (kind != TAKselfjoinlight)
+            graphLabel.append("Lightweight ");
+        graphLabel.append(getActivityText(kind));
+        instance->graphLabel.set(graphLabel.str());
     }
 
     instance->setLocal(isLocalJoin);

+ 25 - 3
ecl/hqlcpp/hqlttcpp.cpp

@@ -2555,6 +2555,30 @@ bool canReorderMatchExistingLocalSort(HqlExprArray & newElements1, HqlExprArray
 }
 
 
+bool ThorHqlTransformer::isLightweightJoinCandidate(IHqlExpression * expr, bool isLocal, bool isLimitedSubstringJoin)
+{
+    //Returns true if expr is a local join/denormalize that is worth checking for conversion to a lightweight join.
+    //Thor only: it is equally applicable to hthor and roxie, but non-lookup joins currently generate group activities on the inputs which look less efficient.  It may still be better to enable it though.
+    if (!translator.targetThor())
+        return false;
+
+    if (!options.spotLocalMerge || isLimitedSubstringJoin || !isLocal) // needs the spotLocalMerge option and a local operation; limited substring joins are excluded
+        return false;
+
+    if (expr->hasProperty(_lightweight_Atom)) // expr already carries the lightweight attribute
+        return false;
+
+    switch (expr->getOperator()) // only the join and denormalize operators listed below qualify
+    {
+    case no_join:
+    case no_selfjoin:
+    case no_denormalizegroup:
+    case no_denormalize:
+        return true;
+    }
+    return false; // any other operator is not a candidate
+}
+
 IHqlExpression * ThorHqlTransformer::normalizeJoinOrDenormalize(IHqlExpression * expr)
 {
     IHqlExpression * leftDs = expr->queryChild(0);
@@ -2677,9 +2701,7 @@ IHqlExpression * ThorHqlTransformer::normalizeJoinOrDenormalize(IHqlExpression *
     //Try and convert local joins to a lightweight join that doesn't require any sorting of the inputs.
     //Improves resourcing for thor, and prevents lookup conversion for hthor/roxie
     //Worthwhile even for lookup joins
-    if (translator.targetThor() &&
-        options.spotLocalMerge && !isLimitedSubstringJoin &&
-        ((op == no_join) || (op == no_selfjoin)) && isLocal && !expr->hasProperty(_lightweight_Atom))
+    if (isLightweightJoinCandidate(expr, isLocal, isLimitedSubstringJoin))
     {
         if (isAlreadySorted(leftDs, leftSorts, true, true) &&
             isAlreadySorted(rightDs, rightSorts, true, true))

+ 2 - 0
ecl/hqlcpp/hqlttcpp.ipp

@@ -212,6 +212,8 @@ protected:
     IHqlExpression * skipGroupsWithinGroup(IHqlExpression * expr, bool isLocal);
     IHqlExpression * skipOverGroups(IHqlExpression * dataset, bool isLocal);
 
+    bool isLightweightJoinCandidate(IHqlExpression * expr, bool isLocal, bool isLimitedSubstringJoin);
+
 protected:
     typedef NewHqlTransformer PARENT;
     HqlCppTranslator &  translator;

+ 45 - 0
ecl/regress/issue9132.ecl

@@ -0,0 +1,45 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+namesRecord :=
+            RECORD
+string20        surname;
+string20        forename;
+integer2        age := 25;
+            END;
+
+namesTable1 := dataset('x',namesRecord,FLAT);
+namesTable2 := dataset('y',namesRecord,FLAT);
+
+s1 := SORT(DISTRIBUTE(namesTable1, HASH(surname)), surname, LOCAL);
+s2 := SORT(DISTRIBUTE(namesTable2, HASH(surname)), surname, LOCAL);
+
+j1 := DENORMALIZE(s1, s2, LEFT.surname = RIGHT.surname, TRANSFORM(LEFT));
+j2 := DENORMALIZE(s1, s2, LEFT.surname = RIGHT.surname, TRANSFORM(namesRecord, SELF.surname := RIGHT.forename; SELF.forename := LEFT.surname));
+output(sort(j1, surname, local));   // this sort can be removed - TRANSFORM(LEFT) leaves surname unchanged
+output(sort(j2, forename, local));  // no, this sort cannot be removed - the transform may be called 0, 1, or many times.
+
+
+namesTable3 := dataset('z1',namesRecord,FLAT);
+namesTable4 := dataset('z2',namesRecord,FLAT);
+
+s3 := SORT(DISTRIBUTE(namesTable3, HASH(surname)), surname, forename, LOCAL);
+s4 := SORT(DISTRIBUTE(namesTable4, HASH(surname)), surname, forename, LOCAL);
+
+j3 := DENORMALIZE(s3, s4, LEFT.surname = RIGHT.surname AND LEFT.forename = RIGHT.forename, TRANSFORM(namesRecord, SELF.forename := RIGHT.forename; SELF := LEFT));
+output(sort(j3, surname, forename, local));   // this sort cannot be removed - forename is assigned in the transform
+output(sort(j3, surname, local));  // this one can - surname is copied unchanged from LEFT