ソースを参照

Merge pull request #11362 from ghalliday/issue20010

HPCC-20010 Remove invalid optimizations of NONEMPTY

Reviewed-By: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 7 年 前
コミット
8fe41d81b5
3 ファイル変更67 行追加2 行削除
  1. 25 2
      ecl/hql/hqlopt.cpp
  2. 28 0
      testing/regress/ecl/key/nonempty.xml
  3. 14 0
      testing/regress/ecl/nonempty.ecl

+ 25 - 2
ecl/hql/hqlopt.cpp

@@ -2224,6 +2224,7 @@ IHqlExpression * CTreeOptimizer::queryMoveKeyedExpr(IHqlExpression * transformed
     case no_if:
         return swapIntoIf(transformed, true);
     case no_nonempty:
+        return nullptr;
     case no_addfiles:
     case no_chooseds:
         return swapIntoAddFiles(transformed, true);
@@ -3058,9 +3059,24 @@ IHqlExpression * CTreeOptimizer::doCreateTransformed(IHqlExpression * transforme
                 }
             case no_if:
                 return swapIntoIf(transformed);
-            case no_nonempty:
             case no_chooseds:
                 return swapIntoAddFiles(transformed);
+            case no_nonempty:
+            {
+                //NOEMPTY(a,b)(filter).  If b(filter) is [] then this can be simplified to a(filter)
+                unsigned max = getNumActivityArguments(child);
+                if (max == 2)
+                {
+                    //It would be valid to special case NONEMPTY(a,b,c,d) to remove trailing expressions that evaluate to
+                    //empty datasets, but that is left to a later change - catch the common case.
+                    IHqlExpression * lastChild = child->queryChild(max-1);
+                    OwnedHqlExpr filteredLast = replaceChild(transformed, lastChild);
+                    OwnedHqlExpr transformedLast = transform(filteredLast);
+                    if (isNull(transformedLast))
+                        return swapIntoAddFiles(transformed);
+                }
+                break;
+            }
             case no_fetch:
                 if (isPureActivity(child) && !hasUnknownTransform(child))
                 {
@@ -3241,6 +3257,9 @@ IHqlExpression * CTreeOptimizer::doCreateTransformed(IHqlExpression * transforme
                     break;
                 return swapIntoIf(transformed);
             case no_nonempty:
+                if (assignsContainSkip(transform))
+                    break;
+                //fallthrough
             case no_chooseds:
                 if (isComplexTransform(transform))
                     break;
@@ -3438,6 +3457,11 @@ IHqlExpression * CTreeOptimizer::doCreateTransformed(IHqlExpression * transforme
                     break;
                 return swapIntoIf(transformed);
             case no_nonempty:
+                if (isAggregateDataset(child))
+                    break;
+                if (assignsContainSkip(transformed->queryChild(2)))
+                    break;
+                //fallthrough
             case no_chooseds:
                 if (isComplexTransform(transformed->queryChild(2)))
                     break;
@@ -3749,7 +3773,6 @@ IHqlExpression * CTreeOptimizer::doCreateTransformed(IHqlExpression * transforme
             {
             case no_if:
                 return swapIntoIf(transformed);
-            case no_nonempty:
             case no_chooseds:
                 return swapIntoAddFiles(transformed);
             case no_compound_diskread:

+ 28 - 0
testing/regress/ecl/key/nonempty.xml

@@ -18,3 +18,31 @@
 <Dataset name='Result 7'>
  <Row><surname>Jones               </surname><forename>James     </forename><age>22</age></Row>
 </Dataset>
+<Dataset name='Result 8'>
+</Dataset>
+<Dataset name='Result 9'>
+ <Row><Result_9>20</Result_9></Row>
+</Dataset>
+<Dataset name='Result 10'>
+ <Row><Result_10>21</Result_10></Row>
+</Dataset>
+<Dataset name='Result 11'>
+ <Row><Result_11>true</Result_11></Row>
+</Dataset>
+<Dataset name='Result 12'>
+</Dataset>
+<Dataset name='Result 13'>
+ <Row><Result_13>20</Result_13></Row>
+</Dataset>
+<Dataset name='Result 14'>
+ <Row><Result_14>21</Result_14></Row>
+</Dataset>
+<Dataset name='Result 15'>
+ <Row><sumage>21</sumage></Row>
+</Dataset>
+<Dataset name='Result 16'>
+ <Row><Result_16>true</Result_16></Row>
+</Dataset>
+<Dataset name='Result 17'>
+ <Row><x>true</x></Row>
+</Dataset>

+ 14 - 0
testing/regress/ecl/nonempty.ecl

@@ -33,7 +33,9 @@ d2 := dataset([{'Jones','John',twentyone}], namesRecord);
 d3 := dataset([{'Jones','James',twentytwo}], namesRecord);
 d4 := dataset([{'Jones','Jimmy',twenty}], namesRecord);
 d5 := dataset([{'Jones','Jonnie',twentyone}], namesRecord);
+d6 := dataset([{'Jones','Jonnie',21}], namesRecord);
 
+sequential(
 output(nonempty(d1(age != 40), d2(age != 40)));
 output(nonempty(d1(age != 20), d2(age != 40)));
 output(nonempty(d1(age != 40), d2(age != 21)));
@@ -42,3 +44,15 @@ output(nonempty(d1, d2, d3, d4));
 output(nonempty(d1(age = 0), d2(age = 0), d3(age = 0), d4));
 output(nonempty(d1(age = 0), d2(age = 0), d3, d4));
 
+output(nonempty(d1, d2)(age != 20)); // Should be blank
+output(sum(nonempty(d1, d2), age)); // Should be 20
+output(sum(nonempty(d1(age != 20), d2), age)); // Should be 21
+output(exists(nonempty(d1(age != 20), d2))); // Should be 21
+
+output(nonempty(d1, d6)(age != 20)); // Should be blank
+output(sum(nonempty(d1, d6), age)); // Should be 20
+output(sum(nonempty(d1(age != 20), d6), age)); // Should be 21
+output(TABLE(nonempty(d1(age != 20), d6), { sumage := SUM(GROUP, age) })); // Should be 21
+output(exists(nonempty(d1(age != 20), d6))); // Should be true
+output(TABLE(nonempty(d1(age != 20), d6), { boolean x := exists(group)})); // Should be true
+);