Przeglądaj źródła

HPCC-2253 Implement Syntax for grouped hash aggregate

Allow the GROUPED attribute to be specified on a TABLE(,FEW) where the input
dataset is already grouped and a grouping condition is supplied.  Without
,GROUPED the aggregation is done globally, across all groups.  With ,GROUPED
it is done independently within each group.

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 12 lat temu
rodzic
commit
2f775df07e

+ 1 - 1
ecl/hql/hqlattr.cpp

@@ -1673,7 +1673,7 @@ bool isLocalActivity(IHqlExpression * expr)
 bool isGroupedAggregateActivity(IHqlExpression * expr, IHqlExpression * grouping)
 {
     if (grouping && !grouping->isAttribute())
-        return false;
+        return expr->hasProperty(groupedAtom);
 
     return isGrouped(expr->queryChild(0));
 }

+ 21 - 7
ecl/hql/hqlexpr.cpp

@@ -11446,16 +11446,30 @@ IHqlExpression *createDataset(node_operator op, HqlExprArray & parms)
                 grouping = NULL;
 
             type.set(datasetType);
-            //grouping causes the sort order (and distribution) to be lost - because it might be done by a hash aggregate.
             if (grouping)
             {
-                type.setown(getTypeRemoveAllSortOrders(type));
-                if (!queryProperty(localAtom, parms))
-                    type.setown(getTypeUnknownDistribution(type));      // will be distributed by some function of the grouping fields
+                if (hasProperty(groupedAtom, parms))
+                {
+                    //A grouped hash aggregate - the sort order within the groups will be lost.
+                    type.setown(getTypeRemoveActiveSort(type));
+                }
+                else
+                {
+                    //grouping causes the sort order (and distribution) to be lost - because it might be done by a hash aggregate.
+                    type.setown(getTypeRemoveAllSortOrders(type));
+                    if (!queryProperty(localAtom, parms))
+                        type.setown(getTypeUnknownDistribution(type));      // will be distributed by some function of the grouping fields
+
+                    //Aggregation removes grouping, unless explicitly marked as a grouped operation
+                    type.setown(getTypeUngroup(type));
+                }
+            }
+            else
+            {
+                //Aggregation removes grouping
+                if (op == no_newaggregate || op == no_aggregate || (mapping && mapping->isGroupAggregateFunction()) || hasProperty(aggregateAtom, parms))
+                    type.setown(getTypeUngroup(type));
             }
-            //Aggregation removes grouping)
-            if (op == no_newaggregate || op == no_aggregate || (mapping && mapping->isGroupAggregateFunction()) || grouping || hasProperty(aggregateAtom, parms))
-                type.setown(getTypeUngroup(type));
             //Now map any fields that we can.
             type.setown(getTypeProject(type, record, mapper));
             break;

+ 5 - 1
ecl/hql/hqlgram.y

@@ -8052,7 +8052,7 @@ simpleDataSet
                             OwnedHqlExpr attrs;
                             OwnedHqlExpr grouping = parser->processSortList($7, no_usertable, dataset, sortItems, NULL, &attrs);
 
-                            if (grouping)
+                            if (grouping && !queryPropertyInList(groupedAtom, attrs))
                             {
                                 parser->checkGrouping($7, dataset,record,grouping);
                                 if (dataset->getOperator() == no_group && dataset->queryType()->queryGroupInfo())
@@ -10887,6 +10887,10 @@ sortItem
                             $$.setExpr(createAttribute(keyedAtom));
                             $$.setPosition($1);
                         }
+    | GROUPED
+                        {
+                            $$.setExpr(createAttribute(groupedAtom), $1);
+                        }
     | UNSTABLE '(' expression ')'
                         {
                             parser->normalizeExpression($3, type_string, false);

+ 1 - 0
ecl/hql/hqlgram2.cpp

@@ -5433,6 +5433,7 @@ IHqlExpression * HqlGram::processSortList(const attribute & errpos, node_operato
                     if (attr == keyedAtom) ok = true;
                     if (attr == prefetchAtom) ok = true;
                     if (attr == mergeAtom) ok = true;
+                    if (attr == groupedAtom) ok = true;
                     //fall through
                 case no_group:
                     if (attr == allAtom) ok = true;

+ 1 - 1
ecl/hqlcpp/hqlttcpp.cpp

@@ -3403,7 +3403,7 @@ IHqlExpression * ThorHqlTransformer::normalizeTableGrouping(IHqlExpression * exp
                 useHashAggregate = true;
         }
 
-        if (!expr->hasProperty(aggregateAtom) && !useHashAggregate)
+        if (!expr->hasProperty(aggregateAtom) && !useHashAggregate && !expr->hasProperty(groupedAtom))
             return convertAggregateGroupingToGroupedAggregate(expr, group);
     }
     return NULL;

+ 5 - 0
ecl/regress/aggds4.ecl

@@ -32,3 +32,8 @@ output(table(pr2(seq > 10), { surname, ave(group, aage) }, surname, few, keyed))
 //Should not generate a grouped Hash Aggregate
 output(sort(table(group(sort(sqNamesTable1, surname),surname), { surname, ave(group, aage) }, surname, few), record));
 
+//This should generate a grouped Hash Aggregate
+output(sort(table(group(sort(sqNamesTable1, surname),surname), { surname, ave(group, aage) }, surname, few, grouped), record));
+
+//As should this...
+output(sort(table(group(sort(sqNamesTable1, surname),surname), { surname, ave(group, aage) }, surname, grouped), record));

+ 53 - 0
ecl/regress/issue2253.ecl

@@ -0,0 +1,53 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+
+idRecord :=
+            RECORD
+UNSIGNED        id1;
+UNSIGNED        id2;
+UNSIGNED        id3;
+            END;
+
+ds0 := dataset([
+    {1,1,1},
+    {1,1,2},
+    {1,2,1},
+    {2,1,1},
+    {2,2,1},
+    {2,3,1},
+    {3,1,1},
+    {99,99,99}
+    ], idRecord);
+ds := sorted(ds0, id1, id2);
+
+agg1 := TABLE(ds, { id1, id2, cnt := count(group) }, id1);
+agg2 := TABLE(ds, { id2, cnt := count(group) }, id2);
+agg3 := TABLE(GROUP(ds,id1), { id2, cnt := count(group) }, id2);
+agg4 := TABLE(GROUP(ds,id1), { id1, id2, cnt := count(group) }, id2, grouped);
+agg5 := TABLE(GROUP(ds,id1,id2), { id1, cnt := count(group) }, id1);
+agg6 := TABLE(GROUP(ds,id1,id2), { id1, cnt := count(group) }, id1, grouped);
+
+sequential(
+    output(agg1);
+    output(sort(group(nofold(agg2)),id2));
+    output(sort(group(nofold(agg3)),id2));
+    output(sort(group(nofold(agg4)),id1, id2));
+    output(agg5);
+    output(sort(group(nofold(agg6)), id1, cnt));
+);
+    

+ 53 - 0
testing/ecl/grouphashagg.ecl

@@ -0,0 +1,53 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+
+idRecord :=
+            RECORD
+UNSIGNED        id1;
+UNSIGNED        id2;
+UNSIGNED        id3;
+            END;
+
+ds0 := dataset([
+    {1,1,1},
+    {1,1,2},
+    {1,2,1},
+    {2,1,1},
+    {2,2,1},
+    {2,3,1},
+    {3,1,1},
+    {99,99,99}
+    ], idRecord);
+ds := sorted(ds0, id1, id2);
+
+agg1 := TABLE(ds, { id1, id2, cnt := count(group) }, id1);
+agg2 := TABLE(ds, { id2, cnt := count(group) }, id2);
+agg3 := TABLE(GROUP(ds,id1), { id2, cnt := count(group) }, id2);
+agg4 := TABLE(GROUP(ds,id1), { id1, id2, cnt := count(group) }, id2, grouped);
+agg5 := TABLE(GROUP(ds,id1,id2), { id1, cnt := count(group) }, id1);
+agg6 := TABLE(GROUP(ds,id1,id2), { id1, cnt := count(group) }, id1, grouped);
+
+sequential(
+    output(agg1);
+    output(sort(group(nofold(agg2)),id2));
+    output(sort(group(nofold(agg3)),id2));
+    output(sort(group(nofold(agg4)),id1, id2));
+    output(agg5);
+    output(sort(group(nofold(agg6)), id1, cnt));
+);
+    

+ 42 - 0
testing/ecl/key/grouphashagg.xml

@@ -0,0 +1,42 @@
+<Dataset name='Result 1'>
+ <Row><id1>1</id1><id2>1</id2><cnt>3</cnt></Row>
+ <Row><id1>2</id1><id2>1</id2><cnt>3</cnt></Row>
+ <Row><id1>3</id1><id2>1</id2><cnt>1</cnt></Row>
+ <Row><id1>99</id1><id2>99</id2><cnt>1</cnt></Row>
+</Dataset>
+<Dataset name='Result 2'>
+ <Row><id2>1</id2><cnt>4</cnt></Row>
+ <Row><id2>2</id2><cnt>2</cnt></Row>
+ <Row><id2>3</id2><cnt>1</cnt></Row>
+ <Row><id2>99</id2><cnt>1</cnt></Row>
+</Dataset>
+<Dataset name='Result 3'>
+ <Row><id2>1</id2><cnt>4</cnt></Row>
+ <Row><id2>2</id2><cnt>2</cnt></Row>
+ <Row><id2>3</id2><cnt>1</cnt></Row>
+ <Row><id2>99</id2><cnt>1</cnt></Row>
+</Dataset>
+<Dataset name='Result 4'>
+ <Row><id1>1</id1><id2>1</id2><cnt>2</cnt></Row>
+ <Row><id1>1</id1><id2>2</id2><cnt>1</cnt></Row>
+ <Row><id1>2</id1><id2>1</id2><cnt>1</cnt></Row>
+ <Row><id1>2</id1><id2>2</id2><cnt>1</cnt></Row>
+ <Row><id1>2</id1><id2>3</id2><cnt>1</cnt></Row>
+ <Row><id1>3</id1><id2>1</id2><cnt>1</cnt></Row>
+ <Row><id1>99</id1><id2>99</id2><cnt>1</cnt></Row>
+</Dataset>
+<Dataset name='Result 5'>
+ <Row><id1>1</id1><cnt>3</cnt></Row>
+ <Row><id1>2</id1><cnt>3</cnt></Row>
+ <Row><id1>3</id1><cnt>1</cnt></Row>
+ <Row><id1>99</id1><cnt>1</cnt></Row>
+</Dataset>
+<Dataset name='Result 6'>
+ <Row><id1>1</id1><cnt>1</cnt></Row>
+ <Row><id1>1</id1><cnt>2</cnt></Row>
+ <Row><id1>2</id1><cnt>1</cnt></Row>
+ <Row><id1>2</id1><cnt>1</cnt></Row>
+ <Row><id1>2</id1><cnt>1</cnt></Row>
+ <Row><id1>3</id1><cnt>1</cnt></Row>
+ <Row><id1>99</id1><cnt>1</cnt></Row>
+</Dataset>