Browse Source

Merge pull request #13174 from ghalliday/issue22971

HPCC-22971 Avoid creating compound disk reads for alien types

Reviewed-By: Jake Smith <jake.smith@lexisnexis.com>
Reviewed-By: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 5 years ago
parent
commit
d96f7d40cc

+ 79 - 0
ecl/hql/hqlattr.cpp

@@ -4069,3 +4069,82 @@ CHqlMetaProperty * queryMetaProperty(IHqlExpression * expr)
     CHqlExprMeta::addProperty(body, EPmeta, info);
     return info;
 }
+
+//--------------------------------------------------------------------------------------------------------------------
+
+//Returns true if expr is a no_select whose first child is a no_selfref, i.e. an expression of the form SELF.x
+inline bool isSelfSelect(IHqlExpression * expr)
+{
+    //Short-circuit evaluation ensures the child is only examined once expr is known to be a select.
+    return (expr->getOperator() == no_select) &&
+           (expr->queryChild(0)->getOperator() == no_selfref);
+}
+
+//Returns true only if every element of the record is known to be supported by the record translators.
+//Returning false is always safe (the caller simply avoids relying on translation), so anything that is not
+//explicitly recognised as supported is rejected.  A true result must never be returned for an unsupported record.
+//
+//record - must be a no_record expression (checked with dbgassertex).
+//
+//This should possibly cache the results for a record using an attribute....
+bool canDefinitelyProcessWithTranslator(IHqlExpression * record)
+{
+    dbgassertex(record->getOperator() == no_record);
+    ForEachChild(i, record)
+    {
+        IHqlExpression * cur = record->queryChild(i);
+        switch (cur->getOperator())
+        {
+        case no_field:
+        {
+            ITypeInfo * type = cur->queryType();
+            switch (type->getTypeCode())
+            {
+            case type_alien:
+            case type_any: // I doubt these ever occur...
+            case type_bitfield: // Need to check if these have been implemented
+                return false;
+            case type_row:
+                if (hasReferenceModifier(type))  // Never currently generated
+                    return false;
+                //A nested row is only supported if its own record is supported
+                if (!canDefinitelyProcessWithTranslator(cur->queryRecord()))
+                    return false;
+                break;
+            case type_dictionary:
+            case type_groupedtable:
+            case type_table:
+                {
+                    //Check for weird versions of DATASET where the count/size are specified by another field.
+                    ForEachChild(j, cur)
+                    {
+                        IHqlExpression * attr = cur->queryChild(j);
+                        if (attr->isAttribute())
+                        {
+                            IAtom * name = attr->queryName();
+                            if ((name == countAtom) || (name == sizeofAtom))
+                                return false;
+                        }
+                    }
+                    break;
+                }
+            }
+            break;
+        }
+        case no_ifblock:
+            {
+                IHqlExpression * cond = cur->queryChild(0);
+                node_operator condOp = cond->getOperator();
+                //Match the subset of the conditions that are supported by the record translation.
+                //Rejecting a condition that is actually supported is acceptable; accepting an unsupported one is not.
+                bool supportedCondition = false;
+                if ((condOp == no_eq) || (condOp == no_ne))
+                {
+                    //SELF.x [=|!=] constant
+                    IHqlExpression * lhs = cond->queryChild(0);
+                    IHqlExpression * rhs = cond->queryChild(1);
+                    supportedCondition = isSelfSelect(lhs) && (rhs->getOperator() == no_constant);
+                }
+                else if (isSelfSelect(cond)) // SELF.someboolean
+                    supportedCondition = true;
+
+                if (!supportedCondition)
+                    return false;
+
+                //A supported condition is not sufficient on its own: the fields guarded by the ifblock must also be
+                //supported, and the remaining fields of the enclosing record still need to be checked, so do not
+                //return early here.
+                IHqlExpression * body = cur->queryChild(1);
+                if (!body || (body->getOperator() != no_record))
+                    return false;   // unexpected shape - be conservative
+                if (!canDefinitelyProcessWithTranslator(body))
+                    return false;
+                break;
+            }
+        }
+    }
+
+    return true;
+}

+ 3 - 0
ecl/hql/hqlattr.hpp

@@ -49,6 +49,9 @@ extern HQL_API IHqlExpression * getSerializedForm(IHqlExpression * expr, IAtom *
 extern HQL_API ITypeInfo * getSerializedForm(ITypeInfo * type, IAtom * variation);
 extern HQL_API IHqlExpression * getPackedRecord(IHqlExpression * expr);
 
+// Is it possible to process this record using the record translators.  May have false -ves, but no false +ves.
+// i.e. a result of true is intended as a guarantee that the record can be processed, while a result of false
+// only means that it could not be shown to be supported.
+// The implementation asserts (dbgassertex) that record is a no_record expression.
+extern HQL_API bool canDefinitelyProcessWithTranslator(IHqlExpression * record);
+
 //This returns a record that compares equal with another result if the normalized records will compare equal
 extern HQL_API IHqlExpression * getUnadornedRecordOrField(IHqlExpression * expr);
 

+ 4 - 0
ecl/hqlcpp/hqlttcpp.cpp

@@ -4325,6 +4325,8 @@ void CompoundSourceTransformer::analyseGatherInfo(IHqlExpression * expr)
     switch (op)
     {
     case no_newkeyindex:
+        if (!canDefinitelyProcessWithTranslator(expr->queryRecord()))
+            break;
         extra->sourceOp = no_compound_indexread;
         extra->uid.set(expr->queryAttribute(_uid_Atom));
         extra->mode = no_thor;
@@ -4334,6 +4336,8 @@ void CompoundSourceTransformer::analyseGatherInfo(IHqlExpression * expr)
             IHqlExpression * mode = expr->queryChild(2);
             if (!mode)
                 break;
+            if (!canDefinitelyProcessWithTranslator(expr->queryRecord()))
+                break;
             switch (mode->getOperator())
             {
             case no_thor:

+ 4 - 4
testing/regress/ecl/diskread.ecl

@@ -72,7 +72,7 @@ DG_FlatFile_pl0 := PRELOAD(Files.DG_FlatFile);
 DG_FlatFileEvens_pl0 := PRELOAD(Files.DG_FlatFileEvens);
 //DG_CSVFile_pl0 := PRELOAD(Files.DG_CSVFile);
 //DG_XMLFile_pl0 := PRELOAD(Files.DG_XMLFile);
-DG_VarFile_pl0 := PRELOAD(Files.DG_VarFile);
+//DG_VarFile_pl0 := PRELOAD(Files.DG_VarFile);
 
 // straight disk count
 
@@ -80,7 +80,7 @@ count(DG_FlatFile_pl0);
 count(DG_FlatFileEvens_pl0);
 //count(DG_CSVFile_pl0);
 //count(DG_XMLFile_pl0);
-count(DG_VarFile_pl0);
+//count(DG_VarFile_pl0);
 
 // straight disk read
 
@@ -88,7 +88,7 @@ COUNT(DEDUP(DG_FlatFile_pl0, RECORD));
 COUNT(DEDUP(DG_FlatFileEvens_pl0, RECORD));
 //COUNT(DEDUP(DG_CSVFile_pl0, RECORD));
 //COUNT(DEDUP(DG_XMLFile_pl0, RECORD));
-COUNT(DEDUP(DG_VarFile_pl0, RECORD));
+//COUNT(DEDUP(DG_VarFile_pl0, RECORD));
 
 // filtered disk read
 
@@ -96,7 +96,7 @@ output(DG_FlatFile_pl0(DG_firstname='CLAIRE'));
 output(DG_FlatFileEvens_pl0(DG_firstname='CLAIRE'));
 //output(DG_CSVFile_pl0(DG_firstname='CLAIRE'));
 //output(DG_XMLFile_pl0(DG_firstname='CLAIRE'));
-output(DG_VarFile_pl0(DG_firstname='CLAIRE'));
+//output(DG_VarFile_pl0(DG_firstname='CLAIRE'));
 
 // keyed disk read
 

+ 4 - 28
testing/regress/ecl/key/diskread.xml

@@ -8,15 +8,9 @@
  <Row><Result_3>64</Result_3></Row>
 </Dataset>
 <Dataset name='Result 4'>
- <Row><Result_4>64</Result_4></Row>
+ <Row><Result_4>32</Result_4></Row>
 </Dataset>
 <Dataset name='Result 5'>
- <Row><Result_5>32</Result_5></Row>
-</Dataset>
-<Dataset name='Result 6'>
- <Row><Result_6>64</Result_6></Row>
-</Dataset>
-<Dataset name='Result 7'>
  <Row><dg_parentid>16</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>BAYLISS   </dg_lastname><dg_prange>1</dg_prange><filepos>400</filepos></Row>
  <Row><dg_parentid>17</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>BAYLISS   </dg_lastname><dg_prange>2</dg_prange><filepos>425</filepos></Row>
  <Row><dg_parentid>18</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>BAYLISS   </dg_lastname><dg_prange>3</dg_prange><filepos>450</filepos></Row>
@@ -34,7 +28,7 @@
  <Row><dg_parentid>30</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>SMITH     </dg_lastname><dg_prange>3</dg_prange><filepos>750</filepos></Row>
  <Row><dg_parentid>31</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>SMITH     </dg_lastname><dg_prange>4</dg_prange><filepos>775</filepos></Row>
 </Dataset>
-<Dataset name='Result 8'>
+<Dataset name='Result 6'>
  <Row><dg_parentid>16</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>BAYLISS   </dg_lastname><dg_prange>1</dg_prange><filepos>0</filepos></Row>
  <Row><dg_parentid>17</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>BAYLISS   </dg_lastname><dg_prange>2</dg_prange><filepos>25</filepos></Row>
  <Row><dg_parentid>18</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>BAYLISS   </dg_lastname><dg_prange>3</dg_prange><filepos>50</filepos></Row>
@@ -52,25 +46,7 @@
  <Row><dg_parentid>30</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>SMITH     </dg_lastname><dg_prange>3</dg_prange><filepos>350</filepos></Row>
  <Row><dg_parentid>31</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>SMITH     </dg_lastname><dg_prange>4</dg_prange><filepos>375</filepos></Row>
 </Dataset>
-<Dataset name='Result 9'>
- <Row><emptyfield></emptyfield><dg_parentid>16</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>BAYLISS   </dg_lastname><dg_prange>1</dg_prange><__filepos>624</__filepos></Row>
- <Row><emptyfield></emptyfield><dg_parentid>17</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>BAYLISS   </dg_lastname><dg_prange>2</dg_prange><extrafield>BACL                </extrafield><__filepos>653</__filepos></Row>
- <Row><emptyfield></emptyfield><dg_parentid>18</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>BAYLISS   </dg_lastname><dg_prange>3</dg_prange><__filepos>702</__filepos></Row>
- <Row><emptyfield></emptyfield><dg_parentid>19</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>BAYLISS   </dg_lastname><dg_prange>4</dg_prange><extrafield>BAYLCLAI            </extrafield><__filepos>731</__filepos></Row>
- <Row><emptyfield></emptyfield><dg_parentid>20</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>DOLSON    </dg_lastname><dg_prange>1</dg_prange><__filepos>780</__filepos></Row>
- <Row><emptyfield></emptyfield><dg_parentid>21</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>DOLSON    </dg_lastname><dg_prange>2</dg_prange><extrafield>DOCL                </extrafield><__filepos>809</__filepos></Row>
- <Row><emptyfield></emptyfield><dg_parentid>22</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>DOLSON    </dg_lastname><dg_prange>3</dg_prange><__filepos>858</__filepos></Row>
- <Row><emptyfield></emptyfield><dg_parentid>23</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>DOLSON    </dg_lastname><dg_prange>4</dg_prange><extrafield>DOLSCLAI            </extrafield><__filepos>887</__filepos></Row>
- <Row><emptyfield></emptyfield><dg_parentid>24</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>BILLINGTON</dg_lastname><dg_prange>1</dg_prange><__filepos>936</__filepos></Row>
- <Row><emptyfield></emptyfield><dg_parentid>25</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>BILLINGTON</dg_lastname><dg_prange>2</dg_prange><extrafield>BICL                </extrafield><__filepos>965</__filepos></Row>
- <Row><emptyfield></emptyfield><dg_parentid>26</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>BILLINGTON</dg_lastname><dg_prange>3</dg_prange><__filepos>1014</__filepos></Row>
- <Row><emptyfield></emptyfield><dg_parentid>27</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>BILLINGTON</dg_lastname><dg_prange>4</dg_prange><extrafield>BILLCLAI            </extrafield><__filepos>1043</__filepos></Row>
- <Row><emptyfield></emptyfield><dg_parentid>28</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>SMITH     </dg_lastname><dg_prange>1</dg_prange><__filepos>1092</__filepos></Row>
- <Row><emptyfield></emptyfield><dg_parentid>29</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>SMITH     </dg_lastname><dg_prange>2</dg_prange><extrafield>SMCL                </extrafield><__filepos>1121</__filepos></Row>
- <Row><emptyfield></emptyfield><dg_parentid>30</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>SMITH     </dg_lastname><dg_prange>3</dg_prange><__filepos>1170</__filepos></Row>
- <Row><emptyfield></emptyfield><dg_parentid>31</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>SMITH     </dg_lastname><dg_prange>4</dg_prange><extrafield>SMITCLAI            </extrafield><__filepos>1199</__filepos></Row>
-</Dataset>
-<Dataset name='Result 10'>
+<Dataset name='Result 7'>
  <Row><dg_parentid>16</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>BAYLISS   </dg_lastname><dg_prange>1</dg_prange><filepos>400</filepos></Row>
  <Row><dg_parentid>17</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>BAYLISS   </dg_lastname><dg_prange>2</dg_prange><filepos>425</filepos></Row>
  <Row><dg_parentid>18</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>BAYLISS   </dg_lastname><dg_prange>3</dg_prange><filepos>450</filepos></Row>
@@ -88,7 +64,7 @@
  <Row><dg_parentid>30</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>SMITH     </dg_lastname><dg_prange>3</dg_prange><filepos>750</filepos></Row>
  <Row><dg_parentid>31</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>SMITH     </dg_lastname><dg_prange>4</dg_prange><filepos>775</filepos></Row>
 </Dataset>
-<Dataset name='Result 11'>
+<Dataset name='Result 8'>
  <Row><dg_parentid>16</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>BAYLISS   </dg_lastname><dg_prange>1</dg_prange><filepos>0</filepos></Row>
  <Row><dg_parentid>17</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>BAYLISS   </dg_lastname><dg_prange>2</dg_prange><filepos>25</filepos></Row>
  <Row><dg_parentid>18</dg_parentid><dg_firstname>CLAIRE    </dg_firstname><dg_lastname>BAYLISS   </dg_lastname><dg_prange>3</dg_prange><filepos>50</filepos></Row>