hqliproj.cpp 115 KB


  1. /*##############################################################################
  2. Copyright (C) 2011 HPCC Systems.
  3. All rights reserved. This program is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU Affero General Public License as
  5. published by the Free Software Foundation, either version 3 of the
  6. License, or (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Affero General Public License for more details.
  11. You should have received a copy of the GNU Affero General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>.
  13. ############################################################################## */
  14. #include "platform.h"
  15. #include "jlib.hpp"
  16. #include "jmisc.hpp"
  17. #include "jstream.ipp"
  18. #include "hql.hpp"
  19. #include "hqliproj.ipp"
  20. #include "hqlutil.hpp"
  21. #include "hqlcpputil.hpp"
  22. #include "hqlthql.hpp"
  23. #include "hqlhtcpp.ipp"
  24. #include "hqltcppc.ipp"
  25. #include "hqlcatom.hpp"
  26. #include "hqlfold.hpp"
  27. #include "hqlpmap.hpp"
  28. #include "hqlopt.hpp"
  29. #include "hqlcerrors.hpp"
  30. #include "hqlsource.ipp"
  31. #include "hqlattr.hpp"
  32. #include "jsort.hpp"
  33. //#define PRESERVE_TRANSFORM_ANNOTATION // improves the text in the graph, occasionally prevents optimizations. Maybe add on a debug flag.
  34. //#define POST_COMMON_ANNOTATION // would be better if this just commoned up transforms...
  35. //#define PARANOID_CHECKING
  36. //MORE: Extend UsedFieldSet/NestedField so that once all is set it is never modified and then allow a LINK from
  37. //a nestedfield to clone.
  38. //Problems:
  39. // combine the two classes for simplicity
  40. // save links in the complexextra rather than objects.
  41. // Ensure all functions that modify now return objects + clone if modifying and was all.
  42. // Need to implement gathered-enough in a different way so it isn't shared.
  43. enum
  44. {
  45. CostMemoryCopy = 1,
  46. CostNetworkGroup = 1,
  47. CostGlobalTopN = 2,
  48. CostManyCopy = 3,
  49. CostNetworkCopy = 10,
  50. };
  51. //-------------------------------------------------------------------------------------------------
  52. void UsedFieldSet::addUnique(IHqlExpression * field)
  53. {
  54. //MORE: Add if (!all test to short-circuit contains)
  55. if (!contains(*field))
  56. appendField(*LINK(field));
  57. }
  58. NestedField * UsedFieldSet::addNested(IHqlExpression * field)
  59. {
  60. NestedField * match = findNested(field);
  61. if (!match)
  62. {
  63. assertex(originalFields);
  64. #ifdef PARANOID_CHECKING
  65. assertex(!contains(*field));
  66. assertex(originalFields->contains(*field));
  67. #endif
  68. NestedField * original = originalFields->findNested(field);
  69. assertex(original);
  70. match = new NestedField(field, &original->used);
  71. appendNested(*LINK(field), match);
  72. }
  73. else
  74. {
  75. assertex(contains(*field));
  76. }
  77. return match;
  78. }
  79. void UsedFieldSet::appendNested(IHqlExpression & ownedField, NestedField * ownedNested)
  80. {
  81. appendField(ownedField);
  82. nested.append(*ownedNested);
  83. }
  84. bool UsedFieldSet::checkAllFieldsUsed()
  85. {
  86. if (all)
  87. return true;
  88. assertex(originalFields);
  89. if (fields.ordinality() != originalFields->fields.ordinality())
  90. return false;
  91. ForEachItemIn(i, nested)
  92. {
  93. NestedField & cur = nested.item(i);
  94. if (!cur.used.checkAllFieldsUsed())
  95. return false;
  96. }
  97. all = true;
  98. return true;
  99. }
  100. bool UsedFieldSet::allGathered() const
  101. {
  102. if (maxGathered == (unsigned)-1)
  103. return true;
  104. if (maxGathered < fields.ordinality())
  105. return false;
  106. ForEachItemIn(i, nested)
  107. {
  108. if (!nested.item(i).used.allGathered())
  109. return false;
  110. }
  111. return true;
  112. }
  113. void UsedFieldSet::appendField(IHqlExpression & ownedField)
  114. {
  115. #ifdef PARANOID_CHECKING
  116. assertex(!contains(ownedField));
  117. #endif
  118. #ifdef USE_IPROJECT_HASH
  119. if (!all)
  120. hash.add(&ownedField);
  121. #endif
  122. fields.append(ownedField);
  123. #ifdef PARANOID_CHECKING
  124. if (originalFields)
  125. assertex(originalFields->contains(ownedField));
  126. #endif
  127. }
  128. void UsedFieldSet::clone(const UsedFieldSet & source)
  129. {
  130. assertex(originalFields == source.originalFields);
  131. ForEachItemIn(i, source.fields)
  132. appendField(OLINK(source.fields.item(i)));
  133. ForEachItemIn(i1, source.nested)
  134. nested.append(*source.nested.item(i1).clone());
  135. if (source.all)
  136. all = true;
  137. finalRecord.set(source.finalRecord);
  138. }
  139. unsigned UsedFieldSet::getOriginalPosition(IHqlExpression * field) const
  140. {
  141. assertex(originalFields == this);
  142. unsigned match = fields.find(*field);
  143. if (match != NotFound)
  144. return match;
  145. assertex(field->isDatarow());
  146. assertex(finalRecord);
  147. OwnedHqlExpr originalField = finalRecord->querySimpleScope()->lookupSymbol(field->queryName());
  148. assertex(originalField && originalField != field);
  149. unsigned matchOriginal = fields.find(*originalField);
  150. assertex(matchOriginal != NotFound);
  151. return matchOriginal;
  152. }
  153. int UsedFieldSet::compareOrder(IHqlExpression * left, IHqlExpression * right) const
  154. {
  155. return (int)(getOriginalPosition(left) - getOriginalPosition(right));
  156. }
  157. bool UsedFieldSet::contains(IHqlExpression & field) const
  158. {
  159. if (all)
  160. return true;
  161. #ifdef USE_IPROJECT_HASH
  162. return hash.find(&field) != NULL;
  163. #else
  164. return fields.contains(field);
  165. #endif
  166. }
  167. bool UsedFieldSet::contains(_ATOM name) const
  168. {
  169. if (all)
  170. return true;
  171. ForEachItemIn(i, fields)
  172. {
  173. if (fields.item(i).queryName() == name)
  174. return true;
  175. }
  176. return false;
  177. }
  178. //Calculate left - right
  179. void UsedFieldSet::createDifference(const UsedFieldSet & left, const UsedFieldSet & right)
  180. {
  181. if (right.includeAll())
  182. return;
  183. //if all are used and non modifyable this code will need changing.
  184. ForEachItemIn(i, left.fields)
  185. {
  186. IHqlExpression & cur = left.fields.item(i);
  187. if (cur.isDatarow())
  188. {
  189. NestedField * leftNested = left.findNested(&cur);
  190. NestedField * rightNested = right.findNested(&cur);
  191. assertex(leftNested);
  192. if (rightNested)
  193. {
  194. Owned<NestedField> diffNested = new NestedField(&cur, leftNested->used.queryOriginal());
  195. diffNested->used.createDifference(leftNested->used, rightNested->used);
  196. if (!diffNested->isEmpty())
  197. appendNested(OLINK(cur), diffNested.getClear());
  198. }
  199. else if (!leftNested->isEmpty())
  200. {
  201. appendNested(OLINK(cur), leftNested->clone());
  202. }
  203. }
  204. else
  205. {
  206. if (!right.contains(cur))
  207. appendField(OLINK(cur));
  208. }
  209. }
  210. }
  211. NestedField * UsedFieldSet::findNested(IHqlExpression * field) const
  212. {
  213. ForEachItemIn(i, nested)
  214. {
  215. NestedField & cur = nested.item(i);
  216. if (cur.field == field)
  217. return &cur;
  218. }
  219. return NULL;
  220. }
  221. IHqlExpression * UsedFieldSet::createFilteredAssign(IHqlExpression * field, IHqlExpression * value, IHqlExpression * newSelf, const UsedFieldSet * exceptions) const
  222. {
  223. if (!contains(*field))
  224. return NULL;
  225. OwnedHqlExpr newValue = LINK(value);
  226. OwnedHqlExpr newField;
  227. if (field->isDatarow())
  228. {
  229. NestedField * match = findNested(field);
  230. assertex(match);
  231. NestedField * exception = exceptions ? exceptions->findNested(field) : NULL;
  232. if (!match->isEmpty())
  233. {
  234. bool createSubset = true;
  235. if (match->used.checkAllFieldsUsed())
  236. {
  237. if (!exception || exception->isEmpty())
  238. {
  239. createSubset = false;
  240. }
  241. else if (exception && exception->used.checkAllFieldsUsed())
  242. {
  243. newValue.setown(createNullExpr(field));
  244. createSubset = false;
  245. }
  246. }
  247. if (createSubset)
  248. {
  249. newField.setown(finalRecord->querySimpleScope()->lookupSymbol(field->queryName()));
  250. assertex(newField);
  251. assertex(exception || newField != field);
  252. //Two options - this is either a no_createrow, and we extract the assignments from the transform
  253. //or we create a no_createrow to extract the values from the other record
  254. OwnedHqlExpr newTransform;
  255. UsedFieldSet * exceptionFields = exception ? &exception->used : NULL;
  256. if (value->getOperator() == no_createrow)
  257. {
  258. newTransform.setown(match->used.createFilteredTransform(value->queryChild(0), exceptionFields));
  259. }
  260. else
  261. {
  262. newTransform.setown(match->used.createRowTransform(value, exceptionFields));
  263. }
  264. newValue.setown(createRow(no_createrow, newTransform.getClear()));
  265. #if defined(PRESERVE_TRANSFORM_ANNOTATION)
  266. newValue.setown(value->cloneAllAnnotations(newValue));
  267. #endif
  268. }
  269. }
  270. else
  271. newValue.clear();
  272. }
  273. else
  274. {
  275. if (exceptions && exceptions->contains(*field))
  276. {
  277. newValue.setown(createNullExpr(field));
  278. }
  279. }
  280. if (newValue)
  281. {
  282. IHqlExpression * rhs = newField ? newField.get() : field;
  283. OwnedHqlExpr newLhs = createSelectExpr(LINK(newSelf), LINK(rhs));
  284. return createAssign(newLhs.getClear(), newValue.getClear());
  285. }
  286. return NULL;
  287. }
  288. void UsedFieldSet::createFilteredAssigns(HqlExprArray & assigns, IHqlExpression * transform, IHqlExpression * newSelf, const UsedFieldSet * exceptions) const
  289. {
  290. ForEachChild(i, transform)
  291. {
  292. IHqlExpression * cur = transform->queryChild(i);
  293. switch (cur->getOperator())
  294. {
  295. case no_assign:
  296. {
  297. IHqlExpression * lhs = cur->queryChild(0);
  298. IHqlExpression * field = lhs->queryChild(1);
  299. IHqlExpression * value = cur->queryChild(1);
  300. IHqlExpression * assign = createFilteredAssign(field, value, newSelf, exceptions);
  301. if (assign)
  302. assigns.append(*assign);
  303. break;
  304. }
  305. case no_assignall:
  306. createFilteredAssigns(assigns, cur, newSelf, exceptions);
  307. break;
  308. default:
  309. assigns.append(*LINK(cur));
  310. break;
  311. }
  312. }
  313. }
  314. IHqlExpression * UsedFieldSet::createFilteredTransform(IHqlExpression * transform, const UsedFieldSet * exceptions) const
  315. {
  316. OwnedHqlExpr self = getSelf(finalRecord);
  317. HqlExprArray assigns;
  318. createFilteredAssigns(assigns, transform, self, exceptions);
  319. OwnedHqlExpr ret = createValue(transform->getOperator(), makeTransformType(finalRecord->getType()), assigns);
  320. #if defined(PRESERVE_TRANSFORM_ANNOTATION)
  321. return transform->cloneAllAnnotations(ret);
  322. #else
  323. return ret.getClear();
  324. #endif
  325. }
  326. IHqlExpression * UsedFieldSet::createRowTransform(IHqlExpression * row, const UsedFieldSet * exceptions) const
  327. {
  328. OwnedHqlExpr self = getSelf(finalRecord);
  329. HqlExprArray assigns;
  330. ForEachItemIn(i, fields)
  331. {
  332. IHqlExpression & field = fields.item(i);
  333. OwnedHqlExpr value = createNewSelectExpr(LINK(row), LINK(&field));
  334. OwnedHqlExpr assign = createFilteredAssign(&field, value, self, exceptions);
  335. if (assign)
  336. assigns.append(*assign.getClear());
  337. }
  338. return createValue(no_transform, makeTransformType(finalRecord->getType()), assigns);
  339. }
  340. void UsedFieldSet::calcFinalRecord(bool canPack, bool ignoreIfEmpty)
  341. {
  342. assertex(originalFields);
  343. if (finalRecord)
  344. return;
  345. ForEachItemIn(i1, nested)
  346. nested.item(i1).used.calcFinalRecord(canPack, true);
  347. IHqlExpression * originalRecord = queryOriginalRecord();
  348. if (checkAllFieldsUsed())
  349. {
  350. if (canPack)
  351. finalRecord.setown(getPackedRecord(originalRecord));
  352. else
  353. finalRecord.set(originalRecord);
  354. return;
  355. }
  356. HqlExprArray recordFields;
  357. ForEachItemIn(i, fields)
  358. {
  359. IHqlExpression & cur = fields.item(i);
  360. if (cur.isDatarow())
  361. {
  362. NestedField * match = findNested(&cur);
  363. assertex(match);
  364. IHqlExpression * record = cur.queryRecord();
  365. IHqlExpression * newRecord = match->used.queryFinalRecord();
  366. if (record == newRecord)
  367. {
  368. recordFields.append(OLINK(cur));
  369. }
  370. else if (newRecord)
  371. {
  372. HqlExprArray args;
  373. unwindChildren(args, &cur);
  374. OwnedHqlExpr newField = createField(cur.queryName(), makeRowType(newRecord->getType()), args);
  375. recordFields.append(*newField.getClear());
  376. }
  377. }
  378. else
  379. recordFields.append(OLINK(cur));
  380. }
  381. if (originalFields)
  382. {
  383. //Reorder the record to match the original fields
  384. RecordOrderComparer compare(*originalFields);
  385. qsortvec((void * *)recordFields.getArray(), recordFields.ordinality(), compare);
  386. }
  387. if (recordFields.ordinality() == 0)
  388. {
  389. if (ignoreIfEmpty)
  390. return;
  391. recordFields.append(*createAttribute(_nonEmpty_Atom));
  392. }
  393. finalRecord.setown(createRecord(recordFields));
  394. if (canPack)
  395. finalRecord.setown(getPackedRecord(finalRecord));
  396. OwnedHqlExpr serializedRecord = getSerializedForm(finalRecord);
  397. if (maxRecordSizeUsesDefault(serializedRecord))
  398. {
  399. HqlExprArray recordFields;
  400. unwindChildren(recordFields, finalRecord);
  401. //Lost some indication of the record size->add an attribute
  402. IHqlExpression * max = originalRecord->queryProperty(maxLengthAtom);
  403. if (max)
  404. recordFields.append(*LINK(max));
  405. else
  406. {
  407. bool isKnownSize, useDefaultRecordSize;
  408. OwnedHqlExpr oldSerializedRecord = getSerializedForm(originalRecord);
  409. unsigned oldRecordSize = getMaxRecordSize(oldSerializedRecord, 0, isKnownSize, useDefaultRecordSize);
  410. if (!useDefaultRecordSize)
  411. recordFields.append(*createAttribute(maxLengthAtom, getSizetConstant(oldRecordSize)));
  412. }
  413. finalRecord.setown(createRecord(recordFields));
  414. }
  415. }
  416. void UsedFieldSet::gatherExpandSelectsUsed(HqlExprArray * selfSelects, SelectUsedArray * parentSelects, IHqlExpression * selector, IHqlExpression * source)
  417. {
  418. assertex(selfSelects ? selector != NULL : true);
  419. for (unsigned i1 = maxGathered; i1 < fields.ordinality(); i1++)
  420. {
  421. IHqlExpression & cur = fields.item(i1);
  422. if (!cur.isDatarow())
  423. {
  424. if (selfSelects)
  425. {
  426. OwnedHqlExpr selected = createSelectExpr(LINK(selector), LINK(&cur));
  427. selfSelects->append(*selected.getClear());
  428. }
  429. if (parentSelects)
  430. {
  431. OwnedHqlExpr sourceSelected = createSelectExpr(LINK(source), LINK(&cur));
  432. parentSelects->append(*sourceSelected.getClear());
  433. }
  434. }
  435. }
  436. maxGathered = fields.ordinality();
  437. ForEachItemIn(i2, nested)
  438. {
  439. NestedField & curNested = nested.item(i2);
  440. IHqlExpression * field = curNested.field;
  441. OwnedHqlExpr selected = selector ? createSelectExpr(LINK(selector), LINK(field)) : NULL;
  442. OwnedHqlExpr sourceSelected = createSelectExpr(LINK(source), LINK(field));
  443. if (!curNested.includeAll())
  444. {
  445. curNested.used.gatherExpandSelectsUsed(selfSelects, parentSelects, selected, sourceSelected);
  446. sourceSelected.clear();
  447. }
  448. else
  449. {
  450. curNested.used.noteGatheredAll();
  451. if (selfSelects)
  452. selfSelects->append(*selected.getClear());
  453. if (parentSelects)
  454. parentSelects->append(*sourceSelected.getClear());
  455. }
  456. }
  457. }
  458. inline bool isSelector(IHqlExpression * expr)
  459. {
  460. return (expr->getOperator() == no_select) && !isNewSelector(expr);
  461. }
  462. void UsedFieldSet::gatherTransformValuesUsed(HqlExprArray * selfSelects, SelectUsedArray * parentSelects, HqlExprArray * values, IHqlExpression * selector, IHqlExpression * transform)
  463. {
  464. for (unsigned i = maxGathered; i < fields.ordinality(); i++)
  465. {
  466. IHqlExpression & cur = fields.item(i);
  467. if (!cur.isDatarow())
  468. {
  469. if (selfSelects)
  470. {
  471. OwnedHqlExpr selected = createSelectExpr(LINK(selector), LINK(&cur));
  472. selfSelects->append(*selected.getClear());
  473. }
  474. if (values)
  475. {
  476. IHqlExpression * transformValue = queryTransformAssignValue(transform, &cur);
  477. //If no transform value is found then we almost certainly have an invalid query (e.g, LEFT inside a
  478. //global). Don't add the value - you'll definitely get a later follow on error
  479. assertex(transformValue);
  480. values->append(*LINK(transformValue));
  481. }
  482. }
  483. }
  484. maxGathered = fields.ordinality();
  485. ForEachItemIn(i2, nested)
  486. {
  487. NestedField & curNested = nested.item(i2);
  488. if (!curNested.isEmpty() && !curNested.used.allGathered())
  489. {
  490. IHqlExpression * field = curNested.field;
  491. OwnedHqlExpr selected = selector ? createSelectExpr(LINK(selector), LINK(field)) : NULL;
  492. IHqlExpression * transformValue = queryTransformAssignValue(transform, field);
  493. assertex(transformValue);
  494. bool includeThis = true;
  495. if (!curNested.includeAll() && transformValue->isPure())
  496. {
  497. if (transformValue->getOperator() == no_createrow)
  498. {
  499. curNested.used.gatherTransformValuesUsed(selfSelects, parentSelects, values, selected, transformValue->queryChild(0));
  500. includeThis = false;
  501. }
  502. else if (isAlwaysActiveRow(transformValue) || isSelector(transformValue))
  503. {
  504. curNested.used.gatherExpandSelectsUsed(selfSelects, parentSelects, selected, transformValue);
  505. includeThis = false;
  506. }
  507. //otherwise use the whole value.
  508. }
  509. if (includeThis)
  510. {
  511. curNested.used.noteGatheredAll();
  512. if (selfSelects)
  513. selfSelects->append(*selected.getClear());
  514. if (values)
  515. values->append(*LINK(transformValue));
  516. }
  517. }
  518. }
  519. }
  520. void UsedFieldSet::getText(StringBuffer & s) const
  521. {
  522. if (all)
  523. s.append("ALL");
  524. s.append("[");
  525. ForEachItemIn(i, fields)
  526. {
  527. IHqlExpression & cur = fields.item(i);
  528. if (i) s.append(",");
  529. s.append(cur.queryName());
  530. if (cur.isDatarow())
  531. {
  532. NestedField * match = findNested(&cur);
  533. if (!match->used.checkAllFieldsUsed())
  534. match->used.getText(s);
  535. }
  536. }
  537. s.append("]");
  538. }
  539. void UsedFieldSet::intersectFields(const UsedFieldSet & source)
  540. {
  541. if (source.includeAll())
  542. return;
  543. if (includeAll())
  544. set(source);
  545. else
  546. {
  547. finalRecord.clear();
  548. ForEachItemInRev(i1, fields)
  549. {
  550. IHqlExpression & field = fields.item(i1);
  551. if (!field.isDatarow() && !source.contains(field))
  552. {
  553. fields.remove(i1);
  554. #ifdef USE_IPROJECT_HASH
  555. hash.remove(&field);
  556. #endif
  557. }
  558. }
  559. ForEachItemInRev(i2, nested)
  560. {
  561. NestedField & cur = nested.item(i2);
  562. NestedField * match = source.findNested(cur.field);
  563. //MORE: If we never modify items that have been all set then the following will need changing:
  564. if (match)
  565. {
  566. cur.used.intersectFields(match->used);
  567. }
  568. else
  569. {
  570. cur.clear();
  571. }
  572. }
  573. }
  574. }
  575. void UsedFieldSet::optimizeFieldsToBlank(const UsedFieldSet & allAssigned, IHqlExpression * transform)
  576. {
  577. //MORE:
  578. //this contains a list of fields that can be blanked instead of assigning.
  579. //If there is a sequence of assignments SELF.x := LEFT.x then
  580. //a) the the field will already be in the input and output records (since it is a rollup/iterate)
  581. //b) if the previous field is assigned, then it may generate more efficient code to also assign this
  582. // field rather than blanking it.
  583. //Therefore we should walk the transform, and if a field is an exception and previous field is used
  584. //and possibly the exception is fixed length, then remove it from the exceptions.
  585. }
  586. bool UsedFieldSet::requiresFewerFields(const UsedFieldSet & other) const
  587. {
  588. if (includeAll())
  589. return false;
  590. return (fields.ordinality() < other.fields.ordinality());
  591. }
  592. void UsedFieldSet::unionFields(const UsedFieldSet & source)
  593. {
  594. if (includeAll())
  595. return;
  596. if (source.includeAll())
  597. set(source);
  598. else
  599. {
  600. ForEachItemIn(i, source.fields)
  601. {
  602. IHqlExpression & field = source.fields.item(i);
  603. if (!contains(field))
  604. appendField(OLINK(field));
  605. }
  606. ForEachItemIn(i1, source.nested)
  607. {
  608. NestedField & cur = source.nested.item(i1);
  609. NestedField * match = findNested(cur.field);
  610. if (match)
  611. match->used.unionFields(cur.used);
  612. else
  613. nested.append(*cur.clone());
  614. }
  615. }
  616. }
  617. bool UsedFieldSet::isEmpty() const
  618. {
  619. ForEachItemIn(i1, fields)
  620. {
  621. IHqlExpression & cur = fields.item(i1);
  622. if (!cur.isDatarow())
  623. return false;
  624. }
  625. ForEachItemIn(i2, nested)
  626. {
  627. if (!nested.item(i2).isEmpty())
  628. return false;
  629. }
  630. return true;
  631. }
  632. void UsedFieldSet::kill()
  633. {
  634. #ifdef USE_IPROJECT_HASH
  635. hash.kill();
  636. #endif
  637. fields.kill();
  638. nested.kill();
  639. all = false;
  640. maxGathered = 0;
  641. finalRecord.clear();
  642. }
  643. void UsedFieldSet::set(const UsedFieldSet & source)
  644. {
  645. kill();
  646. clone(source);
  647. }
  648. void UsedFieldSet::setAll()
  649. {
  650. if (all)
  651. return;
  652. assertex(originalFields);
  653. kill();
  654. clone(*originalFields);
  655. }
  656. void UsedFieldSet::setRecord(IHqlExpression * record)
  657. {
  658. assertex(fields.ordinality() == 0);
  659. all = true;
  660. unwindFields(fields, record);
  661. ForEachItemIn(i, fields)
  662. {
  663. IHqlExpression & cur = fields.item(i);
  664. if (cur.isDatarow())
  665. {
  666. NestedField * child = new NestedField(&cur, NULL);
  667. child->used.setRecord(cur.queryRecord());
  668. nested.append(*child);
  669. }
  670. }
  671. finalRecord.set(record->queryBody());
  672. originalFields = this;
  673. }
  674. static UsedFieldSet * addNestedField(UsedFieldSet & fields, IHqlExpression * expr, IHqlExpression * selector)
  675. {
  676. if (expr == selector)
  677. return &fields;
  678. IHqlExpression * ds = expr->queryChild(0);
  679. UsedFieldSet * parent = addNestedField(fields, ds, selector);
  680. if (parent)
  681. {
  682. NestedField * nested = parent->addNested(expr->queryChild(1));
  683. if (!nested || nested->includeAll())
  684. return NULL;
  685. return &nested->used;
  686. }
  687. return NULL;
  688. }
  689. bool processMatchingSelector(UsedFieldSet & fields, IHqlExpression * select, IHqlExpression * selector)
  690. {
  691. if (select == selector)
  692. {
  693. fields.setAll();
  694. return true;
  695. }
  696. if (select->getOperator() != no_select)
  697. return false;
  698. //Could be <root>.blah.ds - queryDatasetSelector needs to be applied to the lhs.
  699. IHqlExpression * root = queryDatasetCursor(select->queryChild(0));
  700. if (root == selector)
  701. {
  702. if (select->isDatarow())
  703. {
  704. UsedFieldSet * nested = addNestedField(fields, select, selector);
  705. if (nested)
  706. nested->setAll();
  707. }
  708. else
  709. {
  710. IHqlExpression * ds = select->queryChild(0);
  711. IHqlExpression * field = select->queryChild(1);
  712. UsedFieldSet * nested = addNestedField(fields, ds, selector);
  713. if (nested)
  714. nested->addUnique(field);
  715. }
  716. }
  717. return false;
  718. }
  719. //---------------------------------------------------------------------------------------------------------------------
  720. int RecordOrderComparer::docompare(const void * l,const void * r) const
  721. {
  722. IHqlExpression * lExpr = (IHqlExpression *)l;
  723. IHqlExpression * rExpr = (IHqlExpression *)r;
  724. return fields.compareOrder(lExpr, rExpr);
  725. }
  726. //---------------------------------------------------------------------------------------------------------------------
  727. static unsigned getActivityCost(IHqlExpression * expr, ClusterType targetClusterType)
  728. {
  729. switch (targetClusterType)
  730. {
  731. case ThorCluster:
  732. case ThorLCRCluster:
  733. {
  734. switch (expr->getOperator())
  735. {
  736. case no_sort:
  737. //MORE: What about checking for grouped!
  738. if (!expr->hasProperty(localAtom))
  739. return CostNetworkCopy;
  740. return CostManyCopy;
  741. case no_shuffle:
  742. if (!expr->hasProperty(localAtom) && !isGrouped(expr))
  743. return CostNetworkCopy;
  744. break;
  745. case no_group:
  746. if (!expr->hasProperty(localAtom))
  747. return CostNetworkGroup;
  748. break;
  749. case no_keyeddistribute:
  750. case no_distribute:
  751. case no_cosort:
  752. return CostNetworkCopy;
  753. case no_topn:
  754. if (!expr->hasProperty(localAtom))
  755. return CostGlobalTopN;
  756. break;
  757. case no_selfjoin:
  758. if (!expr->hasProperty(localAtom))
  759. return CostNetworkCopy;
  760. break;
  761. case no_denormalize:
  762. case no_denormalizegroup:
  763. case no_join:
  764. case no_joincount:
  765. if (!expr->hasProperty(localAtom))
  766. {
  767. if (isKeyedJoin(expr))
  768. break;
  769. if (expr->hasProperty(lookupAtom))
  770. return CostNetworkCopy/2; //insert on rhs.
  771. return CostNetworkCopy;
  772. }
  773. break;
  774. //case no_dedup: all non local, may be worth it..
  775. }
  776. }
  777. }
  778. return 0;
  779. }
  780. //MORE: Should cache this in the extra for a record, quite possibly with the unwound fields as well.
  781. bool isSensibleRecord(IHqlExpression * record)
  782. {
  783. ForEachChild(i, record)
  784. {
  785. IHqlExpression * cur = record->queryChild(i);
  786. switch (cur->getOperator())
  787. {
  788. case no_record:
  789. if (!isSensibleRecord(cur))
  790. return false;
  791. break;
  792. case no_ifblock:
  793. return false;
  794. case no_field:
  795. //Could loosen this condition so that it didn't use any fields within the record.
  796. switch (cur->queryType()->getTypeCode())
  797. {
  798. case type_alien:
  799. return false;
  800. case type_table:
  801. case type_groupedtable:
  802. {
  803. //disqualify datasets with no_selfref counts/lengths
  804. IHqlExpression * limit = cur->queryProperty(countAtom);
  805. if (!limit)
  806. limit = cur->queryProperty(sizeAtom);
  807. if (limit && !limit->isConstant())
  808. return false;
  809. break;
  810. }
  811. }
  812. break;
  813. }
  814. }
  815. return true;
  816. }
  817. IHqlExpression * queryRootSelector(IHqlExpression * select)
  818. {
  819. loop
  820. {
  821. if (select->hasProperty(newAtom))
  822. return select;
  823. IHqlExpression * ds = select->queryChild(0);
  824. if (ds->getOperator() != no_select)
  825. return select;
  826. select = ds;
  827. }
  828. }
  829. static node_operator queryCompoundOp(IHqlExpression * expr)
  830. {
  831. switch (expr->getOperator())
  832. {
  833. case no_table:
  834. return no_compound_diskread;
  835. case no_newkeyindex:
  836. return no_compound_indexread;
  837. case no_dataset_alias:
  838. case no_preservemeta:
  839. return queryCompoundOp(expr->queryChild(0));
  840. }
  841. throwUnexpectedOp(expr->getOperator());
  842. }
  843. static int compareHqlExprPtr(IInterface * * left, IInterface * * right)
  844. {
  845. return *left == *right ? 0 : *left < *right ? -1 : +1;
  846. }
  847. //------------------------------------------------------------------------
  848. ImplicitProjectInfo::ImplicitProjectInfo(IHqlExpression * _original, ProjectExprKind _kind) : NewTransformInfo(_original), kind(_kind)
  849. {
  850. visited = false;
  851. gatheredSelectsUsed = false;
  852. //The following logically belong to the complexProjectInfo, see note in header
  853. canOptimize = true;
  854. insertProject = false;
  855. alreadyInScope = false;
  856. canReorderOutput = true;
  857. calcedReorderOutput = false;
  858. visitedAllowingActivity = false;
  859. }
  860. void ImplicitProjectInfo::addActiveSelect(IHqlExpression * select)
  861. {
  862. if (selectsUsed.find(*select) == NotFound)
  863. selectsUsed.append(*select);
  864. }
  865. void ImplicitProjectInfo::addActiveSelects(const SelectUsedArray & src)
  866. {
  867. unsigned numSrc = src.ordinality();
  868. if (numSrc == 0)
  869. return;
  870. if (selectsUsed.ordinality() == 0)
  871. {
  872. //No need to check for pre-existence, can be significant
  873. selectsUsed.ensure(numSrc);
  874. for (unsigned i=0; i < numSrc; i++)
  875. selectsUsed.append(src.item(i));
  876. }
  877. else
  878. {
  879. //MORE: Should only check if exists in pre-existing selects otherwise O(N^2) in items added
  880. for (unsigned i=0; i < numSrc; i++)
  881. addActiveSelect(&src.item(i));
  882. }
  883. }
  884. void ImplicitProjectInfo::removeProductionSelects()
  885. {
  886. ForEachItemInRev(i, selectsUsed)
  887. {
  888. IHqlExpression & cur = selectsUsed.item(i);
  889. if ((cur.getOperator() == no_matchattr) || (cur.queryChild(0)->getOperator() == no_matchattr))
  890. selectsUsed.remove(i);
  891. }
  892. }
  893. void ImplicitProjectInfo::removeScopedFields(IHqlExpression * selector)
  894. {
  895. ForEachItemInRev(i, selectsUsed)
  896. {
  897. IHqlExpression & cur = selectsUsed.item(i);
  898. if ((&cur == selector) ||
  899. ((cur.getOperator() == no_select) && (queryDatasetCursor(cur.queryChild(0)) == selector)))
  900. selectsUsed.remove(i);
  901. }
  902. }
  903. void ImplicitProjectInfo::removeRowsFields(IHqlExpression * expr, IHqlExpression * left, IHqlExpression * right)
  904. {
  905. node_operator rowsSide = queryHasRows(expr);
  906. if (rowsSide == no_none)
  907. return;
  908. IHqlExpression * rowsid = expr->queryProperty(_rowsid_Atom);
  909. switch (rowsSide)
  910. {
  911. case no_left:
  912. {
  913. OwnedHqlExpr rowsExpr = createDataset(no_rows, LINK(left), LINK(rowsid));
  914. removeScopedFields(rowsExpr);
  915. break;
  916. }
  917. case no_right:
  918. {
  919. OwnedHqlExpr rowsExpr = createDataset(no_rows, LINK(right), LINK(rowsid));
  920. removeScopedFields(rowsExpr);
  921. break;
  922. }
  923. default:
  924. throwUnexpectedOp(rowsSide);
  925. }
  926. }
  927. //------------------------------------------------------------------------
  928. ComplexImplicitProjectInfo::ComplexImplicitProjectInfo(IHqlExpression * _original, ProjectExprKind _kind) : ImplicitProjectInfo(_original, _kind)
  929. {
  930. }
  931. void ComplexImplicitProjectInfo::addAllOutputs()
  932. {
  933. outputFields.setAll();
  934. }
  935. IHqlExpression * ComplexImplicitProjectInfo::createOutputProject(IHqlExpression * ds)
  936. {
  937. if (ds->getOperator() == no_null)
  938. return createDataset(no_null, LINK(queryOutputRecord()));
  939. OwnedHqlExpr seq = createSelectorSequence();
  940. OwnedHqlExpr left = createSelector(no_left, ds, seq);
  941. OwnedHqlExpr self = getSelf(queryOutputRecord());
  942. IHqlExpression * transform = createMappingTransform(self, left);
  943. if (ds->isDataset())
  944. return createDataset(no_hqlproject, LINK(ds), createComma(transform, LINK(seq)));
  945. return createRow(no_projectrow, LINK(ds), createComma(transform, LINK(seq)));
  946. }
  947. void ComplexImplicitProjectInfo::finalizeOutputRecord()
  948. {
  949. //MORE: Create them in the same order as the original record + don't change if numOutputFields = numOriginalOutputFields
  950. if (!queryOutputRecord())
  951. {
  952. bool canPack = (safeToReorderOutput() && okToOptimize());
  953. outputFields.calcFinalRecord(canPack, false);
  954. }
  955. }
  956. unsigned ComplexImplicitProjectInfo::queryCostFactor(ClusterType targetClusterType)
  957. {
  958. //MORE: Could cache the value, but this option isn't really used, and not called a lot.
  959. return getActivityCost(original, targetClusterType);
  960. }
  961. void ComplexImplicitProjectInfo::stopOptimizeCompound(bool cascade)
  962. {
  963. if (cascade)
  964. {
  965. canOptimize = false;
  966. ForEachItemIn(i, inputs)
  967. inputs.item(i).stopOptimizeCompound(cascade);
  968. }
  969. else if (kind == CompoundableActivity)
  970. canOptimize = false;
  971. }
  972. void ComplexImplicitProjectInfo::trace()
  973. {
  974. StringBuffer s;
  975. if (original->queryName())
  976. s.append(original->queryName()).append(" := ");
  977. s.append(getOpString(original->getOperator()));
  978. DBGLOG("%s", s.str());
  979. switch (getChildDatasetType(original))
  980. {
  981. case childdataset_none:
  982. case childdataset_many_noscope:
  983. case childdataset_many:
  984. case childdataset_if:
  985. case childdataset_case:
  986. case childdataset_map:
  987. case childdataset_dataset_noscope:
  988. case childdataset_nway_left_right:
  989. break;
  990. case childdataset_dataset:
  991. case childdataset_datasetleft:
  992. case childdataset_top_left_right:
  993. case childdataset_same_left_right:
  994. trace("input", leftFieldsRequired);
  995. break;
  996. case childdataset_left:
  997. trace("left", leftFieldsRequired);
  998. break;
  999. case childdataset_leftright:
  1000. trace("left", leftFieldsRequired);
  1001. trace("right", rightFieldsRequired);
  1002. break;
  1003. }
  1004. trace("output", outputFields);
  1005. }
  1006. void ComplexImplicitProjectInfo::trace(const char * label, const UsedFieldSet & fields)
  1007. {
  1008. StringBuffer s;
  1009. s.append(" ").append(label).append(": ");
  1010. fields.getText(s);
  1011. DBGLOG("%s", s.str());
  1012. }
  1013. void ComplexImplicitProjectInfo::inheritRequiredFields(const UsedFieldSet & requiredList)
  1014. {
  1015. //Temporary code to avoid a check. It is permissible for the fields of an AnyTypeActivity to not match
  1016. if ((activityKind() == AnyTypeActivity) && !outputFields.includeAll() && requiredList.includeAll())
  1017. outputFields.setOriginal(requiredList.queryOriginal());
  1018. outputFields.unionFields(requiredList);
  1019. }
  1020. void ComplexImplicitProjectInfo::notifyRequiredFields(ComplexImplicitProjectInfo * whichInput)
  1021. {
  1022. if (activityKind() == PassThroughActivity)
  1023. {
  1024. whichInput->inheritRequiredFields(outputFields);
  1025. }
  1026. else if ((activityKind() == RollupTransformActivity) || (activityKind() == IterateTransformActivity))
  1027. {
  1028. whichInput->inheritRequiredFields(leftFieldsRequired);
  1029. whichInput->inheritRequiredFields(rightFieldsRequired);
  1030. }
  1031. else if (original->getOperator() == no_fetch)
  1032. {
  1033. assertex(whichInput == &inputs.item(0));
  1034. whichInput->inheritRequiredFields(rightFieldsRequired);
  1035. }
  1036. else if (whichInput == &inputs.item(0))
  1037. {
  1038. whichInput->inheritRequiredFields(leftFieldsRequired);
  1039. //can occur if same dataset is used for left and right - e.g., non-symmetric self join
  1040. if ((inputs.ordinality() > 1) && (whichInput == &inputs.item(1)))
  1041. whichInput->inheritRequiredFields(rightFieldsRequired);
  1042. }
  1043. else if (whichInput == &inputs.item(1))
  1044. {
  1045. whichInput->inheritRequiredFields(rightFieldsRequired);
  1046. }
  1047. else if (inputs.contains(*whichInput))
  1048. whichInput->addAllOutputs();
  1049. else
  1050. throwUnexpected();
  1051. }
  1052. bool ComplexImplicitProjectInfo::safeToReorderOutput()
  1053. {
  1054. if (!calcedReorderOutput)
  1055. {
  1056. canReorderOutput = true;
  1057. switch (activityKind())
  1058. {
  1059. case FixedInputActivity:
  1060. //can occur with weird operations in the middle of a dataset. Should probably only set if an action.
  1061. canReorderOutput = false;
  1062. break;
  1063. default:
  1064. ForEachItemIn(i, outputs)
  1065. {
  1066. if (!outputs.item(i).safeToReorderInput())
  1067. {
  1068. canReorderOutput = false;
  1069. break;
  1070. }
  1071. }
  1072. break;
  1073. }
  1074. calcedReorderOutput = true;
  1075. }
  1076. return canReorderOutput;
  1077. }
  1078. bool ComplexImplicitProjectInfo::safeToReorderInput()
  1079. {
  1080. switch (activityKind())
  1081. {
  1082. case CreateRecordActivity:
  1083. case CreateRecordLRActivity:
  1084. case ScalarSelectActivity:
  1085. //These activities have remove the constraints of the inputs on their outputs.
  1086. return true;
  1087. case FixedInputActivity:
  1088. return false;
  1089. }
  1090. return safeToReorderOutput();
  1091. }
  1092. void ComplexImplicitProjectInfo::setMatchingOutput(ComplexImplicitProjectInfo * other)
  1093. {
  1094. assertex(other->queryOutputRecord());
  1095. outputFields.set(other->outputFields);
  1096. }
  1097. //-----------------------------------------------------------------------------------------------
  1098. static HqlTransformerInfo implicitProjectTransformerInfo("ImplicitProjectTransformer");
  1099. ImplicitProjectTransformer::ImplicitProjectTransformer(HqlCppTranslator & _translator, bool _optimizeSpills)
  1100. : NewHqlTransformer(implicitProjectTransformerInfo), translator(_translator)
  1101. {
  1102. const HqlCppOptions & transOptions = translator.queryOptions();
  1103. targetClusterType = translator.getTargetClusterType();
  1104. options.isRoxie = (targetClusterType == RoxieCluster);
  1105. options.optimizeProjectsPreservePersists = transOptions.optimizeProjectsPreservePersists;
  1106. options.autoPackRecords = transOptions.autoPackRecords;
  1107. options.notifyOptimizedProjects = translator.notifyOptimizedProjectsLevel();
  1108. options.optimizeSpills = _optimizeSpills;
  1109. options.enableCompoundCsvRead = translator.queryOptions().enableCompoundCsvRead;
  1110. options.projectNestedTables = translator.queryOptions().projectNestedTables;
  1111. allowActivity = true;
  1112. options.insertProjectCostLevel = 0;
  1113. if (transOptions.reduceNetworkTraffic)
  1114. options.insertProjectCostLevel = (transOptions.insertProjectCostLevel != (unsigned)-1) ? transOptions.insertProjectCostLevel : CostNetworkCopy;
  1115. }
  //Analysis pass for a single expression.
  //  - Decides how expr should be visited based on its operator and on the member flag allowActivity,
  //    which indicates whether expr is being visited in a context where it may be treated as an activity.
  //    When allowActivity is false the expression has optimization prevented.
  //  - Appends expressions that are activities to the activities list, and links each activity to its
  //    inputs via connect().
  //  - Records which child supplies the original field sets for the left/right required fields.
  //  - Gathers the active selects used by the expression via gatherFieldsUsed().
  //The function is recursive, and temporarily modifies allowActivity around nested calls; every path
  //entered with allowActivity set to true leaves it set to true again.
  1116. void ImplicitProjectTransformer::analyseExpr(IHqlExpression * expr)
  1117. {
  1118. ImplicitProjectInfo * extra = queryBodyExtra(expr);
  1119. ComplexImplicitProjectInfo * complexExtra = extra->queryComplexInfo();
  //Expressions with complex info may be visited more than once; how a repeat visit is handled
  //depends on the context of this visit and of the previous ones.
  1120. if (complexExtra)
  1121. {
  //Nothing to do for expressions flagged as alreadyInScope.
  1122. if (complexExtra->alreadyInScope)
  1123. return;
  //Without the autoPackRecords option, reordering of the output is explicitly disabled.
  1124. if (!options.autoPackRecords)
  1125. complexExtra->setReorderOutput(false);
  1126. if (extra->checkAlreadyVisited())
  1127. {
  1128. //Don't allow modification if referenced from activity and non-activity context
  1129. if (allowActivity)
  1130. {
  1131. if ((extra->activityKind() != NonActivity) || (expr->getOperator() == no_record))
  1132. return;
  1133. //either allowed before, but tagged as a non
  1134. //If previously this was called in an allowactivity context it must have been explicitly disabled, so no point recursing.
  1135. if (complexExtra->visitedAllowingActivity)
  1136. return;
  1137. //otherwise, probably worth recursing again...
  1138. extra->preventOptimization();
  1139. }
  1140. else
  1141. {
  1142. extra->preventOptimization();
  1143. return;
  1144. }
  1145. }
  1146. if (allowActivity)
  1147. complexExtra->visitedAllowingActivity = true;
  1148. }
  1149. else
  1150. {
  //Simple expressions are only ever processed once.
  1151. if (extra->checkAlreadyVisited())
  1152. return;
  1153. }
  1154. node_operator op = expr->getOperator();
  //Operators that are handled completely here, regardless of the context.
  1155. switch (op)
  1156. {
  1157. case no_record:
  1158. {
  //NOTE(review): complexExtra is dereferenced without a check - assumes records always have complex info.
  1159. complexExtra->outputFields.setRecord(expr);
  1160. return;
  1161. }
  1162. case no_constant:
  1163. case no_attr:
  1164. return;
  1165. case no_transform:
  1166. case no_newtransform:
  1167. case no_transformlist:
  1168. case no_list:
  //Constant transforms and lists need no further analysis.
  1169. if (expr->isConstant())
  1170. return;
  1171. break;
  1172. }
  1173. ITypeInfo * type = expr->queryType();
  1174. if (allowActivity)
  1175. {
  1176. switch (op)
  1177. {
  1178. case no_evaluate:
  1179. throwUnexpected();
  1180. case no_select:
  1181. if (expr->isDataset() || expr->isDatarow())
  1182. {
  1183. //MORE: This means that selects from a parent dataset don't project down the parent dataset.
  1184. //I'm not sure how big an issue that would be.
  1185. allowActivity = false;
  1186. Parent::analyseExpr(expr);
  1187. allowActivity = true;
  1188. assertex(extra->activityKind() == SourceActivity);
  1189. activities.append(*LINK(expr));
  1190. IHqlExpression * record = expr->queryRecord();
  1191. complexExtra->setOriginalRecord(queryBodyComplexExtra(record));
  1192. analyseExpr(record);
  1193. }
  1194. else if (isNewSelector(expr))
  1195. {
  1196. Parent::analyseExpr(expr);
  1197. assertex(extra->activityKind() == ScalarSelectActivity);
  1198. if (expr->hasProperty(newAtom))
  1199. connect(expr->queryChild(0), expr);
  1200. activities.append(*LINK(expr));
  1201. }
  1202. gatherFieldsUsed(expr, extra);
  1203. return;
  1204. case no_activerow:
  1205. assertex(extra->activityKind() == SimpleActivity);
  //The children are analysed in a non-activity context.
  1206. allowActivity = false;
  1207. Parent::analyseExpr(expr);
  1208. allowActivity = true;
  1209. activities.append(*LINK(expr));
  1210. gatherFieldsUsed(expr, extra);
  1211. return;
  1212. case no_attr:
  1213. case no_attr_expr:
  1214. case no_attr_link:
  //Nothing inside an attribute is analysed in an activity context.
  1215. allowActivity = false;
  1216. Parent::analyseExpr(expr);
  1217. allowActivity = true;
  1218. return;
  1219. case no_thor:
  1220. if (expr->isDataset() || expr->isDatarow())
  1221. {
  1222. assertex(extra->activityKind() == SimpleActivity);
  1223. Parent::analyseExpr(expr);
  1224. connect(expr->queryChild(0), expr);
  1225. }
  1226. else
  1227. {
  1228. assertex(extra->activityKind() == NonActivity);
  1229. Parent::analyseExpr(expr);
  1230. }
  1231. break;
  1232. case no_compound:
  1233. if (expr->isDataset())
  1234. {
  1235. assertex(extra->activityKind() == SimpleActivity);
  1236. Parent::analyseExpr(expr);
  //For a compound it is the second child that is connected as the input.
  1237. connect(expr->queryChild(1), expr);
  1238. break;
  1239. }
  1240. assertex(extra->activityKind() == NonActivity);
  1241. Parent::analyseExpr(expr);
  1242. break;
  1243. case no_executewhen:
  1244. if (expr->isDataset())
  1245. {
  1246. assertex(extra->activityKind() == SimpleActivity);
  1247. Parent::analyseExpr(expr);
  1248. connect(expr->queryChild(0), expr);
  1249. break;
  1250. }
  1251. assertex(extra->activityKind() == NonActivity);
  1252. Parent::analyseExpr(expr);
  1253. break;
  1254. case no_subgraph:
  1255. assertex(extra->activityKind() == NonActivity);
  1256. Parent::analyseExpr(expr);
  1257. break;
  1258. case no_libraryselect:
  1259. assertex(extra->activityKind() == SourceActivity);
  //Only the second child is analysed.
  1260. analyseExpr(expr->queryChild(1));
  1261. break;
  1262. case no_libraryscopeinstance:
  1263. {
  1264. assertex(extra->activityKind() == NonActivity);
  //Dataset arguments are analysed, but must not be optimized.
  1265. ForEachChild(i, expr)
  1266. {
  1267. IHqlExpression * cur = expr->queryChild(i);
  1268. if (cur->isDataset())
  1269. {
  1270. analyseExpr(cur);
  1271. queryBodyExtra(cur)->preventOptimization();
  1272. }
  1273. }
  1274. break;
  1275. }
  1276. case no_mergejoin:
  1277. case no_nwayjoin: // could probably project output of this one...
  1278. case no_nwaymerge:
  1279. {
  1280. assertex(extra->activityKind() == SourceActivity);
  1281. //Don't allow any of the inputs to be optimized - otherwise they can end up with inconsistent record types
  1282. allowActivity = false;
  1283. Parent::analyseExpr(expr->queryChild(0));
  1284. allowActivity = true;
  1285. break;
  1286. }
  1287. case no_setresult:
  1288. case no_ensureresult:
  1289. {
  1290. IHqlExpression * value = expr->queryChild(0);
  1291. if (value->isDataset() || value->isDatarow())// || value->isList())
  1292. {
  1293. assertex(extra->activityKind() == FixedInputActivity);
  1294. analyseExpr(value);
  1295. //no need to analyse other fields since they are all constant
  1296. connect(value, expr);
  1297. }
  1298. else
  1299. {
  1300. assertex(extra->activityKind() == NonActivity);
  1301. Parent::analyseExpr(expr);
  1302. }
  1303. break;
  1304. }
  1305. case no_newtransform:
  1306. case no_transform:
  1307. case no_transformlist:
  1308. assertex(extra->kind == NonActivity);
  1309. if (!expr->isConstant())
  1310. Parent::analyseExpr(expr);
  1311. return;
  1312. default:
  1313. {
  //General case.  Children in the range [start,numArgs) are visited; of those, the children in
  //the range [first,last) are visited in an activity context and all others in a non-activity context.
  1314. unsigned numArgs = expr->numChildren();
  1315. unsigned first = 0;
  1316. unsigned last = numArgs;
  1317. unsigned start = 0;
  1318. if (!expr->isAction() && !expr->isDataset() && !expr->isDatarow())
  1319. {
  //Scalar expression: aggregates/createset only treat child 0 as a potential activity,
  //sizeof treats none, and anything else is tagged as a non-activity.
  1320. switch (op)
  1321. {
  1322. case NO_AGGREGATE:
  1323. case no_createset:
  1324. last = 1;
  1325. break;
  1326. case no_sizeof:
  1327. last = 0;
  1328. break;
  1329. default:
  1330. extra->kind = NonActivity;
  1331. break;
  1332. }
  1333. }
  1334. else
  1335. {
  //Action, dataset or row: optimization is prevented if there is no sensible record to work with.
  1336. IHqlExpression * record = expr->queryRecord();
  1337. if (!record && expr->queryChild(0))
  1338. record = expr->queryChild(0)->queryRecord();
  1339. if (!record || !isSensibleRecord(record))
  1340. extra->preventOptimization();
  1341. first = getFirstActivityArgument(expr);
  1342. last = first + getNumActivityArguments(expr);
  1343. switch (expr->getOperator())
  1344. {
  1345. case no_dedup:
  1346. if (dedupMatchesWholeRecord(expr))
  1347. extra->preventOptimization();
  1348. break;
  1349. case no_process:
  1350. extra->preventOptimization();
  1351. break;
  1352. case no_executewhen:
  1353. last = 1;
  1354. break;
  1355. case no_newkeyindex:
  1356. // case no_dataset:
  1357. //No point walking the transform for an index
  1358. start = 3;
  1359. numArgs = 4;
  1360. break;
  1361. case no_compound_diskaggregate:
  1362. case no_compound_diskcount:
  1363. case no_compound_diskgroupaggregate:
  1364. case no_compound_indexaggregate:
  1365. case no_compound_indexcount:
  1366. case no_compound_indexgroupaggregate:
  1367. //walk inside these... they're not compoundable, but they may be able to lose some fields from the transform.
  1368. last = 1;
  1369. break;
  1370. }
  1371. }
  1372. for (unsigned i =start; i < numArgs; i++)
  1373. {
  1374. IHqlExpression * cur = expr->queryChild(i);
  1375. allowActivity = (i >= first) && (i < last);
  1376. analyseExpr(cur);
  1377. if (allowActivity)
  1378. {
  //A child visited in an activity context cannot be optimized if this expression is not an activity;
  //otherwise it is linked as an input unless it is an action or an attribute.
  1379. if (extra->kind == NonActivity)
  1380. {
  1381. ImplicitProjectInfo * childExtra = queryBodyExtra(cur);
  1382. childExtra->preventOptimization();
  1383. }
  1384. else if (!cur->isAction() && !cur->isAttribute())
  1385. {
  1386. connect(cur, expr);
  1387. }
  1388. }
  1389. }
  1390. if (extra->kind == NonActivity)
  1391. gatherFieldsUsed(expr, extra);
  //Restore the flag - the loop above may have left it false.
  1392. allowActivity = true;
  1393. }
  1394. }
  1395. }
  1396. else
  1397. {
  //Visited in a non-activity context: this expression must not be optimized.
  1398. extra->preventOptimization();
  1399. switch (op)
  1400. {
  1401. case no_attr_expr:
  1402. analyseChildren(expr);
  1403. break;
  1404. case no_newkeyindex:
  1405. // case no_sizeof:
  1406. //no point analysing parameters to keyed joins
  1407. break;
  1408. default:
  1409. Parent::analyseExpr(expr);
  1410. break;
  1411. }
  1412. }
  1413. //Add activities in depth first order, so traversing them backwards is guaranteed to be top down.
  1414. if (extra->activityKind() != NonActivity)
  1415. {
  1416. assertex(complexExtra);
  //Activities that create records cannot be optimized if their transform is unknown.
  1417. switch (extra->activityKind())
  1418. {
  1419. case CreateRecordActivity:
  1420. case CreateRecordLRActivity:
  1421. case RollupTransformActivity:
  1422. case IterateTransformActivity:
  1423. case DenormalizeActivity:
  1424. case CreateRecordSourceActivity:
  1425. if (hasUnknownTransform(expr))
  1426. complexExtra->preventOptimization();
  1427. break;
  1428. }
  1429. activities.append(*LINK(expr));
  //Record which child expression provides the original fields for the left/right required field sets.
  //An activity kind that is not listed below is an internal error.
  1430. IHqlExpression * child = expr->queryChild(0);
  1431. switch (extra->activityKind())
  1432. {
  1433. case CreateRecordActivity:
  1434. setOriginal(complexExtra->leftFieldsRequired, child);
  1435. break;
  1436. case CreateRecordLRActivity:
  1437. setOriginal(complexExtra->leftFieldsRequired, child);
  1438. setOriginal(complexExtra->rightFieldsRequired, expr->queryChild(1));
  1439. break;
  1440. case CompoundActivity:
  1441. case CompoundableActivity:
  1442. case CreateRecordSourceActivity:
  1443. case AnyTypeActivity:
  1444. break;
  1445. case RollupTransformActivity:
  1446. case IterateTransformActivity:
  //Both left and right refer to the same input dataset.
  1447. setOriginal(complexExtra->leftFieldsRequired, child);
  1448. setOriginal(complexExtra->rightFieldsRequired, child);
  1449. break;
  1450. case DenormalizeActivity:
  1451. setOriginal(complexExtra->leftFieldsRequired, child);
  1452. setOriginal(complexExtra->rightFieldsRequired, expr->queryChild(1));
  1453. break;
  1454. case FixedInputActivity:
  1455. assertex(child && (child->isDataset() || child->isDatarow()));
  1456. setOriginal(complexExtra->leftFieldsRequired, child);
  1457. if (getNumChildTables(expr) >= 2)
  1458. setOriginal(complexExtra->rightFieldsRequired, expr->queryChild(1));
  1459. break;
  1460. case SourceActivity:
  1461. case PassThroughActivity:
  1462. case ScalarSelectActivity:
  1463. break;
  1464. case SinkActivity:
  1465. setOriginal(complexExtra->leftFieldsRequired, child);
  1466. break;
  1467. case SimpleActivity:
  //The input of a compound is its second child - consistent with the connect() call made for no_compound.
  1468. if (expr->getOperator() == no_compound)
  1469. setOriginal(complexExtra->leftFieldsRequired, expr->queryChild(1));
  1470. else
  1471. setOriginal(complexExtra->leftFieldsRequired, child);
  1472. break;
  1473. default:
  1474. throwUnexpected();
  1475. }
  1476. }
  //Anything with a record (other than patterns and transforms) is associated with the info
  //for that record, and the record itself is then analysed.
  1477. IHqlExpression * record = expr->queryRecord();
  1478. if (record && !isPatternType(type) && !expr->isTransform())
  1479. {
  1480. assertex(complexExtra);
  1481. complexExtra->setOriginalRecord(queryBodyComplexExtra(record));
  1482. analyseExpr(record);
  1483. }
  1484. gatherFieldsUsed(expr, extra);
  1485. }
  1486. void ImplicitProjectTransformer::connect(IHqlExpression * source, IHqlExpression * sink)
  1487. {
  1488. queryBodyComplexExtra(source)->outputs.append(*queryBodyComplexExtra(sink));
  1489. queryBodyComplexExtra(sink)->inputs.append(*queryBodyComplexExtra(source));
  1490. }
  1491. //NB: This is very similar to the code in CHqlExpression::cacheTablesUsed()
  //Work out which active selects are used by expr, and store them in extra.
  //The general approach is to inherit the selects used by the child expressions, and to remove the
  //selects that are scoped by selectors which expr itself introduces (its dataset, LEFT/RIGHT, ROWS()).
  //The order of the inherit/remove calls is significant: selects inherited after a removal are not
  //affected by that removal.
  //Returns immediately if extra->checkGatheredSelects() is true (presumably meaning the selects
  //have already been gathered for this expression).
  1492. void ImplicitProjectTransformer::gatherFieldsUsed(IHqlExpression * expr, ImplicitProjectInfo * extra)
  1493. {
  1494. if (extra->checkGatheredSelects())
  1495. return;
  1496. node_operator op = expr->getOperator();
  1497. switch (op)
  1498. {
  1499. case no_select:
  1500. {
  1501. if (options.projectNestedTables)
  1502. {
  //If the select is flagged as new (isNew) inherit the fields used by its dataset,
  //otherwise the select itself is an active select.
  1503. bool isNew;
  1504. IHqlExpression * ds = querySelectorDataset(expr, isNew);
  1505. if (isNew)
  1506. inheritActiveFields(extra, ds);
  1507. else
  1508. extra->addActiveSelect(expr);
  1509. }
  1510. else
  1511. {
  1512. //Either inherit from the dataset if new, or add the root field (x.a.b only adds x.a)
  1513. IHqlExpression * cur = expr;
  1514. loop
  1515. {
  1516. IHqlExpression * ds = cur->queryChild(0);
  1517. if (cur->hasProperty(newAtom))
  1518. {
  1519. inheritActiveFields(extra, ds);
  1520. break;
  1521. }
  1522. node_operator dsOp = ds->getOperator();
  //Stop walking up the chain of selects once the parent is not a (non-dataset) select.
  //Selects from SELF are not added.
  1523. if (dsOp != no_select || ds->isDataset())
  1524. {
  1525. if ((dsOp != no_self) && (dsOp != no_selfref))
  1526. extra->addActiveSelect(cur);
  1527. break;
  1528. }
  1529. cur = ds;
  1530. }
  1531. }
  1532. break;
  1533. }
  1534. case no_activerow:
  1535. //active row used in some context
  1536. extra->addActiveSelect(expr->queryChild(0));
  1537. break;
  1538. case no_left:
  1539. case no_right:
  1540. extra->addActiveSelect(expr);
  1541. //left/right used in an expression context - assume the worst..
  1542. break;
  1543. case no_attr:
  1544. case no_attr_link:
  1545. case no_getresult:
  1546. break;
  1547. case no_attr_expr:
  1548. {
  //Attribute expressions inherit from all of their children, except for the _selectors_ attribute.
  1549. _ATOM name = expr->queryName();
  1550. if (name != _selectors_Atom)
  1551. inheritActiveFields(expr, extra, 0, expr->numChildren());
  1552. }
  1553. break;
  1554. case no_newkeyindex:
  1555. {
  //Debug builds check that an index has no active fields left once those scoped by its own
  //dataset and by the active table selector have been removed; any that remain are reported
  //with HQLERR_IndexHasActiveFields.  Release builds only inherit from the argument at index 3.
  1556. #ifdef _DEBUG
  1557. inheritActiveFields(expr, extra, 1, expr->numChildren());
  1558. extra->removeScopedFields(expr->queryChild(0)->queryNormalizedSelector());
  1559. extra->removeScopedFields(queryActiveTableSelector()); // for distributed() etc,
  1560. inheritActiveFields(expr, extra, 0, 1);
  1561. const SelectUsedArray & selectsUsed = extra->querySelectsUsed();
  1562. if (selectsUsed.ordinality() != 0)
  1563. {
  1564. StringBuffer s;
  1565. ForEachItemIn(i, selectsUsed)
  1566. {
  1567. if (i) s.append(',');
  1568. getExprECL(&selectsUsed.item(i), s);
  1569. }
  1570. throwError1(HQLERR_IndexHasActiveFields, s.str());
  1571. }
  1572. #else
  1573. inheritActiveFields(expr, extra, 3, 4); // just in case the filename is based on a parent row???
  1574. #endif
  1575. break;
  1576. }
  1577. case no_pat_production:
  1578. {
  1579. inheritActiveFields(expr, extra, 0, expr->numChildren());
  1580. extra->removeProductionSelects();
  1581. break;
  1582. }
  1583. case no_assign:
  //Only the second child of an assignment contributes any selects.
  1584. inheritActiveFields(expr, extra, 1, 2);
  1585. break;
  1586. /*
  1587. The following can be handled using the default mechanism because we're not tracking newtables.
  1588. case NO_AGGREGATE:
  1589. case no_createset:
  1590. */
  1591. case no_table:
  1592. {
  //Inherit from all the arguments, then remove anything scoped by the parent (child 3) if there is one.
  1593. inheritActiveFields(expr, extra, 0, expr->numChildren());
  1594. IHqlExpression * parent = expr->queryChild(3);
  1595. if (parent)
  1596. extra->removeScopedFields(parent->queryNormalizedSelector());
  1597. break;
  1598. }
  1599. default:
  1600. {
  1601. #if 0
  1602. //Optimization to enable later - if no active datasets, then can't have any active fields.
  1603. //should save some processing on root datasets, but may be insignificant
  1604. if (isIndependentOfScope(expr))
  1605. break;
  1606. #endif
  //General pattern for each kind of child dataset:
  // 1. inherit the selects used by the non-dataset arguments
  // 2. remove the selects scoped by the selectors this expression introduces
  // 3. inherit the selects used by the dataset argument(s)
  1607. unsigned max = expr->numChildren();
  1608. IHqlExpression * ds = expr->queryChild(0);
  1609. switch (getChildDatasetType(expr))
  1610. {
  1611. case childdataset_none:
  1612. case childdataset_many_noscope:
  1613. case childdataset_if:
  1614. case childdataset_case:
  1615. case childdataset_map:
  1616. case childdataset_dataset_noscope:
  1617. inheritActiveFields(expr, extra, 0, max);
  1618. //None of these have any scoped arguments, so no need to remove them
  1619. break;
  1620. case childdataset_many:
  1621. {
  //The leading children are the datasets; the remaining arguments are scoped by the active table selector.
  1622. unsigned firstAttr = getNumChildTables(expr);
  1623. inheritActiveFields(expr, extra, firstAttr, max);
  1624. extra->removeScopedFields(queryActiveTableSelector());
  1625. inheritActiveFields(expr, extra, 0, firstAttr);
  1626. break;
  1627. }
  1628. case childdataset_dataset:
  1629. {
  1630. inheritActiveFields(expr, extra, 1, max);
  1631. extra->removeScopedFields(ds->queryNormalizedSelector());
  1632. inheritActiveFields(expr, extra, 0, 1);
  1633. }
  1634. break;
  1635. case childdataset_datasetleft:
  1636. {
  //Arguments may refer to the dataset directly, via LEFT, or via ROWS(LEFT).
  1637. OwnedHqlExpr left = createSelector(no_left, ds, querySelSeq(expr));
  1638. inheritActiveFields(expr, extra, 1, max);
  1639. extra->removeScopedFields(left);
  1640. extra->removeScopedFields(ds->queryNormalizedSelector());
  1641. extra->removeRowsFields(expr, left, NULL);
  1642. inheritActiveFields(expr, extra, 0, 1);
  1643. break;
  1644. }
  1645. case childdataset_left:
  1646. {
  1647. OwnedHqlExpr left = createSelector(no_left, ds, querySelSeq(expr));
  1648. inheritActiveFields(expr, extra, 1, max);
  1649. extra->removeScopedFields(left);
  1650. extra->removeRowsFields(expr, left, NULL);
  1651. inheritActiveFields(expr, extra, 0, 1);
  1652. break;
  1653. }
  1654. case childdataset_same_left_right:
  1655. case childdataset_nway_left_right:
  1656. {
  //LEFT and RIGHT are both based on the first child.
  1657. IHqlExpression * seq = querySelSeq(expr);
  1658. OwnedHqlExpr left = createSelector(no_left, ds, seq);
  1659. OwnedHqlExpr right = createSelector(no_right, ds, seq);
  1660. inheritActiveFields(expr, extra, 1, max);
  1661. extra->removeScopedFields(left);
  1662. extra->removeScopedFields(right);
  1663. extra->removeRowsFields(expr, left, right);
  1664. inheritActiveFields(expr, extra, 0, 1);
  1665. break;
  1666. }
  1667. case childdataset_top_left_right:
  1668. {
  //As above, but the dataset itself can also be referenced directly.
  1669. IHqlExpression * seq = querySelSeq(expr);
  1670. OwnedHqlExpr left = createSelector(no_left, ds, seq);
  1671. OwnedHqlExpr right = createSelector(no_right, ds, seq);
  1672. inheritActiveFields(expr, extra, 1, max);
  1673. extra->removeScopedFields(ds->queryNormalizedSelector());
  1674. extra->removeScopedFields(left);
  1675. extra->removeScopedFields(right);
  1676. extra->removeRowsFields(expr, left, right);
  1677. inheritActiveFields(expr, extra, 0, 1);
  1678. break;
  1679. }
  1680. case childdataset_leftright:
  1681. {
  1682. IHqlExpression * leftDs = expr->queryChild(0);
  1683. IHqlExpression * rightDs = expr->queryChild(1);
  1684. IHqlExpression * seq = querySelSeq(expr);
  1685. OwnedHqlExpr left = createSelector(no_left, leftDs, seq);
  1686. OwnedHqlExpr right = createSelector(no_right, rightDs, seq);
  1687. inheritActiveFields(expr, extra, 2, max);
  1688. extra->removeScopedFields(right);
  1689. extra->removeRowsFields(expr, left, right);
  1690. if (expr->getOperator() == no_normalize)
  1691. {
  //For no_normalize the second child is inherited before LEFT is removed, so any uses of
  //LEFT within it are removed as well.
  1692. inheritActiveFields(expr, extra, 1, 2);
  1693. extra->removeScopedFields(left);
  1694. inheritActiveFields(expr, extra, 0, 1);
  1695. }
  1696. else
  1697. {
  1698. extra->removeScopedFields(left);
  1699. inheritActiveFields(expr, extra, 0, 2);
  1700. }
  1701. break;
  1702. }
  1703. break;
  1704. case childdataset_evaluate:
  1705. //handled elsewhere...
  1706. default:
  1707. UNIMPLEMENTED;
  1708. }
  //References to pattern productions are not visible outside of a parse.
  1709. switch (op)
  1710. {
  1711. case no_newparse:
  1712. case no_parse:
  1713. extra->removeProductionSelects();
  1714. break;
  1715. }
  //Debug-only sanity check: an expression reported as independent of scope (other than no_csv/no_xml)
  //should not have any active selects left.  Any that remain are reported with HQLERR_GlobalHasActiveFields.
  1716. #ifdef _DEBUG
  1717. //MORE: This doesn't currently cope with access to parents within normalized child datasets
  1718. //e.g, sqnormds1.hql.
  1719. const SelectUsedArray & selectsUsed = extra->querySelectsUsed();
  1720. if (isIndependentOfScope(expr) && selectsUsed.ordinality() != 0)
  1721. {
  1722. switch (expr->getOperator())
  1723. {
  1724. case no_csv:
  1725. case no_xml:
  1726. break;
  1727. default:
  1728. {
  1729. StringBuffer s;
  1730. ForEachItemIn(i, selectsUsed)
  1731. {
  1732. if (i) s.append(',');
  1733. getExprECL(&selectsUsed.item(i), s);
  1734. }
  1735. throwError1(HQLERR_GlobalHasActiveFields, s.str());
  1736. }
  1737. }
  1738. }
  1739. #endif
  1740. break;
  1741. }
  1742. }
  1743. }
  1744. const SelectUsedArray & ImplicitProjectTransformer::querySelectsUsed(IHqlExpression * expr)
  1745. {
  1746. ImplicitProjectInfo * extra = queryBodyExtra(expr);
  1747. // gatherFieldsUsed(expr, extra);
  1748. return extra->querySelectsUsed();
  1749. }
  1750. ProjectExprKind ImplicitProjectTransformer::getProjectExprKind(IHqlExpression * expr)
  1751. {
  1752. switch (expr->getOperator())
  1753. {
  1754. case no_evaluate:
  1755. throwUnexpected();
  1756. case no_select:
  1757. if (expr->isDataset() || expr->isDatarow())
  1758. return SourceActivity;
  1759. if (isNewSelector(expr))
  1760. return ScalarSelectActivity;
  1761. return NonActivity;
  1762. case no_activerow:
  1763. return SimpleActivity;
  1764. case no_attr:
  1765. case no_attr_expr:
  1766. case no_attr_link:
  1767. return NonActivity;
  1768. case no_typetransfer:
  1769. if (expr->isDataset() || expr->isDatarow())
  1770. return SourceActivity;
  1771. return NonActivity;
  1772. case no_thor:
  1773. if (expr->isDataset() || expr->isDatarow())
  1774. return SimpleActivity;
  1775. return NonActivity;
  1776. case no_compound:
  1777. if (expr->isDataset())
  1778. return SimpleActivity;
  1779. if (expr->isDatarow())
  1780. return ComplexNonActivity;
  1781. return NonActivity;
  1782. case no_executewhen:
  1783. if (expr->isDataset() || expr->isDatarow())
  1784. return SimpleActivity;
  1785. return NonActivity;
  1786. case no_subgraph:
  1787. case no_libraryscopeinstance:
  1788. return NonActivity;
  1789. case no_mergejoin:
  1790. case no_nwayjoin: // could probably project output of this one...
  1791. case no_nwaymerge:
  1792. case no_libraryselect:
  1793. return SourceActivity;
  1794. case no_setresult:
  1795. case no_ensureresult:
  1796. {
  1797. IHqlExpression * value = expr->queryChild(0);
  1798. if (value->isDataset() || value->isDatarow())
  1799. return FixedInputActivity;
  1800. return NonActivity;
  1801. }
  1802. case no_newrow: //only used while transforming
  1803. case no_newaggregate:
  1804. case no_hqlproject:
  1805. case no_normalize:
  1806. case no_newusertable:
  1807. case no_newparse:
  1808. case no_newxmlparse:
  1809. case no_createrow:
  1810. case no_rollupgroup:
  1811. case no_projectrow:
  1812. return CreateRecordActivity;
  1813. case no_inlinetable:
  1814. case no_dataset_from_transform:
  1815. return CreateRecordSourceActivity;
  1816. case no_extractresult:
  1817. case no_apply:
  1818. return SinkActivity;
  1819. case no_denormalizegroup:
  1820. case no_join:
  1821. case no_fetch:
  1822. return CreateRecordLRActivity;
  1823. case no_process: // optimization currently disabled...
  1824. return PassThroughActivity;
  1825. case no_iterate:
  1826. return IterateTransformActivity;
  1827. case no_rollup:
  1828. return RollupTransformActivity;
  1829. case no_denormalize:
  1830. return DenormalizeActivity;
  1831. case no_null:
  1832. if (expr->isAction())
  1833. return NonActivity;
  1834. return AnyTypeActivity;
  1835. case no_skip:
  1836. case no_fail:
  1837. if (expr->isDataset() || expr->isDatarow())
  1838. return AnyTypeActivity;
  1839. return NonActivity;
  1840. case no_table:
  1841. switch (expr->queryChild(2)->getOperator())
  1842. {
  1843. case no_thor:
  1844. case no_flat:
  1845. if (expr->hasProperty(_spill_Atom) && options.isRoxie)
  1846. return SourceActivity;
  1847. if (options.optimizeProjectsPreservePersists)
  1848. {
  1849. //Don't project persists because it can mess up the redistibution code.
  1850. if (expr->hasProperty(_workflowPersist_Atom))
  1851. return SourceActivity;
  1852. }
  1853. return CompoundableActivity;
  1854. case no_csv:
  1855. if (options.enableCompoundCsvRead)
  1856. return CompoundableActivity;
  1857. return SourceActivity;
  1858. default:
  1859. return SourceActivity;
  1860. }
  1861. case no_newkeyindex:
  1862. //Not compoundable for the moment - because it causes problems with assertwild(all, <old-expression>)
  1863. return SourceActivity;//CompoundableActivity;
  1864. case no_compound_diskread:
  1865. case no_compound_disknormalize:
  1866. case no_compound_indexread:
  1867. case no_compound_indexnormalize:
  1868. {
  1869. if (options.optimizeProjectsPreservePersists)
  1870. {
  1871. //Don't project persists because it can mess up the redistibution code.
  1872. IHqlExpression * root = queryRoot(expr);
  1873. if (root && root->hasProperty(_workflowPersist_Atom))
  1874. return SourceActivity;
  1875. }
  1876. return CompoundActivity;
  1877. }
  1878. case no_compound_diskaggregate:
  1879. case no_compound_diskcount:
  1880. case no_compound_diskgroupaggregate:
  1881. case no_compound_indexaggregate:
  1882. case no_compound_indexcount:
  1883. case no_compound_indexgroupaggregate:
  1884. //Don't want to add projects to these...
  1885. return SimpleActivity;
  1886. case no_preload:
  1887. case no_forcelocal:
  1888. case no_workunit_dataset:
  1889. case no_getgraphresult:
  1890. case no_getgraphloopresult:
  1891. case no_rows:
  1892. return SourceActivity;
  1893. case no_allnodes:
  1894. case no_httpcall:
  1895. case no_soapcall:
  1896. case no_newsoapcall:
  1897. case no_libraryinput:
  1898. case no_thisnode:
  1899. if (expr->isDataset() || expr->isDatarow())
  1900. return SourceActivity;
  1901. return NonActivity;
  1902. case no_pipe:
  1903. case no_nofold:
  1904. case no_nohoist:
  1905. if (expr->isDataset() || expr->isDatarow())
  1906. return FixedInputActivity;
  1907. return NonActivity;
  1908. case no_soapcall_ds:
  1909. case no_newsoapcall_ds:
  1910. case no_output:
  1911. case no_distribution:
  1912. case no_buildindex:
  1913. case no_spill:
  1914. case no_setgraphresult:
  1915. case no_setgraphloopresult:
  1916. case no_spillgraphresult:
  1917. //MORE: Rethink these later:
  1918. case no_combine:
  1919. case no_combinegroup:
  1920. case no_regroup:
  1921. case no_loop:
  1922. case no_graphloop:
  1923. case no_filtergroup: //anything else would be tricky...
  1924. case no_normalizegroup:
  1925. return FixedInputActivity;
  1926. case no_aggregate:
  1927. if (expr->hasProperty(mergeTransformAtom))
  1928. return FixedInputActivity;
  1929. return FixedInputActivity; //MORE:???? Should be able to optimize this
  1930. case no_fromxml: // A bit bit like a source activity, no transform..., but has an input
  1931. return SourceActivity;
  1932. case no_selfjoin:
  1933. return CreateRecordActivity;
  1934. case no_if:
  1935. if (expr->isDataset())
  1936. return PassThroughActivity;
  1937. if (expr->isDatarow())
  1938. return PassThroughActivity;
  1939. return NonActivity;
  1940. case no_addfiles:
  1941. case no_merge:
  1942. case no_nonempty:
  1943. case no_cogroup:
  1944. return PassThroughActivity;
  1945. case no_keydiff:
  1946. case no_keypatch:
  1947. return NonActivity;
  1948. case no_datasetfromrow:
  1949. if (getNumActivityArguments(expr) == 0)
  1950. return SourceActivity;
  1951. return SimpleActivity;
  1952. case no_newtransform:
  1953. case no_transform:
  1954. return NonActivity;
  1955. return ComplexNonActivity;
  1956. case no_record:
  1957. case no_assign:
  1958. case no_assignall:
  1959. return NonActivity;
  1960. case NO_AGGREGATE:
  1961. case no_createset:
  1962. return SinkActivity;
  1963. case no_call:
  1964. case no_externalcall:
  1965. if (expr->isDataset() || expr->isDatarow())
  1966. return SourceActivity;
  1967. //MORE: What about parameters??
  1968. return NonActivity;
  1969. case no_commonspill:
  1970. case no_readspill:
  1971. return SimpleActivity;
  1972. case no_writespill:
  1973. return SinkActivity;
  1974. case no_preservemeta:
  1975. case no_dataset_alias:
  1976. if (getProjectExprKind(expr->queryChild(0)) == CompoundableActivity)
  1977. return CompoundableActivity;
  1978. return PassThroughActivity;
  1979. }
  1980. ITypeInfo * type = expr->queryType();
  1981. if (!type)
  1982. return NonActivity;
  1983. type_t tc = type->getTypeCode();
  1984. switch (tc)
  1985. {
  1986. case type_void:
  1987. if (getNumChildTables(expr) > 0)
  1988. return SinkActivity;
  1989. return NonActivity;
  1990. case type_row:
  1991. case type_table:
  1992. case type_groupedtable:
  1993. break;
  1994. case type_transform:
  1995. return NonActivity;
  1996. default:
  1997. return NonActivity;
  1998. }
  1999. if (getNumActivityArguments(expr) == 0)
  2000. return SourceActivity;
  2001. return SimpleActivity;
  2002. }
  2003. void ImplicitProjectTransformer::processSelect(ComplexImplicitProjectInfo * extra, IHqlExpression * curSelect, IHqlExpression * ds, IHqlExpression * leftSelect, IHqlExpression * rightSelect)
  2004. {
  2005. if (leftSelect)
  2006. processMatchingSelector(extra->leftFieldsRequired, curSelect, leftSelect);
  2007. if (ds)
  2008. processMatchingSelector(extra->leftFieldsRequired, curSelect, ds);
  2009. if (rightSelect)
  2010. processMatchingSelector(extra->rightFieldsRequired, curSelect, rightSelect);
  2011. switch (extra->activityKind())
  2012. {
  2013. case DenormalizeActivity:
  2014. //For DENORMALIZE the transform is always called, possibly multiple times. Therefore
  2015. //if a field is used from the output it must be included in the input (but could be blanked)
  2016. //if a field is used from LEFT then it must be in the input and the output
  2017. processMatchingSelector(extra->outputFields, curSelect, leftSelect);
  2018. break;
  2019. case RollupTransformActivity:
  2020. case IterateTransformActivity:
  2021. //For ROLLUP/ITERATE the transform may or may not be called. Therefore
  2022. //if a field is used from the output it is used from the input [ handled in the main processing loop]
  2023. //if a field is used from LEFT then it must be in the input and the output
  2024. //Anything used from the input must be in the output (but could be blanked) - handled elsewhere
  2025. processMatchingSelector(extra->outputFields, curSelect, leftSelect);
  2026. if (ds)
  2027. processMatchingSelector(extra->outputFields, curSelect, ds);
  2028. break;
  2029. }
  2030. }
  2031. void ImplicitProjectTransformer::processSelects(ComplexImplicitProjectInfo * extra, SelectUsedArray const & selectsUsed, IHqlExpression * ds, IHqlExpression * leftSelect, IHqlExpression * rightSelect)
  2032. {
  2033. ForEachItemIn(i2, selectsUsed)
  2034. {
  2035. IHqlExpression * curSelect = &selectsUsed.item(i2);
  2036. processSelect(extra, curSelect, ds, leftSelect, rightSelect);
  2037. }
  2038. }
  2039. void ImplicitProjectTransformer::processTransform(ComplexImplicitProjectInfo * extra, IHqlExpression * transform, IHqlExpression * dsSelect, IHqlExpression * leftSelect, IHqlExpression * rightSelect)
  2040. {
  2041. HqlExprCopyArray assigns;
  2042. unwindTransform(assigns, transform);
  2043. ForEachItemIn(itr, assigns)
  2044. {
  2045. IHqlExpression * cur = &assigns.item(itr);
  2046. //Need to handle skip attributes...
  2047. if (cur->getOperator() == no_skip)
  2048. {
  2049. const SelectUsedArray & selectsUsed = querySelectsUsed(cur);
  2050. processSelects(extra, selectsUsed, dsSelect, leftSelect, rightSelect);
  2051. }
  2052. else if (cur->getOperator() == no_assign)
  2053. {
  2054. IHqlExpression * value = cur->queryChild(1);
  2055. if (!value->isPure())
  2056. {
  2057. IHqlExpression * lhs = cur->queryChild(0);
  2058. processMatchingSelector(extra->outputFields, lhs, lhs->queryChild(0));
  2059. const SelectUsedArray & selectsUsed = querySelectsUsed(value);
  2060. processSelects(extra, selectsUsed, dsSelect, leftSelect, rightSelect);
  2061. }
  2062. }
  2063. }
  2064. }
  2065. void ImplicitProjectTransformer::calculateFieldsUsed(IHqlExpression * expr)
  2066. {
  2067. ComplexImplicitProjectInfo * extra = queryBodyComplexExtra(expr);
  2068. if (!extra->okToOptimize())
  2069. {
  2070. if (expr->queryRecord())
  2071. extra->addAllOutputs();
  2072. }
  2073. else
  2074. {
  2075. ForEachItemIn(i1, extra->outputs)
  2076. extra->outputs.item(i1).notifyRequiredFields(extra);
  2077. if (extra->outputFields.includeAll())
  2078. assertex(extra->queryOutputRecord() != NULL);
  2079. }
  2080. switch (extra->activityKind())
  2081. {
  2082. case CreateRecordActivity:
  2083. case CreateRecordLRActivity:
  2084. {
  2085. //output will now be whatever fields are required by the output fields.
  2086. //input will be whatever is used in the appropriate transforms.
  2087. IHqlExpression * transform = queryNewColumnProvider(expr);
  2088. if (!isKnownTransform(transform))
  2089. {
  2090. extra->leftFieldsRequired.setAll();
  2091. if (extra->activityKind() == CreateRecordLRActivity)
  2092. extra->rightFieldsRequired.setAll();
  2093. break;
  2094. }
  2095. IHqlExpression * ds = expr->queryChild(0)->queryNormalizedSelector();
  2096. IHqlExpression * selSeq = querySelSeq(expr);
  2097. OwnedHqlExpr dsSelect = LINK(ds);
  2098. OwnedHqlExpr leftSelect;
  2099. OwnedHqlExpr rightSelect;
  2100. if (selSeq)
  2101. leftSelect.setown(createSelector(no_left, ds, selSeq));
  2102. if (extra->activityKind() == CreateRecordLRActivity)
  2103. rightSelect.setown(createSelector(no_right, expr->queryChild(1), selSeq));
  2104. if (expr->getOperator() == no_selfjoin)
  2105. dsSelect.setown(createSelector(no_right, ds, selSeq));
  2106. //This is here to ensure that calls that have side-effects don't get removed because the fields are removed
  2107. if (hasSideEffects(transform))
  2108. extra->addAllOutputs();
  2109. //MORE: querySelectsUsedForField() could be optimized by creating a map first, but it is only ~1% of time, so not really worth it.
  2110. SelectUsedArray parentSelects;
  2111. HqlExprArray values;
  2112. extra->outputFields.gatherTransformValuesUsed(NULL, &parentSelects, &values, NULL, transform);
  2113. processSelects(extra, parentSelects, dsSelect, leftSelect, rightSelect);
  2114. ForEachItemIn(i, values)
  2115. processSelects(extra, querySelectsUsed(&values.item(i)), dsSelect, leftSelect, rightSelect);
  2116. if (!extra->outputFields.allGathered())
  2117. assertex(extra->outputFields.allGathered());
  2118. processTransform(extra, transform, dsSelect, leftSelect, rightSelect);
  2119. unsigned max = expr->numChildren();
  2120. unsigned first = extra->inputs.ordinality();
  2121. if (expr->getOperator() == no_fetch)
  2122. first = 2;
  2123. for (unsigned i2=first; i2 < max; i2++)
  2124. {
  2125. IHqlExpression * cur = expr->queryChild(i2);
  2126. if (cur == transform)
  2127. continue;
  2128. const SelectUsedArray & selectsUsed = querySelectsUsed(cur);
  2129. processSelects(extra, selectsUsed, dsSelect, leftSelect, rightSelect);
  2130. }
  2131. switch (expr->getOperator())
  2132. {
  2133. case no_newusertable:
  2134. case no_hqlproject:
  2135. if (extra->okToOptimize())
  2136. extra->inputs.item(0).stopOptimizeCompound(false);
  2137. break;
  2138. case no_newaggregate:
  2139. {
  2140. IHqlExpression * grouping = queryRealChild(expr, 3);
  2141. if (grouping)
  2142. {
  2143. //Need to make sure that grouping criteria fields are also in the output
  2144. ForEachChild(i, grouping)
  2145. {
  2146. IHqlExpression * curGrouping = grouping->queryChild(i);
  2147. IHqlExpression * match = NULL;
  2148. //All groupings have entries in the transform - find the corresponding field.
  2149. ForEachChild(j, transform)
  2150. {
  2151. IHqlExpression * cur = transform->queryChild(j);
  2152. IHqlExpression * rhs = cur->queryChild(1);
  2153. if (rhs->getOperator() == no_activerow)
  2154. rhs = rhs->queryChild(0);
  2155. if (rhs == curGrouping)
  2156. {
  2157. match = cur->queryChild(0);
  2158. break;
  2159. }
  2160. }
  2161. assertex(match);
  2162. processMatchingSelector(extra->outputFields, match, match->queryChild(0));
  2163. }
  2164. }
  2165. break;
  2166. }
  2167. }
  2168. break;
  2169. }
  2170. case CompoundActivity:
  2171. {
  2172. //output will now be whatever fields are required by the output fields.
  2173. //input will be the same as the output fields, since it is just a wrapper node.
  2174. extra->finalizeOutputRecord();
  2175. //MORE: Not sure this is neededextra->leftFieldsRequired.clone(extra->outputFields);
  2176. extra->insertProject = true;
  2177. assertex(extra->inputs.ordinality() == 0);
  2178. //extra->inputs.item(0).stopOptimizeCompound(true);
  2179. break;
  2180. }
  2181. case CompoundableActivity:
  2182. {
  2183. //Prevent preserve meta from stripping the disk read down to a single field.
  2184. if (extra->inputs.ordinality())
  2185. extra->inputs.item(0).stopOptimizeCompound(true);
  2186. if (extra->okToOptimize())
  2187. extra->finalizeOutputRecord();
  2188. break;
  2189. }
  2190. case CreateRecordSourceActivity:
  2191. case AnyTypeActivity:
  2192. {
  2193. if (extra->okToOptimize())
  2194. extra->finalizeOutputRecord();
  2195. break;
  2196. }
  2197. case PassThroughActivity:
  2198. if (extra->okToOptimize())
  2199. {
  2200. node_operator op = expr->getOperator();
  2201. if ((op == no_if) && expr->hasProperty(_resourced_Atom))
  2202. {
  2203. extra->preventOptimization();
  2204. extra->addAllOutputs();
  2205. }
  2206. else if (op == no_merge)
  2207. {
  2208. //Ensure all the fields used by the sort order are preserved in the input streams
  2209. IHqlExpression * order = expr->queryProperty(sortedAtom);
  2210. assertex(order);
  2211. ForEachChild(i, order)
  2212. {
  2213. IHqlExpression * cur = order->queryChild(i);
  2214. if (!cur->isAttribute() && !cur->isConstant()) // shouldn't really happen..
  2215. {
  2216. if ((cur->getOperator() == no_select) && !isNewSelector(cur))
  2217. {
  2218. IHqlExpression * ds = queryDatasetCursor(cur);
  2219. IHqlExpression * field = cur->queryChild(1);
  2220. if (ds == queryActiveTableSelector())
  2221. processMatchingSelector(extra->outputFields, cur, queryActiveTableSelector());
  2222. else
  2223. extra->addAllOutputs();
  2224. }
  2225. else
  2226. extra->addAllOutputs();
  2227. }
  2228. }
  2229. }
  2230. }
  2231. //No need to do anything - inputs are taken directly from required outputs
  2232. break;
  2233. case ScalarSelectActivity:
  2234. {
  2235. IHqlExpression * root = queryRootSelector(expr);
  2236. extra->leftFieldsRequired.appendField(*LINK(root->queryChild(1)));
  2237. break;
  2238. }
  2239. case RollupTransformActivity:
  2240. case IterateTransformActivity:
  2241. {
  2242. //currently rollup and iterate
  2243. //output record is fixed by input, and never gets changed.
  2244. //input is all fields used required in output (since can't change record format) plus any others used inside the transform
  2245. IHqlExpression * transform = queryNewColumnProvider(expr);
  2246. if (hasSideEffects(transform))
  2247. extra->addAllOutputs();
  2248. if (extra->outputFields.includeAll())
  2249. extra->leftFieldsRequired.setAll();
  2250. else
  2251. {
  2252. IHqlExpression * dsSelect = expr->queryChild(0)->queryNormalizedSelector();
  2253. IHqlExpression * selSeq = querySelSeq(expr);
  2254. OwnedHqlExpr leftSelect = createSelector(no_left, dsSelect, selSeq);
  2255. OwnedHqlExpr rightSelect = createSelector(no_right, dsSelect, selSeq);
  2256. //Need to handle skip attributes...
  2257. processTransform(extra, transform, dsSelect, leftSelect, rightSelect);
  2258. //Rollup criteria need to be included in the fields used!
  2259. unsigned max = expr->numChildren();
  2260. for (unsigned i2=1; i2 < max; i2++)
  2261. {
  2262. if (extra->leftFieldsRequired.includeAll())
  2263. break;
  2264. IHqlExpression * cur = expr->queryChild(i2);
  2265. if (cur != transform)
  2266. {
  2267. const SelectUsedArray & selectsUsed = querySelectsUsed(cur);
  2268. processSelects(extra, selectsUsed, dsSelect, leftSelect, rightSelect);
  2269. }
  2270. }
  2271. //NB: outputfields can extend...
  2272. while (!extra->outputFields.allGathered())
  2273. {
  2274. SelectUsedArray parentSelects;
  2275. HqlExprArray values;
  2276. HqlExprArray selfSelects;
  2277. extra->outputFields.gatherTransformValuesUsed(&selfSelects, &parentSelects, &values, dsSelect, transform);
  2278. \
  2279. //For ROLLUP/ITERATE the transform may or may not be called. Therefore
  2280. //if a field is used from the output it is used from the input
  2281. //if a field is used from LEFT then it must be in the input and the output
  2282. //if a field is used from RIGHT it muse be in the output (but could be blanked) - handled elsewhere
  2283. //Ensure all output rows are also included in the input dataset
  2284. ForEachItemIn(i1, selfSelects)
  2285. processMatchingSelector(extra->leftFieldsRequired, &selfSelects.item(i1), dsSelect);
  2286. processSelects(extra, parentSelects, NULL, leftSelect, rightSelect);
  2287. ForEachItemIn(i2, values)
  2288. processSelects(extra, querySelectsUsed(&values.item(i2)), NULL, leftSelect, rightSelect);
  2289. //If all fields selected from the output then select all fields from the input
  2290. if (extra->outputFields.checkAllFieldsUsed())
  2291. extra->leftFieldsRequired.setAll();
  2292. //if selected all fields from the input then already done.
  2293. if (extra->leftFieldsRequired.includeAll())
  2294. {
  2295. extra->outputFields.setAll();
  2296. break;
  2297. }
  2298. }
  2299. if (extra->leftFieldsRequired.includeAll())
  2300. extra->addAllOutputs();
  2301. }
  2302. break;
  2303. }
  2304. case DenormalizeActivity:
  2305. {
  2306. //output record is fixed by input
  2307. //input is all fields used required in output (since can't change record format) plus any others used inside the transform
  2308. IHqlExpression * transform = queryNewColumnProvider(expr);
  2309. if (hasSideEffects(transform))
  2310. extra->addAllOutputs();
  2311. if (extra->outputFields.includeAll())
  2312. extra->leftFieldsRequired.setAll();
  2313. IHqlExpression * left = expr->queryChild(0)->queryNormalizedSelector();
  2314. IHqlExpression * right = expr->queryChild(1)->queryNormalizedSelector();
  2315. IHqlExpression * selSeq = querySelSeq(expr);
  2316. OwnedHqlExpr leftSelect = createSelector(no_left, left, selSeq);
  2317. OwnedHqlExpr rightSelect = createSelector(no_right, right, selSeq);
  2318. processTransform(extra, transform, NULL, leftSelect, rightSelect);
  2319. //include all other attributes except for the transform
  2320. unsigned max = expr->numChildren();
  2321. for (unsigned i2=2; i2 < max; i2++)
  2322. {
  2323. IHqlExpression * cur = expr->queryChild(i2);
  2324. if (cur != transform)
  2325. {
  2326. const SelectUsedArray & selectsUsed = querySelectsUsed(cur);
  2327. processSelects(extra, selectsUsed, NULL, leftSelect, rightSelect);
  2328. }
  2329. }
  2330. while (!extra->outputFields.allGathered())
  2331. {
  2332. SelectUsedArray parentSelects;
  2333. HqlExprArray values;
  2334. HqlExprArray selfSelects;
  2335. extra->outputFields.gatherTransformValuesUsed(&selfSelects, &parentSelects, &values, left, transform);
  2336. //For DENORMALIZE the transform is always called, possibly multiple times. Therefore
  2337. //if a field is used from the output it must be included in the input (but could be blanked)
  2338. //if a field is used from LEFT then it must be in the input and the output
  2339. //Ensure all output rows are also included in the input dataset
  2340. ForEachItemIn(i1, selfSelects)
  2341. processMatchingSelector(extra->leftFieldsRequired, &selfSelects.item(i1), left); // more: Could blank
  2342. processSelects(extra, parentSelects, NULL, leftSelect, rightSelect);
  2343. ForEachItemIn(i2, values)
  2344. processSelects(extra, querySelectsUsed(&values.item(i2)), NULL, leftSelect, rightSelect);
  2345. }
  2346. break;
  2347. }
  2348. case FixedInputActivity:
  2349. {
  2350. extra->leftFieldsRequired.setAll();
  2351. extra->rightFieldsRequired.setAllIfAny();
  2352. if (expr->queryRecord())
  2353. extra->addAllOutputs();
  2354. break;
  2355. }
  2356. case SourceActivity:
  2357. {
  2358. //No inputs to worry about, and not compoundable so output record won't change.
  2359. extra->addAllOutputs();
  2360. break;
  2361. }
  2362. case SinkActivity:
  2363. case SimpleActivity:
  2364. {
  2365. //inputs will be outputs required plus any fields used within the function
  2366. //outputs will eventually match inputs when finished percolating.
  2367. if (extra->outputFields.includeAll())
  2368. extra->leftFieldsRequired.setAll();
  2369. else
  2370. {
  2371. if (extra->activityKind() != SinkActivity)
  2372. extra->leftFieldsRequired.clone(extra->outputFields);
  2373. IHqlExpression * ds = expr->queryChild(0)->queryNormalizedSelector();
  2374. IHqlExpression * selSeq = querySelSeq(expr);
  2375. //Left and right are here because of the dedup criteria. It would be better to
  2376. //special case that, but first lets get it working
  2377. OwnedHqlExpr leftSelect = selSeq ? createSelector(no_left, ds, selSeq) : NULL;
  2378. OwnedHqlExpr rightSelect = selSeq ? createSelector(no_right, ds, selSeq) : NULL;
  2379. unsigned max = expr->numChildren();
  2380. for (unsigned i2=1; i2 < max; i2++)
  2381. {
  2382. const SelectUsedArray & selectsUsed = querySelectsUsed(expr->queryChild(i2));
  2383. ForEachItemIn(i3, selectsUsed)
  2384. {
  2385. IHqlExpression * curSelect = &selectsUsed.item(i3);
  2386. processMatchingSelector(extra->leftFieldsRequired, curSelect, leftSelect);
  2387. processMatchingSelector(extra->leftFieldsRequired, curSelect, rightSelect);
  2388. processMatchingSelector(extra->leftFieldsRequired, curSelect, ds);
  2389. if (extra->leftFieldsRequired.includeAll())
  2390. break;
  2391. }
  2392. if (extra->leftFieldsRequired.includeAll())
  2393. break;
  2394. }
  2395. if (extra->activityKind() != SinkActivity)
  2396. {
  2397. if (extra->leftFieldsRequired.includeAll())
  2398. extra->addAllOutputs();
  2399. }
  2400. }
  2401. break;
  2402. }
  2403. default:
  2404. throwUnexpected();
  2405. }
  2406. }
  2407. void ImplicitProjectTransformer::logChange(const char * message, IHqlExpression * expr, const UsedFieldSet & fields)
  2408. {
  2409. _ATOM exprName = expr->queryName();
  2410. if (!exprName && isCompoundSource(expr))
  2411. exprName = expr->queryChild(0)->queryName();
  2412. StringBuffer name, fieldText;
  2413. if (exprName)
  2414. name.append(exprName).append(" ");
  2415. name.append(getOpString(expr->getOperator()));
  2416. const UsedFieldSet * original = fields.queryOriginal();
  2417. assertex(original);
  2418. fieldText.append("(").append(fields.numFields());
  2419. fieldText.append("/").append(original->numFields());
  2420. fieldText.append(")");
  2421. //If number removed < number remaining just log the fields removed.
  2422. if (fields.numFields() * 2 > original->numFields())
  2423. {
  2424. UsedFieldSet removed;
  2425. removed.createDifference(*original, fields);
  2426. fieldText.append(" removed ");
  2427. removed.getText(fieldText);
  2428. }
  2429. else
  2430. fields.getText(fieldText);
  2431. const char * const format = "ImplicitProject: %s %s now %s";
  2432. DBGLOG(format, message, name.str(), fieldText.str());
  2433. if (options.notifyOptimizedProjects)
  2434. {
  2435. if (options.notifyOptimizedProjects >= 2 || exprName)
  2436. {
  2437. StringBuffer messageText;
  2438. messageText.appendf(format, message, name.str(), fieldText.str());
  2439. translator.addWorkunitException(ExceptionSeverityInformation, 0, messageText.str(), NULL);
  2440. }
  2441. }
  2442. }
  2443. void ImplicitProjectTransformer::getTransformedChildren(IHqlExpression * expr, HqlExprArray & children)
  2444. {
  2445. transformChildren(expr, children);
  2446. }
  2447. IHqlExpression * ImplicitProjectTransformer::createParentTransformed(IHqlExpression * expr)
  2448. {
  2449. OwnedHqlExpr transformed = Parent::createTransformed(expr);
  2450. updateOrphanedSelectors(transformed, expr);
  2451. return transformed.getClear();
  2452. }
  2453. IHqlExpression * ImplicitProjectTransformer::createTransformed(IHqlExpression * expr)
  2454. {
  2455. if (expr->isConstant())
  2456. {
  2457. switch (expr->getOperator())
  2458. {
  2459. case no_transform:
  2460. case no_newtransform:
  2461. case no_transformlist:
  2462. case no_list:
  2463. return LINK(expr);
  2464. }
  2465. }
  2466. //Can't call Parent::createTransformed as a default because the TranformRecordActivities trigger asserts when types mismatch.
  2467. ImplicitProjectInfo * extra = queryBodyExtra(expr);
  2468. ComplexImplicitProjectInfo * complexExtra = extra->queryComplexInfo();
  2469. if (!complexExtra)
  2470. return createParentTransformed(expr);
  2471. OwnedHqlExpr transformed;
  2472. switch (extra->activityKind())
  2473. {
  2474. case DenormalizeActivity:
  2475. case RollupTransformActivity:
  2476. case IterateTransformActivity:
  2477. {
  2478. //Always reduce things that create a new record so they only project the fields they need to
  2479. if (complexExtra->outputChanged() || !complexExtra->fieldsToBlank.isEmpty())
  2480. {
  2481. unsigned transformPos = queryTransformIndex(expr);
  2482. //Walk transform, only including assigns that are in the output list.
  2483. HqlExprArray args;
  2484. getTransformedChildren(expr, args);
  2485. //MORE: If the input's output contains fields that are not required in this transforms output then
  2486. //include them, but assign them default values to stop them pulling in other variables.
  2487. IHqlExpression * transform = &args.item(transformPos);
  2488. IHqlExpression * newTransform = complexExtra->outputFields.createFilteredTransform(transform, &complexExtra->fieldsToBlank);
  2489. args.replace(*newTransform, transformPos);
  2490. transformed.setown(expr->clone(args));
  2491. transformed.setown(updateSelectors(transformed, expr));
  2492. logChange("Transform", expr, complexExtra->outputFields);
  2493. }
  2494. else
  2495. {
  2496. #ifdef _DEBUG
  2497. IHqlExpression * ds = expr->queryChild(0);
  2498. OwnedHqlExpr transformedDs = transform(ds);
  2499. assertex(recordTypesMatch(ds, transformedDs));
  2500. #endif
  2501. transformed.setown(createParentTransformed(expr));
  2502. //MORE: Need to replace left/right with their transformed varieties because the record may have changed format
  2503. transformed.setown(updateSelectors(transformed, expr));
  2504. }
  2505. break;
  2506. }
  2507. case CreateRecordActivity:
  2508. case CreateRecordLRActivity:
  2509. {
  2510. //Always reduce things that create a new record so they only project the fields they need to
  2511. if (complexExtra->outputChanged())
  2512. {
  2513. unsigned transformPos = queryTransformIndex(expr);
  2514. //Walk transform, only including assigns that are in the output list.
  2515. HqlExprArray args;
  2516. getTransformedChildren(expr, args);
  2517. IHqlExpression * transform = &args.item(transformPos);
  2518. IHqlExpression * newTransform = complexExtra->outputFields.createFilteredTransform(transform, NULL);
  2519. args.replace(*newTransform, transformPos);
  2520. if (transform->getOperator() == no_newtransform)
  2521. args.replace(*LINK(complexExtra->queryOutputRecord()), transformPos-1);
  2522. IHqlExpression * onFail = queryProperty(onFailAtom, args);
  2523. if (onFail)
  2524. {
  2525. IHqlExpression * newTransform = complexExtra->outputFields.createFilteredTransform(onFail->queryChild(0), NULL);
  2526. IHqlExpression * newOnFail = createExprAttribute(onFailAtom, newTransform);
  2527. args.replace(*newOnFail, args.find(*onFail));
  2528. }
  2529. //We may have converted a count project into a project..... (see bug18839.xhql)
  2530. if (expr->getOperator() == no_hqlproject)
  2531. {
  2532. IHqlExpression * countProjectAttr = queryProperty(_countProject_Atom, args);
  2533. if (countProjectAttr && !transformContainsCounter(newTransform, countProjectAttr->queryChild(0)))
  2534. args.zap(*countProjectAttr);
  2535. }
  2536. transformed.setown(expr->clone(args));
  2537. transformed.setown(updateSelectors(transformed, expr));
  2538. logChange("Minimize", expr, complexExtra->outputFields);
  2539. }
  2540. else
  2541. {
  2542. transformed.setown(createParentTransformed(expr));
  2543. //MORE: Need to replace left/right with their transformed varieties because the record may have changed format
  2544. transformed.setown(updateSelectors(transformed, expr));
  2545. }
  2546. break;
  2547. }
  2548. case CreateRecordSourceActivity:
  2549. {
  2550. assertex(expr->getOperator() == no_inlinetable || expr->getOperator() == no_dataset_from_transform);
  2551. //Always reduce things that create a new record so they only project the fields they need to
  2552. if (complexExtra->outputChanged())
  2553. {
  2554. HqlExprArray args;
  2555. switch (expr->getOperator())
  2556. {
  2557. case no_inlinetable:
  2558. {
  2559. IHqlExpression * transforms = expr->queryChild(0);
  2560. HqlExprArray newTransforms;
  2561. ForEachChild(i, transforms)
  2562. {
  2563. IHqlExpression * transform = transforms->queryChild(i);
  2564. newTransforms.append(*complexExtra->outputFields.createFilteredTransform(transform, NULL));
  2565. }
  2566. args.append(*transforms->clone(newTransforms));
  2567. break;
  2568. }
  2569. case no_dataset_from_transform:
  2570. {
  2571. IHqlExpression * transform = expr->queryChild(1);
  2572. args.append(*LINK(expr->queryChild(0)));
  2573. args.append(*complexExtra->outputFields.createFilteredTransform(transform, NULL));
  2574. break;
  2575. }
  2576. }
  2577. args.append(*LINK(complexExtra->queryOutputRecord()));
  2578. unwindChildren(args, expr, 2);
  2579. transformed.setown(expr->clone(args));
  2580. logChange("Minimize", expr, complexExtra->outputFields);
  2581. }
  2582. else
  2583. {
  2584. transformed.setown(createParentTransformed(expr));
  2585. //MORE: Need to replace left/right with their transformed varieties because the record may have changed format
  2586. transformed.setown(updateSelectors(transformed, expr));
  2587. }
  2588. break;
  2589. }
  2590. case CompoundActivity:
  2591. {
  2592. transformed.setown(createParentTransformed(expr));
  2593. if (complexExtra->outputChanged())
  2594. {
  2595. HqlExprArray args;
  2596. args.append(*complexExtra->createOutputProject(transformed->queryChild(0)));
  2597. transformed.setown(transformed->clone(args));
  2598. logChange("Project output from compound", expr, complexExtra->outputFields);
  2599. break;
  2600. }
  2601. }
  2602. case CompoundableActivity:
  2603. {
  2604. transformed.setown(createParentTransformed(expr));
  2605. //insert a project after the record.
  2606. if (complexExtra->outputChanged())
  2607. {
  2608. transformed.setown(complexExtra->createOutputProject(transformed));
  2609. transformed.setown(createWrapper(queryCompoundOp(expr), transformed.getClear()));
  2610. logChange("Project output from", expr, complexExtra->outputFields);
  2611. }
  2612. break;
  2613. }
  2614. case AnyTypeActivity:
  2615. {
  2616. transformed.setown(createParentTransformed(expr));
  2617. //insert a project after the record.
  2618. if (complexExtra->outputChanged())
  2619. {
  2620. logChange("Change format of dataset", expr, complexExtra->outputFields);
  2621. HqlExprArray args;
  2622. args.append(*LINK(complexExtra->queryOutputRecord()));
  2623. unwindChildren(args, transformed, 1);
  2624. transformed.setown(transformed->clone(args));
  2625. }
  2626. break;
  2627. }
  2628. case FixedInputActivity:
  2629. case SourceActivity:
  2630. case NonActivity:
  2631. case ScalarSelectActivity:
  2632. case SinkActivity:
  2633. transformed.setown(createParentTransformed(expr));
  2634. //can't change...
  2635. break;
  2636. case PassThroughActivity:
  2637. if (complexExtra->outputChanged())
  2638. {
  2639. HqlExprArray args;
  2640. ForEachChild(i, expr)
  2641. {
  2642. IHqlExpression * cur = expr->queryChild(i);
  2643. OwnedHqlExpr next = transform(cur);
  2644. if (cur->isDataset() || cur->isDatarow())
  2645. {
  2646. //Ensure all inputs have same format..
  2647. if (cur->queryRecord() != complexExtra->queryOutputRecord())
  2648. next.setown(complexExtra->createOutputProject(next));
  2649. }
  2650. args.append(*next.getClear());
  2651. }
  2652. transformed.setown(expr->clone(args));
  2653. logChange("Passthrough modified", expr, complexExtra->outputFields);
  2654. }
  2655. else
  2656. transformed.setown(createParentTransformed(expr));
  2657. break;
  2658. case SimpleActivity:
  2659. {
  2660. transformed.setown(createParentTransformed(expr));
  2661. IHqlExpression * onFail = transformed->queryProperty(onFailAtom);
  2662. if (onFail)
  2663. {
  2664. IHqlExpression * newTransform = complexExtra->outputFields.createFilteredTransform(onFail->queryChild(0), NULL);
  2665. IHqlExpression * newOnFail = createExprAttribute(onFailAtom, newTransform);
  2666. transformed.setown(replaceOwnedProperty(transformed, newOnFail));
  2667. }
  2668. if (complexExtra->insertProject)
  2669. {
  2670. HqlExprArray args;
  2671. OwnedHqlExpr inputProject = complexExtra->createOutputProject(transformed->queryChild(0));
  2672. OwnedHqlExpr replacement = replaceChildDataset(transformed, inputProject, 0);
  2673. transformed.setown(updateSelectors(replacement, expr));
  2674. logChange("Insert project before", expr, complexExtra->outputFields);
  2675. }
  2676. else
  2677. transformed.setown(updateSelectors(transformed, expr));
  2678. break;
  2679. }
  2680. default:
  2681. throwUnexpected();
  2682. }
  2683. return transformed.getClear();
  2684. }
  2685. ANewTransformInfo * ImplicitProjectTransformer::createTransformInfo(IHqlExpression * expr)
  2686. {
  2687. ProjectExprKind kind = getProjectExprKind(expr);
  2688. node_operator op = expr->getOperator();
  2689. if (kind == NonActivity)
  2690. {
  2691. switch (op)
  2692. {
  2693. case no_record:
  2694. case no_rowset:
  2695. case no_rowsetrange:
  2696. case no_datasetlist:
  2697. break;
  2698. default:
  2699. return CREATE_NEWTRANSFORMINFO2(ImplicitProjectInfo, expr, kind);
  2700. }
  2701. }
  2702. if (kind == ComplexNonActivity)
  2703. kind = NonActivity;
  2704. return CREATE_NEWTRANSFORMINFO2(ComplexImplicitProjectInfo, expr, kind);
  2705. }
  2706. void ImplicitProjectTransformer::finalizeFields()
  2707. {
  2708. ForEachItemIn(i, activities)
  2709. finalizeFields(&activities.item(i));
  2710. }
  2711. static bool requiresFewerFields(const UsedFieldSet & fields, ComplexImplicitProjectInfo & input)
  2712. {
  2713. return fields.requiresFewerFields(input.outputFields);
  2714. }
  2715. void ImplicitProjectTransformer::finalizeFields(IHqlExpression * expr)
  2716. {
  2717. ComplexImplicitProjectInfo * extra = queryBodyComplexExtra(expr);
  2718. if (!extra->okToOptimize())
  2719. return;
  2720. switch (extra->activityKind())
  2721. {
  2722. case CreateRecordActivity:
  2723. case CreateRecordLRActivity:
  2724. case CompoundActivity:
  2725. case CompoundableActivity:
  2726. case CreateRecordSourceActivity:
  2727. case AnyTypeActivity:
  2728. extra->finalizeOutputRecord();
  2729. break;
  2730. case DenormalizeActivity:
  2731. case RollupTransformActivity:
  2732. case IterateTransformActivity:
  2733. {
  2734. //output must always match the input..., but any fields that are in the input, but not needed in the output we'll add as exceptions
  2735. //and assign default values to them, otherwise it can cause other fields to be required in the input + causes chaos
  2736. extra->fieldsToBlank.createDifference(extra->inputs.item(0).outputFields, extra->outputFields);
  2737. extra->outputFields.unionFields(extra->fieldsToBlank);
  2738. extra->fieldsToBlank.optimizeFieldsToBlank(extra->outputFields, queryNewColumnProvider(expr));
  2739. if (!extra->fieldsToBlank.isEmpty())
  2740. {
  2741. const char * opString = getOpString(expr->getOperator());
  2742. StringBuffer fieldText;
  2743. extra->fieldsToBlank.getText(fieldText);
  2744. DBGLOG("ImplicitProject: Fields %s for %s not required by outputs - so blank in transform", fieldText.str(), opString);
  2745. }
  2746. extra->finalizeOutputRecord();
  2747. break;
  2748. }
  2749. case FixedInputActivity:
  2750. case SourceActivity:
  2751. case ScalarSelectActivity:
  2752. break;
  2753. case PassThroughActivity:
  2754. {
  2755. //Banches coming into this IF/MERGE etc. may have different fields (e.g., because of ITERATEs), and
  2756. //the output fields may be smaller (e.g., no merge sort conditions, no fields used and inputs filter)
  2757. //So use the intersection of the inputfields as the output record. 90% of the time they will be
  2758. //the same so no projects will be introduced.
  2759. bool anyProjected = false;
  2760. unsigned numInputs = extra->inputs.ordinality();
  2761. for (unsigned i=0; i != numInputs; i++)
  2762. {
  2763. ComplexImplicitProjectInfo & cur = extra->inputs.item(i);
  2764. if (!cur.outputFields.includeAll())
  2765. {
  2766. extra->outputFields.set(cur.outputFields);
  2767. for (unsigned i2=i+1; i2 != numInputs; i2++)
  2768. {
  2769. ComplexImplicitProjectInfo & cur = extra->inputs.item(i2);
  2770. extra->outputFields.intersectFields(cur.outputFields);
  2771. }
  2772. extra->finalizeOutputRecord();
  2773. anyProjected = true;
  2774. break;
  2775. }
  2776. }
  2777. if (!anyProjected)
  2778. extra->setMatchingOutput(&extra->inputs.item(0));
  2779. break;
  2780. }
  2781. case SinkActivity:
  2782. break;
  2783. case SimpleActivity:
  2784. if (extra->insertProject && requiresFewerFields(extra->leftFieldsRequired, extra->inputs.item(0)))
  2785. {
  2786. extra->outputFields.set(extra->leftFieldsRequired);
  2787. extra->finalizeOutputRecord();
  2788. }
  2789. else
  2790. extra->setMatchingOutput(&extra->inputs.item(0));
  2791. break;
  2792. default:
  2793. throwUnexpected();
  2794. }
  2795. }
  2796. void ImplicitProjectTransformer::inheritActiveFields(IHqlExpression * expr, ImplicitProjectInfo * extra, unsigned min, unsigned max)
  2797. {
  2798. for (unsigned i = min; i < max; i++)
  2799. inheritActiveFields(extra, expr->queryChild(i));
  2800. }
  2801. void ImplicitProjectTransformer::inheritActiveFields(ImplicitProjectInfo * target, IHqlExpression * source)
  2802. {
  2803. if (source->queryBody()->queryTransformExtra())
  2804. {
  2805. target->addActiveSelects(querySelectsUsed(source));
  2806. }
  2807. }
  2808. void ImplicitProjectTransformer::insertProjects()
  2809. {
  2810. ForEachItemIn(i, activities)
  2811. insertProjects(&activities.item(i));
  2812. }
  2813. void ImplicitProjectTransformer::insertProjects(IHqlExpression * expr)
  2814. {
  2815. ComplexImplicitProjectInfo * extra = queryBodyComplexExtra(expr);
  2816. if (!extra->okToOptimize())
  2817. return;
  2818. if (options.optimizeSpills && (expr->getOperator() == no_commonspill))
  2819. {
  2820. if (requiresFewerFields(extra->leftFieldsRequired, extra->inputs.item(0)))
  2821. extra->insertProject = true;
  2822. return;
  2823. }
  2824. if (options.insertProjectCostLevel == 0)
  2825. return;
  2826. if (extra->queryCostFactor(targetClusterType) < options.insertProjectCostLevel)
  2827. return;
  2828. switch (extra->activityKind())
  2829. {
  2830. case SimpleActivity:
  2831. if (requiresFewerFields(extra->leftFieldsRequired, extra->inputs.item(0)))
  2832. extra->insertProject = true;
  2833. break;
  2834. }
  2835. }
  2836. void ImplicitProjectTransformer::percolateFields()
  2837. {
  2838. ForEachItemInRev(i, activities)
  2839. calculateFieldsUsed(&activities.item(i));
  2840. }
  2841. IHqlExpression * ImplicitProjectTransformer::process(IHqlExpression * expr)
  2842. {
  2843. cycle_t time1 = msTick();
  2844. analyse(expr, 0); // gather a list of activities, and link them together.
  2845. cycle_t time2 = msTick();
  2846. //DEBUG_TIMERX(translator.queryTimeReporter(), "EclServer: implicit.analyse", time2-time1);
  2847. percolateFields();
  2848. cycle_t time3 = msTick();
  2849. //DEBUG_TIMERX(translator.queryTimeReporter(), "EclServer: implicit.percolate", time3-time2);
  2850. switch (targetClusterType)
  2851. {
  2852. case RoxieCluster:
  2853. //worth inserting projects after sources that can be compound.
  2854. //also may be worth projecting before iterating since an iterate
  2855. //copies data but can't change the fields in use
  2856. break;
  2857. case HThorCluster:
  2858. // same as roxie, but also maybe worth inserting projects to minimise the amount of data that is spilled.
  2859. break;
  2860. case ThorCluster:
  2861. case ThorLCRCluster:
  2862. //worth inserting projects to reduce copying, spilling, but primarily data transfered between nodes.
  2863. if (options.insertProjectCostLevel || options.optimizeSpills)
  2864. insertProjects();
  2865. break;
  2866. }
  2867. cycle_t time4 = msTick();
  2868. //DEBUG_TIMERX(translator.queryTimeReporter(), "EclServer: implicit.reduceData", time4-time3);
  2869. finalizeFields();
  2870. cycle_t time5 = msTick();
  2871. //DEBUG_TIMERX(translator.queryTimeReporter(), "EclServer: implicit.finalize", time5-time4);
  2872. //traceActivities();
  2873. OwnedHqlExpr ret = transformRoot(expr);
  2874. cycle_t time6 = msTick();
  2875. //DEBUG_TIMERX(translator.queryTimeReporter(), "EclServer: implicit.transform", time6-time5);
  2876. return ret.getClear();
  2877. }
  2878. void ImplicitProjectTransformer::traceActivities()
  2879. {
  2880. ForEachItemIn(i, activities)
  2881. queryBodyComplexExtra(&activities.item(i))->trace();
  2882. }
  2883. IHqlExpression * ImplicitProjectTransformer::updateSelectors(IHqlExpression * newExpr, IHqlExpression * oldExpr)
  2884. {
  2885. //MORE: Clean me up using new flags when they are merged
  2886. IHqlExpression * newDs = newExpr->queryChild(0);
  2887. IHqlExpression * oldDs = oldExpr->queryChild(0);
  2888. switch (getChildDatasetType(newExpr))
  2889. {
  2890. case childdataset_none:
  2891. case childdataset_many_noscope:
  2892. case childdataset_many:
  2893. case childdataset_if:
  2894. case childdataset_case:
  2895. case childdataset_map:
  2896. case childdataset_dataset_noscope:
  2897. return LINK(newExpr);
  2898. //None of these have any scoped arguments, so no need to remove them
  2899. break;
  2900. case childdataset_dataset:
  2901. {
  2902. return updateMappedFields(newExpr, oldDs->queryNormalizedSelector(), newDs->queryNormalizedSelector(), 1);
  2903. }
  2904. case childdataset_datasetleft:
  2905. {
  2906. OwnedHqlExpr mapped = updateMappedFields(newExpr, oldDs->queryNormalizedSelector(), newDs->queryNormalizedSelector(), 1);
  2907. IHqlExpression * selSeq = querySelSeq(newExpr);
  2908. assertex(selSeq == querySelSeq(oldExpr));
  2909. OwnedHqlExpr newLeft = createSelector(no_left, newDs, selSeq);
  2910. OwnedHqlExpr oldLeft = createSelector(no_left, oldDs, selSeq);
  2911. return updateChildSelectors(mapped, oldLeft, newLeft, 1);
  2912. }
  2913. case childdataset_left:
  2914. {
  2915. IHqlExpression * selSeq = querySelSeq(newExpr);
  2916. assertex(selSeq == querySelSeq(oldExpr));
  2917. OwnedHqlExpr newLeft = createSelector(no_left, newDs, selSeq);
  2918. OwnedHqlExpr oldLeft = createSelector(no_left, oldDs, selSeq);
  2919. return updateChildSelectors(newExpr, oldLeft, newLeft, 1);
  2920. }
  2921. case childdataset_same_left_right:
  2922. case childdataset_top_left_right:
  2923. case childdataset_nway_left_right:
  2924. {
  2925. OwnedHqlExpr mapped = updateMappedFields(newExpr, oldDs->queryNormalizedSelector(), newDs->queryNormalizedSelector(), 1);
  2926. IHqlExpression * selSeq = querySelSeq(newExpr);
  2927. assertex(selSeq == querySelSeq(oldExpr));
  2928. OwnedHqlExpr newLeft = createSelector(no_left, newExpr->queryChild(0), selSeq);
  2929. OwnedHqlExpr oldLeft = createSelector(no_left, oldExpr->queryChild(0), selSeq);
  2930. OwnedHqlExpr ds1 = updateChildSelectors(mapped, oldLeft, newLeft, 1);
  2931. OwnedHqlExpr newRight = createSelector(no_right, newExpr->queryChild(0), selSeq);
  2932. OwnedHqlExpr oldRight = createSelector(no_right, oldExpr->queryChild(0), selSeq);
  2933. return updateChildSelectors(ds1, oldRight, newRight, 1);
  2934. }
  2935. case childdataset_leftright:
  2936. {
  2937. IHqlExpression * selSeq = querySelSeq(newExpr);
  2938. assertex(selSeq == querySelSeq(oldExpr));
  2939. OwnedHqlExpr newLeft = createSelector(no_left, newExpr->queryChild(0), selSeq);
  2940. OwnedHqlExpr oldLeft = createSelector(no_left, oldExpr->queryChild(0), selSeq);
  2941. unsigned firstLeft = (newExpr->getOperator() == no_normalize) ? 1 : 2;
  2942. OwnedHqlExpr ds1 = updateChildSelectors(newExpr, oldLeft, newLeft, firstLeft);
  2943. OwnedHqlExpr newRight = createSelector(no_right, newExpr->queryChild(1), selSeq);
  2944. OwnedHqlExpr oldRight = createSelector(no_right, oldExpr->queryChild(1), selSeq);
  2945. return updateChildSelectors(ds1, oldRight, newRight, 2);
  2946. }
  2947. break;
  2948. default:
  2949. throwUnexpected();
  2950. }
  2951. }
  2952. const SelectUsedArray & ImplicitProjectTransformer::querySelectsUsedForField(IHqlExpression * transform, IHqlExpression * field)
  2953. {
  2954. IHqlExpression * transformValues = queryTransformAssignValue(transform, field);
  2955. if (!transformValues)
  2956. transformValues = queryTransformAssignValue(transform, field);
  2957. return querySelectsUsed(transformValues);
  2958. }
  2959. #include "hqlttcpp.ipp"
  2960. IHqlExpression * insertImplicitProjects(HqlCppTranslator & translator, IHqlExpression * expr, bool optimizeSpills)
  2961. {
  2962. #if defined(POST_COMMON_ANNOTATION)
  2963. HqlExprArray ret;
  2964. {
  2965. ImplicitProjectTransformer transformer(translator, optimizeSpills);
  2966. ret.append(*transformer.process(expr));
  2967. }
  2968. normalizeAnnotations(translator, ret);
  2969. return createActionList(ret);
  2970. #else
  2971. ImplicitProjectTransformer transformer(translator, optimizeSpills);
  2972. return transformer.process(expr);
  2973. #endif
  2974. }
  2975. void insertImplicitProjects(HqlCppTranslator & translator, HqlExprArray & exprs)
  2976. {
  2977. if (exprs.ordinality())
  2978. {
  2979. OwnedHqlExpr compound = createActionList(exprs);
  2980. OwnedHqlExpr ret = insertImplicitProjects(translator, compound, false);
  2981. exprs.kill();
  2982. ret->unwindList(exprs, no_actionlist);
  2983. }
  2984. }
  2985. /*
  2986. To Implement field gathering would need to do the following:
  2987. - Could assert that only non-nested fields are considered. That means all field references are in the form (ds.field).
  2988. This would simplify gathering, filtering, and translating the dataset selector....
  2989. - All no_selects on in-scope datasets (or sub fields?) get added to list of active fields in self.
  2990. - analyseExpr() clones all active fields from children into self.
  2991. - active datasets used in a dataset context need to also be added.
  2992. - any item inherits all child fields from non-dataset children, and removes all references to parent datasets (in what ever form)
  2993. very similar to the inScope Table processing. [Only really needed for non-global activities]
  2994. - any activity inherits all inScope fields from its parent.
  2995. - those expressions with scoped dataset inputs need two lists (childrensFieldAccess and inscopeFields)
  2996. - may be worth having different classes for handling the different categories:
  2997. (simpleExpression, scopedExpression, activity) with virtuals to handle differences
  2998. - removingChildReferences
  2999. would need matchesSelector(list, selector) which worked recursively.
  3000. */