hqlttcpp.cpp 461 KB


  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "platform.h"
  14. #include "jlib.hpp"
  15. #include "jmisc.hpp"
  16. #include "jstream.ipp"
  17. #include "eclrtl.hpp"
  18. #include "hql.hpp"
  19. #include "hqlttcpp.ipp"
  20. #include "hqlmeta.hpp"
  21. #include "hqlutil.hpp"
  22. #include "hqlcpputil.hpp"
  23. #include "hqlthql.hpp"
  24. #include "hqlhtcpp.ipp"
  25. #include "hqltcppc.ipp"
  26. #include "hqlcatom.hpp"
  27. #include "hqlfold.hpp"
  28. #include "hqlgraph.ipp"
  29. #include "hqllib.ipp"
  30. #include "hqlpmap.hpp"
  31. #include "hqlopt.hpp"
  32. #include "hqlcerrors.hpp"
  33. #include "hqlscope.hpp"
  34. #include "hqlsource.ipp"
  35. #include "hqlvalid.hpp"
  36. #include "hqlerror.hpp"
  37. #include "hqlalias.hpp"
  38. #include "hqlir.hpp"
  39. #define TraceExprPrintLog(x, expr) TOSTRLOG(MCdebugInfo(300), unknownJob, x, (expr)->toString);
  40. //The following are for code that currently causes problems, but are probably a good idea
  41. //#define MAP_PROJECT_TO_USERTABLE
  42. //#define REMOVE_NAMED_SCALARS
  43. //#define OPTIMIZE_IMPLICIT_CAST
  44. #define PERSIST_VERSION 1 // Increment when implementation is incompatible.
  45. #define REMOVE_GLOBAL_ANNOTATION // This should improve cse. It currently does for some, but not others...
  46. #define DEFAULT_FOLD_OPTIONS HFOfoldfilterproject
  47. //#define PICK_ENGINE_EARLY
  48. //===========================================================================
  49. static bool isWorthHoisting(IHqlExpression * expr, bool asSubQuery)
  50. {
  51. bool isFiltered = false;
  52. loop
  53. {
  54. switch (expr->getOperator())
  55. {
  56. case no_newkeyindex:
  57. case no_table:
  58. case no_temptable:
  59. case no_inlinetable:
  60. case no_globalscope:
  61. case no_global:
  62. case no_independent:
  63. case no_field:
  64. case no_datasetfromrow:
  65. case no_datasetfromdictionary:
  66. case no_null:
  67. case no_workunit_dataset:
  68. case no_colon:
  69. //compound activities not in list because not created at this point.
  70. return (isFiltered && asSubQuery);
  71. case no_select:
  72. return !isTargetSelector(expr);
  73. case no_filter:
  74. expr = expr->queryChild(0);
  75. isFiltered = true;
  76. break;
  77. case no_compound_diskread:
  78. case no_compound_indexread:
  79. case no_hqlproject:
  80. case no_newusertable:
  81. case no_limit:
  82. case no_catchds:
  83. case no_keyedlimit:
  84. case no_sorted:
  85. case no_grouped:
  86. case no_stepped:
  87. case no_distributed:
  88. case no_preservemeta:
  89. case no_nofold:
  90. case no_nohoist:
  91. case no_section:
  92. case no_sectioninput:
  93. case no_dataset_alias:
  94. case no_forcegraph:
  95. expr = expr->queryChild(0);
  96. break;
  97. case no_fail:
  98. return false;
  99. default:
  100. return true;
  101. }
  102. }
  103. }
  104. IHqlExpression * getDebugValueExpr(IConstWorkUnit * wu, IHqlExpression * expr)
  105. {
  106. StringBuffer name;
  107. getStringValue(name, expr->queryChild(0));
  108. SCMStringBuffer value;
  109. wu->getDebugValue(name, value);
  110. ITypeInfo * exprType = expr->queryType();
  111. if (exprType->getTypeCode() == type_boolean)
  112. return createConstant(clipStrToBool(value.length(), value.str()));
  113. return createConstant(exprType->castFrom(value.length(), value.str()));
  114. }
  115. //===========================================================================
  116. struct GlobalAttributeInfo
  117. {
  118. public:
  119. GlobalAttributeInfo(const char * _filePrefix, const char * _storedPrefix, IHqlExpression * _value) : value(_value)
  120. {
  121. setOp = no_none;
  122. persistOp = no_none;
  123. few = false;
  124. filePrefix = _filePrefix;
  125. storedPrefix = _storedPrefix;
  126. numPersistInstances = 0;
  127. }
  128. void extractGlobal(IHqlExpression * global, ClusterType platform);
  129. void extractStoredInfo(IHqlExpression * expr, IHqlExpression * codehash, bool isRoxie, int multiplePersistInstances);
  130. void checkFew(HqlCppTranslator & translator);
  131. void splitGlobalDefinition(ITypeInfo * type, IHqlExpression * value, IConstWorkUnit * wu, SharedHqlExpr & setOutput, OwnedHqlExpr * getOutput, bool isRoxie);
  132. IHqlExpression * getStoredKey();
  133. void preventDiskSpill() { few = true; }
  134. IHqlExpression * queryCluster() const { return cluster; }
  135. int queryMaxPersistCopies() const { return numPersistInstances; }
  136. protected:
  137. void doSplitGlobalDefinition(ITypeInfo * type, IHqlExpression * value, IConstWorkUnit * wu, SharedHqlExpr & setOutput, OwnedHqlExpr * getOutput, bool isRoxie);
  138. IHqlExpression * createSetValue(IHqlExpression * value, IHqlExpression * aliasName);
  139. void createSmallOutput(IHqlExpression * value, SharedHqlExpr & setOutput);
  140. IHqlExpression * queryAlias(IHqlExpression * value);
  141. IHqlExpression * queryFilename(IHqlExpression * value, IConstWorkUnit * wu, bool isRoxie);
  142. void splitSmallDataset(IHqlExpression * value, SharedHqlExpr & setOutput, OwnedHqlExpr * getOutput);
  143. void setCluster(IHqlExpression * expr);
  144. public:
  145. LinkedHqlExpr value;
  146. OwnedHqlExpr storedName;
  147. OwnedHqlExpr originalLabel;
  148. OwnedHqlExpr sequence;
  149. node_operator setOp;
  150. node_operator persistOp;
  151. protected:
  152. OwnedHqlExpr aliasName;
  153. OwnedHqlExpr cachedFilename;
  154. OwnedHqlExpr cluster;
  155. OwnedHqlExpr extraSetAttr;
  156. OwnedHqlExpr extraOutputAttr;
  157. OwnedHqlExpr codehash;
  158. const char * filePrefix;
  159. const char * storedPrefix;
  160. int numPersistInstances;
  161. bool few;
  162. };
  163. static bool isTrivialInlineOutput(IHqlExpression * expr)
  164. {
  165. if (queryRealChild(expr, 1))
  166. return false;
  167. IHqlExpression * ds = expr->queryChild(0);
  168. if ((ds->getOperator() != no_null) || !ds->isDataset())
  169. return false;
  170. IHqlExpression * seq = querySequence(expr);
  171. if (getIntValue(seq, -1) >= 0)
  172. return false;
  173. return true;
  174. }
  175. //---------------------------------------------------------------------------
  176. IHqlExpression * createNextStringValue(IHqlExpression * value, const char * prefix = NULL)
  177. {
  178. StringBuffer valueText;
  179. valueText.append(prefix ? prefix : "a");
  180. getUniqueId(valueText);
  181. #if 0
  182. if (value)
  183. {
  184. const char * nameText = value->queryName()->str();
  185. if (nameText)
  186. valueText.append("__").appendLower(strlen(nameText), nameText);
  187. //otherwise append the operator?
  188. }
  189. #endif
  190. #if 0
  191. #ifdef _DEBUG
  192. //Following lines are here to add a break point when debugging
  193. if (stricmp(valueText.str(), "a1") == 0)
  194. valueText.length();
  195. if (stricmp(valueText.str(), "a2") == 0)
  196. valueText.length();
  197. #endif
  198. #endif
  199. return createConstant(valueText.str());
  200. }
  201. IHqlExpression * createSetResult(IHqlExpression * value)
  202. {
  203. HqlExprArray args;
  204. args.append(*LINK(value));
  205. args.append(*createAttribute(sequenceAtom, getLocalSequenceNumber()));
  206. args.append(*createAttribute(namedAtom, createNextStringValue(value)));
  207. return createSetResult(args);
  208. }
  209. static IHqlExpression * addAttrOwnToDataset(IHqlExpression * dataset, IHqlExpression * attr)
  210. {
  211. HqlExprArray args;
  212. unwindChildren(args, dataset);
  213. args.append(*attr);
  214. return dataset->clone(args);
  215. }
  216. static IHqlExpression * mergeLimitIntoDataset(IHqlExpression * dataset, IHqlExpression * limit)
  217. {
  218. return addAttrOwnToDataset(dataset, createAttribute(limitAtom, LINK(limit->queryChild(1)), LINK(limit->queryChild(2))));
  219. }
  220. //---------------------------------------------------------------------------
  221. static bool isOptionTooLate(const char * name)
  222. {
  223. if (stricmp(name, "gatherDependencies") == 0) return true;
  224. if (stricmp(name, "gatherDependenciesSelection") == 0) return true;
  225. if (stricmp(name, "archiveToCpp") == 0) return true;
  226. if (stricmp(name, "importAllModules") == 0) return true;
  227. if (stricmp(name, "importImplicitModules") == 0) return true;
  228. if (stricmp(name, "noCache") == 0) return true;
  229. return false;
  230. }
  231. static HqlTransformerInfo newThorStoredReplacerInfo("NewThorStoredReplacer");
  232. NewThorStoredReplacer::NewThorStoredReplacer(HqlCppTranslator & _translator, IWorkUnit * _wu, ICodegenContextCallback * _ctxCallback)
  233. : QuickHqlTransformer(newThorStoredReplacerInfo, NULL), translator(_translator)
  234. {
  235. wu = _wu;
  236. ctxCallback = _ctxCallback;
  237. foldStored = false;
  238. seenMeta = false;
  239. }
  240. void NewThorStoredReplacer::doAnalyseBody(IHqlExpression * expr)
  241. {
  242. //NOTE: This is called very early before no_assertconstant has been processed, so we need to explicitly
  243. //constant fold, and check it is constant (bug 26963)
  244. node_operator op = expr->getOperator();
  245. if (op == no_setmeta)
  246. {
  247. StringBuffer errorTemp;
  248. seenMeta = true;
  249. IAtom * kind = expr->queryChild(0)->queryName();
  250. if (kind == debugAtom)
  251. {
  252. OwnedHqlExpr foldedName = foldHqlExpression(expr->queryChild(1));
  253. OwnedHqlExpr foldedValue = foldHqlExpression(expr->queryChild(2));
  254. IValue * name = foldedName->queryValue();
  255. IValue * value = foldedValue->queryValue();
  256. if (!name)
  257. throwError1(HQLERR_ExpectedConstantDebug, getExprECL(foldedName, errorTemp).str());
  258. if (!value)
  259. throwError1(HQLERR_ExpectedConstantDebug, getExprECL(foldedValue, errorTemp).str());
  260. StringBuffer nameText,valueText;
  261. name->getStringValue(nameText);
  262. if (isOptionTooLate(nameText.str()))
  263. translator.reportWarning(HQLWRN_OptionSetToLate, HQLWRN_OptionSetToLate_Text, nameText.str());
  264. if (value->queryType()->getTypeCode() == type_boolean)
  265. valueText.append(value->getBoolValue() ? 1 : 0);
  266. else
  267. value->getStringValue(valueText);
  268. wu->setDebugValue(nameText.str(), valueText, true);
  269. }
  270. else if (kind == workunitAtom)
  271. {
  272. OwnedHqlExpr foldedName = foldHqlExpression(expr->queryChild(1));
  273. OwnedHqlExpr foldedValue = foldHqlExpression(expr->queryChild(2));
  274. IValue * name = foldedName->queryValue();
  275. IValue * value = foldedValue->queryValue();
  276. if (!name)
  277. throwError1(HQLERR_ExpectedConstantWorkunit, getExprECL(foldedName, errorTemp).str());
  278. if (!value)
  279. throwError1(HQLERR_ExpectedConstantWorkunit, getExprECL(foldedValue, errorTemp).str());
  280. StringBuffer nameText,valueText;
  281. name->getStringValue(nameText);
  282. value->getStringValue(valueText);
  283. if (stricmp(nameText.str(), "name") == 0)
  284. wu->setJobName(valueText.str());
  285. else if (stricmp(nameText.str(), "priority") == 0)
  286. {
  287. if (isStringType(value->queryType()))
  288. {
  289. WUPriorityClass prio = PriorityClassUnknown;
  290. if (stricmp(valueText.str(), "low") == 0)
  291. prio = PriorityClassLow;
  292. else if (stricmp(valueText.str(), "normal") == 0)
  293. prio = PriorityClassNormal;
  294. else if (stricmp(valueText.str(), "high") == 0)
  295. prio = PriorityClassHigh;
  296. wu->setPriority(prio);
  297. }
  298. else
  299. wu->setPriorityLevel((int)value->getIntValue());
  300. }
  301. else if (stricmp(nameText.str(), "cluster") == 0)
  302. {
  303. ctxCallback->noteCluster(valueText.str());
  304. wu->setClusterName(valueText.str());
  305. }
  306. else if (stricmp(nameText.str(), "protect") == 0)
  307. {
  308. wu->protect(value->getBoolValue());
  309. }
  310. else if (stricmp(nameText.str(), "scope") == 0)
  311. {
  312. wu->setWuScope(valueText.str());
  313. }
  314. else
  315. throwError1(HQLERR_UnsupportedHashWorkunit, nameText.str());
  316. }
  317. else if (kind == linkAtom)
  318. {
  319. OwnedHqlExpr foldedName = foldHqlExpression(expr->queryChild(1));
  320. StringBuffer libraryText;
  321. if (getStringValue(libraryText, foldedName).length())
  322. translator.useLibrary(libraryText);
  323. }
  324. else if ((kind == constAtom) || (kind == storedAtom))
  325. {
  326. //assume there won't be many of these... otherwise we should use a hash table
  327. OwnedHqlExpr lowerName = lowerCaseHqlExpr(expr->queryChild(1));
  328. //Use lowerName->queryBody() to remove named symbols/location annotations etc.
  329. storedNames.append(*LINK(lowerName->queryBody()));
  330. storedValues.append(*LINK(expr->queryChild(2)));
  331. storedIsConstant.append(kind == constAtom);
  332. }
  333. else if (kind == onWarningAtom)
  334. translator.addGlobalOnWarning(expr);
  335. }
  336. else if (op == no_colon)
  337. {
  338. if (queryAttributeInList(labeledAtom, expr->queryChild(1)))
  339. seenMeta = true;
  340. }
  341. QuickHqlTransformer::doAnalyseBody(expr);
  342. }
  343. bool NewThorStoredReplacer::needToTransform()
  344. {
  345. //foldStored = translator.queryOptions().foldStored; // NB: options isn't initialised correctly at this point.
  346. foldStored = wu->getDebugValueBool("foldStored", false);
  347. return (foldStored || seenMeta);
  348. }
  349. // This works on unnormalized trees, so based on QuickHqlTransformer
  350. IHqlExpression * NewThorStoredReplacer::createTransformed(IHqlExpression * expr)
  351. {
  352. switch (expr->getOperator())
  353. {
  354. case no_colon:
  355. {
  356. HqlExprArray actions;
  357. expr->queryChild(1)->unwindList(actions, no_comma);
  358. OwnedHqlExpr replacement;
  359. OwnedHqlExpr matchedName;
  360. bool onlyStored = true;
  361. bool forceConstant = foldStored;
  362. ForEachItemIn(idx, actions)
  363. {
  364. IHqlExpression & cur = actions.item(idx);
  365. switch (cur.getOperator())
  366. {
  367. case no_stored:
  368. {
  369. OwnedHqlExpr storedName = lowerCaseHqlExpr(cur.queryChild(0));
  370. IHqlExpression * searchName = storedName->queryBody();
  371. unsigned match = storedNames.find(*searchName);
  372. if (match != NotFound)
  373. {
  374. if (storedIsConstant.item(match))
  375. forceConstant = true;
  376. matchedName.set(searchName);
  377. replacement.set(&storedValues.item(match));
  378. }
  379. break;
  380. }
  381. case no_attr:
  382. case no_attr_expr:
  383. if (cur.queryName() == labeledAtom)
  384. {
  385. OwnedHqlExpr storedName = lowerCaseHqlExpr(cur.queryChild(0));
  386. IHqlExpression * searchName = storedName->queryBody();
  387. unsigned match = storedNames.find(*searchName);
  388. if (match != NotFound)
  389. {
  390. matchedName.set(searchName);
  391. replacement.set(&storedValues.item(match));
  392. }
  393. else
  394. replacement.set(expr->queryChild(0));
  395. forceConstant = true;
  396. break;
  397. }
  398. default:
  399. onlyStored = false;
  400. break;
  401. }
  402. }
  403. if (matchedName)
  404. {
  405. unsigned activeMatch = activeReplacements.find(*matchedName);
  406. if (activeMatch != NotFound)
  407. {
  408. StringBuffer nameText;
  409. getExprECL(matchedName, nameText);
  410. if (activeMatch+1 != activeReplacements.ordinality())
  411. {
  412. StringBuffer othersText;
  413. for (unsigned i=activeMatch+1; i < activeReplacements.ordinality(); i++)
  414. {
  415. othersText.append(",");
  416. getExprECL(&activeReplacements.item(i), othersText);
  417. }
  418. throwError3(HQLERR_RecursiveStoredOther, forceConstant ? "CONSTANT" : "STORED", nameText.str(), othersText.str()+1);
  419. }
  420. else
  421. throwError2(HQLERR_RecursiveStored, forceConstant ? "CONSTANT" : "STORED", nameText.str());
  422. }
  423. ITypeInfo * exprType = expr->queryType();
  424. ITypeInfo * replacementType = replacement->queryType();
  425. type_t etc = exprType->getTypeCode();
  426. type_t rtc = replacementType->getTypeCode();
  427. StringBuffer nameText, exprTypeText, replacementTypeText;
  428. switch (etc)
  429. {
  430. case type_groupedtable:
  431. case type_table:
  432. case type_row:
  433. case type_record:
  434. case type_transform:
  435. case type_void:
  436. {
  437. if (etc != rtc)
  438. {
  439. getExprECL(matchedName, nameText);
  440. getFriendlyTypeStr(exprType, exprTypeText);
  441. getFriendlyTypeStr(replacementType, replacementTypeText);
  442. throwError3(HQLERR_HashStoredTypeMismatch, nameText.str(), exprTypeText.str(), replacementTypeText.str());
  443. }
  444. else if (expr->queryRecord() != replacement->queryRecord())
  445. {
  446. StringBuffer s;
  447. throwError1(HQLERR_HashStoredRecordMismatch, getExprECL(matchedName, s).str());
  448. }
  449. }
  450. break;
  451. case type_set:
  452. case type_array:
  453. {
  454. if ((rtc != type_set) && (rtc != type_array))
  455. {
  456. getExprECL(matchedName, nameText);
  457. getFriendlyTypeStr(exprType, exprTypeText);
  458. getFriendlyTypeStr(replacementType, replacementTypeText);
  459. throwError3(HQLERR_HashStoredTypeMismatch, nameText.str(), exprTypeText.str(), replacementTypeText.str());
  460. }
  461. replacement.setown(ensureExprType(replacement, exprType));
  462. break;
  463. }
  464. default:
  465. {
  466. switch (rtc)
  467. {
  468. case type_groupedtable:
  469. case type_table:
  470. case type_row:
  471. case type_record:
  472. case type_transform:
  473. case type_void:
  474. {
  475. getExprECL(matchedName, nameText);
  476. getFriendlyTypeStr(exprType, exprTypeText);
  477. getFriendlyTypeStr(replacementType, replacementTypeText);
  478. throwError3(HQLERR_HashStoredTypeMismatch, nameText.str(), exprTypeText.str(), replacementTypeText.str());
  479. }
  480. default:
  481. replacement.setown(ensureExprType(replacement, exprType));
  482. }
  483. }
  484. break;
  485. }
  486. }
  487. LinkedHqlExpr result;
  488. if (matchedName)
  489. activeReplacements.append(*matchedName);
  490. if (onlyStored)
  491. {
  492. if (forceConstant && replacement)
  493. result.setown(transform(replacement));
  494. else if (foldStored)
  495. result.setown(transform(expr->queryChild(0)));
  496. }
  497. if (replacement && !result)
  498. {
  499. HqlExprArray args;
  500. args.append(*transform(replacement));
  501. result.setown(completeTransform(expr, args));
  502. }
  503. if (matchedName)
  504. activeReplacements.pop();
  505. if (result)
  506. return result.getClear();
  507. break;
  508. }
  509. case no_comma:
  510. case no_compound:
  511. if (expr->queryChild(0)->getOperator() == no_setmeta)
  512. return transform(expr->queryChild(1));
  513. if (expr->queryChild(1)->getOperator() == no_setmeta)
  514. return transform(expr->queryChild(0));
  515. break;
  516. case no_actionlist:
  517. case no_orderedactionlist:
  518. {
  519. HqlExprArray actions;
  520. ForEachChild(i, expr)
  521. {
  522. IHqlExpression * cur = expr->queryChild(i);
  523. if (cur->getOperator() != no_setmeta)
  524. actions.append(*transform(cur));
  525. }
  526. if (actions.ordinality() != 0)
  527. return expr->clone(actions);
  528. return transform(expr->queryChild(0));
  529. }
  530. }
  531. return QuickHqlTransformer::createTransformed(expr);
  532. }
  533. //---------------------------------------------------------------------------
  534. // NB: This is called after no_setresults are added, but before any normalization.
  535. static HqlTransformerInfo hqlThorBoundaryTransformerInfo("HqlThorBoundaryTransformer");
  536. HqlThorBoundaryTransformer::HqlThorBoundaryTransformer(IConstWorkUnit * _wu, bool _isRoxie, unsigned _maxRootMaybes, bool _resourceConditionalActions, bool _resourceSequential)
  537. : NewHqlTransformer(hqlThorBoundaryTransformerInfo)
  538. {
  539. wu = _wu;
  540. isRoxie = _isRoxie;
  541. maxRootMaybes = _maxRootMaybes;
  542. resourceConditionalActions = _resourceConditionalActions;
  543. resourceSequential = _resourceSequential;
  544. }
  545. void HqlThorBoundaryTransformer::transformCompound(HqlExprArray & result, node_operator compoundOp, const HqlExprArray & args, unsigned MaxMaybes)
  546. {
  547. UnsignedArray normalizeOptions;
  548. ForEachItemIn(i, args)
  549. {
  550. IHqlExpression & cur = args.item(i);
  551. normalizeOptions.append(normalizeThor(&cur));
  552. }
  553. //If any "yes" or "some" values are separated by maybes then convert the maybes (and "somes") into yes
  554. unsigned lastYes = NotFound;
  555. unsigned numMaybes = 0;
  556. ForEachItemIn(i2, args)
  557. {
  558. switch (normalizeOptions.item(i2))
  559. {
  560. case OptionYes:
  561. case OptionSome:
  562. if (lastYes != NotFound)
  563. {
  564. if (numMaybes <= MaxMaybes)
  565. {
  566. for (unsigned j = lastYes; j <= i2; j++)
  567. normalizeOptions.replace(OptionYes, j);
  568. }
  569. }
  570. numMaybes = 0;
  571. lastYes = i2;
  572. break;
  573. case OptionMaybe:
  574. numMaybes++;
  575. break;
  576. case OptionNo:
  577. lastYes = NotFound;
  578. break;
  579. }
  580. }
  581. //Do thor and non-thor parallel actions independently
  582. HqlExprArray thor;
  583. ForEachItemIn(idx, args)
  584. {
  585. IHqlExpression * cur = &args.item(idx);
  586. if (normalizeOptions.item(idx) == OptionYes)
  587. thor.append(*LINK(cur));
  588. else
  589. {
  590. if (thor.ordinality())
  591. {
  592. result.append(*createWrapper(no_thor, createCompound(compoundOp, thor)));
  593. thor.kill();
  594. }
  595. result.append(*createTransformed(cur));
  596. }
  597. }
  598. if (thor.ordinality())
  599. result.append(*createWrapper(no_thor, createCompound(compoundOp, thor)));
  600. }
  601. IHqlExpression * HqlThorBoundaryTransformer::createTransformed(IHqlExpression * expr)
  602. {
  603. node_operator op = expr->getOperator();
  604. switch (op)
  605. {
  606. case no_field:
  607. case no_constant:
  608. case no_attr:
  609. case no_attr_link:
  610. case no_getresult:
  611. case no_left:
  612. case no_right:
  613. return LINK(expr);
  614. case no_sizeof:
  615. case no_offsetof:
  616. return getTransformedChildren(expr);
  617. }
  618. //Unusually, wrap the expression in a thor node before processing annotations.
  619. //This ensures that the location/named symbol stays with the action.
  620. //MORE: If this is a dataset then it needs to turn it into a setResult()/getResult() pair.
  621. if (normalizeThor(expr) == OptionYes)
  622. {
  623. if (!expr->isTransform())
  624. return createWrapper(no_thor, LINK(expr));
  625. }
  626. IHqlExpression * ret = queryTransformAnnotation(expr);
  627. if (ret)
  628. return ret;
  629. switch (op)
  630. {
  631. case no_actionlist:
  632. case no_orderedactionlist:
  633. {
  634. HqlExprArray nonThor, args;
  635. expr->unwindList(args, op);
  636. transformCompound(nonThor, op, args, (unsigned)-1);
  637. return createCompound(op, nonThor);
  638. }
  639. case no_parallel:
  640. {
  641. HqlExprArray expanded;
  642. expr->unwindList(expanded, no_parallel);
  643. //Similar to compound, but possible to reorder branches...
  644. unsigned numThor = 0;
  645. UnsignedArray normalizeOptions;
  646. ForEachItemIn(idx, expanded)
  647. {
  648. YesNoOption option = normalizeThor(&expanded.item(idx));
  649. normalizeOptions.append(option);
  650. if ((option == OptionYes) || (option == OptionSome))
  651. numThor++;
  652. }
  653. if (numThor > 1)
  654. {
  655. HqlExprArray thor, nonThor;
  656. ForEachItemIn(idx, expanded)
  657. {
  658. IHqlExpression * cur = &expanded.item(idx);
  659. switch (normalizeOptions.item(idx))
  660. {
  661. case OptionYes:
  662. case OptionSome:
  663. thor.append(*LINK(cur));
  664. break;
  665. default:
  666. nonThor.append(*createTransformed(cur));
  667. break;
  668. }
  669. }
  670. if (nonThor.ordinality() == 0)
  671. {
  672. //can happen if inputs are a mixture of yes and some.
  673. return createWrapper(no_thor, LINK(expr));
  674. }
  675. nonThor.append(*createWrapper(no_thor, createValue(no_parallel, makeVoidType(), thor)));
  676. return expr->clone(nonThor);
  677. }
  678. break;
  679. }
  680. case no_nothor:
  681. return LINK(expr);
  682. }
  683. return NewHqlTransformer::createTransformed(expr);
  684. }
  685. static YesNoOption combine(YesNoOption left, YesNoOption right, bool isUnion)
  686. {
  687. if ((left == OptionNo) || (right == OptionNo))
  688. return OptionNo;
  689. if (left == OptionUnknown)
  690. return right;
  691. if (right == OptionUnknown)
  692. return left;
  693. //Yes,Some,Maybe
  694. if (isUnion)
  695. {
  696. //return definite if both branches may benefit.
  697. switch (left)
  698. {
  699. case OptionYes:
  700. return (right != OptionMaybe) ? OptionYes : OptionSome;
  701. case OptionSome:
  702. return (right != OptionMaybe) ? OptionYes : OptionSome;
  703. case OptionMaybe:
  704. return (right != OptionMaybe) ? OptionSome : OptionMaybe;
  705. }
  706. }
  707. else
  708. {
  709. //Intersection, return definite
  710. switch (left)
  711. {
  712. case OptionYes:
  713. return (right == OptionYes) ? OptionYes : OptionSome;
  714. case OptionSome:
  715. return OptionSome;
  716. case OptionMaybe:
  717. return (right == OptionMaybe) ? OptionMaybe : OptionSome;
  718. }
  719. }
  720. throwUnexpected();
  721. }
  722. //MORE: Needs to be yes, no, possibly.
  723. YesNoOption HqlThorBoundaryTransformer::normalizeThor(IHqlExpression * expr)
  724. {
  725. HqlThorBoundaryInfo * extra = queryBodyExtra(expr);
  726. if (extra->normalize == OptionUnknown)
  727. extra->normalize = calcNormalizeThor(expr);
  728. return extra->normalize;
  729. }
  730. YesNoOption HqlThorBoundaryTransformer::calcNormalizeThor(IHqlExpression * expr)
  731. {
  732. //MORE: This should probably be cached in the extra info & recursed more correctly
  733. node_operator op = expr->getOperator();
  734. ITypeInfo * type = expr->queryType();
  735. switch (op)
  736. {
  737. case no_constant:
  738. case no_field:
  739. case no_record:
  740. case no_attr:
  741. case no_attr_expr:
  742. case no_attr_link:
  743. case no_getresult:
  744. case no_left:
  745. case no_right:
  746. case no_sizeof:
  747. case no_all:
  748. case no_self:
  749. case no_activerow:
  750. return OptionMaybe;
  751. case no_evaluate:
  752. throwUnexpected();
  753. case no_select:
  754. {
  755. bool isNew;
  756. IHqlExpression * ds = querySelectorDataset(expr, isNew);
  757. switch (ds->getOperator())
  758. {
  759. case no_getresult:
  760. case no_call:
  761. case no_externalcall:
  762. return normalizeThor(ds);
  763. case no_self:
  764. return OptionMaybe;
  765. }
  766. return isNew ? OptionYes : OptionMaybe;
  767. }
  768. case NO_AGGREGATE:
  769. case no_executewhen:
  770. return OptionYes;
  771. case NO_ACTION_REQUIRES_GRAPH:
  772. {
  773. if ((op == no_output) && isTrivialInlineOutput(expr))
  774. return OptionMaybe;
  775. return OptionYes;
  776. }
  777. case no_sequential: // do not do inside thor - stops graphs being merged.
  778. if (!resourceSequential)
  779. return OptionNo;
  780. // fallthrough
  781. case no_actionlist:
  782. case no_orderedactionlist:
  783. case no_parallel:
  784. {
  785. YesNoOption option = OptionUnknown;
  786. ForEachChild(idx, expr)
  787. {
  788. YesNoOption childOption = normalizeThor(expr->queryChild(idx));
  789. if (childOption == OptionNo)
  790. return OptionNo;
  791. bool fixedOrder = (op == no_sequential) || (op == no_orderedactionlist);
  792. option = combine(option, childOption, fixedOrder); // can reorder parallel - so intersection is better
  793. }
  794. return option;
  795. }
  796. case no_cluster:
  797. case no_nothor:
  798. return OptionNo;
  799. case no_if:
  800. if (type && (type->getTypeCode() == type_void))
  801. {
  802. if (resourceConditionalActions)
  803. {
  804. IHqlExpression * falseExpr = expr->queryChild(2);
  805. YesNoOption leftOption = normalizeThor(expr->queryChild(1));
  806. YesNoOption rightOption = falseExpr ? normalizeThor(falseExpr) : OptionMaybe;
  807. YesNoOption branchOption = combine(leftOption, rightOption, true);
  808. YesNoOption condOption = normalizeThor(expr->queryChild(0));
  809. if ((branchOption == OptionYes) && (condOption != OptionNo))
  810. return OptionYes;
  811. // if ((condOption == OptionYes) && (branchOption != OptionNo))
  812. // return OptionYes;
  813. return combine(condOption, branchOption, true);
  814. }
  815. return OptionNo; // not supported
  816. }
  817. // default action. Not completely convinced it is correct....
  818. break;
  819. case no_choose:
  820. if (type && (type->getTypeCode() == type_void))
  821. {
  822. if (resourceConditionalActions)
  823. {
  824. UNIMPLEMENTED;
  825. }
  826. }
  827. break;
  828. case no_setresult:
  829. {
  830. IHqlExpression * value = expr->queryChild(0);
  831. YesNoOption valueOption = normalizeThor(value);
  832. //Probably worth doing the whole thing in thor if some part if it needs to be - will improve commoning up if nothing else.
  833. if (valueOption == OptionSome)
  834. return OptionYes;
  835. return valueOption;
  836. }
  837. case no_extractresult:
  838. {
  839. IHqlExpression * ds = expr->queryChild(0);
  840. OwnedHqlExpr transformedDs = transform(ds);
  841. if (transformedDs != ds)
  842. return OptionYes;
  843. //Probably worth doing the whole thing in thor if some part if it needs to be - will improve commoning up if nothing else.
  844. return normalizeThor(expr->queryChild(1));
  845. }
  846. case no_compound:
  847. {
  848. YesNoOption leftOption = normalizeThor(expr->queryChild(0));
  849. YesNoOption rightOption = normalizeThor(expr->queryChild(1));
  850. return combine(leftOption, rightOption, true);
  851. }
  852. case no_call:
  853. {
  854. YesNoOption bodyOption = normalizeThor(expr->queryBody()->queryFunctionDefinition());
  855. //do Something with it
  856. break;
  857. }
  858. case no_externalcall:
  859. {
  860. IHqlExpression * func = expr->queryExternalDefinition();
  861. IHqlExpression * funcDef = func->queryChild(0);
  862. if (funcDef->hasAttribute(gctxmethodAtom) || funcDef->hasAttribute(globalContextAtom))
  863. return OptionNo;
  864. // if (funcDef->hasAttribute(graphAtom))
  865. // return OptionYes;
  866. if (!resourceConditionalActions && expr->isAction())
  867. return OptionNo;
  868. //depends on the results of the arguments..
  869. type = NULL; // don't check the return type
  870. break;
  871. }
  872. case no_setworkflow_cond:
  873. case no_ensureresult:
  874. return OptionNo;
  875. case no_null:
  876. return OptionMaybe;
  877. }
  878. //NB: things like NOT EXISTS we want evaluated as NOT THOR(EXISTS()), or as part of a larger context
  879. //otherwise things like klogermann14.xhql don't get EXISTS() csed between graphs.
  880. YesNoOption option = OptionMaybe;
  881. ForEachChild(idx, expr)
  882. {
  883. YesNoOption childOption = normalizeThor(expr->queryChild(idx));
  884. if (childOption == OptionNo)
  885. return OptionNo;
  886. option = combine(option, childOption, true);
  887. }
  888. if (type)
  889. {
  890. switch (type->getTypeCode())
  891. {
  892. case type_row:
  893. switch (op)
  894. {
  895. case no_fromxml:
  896. return option;
  897. case no_createrow:
  898. //MORE: There are more cases that could be evaluated outside of thor, but playing safe.
  899. if (expr->queryChild(0)->isConstant())
  900. return option;
  901. break;
  902. }
  903. return OptionYes;
  904. case type_groupedtable:
  905. case type_table:
  906. //must be a dataset parameter to a call, or an argument to a comparison
  907. //Need to know whether it can be evaluate inline or not.
  908. //if it does require thor, then we will need to generate a setresult/get result pair to do it.
  909. return !canProcessInline(NULL, expr) ? OptionYes : OptionMaybe;
  910. }
  911. }
  912. return option;
  913. }
  914. void HqlThorBoundaryTransformer::transformRoot(const HqlExprArray & in, HqlExprArray & out)
  915. {
  916. //NewHqlTransformer::transformArray(in, out);
  917. //following theoretically might improve things, but generally just causes code to become worse,
  918. //because all the global set results are done in activities, and there isn't cse between them
  919. transformCompound(out, no_actionlist, in, maxRootMaybes);
  920. }
  921. void HqlCppTranslator::markThorBoundaries(WorkflowItem & curWorkflow)
  922. {
  923. HqlExprArray & exprs = curWorkflow.queryExprs();
  924. HqlExprArray bounded;
  925. HqlThorBoundaryTransformer thorTransformer(wu(), targetRoxie(), options.maxRootMaybeThorActions, options.resourceConditionalActions, options.resourceSequential);
  926. thorTransformer.transformRoot(exprs, bounded);
  927. replaceArray(exprs, bounded);
  928. }
  929. //---------------------------------------------------------------------------
  930. // NB: This is called after no_setresults are added, but before any normalization.
  931. IHqlExpression * ThorScalarTransformer::queryAlreadyTransformed(IHqlExpression * expr)
  932. {
  933. bool conditional = isConditional();
  934. if (conditional)
  935. {
  936. IHqlExpression * ret = queryExtra(expr)->transformed[false];
  937. if (ret)
  938. return ret;
  939. }
  940. return queryExtra(expr)->transformed[isConditional()];
  941. }
  942. IHqlExpression * ThorScalarTransformer::queryAlreadyTransformedSelector(IHqlExpression * expr)
  943. {
  944. return queryExtra(expr)->transformedSelector[isConditional()];
  945. }
  946. void ThorScalarTransformer::setTransformed(IHqlExpression * expr, IHqlExpression * transformed)
  947. {
  948. queryExtra(expr)->transformed[isConditional()].set(transformed);
  949. }
  950. void ThorScalarTransformer::setTransformedSelector(IHqlExpression * expr, IHqlExpression * transformed)
  951. {
  952. queryExtra(expr)->transformedSelector[isConditional()].set(transformed);
  953. }
  954. static HqlTransformerInfo ThorScalarTransformerInfo("ThorScalarTransformer");
  955. ThorScalarTransformer::ThorScalarTransformer(const HqlCppOptions & _options) : HoistingHqlTransformer(ThorScalarTransformerInfo, CTFnoteifactions), options(_options)
  956. {
  957. isConditionalDepth = 0;
  958. seenCandidate = false;
  959. }
  960. void ThorScalarTransformer::doAnalyseExpr(IHqlExpression * expr)
  961. {
  962. switch (expr->getOperator())
  963. {
  964. case no_thor:
  965. {
  966. ITypeInfo * type = expr->queryType();
  967. if (type && (type->isScalar() || type->getTypeCode() == type_row))
  968. seenCandidate = true;
  969. //No point looking further than a no_thor they don't (currently) get nested within each other.
  970. return;
  971. }
  972. }
  973. HoistingHqlTransformer::doAnalyseExpr(expr);
  974. }
  975. void ThorScalarTransformer::createHoisted(IHqlExpression * expr, SharedHqlExpr & setResultStmt, SharedHqlExpr & getResult, bool addWrapper)
  976. {
  977. IHqlExpression * value = expr;
  978. HqlExprArray actions;
  979. while (value->getOperator() == no_compound)
  980. {
  981. unwindCommaCompound(actions, value->queryChild(0));
  982. value = value->queryChild(1);
  983. }
  984. IHqlExpression * setResult = createSetResult(value);
  985. getResult.setown(createGetResultFromSetResult(setResult));
  986. actions.append(*setResult);
  987. setResultStmt.setown(createActionList(actions));
  988. if (addWrapper)
  989. setResultStmt.setown(createValue(no_thor, makeVoidType(), setResultStmt.getClear()));
  990. }
  991. IHqlExpression * ThorScalarTransformer::createTransformed(IHqlExpression * expr)
  992. {
  993. if (expr->isConstant())
  994. {
  995. //THOR(NULL) is marked as constant (probably incorrectly), but still needs hoisting.
  996. // if ((expr->getOperator() != no_thor) || (expr->numChildren() == 0))
  997. return LINK(expr);
  998. }
  999. IHqlExpression * ret = queryTransformAnnotation(expr);
  1000. if (ret)
  1001. return ret;
  1002. node_operator op = expr->getOperator();
  1003. switch (op)
  1004. {
  1005. case no_if:
  1006. case no_chooseds:
  1007. {
  1008. bool isGuard = isFailureGuard(expr);
  1009. HqlExprArray children;
  1010. children.append(*transform(expr->queryChild(0)));
  1011. if (!isGuard)
  1012. isConditionalDepth++;
  1013. transformChildren(expr, children);
  1014. if (!isGuard)
  1015. isConditionalDepth--;
  1016. return cloneOrLink(expr, children);
  1017. }
  1018. case no_thor:
  1019. {
  1020. ITypeInfo * type = expr->queryType();
  1021. // if (isUsedUnconditionally(expr) && type && (type->isScalar() || type->getTypeCode() == type_row))
  1022. if ((type->isScalar() || type->getTypeCode() == type_row))
  1023. {
  1024. //only other solution is to have some kind of graph result which is returned.
  1025. assertex(options.workunitTemporaries);
  1026. OwnedHqlExpr getResult, setResult;
  1027. OwnedHqlExpr transformedChild = transform(expr->queryChild(0));
  1028. createHoisted(transformedChild, setResult, getResult, true);
  1029. //Note sure if this condition is needed any more - I suspect better without.
  1030. if (isConditional())
  1031. return createCompound(setResult.getClear(), getResult.getClear());
  1032. else
  1033. {
  1034. appendToTarget(*setResult.getClear());
  1035. return getResult.getClear();
  1036. }
  1037. }
  1038. return LINK(expr);
  1039. }
  1040. break;
  1041. case no_mapto:
  1042. if (isConditional())
  1043. {
  1044. HqlExprArray children;
  1045. isConditionalDepth--;
  1046. children.append(*transform(expr->queryChild(0)));
  1047. isConditionalDepth++;
  1048. children.append(*transform(expr->queryChild(1)));
  1049. return cloneOrLink(expr, children);
  1050. }
  1051. break;
  1052. case no_case:
  1053. case no_map:
  1054. {
  1055. HqlExprArray children;
  1056. unsigned firstArg = 0;
  1057. unsigned numChildren = expr->numChildren();
  1058. children.ensure(numChildren);
  1059. if (op != no_map)
  1060. {
  1061. firstArg = 1;
  1062. children.append(*transform(expr->queryChild(0)));
  1063. }
  1064. isConditionalDepth++;
  1065. for (unsigned idx=firstArg; idx < numChildren; idx++)
  1066. children.append(*transform(expr->queryChild(idx)));
  1067. isConditionalDepth--;
  1068. return cloneOrLink(expr, children);
  1069. }
  1070. case no_table:
  1071. //Don't look at the default values for fields in the table's record
  1072. return LINK(expr);
  1073. }
  1074. return HoistingHqlTransformer::createTransformed(expr);
  1075. }
  1076. //---------------------------------------------------------------------------
  1077. /*
  1078. Look for expressions like count(x) and abc[1].x inside a condition and convert to
  1079. setresult(<complicated>, 'x')
  1080. getresult('x')
  1081. Problems:
  1082. o it tries to keep get/set results within the conditional branches that need them,
  1083. but that means if it occurs in more than one then it will only get added once.
  1084. o It (and all transformers) should possibly always generate a setresult,getresult locally,
  1085. and then have another pass that moves all the setresults to the optimal place. E.g. to the highest shared location.
  1086. [Alternatively can a global, but conditional, root graph be generated for it?]
  1087. Also converts
  1088. setresult(thor(x)) to thor(setresult(x))
  1089. thor(scalar) to setresult(scalar,'x'),getresult('x')
  1090. */
  1091. static void normalizeResultFormat(WorkflowItem & curWorkflow, const HqlCppOptions & options)
  1092. {
  1093. //Until thor has a way of calling a graph and returning a result we need to call this transformer, so that
  1094. //scalars that need to be evaluated in thor are correctly hoisted.
  1095. ThorScalarTransformer transformer(options);
  1096. HqlExprArray & exprs = curWorkflow.queryExprs();
  1097. transformer.analyseArray(exprs, 0);
  1098. if (transformer.needToTransform())
  1099. {
  1100. HqlExprArray transformed;
  1101. transformer.transformRoot(exprs, transformed);
  1102. replaceArray(exprs, transformed);
  1103. }
  1104. }
  1105. //---------------------------------------------------------------------------
  1106. //Try and get the HOLe queries at the start and the Thor queries at the end.
  1107. //Keep track of the dependencies of the statements, so that they don't get reordered
  1108. //too aggressively.
  1109. //---------------------------------------------------------------------------
  1110. static HqlTransformerInfo sequenceNumberAllocatorInfo("SequenceNumberAllocator");
  1111. SequenceNumberAllocator::SequenceNumberAllocator(HqlCppTranslator & _translator) : NewHqlTransformer(sequenceNumberAllocatorInfo), translator(_translator)
  1112. {
  1113. applyDepth = 0;
  1114. sequence = 0;
  1115. }
  1116. void SequenceNumberAllocator::nextSequence(HqlExprArray & args, IHqlExpression * name, IAtom * overwriteAction, IHqlExpression * value, bool needAttr, bool * duplicate)
  1117. {
  1118. IHqlExpression * seq = NULL;
  1119. if (duplicate)
  1120. *duplicate = false;
  1121. if (name)
  1122. {
  1123. SharedHqlExpr * matched = namedMap.getValue(name);
  1124. if (matched)
  1125. {
  1126. StringBuffer nameText;
  1127. name->toString(nameText);
  1128. IHqlExpression * prev = matched->get();
  1129. if (prev->isAttribute())
  1130. {
  1131. IAtom * prevName = prev->queryName();
  1132. if (!overwriteAction)
  1133. {
  1134. if (prevName == extendAtom)
  1135. throwError1(HQLERR_ExtendMismatch, nameText.str());
  1136. else
  1137. throwError1(HQLERR_OverwriteMismatch, nameText.str());
  1138. }
  1139. else if (prevName != overwriteAction)
  1140. throwError1(HQLERR_ExtendOverwriteMismatch, nameText.str());
  1141. IHqlExpression * prevValue = prev->queryChild(1);
  1142. if (!recordTypesMatch(prevValue->queryType(), value->queryType()))
  1143. throwError1(HQLERR_ExtendTypeMismatch, nameText.str());
  1144. seq = LINK(prev->queryChild(0));
  1145. assertex(duplicate);
  1146. *duplicate = true;
  1147. }
  1148. else
  1149. {
  1150. if (overwriteAction)
  1151. {
  1152. if (overwriteAction == extendAtom)
  1153. throwError1(HQLERR_ExtendMismatch, nameText.str());
  1154. else
  1155. throwError1(HQLERR_OverwriteMismatch, nameText.str());
  1156. }
  1157. else
  1158. throwError1(HQLERR_DuplicateNameOutput, nameText.str());
  1159. }
  1160. }
  1161. if (!seq)
  1162. {
  1163. seq = createConstant(signedType->castFrom(true, (__int64)sequence++));
  1164. OwnedHqlExpr saveValue = overwriteAction ? createAttribute(overwriteAction, LINK(seq), LINK(value)) : LINK(seq);
  1165. namedMap.setValue(name, saveValue);
  1166. }
  1167. }
  1168. else
  1169. seq = createConstant(signedType->castFrom(true, (__int64)sequence++));
  1170. if (needAttr)
  1171. args.append(*createAttribute(sequenceAtom, seq));
  1172. else
  1173. args.append(*seq);
  1174. }
  1175. IHqlExpression * SequenceNumberAllocator::doTransformRootExpr(IHqlExpression * expr)
  1176. {
  1177. node_operator op = expr->getOperator();
  1178. switch(op)
  1179. {
  1180. case no_compound:
  1181. case no_comma:
  1182. case no_parallel:
  1183. case no_sequential:
  1184. case no_actionlist:
  1185. case no_orderedactionlist:
  1186. {
  1187. HqlExprArray args;
  1188. ForEachChild(idx, expr)
  1189. args.append(*doTransformRootExpr(expr->queryChild(idx)));
  1190. return cloneOrLink(expr, args);
  1191. }
  1192. case no_buildindex:
  1193. case no_output:
  1194. case no_apply:
  1195. case no_distribution:
  1196. case no_keydiff:
  1197. case no_keypatch:
  1198. case no_outputscalar:
  1199. return createTransformed(expr); //NB: Do not common up!!!
  1200. case no_setmeta:
  1201. return LINK(expr);
  1202. default:
  1203. {
  1204. OwnedHqlExpr transformed = transform(expr);
  1205. ITypeInfo * type = transformed->queryType();
  1206. if (type && type->getTypeCode() != type_void)
  1207. {
  1208. HqlExprArray args;
  1209. bool isOuterWorkflow = (op == no_colon) && workflowContainsSchedule(transformed);
  1210. assertex(!isOuterWorkflow || !workflowContainsNonSchedule(transformed));
  1211. if (isOuterWorkflow)
  1212. args.append(*LINK(transformed->queryChild(0)));
  1213. else
  1214. args.append(*LINK(transformed));
  1215. nextSequence(args, NULL, NULL, NULL, true, NULL);
  1216. IHqlExpression * ret = createSetResult(args);
  1217. if (isOuterWorkflow)
  1218. {
  1219. args.kill();
  1220. args.append(*ret);
  1221. unwindChildren(args, transformed, 1);
  1222. ret = transformed->clone(args);
  1223. }
  1224. return ret;
  1225. }
  1226. return LINK(transformed);
  1227. }
  1228. }
  1229. }
  1230. IHqlExpression * SequenceNumberAllocator::createTransformed(IHqlExpression * expr)
  1231. {
  1232. switch (expr->getOperator())
  1233. {
  1234. case no_actionlist:
  1235. return doTransformRootExpr(expr);
  1236. case no_apply:
  1237. {
  1238. HqlExprArray args;
  1239. args.append(*transform(expr->queryChild(0)));
  1240. applyDepth++;
  1241. args.append(*transform(expr->queryChild(1)));
  1242. applyDepth--;
  1243. OwnedHqlExpr ret = completeTransform(expr, args);
  1244. return attachSequenceNumber(ret);
  1245. }
  1246. case no_outputscalar:
  1247. if (applyDepth)
  1248. {
  1249. translator.WARNINGAT(expr, HQLERR_ScalarOutputWithinApply);
  1250. }
  1251. break;
  1252. }
  1253. Owned<IHqlExpression> transformed = NewHqlTransformer::createTransformed(expr);
  1254. return attachSequenceNumber(transformed.get());
  1255. }
  1256. static IAtom * queryOverwriteAction(IHqlExpression * expr)
  1257. {
  1258. if (expr->hasAttribute(extendAtom))
  1259. return extendAtom;
  1260. if (expr->hasAttribute(overwriteAtom))
  1261. return overwriteAtom;
  1262. if (expr->hasAttribute(noOverwriteAtom))
  1263. return noOverwriteAtom;
  1264. return NULL;
  1265. }
  1266. IHqlExpression * SequenceNumberAllocator::attachSequenceNumber(IHqlExpression * expr)
  1267. {
  1268. switch (expr->getOperator())
  1269. {
  1270. case no_buildindex:
  1271. case no_output:
  1272. case no_apply:
  1273. case no_distribution:
  1274. case no_keydiff:
  1275. case no_keypatch:
  1276. {
  1277. queryExtra(expr)->setGetsSequence();
  1278. HqlExprArray args;
  1279. unwindChildren(args, expr);
  1280. bool duplicate = false;
  1281. nextSequence(args, queryResultName(expr), queryOverwriteAction(expr), expr->queryChild(0), true, &duplicate);
  1282. args.append(*createUniqueId());
  1283. return expr->clone(args);
  1284. }
  1285. break;
  1286. case no_outputscalar:
  1287. {
  1288. IHqlExpression * name = queryResultName(expr);
  1289. queryExtra(expr)->setGetsSequence();
  1290. HqlExprArray args;
  1291. args.append(*LINK(expr->queryChild(0)));
  1292. bool duplicate = false;
  1293. nextSequence(args, name, queryOverwriteAction(expr), expr->queryChild(0), true, &duplicate);
  1294. if (name)
  1295. args.append(*createAttribute(namedAtom, LINK(name)));
  1296. args.append(*createAttribute(outputAtom));
  1297. args.append(*createUniqueId());
  1298. return createSetResult(args);
  1299. }
  1300. default:
  1301. return LINK(expr);
  1302. }
  1303. }
  1304. void HqlCppTranslator::allocateSequenceNumbers(HqlExprArray & exprs)
  1305. {
  1306. HqlExprArray sequenced;
  1307. SequenceNumberAllocator transformer(*this);
  1308. transformer.transformRoot(exprs, sequenced);
  1309. replaceArray(exprs, sequenced);
  1310. maxSequence = transformer.getMaxSequence();
  1311. }
  1312. //---------------------------------------------------------------------------
  1313. static void replaceAssignSelector(HqlExprArray & assigns, IHqlExpression * newSelector)
  1314. {
  1315. ForEachItemIn(idx, assigns)
  1316. {
  1317. IHqlExpression & cur = assigns.item(idx);
  1318. IHqlExpression * lhs = cur.queryChild(0);
  1319. IHqlExpression * rhs = cur.queryChild(1);
  1320. assigns.replace(*createAssign(replaceSelector(lhs, queryActiveTableSelector(), newSelector), replaceSelector(rhs, queryActiveTableSelector(), newSelector)), idx);
  1321. }
  1322. }
  1323. static IHqlExpression * createArith(node_operator op, ITypeInfo * type, IHqlExpression * numerator, IHqlExpression * denominator)
  1324. {
  1325. return createValue(op, LINK(type), ensureExprType(numerator, type), ensureExprType(denominator, type));
  1326. }
  1327. //MORE: This might be better as a class, it would reduce the number of parameters
  1328. IHqlExpression * doNormalizeAggregateExpr(IHqlExpression * selector, IHqlExpression * expr, HqlExprArray & fields, HqlExprArray & assigns, bool & extraSelectNeeded, bool canOptimizeCasts);
  1329. IHqlExpression * evalNormalizeAggregateExpr(IHqlExpression * selector, IHqlExpression * expr, HqlExprArray & fields, HqlExprArray & assigns, bool & extraSelectNeeded, bool canOptimizeCasts)
  1330. {
  1331. switch (expr->getOperator())
  1332. {
  1333. case no_avegroup:
  1334. //Map this to sum(x)/count(x)
  1335. {
  1336. IHqlExpression * arg = expr->queryChild(0);
  1337. IHqlExpression * cond = expr->queryChild(1);
  1338. Owned<ITypeInfo> sumType = getSumAggType(arg);
  1339. ITypeInfo * exprType = expr->queryType();
  1340. OwnedHqlExpr sum = createValue(no_sumgroup, LINK(sumType), LINK(arg), LINK(cond));
  1341. OwnedHqlExpr count = createValue(no_countgroup, LINK(defaultIntegralType), LINK(cond));
  1342. //average should be done as a real operation I think, possibly decimal, if argument is decimal
  1343. OwnedHqlExpr avg = createArith(no_div, exprType, sum, count);
  1344. return doNormalizeAggregateExpr(selector, avg, fields, assigns, extraSelectNeeded, false);
  1345. }
  1346. case no_vargroup:
  1347. //Map this to (sum(x^2)-sum(x)^2/count())/count()
  1348. {
  1349. IHqlExpression * arg = expr->queryChild(0);
  1350. IHqlExpression * cond = expr->queryChild(1);
  1351. ITypeInfo * exprType = expr->queryType();
  1352. OwnedHqlExpr xx = createArith(no_mul, exprType, arg, arg);
  1353. OwnedHqlExpr sumxx = createValue(no_sumgroup, LINK(exprType), LINK(xx), LINK(cond));
  1354. OwnedHqlExpr sumx = createValue(no_sumgroup, LINK(exprType), LINK(arg), LINK(cond));
  1355. OwnedHqlExpr count = createValue(no_countgroup, LINK(defaultIntegralType), LINK(cond));
  1356. //average should be done as a real operation I think, possibly decimal, if argument is decimal
  1357. OwnedHqlExpr n1 = createArith(no_mul, exprType, sumx, sumx);
  1358. OwnedHqlExpr n2 = createArith(no_div, exprType, n1, count);
  1359. OwnedHqlExpr n3 = createArith(no_sub, exprType, sumxx, n2);
  1360. OwnedHqlExpr n4 = createArith(no_div, exprType, n3, count);
  1361. return doNormalizeAggregateExpr(selector, n4, fields, assigns, extraSelectNeeded, false);
  1362. }
  1363. case no_covargroup:
  1364. //Map this to (sum(x.y)-sum(x).sum(y)/count())/count()
  1365. {
  1366. IHqlExpression * argX = expr->queryChild(0);
  1367. IHqlExpression * argY = expr->queryChild(1);
  1368. IHqlExpression * cond = expr->queryChild(2);
  1369. ITypeInfo * exprType = expr->queryType();
  1370. OwnedHqlExpr xy = createArith(no_mul, exprType, argX, argY);
  1371. OwnedHqlExpr sumxy = createValue(no_sumgroup, LINK(exprType), LINK(xy), LINK(cond));
  1372. OwnedHqlExpr sumx = createValue(no_sumgroup, LINK(exprType), LINK(argX), LINK(cond));
  1373. OwnedHqlExpr sumy = createValue(no_sumgroup, LINK(exprType), LINK(argY), LINK(cond));
  1374. OwnedHqlExpr count = createValue(no_countgroup, LINK(defaultIntegralType), LINK(cond));
  1375. //average should be done as a real operation I think, possibly decimal, if argument is decimal
  1376. OwnedHqlExpr n1 = createArith(no_mul, exprType, sumx, sumy);
  1377. OwnedHqlExpr n2 = createArith(no_div, exprType, n1, count);
  1378. OwnedHqlExpr n3 = createArith(no_sub, exprType, sumxy, n2);
  1379. OwnedHqlExpr n4 = createArith(no_div, exprType, n3, count);
  1380. return doNormalizeAggregateExpr(selector, n4, fields, assigns, extraSelectNeeded, false);
  1381. }
  1382. case no_corrgroup:
  1383. //Map this to (covar(x,y)/(var(x).var(y)))
  1384. //== (sum(x.y)*count() - sum(x).sum(y))/sqrt((sum(x.x)*count()-sum(x)^2) * (sum(y.y)*count()-sum(y)^2))
  1385. {
  1386. IHqlExpression * argX = expr->queryChild(0);
  1387. IHqlExpression * argY = expr->queryChild(1);
  1388. IHqlExpression * cond = expr->queryChild(2);
  1389. ITypeInfo * exprType = expr->queryType();
  1390. OwnedHqlExpr xx = createArith(no_mul, exprType, argX, argX);
  1391. OwnedHqlExpr sumxx = createValue(no_sumgroup, LINK(exprType), LINK(xx), LINK(cond));
  1392. OwnedHqlExpr xy = createArith(no_mul, exprType, argX, argY);
  1393. OwnedHqlExpr sumxy = createValue(no_sumgroup, LINK(exprType), LINK(xy), LINK(cond));
  1394. OwnedHqlExpr yy = createArith(no_mul, exprType, argY, argY);
  1395. OwnedHqlExpr sumyy = createValue(no_sumgroup, LINK(exprType), LINK(yy), LINK(cond));
  1396. OwnedHqlExpr sumx = createValue(no_sumgroup, LINK(exprType), LINK(argX), LINK(cond));
  1397. OwnedHqlExpr sumy = createValue(no_sumgroup, LINK(exprType), LINK(argY), LINK(cond));
  1398. OwnedHqlExpr count = createValue(no_countgroup, LINK(defaultIntegralType), LINK(cond));
  1399. OwnedHqlExpr n1 = createArith(no_mul, exprType, sumxy, count);
  1400. OwnedHqlExpr n2 = createArith(no_mul, exprType, sumx, sumy);
  1401. OwnedHqlExpr n3 = createArith(no_sub, exprType, n1, n2);
  1402. OwnedHqlExpr n4 = createArith(no_mul, exprType, sumxx, count);
  1403. OwnedHqlExpr n5 = createArith(no_mul, exprType, sumx, sumx);
  1404. OwnedHqlExpr n6 = createArith(no_sub, exprType, n4, n5);
  1405. OwnedHqlExpr n7 = createArith(no_mul, exprType, sumyy, count);
  1406. OwnedHqlExpr n8 = createArith(no_mul, exprType, sumy, sumy);
  1407. OwnedHqlExpr n9 = createArith(no_sub, exprType, n7, n8);
  1408. OwnedHqlExpr n10 = createArith(no_mul, exprType, n6, n9);
  1409. OwnedHqlExpr n11 = createValue(no_sqrt, LINK(exprType), LINK(n10));
  1410. OwnedHqlExpr n12 = createArith(no_div, exprType, n3, n11);
  1411. return doNormalizeAggregateExpr(selector, n12, fields, assigns, extraSelectNeeded, false);
  1412. }
  1413. throwUnexpected();
  1414. case no_variance:
  1415. case no_covariance:
  1416. case no_correlation:
  1417. throwUnexpectedOp(expr->getOperator());
  1418. case no_count:
  1419. case no_sum:
  1420. case no_max:
  1421. case no_min:
  1422. case no_ave:
  1423. case no_select:
  1424. case no_exists:
  1425. case no_field:
  1426. // a count on a child dataset or something else - add it as it is...
  1427. //goes wrong for count(group)*
  1428. return LINK(expr);
  1429. case no_countgroup:
  1430. case no_sumgroup:
  1431. case no_maxgroup:
  1432. case no_mingroup:
  1433. case no_existsgroup:
  1434. {
  1435. ForEachItemIn(idx, assigns)
  1436. {
  1437. IHqlExpression & cur = assigns.item(idx);
  1438. if (cur.queryChild(1) == expr)
  1439. {
  1440. extraSelectNeeded = true;
  1441. return LINK(cur.queryChild(0)); //replaceSelector(cur.queryChild(0), querySelf(), queryActiveTableSelector());
  1442. }
  1443. }
  1444. IHqlExpression * targetField;
  1445. if (selector)
  1446. {
  1447. targetField = LINK(selector->queryChild(1));
  1448. }
  1449. else
  1450. {
  1451. StringBuffer temp;
  1452. temp.append("_agg_").append(assigns.ordinality());
  1453. targetField = createField(createIdAtom(temp.str()), expr->getType(), NULL);
  1454. extraSelectNeeded = true;
  1455. }
  1456. fields.append(*targetField);
  1457. assigns.append(*createAssign(createSelectExpr(getActiveTableSelector(), LINK(targetField)), LINK(expr)));
  1458. return createSelectExpr(getActiveTableSelector(), LINK(targetField));
  1459. }
  1460. case no_cast:
  1461. case no_implicitcast:
  1462. if (selector && canOptimizeCasts)
  1463. {
  1464. IHqlExpression * child = expr->queryChild(0);
  1465. if (expr->queryType()->getTypeCode() == child->queryType()->getTypeCode())
  1466. {
  1467. IHqlExpression * ret = doNormalizeAggregateExpr(selector, child, fields, assigns, extraSelectNeeded, false);
  1468. //This should be ret==child
  1469. if (ret == selector)
  1470. return ret;
  1471. HqlExprArray args;
  1472. args.append(*ret);
  1473. return expr->clone(args);
  1474. }
  1475. }
  1476. //fallthrough...
  1477. default:
  1478. {
  1479. HqlExprArray args;
  1480. unsigned max = expr->numChildren();
  1481. unsigned idx;
  1482. bool diff = false;
  1483. args.ensure(max);
  1484. for (idx = 0; idx < max; idx++)
  1485. {
  1486. IHqlExpression * child = expr->queryChild(idx);
  1487. IHqlExpression * changed = doNormalizeAggregateExpr(NULL, child, fields, assigns, extraSelectNeeded, false);
  1488. args.append(*changed);
  1489. if (child != changed)
  1490. diff = true;
  1491. }
  1492. if (diff)
  1493. return expr->clone(args);
  1494. return LINK(expr);
  1495. }
  1496. }
  1497. }
  1498. IHqlExpression * doNormalizeAggregateExpr(IHqlExpression * selector, IHqlExpression * expr, HqlExprArray & fields, HqlExprArray & assigns, bool & extraSelectNeeded, bool canOptimizeCasts)
  1499. {
  1500. IHqlExpression * match = static_cast<IHqlExpression *>(expr->queryTransformExtra());
  1501. if (match)
  1502. return LINK(match);
  1503. IHqlExpression * ret = evalNormalizeAggregateExpr(selector, expr, fields, assigns, extraSelectNeeded, canOptimizeCasts);
  1504. expr->setTransformExtra(ret);
  1505. return ret;
  1506. }
  1507. IHqlExpression * normalizeAggregateExpr(IHqlExpression * selector, IHqlExpression * expr, HqlExprArray & fields, HqlExprArray & assigns, bool & extraSelectNeeded, bool canOptimizeCasts)
  1508. {
  1509. TransformMutexBlock block;
  1510. return doNormalizeAggregateExpr(selector, expr, fields, assigns, extraSelectNeeded, canOptimizeCasts);
  1511. }
  1512. //---------------------------------------------------------------------------
  1513. static void appendComponent(HqlExprArray & cpts, bool invert, IHqlExpression * expr)
  1514. {
  1515. if (invert)
  1516. cpts.append(*createValue(no_negate, expr->getType(), LINK(expr)));
  1517. else
  1518. cpts.append(*LINK(expr));
  1519. }
  1520. static void expandRowComponents(HqlExprArray & cpts, bool invert, IHqlExpression * select, IHqlExpression * record)
  1521. {
  1522. ForEachChild(i, record)
  1523. {
  1524. IHqlExpression * cur = record->queryChild(i);
  1525. switch (cur->getOperator())
  1526. {
  1527. case no_record:
  1528. expandRowComponents(cpts, invert, select, cur);
  1529. break;
  1530. case no_ifblock:
  1531. expandRowComponents(cpts, invert, select, cur->queryChild(1));
  1532. break;
  1533. case no_field:
  1534. {
  1535. OwnedHqlExpr childSelect = createSelectExpr(LINK(select), LINK(cur));
  1536. if (!childSelect->isDatarow())
  1537. appendComponent(cpts, invert, childSelect);
  1538. else
  1539. expandRowComponents(cpts, invert, childSelect, childSelect->queryRecord());
  1540. break;
  1541. }
  1542. }
  1543. }
  1544. }
  1545. static IHqlExpression * simplifySortlistComplexity(IHqlExpression * sortlist)
  1546. {
  1547. if (!sortlist)
  1548. return NULL;
  1549. //convert concat on fixed width strings to a list of fields.
  1550. bool same = true;
  1551. HqlExprArray cpts;
  1552. ForEachChild(idx, sortlist)
  1553. {
  1554. IHqlExpression * cpt = sortlist->queryChild(idx);
  1555. IHqlExpression * cur = cpt;
  1556. bool expand = false;
  1557. bool invert = false;
  1558. if (cpt->getOperator() == no_negate)
  1559. {
  1560. invert = true;
  1561. cur = cur->queryChild(0);
  1562. }
  1563. if (cur->getOperator() == no_concat)
  1564. {
  1565. HqlExprArray concats;
  1566. cur->unwindList(concats, no_concat);
  1567. expand = true;
  1568. ForEachItemIn(idxc, concats)
  1569. {
  1570. ITypeInfo * type = concats.item(idxc).queryType();
  1571. unsigned tc = type->getTypeCode();
  1572. if (!((tc == type_string || tc == type_data) && (type->getSize() != UNKNOWN_LENGTH)))
  1573. expand = false;
  1574. }
  1575. if (expand)
  1576. {
  1577. ForEachItemIn(idxc, concats)
  1578. appendComponent(cpts, invert, &concats.item(idxc));
  1579. }
  1580. }
  1581. else
  1582. {
  1583. #if 0
  1584. if (cur->getOperator() == no_select && cur->isDatarow() && !cur->hasAttribute(newAtom))
  1585. {
  1586. expand = true;
  1587. expandRowComponents(cpts, invert, cur, cur->queryRecord());
  1588. }
  1589. #endif
  1590. }
  1591. if (!expand)
  1592. cpts.append(*LINK(cpt));
  1593. else
  1594. same = false;
  1595. }
  1596. if (!same)
  1597. return createSortList(cpts);
  1598. return NULL;
  1599. }
  1600. static IHqlExpression * normalizeIndexBuild(IHqlExpression * expr, bool sortIndexPayload, bool alwaysLocal, bool allowImplicitSubSort)
  1601. {
  1602. LinkedHqlExpr dataset = expr->queryChild(0);
  1603. IHqlExpression * normalizedDs = dataset->queryNormalizedSelector();
  1604. IHqlExpression * buildRecord = dataset->queryRecord();
  1605. // If any field types collate differently before and after translation to their hozed
  1606. // format, then we need to do the translation here, otherwise this
  1607. // sort may not be in the correct order. (ebcdic->ascii? integers are ok; unicode isn't!)
  1608. // First build the sort order we need....
  1609. HqlExprArray sorts;
  1610. gatherIndexBuildSortOrder(sorts, expr, sortIndexPayload);
  1611. OwnedHqlExpr sortOrder = createSortList(sorts);
  1612. OwnedHqlExpr newsort = simplifySortlistComplexity(sortOrder);
  1613. if (!newsort)
  1614. newsort.set(sortOrder);
  1615. ForEachChild(i1, expr)
  1616. {
  1617. IHqlExpression * cur = expr->queryChild(i1);
  1618. if (cur->getOperator() == no_distributer)
  1619. {
  1620. LinkedHqlExpr ds = dataset;
  1621. IHqlExpression * index = cur->queryChild(0);
  1622. if (!expr->hasAttribute(sortedAtom))
  1623. {
  1624. if (!expr->hasAttribute(localAtom))
  1625. {
  1626. HqlExprArray joinCondition;
  1627. IHqlExpression * indexRecord = index->queryChild(1);
  1628. assertex(indexRecord->numChildren() == buildRecord->numChildren());
  1629. unsigned numFields = firstPayloadField(index);
  1630. OwnedHqlExpr seq = createSelectorSequence();
  1631. OwnedHqlExpr left = createSelector(no_left, dataset, seq);
  1632. OwnedHqlExpr right = createSelector(no_right, index, seq);
  1633. OwnedHqlExpr cond;
  1634. unsigned idxLhs = 0;
  1635. unsigned idxRhs = 0;
  1636. for (unsigned i2=0; i2 < numFields; i2++)
  1637. {
  1638. IHqlExpression * lhs = createSelectExpr(LINK(left), LINK(queryNextRecordField(buildRecord, idxLhs)));
  1639. IHqlExpression * rhs = createSelectExpr(LINK(right), LINK(queryNextRecordField(indexRecord, idxRhs)));
  1640. IHqlExpression * test = createBoolExpr(no_eq, lhs, rhs);
  1641. extendConditionOwn(cond, no_and, test);
  1642. }
  1643. HqlExprArray args;
  1644. args.append(*ds.getClear());
  1645. args.append(*LINK(index));
  1646. args.append(*cond.getClear());
  1647. args.append(*LINK(seq));
  1648. ds.setown(createDataset(no_keyeddistribute, args));
  1649. ds.setown(cloneInheritedAnnotations(expr, ds));
  1650. }
  1651. ds.setown(createDataset(no_sort, ds.getClear(), createComma(LINK(newsort), createLocalAttribute())));
  1652. ds.setown(cloneInheritedAnnotations(expr, ds));
  1653. }
  1654. if (expr->hasAttribute(mergeAtom))
  1655. {
  1656. LinkedHqlExpr sortedIndex = index;
  1657. if (!index->hasAttribute(sortedAtom))
  1658. {
  1659. HqlExprArray args;
  1660. unwindChildren(args, index);
  1661. args.append(*createAttribute(sortedAtom));
  1662. sortedIndex.setown(index->clone(args));
  1663. }
  1664. HqlExprArray sorts;
  1665. unwindChildren(sorts, newsort);
  1666. OwnedHqlExpr sortAttr = createExprAttribute(sortedAtom, sorts);
  1667. HqlExprArray args;
  1668. args.append(*LINK(ds));
  1669. args.append(*sortedIndex.getClear());
  1670. args.append(*createLocalAttribute());
  1671. args.append(*replaceSelector(sortAttr, ds->queryNormalizedSelector(), queryActiveTableSelector()));
  1672. ds.setown(createDataset(no_merge, args));
  1673. ds.setown(cloneInheritedAnnotations(expr, ds));
  1674. }
  1675. HqlExprArray args;
  1676. unwindChildren(args, expr);
  1677. args.replace(*ds.getClear(), 0);
  1678. args.remove(i1);
  1679. args.append(*createAttribute(sortedAtom));
  1680. args.append(*createLocalAttribute());
  1681. args.append(*createAttribute(indexAtom, LINK(index->queryChild(3))));
  1682. return expr->clone(args);
  1683. }
  1684. }
  1685. IHqlExpression * distributed = expr->queryAttribute(distributedAtom);
  1686. if (distributed && distributed->queryChild(0))
  1687. {
  1688. OwnedHqlExpr distribute = createDataset(no_distribute, LINK(dataset), LINK(distributed->queryChild(0)));
  1689. distribute.setown(cloneInheritedAnnotations(expr, distribute));
  1690. HqlExprArray args;
  1691. args.append(*distribute.getClear());
  1692. unwindChildren(args, expr, 1);
  1693. args.zap(*distributed);
  1694. return expr->clone(args);
  1695. }
  1696. if (!expr->hasAttribute(sortedAtom))
  1697. {
  1698. if (dataset->queryType()->getTypeCode() == type_groupedtable)
  1699. {
  1700. while (dataset->getOperator() == no_group)
  1701. dataset.set(dataset->queryChild(0));
  1702. if (dataset->queryType()->getTypeCode() == type_groupedtable)
  1703. {
  1704. dataset.setown(createDataset(no_group, LINK(dataset), NULL));
  1705. dataset.setown(cloneInheritedAnnotations(expr, dataset));
  1706. }
  1707. }
  1708. OwnedHqlExpr sorted = ensureSorted(dataset, newsort, expr->hasAttribute(localAtom), true, alwaysLocal, allowImplicitSubSort);
  1709. if (sorted == dataset)
  1710. return NULL;
  1711. sorted.setown(inheritAttribute(sorted, expr, skewAtom));
  1712. sorted.setown(inheritAttribute(sorted, expr, thresholdAtom));
  1713. HqlExprArray args;
  1714. args.append(*LINK(sorted));
  1715. unwindChildren(args, expr, 1);
  1716. args.append(*createAttribute(sortedAtom));
  1717. return expr->clone(args);
  1718. }
  1719. if (expr->hasAttribute(dedupAtom))
  1720. {
  1721. IHqlExpression * ds = expr->queryChild(0);
  1722. OwnedHqlExpr seq = createSelectorSequence();
  1723. OwnedHqlExpr mappedSortList = replaceSelector(newsort, queryActiveTableSelector(), ds);
  1724. HqlExprArray dedupArgs;
  1725. dedupArgs.append(*LINK(expr->queryChild(0)));
  1726. unwindChildren(dedupArgs, mappedSortList);
  1727. dedupArgs.append(*createLocalAttribute());
  1728. dedupArgs.append(*LINK(seq));
  1729. OwnedHqlExpr dedup = createDataset(no_dedup, dedupArgs);
  1730. HqlExprArray buildArgs;
  1731. buildArgs.append(*cloneInheritedAnnotations(expr, dedup));
  1732. unwindChildren(buildArgs, expr, 1);
  1733. removeAttribute(buildArgs, dedupAtom);
  1734. return expr->clone(buildArgs);
  1735. }
  1736. return NULL;
  1737. }
  1738. static HqlTransformerInfo thorHqlTransformerInfo("ThorHqlTransformer");
  1739. ThorHqlTransformer::ThorHqlTransformer(HqlCppTranslator & _translator, ClusterType _targetClusterType, IConstWorkUnit * wu)
  1740. : NewHqlTransformer(thorHqlTransformerInfo), translator(_translator), options(_translator.queryOptions())
  1741. {
  1742. targetClusterType = _targetClusterType;
  1743. topNlimit = options.topnLimit;
  1744. groupAllDistribute = isThorCluster(targetClusterType) && options.groupAllDistribute;
  1745. }
  1746. IHqlExpression * ThorHqlTransformer::createTransformed(IHqlExpression * expr)
  1747. {
  1748. OwnedHqlExpr transformed = PARENT::createTransformed(expr);
  1749. updateOrphanedSelectors(transformed, expr);
  1750. IHqlExpression * normalized = NULL;
  1751. switch (transformed->getOperator())
  1752. {
  1753. case no_group:
  1754. normalized = normalizeGroup(transformed);
  1755. break;
  1756. case no_join:
  1757. case no_selfjoin:
  1758. case no_denormalize:
  1759. case no_denormalizegroup:
  1760. if (transformed->hasAttribute(groupAtom))
  1761. normalized = normalizeJoinAndGroup(transformed);
  1762. else
  1763. normalized = normalizeJoinOrDenormalize(transformed);
  1764. break;
  1765. case no_cosort:
  1766. case no_sort:
  1767. case no_sorted:
  1768. case no_assertsorted:
  1769. normalized = normalizeSort(transformed);
  1770. break;
  1771. case no_subsort:
  1772. normalized = normalizeSubSort(transformed);
  1773. break;
  1774. case no_cogroup:
  1775. normalized = normalizeCoGroup(transformed);
  1776. break;
  1777. case no_choosen:
  1778. normalized = normalizeChooseN(transformed);
  1779. break;
  1780. case no_aggregate:
  1781. normalized = normalizeTableGrouping(transformed);
  1782. break;
  1783. case no_newusertable:
  1784. normalized = normalizeTableGrouping(transformed);
  1785. break;
  1786. case no_newaggregate:
  1787. normalized = normalizeTableGrouping(transformed);
  1788. if (!normalized)
  1789. normalized = normalizeTableToAggregate(transformed, true);
  1790. if (!normalized || (normalized == transformed))
  1791. normalized = normalizePrefetchAggregate(transformed);
  1792. break;
  1793. case no_dedup:
  1794. normalized = normalizeDedup(transformed);
  1795. break;
  1796. case no_rollup:
  1797. normalized = normalizeRollup(transformed);
  1798. break;
  1799. case no_select:
  1800. normalized = normalizeSelect(transformed);
  1801. break;
  1802. case no_temptable:
  1803. normalized = normalizeTempTable(transformed);
  1804. break;
  1805. //MORE should do whole aggregate expression e.g., max(x)-min(x)
  1806. case NO_AGGREGATE:
  1807. normalized = normalizeScalarAggregate(transformed);
  1808. break;
  1809. case no_setresult:
  1810. normalized = convertSetResultToExtract(transformed);
  1811. break;
  1812. case no_projectrow:
  1813. {
  1814. IHqlExpression * ds = transformed->queryChild(0);
  1815. if (isAlwaysActiveRow(ds))
  1816. {
  1817. //Transform PROJECT(row, transform) to a ROW(transform') since more efficient
  1818. OwnedHqlExpr myLeft = createSelector(no_left, ds, querySelSeq(transformed));
  1819. OwnedHqlExpr replaced = replaceSelector(transformed->queryChild(1), myLeft, ds);
  1820. normalized = createRow(no_createrow, LINK(replaced));
  1821. }
  1822. break;
  1823. }
  1824. case no_debug_option_value:
  1825. //pick best engine etc. definitely done by now, so substitute any options that haven't been processed already
  1826. return getDebugValueExpr(translator.wu(), expr);
  1827. }
  1828. if (normalized && (normalized != transformed))
  1829. {
  1830. transformed.setown(transform(normalized));
  1831. normalized->Release();
  1832. }
  1833. /*
  1834. //Has a minor impact on unnecessary local attributes
  1835. if (!translator.targetThor() && transformed->hasAttribute(localAtom) && localChangesActivityAction(transformed))
  1836. return removeAttribute(transformed, localAtom);
  1837. */
  1838. return transformed.getClear();
  1839. }
  1840. static IHqlExpression * convertDedupToGroupedDedup(IHqlExpression * expr, IHqlExpression * grouping, bool compareAll)
  1841. {
  1842. IHqlExpression * localAttr = expr->queryAttribute(localAtom);
  1843. HqlExprArray groupArgs;
  1844. groupArgs.append(*LINK(expr->queryChild(0)));
  1845. groupArgs.append(*LINK(grouping));
  1846. if (compareAll)
  1847. groupArgs.append(*createAttribute(allAtom));
  1848. if (localAttr)
  1849. groupArgs.append(*LINK(localAttr));
  1850. //Ideally this would remove the equality conditions from the dedup, but ok since they are ignored later when generating
  1851. OwnedHqlExpr group = createDataset(no_group, groupArgs);
  1852. group.setown(cloneInheritedAnnotations(expr, group));
  1853. HqlExprArray dedupArgs;
  1854. dedupArgs.append(*LINK(group));
  1855. unwindChildren(dedupArgs, expr, 1);
  1856. removeAttribute(dedupArgs, localAtom); //(since now a grouped dedup)
  1857. OwnedHqlExpr ungroup = createDataset(no_group, expr->clone(dedupArgs), NULL);
  1858. return cloneInheritedAnnotations(expr, ungroup);
  1859. }
  //Rewrite a DEDUP into a form that executes more efficiently on the target engine.
  //Returns a replacement expression, or NULL when the dedup is left unchanged.
  //The rewrites attempted (in order) are:
  //  * DEDUP,ALL,MANY (global)  -> local dedup followed by the global dedup
  //  * global dedup on data already partitioned by the equalities -> add LOCAL (and HASH for ALL)
  //  * DEDUP,ALL on already sorted data -> drop ALL (and HASH), grouping first if global
  //  * DEDUP,ALL with extra conditions  -> group all, dedup, ungroup
  //  * DEDUP,ALL,LOCAL on thor          -> sort then dedup without ALL
  //  * other DEDUP,ALL keeping 1 record (not keeping left) -> add HASH
  //  * global non-ALL dedup             -> group, dedup, ungroup
  1860. IHqlExpression * ThorHqlTransformer::normalizeDedup(IHqlExpression * expr)
  1861. {
  1862. if (isGroupedActivity(expr))
  1863. {
  1864. //MORE: It should be possible to remove ,ALL if no conditions and
  1865. //the equalities (ignoring any grouping conditions) match the group sort order
  1866. return NULL;
  1867. }
  1868. // DEDUP, ALL, local: - pre sort the data, group, dedup and ungroup
  1869. // DEDUP, ALL, global - if just had a sort by any of the criteria then do it local
  1870. // DEDUP, not all, not grouped, group by criteria, dedup, degroup
  1871. DedupInfoExtractor info(expr);
  //Nothing can be done without at least one equality to group/partition by
  1872. if (info.equalities.ordinality() == 0)
  1873. return NULL;
  1874. IHqlExpression * dataset = expr->queryChild(0);
  //hasLocal: the activity itself is local; isLocal: also true whenever the target is not thor
  1875. bool hasLocal = isLocalActivity(expr);
  1876. bool isLocal = hasLocal || !translator.targetThor();
  1877. bool isHashDedup = expr->hasAttribute(hashAtom);
  1878. if (info.compareAllRows)
  1879. {
  1880. IHqlExpression * manyProp = expr->queryAttribute(manyAtom);
  1881. if (!isLocal && manyProp)
  1882. {
  1883. //If lots of duplicates, then dedup all locally and then dedup all globally.
  1884. HqlExprArray localArgs;
  1885. unwindChildren(localArgs, expr);
  1886. localArgs.zap(*manyProp);
  1887. localArgs.append(*createLocalAttribute());
  1888. OwnedHqlExpr localDedup = expr->clone(localArgs);
  //The global dedup reads from the local dedup; MANY is removed from both
  1889. HqlExprArray globalArgs;
  1890. globalArgs.append(*localDedup.getClear());
  1891. unwindChildren(globalArgs, expr, 1);
  1892. globalArgs.zap(*manyProp);
  1893. return expr->clone(globalArgs);
  1894. }
  1895. }
  1896. //If a dedup can be done locally then force it to be local
  1897. if (!isLocal)
  1898. {
  1899. OwnedHqlExpr newSort = createValueSafe(no_sortlist, makeSortListType(NULL), info.equalities);
  1900. if (isPartitionedForGroup(dataset, newSort, info.compareAllRows))
  1901. {
  1902. OwnedHqlExpr ret = appendOwnedOperand(expr, createLocalAttribute());
  1903. //A global all dedup implies hash (historically) so preserve that semantic
  1904. if (info.compareAllRows && !isHashDedup)
  1905. return appendOwnedOperand(ret, createAttribute(hashAtom));
  1906. return ret.getClear();
  1907. }
  1908. }
  1909. //DEDUP,ALL
  1910. if (info.compareAllRows)
  1911. {
  1912. OwnedHqlExpr groupOrder = createValueSafe(no_sortlist, makeSortListType(NULL), info.equalities);
  //For a global dedup a local sort order is only accepted if merge distribute is supported and HASH was not requested
  1913. bool checkLocal = isLocal || (options.supportsMergeDistribute && !isHashDedup);
  1914. //If the dataset is already sorted for deduping, (and no extra tests) then
  1915. //if local can just remove the ALL attribute, since the records are already adjacent.
  1916. //if global remove the all, but enclose the dedup in a group to avoid serial processing
  1917. //Ignore HASH if specified since this has to be more efficient.
  1918. if (info.conds.ordinality() == 0)
  1919. {
  1920. bool alreadySorted = isSortedForGroup(dataset, groupOrder, checkLocal);
  1921. if (alreadySorted)
  1922. {
  1923. OwnedHqlExpr noHash = removeAttribute(expr, hashAtom);
  1924. OwnedHqlExpr noAll = removeAttribute(noHash, allAtom);
  1925. if (isLocal)
  1926. return noAll.getClear();
  //Global: the last argument asks for a GROUP,ALL when only the local order was checked
  1927. return convertDedupToGroupedDedup(noAll, groupOrder, checkLocal && !isLocal);
  1928. }
  1929. }
  1930. if (!isHashDedup)
  1931. {
  1932. //If has post non equality condition, change it to a group all->dedup->ungroup
  1933. if (info.conds.ordinality())
  1934. return convertDedupToGroupedDedup(expr, groupOrder, true);
  1935. //If local and thor (since hash dedup may overflow) convert to sort, dedup(not all)
  1936. //Otherwise a hashdedup is likely to be more efficient - since it will be linear cf O(NlnN) for the sort
  1937. if (hasLocal && translator.targetThor())
  1938. {
  1939. HqlExprArray dedupArgs;
  1940. dedupArgs.append(*ensureSortedForGroup(dataset, groupOrder, true, false, options.implicitGroupSubSort));
  1941. unwindChildren(dedupArgs, expr, 1);
  1942. removeAttribute(dedupArgs, allAtom);
  1943. return expr->clone(dedupArgs);
  1944. }
  1945. else
  1946. {
  //Only switch to a hash dedup when a single record is kept and KEEP LEFT is not in use
  1947. if (matchesConstantValue(info.numToKeep, 1) && !info.keepLeft)
  1948. return appendOwnedOperand(expr, createAttribute(hashAtom));
  1949. }
  1950. }
  1951. }
  1952. else
  1953. {
  1954. //Convert dedup(ds, exprs) to group(dedup(group(ds, exprs), exprs))
  1955. //To ensure that the activity isn't executed serially.
  1956. if (!isLocal && !areConstant(info.equalities))
  1957. {
  1958. OwnedHqlExpr groupOrder = createValueSafe(no_sortlist, makeSortListType(NULL), info.equalities);
  1959. return convertDedupToGroupedDedup(expr, groupOrder, false);
  1960. }
  1961. }
  //No rewrite applies
  1962. return NULL;
  1963. }
  //Rewrite a global, ungrouped ROLLUP on a thor cluster so that it is not processed as a single stream.
  //The equality terms of the rollup condition are extracted; then either
  //  * LOCAL is added, if the input is already partitioned by those equalities, or
  //  * the rollup is converted to GROUP(ROLLUP(GROUP(ds, equalities), remaining condition, ...)).
  //Returns the replacement expression, or NULL when the rollup is left unchanged.
  1964. IHqlExpression * ThorHqlTransformer::normalizeRollup(IHqlExpression * expr)
  1965. {
  1966. if (isGroupedActivity(expr))
  1967. return NULL;
  1968. IHqlExpression * dataset = expr->queryChild(0);
  1969. IHqlExpression * cond = expr->queryChild(1);
  1970. if (isThorCluster(targetClusterType) && !expr->queryAttribute(localAtom) && isIndependentOfScope(expr))
  1971. {
  //equalities: values that must match between adjacent rows; extra: the remaining condition terms
  1972. HqlExprArray equalities;
  1973. OwnedHqlExpr extra;
  1974. if (cond->getOperator() == no_sortlist)
  1975. cond->unwindList(equalities, no_sortlist);
  1976. else if (!cond->isBoolean())
  1977. equalities.append(*LINK(cond));
  1978. else
  1979. {
  //Boolean condition: split on AND and look for terms of the form LEFT.x = RIGHT.x
  1980. HqlExprArray terms;
  1981. cond->unwindList(terms, no_and);
  1982. OwnedHqlExpr left = createSelector(no_left, dataset, querySelSeq(expr));
  1983. OwnedHqlExpr right = createSelector(no_right, dataset, querySelSeq(expr));
  1984. ForEachItemIn(i, terms)
  1985. {
  1986. IHqlExpression & cur = terms.item(i);
  1987. bool matched = false;
  1988. if (cur.getOperator() == no_eq)
  1989. {
  //Map both sides onto the dataset; identical results mean the same expression is compared on LEFT and RIGHT
  1990. OwnedHqlExpr mappedLeft = replaceSelector(cur.queryChild(0), left, dataset);
  1991. OwnedHqlExpr mappedRight = replaceSelector(cur.queryChild(1), right, dataset);
  1992. if (mappedLeft == mappedRight)
  1993. {
  1994. equalities.append(*LINK(mappedLeft));
  1995. matched = true;
  1996. }
  1997. }
  1998. if (!matched)
  1999. extendConditionOwn(extra, no_and, LINK(&cur));
  2000. }
  2001. }
  2002. //Don't create a group by constant - it will kill thor!
  2003. ForEachItemInRev(ie, equalities)
  2004. if (equalities.item(ie).isConstant())
  2005. equalities.remove(ie);
  2006. if (equalities.ordinality())
  2007. {
  2008. OwnedHqlExpr left = createSelector(no_left, dataset, querySelSeq(expr));
  2009. //If anything in the join condition references LEFT then the whole condition is currently passed the modified row
  2010. //so remove any fields that are modified in the transform
  2011. HqlExprArray ambiguousSelects;
  2012. if (cond->usesSelector(left))
  2013. filterAmbiguousRollupCondition(ambiguousSelects, equalities, expr);
  2014. if (equalities.ordinality() == 0)
  2015. {
  //Every equality was filtered out, so there is nothing left to group by - warn and leave the rollup alone
  2016. translator.reportWarning(queryLocation(expr), ECODETEXT(HQLWRN_AmbiguousRollupNoGroup));
  2017. }
  2018. else
  2019. {
  2020. OwnedHqlExpr groupOrder = createValueSafe(no_sortlist, makeSortListType(NULL), equalities);
  2021. if (isPartitionedForGroup(dataset, groupOrder, false))
  2022. return appendOwnedOperand(expr, createLocalAttribute());
  2023. //This list can only contain items if the filter is using left/right => expand to an equality
  2024. IHqlExpression * selector = dataset->queryNormalizedSelector();
  2025. OwnedHqlExpr right = createSelector(no_right, dataset, querySelSeq(expr));
  2026. ForEachItemIn(i, ambiguousSelects)
  2027. {
  2028. IHqlExpression * select = &ambiguousSelects.item(i);
  2029. OwnedHqlExpr leftSelect = replaceSelector(select, selector, left);
  2030. OwnedHqlExpr rightSelect = replaceSelector(select, selector, right);
  2031. IHqlExpression * eq = createBoolExpr(no_eq, leftSelect.getClear(), rightSelect.getClear());
  2032. extendConditionOwn(extra, no_and, eq);
  2033. }
  //Build GROUP(ds, equalities) and a rollup over it using only the remaining condition (TRUE if none)
  2034. HqlExprArray groupArgs, rollupArgs;
  2035. groupArgs.append(*LINK(dataset));
  2036. groupArgs.append(*LINK(groupOrder));
  2037. OwnedHqlExpr group = createDataset(no_group, groupArgs);
  2038. group.setown(cloneInheritedAnnotations(expr, group));
  2039. rollupArgs.append(*LINK(group));
  2040. if (extra)
  2041. rollupArgs.append(*extra.getClear());
  2042. else
  2043. rollupArgs.append(*createConstant(true));
  2044. unwindChildren(rollupArgs, expr, 2);
  //A GROUP with no criteria around the rollup removes the grouping again
  2045. OwnedHqlExpr ungroup = createDataset(no_group, expr->clone(rollupArgs), NULL);
  2046. return cloneInheritedAnnotations(expr, ungroup);
  2047. }
  2048. }
  2049. }
  2050. return NULL;
  2051. }
  2052. IHqlExpression * ThorHqlTransformer::skipOverGroups(IHqlExpression * dataset, bool isLocal)
  2053. {
  2054. //if grouping a group, remove the initial group.
  2055. //Not completely sure about this - it may potentially cause extra splitters.
  2056. IHqlExpression * newDataset = dataset;
  2057. while (newDataset->getOperator() == no_group)
  2058. {
  2059. if (newDataset->hasAttribute(allAtom))
  2060. break;
  2061. if (isLocal && queryRealChild(newDataset, 1))
  2062. {
  2063. //NOTE: local groups should not remove preceding non-local groups.
  2064. if (translator.targetThor() && !newDataset->hasAttribute(localAtom))
  2065. break;
  2066. }
  2067. newDataset = newDataset->queryChild(0);
  2068. }
  2069. return newDataset;
  2070. }
  2071. IHqlExpression * ThorHqlTransformer::skipGroupsWithinGroup(IHqlExpression * expr, bool isLocal)
  2072. {
  2073. //if grouping a group, remove the initial group.
  2074. //Not completely sure about this - it may potentially cause extra splitters.
  2075. IHqlExpression * dataset = expr->queryChild(0);
  2076. if (dataset->getOperator() == no_group)
  2077. {
  2078. IHqlExpression * newDataset = skipOverGroups(dataset, isLocal);
  2079. if (newDataset == dataset)
  2080. return NULL;
  2081. //if we end up with the original grouping then probably have ungroup(group(x,y))
  2082. //so no need to do this group either
  2083. if (queryGrouping(newDataset) == queryGrouping(expr))
  2084. return LINK(newDataset);
  2085. return replaceChild(expr, 0, newDataset);
  2086. }
  2087. return NULL;
  2088. }
  //Normalize a GROUP operator.
  //  * An ungroup (no grouping criteria) or a non-ALL group only has redundant nested groups removed.
  //  * A global group whose input is already suitably partitioned is made LOCAL.
  //  * GROUP,ALL is converted to a sort (or distribute + merge / local sort) followed by a local group.
  //Returns the replacement expression, or NULL when the group is left unchanged.
  2089. IHqlExpression * ThorHqlTransformer::normalizeGroup(IHqlExpression * expr)
  2090. {
  2091. assertex(expr->getOperator() == no_group);
  2092. IHqlExpression * sortlist = queryRealChild(expr, 1);
  2093. IHqlExpression * dataset = expr->queryChild(0);
  //No grouping criteria: only try to skip nested groups
  2094. if (!sortlist)
  2095. return skipGroupsWithinGroup(expr, false);
  //If the grouping criteria can be simplified, substitute the simpler list and return that
  2096. OwnedHqlExpr newsort = simplifySortlistComplexity(sortlist);
  2097. if (newsort)
  2098. return replaceChild(expr, 1, newsort);
  //hasLocal: LOCAL was specified; isLocal: also true whenever the target is not thor
  2099. bool hasLocal = expr->hasAttribute(localAtom);
  2100. bool isLocal = hasLocal || !translator.targetThor();
  2101. bool wantSorted = expr->hasAttribute(sortedAtom);
  2102. bool hasAll = expr->hasAttribute(allAtom);
  2103. // First check if a global group can be done locally - applicable to all and non-all versions.
  2104. if (!isLocal)
  2105. {
  2106. if (!wantSorted && isPartitionedForGroup(dataset, sortlist, hasAll))
  2107. return appendLocalAttribute(expr);
  2108. }
  2109. if (!hasAll)
  2110. {
  2111. //if grouping a group, remove the initial group.
  2112. //Not completely sure about this - it may potentially cause extra splitters.
  2113. return skipGroupsWithinGroup(expr, isLocal);
  2114. }
  2115. //First check to see if the dataset is already sorted by the group criteria, or more.
  2116. //The data could be globally sorted, but not distributed, and this is likely to be more efficient than redistributing...
  2117. OwnedHqlExpr sorted = ensureSortedForGroup(dataset, sortlist, hasLocal, !translator.targetThor(), options.implicitGroupSubSort);
  //Already in a suitable order, so ALL is unnecessary
  2118. if (sorted == dataset)
  2119. return removeAttribute(expr, allAtom);
  2120. sorted.setown(cloneInheritedAnnotations(expr, sorted));
  2121. sorted.setown(inheritAttribute(sorted, expr, skewAtom));
  2122. sorted.setown(inheritAttribute(sorted, expr, thresholdAtom));
  2123. if (!isLocal)
  2124. {
  2125. //Options for ensuring distributed and locally sorted (in order)
  2126. // DISTRIBUTE,MERGE - since lightweight and streaming.
  2127. // DISTRIBUTE,LOCAL SORT
  2128. // SORT
  2129. if (!wantSorted)
  2130. {
  2131. //is it best to hash on all the grouping fields, or just some of them? Do all for the moment.
  2132. OwnedHqlExpr hashed = createValue(no_hash32, LINK(unsignedType), LINK(sortlist), createAttribute(internalAtom));
  2133. if (options.supportsMergeDistribute && isSortedForGroup(dataset, sortlist, true))
  2134. {
  2135. //Dataset is locally sorted, so can use the merge distribute to remove the subsequent local sort.
  2136. //changing a heavyweight global sort into a lightweight distribute,merge
  2137. OwnedHqlExpr sortOrder = getExistingSortOrder(dataset, true, true);
  2138. OwnedHqlExpr mergeAttr = createExprAttribute(mergeAtom, replaceSelector(sortOrder, queryActiveTableSelector(), dataset));
  2139. sorted.setown(createDatasetF(no_distribute, LINK(dataset), LINK(hashed), mergeAttr.getClear(), NULL));
  2140. sorted.setown(cloneInheritedAnnotations(expr, sorted));
  2141. }
  2142. else
  2143. {
  //Replace the sort with DISTRIBUTE + local SORT only if the option or an UNSORTED attribute asks for it
  2144. if (groupAllDistribute || expr->hasAttribute(unsortedAtom))
  2145. {
  2146. OwnedHqlExpr distributed = createDataset(no_distribute, LINK(dataset), LINK(hashed));
  2147. distributed.setown(cloneInheritedAnnotations(expr, distributed));
  2148. sorted.setown(createDataset(no_sort, LINK(distributed), createComma(LINK(sortlist), createLocalAttribute())));
  2149. sorted.setown(cloneInheritedAnnotations(expr, sorted));
  2150. }
  2151. }
  2152. }
  2153. #ifdef _DEBUG
  2154. assertex(!sortlist->isPure() || isPartitionedForGroup(sorted, sortlist, true)); // sanity check
  2155. #endif
  2156. }
  2157. //Do a local group after the sort because we know they can't overlap...
  2158. OwnedHqlExpr ret = createDatasetF(no_group, sorted.getClear(), LINK(sortlist), createLocalAttribute(), NULL);
  2159. return expr->cloneAllAnnotations(ret);
  2160. }
  //Convert a COGROUP into simpler activities:
  //  1. ungroup any grouped inputs,
  //  2. (global only) distribute the inputs so they share a common distribution,
  //  3. either merge the inputs using an existing sort order, or append them and sort,
  //  4. group the result by the grouping condition.
  //If the grouping criteria can be simplified, the cogroup is instead returned with the simplified
  //GROUP attribute.
  2161. IHqlExpression * ThorHqlTransformer::normalizeCoGroup(IHqlExpression * expr)
  2162. {
  2163. IHqlExpression * grouping = queryAttributeChild(expr, groupAtom, 0);
  2164. OwnedHqlExpr newsort = simplifySortlistComplexity(grouping);
  2165. if (newsort)
  2166. {
  2167. OwnedHqlExpr newGroup = createExprAttribute(groupAtom, newsort.getClear());
  2168. return replaceOwnedAttribute(expr, newGroup.getClear());
  2169. }
  2170. HqlExprArray inputs;
  2171. //Gather the inputs and ensure they aren't grouped.
  2172. ForEachChild(i, expr)
  2173. {
  2174. IHqlExpression * cur = expr->queryChild(i);
  2175. if (!cur->isAttribute())
  2176. {
  2177. if (isGrouped(cur))
  2178. {
  2179. OwnedHqlExpr ungroup = createDataset(no_group, LINK(cur));
  2180. inputs.append(*cloneInheritedAnnotations(expr, ungroup));
  2181. }
  2182. else
  2183. inputs.append(*LINK(cur));
  2184. }
  2185. }
  2186. bool hasLocal = expr->hasAttribute(localAtom);
  2187. bool alwaysLocal = !translator.targetThor();
  2188. bool isLocal = hasLocal || alwaysLocal;
  //A LOCAL attribute is only added to the generated activities when targeting thor
  2189. OwnedHqlExpr localFlag = !alwaysLocal ? createLocalAttribute() : NULL;
  2190. OwnedHqlExpr bestSortOrder;
  2191. //Choose the best existing sort order (for the moment assume the shortest - although
  2192. //even better would be to pick the shortest most frequent)
  2193. ForEachItemIn(iBest, inputs)
  2194. {
  2195. if (isSortedForGroup(&inputs.item(iBest), grouping, true))
  2196. {
  2197. OwnedHqlExpr localOrder = getExistingSortOrder(&inputs.item(iBest), true, true);
  2198. if (!bestSortOrder || (localOrder->numChildren() < bestSortOrder->numChildren()))
  2199. bestSortOrder.set(localOrder);
  2200. }
  2201. }
  2202. if (!isLocal)
  2203. {
  2204. //Ensure all the inputs are co-distributed (use an existing distribution if possible)
  2205. //Even better would be to pick the most frequent
  2206. OwnedHqlExpr distribution;
  2207. ForEachItemIn(i, inputs)
  2208. {
  2209. IHqlExpression & cur = inputs.item(i);
  2210. if (isPartitionedForGroup(&cur, grouping, true))
  2211. {
  2212. IHqlExpression * curDistribution = queryDistribution(&cur);
  //Take the first suitable distribution that is not a sort distribution
  2213. if (!isSortDistribution(curDistribution))
  2214. {
  2215. distribution.set(curDistribution);
  2216. break;
  2217. }
  2218. }
  2219. }
  //No reusable distribution: hash on the grouping criteria
  2220. if (!distribution)
  2221. distribution.setown(createValue(no_hash32, LINK(unsignedType), LINK(grouping), createAttribute(internalAtom)));
  2222. ForEachItemIn(iReplace, inputs)
  2223. {
  2224. IHqlExpression & cur = inputs.item(iReplace);
  2225. if (queryDistribution(&cur) != distribution)
  2226. {
  2227. OwnedHqlExpr mappedDistribution = replaceSelector(distribution, queryActiveTableSelector(), &cur);
  //If this input already has the chosen sort order, add a MERGE attribute to the distribute
  2228. OwnedHqlExpr mergeAttr;
  2229. if (bestSortOrder && isAlreadySorted(&cur, bestSortOrder, true, true))
  2230. mergeAttr.setown(createExprAttribute(mergeAtom, replaceSelector(bestSortOrder, queryActiveTableSelector(), &cur)));
  2231. OwnedHqlExpr distributedInput = createDatasetF(no_distribute, LINK(&cur), LINK(mappedDistribution), mergeAttr.getClear(), NULL);
  2232. distributedInput.setown(cloneInheritedAnnotations(expr, distributedInput));
  2233. inputs.replace(*distributedInput.getClear(), iReplace);
  2234. }
  2235. }
  2236. }
  2237. OwnedHqlExpr merged;
  2238. if (bestSortOrder)
  2239. {
  2240. //If some of the datasets are sorted then sort the remaining inputs by the same order and merge
  2241. HqlExprArray sortedInputs;
  2242. ForEachItemIn(i, inputs)
  2243. {
  2244. IHqlExpression & cur = inputs.item(i);
  2245. OwnedHqlExpr mappedOrder = replaceSelector(bestSortOrder, queryActiveTableSelector(), &cur);
  2246. sortedInputs.append(*ensureSorted(&cur, mappedOrder, true, true, alwaysLocal, options.implicitSubSort));
  2247. }
  //The merge takes the sorted inputs followed by a SORTED(order) attribute and the optional LOCAL flag
  2248. HqlExprArray sortedArgs;
  2249. unwindChildren(sortedArgs, bestSortOrder);
  2250. sortedInputs.append(*createExprAttribute(sortedAtom, sortedArgs));
  2251. if (localFlag)
  2252. sortedInputs.append(*LINK(localFlag));
  2253. merged.setown(createDataset(no_merge, sortedInputs));
  2254. }
  2255. else
  2256. {
  2257. //otherwise append the datasets and then sort them all
  2258. OwnedHqlExpr appended = createDataset(no_addfiles, inputs);
  2259. appended.setown(cloneInheritedAnnotations(expr, appended));
  2260. OwnedHqlExpr mappedOrder = replaceSelector(grouping, queryActiveTableSelector(), appended);
  2261. merged.setown(createDatasetF(no_sort, LINK(appended), mappedOrder.getClear(), LINK(localFlag), NULL));
  2262. }
  2263. //Now group by the grouping condition
  2264. merged.setown(cloneInheritedAnnotations(expr, merged));
  2265. OwnedHqlExpr mappedGrouping = replaceSelector(grouping, queryActiveTableSelector(), merged);
  2266. OwnedHqlExpr grouped = createDataset(no_group, LINK(merged), mappedGrouping.getClear());
  2267. return expr->cloneAllAnnotations(grouped);
  2268. }
  2269. static IHqlExpression * getNonThorSortedJoinInput(IHqlExpression * joinExpr, IHqlExpression * dataset, const HqlExprArray & sorts, bool implicitSubSort)
  2270. {
  2271. if (!sorts.length())
  2272. return LINK(dataset);
  2273. LinkedHqlExpr expr = dataset;
  2274. if (isGrouped(expr))
  2275. {
  2276. expr.setown(createDataset(no_group, LINK(expr), NULL));
  2277. expr.setown(cloneInheritedAnnotations(joinExpr, expr));
  2278. }
  2279. // if already sorted or grouped, use it!
  2280. OwnedHqlExpr groupOrder = createValueSafe(no_sortlist, makeSortListType(NULL), sorts);
  2281. groupOrder.setown(replaceSelector(groupOrder, queryActiveTableSelector(), expr->queryNormalizedSelector()));
  2282. //not used for thor, so sort can be local
  2283. OwnedHqlExpr table = ensureSorted(expr, groupOrder, false, true, true, implicitSubSort);
  2284. if (table != expr)
  2285. table.setown(cloneInheritedAnnotations(joinExpr, table));
  2286. OwnedHqlExpr group = createDatasetF(no_group, table.getClear(), LINK(groupOrder), NULL);
  2287. return cloneInheritedAnnotations(joinExpr, group);
  2288. }
  2289. static bool sameOrGrouped(IHqlExpression * newLeft, IHqlExpression * oldLeft)
  2290. {
  2291. if (newLeft->queryBody() == oldLeft->queryBody())
  2292. return true;
  2293. if (newLeft->getOperator() != no_group)
  2294. return false;
  2295. newLeft = newLeft->queryChild(0);
  2296. return (newLeft->queryBody() == oldLeft->queryBody());
  2297. }
  2298. static bool canReorderMatchExistingLocalSort(HqlExprArray & newElements1, HqlExprArray & newElements2, IHqlExpression * ds1, Shared<IHqlExpression> & ds2, const HqlExprArray & elements1, const HqlExprArray & elements2, bool canSubSort, bool isLocal, bool alwaysLocal)
  2299. {
  2300. newElements1.kill();
  2301. newElements2.kill();
  2302. if (reorderMatchExistingLocalSort(newElements1, newElements2, ds1, elements1, elements2))
  2303. {
  2304. if (isAlreadySorted(ds2, newElements2, isLocal||alwaysLocal, true))
  2305. return true;
  2306. if (canSubSort && isWorthShuffling(ds2, newElements2, isLocal||alwaysLocal, true))
  2307. {
  2308. OwnedHqlExpr subsorted = getSubSort(ds2, newElements2, isLocal, true, alwaysLocal);
  2309. if (subsorted)
  2310. {
  2311. ds2.swap(subsorted);
  2312. return true;
  2313. }
  2314. }
  2315. }
  2316. return false;
  2317. }
  2318. bool ThorHqlTransformer::isLightweightJoinCandidate(IHqlExpression * expr, bool isLocal, bool isLimitedSubstringJoin)
  2319. {
  2320. //This is equally applicable to hthor and roxie. However non lookup joins currently generate group activities on
  2321. //the inputs which look less efficient. It may still be better to enable it though.
  2322. if (!translator.targetThor())
  2323. return false;
  2324. if (!options.spotLocalMerge || isLimitedSubstringJoin || !isLocal)
  2325. return false;
  2326. if (expr->hasAttribute(_lightweight_Atom))
  2327. return false;
  2328. switch (expr->getOperator())
  2329. {
  2330. case no_join:
  2331. case no_selfjoin:
  2332. case no_denormalizegroup:
  2333. case no_denormalize:
  2334. return true;
  2335. }
  2336. return false;
  2337. }
  2338. static IHqlExpression * createDistributedInput(IHqlExpression * ds, IHqlExpression * sortlist, bool internal)
  2339. {
  2340. //could use a more optimal hash function since comparing against self, so fields are same type
  2341. IHqlExpression * internalExpr = internal ? createAttribute(internalAtom) : NULL;
  2342. OwnedHqlExpr activeDist = createValue(no_hash32, LINK(unsignedType), LINK(sortlist), internalExpr);
  2343. OwnedHqlExpr dist = replaceSelector(activeDist, queryActiveTableSelector(), ds);
  2344. return createDataset(no_distribute, LINK(ds), LINK(dist));
  2345. }
  2346. static IHqlExpression * createDistributedInput(IHqlExpression * ds, const HqlExprArray & sorts, bool internal)
  2347. {
  2348. OwnedHqlExpr sortlist = createValueSafe(no_sortlist, makeSortListType(NULL), sorts);
  2349. return createDistributedInput(ds, sortlist, internal);
  2350. }
  2351. /*
  2352. Perform the following transformation:
  2353. R := JOIN(l, r, LEFT.key = RIGHT.key AND fuzzy(LEFT,RIGHT), t(LEFT,RIGHT), GROUP(LEFT.id1, LEFT.id2), ATMOST(optional))
  2354. DL := DISTRIBUTE(L, HASH(key));
  2355. DR := DISTRIBUTE(R, HASH(key));
  2356. SL := SORT(DL, id, LOCAL); // Later replace this with a LEFTSORT() attribute on the join (so can optimize self join)
  2357. //If it is a self join, SR == SL
  2358. JR := JOIN(SL, DR, LEFT.key = RIGHT.key, t(LEFT,RIGHT), LOOKUP MANY, LOCAL);
  2359. DJ := DISTRIBUTE(JR, HASH(leftid1, leftid2), MERGE(leftid1, leftid2));
  2360. R := GROUP(DJ, leftid1, leftid2, LOCAL);
  2361. */
  2362. IHqlExpression * ThorHqlTransformer::normalizeJoinAndGroup(IHqlExpression * expr)
  2363. {
  2364. IHqlExpression * oldLeft = expr->queryChild(0);
  2365. IHqlExpression * oldRight = expr->queryChild(1);
  2366. LinkedHqlExpr newLeft = oldLeft;
  2367. LinkedHqlExpr newRight = oldRight;
  2368. IHqlExpression * groupOrder = queryAttributeChild(expr, groupAtom, 0);
  2369. node_operator op = expr->getOperator();
  2370. bool hasLocal = isLocalActivity(expr);
  2371. bool alwaysLocal = !translator.targetThor();
  2372. if (!hasLocal && !alwaysLocal)
  2373. {
  2374. JoinSortInfo joinInfo;
  2375. joinInfo.findJoinSortOrders(expr, false);
  2376. OwnedHqlExpr leftList = createValueSafe(no_sortlist, makeSortListType(NULL), joinInfo.queryLeftReq());
  2377. OwnedHqlExpr mappedLeftList = replaceSelector(leftList, queryActiveTableSelector(), newLeft->queryNormalizedSelector());
  2378. OwnedHqlExpr hashLeft = createValue(no_hash32, makeIntType(4, false), mappedLeftList.getClear());
  2379. newLeft.setown(createDataset(no_distribute, LINK(newLeft), LINK(hashLeft)));
  2380. if (oldRight == oldLeft)
  2381. newRight.set(newLeft);
  2382. else if (op != no_selfjoin)
  2383. {
  2384. OwnedHqlExpr rightList = createValueSafe(no_sortlist, makeSortListType(NULL), joinInfo.queryRightReq());
  2385. OwnedHqlExpr mappedRightList = replaceSelector(rightList, queryActiveTableSelector(), newRight->queryNormalizedSelector());
  2386. OwnedHqlExpr hashRight = createValue(no_hash32, makeIntType(4, false), mappedRightList.getClear());
  2387. newRight.setown(createDataset(no_distribute, LINK(newRight), LINK(hashRight)));
  2388. }
  2389. }
  2390. OwnedHqlExpr newLocalAttr = alwaysLocal ? NULL : createLocalAttribute();
  2391. //Sort the left hand dataset into grouping order.
  2392. assertex(groupOrder);
  2393. OwnedHqlExpr left = createSelector(no_left, expr->queryChild(0), querySelSeq(expr));
  2394. OwnedHqlExpr leftSortOrder = replaceSelector(groupOrder, left, newLeft);
  2395. newLeft.setown(createDatasetF(no_sort, newLeft.getClear(), LINK(leftSortOrder), LINK(newLocalAttr), NULL));
  2396. if (oldRight == oldLeft)
  2397. newRight.set(newLeft);
  2398. //Now create the modified join
  2399. HqlExprArray joinArgs;
  2400. joinArgs.append(*LINK(newLeft));
  2401. joinArgs.append(*LINK(newRight));
  2402. unwindChildren(joinArgs, expr, 2);
  2403. removeAttribute(joinArgs, groupAtom);
  2404. if (!hasLocal && !alwaysLocal)
  2405. joinArgs.append(*createLocalAttribute());
  2406. OwnedHqlExpr newJoin = expr->clone(joinArgs);
  2407. //Now need to map the fields from the input dataset to the join output
  2408. NewProjectMapper2 mapper;
  2409. mapper.setMapping(newJoin->queryChild(3));
  2410. bool matchedAll = true;
  2411. OwnedHqlExpr mappedOrder = mapper.collapseFields(groupOrder, left, newJoin->queryNormalizedSelector(), left, &matchedAll);
  2412. assertex(matchedAll); // This is checked in the parser, so shouldn't be triggered here.
  2413. //Distribute the result
  2414. LinkedHqlExpr distributed = newJoin;
  2415. if (!hasLocal && !alwaysLocal)
  2416. {
  2417. OwnedHqlExpr hashOut = createValue(no_hash32, makeIntType(4, false), LINK(mappedOrder));
  2418. OwnedHqlExpr mergeOut = createExprAttribute(mergeAtom, LINK(mappedOrder));
  2419. distributed.setown(createDatasetF(no_distribute, LINK(newJoin), hashOut.getClear(), mergeOut.getClear(), NULL));
  2420. }
  2421. //And finally group it.
  2422. return createDatasetF(no_group, LINK(distributed), LINK(mappedOrder), LINK(newLocalAttr), NULL);
  2423. }
  2424. IHqlExpression * ThorHqlTransformer::normalizeJoinOrDenormalize(IHqlExpression * expr)
  2425. {
  2426. IHqlExpression * leftDs = expr->queryChild(0);
  2427. IHqlExpression * rightDs = queryJoinRhs(expr);
  2428. IHqlExpression * seq = querySelSeq(expr);
  2429. node_operator op = expr->getOperator();
  2430. if (op == no_join)
  2431. {
  2432. if (isSelfJoin(expr))
  2433. {
  2434. HqlExprArray children;
  2435. unwindChildren(children, expr);
  2436. children.replace(*createAttribute(_selfJoinPlaceholder_Atom), 1); // replace the 1st dataset with an attribute so parameters are still in the same place.
  2437. OwnedHqlExpr ret = createDataset(no_selfjoin, children);
  2438. return expr->cloneAllAnnotations(ret);
  2439. }
  2440. }
  2441. bool hasLocal = isLocalActivity(expr);
  2442. bool alwaysLocal = !translator.targetThor();
  2443. bool isLocal = hasLocal || alwaysLocal;
  2444. //hash,local doesn't make sense (hash is only used for distribution) => remove hash
  2445. //but also prevent it being converted to a lookup join??
  2446. if (isLocal && expr->hasAttribute(hashAtom))
  2447. {
  2448. HqlExprArray args;
  2449. unwindChildren(args, expr);
  2450. removeAttribute(args, hashAtom);
  2451. return expr->clone(args);
  2452. }
  2453. //Check to see if this join should be done as a keyed join...
  2454. if (!expr->hasAttribute(lookupAtom) && !expr->hasAttribute(smartAtom) && !expr->hasAttribute(allAtom))
  2455. {
  2456. if (rightDs->getOperator() == no_filter)
  2457. {
  2458. bool moveRhsFilter = false;
  2459. if (expr->hasAttribute(keyedAtom) && queryAttributeChild(expr, keyedAtom, 0))
  2460. {
  2461. //Full keyed join - ensure the filter is moved from the rhs to the condition.
  2462. moveRhsFilter = true;
  2463. }
  2464. else if (options.spotPotentialKeyedJoins && (rightDs != leftDs))
  2465. {
  2466. //This can turn some non keyed joins into keyed joins
  2467. IHqlExpression * cur = rightDs;
  2468. while (cur->getOperator() == no_filter)
  2469. cur = cur->queryChild(0);
  2470. if (cur->getOperator() == no_newkeyindex)
  2471. moveRhsFilter = true;
  2472. }
  2473. if (moveRhsFilter)
  2474. {
  2475. //Transform join(a, b(x), c) to join(a, b, c and evaluate(right, x))
  2476. HqlExprAttr extraFilter;
  2477. OwnedHqlExpr right = createSelector(no_right, rightDs, seq);
  2478. IHqlExpression * cur = rightDs;
  2479. while (cur->getOperator() == no_filter)
  2480. {
  2481. unsigned max = cur->numChildren();
  2482. for (unsigned i = 1; i < max; i++)
  2483. {
  2484. IHqlExpression * filter = cur->queryChild(i);
  2485. if (!filter->isAttribute())
  2486. {
  2487. IHqlExpression * newFilter = replaceSelector(filter, rightDs, right);
  2488. extendConditionOwn(extraFilter, no_and, newFilter);
  2489. }
  2490. }
  2491. cur = cur->queryChild(0);
  2492. }
  2493. HqlExprArray args;
  2494. unwindChildren(args, expr);
  2495. args.replace(*LINK(cur), 1);
  2496. args.replace(*createValue(no_and, makeBoolType(), LINK(expr->queryChild(2)), extraFilter.getClear()), 2);
  2497. return expr->clone(args);
  2498. }
  2499. }
  2500. }
  2501. //Tag a keyed join as ordered in the platforms that ensure it does remain ordered. Extend if the others do.
  2502. if (isKeyedJoin(expr))
  2503. {
  2504. if (translator.targetRoxie() && !expr->hasAttribute(_ordered_Atom))
  2505. return appendOwnedOperand(expr, createAttribute(_ordered_Atom));
  2506. return NULL;
  2507. }
  2508. JoinSortInfo joinInfo;
  2509. joinInfo.findJoinSortOrders(expr, canBeSlidingJoin(expr));
  2510. //If the data is already distributed so the data is on the correct machines then perform the join locally.
  2511. //Should be equally applicable to lookup, hash, all and normal joins.
  2512. if (!isLocal && !joinInfo.hasOptionalEqualities() && joinInfo.queryLeftReq().ordinality())
  2513. {
  2514. if (isDistributedCoLocally(leftDs, rightDs, joinInfo.queryLeftReq(), joinInfo.queryRightReq()))
  2515. return appendOwnedOperand(expr, createLocalAttribute());
  2516. if (options.matchExistingDistributionForJoin)
  2517. {
  2518. //Should this exclude lookup joins??
  2519. //On balance it is probably worthwhile since it means that only 1/clustersize data is on each node.
  2520. //If left side (assumed to be the largest) is already distributed, it would be more efficient
  2521. //to redistribute the rhs by a matching hash function (or use cosort), and then join locally.
  2522. //Be careful about the persist scaling factors though.
  2523. //SORT partitions should be supported once they are persisted by the system
  2524. IHqlExpression * leftDistribution = queryDistribution(leftDs);
  2525. if (matchesAnyDistribution(leftDistribution))
  2526. return appendOwnedOperand(expr, createLocalAttribute());
  2527. if (!isPersistDistribution(leftDistribution) && !isSortedDistribution(leftDistribution) && isPartitionedForGroup(leftDs, joinInfo.queryLeftReq(), true))
  2528. {
  2529. //MORE: May need a flag to stop this - to prevent issues with skew.
  2530. OwnedHqlExpr newHash = createMatchingDistribution(leftDistribution, joinInfo.queryLeftReq(), joinInfo.queryRightReq());
  2531. if (newHash)
  2532. {
  2533. OwnedHqlExpr dist = replaceSelector(newHash, queryActiveTableSelector(), rightDs);
  2534. OwnedHqlExpr newRhs = createDataset(no_distribute, LINK(rightDs), LINK(dist));
  2535. OwnedHqlExpr newJoin = replaceChild(expr, 1, newRhs);
  2536. return appendOwnedOperand(newJoin, createLocalAttribute());
  2537. }
  2538. }
  2539. }
  2540. else
  2541. {
  2542. IHqlExpression * leftDistribution = queryDistribution(leftDs);
  2543. if (!isPersistDistribution(leftDistribution) && !isSortedDistribution(leftDistribution) && isPartitionedForGroup(leftDs, joinInfo.queryLeftReq(), true))
  2544. DBGLOG("MORE: Potential for distributed join optimization");
  2545. }
  2546. }
  2547. if (joinInfo.hasOptionalEqualities() && !isLocal && !expr->hasAttribute(hashAtom) && !expr->hasAttribute(allAtom))
  2548. {
  2549. if (joinInfo.hasRequiredEqualities())
  2550. return appendAttribute(expr, hashAtom);
  2551. throwError(HQLERR_PrefixJoinRequiresEquality);
  2552. }
  2553. if (expr->hasAttribute(allAtom))
  2554. return NULL;
  2555. if (expr->hasAttribute(lookupAtom))
  2556. return NULL;
  2557. //Try and convert local joins to a lightweight join that doesn't require any sorting of the inputs.
  2558. //Improves resourcing for thor, and prevents lookup conversion for hthor/roxie
  2559. //Worthwhile even for lookup joins
  2560. if (isLightweightJoinCandidate(expr, isLocal, joinInfo.hasOptionalEqualities()))
  2561. {
  2562. if (isAlreadySorted(leftDs, joinInfo.queryLeftSort(), true, true) &&
  2563. isAlreadySorted(rightDs, joinInfo.queryRightSort(), true, true))
  2564. {
  2565. //If this is a lookup join without a many then we need to make sure only the first match is retained.
  2566. return appendOwnedOperand(expr, createAttribute(_lightweight_Atom));
  2567. }
  2568. //Check for a local join where we can reorder the condition so both sides match the existing sort orders.
  2569. //could special case self-join to do less work, but probably not worth the effort.
  2570. HqlExprArray sortedLeft, sortedRight;
  2571. if (!joinInfo.hasOptionalEqualities())
  2572. {
  2573. //Since the distribution and order of global joins is not defined this could probably be used for non-local as well.
  2574. LinkedHqlExpr newLeftDs = leftDs;
  2575. LinkedHqlExpr newRightDs = rightDs;
  2576. bool canSubSort = options.subsortLocalJoinConditions;
  2577. bool reordered = canReorderMatchExistingLocalSort(sortedLeft, sortedRight, newLeftDs, newRightDs,
  2578. joinInfo.queryLeftSort(), joinInfo.queryRightSort(), canSubSort, isLocal, alwaysLocal);
  2579. //If allowed to subsort then try the otherway around
  2580. if (!reordered && canSubSort)
  2581. reordered = canReorderMatchExistingLocalSort(sortedRight, sortedLeft, newRightDs, newLeftDs,
  2582. joinInfo.queryRightSort(), joinInfo.queryLeftSort(), canSubSort, isLocal, alwaysLocal);
  2583. if (reordered)
  2584. {
  2585. //Recreate the join condition in the correct order to match the existing sorts...
  2586. HqlExprAttr newcond;
  2587. OwnedHqlExpr leftSelector = createSelector(no_left, newLeftDs, seq);
  2588. OwnedHqlExpr rightSelector = createSelector(no_right, newRightDs, seq);
  2589. ForEachItemIn(i, sortedLeft)
  2590. {
  2591. OwnedHqlExpr lc = replaceSelector(&sortedLeft.item(i), queryActiveTableSelector(), leftSelector);
  2592. OwnedHqlExpr rc = replaceSelector(&sortedRight.item(i), queryActiveTableSelector(), rightSelector);
  2593. extendConditionOwn(newcond, no_and, createValue(no_eq, makeBoolType(), lc.getClear(), rc.getClear()));
  2594. }
  2595. extendConditionOwn(newcond, no_and, LINK(joinInfo.extraMatch));
  2596. HqlExprArray args;
  2597. args.append(*newLeftDs.getClear());
  2598. args.append(*newRightDs.getClear());
  2599. args.append(*newcond.getClear());
  2600. unwindChildren(args, expr, 3);
  2601. args.append(*createAttribute(_lightweight_Atom));
  2602. return expr->clone(args);
  2603. }
  2604. }
  2605. }
  2606. //Sort,Sort->join is O(NlnN) lookup join using a hash table is O(N) =>convert for hthor/roxie
  2607. if (!isThorCluster(targetClusterType) && !expr->hasAttribute(_normalized_Atom) && !expr->hasAttribute(smartAtom))
  2608. {
  2609. bool createLookup = false;
  2610. if ((op == no_join) && options.convertJoinToLookup)
  2611. {
  2612. if ((targetClusterType == RoxieCluster) || hasFewRows(rightDs))
  2613. if (!isFullJoin(expr) && !isRightJoin(expr) && !expr->hasAttribute(partitionRightAtom))
  2614. createLookup = !expr->hasAttribute(_lightweight_Atom);
  2615. }
  2616. if (joinInfo.hasOptionalEqualities())
  2617. createLookup = false; //doesn't support it yet
  2618. else if (createLookup && joinInfo.queryLeftSort().ordinality())
  2619. {
  2620. //Check this isn't going to generate a between join - if it is that takes precedence.
  2621. if ((joinInfo.slidingMatches.ordinality() != 0) && (joinInfo.queryLeftSort().ordinality() == joinInfo.slidingMatches.ordinality()))
  2622. createLookup = false;
  2623. }
  2624. if (createLookup)
  2625. {
  2626. IHqlExpression * lhs = expr->queryChild(0);
  2627. HqlExprArray args;
  2628. if (isGrouped(lhs))
  2629. {
  2630. OwnedHqlExpr ungroup = createDataset(no_group, LINK(lhs));
  2631. args.append(*cloneInheritedAnnotations(expr, ungroup));
  2632. }
  2633. else
  2634. args.append(*LINK(lhs));
  2635. unwindChildren(args, expr, 1);
  2636. args.append(*createAttribute(manyAtom));
  2637. args.append(*createAttribute(lookupAtom));
  2638. return expr->clone(args);
  2639. }
  2640. OwnedHqlExpr newLeft = getNonThorSortedJoinInput(expr, leftDs, joinInfo.queryLeftSort(), options.implicitSubSort);
  2641. OwnedHqlExpr newRight = getNonThorSortedJoinInput(expr, rightDs, joinInfo.queryRightSort(), options.implicitSubSort);
  2642. try
  2643. {
  2644. if ((leftDs != newLeft) || (rightDs != newRight))
  2645. {
  2646. HqlExprArray args;
  2647. args.append(*newLeft.getClear());
  2648. args.append(*newRight.getClear());
  2649. unwindChildren(args, expr, 2);
  2650. args.append(*createAttribute(_normalized_Atom));
  2651. return expr->clone(args);
  2652. }
  2653. }
  2654. catch (IException * e)
  2655. {
  2656. //Couldn't work out the sort orders - shouldn't be fatal because may constant fold later.
  2657. EXCLOG(e, "Transform");
  2658. e->Release();
  2659. }
  2660. }
  2661. //Convert hash selfjoin to self-join(distribute)
  2662. if ((op == no_selfjoin) && expr->hasAttribute(hashAtom))
  2663. {
  2664. assertex(!isLocal);
  2665. if (joinInfo.hasRequiredEqualities())
  2666. {
  2667. OwnedHqlExpr sortlist = createValueSafe(no_sortlist, makeSortListType(NULL), joinInfo.queryLeftReq());
  2668. OwnedHqlExpr distribute;
  2669. //Only likely to catch this partition test if isLimitedSubstringJoin true, otherwise caught above
  2670. if (!isPartitionedForGroup(leftDs, sortlist, true))
  2671. {
  2672. distribute.setown(createDistributedInput(leftDs, sortlist, true));
  2673. distribute.setown(cloneInheritedAnnotations(expr, distribute));
  2674. }
  2675. else
  2676. distribute.set(leftDs);
  2677. HqlExprArray args;
  2678. args.append(*LINK(distribute));
  2679. unwindChildren(args, expr, 1);
  2680. removeAttribute(args, hashAtom);
  2681. args.append(*createLocalAttribute());
  2682. return expr->clone(args);
  2683. }
  2684. }
  2685. if (options.expandHashJoin && isThorCluster(targetClusterType) && expr->hasAttribute(hashAtom) && !joinInfo.hasOptionalEqualities())
  2686. {
  2687. HqlExprArray args;
  2688. args.append(*createDistributedInput(leftDs, joinInfo.queryLeftReq(), false));
  2689. args.append(*createDistributedInput(rightDs, joinInfo.queryRightReq(), false));
  2690. unwindChildren(args, expr, 2);
  2691. removeAttribute(args, hashAtom);
  2692. args.append(*createLocalAttribute());
  2693. return expr->clone(args);
  2694. }
  2695. if (isThorCluster(targetClusterType) && isLocal && options.implicitJoinSubSort && !expr->hasAttribute(smartAtom))
  2696. {
  2697. IHqlExpression * noSortAttr = expr->queryAttribute(noSortAtom);
  2698. OwnedHqlExpr newLeft;
  2699. OwnedHqlExpr newRight;
  2700. if (!userPreventsSort(noSortAttr, no_left))
  2701. newLeft.setown(getSubSort(leftDs, joinInfo.queryLeftSort(), isLocal, true, alwaysLocal));
  2702. if (!userPreventsSort(noSortAttr, no_right))
  2703. newRight.setown(getSubSort(rightDs, joinInfo.queryRightSort(), isLocal, true, alwaysLocal));
  2704. if (newLeft || newRight)
  2705. {
  2706. HqlExprArray args;
  2707. if (newLeft)
  2708. args.append(*newLeft.getClear());
  2709. else
  2710. args.append(*LINK(leftDs));
  2711. if (newRight)
  2712. args.append(*newRight.getClear());
  2713. else
  2714. args.append(*LINK(rightDs));
  2715. unwindChildren(args, expr, 2);
  2716. return expr->clone(args);
  2717. }
  2718. }
  2719. return NULL;
  2720. }
  2721. IHqlExpression * ThorHqlTransformer::normalizeScalarAggregate(IHqlExpression * expr)
  2722. {
  2723. OwnedHqlExpr project = convertScalarAggregateToDataset(expr);
  2724. if (!project)
  2725. throwUnexpected();
  2726. IHqlExpression * field = project->queryRecord()->queryChild(0);
  2727. OwnedHqlExpr ret = createNewSelectExpr(project.getClear(), LINK(field));
  2728. return expr->cloneAllAnnotations(ret);
  2729. }
  2730. IHqlExpression * ThorHqlTransformer::normalizeSelect(IHqlExpression * expr)
  2731. {
  2732. return NULL;
  2733. /*
  2734. The idea of this code is to convert a.b.c into normalize(a.b, a.b.c) if a.b is an out-of scope dataset
  2735. However the following isn't good enough since the fields from a.b also need to be accessible. We would
  2736. need to introduce a field in the result $parent$, and also assign that across. Subsequent references to
  2737. a.b.xyz would need to be converted to in.parent.xyz. It will generate very inefficient code, so not going
  2738. to go this way at the moment.
  2739. */
  2740. if (!isNewSelector(expr) || !expr->isDataset())
  2741. return NULL;
  2742. IHqlExpression * ds = expr->queryChild(0);
  2743. if (!ds->isDataset())
  2744. return NULL;
  2745. //If we are a no_select of a no_select that is also new, insert an implicit denormalized
  2746. HqlExprArray args;
  2747. args.append(*LINK(ds));
  2748. OwnedHqlExpr selSeq = createSelectorSequence();
  2749. HqlExprArray selectArgs;
  2750. unwindChildren(selectArgs, expr);
  2751. selectArgs.replace(*createSelector(no_left, ds, selSeq), 0);
  2752. removeAttribute(selectArgs, newAtom);
  2753. args.append(*expr->clone(selectArgs));
  2754. //Create a transform self := right;
  2755. OwnedHqlExpr right = createSelector(no_right, expr, selSeq);
  2756. OwnedHqlExpr assign = createAssign(getSelf(expr), LINK(right));
  2757. OwnedHqlExpr transform = createValue(no_transform, makeTransformType(LINK(expr->queryRecordType())), LINK(assign));
  2758. args.append(*LINK(transform));
  2759. args.append(*LINK(selSeq));
  2760. args.append(*createAttribute(_internal_Atom));
  2761. return createDataset(no_normalize, args);
  2762. }
  2763. IHqlExpression * ThorHqlTransformer::normalizeSort(IHqlExpression * expr)
  2764. {
  2765. IHqlExpression * dataset = expr->queryChild(0);
  2766. IHqlExpression * sortlist = expr->queryChild(1);
  2767. OwnedHqlExpr newsort = simplifySortlistComplexity(sortlist);
  2768. if (newsort)
  2769. {
  2770. if (newsort == sortlist)
  2771. {
  2772. dbglogExpr(sortlist);
  2773. throwUnexpected();
  2774. }
  2775. HqlExprArray args;
  2776. unwindChildren(args, expr);
  2777. args.replace(*newsort.getClear(), 1);
  2778. return expr->clone(args);
  2779. }
  2780. node_operator op = expr->getOperator();
  2781. if (translator.targetThor())
  2782. {
  2783. if ((op == no_sort) && !isGrouped(expr) && !expr->hasAttribute(localAtom))
  2784. {
  2785. //sort(ds, a,b,c) - check so see if there is a previous sort distribution of sort(ds,a,b,c) if so, this sort can be done locally
  2786. if (queryDistribution(expr) == queryDistribution(dataset))
  2787. return appendLocalAttribute(expr);
  2788. }
  2789. }
  2790. if (op == no_sorted)
  2791. {
  2792. IHqlExpression * normalized = normalizeSortSteppedIndex(expr, sortedAtom);
  2793. if (normalized)
  2794. return normalized;
  2795. }
  2796. bool isLocal = expr->hasAttribute(localAtom);
  2797. bool alwaysLocal = !translator.targetThor();
  2798. if ((op != no_assertsorted) && isAlreadySorted(dataset, sortlist, isLocal||alwaysLocal, false))
  2799. return LINK(dataset);
  2800. if (op == no_sorted)
  2801. return normalizeSortSteppedIndex(expr, sortedAtom);
  2802. //NOTE: We can't convert a global sort to a subsort because that will change the distribution
  2803. if (options.implicitSubSort && (isLocal || alwaysLocal) && (op != no_assertsorted))
  2804. {
  2805. OwnedHqlExpr subsorted = getSubSort(dataset, sortlist, isLocal, false, alwaysLocal);
  2806. if (subsorted)
  2807. return dataset->cloneAllAnnotations(subsorted);
  2808. }
  2809. return NULL;
  2810. }
  2811. IHqlExpression * ThorHqlTransformer::normalizeSubSort(IHqlExpression * expr)
  2812. {
  2813. IHqlExpression * dataset = expr->queryChild(0);
  2814. IHqlExpression * sortlist = expr->queryChild(1);
  2815. IHqlExpression * grouping = expr->queryChild(2);
  2816. OwnedHqlExpr newsort = simplifySortlistComplexity(sortlist);
  2817. OwnedHqlExpr newgrouping = simplifySortlistComplexity(grouping);
  2818. if (newsort || newgrouping)
  2819. {
  2820. HqlExprArray args;
  2821. unwindChildren(args, expr);
  2822. if (newsort)
  2823. args.replace(*newsort.getClear(), 1);
  2824. if (newgrouping)
  2825. args.replace(*newgrouping.getClear(), 2);
  2826. return expr->clone(args);
  2827. }
  2828. if (translator.targetThor() && !expr->hasAttribute(localAtom))
  2829. return convertSubSortToGroupedSort(expr);
  2830. return NULL;
  2831. }
  2832. IHqlExpression * ThorHqlTransformer::normalizeSortSteppedIndex(IHqlExpression * expr, IAtom * attrName)
  2833. {
  2834. node_operator op = expr->getOperator();
  2835. if (op == no_assertsorted)
  2836. return NULL;
  2837. IHqlExpression * dataset = expr->queryChild(0);
  2838. node_operator datasetOp = dataset->getOperator();
  2839. if ((datasetOp == no_keyindex) || (datasetOp == no_newkeyindex))
  2840. {
  2841. IHqlExpression * indexRecord = dataset->queryRecord();
  2842. if (!dataset->hasAttribute(attrName))
  2843. {
  2844. HqlExprArray selects;
  2845. IHqlExpression * sortList = expr->queryChild(1);
  2846. if (sortList)
  2847. {
  2848. OwnedHqlExpr mapped = replaceSelector(sortList, dataset->queryNormalizedSelector(), queryActiveTableSelector());
  2849. unwindChildren(selects, mapped);
  2850. }
  2851. HqlExprArray args;
  2852. unwindChildren(args, dataset);
  2853. args.append(*createExprAttribute(attrName, selects));
  2854. return dataset->clone(args);
  2855. }
  2856. }
  2857. return NULL;
  2858. }
  2859. IHqlExpression * ThorHqlTransformer::normalizeTempTable(IHqlExpression * expr)
  2860. {
  2861. #if 0
  2862. //This would be a great improvement to the generated code, but the xml storage formats are different + it doesn't cope with ALL.
  2863. IHqlExpression * values = expr->queryChild(0);
  2864. ITypeInfo * valuesType = values->queryType();
  2865. if ((values->getOperator() == no_getresult) && (valuesType->getTypeCode() == type_set))
  2866. {
  2867. IHqlExpression * record = expr->queryChild(1);
  2868. if ((record->numChildren() == 1) && (valuesType->queryChildType() == record->queryChild(0)->queryType()))
  2869. {
  2870. HqlExprArray args;
  2871. args.append(*LINK(record));
  2872. args.append(*createAttribute(sequenceAtom, LINK(values->queryChild(0))));
  2873. if (values->queryChild(1))
  2874. args.append(*createAttribute(nameAtom, LINK(values->queryChild(1))));
  2875. return createDataset(no_workunit_dataset, args);
  2876. }
  2877. }
  2878. #endif
  2879. return NULL;
  2880. }
  2881. IHqlExpression * ThorHqlTransformer::normalizeChooseN(IHqlExpression * expr)
  2882. {
  2883. OwnedHqlExpr first = foldHqlExpression(queryRealChild(expr, 2));
  2884. if (first)
  2885. {
  2886. if (matchesConstantValue(first, 1))
  2887. {
  2888. HqlExprArray args;
  2889. unwindChildren(args, expr);
  2890. args.remove(2);
  2891. return expr->clone(args);
  2892. }
  2893. }
  2894. if (!options.spotTopN) return NULL;
  2895. return queryConvertChoosenNSort(expr, topNlimit);
  2896. }
  2897. static IHqlExpression * extractPrefetchFields(HqlExprArray & fields, HqlExprArray & values, IHqlExpression * ds, IHqlExpression * expr)
  2898. {
  2899. switch (expr->getOperator())
  2900. {
  2901. case no_newtransform:
  2902. case no_transform:
  2903. case no_assignall:
  2904. case NO_AGGREGATEGROUP:
  2905. case no_sortlist:
  2906. {
  2907. HqlExprArray args;
  2908. ForEachChild(i, expr)
  2909. args.append(*extractPrefetchFields(fields, values, ds, expr->queryChild(i)));
  2910. return expr->clone(args);
  2911. }
  2912. case no_assign:
  2913. {
  2914. HqlExprArray args;
  2915. args.append(*LINK(expr->queryChild(0)));
  2916. args.append(*extractPrefetchFields(fields, values, ds, expr->queryChild(1)));
  2917. return expr->clone(args);
  2918. }
  2919. case no_attr:
  2920. case no_attr_expr:
  2921. case no_attr_link:
  2922. case no_record:
  2923. case no_field:
  2924. return LINK(expr);
  2925. }
  2926. unsigned match = values.find(*expr);
  2927. if (match == NotFound)
  2928. {
  2929. //What about preserving link counting on datasets?
  2930. match = fields.ordinality();
  2931. StringBuffer name;
  2932. name.append("_f").append(match).append("_");
  2933. IHqlExpression * field = createField(createIdAtom(name.str()), expr->getType(), NULL);
  2934. fields.append(*field);
  2935. values.append(*LINK(expr));
  2936. }
  2937. return createSelectExpr(getActiveTableSelector(), LINK(&fields.item(match)));
  2938. }
  2939. IHqlExpression * ThorHqlTransformer::normalizePrefetchAggregate(IHqlExpression * expr)
  2940. {
  2941. //This optimization may be worth doing even if there is no prefetch attribute if the value being summed is very complicated!
  2942. IHqlExpression * prefetch = expr->queryAttribute(prefetchAtom);
  2943. if (!prefetch)
  2944. return NULL;
  2945. //Create a prefetch project for all parameters to count/sum/grouping expressions
  2946. //and then aggregate those values.
  2947. IHqlExpression * ds = expr->queryChild(0);
  2948. HqlExprArray tempArgs, fields, values;
  2949. ForEachChildFrom(i, expr, 2)
  2950. {
  2951. IHqlExpression * cur = expr->queryChild(i);
  2952. if (cur != prefetch)
  2953. tempArgs.append(*extractPrefetchFields(fields, values, ds, cur));
  2954. }
  2955. OwnedHqlExpr newRecord = createRecord(fields);
  2956. OwnedHqlExpr self = createSelector(no_self, newRecord, NULL);
  2957. HqlExprArray assigns;
  2958. ForEachItemIn(iv, fields)
  2959. {
  2960. IHqlExpression * tgt = createSelectExpr(LINK(self), &OLINK(fields.item(iv)));
  2961. assigns.append(*createAssign(tgt, &OLINK(values.item(iv))));
  2962. }
  2963. HqlExprArray args;
  2964. args.append(*LINK(ds));
  2965. args.append(*LINK(newRecord));
  2966. args.append(*createValue(no_newtransform, makeTransformType(newRecord->getType()), assigns));
  2967. args.append(*LINK(prefetch));
  2968. OwnedHqlExpr project = createDataset(no_newusertable, args);
  2969. project.setown(cloneInheritedAnnotations(expr, project));
  2970. args.kill();
  2971. args.append(*LINK(project));
  2972. args.append(*LINK(expr->queryChild(1)));
  2973. ForEachItemIn(i2, tempArgs)
  2974. args.append(*replaceSelector(&tempArgs.item(i2), queryActiveTableSelector(), project->queryNormalizedSelector()));
  2975. return expr->clone(args);
  2976. }
  2977. static IHqlExpression * convertAggregateGroupingToGroupedAggregate(IHqlExpression * expr, IHqlExpression* groupBy)
  2978. {
  2979. IHqlExpression * dataset = expr->queryChild(0);
  2980. HqlExprArray groupArgs;
  2981. groupArgs.append(*LINK(dataset));
  2982. groupArgs.append(*LINK(groupBy));
  2983. groupArgs.append(*createAttribute(allAtom));
  2984. unwindChildren(groupArgs, expr, 4);
  2985. OwnedHqlExpr result = createDataset(no_group, groupArgs);
  2986. result.setown(cloneInheritedAnnotations(expr, result));
  2987. HqlExprArray args;
  2988. unwindChildren(args, expr);
  2989. args.replace(*result.getClear(), 0);
  2990. args.remove(3); // no longer grouped.
  2991. return expr->clone(args);
  2992. }
  2993. IHqlExpression * ThorHqlTransformer::getMergeTransform(IHqlExpression * dataset, IHqlExpression * transform)
  2994. {
  2995. HqlExprArray args;
  2996. ForEachChild(i, transform)
  2997. {
  2998. IHqlExpression * cur = transform->queryChild(i);
  2999. switch (cur->getOperator())
  3000. {
  3001. case no_assignall:
  3002. args.append(*getMergeTransform(dataset, cur));
  3003. break;
  3004. case no_assign:
  3005. {
  3006. IHqlExpression * lhs = cur->queryChild(0);
  3007. IHqlExpression * lhsField = lhs->queryChild(1);
  3008. IHqlExpression * rhs = cur->queryChild(1);
  3009. OwnedHqlExpr selected = createSelectExpr(LINK(dataset), LINK(lhsField));
  3010. OwnedHqlExpr newRhs;
  3011. node_operator rhsOp = rhs->getOperator();
  3012. switch (rhsOp)
  3013. {
  3014. case no_countgroup:
  3015. case no_sumgroup:
  3016. newRhs.setown(createValue(no_sumgroup, selected->getType(), LINK(selected)));
  3017. break;
  3018. case no_maxgroup:
  3019. case no_mingroup:
  3020. newRhs.setown(createValue(rhsOp, selected->getType(), LINK(selected)));
  3021. break;
  3022. case no_existsgroup:
  3023. newRhs.setown(createValue(no_existsgroup, selected->getType(), LINK(selected)));
  3024. break;
  3025. case no_vargroup:
  3026. case no_covargroup:
  3027. case no_corrgroup:
  3028. case no_avegroup:
  3029. throwUnexpected();
  3030. default:
  3031. newRhs.set(selected);
  3032. break;
  3033. }
  3034. args.append(*createAssign(LINK(lhs), newRhs.getClear()));
  3035. break;
  3036. }
  3037. default:
  3038. args.append(*LINK(cur));
  3039. break;
  3040. }
  3041. }
  3042. return transform->clone(args);
  3043. }
  3044. //Convert table(x { count(group), sum(group, x) }, gr) to
  3045. //sort(x, gr, local) -> group(gr) -> aggregate -> distribute(merge) -> group(local) -> aggregate'
  3046. IHqlExpression * ThorHqlTransformer::normalizeMergeAggregate(IHqlExpression * expr)
  3047. {
  3048. IHqlExpression * dataset = expr->queryChild(0);
  3049. IHqlExpression * groupBy = expr->queryChild(3);
  3050. //If locally distributed then don't do anything
  3051. OwnedHqlExpr noMerge = removeAttribute(expr, mergeAtom);
  3052. if (!translator.targetThor() || expr->hasAttribute(localAtom) || isPartitionedForGroup(dataset, groupBy, true))
  3053. return noMerge.getClear();
  3054. //Convert the aggregation (so no covariance/ave and other computed fields)
  3055. OwnedHqlExpr normalized = normalizeTableToAggregate(noMerge, true);
  3056. IHqlExpression * aggregate = normalized;
  3057. if (aggregate->getOperator() != no_newaggregate)
  3058. aggregate = aggregate->queryChild(0);
  3059. assertex(aggregate->getOperator() == no_newaggregate);
  3060. HqlExprArray localAggregateArgs;
  3061. unwindChildren(localAggregateArgs, aggregate);
  3062. removeAttribute(localAggregateArgs, hashAtom);
  3063. removeAttribute(localAggregateArgs, mergeAtom);
  3064. localAggregateArgs.append(*createLocalAttribute());
  3065. localAggregateArgs.append(*createAttribute(sortedAtom));
  3066. //Local aggregate and force a local sort order to be used
  3067. OwnedHqlExpr localAggregate = aggregate->clone(localAggregateArgs);
  3068. OwnedHqlExpr localGroupedAggregate = convertAggregateGroupingToGroupedAggregate(localAggregate, groupBy);
  3069. //Ensure the group,all is transformed to a local sort, local group
  3070. OwnedHqlExpr transformedFirstAggregate = transform(localGroupedAggregate);
  3071. //Use distribute(,MERGE) to move rows globally, and remain sorted
  3072. //Note grouping fields need to be mapped using the fields projected by the aggregate
  3073. TableProjectMapper mapper(transformedFirstAggregate);
  3074. bool groupCanBeMapped = false;
  3075. OwnedHqlExpr mappedGrouping = mapper.collapseFields(groupBy, dataset, transformedFirstAggregate, &groupCanBeMapped);
  3076. assertex(groupCanBeMapped);
  3077. OwnedHqlExpr sortOrder = getExistingSortOrder(transformedFirstAggregate, true, true);
  3078. OwnedHqlExpr mergeAttr = createExprAttribute(mergeAtom, replaceSelector(sortOrder, queryActiveTableSelector(), transformedFirstAggregate));
  3079. OwnedHqlExpr hashed = createValue(no_hash32, LINK(unsignedType), LINK(mappedGrouping), createAttribute(internalAtom));
  3080. OwnedHqlExpr redistributed = createDatasetF(no_distribute, LINK(transformedFirstAggregate), LINK(hashed), mergeAttr.getClear(), NULL);
  3081. redistributed.setown(cloneInheritedAnnotations(expr, redistributed));
  3082. OwnedHqlExpr grouped = createDatasetF(no_group, LINK(redistributed), LINK(mappedGrouping), createLocalAttribute(), NULL);
  3083. grouped.setown(cloneInheritedAnnotations(expr, grouped));
  3084. HqlExprArray args;
  3085. args.append(*LINK(grouped));
  3086. args.append(*LINK(localAggregate->queryChild(1)));
  3087. args.append(*getMergeTransform(grouped->queryNormalizedSelector(), localAggregate->queryChild(2)));
  3088. unwindChildren(args, localAggregate, 4);
  3089. OwnedHqlExpr newAggregate = localAggregate->clone(args);
  3090. if (aggregate == normalized)
  3091. return newAggregate.getClear();
  3092. return replaceChildDataset(normalized, newAggregate, 0);
  3093. }
  3094. IHqlExpression * ThorHqlTransformer::normalizeTableToAggregate(IHqlExpression * expr, bool canOptimizeCasts)
  3095. {
  3096. IHqlExpression * dataset = expr->queryChild(0);
  3097. IHqlExpression * record = expr->queryChild(1);
  3098. IHqlExpression * transform = expr->queryChild(2);
  3099. IHqlExpression * groupBy = expr->queryChild(3);
  3100. if (!isAggregateDataset(expr))
  3101. return NULL;
  3102. //MORE: Should fail if asked to group by variable length field, or do max/min on variable length field.
  3103. HqlExprArray aggregateFields;
  3104. HqlExprArray aggregateAssigns;
  3105. HqlExprArray extraAssigns;
  3106. bool extraSelectNeeded = false;
  3107. OwnedHqlExpr self = getSelf(expr);
  3108. ForEachChild(idx, transform)
  3109. {
  3110. IHqlExpression * assign=transform->queryChild(idx);
  3111. IHqlExpression * cur = assign->queryChild(0);
  3112. IHqlExpression * src = assign->queryChild(1);
  3113. IHqlExpression * mapped = normalizeAggregateExpr(cur, src, aggregateFields, aggregateAssigns, extraSelectNeeded, canOptimizeCasts);
  3114. if (mapped == src)
  3115. {
  3116. mapped->Release();
  3117. mapped = replaceSelector(cur, self, queryActiveTableSelector());
  3118. // Not an aggregate - must be an expression that is used in the grouping
  3119. aggregateFields.append(*LINK(cur->queryChild(1)));
  3120. aggregateAssigns.append(*createAssign(LINK(mapped), LINK(src)));
  3121. }
  3122. // Add expression to calculate the fields to the second projection
  3123. extraAssigns.append(*createAssign(LINK(cur), mapped));
  3124. }
  3125. //Now add any grouping fields.......
  3126. IHqlExpression * newGroupBy = NULL;
  3127. if (groupBy && !groupBy->isAttribute())
  3128. {
  3129. unsigned numGroupBy = groupBy->numChildren();
  3130. HqlExprArray newGroupElement;
  3131. for (unsigned idx = 0; idx < numGroupBy; idx++)
  3132. {
  3133. IHqlExpression * curGroup = groupBy->queryChild(idx);
  3134. bool matched = false;
  3135. ForEachItemIn(idxa, aggregateAssigns)
  3136. {
  3137. IHqlExpression * rhs = aggregateAssigns.item(idxa).queryChild(1);
  3138. if (rhs->getOperator() == no_activerow)
  3139. rhs = rhs->queryChild(0);
  3140. if (rhs == curGroup)
  3141. {
  3142. matched = true;
  3143. break;
  3144. }
  3145. }
  3146. if (!matched)
  3147. {
  3148. StringBuffer temp;
  3149. temp.append("_agg_").append(aggregateAssigns.ordinality());
  3150. IHqlExpression * targetField = createField(createIdAtom(temp.str()), curGroup->getType(), NULL);
  3151. aggregateFields.append(*targetField);
  3152. aggregateAssigns.append(*createAssign(createSelectExpr(getActiveTableSelector(), LINK(targetField)), LINK(curGroup)));
  3153. extraSelectNeeded = true;
  3154. }
  3155. newGroupElement.append(*LINK(curGroup));
  3156. }
  3157. newGroupBy = createSortList(newGroupElement);
  3158. }
  3159. IHqlExpression * aggregateRecord = extraSelectNeeded ? createRecord(aggregateFields) : LINK(record);
  3160. OwnedHqlExpr aggregateSelf = getSelf(aggregateRecord);
  3161. replaceAssignSelector(aggregateAssigns, aggregateSelf);
  3162. IHqlExpression * aggregateTransform = createValue(no_newtransform, makeTransformType(aggregateRecord->getType()), aggregateAssigns);
  3163. HqlExprArray aggregateAttrs;
  3164. unwindAttributes(aggregateAttrs, expr);
  3165. if (!expr->hasAttribute(localAtom) && newGroupBy && !isGrouped(dataset) && isPartitionedForGroup(dataset, newGroupBy, true))
  3166. aggregateAttrs.append(*createLocalAttribute());
  3167. OwnedHqlExpr ret = createDataset(no_newaggregate, LINK(dataset), createComma(aggregateRecord, aggregateTransform, newGroupBy, createComma(aggregateAttrs)));
  3168. if (extraSelectNeeded)
  3169. ret.setown(cloneInheritedAnnotations(expr, ret));
  3170. else
  3171. ret.setown(expr->cloneAllAnnotations(ret));
  3172. if (expr->hasAttribute(mergeAtom))
  3173. ret.setown(normalizeMergeAggregate(ret));
  3174. if (extraSelectNeeded)
  3175. {
  3176. replaceAssignSelector(extraAssigns, ret);
  3177. IHqlExpression * projectTransform = createValue(no_newtransform, makeTransformType(record->getType()), extraAssigns);
  3178. ret.setown(createDataset(no_newusertable, ret.getClear(), createComma(LINK(record), projectTransform)));
  3179. ret.setown(expr->cloneAllAnnotations(ret));
  3180. }
  3181. return ret.getClear();
  3182. }
  3183. IHqlExpression * ThorHqlTransformer::normalizeTableGrouping(IHqlExpression * expr)
  3184. {
  3185. //Transform table(x,y,z) to table(group(x,z),y)
  3186. IHqlExpression * dataset = expr->queryChild(0);
  3187. LinkedHqlExpr group = queryRealChild(expr, 3);
  3188. if (group)
  3189. {
  3190. if (expr->hasAttribute(mergeAtom))
  3191. return normalizeMergeAggregate(expr);
  3192. bool useHashAggregate = expr->hasAttribute(fewAtom);
  3193. if (expr->getOperator() == no_aggregate)
  3194. {
  3195. OwnedHqlExpr selector = createSelector(no_left, dataset->queryRecord(), querySelSeq(expr));
  3196. group.setown(replaceSelector(group, selector, dataset));
  3197. //Cannot use a hash aggregate if we don't know the mapping from input to output fields...
  3198. if (!isKnownTransform(expr->queryChild(2)))
  3199. useHashAggregate = false;
  3200. }
  3201. if (useHashAggregate && group->isConstant() && !translator.targetThor())
  3202. return removeAttribute(expr, fewAtom);
  3203. if (!expr->hasAttribute(manyAtom) && !expr->hasAttribute(sortedAtom))
  3204. {
  3205. if (isSmallGrouping(group))
  3206. {
  3207. OwnedHqlExpr newsort = simplifySortlistComplexity(group);
  3208. if (!newsort)
  3209. newsort.set(group);
  3210. LinkedHqlExpr ds = dataset;
  3211. if (isGrouped(ds))
  3212. {
  3213. ds.setown(createDataset(no_group, ds.getClear(), NULL));
  3214. ds.setown(cloneInheritedAnnotations(expr, ds));
  3215. }
  3216. OwnedHqlExpr sorted = ensureSortedForGroup(ds, newsort, expr->hasAttribute(localAtom), !translator.targetThor(), options.implicitGroupSubSort);
  3217. //For thor a global grouped aggregate would transfer elements between nodes so it is still likely to
  3218. //be more efficient to do a hash aggregate. Even better would be to check the distribution
  3219. if ((sorted != ds) ||
  3220. (translator.targetThor() && !expr->hasAttribute(localAtom) && !isPartitionedForGroup(ds, newsort, true)))
  3221. useHashAggregate = true;
  3222. }
  3223. //Default to a hash aggregate for child queries/normalized sources
  3224. IHqlExpression * rootDs = queryExpression(dataset->queryDataset()->queryRootTable());
  3225. if (rootDs && rootDs->getOperator() == no_select)
  3226. useHashAggregate = true;
  3227. }
  3228. if (!useHashAggregate && !expr->hasAttribute(groupedAtom))
  3229. return convertAggregateGroupingToGroupedAggregate(expr, group);
  3230. }
  3231. return NULL;
  3232. }
  3233. void HqlCppTranslator::convertLogicalToActivities(WorkflowItem & curWorkflow)
  3234. {
  3235. {
  3236. unsigned time = msTick();
  3237. ThorHqlTransformer transformer(*this, targetClusterType, wu());
  3238. HqlExprArray & exprs = curWorkflow.queryExprs();
  3239. HqlExprArray transformed;
  3240. transformer.transformRoot(exprs, transformed);
  3241. replaceArray(exprs, transformed);
  3242. updateTimer("workunit;tree transform: convert logical", msTick()-time);
  3243. }
  3244. if (queryOptions().normalizeLocations)
  3245. normalizeAnnotations(*this, curWorkflow.queryExprs());
  3246. }
  3247. //------------------------------------------------------------------------
  3248. CompoundSourceInfo::CompoundSourceInfo(IHqlExpression * _original) : NewTransformInfo(_original)
  3249. {
  3250. sourceOp = no_none;
  3251. mode = no_none;
  3252. splitCount = 0;
  3253. reset();
  3254. }
  3255. void CompoundSourceInfo::reset()
  3256. {
  3257. forceCompound = false;
  3258. isBoundary = false;
  3259. isPreloaded = false;
  3260. isLimited = false;
  3261. hasChoosen = false;
  3262. hasSkipLimit = false;
  3263. isCloned = false;
  3264. isFiltered = false;
  3265. isPostFiltered = false;
  3266. isCreateRowLimited = false;
  3267. hasOnFail = false;
  3268. }
  3269. bool CompoundSourceInfo::canMergeLimit(IHqlExpression * expr, ClusterType targetClusterType) const
  3270. {
  3271. if (isAggregate() || isChooseNAllLimit(expr->queryChild(1)) || !isBinary())
  3272. return false;
  3273. node_operator op = expr->getOperator();
  3274. switch (op)
  3275. {
  3276. case no_limit:
  3277. //Can't merge a limit into a choosen() because the limit will be applied first
  3278. if (isLimited || hasChoosen)
  3279. return false;
  3280. //Don't merge skip and onfail limits into activities that can't implement them completely
  3281. if (targetClusterType != RoxieCluster)
  3282. {
  3283. if (expr->hasAttribute(skipAtom) || expr->hasAttribute(onFailAtom))
  3284. return false;
  3285. }
  3286. else
  3287. {
  3288. //Can always limit a count/aggregate with a skip limit - just resets count to 0
  3289. if (expr->hasAttribute(skipAtom))
  3290. return true;
  3291. }
  3292. break;
  3293. case no_choosen:
  3294. if (hasChoosen)
  3295. return false;
  3296. break;
  3297. }
  3298. switch (sourceOp)
  3299. {
  3300. case no_compound_diskread:
  3301. case no_compound_disknormalize:
  3302. case no_compound_indexread:
  3303. case no_compound_indexnormalize:
  3304. return true;
  3305. }
  3306. return false;
  3307. }
  3308. void CompoundSourceInfo::ensureCompound()
  3309. {
  3310. if (sourceOp != no_none)
  3311. {
  3312. forceCompound = true;
  3313. #if 0
  3314. //MORE: We should really remove the sharing for entries that are going to become compound activities.
  3315. //However, that isn't just for this case - should be iterative
  3316. //e.g. while (spotMoreCompoundActivities())....
  3317. IHqlExpression * search = original;
  3318. loop
  3319. {
  3320. CompoundSourceInfo * extra = queryExtra(search);
  3321. if (extra->sharedCount-- > 1)
  3322. break;
  3323. search = search->queryChild(0);
  3324. }
  3325. #endif
  3326. }
  3327. }
  3328. bool CompoundSourceInfo::inherit(const CompoundSourceInfo & other, node_operator newSourceOp)
  3329. {
  3330. isLimited = other.isLimited;
  3331. hasSkipLimit = other.hasSkipLimit;
  3332. hasChoosen = other.hasChoosen;
  3333. isFiltered = other.isFiltered;
  3334. isPostFiltered = other.isPostFiltered;
  3335. isPreloaded = other.isPreloaded;
  3336. isCreateRowLimited = other.isCreateRowLimited;
  3337. hasOnFail = other.hasOnFail;
  3338. mode = other.mode;
  3339. uid.set(other.uid);
  3340. if (other.sourceOp == no_none)
  3341. return false;
  3342. if (newSourceOp == no_none)
  3343. {
  3344. if (other.isCloned)
  3345. return false;
  3346. newSourceOp = other.sourceOp;
  3347. }
  3348. sourceOp = newSourceOp;
  3349. return true;
  3350. }
  3351. bool CompoundSourceInfo::isAggregate() const
  3352. {
  3353. switch (sourceOp)
  3354. {
  3355. case no_compound_diskaggregate:
  3356. case no_compound_diskcount:
  3357. case no_compound_diskgroupaggregate:
  3358. case no_compound_indexaggregate:
  3359. case no_compound_indexcount:
  3360. case no_compound_indexgroupaggregate:
  3361. case no_compound_childaggregate:
  3362. case no_compound_childcount:
  3363. case no_compound_childgroupaggregate:
  3364. return true;
  3365. }
  3366. return false;
  3367. }
  3368. //This doesn't try to restrict creating the compound nodes to the inner level, but will also create them for nested children.
  3369. //This shouldn't cause any problems, since compound operators within compounds are ignored, and it means that this transformer
  3370. //doesn't have to cope with being scope dependent.
  3371. //Calling the transformer again later on child queries should extend the compound activities if appropriate.
  3372. static HqlTransformerInfo compoundSourceTransformerInfo("CompoundSourceTransformer");
  3373. CompoundSourceTransformer::CompoundSourceTransformer(HqlCppTranslator & _translator, unsigned _flags)
  3374. : NewHqlTransformer(compoundSourceTransformerInfo), translator(_translator)
  3375. {
  3376. targetClusterType = translator.getTargetClusterType();
  3377. flags = _flags;
  3378. insideCompound = false;
  3379. candidate = false;
  3380. }
  //Analysis for pass 0 (see analyseExpr): gathers the information about expr that is later used to decide
  //where compound source activities are created.
  //It works in two stages:
  //  1) walk the children, tracking (in insideCompound) whether they are inside something that is
  //     already compound or keyed;
  //  2) fill in the CompoundSourceInfo for expr - generally by inheriting the state from the info
  //     for child 0 and then adjusting the source operator and flags for this operator.
  //insideCompound is restored to its value on entry before returning.
  3381. void CompoundSourceTransformer::analyseGatherInfo(IHqlExpression * expr)
  3382. {
  3383. CompoundSourceInfo * extra = queryBodyExtra(expr);
  3384. node_operator op = expr->getOperator();
  3385. bool wasInsideCompound = insideCompound;
  //Usage is only noted for expressions that are not inside a compound
  3386. if (!insideCompound)
  3387. extra->noteUsage();
  3388. if (!expr->isDataset())
  3389. insideCompound = false;
  //Stage 1: analyse the children
  3390. switch (op)
  3391. {
  3392. case no_fetch:
  3393. {
  //Child 0 is deliberately not analysed
  3394. unsigned max = expr->numChildren();
  3395. for (unsigned i =1; i < max; i++)
  3396. analyseExpr(expr->queryChild(i));
  3397. break;
  3398. }
  //None of the children of the following are analysed
  3399. case no_keyed:
  3400. case no_record:
  3401. case no_attr:
  3402. case no_attr_expr:
  3403. break;
  //The children of the following are always treated as being inside a compound
  3404. case no_keyedlimit:
  3405. case no_compound_diskread:
  3406. case no_compound_disknormalize:
  3407. case no_compound_diskaggregate:
  3408. case no_compound_diskcount:
  3409. case no_compound_diskgroupaggregate:
  3410. case no_compound_indexread:
  3411. case no_compound_indexnormalize:
  3412. case no_compound_indexaggregate:
  3413. case no_compound_indexcount:
  3414. case no_compound_indexgroupaggregate:
  3415. case no_compound_childread:
  3416. case no_compound_childnormalize:
  3417. case no_compound_childaggregate:
  3418. case no_compound_childcount:
  3419. case no_compound_childgroupaggregate:
  3420. case no_compound_selectnew:
  3421. case no_compound_inline:
  3422. case no_preload:
  3423. insideCompound = true;
  3424. NewHqlTransformer::analyseExpr(expr);
  3425. break;
  3426. case no_filter:
  3427. if (filterIsKeyed(expr))
  3428. insideCompound = true;
  3429. NewHqlTransformer::analyseExpr(expr);
  3430. break;
  3431. case no_hqlproject:
  3432. case no_newusertable:
  3433. case no_aggregate:
  3434. case no_newaggregate:
  3435. if (expr->hasAttribute(keyedAtom))
  3436. insideCompound = true;
  3437. NewHqlTransformer::analyseExpr(expr);
  3438. break;
  3439. case no_join:
  3440. if (isKeyedJoin(expr) && !expr->hasAttribute(_complexKeyed_Atom))
  3441. {
  //Child 1 is skipped: only child 0 and the children from index 2 onwards are analysed
  3442. analyseExpr(expr->queryChild(0));
  3443. doAnalyseChildren(expr, 2);
  3444. }
  3445. else
  3446. NewHqlTransformer::analyseExpr(expr);
  3447. break;
  3448. default:
  3449. NewHqlTransformer::analyseExpr(expr);
  3450. break;
  3451. }
  //Stage 2: work out the source operator and flags for this expression
  3452. switch (op)
  3453. {
  3454. case no_newkeyindex:
  3455. extra->sourceOp = no_compound_indexread;
  3456. extra->uid.set(expr->queryAttribute(_uid_Atom));
  3457. extra->mode = no_thor;
  3458. break;
  3459. case no_table:
  3460. {
  3461. IHqlExpression * mode = expr->queryChild(2);
  3462. if (!mode)
  3463. break;
  3464. switch (mode->getOperator())
  3465. {
  3466. case no_thor:
  3467. case no_flat:
  //Spill files only become compound disk reads if CSFcompoundSpill is set
  3468. if ((flags & CSFcompoundSpill) || !expr->hasAttribute(_spill_Atom))
  3469. {
  3470. extra->sourceOp = no_compound_diskread;
  3471. extra->isPreloaded = expr->hasAttribute(preloadAtom);
  3472. extra->uid.set(expr->queryAttribute(_uid_Atom));
  3473. extra->mode = no_thor;
  3474. }
  3475. break;
  3476. case no_csv:
  3477. if (translator.queryOptions().enableCompoundCsvRead)
  3478. {
  3479. extra->sourceOp = no_compound_diskread;
  3480. extra->isPreloaded = expr->hasAttribute(preloadAtom);
  3481. extra->uid.set(expr->queryAttribute(_uid_Atom));
  3482. extra->mode = mode->getOperator();
  3483. }
  3484. break;
  3485. }
  3486. break;
  3487. }
  3488. case no_hqlproject:
  3489. {
  3490. if (!expr->hasAttribute(prefetchAtom))
  3491. {
  3492. IHqlExpression * transform = expr->queryChild(1);
  3493. IHqlExpression * counter = queryAttributeChild(expr, _countProject_Atom, 0);
  //Projects whose transform uses the counter are not merged into the source
  3494. if (!counter || !transformContainsCounter(transform, counter))
  3495. {
  3496. IHqlExpression * dataset = expr->queryChild(0);
  3497. CompoundSourceInfo * parentExtra = queryBodyExtra(dataset);
  3498. //Skips in datasets don't work very well at the moment - pure() is a bit strict really.
  3499. if ((dataset->isPure() || expr->hasAttribute(keyedAtom)) && !parentExtra->isAggregate())
  3500. {
  3501. extra->inherit(*parentExtra);
  3502. if (expr->hasAttribute(keyedAtom))
  3503. extra->ensureCompound();
  //A project that is not a pure activity is recorded as a (post) filter
  3504. if (!isPureActivity(expr))
  3505. {
  3506. extra->isFiltered = true;
  3507. extra->isPostFiltered = true;
  3508. }
  3509. }
  3510. }
  3511. }
  3512. break;
  3513. }
  3514. case no_keyedlimit:
  3515. {
  3516. IHqlExpression * dataset = expr->queryChild(0);
  3517. CompoundSourceInfo * parentExtra = queryBodyExtra(dataset);
  3518. if (!parentExtra->isAggregate() && parentExtra->isBinary())
  3519. {
  3520. extra->inherit(*parentExtra);
  3521. extra->ensureCompound();
  3522. if (expr->hasAttribute(onFailAtom))
  3523. extra->hasOnFail = true;
  3524. }
  3525. break;
  3526. }
  3527. case no_inlinetable:
  3528. case no_temptable:
  3529. case no_datasetfromrow:
  3530. extra->sourceOp = no_compound_inline;
  3531. extra->mode = no_inlinetable;
  3532. break;
  3533. case no_workunit_dataset:
  3534. // extra->sourceOp = no_compound_childread;
  3535. break;
  3536. case no_getgraphresult:
  3537. case no_externalcall:
  3538. // if (expr->isDataset())
  3539. // extra->sourceOp = no_compound_childread;
  3540. break;
  //Existing compound operators: inherit from the child, but keep this operator as the source operator
  3541. case no_compound_diskread:
  3542. case no_compound_disknormalize:
  3543. case no_compound_diskaggregate:
  3544. case no_compound_diskcount:
  3545. case no_compound_diskgroupaggregate:
  3546. case no_compound_indexread:
  3547. case no_compound_indexnormalize:
  3548. case no_compound_indexaggregate:
  3549. case no_compound_indexcount:
  3550. case no_compound_indexgroupaggregate:
  3551. case no_compound_childread:
  3552. case no_compound_childnormalize:
  3553. case no_compound_childaggregate:
  3554. case no_compound_childcount:
  3555. case no_compound_childgroupaggregate:
  3556. case no_compound_selectnew:
  3557. case no_compound_inline:
  3558. {
  3559. IHqlExpression * dataset = expr->queryChild(0);
  3560. CompoundSourceInfo * parentExtra = queryBodyExtra(dataset);
  3561. extra->inherit(*parentExtra, op);
  3562. extra->uid.set(expr->queryAttribute(_uid_Atom));
  3563. break;
  3564. }
  3565. case no_select:
  3566. if (expr->isDataset())
  3567. {
  3568. if (expr->hasAttribute(newAtom))
  3569. {
  3570. IHqlExpression * dataset = expr->queryChild(0);
  3571. CompoundSourceInfo * parentExtra = queryBodyExtra(dataset);
  3572. if (!parentExtra->isAggregate() && !parentExtra->hasAnyLimit() && parentExtra->isBinary())
  3573. {
  //Selecting a child dataset from a read/normalize becomes the matching normalize, if enabled in flags
  3574. node_operator newOp = no_none;
  3575. switch (parentExtra->sourceOp)
  3576. {
  3577. case no_compound_diskread:
  3578. case no_compound_disknormalize:
  3579. if (flags & CSFnewdisk)
  3580. newOp = no_compound_disknormalize;
  3581. break;
  3582. case no_compound_indexread:
  3583. case no_compound_indexnormalize:
  3584. if (flags & CSFnewindex)
  3585. newOp = no_compound_indexnormalize;
  3586. break;
  3587. case no_compound_childread:
  3588. case no_compound_childnormalize:
  3589. if (flags & CSFnewchild)
  3590. newOp = no_compound_childnormalize;
  3591. break;
  3592. }
  3593. if (newOp)
  3594. {
  3595. if (extra->inherit(*parentExtra))
  3596. {
  3597. extra->sourceOp = newOp;
  3598. extra->ensureCompound();
  3599. }
  3600. }
  3601. }
  //Otherwise fall back to a no_compound_selectnew, discarding any flags set so far
  3602. if ((flags & CSFnewchild) && (extra->sourceOp == no_none))
  3603. {
  3604. extra->reset();
  3605. extra->sourceOp = no_compound_selectnew;
  3606. extra->ensureCompound();
  3607. }
  3608. }
  3609. else
  3610. {
  3611. if ((flags & CSFnewchild) && !isTargetSelector(expr)) // latter is optimization - still works without this
  3612. {
  3613. extra->sourceOp = no_compound_childread;
  3614. }
  3615. }
  3616. }
  3617. break;
  3618. case no_choosen:
  3619. {
  3620. IHqlExpression * arg2 = expr->queryChild(2);
  3621. if (arg2 && !arg2->isPure())
  3622. break;
  //Deliberately shares the no_limit processing below
  3623. //fall through
  3624. }
  3625. case no_limit:
  3626. {
  3627. IHqlExpression * dataset = expr->queryChild(0);
  3628. CompoundSourceInfo * parentExtra = queryBodyExtra(dataset);
  3629. bool cloneRequired = needToCloneLimit(expr, parentExtra->sourceOp);
  //A limit that needs to be cloned is only merged if the limit expression is pure
  3630. if (cloneRequired && !expr->queryChild(1)->isPure())
  3631. break;
  3632. if (parentExtra->canMergeLimit(expr, targetClusterType) && !isGrouped(expr) && parentExtra->isBinary())
  3633. {
  3634. if (extra->inherit(*parentExtra))
  3635. {
  3636. if (op == no_choosen)
  3637. {
  3638. extra->hasChoosen = true;
  3639. }
  3640. else
  3641. {
  3642. extra->isLimited = true;
  3643. if (expr->hasAttribute(skipAtom))
  3644. extra->hasSkipLimit = true;
  3645. }
  3646. if (expr->hasAttribute(onFailAtom))
  3647. extra->isCreateRowLimited = true;
  3648. extra->isCloned = cloneRequired;
  3649. }
  3650. }
  3651. break;
  3652. }
  3653. case no_aggregate:
  3654. case no_newusertable:
  3655. case no_newaggregate:
  3656. {
  3657. IHqlExpression * dataset = expr->queryChild(0);
  3658. CompoundSourceInfo * parentExtra = queryBodyExtra(dataset);
  3659. if (isAggregateDataset(expr))
  3660. {
  //NB: Each break below leaves the switch, so the keyed check after this if/else is skipped.
  3661. //Don't yet have csv/xml variants!
  3662. if (!parentExtra->isBinary())
  3663. break;
  3664. //ONFAIL isn't supported for compound aggregates at the moment - although it could be....
  3665. if (parentExtra->hasOnFail)
  3666. break;
  3667. IHqlExpression * root = queryRoot(dataset);
  3668. if (!root || isGrouped(root) || expr->hasAttribute(localAtom))
  3669. break;
  3670. bool isSimpleCountExists = isSimpleCountExistsAggregate(expr, true, false);
  3671. if (parentExtra->isCreateRowLimited)
  3672. break;
  3673. if (parentExtra->hasAnyLimit() && !isSimpleCountExists)
  3674. break;
  3675. //We either have a limit or choosen as the input
  3676. if (parentExtra->isCloned)
  3677. {
  3678. assertex(isSimpleCountExists);
  3679. //Too complicated if the limit is local
  3680. if ((targetClusterType == ThorLCRCluster) && dataset->hasAttribute(localAtom))
  3681. break;
  3682. //CHOOSEN(ds, x, <n>)
  3683. if ((dataset->getOperator() == no_choosen) && queryRealChild(dataset, 2))
  3684. break;
  3685. }
  3686. node_operator newOp = no_none;
  3687. node_operator parentOp = parentExtra->sourceOp;
  3688. if (queryRealChild(expr, 3))
  3689. {
  3690. //Grouped aggregate
  3691. switch (parentOp)
  3692. {
  3693. case no_compound_diskread:
  3694. case no_compound_disknormalize:
  3695. if (flags & CSFnewdisk)
  3696. newOp = no_compound_diskgroupaggregate;
  3697. break;
  3698. case no_compound_indexread:
  3699. case no_compound_indexnormalize:
  3700. if (flags & CSFnewindex)
  3701. newOp = no_compound_indexgroupaggregate;
  3702. break;
  3703. case no_compound_childread:
  3704. case no_compound_childnormalize:
  3705. if (flags & CSFnewchild)
  3706. newOp = no_compound_childgroupaggregate;
  3707. break;
  3708. }
  3709. }
  3710. else
  3711. {
  //Aggregate with no grouping
  3712. switch (parentOp)
  3713. {
  3714. case no_compound_diskread:
  3715. case no_compound_disknormalize:
  3716. if (flags & CSFnewdisk)
  3717. {
  3718. newOp = no_compound_diskaggregate;
  //An unfiltered count/exists of a fixed width disk file is always made compound
  3719. if (isSimpleCountExists && !parentExtra->isFiltered && (parentOp == no_compound_diskread))
  3720. {
  //NB: this root is derived from expr and shadows the outer root, which was derived from dataset
  3721. IHqlExpression * root = queryRoot(expr);
  3722. if (root)
  3723. {
  3724. ColumnToOffsetMap * map = translator.queryRecordOffsetMap(root->queryRecord());
  3725. if (map->isFixedWidth())
  3726. extra->forceCompound = true;
  3727. }
  3728. }
  3729. }
  3730. break;
  3731. case no_compound_indexread:
  3732. case no_compound_indexnormalize:
  3733. //Don't create counts for (non-keyed) skip limits - little benefit, and could cause problems
  3734. //correctly returning the counts - e.g. especially for exists()
  3735. if ((flags & CSFnewindex) && !parentExtra->hasSkipLimit)
  3736. {
  3737. newOp = no_compound_indexaggregate;
  3738. //Force counts on indexes to become a new compound activity
  3739. //otherwise if(count(x) > n, f(x), g()) will always cause x to be read and spilt.
  3740. //The commented out test would do a better job, but not all keyed filters have an explicit keyed() so it is insufficient
  3741. //
  3742. //Really this should become a count if there are no index reads with the same level of conditionality, or if all accesses
  3743. //are counts.
  3744. //That can be logged as a future enhancement.....
  3745. // if (isSimpleCountExists && !parentExtra->isPostFiltered && (parentOp == no_compound_indexread))
  3746. if (isSimpleCountExists && (parentOp == no_compound_indexread))
  3747. {
  3748. //A skip limit will require everything to be read anyway - so no point splitting in two
  3749. if (!parentExtra->hasSkipLimit)
  3750. extra->forceCompound = true;
  3751. }
  3752. }
  3753. break;
  3754. case no_compound_childread:
  3755. case no_compound_childnormalize:
  3756. if (flags & CSFnewchild)
  3757. newOp = no_compound_childaggregate;
  3758. break;
  3759. case no_compound_inline:
  3760. if (flags & CSFnewinline)
  3761. newOp = no_compound_inline;
  3762. break;
  3763. }
  3764. }
  3765. if (newOp)
  3766. {
  3767. //NB: When creating a limited aggregate, it is ok if the input indicates it is cloned
  3768. //because the new compound count operation will take it into account.
  3769. extra->inherit(*parentExtra, newOp);
  3770. }
  3771. }
  3772. else
  3773. {
  //Not an aggregate: continue with the parent's source, unless that is already an aggregate
  3774. if (!parentExtra->isAggregate())
  3775. extra->inherit(*queryBodyExtra(dataset));
  3776. }
  3777. if (expr->hasAttribute(keyedAtom))
  3778. extra->ensureCompound();
  3779. }
  3780. break;
  3781. case no_filter:
  3782. {
  3783. IHqlExpression * dataset = expr->queryChild(0);
  3784. CompoundSourceInfo * parentExtra = queryBodyExtra(dataset);
  //Filters are not merged into a source that already has a limit, or is an aggregate
  3785. if (!parentExtra->hasAnyLimit() && !parentExtra->isAggregate())
  3786. {
  3787. if (extra->inherit(*parentExtra))
  3788. {
  3789. extra->isFiltered = true;
  3790. if (filterIsKeyed(expr))
  3791. extra->ensureCompound();
  3792. if (filterIsUnkeyed(expr))
  3793. extra->isPostFiltered = true;
  3794. }
  3795. }
  3796. }
  3797. break;
  3798. case no_preload:
  3799. {
  3800. IHqlExpression * dataset = expr->queryChild(0);
  3801. extra->inherit(*queryBodyExtra(dataset));
  3802. extra->isPreloaded = true;
  3803. break;
  3804. }
  //The following simply inherit the state from their input dataset
  3805. case no_sorted:
  3806. case no_preservemeta:
  3807. case no_distributed:
  3808. case no_grouped:
  3809. case no_stepped:
  3810. case no_section:
  3811. case no_sectioninput:
  3812. case no_dataset_alias:
  3813. {
  3814. IHqlExpression * dataset = expr->queryChild(0);
  3815. extra->inherit(*queryBodyExtra(dataset));
  3816. break;
  3817. }
  3818. case no_usertable:
  3819. case no_selectfields:
  3820. UNIMPLEMENTED;
  3821. break;
  3822. case no_addfiles:
  3823. if (canProcessInline(NULL, expr) && (flags & CSFnewinline))
  3824. extra->sourceOp = no_compound_inline;
  3825. break;
  3826. }
  //Restore the state for the caller
  3827. insideCompound = wasInsideCompound;
  3828. }
  3829. void CompoundSourceTransformer::analyseMarkBoundaries(IHqlExpression * expr)
  3830. {
  3831. //This code means that child-query compounds inside a compound aren't yet spotted, they are spotted later.
  3832. if (createCompoundSource(expr))
  3833. {
  3834. queryBodyExtra(expr)->isBoundary = true;
  3835. candidate = true;
  3836. return;
  3837. }
  3838. else if (isCompoundSource(expr))
  3839. return;
  3840. //Might cause problems if Some items are references (e.g., keyed, fetch(0), keyedjoin(1) and don't want translating.
  3841. NewHqlTransformer::analyseExpr(expr);
  3842. }
  3843. void CompoundSourceTransformer::analyseExpr(IHqlExpression * expr)
  3844. {
  3845. if (alreadyVisited(expr->queryBody()))
  3846. {
  3847. if ((pass == 0) && !insideCompound)
  3848. {
  3849. if (!queryBodyExtra(expr)->isNoteUsageFirst())
  3850. return;
  3851. }
  3852. else
  3853. return;
  3854. }
  3855. if (expr->isConstant())
  3856. return;
  3857. switch (pass)
  3858. {
  3859. case 0:
  3860. analyseGatherInfo(expr);
  3861. break;
  3862. case 1:
  3863. analyseMarkBoundaries(expr);
  3864. break;
  3865. default:
  3866. throwUnexpected();
  3867. break;
  3868. }
  3869. }
  3870. bool CompoundSourceTransformer::childrenAreShared(IHqlExpression * expr)
  3871. {
  3872. if (isCompoundSource(expr))
  3873. return false;
  3874. unsigned numChildren = getNumChildTables(expr);
  3875. for (unsigned i=0; i < numChildren; i++)
  3876. {
  3877. IHqlExpression * cur = expr->queryChild(i);
  3878. if (queryBodyExtra(cur)->isShared() || childrenAreShared(cur))
  3879. return true;
  3880. }
  3881. return false;
  3882. }
  3883. bool CompoundSourceTransformer::createCompoundSource(IHqlExpression * expr)
  3884. {
  3885. CompoundSourceInfo * extra = queryBodyExtra(expr);
  3886. if (extra->sourceOp == no_none)
  3887. return false;
  3888. if (extra->forceCompound)
  3889. return true;
  3890. if (isSourceActivity(expr))
  3891. return false;
  3892. if (expr->getOperator() == no_preservemeta)
  3893. return false;
  3894. if (extra->isPreloaded)
  3895. return (flags & CSFpreload) != 0;
  3896. switch (extra->sourceOp)
  3897. {
  3898. case no_compound_diskread:
  3899. case no_compound_diskaggregate:
  3900. case no_compound_diskcount:
  3901. case no_compound_diskgroupaggregate:
  3902. return ((flags & CSFignoreShared) || !childrenAreShared(expr));
  3903. case no_compound_disknormalize:
  3904. return true;
  3905. case no_compound_indexaggregate:
  3906. case no_compound_indexcount:
  3907. case no_compound_indexgroupaggregate:
  3908. case no_compound_indexread:
  3909. //MORE: Should stop at sufficiently shared children - e.g.,
  3910. //* if the children are aggregates, when we get that far.
  3911. //* if child actions don't change the filter significantly (e.g, just projects, or no seg monitors)
  3912. {
  3913. if (!(flags & CSFindex))
  3914. return false;
  3915. CompoundSourceInfo * parentExtra = queryBodyExtra(expr->queryChild(0));
  3916. return ((flags & CSFignoreShared) || !childrenAreShared(expr) || !parentExtra->isFiltered);
  3917. }
  3918. case no_compound_indexnormalize:
  3919. return ((flags & CSFindex) != 0);
  3920. case no_compound_inline:
  3921. if (!(flags & CSFnewinline))
  3922. return false;
  3923. return !childrenAreShared(expr);
  3924. case no_compound_childread:
  3925. case no_compound_childnormalize:
  3926. case no_compound_childaggregate:
  3927. case no_compound_childcount:
  3928. case no_compound_childgroupaggregate:
  3929. case no_compound_selectnew:
  3930. return true;
  3931. }
  3932. UNIMPLEMENTED;
  3933. return false;
  3934. }
  3935. IHqlExpression * CompoundSourceTransformer::createTransformed(IHqlExpression * expr)
  3936. {
  3937. if (expr->isConstant())
  3938. return LINK(expr);
  3939. OwnedHqlExpr ret = queryTransformAnnotation(expr);
  3940. if (ret)
  3941. return ret.getClear();
  3942. OwnedHqlExpr transformed = NewHqlTransformer::createTransformed(expr);
  3943. CompoundSourceInfo * extra = queryBodyExtra(expr);
  3944. if (extra->isBoundary)
  3945. {
  3946. HqlExprAttr def = transformed;
  3947. if (extra->isCloned)
  3948. transformed.setown(appendLocalAttribute(transformed));
  3949. transformed.setown(createDataset(extra->sourceOp, LINK(transformed), LINK(extra->uid)));
  3950. if (extra->isCloned)
  3951. {
  3952. HqlExprArray args;
  3953. unwindChildren(args, def);
  3954. args.replace(*transformed.getClear(), 0);
  3955. transformed.setown(def->clone(args));
  3956. }
  3957. }
  3958. return transformed.getClear();
  3959. }
  3960. ANewTransformInfo * CompoundSourceTransformer::createTransformInfo(IHqlExpression * expr)
  3961. {
  3962. return CREATE_NEWTRANSFORMINFO(CompoundSourceInfo, expr);
  3963. }
  3964. bool CompoundSourceTransformer::needToCloneLimit(IHqlExpression * expr, node_operator sourceOp)
  3965. {
  3966. node_operator op = expr->getOperator();
  3967. switch (op)
  3968. {
  3969. case no_choosen:
  3970. if (queryRealChild(expr, 2))
  3971. return true;
  3972. break;
  3973. case no_limit:
  3974. if (expr->hasAttribute(skipAtom) && (targetClusterType != RoxieCluster))
  3975. return true;
  3976. break;
  3977. }
  3978. switch (targetClusterType)
  3979. {
  3980. case RoxieCluster:
  3981. return false;
  3982. case HThorCluster:
  3983. return (sourceOp != no_compound_indexread) || (op != no_limit);
  3984. case ThorLCRCluster:
  3985. return true;
  3986. default:
  3987. UNIMPLEMENTED;
  3988. }
  3989. }
  3990. IHqlExpression * CompoundSourceTransformer::process(IHqlExpression * expr)
  3991. {
  3992. analyse(expr, 0);
  3993. analyse(expr, 1);
  3994. if (candidate)
  3995. return transformRoot(expr);
  3996. return LINK(expr);
  3997. }
  3998. //---------------------------------------------------------------------------
  3999. IHqlExpression * getMergedFetch(IHqlExpression * expr)
  4000. {
  4001. IHqlExpression * child = expr->queryChild(0);
  4002. if (isLimitedDataset(child))
  4003. return LINK(expr);
  4004. HqlExprArray args;
  4005. if (child->getOperator() == no_compound_fetch)
  4006. return swapDatasets(expr);
  4007. if (child->getOperator() != no_fetch)
  4008. return LINK(expr);
  4009. args.append(*LINK(expr));
  4010. return createDataset(no_compound_fetch, args);
  4011. }
  4012. static HqlTransformerInfo compoundActivityTransformerInfo("CompoundActivityTransformer");
  4013. CompoundActivityTransformer::CompoundActivityTransformer(ClusterType _targetClusterType) : NewHqlTransformer(compoundActivityTransformerInfo)
  4014. {
  4015. targetClusterType = _targetClusterType;
  4016. }
  4017. IHqlExpression * CompoundActivityTransformer::createTransformed(IHqlExpression * expr)
  4018. {
  4019. OwnedHqlExpr transformed = NewHqlTransformer::createTransformed(expr);
  4020. updateOrphanedSelectors(transformed, expr);
  4021. switch (transformed->getOperator())
  4022. {
  4023. case no_filter:
  4024. return getMergedFetch(transformed);
  4025. case no_limit:
  4026. {
  4027. if (transformed->hasAttribute(onFailAtom))
  4028. break;
  4029. LinkedHqlExpr dataset = transformed->queryChild(0);
  4030. if (dataset->hasAttribute(limitAtom) || transformed->hasAttribute(skipAtom))
  4031. break;
  4032. switch (dataset->getOperator())
  4033. {
  4034. case no_join:
  4035. case no_denormalize:
  4036. case no_denormalizegroup:
  4037. if (isKeyedJoin(dataset))
  4038. break;
  4039. return transformed.getClear();
  4040. default:
  4041. return transformed.getClear();
  4042. }
  4043. if (!isThorCluster(targetClusterType))
  4044. return mergeLimitIntoDataset(dataset, transformed);
  4045. HqlExprArray args;
  4046. unwindChildren(args, transformed);
  4047. args.replace(*mergeLimitIntoDataset(dataset, transformed), 0);
  4048. return transformed->clone(args);
  4049. }
  4050. }
  4051. return transformed.getClear();
  4052. }
  4053. //------------------------------------------------------------------------
  4054. static HqlTransformerInfo optimizeActivityTransformerInfo("OptimizeActivityTransformer");
  4055. OptimizeActivityTransformer::OptimizeActivityTransformer(bool _optimizeCountCompare, bool _optimizeNonEmpty)
  4056. : NewHqlTransformer(optimizeActivityTransformerInfo)
  4057. {
  4058. optimizeCountCompare = _optimizeCountCompare; optimizeNonEmpty = _optimizeNonEmpty;
  4059. }
  4060. void OptimizeActivityTransformer::analyseExpr(IHqlExpression * expr)
  4061. {
  4062. expr = expr->queryBody();
  4063. queryBodyExtra(expr)->noteUsed();
  4064. if (alreadyVisited(expr))
  4065. return;
  4066. NewHqlTransformer::analyseExpr(expr);
  4067. }
  4068. //either a simple count, or isCountAggregate is guaranteed to be true - so structure is well defined
  4069. IHqlExpression * OptimizeActivityTransformer::insertChoosen(IHqlExpression * lhs, IHqlExpression * limit, __int64 limitDelta)
  4070. {
  4071. if (isShared(lhs))
  4072. return NULL;
  4073. IHqlExpression * ds = lhs->queryChild(0);
  4074. HqlExprArray args;
  4075. switch (lhs->getOperator())
  4076. {
  4077. case no_choosen:
  4078. return NULL;
  4079. case no_count:
  4080. case no_newaggregate:
  4081. {
  4082. //count on a child dataset is better if not limited...
  4083. node_operator dsOp = ds->getOperator();
  4084. if ((dsOp == no_select) || (dsOp == no_choosen) || (dsOp == no_rows))
  4085. return NULL;
  4086. args.append(*createDataset(no_choosen, LINK(ds), adjustValue(limit, limitDelta)));
  4087. break;
  4088. }
  4089. case no_implicitcast:
  4090. case no_cast:
  4091. case no_compound_childaggregate:
  4092. case no_compound_diskaggregate:
  4093. case no_compound_indexaggregate:
  4094. case no_select:
  4095. {
  4096. IHqlExpression * newDs = insertChoosen(ds, limit, limitDelta);
  4097. if (!newDs)
  4098. return NULL;
  4099. args.append(*newDs);
  4100. break;
  4101. }
  4102. default:
  4103. throwUnexpectedOp(lhs->getOperator());
  4104. }
  4105. unwindChildren(args, lhs, 1);
  4106. return lhs->clone(args);
  4107. }
  4108. static bool looksLikeSimpleCount(IHqlExpression * expr)
  4109. {
  4110. if ((expr->getOperator() == no_select) && expr->hasAttribute(newAtom))
  4111. {
  4112. IHqlExpression * ds = expr->queryChild(0);
  4113. return isSimpleCountAggregate(ds, false);
  4114. }
  4115. return (expr->getOperator() == no_count);
  4116. }
  4117. IHqlExpression * OptimizeActivityTransformer::optimizeCompare(IHqlExpression * lhs, IHqlExpression * rhs, node_operator op)
  4118. {
  4119. if (isShared(lhs))
  4120. return NULL;
  4121. if (!isIndependentOfScope(rhs))
  4122. return NULL;
  4123. if (!looksLikeSimpleCount(lhs))
  4124. return NULL;
  4125. // count(x) op count(y) - not clear if a choosen should be added to either, so assume neither for the moment,
  4126. // (we definitely don't want it added to both, which happens without the second test.)
  4127. if (looksLikeSimpleCount(rhs))
  4128. return NULL;
  4129. //Convert count(x) >= 1 to exists(x) (and other varients)
  4130. node_operator existOp = no_none;
  4131. switch (op)
  4132. {
  4133. case no_ne:
  4134. case no_gt:
  4135. if (matchesConstantValue(rhs, 0))
  4136. existOp = no_exists;
  4137. break;
  4138. case no_eq:
  4139. case no_le:
  4140. if (matchesConstantValue(rhs, 0))
  4141. existOp = no_not;
  4142. break;
  4143. case no_lt:
  4144. if (matchesConstantValue(rhs, 1))
  4145. existOp = no_not;
  4146. break;
  4147. case no_ge:
  4148. if (matchesConstantValue(rhs, 1))
  4149. existOp = no_exists;
  4150. break;
  4151. }
  4152. if (existOp != no_none)
  4153. {
  4154. if (lhs->getOperator() == no_count)
  4155. {
  4156. IHqlExpression * ds = lhs->queryChild(0);
  4157. OwnedHqlExpr ret = createValue(no_exists, makeBoolType(), LINK(ds));
  4158. if (existOp == no_not)
  4159. return createValue(no_not, makeBoolType(), ret.getClear());
  4160. return ret.getClear();
  4161. }
  4162. }
  4163. unsigned choosenDelta =0;
  4164. switch (op)
  4165. {
  4166. case no_eq:
  4167. //count(x) == n -> count(choosen(x,n+1)) == n
  4168. choosenDelta = 1;
  4169. break;
  4170. case no_ne:
  4171. //count(x) != 0 -> count(choosen(x,n+1)) != n
  4172. choosenDelta = 1;
  4173. break;
  4174. case no_lt:
  4175. //count(x) < n -> count(choosen(x,n)) < n
  4176. break;
  4177. case no_le:
  4178. //count(x) <= n -> count(choosen(x,n+1)) <= n
  4179. choosenDelta = 1;
  4180. break;
  4181. case no_gt:
  4182. //count(x) > n -> count(choosen(x,n+1)) > n
  4183. choosenDelta = 1;
  4184. break;
  4185. case no_ge:
  4186. //count(x) >= n -> count(choosen(x,n)) >= n
  4187. break;
  4188. }
  4189. IHqlExpression * newLhs = insertChoosen(lhs, rhs, choosenDelta);
  4190. if (!newLhs)
  4191. return NULL;
  4192. return createValue(op, makeBoolType(), newLhs, LINK(rhs));
  4193. }
  4194. static IHqlExpression * queryNormalizedAggregateParameter(IHqlExpression * expr)
  4195. {
  4196. loop
  4197. {
  4198. switch (expr->getOperator())
  4199. {
  4200. case no_choosen:
  4201. if (queryRealChild(expr, 2))
  4202. return expr;
  4203. break;
  4204. case no_sort:
  4205. case no_subsort:
  4206. case no_distribute:
  4207. break;
  4208. default:
  4209. return expr;
  4210. }
  4211. expr = expr->queryChild(0);
  4212. }
  4213. }
  4214. static bool aggregateMatchesDataset(IHqlExpression * agg, IHqlExpression * ds)
  4215. {
  4216. return queryNormalizedAggregateParameter(agg)->queryBody() == queryNormalizedAggregateParameter(ds)->queryBody();
  4217. }
  4218. static bool isCheckExistsAtleast(IHqlExpression * cond, IHqlExpression * ds, __int64 minMinElements, __int64 maxMinElements)
  4219. {
  4220. if (maxMinElements <= 0)
  4221. return false;
  4222. switch (cond->getOperator())
  4223. {
  4224. case no_exists:
  4225. if (aggregateMatchesDataset(cond->queryChild(0), ds))
  4226. return true;
  4227. break;
  4228. case no_ne:
  4229. {
  4230. IHqlExpression * condLhs = cond->queryChild(0);
  4231. if ((condLhs->getOperator() == no_count) && isZero(cond->queryChild(1)) && (minMinElements == 1))
  4232. {
  4233. if (aggregateMatchesDataset(condLhs->queryChild(0), ds))
  4234. return true;
  4235. }
  4236. break;
  4237. }
  4238. case no_gt:
  4239. minMinElements--;
  4240. maxMinElements--;
  4241. //fallthrough
  4242. case no_ge:
  4243. {
  4244. IHqlExpression * condLhs = cond->queryChild(0);
  4245. if (condLhs->getOperator() == no_count)
  4246. {
  4247. IHqlExpression * limit = cond->queryChild(1);
  4248. if (limit->queryValue())
  4249. {
  4250. __int64 limitVal = limit->queryValue()->getIntValue();
  4251. if ((limitVal <= maxMinElements) && (limitVal >= minMinElements))
  4252. {
  4253. if (aggregateMatchesDataset(condLhs->queryChild(0), ds))
  4254. return true;
  4255. }
  4256. }
  4257. }
  4258. }
  4259. break;
  4260. }
  4261. return false;
  4262. }
  4263. //is "value" of the form ds[n].x and other the same as the null expression for that field?
  4264. //if so we may be able to remove a condition
  4265. IHqlExpression * queryNullDsSelect(__int64 & selectIndex, IHqlExpression * value, IHqlExpression * other)
  4266. {
  4267. if (isCast(value))
  4268. value = value->queryChild(0);
  4269. if (value->getOperator() != no_select)
  4270. return NULL;
  4271. bool isNew;
  4272. IHqlExpression * ds = querySelectorDataset(value, isNew);
  4273. if (!isNew || ds->getOperator() != no_selectnth)
  4274. return NULL;
  4275. IValue * index = ds->queryChild(1)->queryValue();
  4276. if (!index)
  4277. return NULL;
  4278. if (!isNullExpr(other, value))
  4279. return NULL;
  4280. selectIndex = index->getIntValue();
  4281. return ds->queryChild(0);
  4282. }
  4283. IHqlExpression * OptimizeActivityTransformer::createTransformed(IHqlExpression * expr)
  4284. {
  4285. OwnedHqlExpr transformed = doCreateTransformed(expr);
  4286. if (transformed)
  4287. {
  4288. assertex(transformed != expr);
  4289. queryBodyExtra(transformed)->inherit(queryBodyExtra(expr));
  4290. return transform(transformed);
  4291. }
  4292. transformed.setown(NewHqlTransformer::createTransformed(expr));
  4293. if (transformed != expr)
  4294. queryBodyExtra(transformed)->inherit(queryBodyExtra(expr));
  4295. return transformed.getClear();
  4296. }
  4297. IHqlExpression * OptimizeActivityTransformer::doCreateTransformed(IHqlExpression * expr)
  4298. {
  4299. node_operator op = expr->getOperator();
  4300. switch (op)
  4301. {
  4302. case no_if:
  4303. {
  4304. IHqlExpression * cond = expr->queryChild(0);
  4305. IHqlExpression * lhs = expr->queryChild(1);
  4306. //convert if(exists(x)|count(x)>0, x, y) to nonempty(x, y);
  4307. //must happen before the count(x)>n optimization below....
  4308. if (expr->isDataset())
  4309. {
  4310. if (isCheckExistsAtleast(cond, lhs, 1, 1))
  4311. {
  4312. IHqlExpression * rhs = expr->queryChild(2);
  4313. //always convert if(exists(x),x) to x regardless of x, or the optimizeNonEmpty option
  4314. if (rhs->getOperator() == no_null)
  4315. return transform(lhs);
  4316. if (optimizeNonEmpty && !canProcessInline(NULL, expr))
  4317. {
  4318. HqlExprArray args;
  4319. args.append(*transform(lhs));
  4320. args.append(*transform(rhs));
  4321. OwnedHqlExpr ret = createDataset(no_nonempty, args);
  4322. return expr->cloneAllAnnotations(ret);
  4323. }
  4324. }
  4325. }
  4326. __int64 selectIndex = 0;
  4327. //check for if (count(x) >= 10, x[10].value, <null>) and convert to x[10].value
  4328. //also valid for count(x) >= 1, but not count(x) >= 11
  4329. IHqlExpression * ds = queryNullDsSelect(selectIndex, expr->queryChild(1), expr->queryChild(2));
  4330. if (ds)
  4331. {
  4332. if (isCheckExistsAtleast(cond, ds, 1, selectIndex))
  4333. return LINK(lhs);
  4334. }
  4335. break;
  4336. }
  4337. case no_selectnth:
  4338. {
  4339. IHqlExpression * ds = expr->queryChild(0);
  4340. if ((ds->getOperator() != no_sort) || isShared(ds))
  4341. break;
  4342. IHqlExpression * index = expr->queryChild(1);
  4343. if (getIntValue(index, 99999) > 100)
  4344. break;
  4345. OwnedHqlExpr transformedDs = transform(ds);
  4346. OwnedHqlExpr transformedIndex = transform(index);
  4347. HqlExprArray args;
  4348. unwindChildren(args, transformedDs);
  4349. args.add(*LINK(transformedIndex), 2);
  4350. OwnedHqlExpr topn = createDataset(no_topn, args);
  4351. args.kill();
  4352. args.append(*ds->cloneAllAnnotations(topn));
  4353. args.append(*LINK(transformedIndex));
  4354. unwindChildren(args, expr, 2);
  4355. return expr->clone(args);
  4356. }
  4357. case no_eq:
  4358. case no_ne:
  4359. case no_le:
  4360. case no_lt:
  4361. case no_ge:
  4362. case no_gt:
  4363. //MORE Would still be worth doing for thor i) if a no_select non-new, ii) if the lhs was an aggregate on
  4364. //a compound_disk_aggregate iii) possibly others.
  4365. if (optimizeCountCompare)
  4366. {
  4367. IHqlExpression * lhs = expr->queryChild(0);
  4368. IHqlExpression * rhs = expr->queryChild(1);
  4369. OwnedHqlExpr ret = optimizeCompare(lhs, rhs, op);
  4370. if (!ret)
  4371. ret.setown(optimizeCompare(rhs, lhs, getReverseOp(op)));
  4372. if (ret)
  4373. return ret.getClear();
  4374. }
  4375. break;
  4376. }
  4377. return NULL;
  4378. }
  4379. void optimizeActivities(HqlExprArray & exprs, bool optimizeCountCompare, bool optimizeNonEmpty)
  4380. {
  4381. OptimizeActivityTransformer transformer(optimizeCountCompare, optimizeNonEmpty);
  4382. HqlExprArray results;
  4383. transformer.analyseArray(exprs, 0);
  4384. transformer.transformRoot(exprs, results);
  4385. replaceArray(exprs, results);
  4386. }
  4387. IHqlExpression * optimizeActivities(IHqlExpression * expr, bool optimizeCountCompare, bool optimizeNonEmpty)
  4388. {
  4389. OptimizeActivityTransformer transformer(optimizeCountCompare, optimizeNonEmpty);
  4390. HqlExprArray results;
  4391. transformer.analyse(expr, 0);
  4392. return transformer.transformRoot(expr);
  4393. }
  4394. IHqlExpression * GlobalAttributeInfo::queryAlias(IHqlExpression * value)
  4395. {
  4396. if (!aliasName)
  4397. {
  4398. if (storedName)
  4399. aliasName.set(storedName);
  4400. else
  4401. aliasName.setown(createNextStringValue(value, storedPrefix));
  4402. }
  4403. return aliasName;
  4404. }
  4405. IHqlExpression * GlobalAttributeInfo::queryFilename(IHqlExpression * value, IConstWorkUnit * wu, bool isRoxie)
  4406. {
  4407. if (!cachedFilename)
  4408. {
  4409. if (storedName)
  4410. cachedFilename.set(storedName);
  4411. else
  4412. cachedFilename.setown(createNextStringValue(value, filePrefix));
  4413. if (persistOp != no_persist)
  4414. {
  4415. StringBuffer prefix("~");
  4416. if (storedName)
  4417. {
  4418. if (persistOp == no_stored)
  4419. prefix.append("spill::stored");
  4420. else if (persistOp == no_checkpoint)
  4421. prefix.append("spill::checkpoint");
  4422. }
  4423. if (persistOp == no_once)
  4424. prefix.append("once::");
  4425. bool wuidIsConstant = isRoxie || !wu->getCloneable();
  4426. if (wuidIsConstant)
  4427. {
  4428. StringBuffer s;
  4429. cachedFilename->queryValue()->getStringValue(s.append(prefix));
  4430. cachedFilename.setown(createConstant(s.str()));
  4431. }
  4432. else
  4433. {
  4434. ITypeInfo * type = makeStringType(UNKNOWN_LENGTH, NULL, NULL);
  4435. OwnedHqlExpr filename = createValue(no_concat, type, createConstant(prefix), cachedFilename.getClear());
  4436. cachedFilename.setown(foldHqlExpression(filename));
  4437. }
  4438. }
  4439. }
  4440. return cachedFilename;
  4441. }
  4442. IHqlExpression * GlobalAttributeInfo::createSetValue(IHqlExpression * value, IHqlExpression * name)
  4443. {
  4444. HqlExprArray args;
  4445. args.append(*LINK(value));
  4446. args.append(*createAttribute(sequenceAtom, LINK(sequence)));
  4447. args.append(*createAttribute(namedAtom, LINK(name)));
  4448. if (extraSetAttr)
  4449. extraSetAttr->unwindList(args, no_comma);
  4450. if (cluster)
  4451. args.append(*createAttribute(clusterAtom, LINK(cluster)));
  4452. if (setOp == no_setresult)
  4453. return createSetResult(args);
  4454. return createValue(setOp, makeVoidType(), args);
  4455. }
  4456. IHqlExpression * GlobalAttributeInfo::getStoredKey()
  4457. {
  4458. return createAttribute(nameAtom, LINK(sequence), lowerCaseHqlExpr(originalLabel));
  4459. }
  4460. void GlobalAttributeInfo::setCluster(IHqlExpression * expr)
  4461. {
  4462. if (expr && !isBlankString(expr))
  4463. cluster.set(expr);
  4464. }
  4465. void GlobalAttributeInfo::extractGlobal(IHqlExpression * global, ClusterType platform)
  4466. {
  4467. few = spillToWorkunitNotFile(value, platform) || value->isDictionary();
  4468. if (global)
  4469. {
  4470. if (global->hasAttribute(fewAtom))
  4471. few = true;
  4472. else if (global->hasAttribute(manyAtom) && (platform != RoxieCluster))
  4473. few = false;
  4474. }
  4475. setOp = no_setresult;
  4476. sequence.setown(getLocalSequenceNumber());
  4477. persistOp = no_global;
  4478. }
  4479. void GlobalAttributeInfo::extractStoredInfo(IHqlExpression * expr, IHqlExpression * _codehash, bool isRoxie, int multiplePersistInstances)
  4480. {
  4481. node_operator op = expr->getOperator();
  4482. few = expr->hasAttribute(fewAtom) || (isRoxie) || (value->isDictionary() && !expr->hasAttribute(manyAtom));
  4483. switch (op)
  4484. {
  4485. case no_stored:
  4486. setOp = no_ensureresult;
  4487. storedName.set(expr->queryChild(0));
  4488. originalLabel.set(storedName);
  4489. sequence.setown(getStoredSequenceNumber());
  4490. few = true;
  4491. break;
  4492. case no_checkpoint:
  4493. setOp = no_ensureresult;
  4494. storedName.set(expr->queryChild(0));
  4495. originalLabel.set(storedName);
  4496. sequence.setown(getLocalSequenceNumber());
  4497. extraSetAttr.setown(createAttribute(checkpointAtom));
  4498. break;
  4499. case no_persist:
  4500. assertex(_codehash);
  4501. codehash.set(_codehash);
  4502. setOp = no_ensureresult;
  4503. storedName.set(expr->queryChild(0));
  4504. originalLabel.set(storedName);
  4505. sequence.setown(getGlobalSequenceNumber());
  4506. extraSetAttr.setown(createAttribute(_workflowPersist_Atom, LINK(codehash)));
  4507. setCluster(queryRealChild(expr, 1));
  4508. few = expr->hasAttribute(fewAtom); // PERSISTs need a consistent format.
  4509. extraOutputAttr.setown(createComma(LINK(expr->queryAttribute(expireAtom)), LINK(expr->queryAttribute(clusterAtom))));
  4510. numPersistInstances = multiplePersistInstances;
  4511. if (expr->hasAttribute(multipleAtom))
  4512. numPersistInstances = (int)getIntValue(queryAttributeChild(expr, multipleAtom, 0), -1);
  4513. else if (expr->hasAttribute(singleAtom))
  4514. numPersistInstances = 0;
  4515. if (numPersistInstances != 0)
  4516. {
  4517. StringBuffer s;
  4518. getStringValue(s, storedName);
  4519. s.append("__p");
  4520. getStringValue(s, codehash);
  4521. storedName.setown(createConstant(s.str()));
  4522. }
  4523. break;
  4524. case no_global:
  4525. throwUnexpected();
  4526. case no_independent:
  4527. setOp = no_setresult;
  4528. storedName.clear();
  4529. sequence.setown(getLocalSequenceNumber());
  4530. extraSetAttr.setown(createAttribute(_workflow_Atom));
  4531. setCluster(queryRealChild(expr, 0));
  4532. op = no_global;
  4533. break;
  4534. case no_once:
  4535. setOp = no_setresult;
  4536. storedName.clear();
  4537. sequence.setown(getOnceSequenceNumber());
  4538. extraSetAttr.setown(createAttribute(_workflow_Atom));
  4539. break;
  4540. case no_success:
  4541. case no_failure:
  4542. case no_recovery:
  4543. if(setOp == no_none)
  4544. {
  4545. storedName.clear();
  4546. setOp = no_setresult;
  4547. sequence.setown(getLocalSequenceNumber());
  4548. }
  4549. break;
  4550. default:
  4551. return;
  4552. }
  4553. persistOp = op;
  4554. }
  4555. void GlobalAttributeInfo::splitGlobalDefinition(ITypeInfo * type, IHqlExpression * value, IConstWorkUnit * wu, SharedHqlExpr & setOutput, OwnedHqlExpr * getOutput, bool isRoxie)
  4556. {
  4557. doSplitGlobalDefinition(type, value, wu, setOutput, getOutput, isRoxie);
  4558. }
  4559. void GlobalAttributeInfo::doSplitGlobalDefinition(ITypeInfo * type, IHqlExpression * value, IConstWorkUnit * wu, SharedHqlExpr & setOutput, OwnedHqlExpr * getOutput, bool isRoxie)
  4560. {
  4561. OwnedHqlExpr targetName;
  4562. if (storedName)
  4563. targetName.set(storedName);
  4564. else
  4565. targetName.setown(createNextStringValue(value));
  4566. ITypeInfo * valueType = value->queryType();
  4567. if (value->isDataset() || value->isDictionary())
  4568. {
  4569. if (few)
  4570. {
  4571. splitSmallDataset(value, setOutput, getOutput);
  4572. return;
  4573. }
  4574. LinkedHqlExpr filename = queryFilename(value, wu, isRoxie);
  4575. HqlExprArray args;
  4576. if (value->isDictionary())
  4577. args.append(*createDataset(no_datasetfromdictionary, LINK(value)));
  4578. else
  4579. args.append(*LINK(value));
  4580. args.append(*LINK(filename));
  4581. //NB: Also update the dataset node at the end...
  4582. if (valueType->getTypeCode() == type_groupedtable)
  4583. args.append(*createAttribute(groupedAtom));
  4584. else
  4585. assertex(!isGrouped(valueType));
  4586. bool compressFile = true;
  4587. switch (persistOp)
  4588. {
  4589. case no_persist:
  4590. {
  4591. args.append(*createAttribute(_workflowPersist_Atom));
  4592. args.append(*createAttribute(sequenceAtom, getGlobalSequenceNumber()));
  4593. //add a flag to help get the resourcing right - may need to hash distribute on different size thor
  4594. IHqlExpression * distribution = queryDistribution(value);
  4595. if (distribution && !distribution->isAttribute())
  4596. args.append(*createAttribute(distributedAtom));
  4597. break;
  4598. }
  4599. case no_stored:
  4600. args.append(*createAttribute(ownedAtom));
  4601. args.append(*createAttribute(sequenceAtom, getStoredSequenceNumber()));
  4602. break;
  4603. case no_checkpoint:
  4604. args.append(*createAttribute(ownedAtom));
  4605. args.append(*createAttribute(sequenceAtom, getLocalSequenceNumber()));
  4606. break;
  4607. case no_once:
  4608. args.append(*createAttribute(ownedAtom));
  4609. args.append(*createAttribute(sequenceAtom, getOnceSequenceNumber()));
  4610. break;
  4611. case no_global:
  4612. //May extend over several different graphs
  4613. args.append(*createAttribute(sequenceAtom, getLocalSequenceNumber()));
  4614. args.append(*createAttribute(ownedAtom));
  4615. args.append(*createAttribute(jobTempAtom));
  4616. break;
  4617. default:
  4618. //global, independent, success, failure, etc. etc.
  4619. args.append(*createAttribute(ownedAtom));
  4620. args.append(*createAttribute(jobTempAtom));
  4621. args.append(*createAttribute(sequenceAtom, getLocalSequenceNumber()));
  4622. break;
  4623. }
  4624. if (compressFile)
  4625. args.append(*createAttribute(__compressed__Atom));
  4626. args.append(*createAttribute(overwriteAtom));
  4627. if (extraOutputAttr)
  4628. extraOutputAttr->unwindList(args, no_comma);
  4629. OwnedHqlExpr output = createValue(no_output, makeVoidType(), args);
  4630. // if (persistOp == no_independent)
  4631. if (setOp == no_setresult)
  4632. setOutput.set(output);
  4633. else
  4634. setOutput.setown(createSetValue(output, queryAlias(value)));
  4635. if(getOutput)
  4636. {
  4637. IHqlExpression * record = value->queryRecord();
  4638. args.kill();
  4639. args.append(*LINK(filename));
  4640. args.append(*LINK(record));
  4641. args.append(*createValue(no_thor));
  4642. args.append(*createAttribute(_noVirtual_Atom)); // don't interpret virtual fields in spilled output
  4643. if (persistOp == no_persist)
  4644. args.append(*createAttribute(_workflowPersist_Atom));
  4645. if (isGrouped(value))
  4646. args.append(*createAttribute(groupedAtom));
  4647. if (compressFile)
  4648. args.append(*createAttribute(__compressed__Atom));
  4649. if (hasSingleRow(value))
  4650. args.append(*createAttribute(rowAtom));
  4651. if (output->hasAttribute(jobTempAtom))
  4652. args.append(*createAttribute(jobTempAtom));
  4653. if (persistOp != no_stored)
  4654. {
  4655. IHqlExpression * recordCountAttr = queryRecordCountInfo(value);
  4656. if (recordCountAttr)
  4657. args.append(*LINK(recordCountAttr));
  4658. }
  4659. OwnedHqlExpr getValue = createDataset(no_table, args);
  4660. //getValue.setown(cloneInheritedAnnotations(value, getValue));
  4661. if (persistOp != no_stored)
  4662. getValue.setown(preserveTableInfo(getValue, value, false, (persistOp == no_persist) ? filename : NULL));
  4663. //Note: getValue->queryType() != valueType because the dataset used for field resolution has changed...
  4664. if (value->isDictionary())
  4665. getValue.setown(createDictionary(no_createdictionary, getValue.getClear()));
  4666. getOutput->setown(getValue.getClear());
  4667. }
  4668. }
  4669. else if (type->getTypeCode() == type_void)
  4670. {
  4671. switch (persistOp)
  4672. {
  4673. case no_stored:
  4674. case no_checkpoint:
  4675. case no_once:
  4676. case no_persist:
  4677. setOutput.setown(createSetValue(value, queryAlias(value)));
  4678. break;
  4679. default:
  4680. setOutput.set(value);
  4681. break;
  4682. }
  4683. if(getOutput) getOutput->setown(createValue(no_null, makeVoidType(), createAttribute(_internal_Atom, LINK(sequence), LINK(queryAlias(value)))));
  4684. }
  4685. else
  4686. {
  4687. ITypeInfo * ct = type->queryChildType();
  4688. if (type->getTypeCode() == type_set)
  4689. extraSetAttr.setown(createComma(extraSetAttr.getClear(), createAttribute(_original_Atom, createValue(no_implicitcast, LINK(type), LINK(value)))));
  4690. setOutput.setown(createSetValue(value, queryAlias(value)));
  4691. if(getOutput) getOutput->setown(createGetResultFromSetResult(setOutput, type));
  4692. }
  4693. }
  4694. void GlobalAttributeInfo::createSmallOutput(IHqlExpression * value, SharedHqlExpr & setOutput)
  4695. {
  4696. if (value->getOperator() == no_temptable)
  4697. {
  4698. IHqlExpression * values = value->queryChild(0);
  4699. if ((values->getOperator() == no_null) ||
  4700. ((values->getOperator() == no_list) && (values->numChildren() == 0)))
  4701. {
  4702. OwnedHqlExpr newNull = createDataset(no_null, LINK(value->queryRecord()));
  4703. setOutput.setown(createSetValue(newNull, queryAlias(value)));
  4704. return;
  4705. }
  4706. else if (values->getOperator() == no_all)
  4707. {
  4708. OwnedHqlExpr newAll = createDataset(no_all, LINK(value->queryRecord()));
  4709. setOutput.setown(createSetValue(newAll, queryAlias(value)));
  4710. return;
  4711. }
  4712. }
  4713. // else if (value->getOperator() == no_null)
  4714. // {
  4715. // setOutput.setown(createSetValue(value, queryAlias()));
  4716. // return;
  4717. // }
  4718. HqlExprArray args;
  4719. args.append(*LINK(value));
  4720. args.append(*createAttribute(sequenceAtom, LINK(sequence)));
  4721. args.append(*createAttribute(namedAtom, LINK(queryAlias(value))));
  4722. if (isGrouped(value))
  4723. args.append(*createAttribute(groupedAtom));
  4724. setOutput.setown(createValue(no_output, makeVoidType(), args));
  4725. if (setOp != no_setresult)
  4726. {
  4727. extraSetAttr.setown(createComma(LINK(extraSetAttr), createAttribute(noSetAtom)));
  4728. setOutput.setown(createSetValue(setOutput, queryAlias(value)));
  4729. }
  4730. }
  4731. void GlobalAttributeInfo::checkFew(HqlCppTranslator & translator)
  4732. {
  4733. // if (few && isGrouped(value))
  4734. // translator.WARNINGAT(queryLocation(value), HQLWRN_GroupedGlobalFew);
  4735. }
  4736. void GlobalAttributeInfo::splitSmallDataset(IHqlExpression * value, SharedHqlExpr & setOutput, OwnedHqlExpr * getOutput)
  4737. {
  4738. createSmallOutput(value, setOutput);
  4739. if(getOutput)
  4740. {
  4741. IHqlExpression * record = value->queryRecord();
  4742. HqlExprArray args;
  4743. args.append(*LINK(record));
  4744. args.append(*createAttribute(nameAtom, LINK(queryAlias(value))));
  4745. args.append(*createAttribute(sequenceAtom, LINK(sequence)));
  4746. if (isGrouped(value))
  4747. args.append(*createAttribute(groupedAtom));
  4748. if (persistOp != no_stored)
  4749. {
  4750. IHqlExpression * recordCountAttr = queryRecordCountInfo(value);
  4751. if (recordCountAttr)
  4752. args.append(*LINK(recordCountAttr));
  4753. }
  4754. OwnedHqlExpr wuRead = value->isDictionary() ? createDictionary(no_workunit_dataset, args) : createDataset(no_workunit_dataset, args);
  4755. //wuRead.setown(cloneInheritedAnnotations(value, wuRead));
  4756. if (persistOp != no_stored)
  4757. getOutput->setown(preserveTableInfo(wuRead, value, true, NULL));
  4758. else
  4759. getOutput->set(wuRead);
  4760. }
  4761. }
  4762. //------------------------------------------------------------------------
  4763. static bool isStored(IHqlExpression * set)
  4764. {
  4765. switch (set->getOperator())
  4766. {
  4767. case no_setresult:
  4768. case no_ensureresult:
  4769. case no_output:
  4770. return matchesConstantValue(queryAttributeChild(set, sequenceAtom, 0), ResultSequenceStored);
  4771. }
  4772. return false;
  4773. }
  4774. static bool isTrivialStored(IHqlExpression * set)
  4775. {
  4776. switch (set->getOperator())
  4777. {
  4778. case no_setresult:
  4779. case no_ensureresult:
  4780. if (matchesConstantValue(queryAttributeChild(set, sequenceAtom, 0), ResultSequenceStored))
  4781. {
  4782. IHqlExpression * value = set->queryChild(0);
  4783. loop
  4784. {
  4785. switch (value->getOperator())
  4786. {
  4787. case no_constant:
  4788. case no_all:
  4789. case no_null:
  4790. return true;
  4791. case no_list:
  4792. return (value->numChildren() == 0);
  4793. case no_cast:
  4794. case no_implicitcast:
  4795. value = value->queryChild(0);
  4796. break;
  4797. case no_output:
  4798. return isTrivialInlineOutput(value);
  4799. default:
  4800. return false;
  4801. }
  4802. }
  4803. }
  4804. break;
  4805. case no_output:
  4806. return isTrivialInlineOutput(set);
  4807. }
  4808. return false;
  4809. }
  4810. inline bool isWorkflowAction(IHqlExpression * expr)
  4811. {
  4812. return expr && (expr->getOperator() == no_workflow_action);
  4813. }
  4814. void cloneDependencies(UnsignedArray & tgt, const UnsignedArray & src)
  4815. {
  4816. ForEachItemIn(i, src)
  4817. tgt.append(src.item(i));
  4818. }
  4819. inline bool addDependency(UnsignedArray & tgt, unsigned wfid)
  4820. {
  4821. if (!tgt.contains(wfid))
  4822. {
  4823. tgt.append(wfid);
  4824. return true;
  4825. }
  4826. return false;
  4827. }
  4828. void inheritDependencies(UnsignedArray & tgt, const UnsignedArray & src)
  4829. {
  4830. ForEachItemIn(i, src)
  4831. addDependency(tgt, src.item(i));
  4832. }
  4833. bool hasSameDependencies(UnsignedArray const & d1, UnsignedArray const & d2)
  4834. {
  4835. if (d1.ordinality() != d2.ordinality())
  4836. return false;
  4837. ForEachItemIn(i, d2)
  4838. {
  4839. if (d1.find(d2.item(i)) == NotFound)
  4840. return false;
  4841. }
  4842. return true;
  4843. }
  4844. bool hasExtraDependencies(UnsignedArray const & p, UnsignedArray const & n, UnsignedArray const & ignore)
  4845. {
  4846. if (n.ordinality() > p.ordinality() + ignore.ordinality())
  4847. return true;
  4848. ForEachItemIn(i, n)
  4849. {
  4850. unsigned cur = n.item(i);
  4851. if (!p.contains(cur) && !ignore.contains(cur))
  4852. return true;
  4853. }
  4854. return false;
  4855. }
  4856. void diffDependencies(UnsignedArray & target, UnsignedArray const & d1, UnsignedArray const & d2)
  4857. {
  4858. ForEachItemIn(i, d1)
  4859. {
  4860. unsigned cur = d1.item(i);
  4861. if (d2.find(cur) == NotFound)
  4862. addDependency(target, cur);
  4863. }
  4864. ForEachItemIn(j, d2)
  4865. {
  4866. unsigned cur = d2.item(j);
  4867. if (d1.find(cur) == NotFound)
  4868. addDependency(target, cur);
  4869. }
  4870. }
  4871. void intersectDependencies(UnsignedArray & target, UnsignedArray const & d1, UnsignedArray const & d2)
  4872. {
  4873. ForEachItemIn(i, d1)
  4874. {
  4875. unsigned cur = d1.item(i);
  4876. if (d2.find(cur) != NotFound)
  4877. addDependency(target, cur);
  4878. }
  4879. }
  4880. //------------------------------------------------------------------------
  4881. static HqlTransformerInfo workflowTransformerInfo("WorkflowTransformer");
  4882. WorkflowTransformer::WorkflowTransformer(IWorkUnit * _wu, HqlCppTranslator & _translator)
  4883. : NewHqlTransformer(workflowTransformerInfo), wu(_wu), translator(_translator), wfidCount(0)
  4884. {
  4885. const HqlCppOptions & options = translator.queryOptions();
  4886. trivialStoredWfid = 0;
  4887. nextInternalFunctionId = 0;
  4888. onceWfid = 0;
  4889. combineAllStored = options.combineAllStored;
  4890. combineTrivialStored = options.combineTrivialStored;
  4891. expandPersistInputDependencies = options.expandPersistInputDependencies;
  4892. multiplePersistInstances = options.multiplePersistInstances ? options.defaultNumPersistInstances : 0;
  4893. isRootAction = true;
  4894. isRoxie = (translator.getTargetClusterType() == RoxieCluster);
  4895. workflowOut = NULL;
  4896. isConditional = false;
  4897. insideStored = false;
  4898. }
  4899. //-- Helper routines --
  4900. IWorkflowItem * WorkflowTransformer::addWorkflowToWorkunit(unsigned wfid, WFType type, WFMode mode, UnsignedArray const & dependencies, ContingencyData const & conts, IHqlExpression * cluster)
  4901. {
  4902. Owned<IWorkflowItem> wf(wu->addWorkflowItem(wfid, type, mode, conts.success, conts.failure, conts.recovery, conts.retries, conts.contingencyFor));
  4903. if (cluster)
  4904. {
  4905. StringBuffer clusterText;
  4906. getStringValue(clusterText, cluster);
  4907. wf->setCluster(clusterText);
  4908. }
  4909. ForEachItemIn(idx, dependencies)
  4910. wf->addDependency(dependencies.item(idx));
  4911. return wf.getClear();
  4912. }
  4913. void WorkflowTransformer::setWorkflowSchedule(IWorkflowItem * wf, const ScheduleData & sched)
  4914. {
  4915. if(sched.now)
  4916. {
  4917. wf->setScheduledNow();
  4918. }
  4919. else
  4920. {
  4921. wu->incEventScheduledCount();
  4922. wf->setScheduledOn(sched.eventName.str(), sched.eventText.str());
  4923. if(sched.counting)
  4924. {
  4925. wf->setScheduleCount(sched.count);
  4926. if (sched.count == 0)
  4927. wf->setState(WFStateDone);
  4928. }
  4929. }
  4930. int priority = sched.priority;
  4931. if(priority > 100) priority = 100;
  4932. if(priority < 0) priority = 0;
  4933. wf->setSchedulePriority(priority);
  4934. }
  // Forwards the persist name, the wfid of the associated persist item and the number of
  // persist instances straight to IWorkflowItem::setPersistInfo().
  4935. void WorkflowTransformer::setWorkflowPersist(IWorkflowItem * wf, char const * persistName, unsigned persistWfid, int numPersistInstances)
  4936. {
  4937. wf->setPersistInfo(persistName, persistWfid, numPersistInstances);
  4938. }
  4939. WorkflowItem * WorkflowTransformer::createWorkflowItem(IHqlExpression * expr, unsigned wfid, node_operator workflowOp)
  4940. {
  4941. WorkflowItem * item = new WorkflowItem(wfid, workflowOp);
  4942. expr->unwindList(item->queryExprs(), no_comma);
  4943. gatherIndirectDependencies(item->dependencies, expr);
  4944. return item;
  4945. }
  4946. IWorkflowItem * WorkflowTransformer::lookupWorkflowItem(unsigned wfid)
  4947. {
  4948. Owned<IWorkflowItemIterator> iter = wu->updateWorkflowItems();
  4949. ForEach(*iter)
  4950. {
  4951. Owned<IWorkflowItem> cur = iter->get();
  4952. if (cur->queryWfid() == wfid)
  4953. return cur.getClear();
  4954. }
  4955. return NULL;
  4956. }
  // Stub: always reports that the expression has no stored dependencies; 'expr' is not examined.
  4957. bool WorkflowTransformer::hasStoredDependencies(IHqlExpression * expr)
  4958. {
  4959. return false;
  4960. }
  4961. void WorkflowTransformer::inheritDependencies(IHqlExpression * expr)
  4962. {
  4963. ForEachChild(i, expr)
  4964. copyDependencies(queryBodyExtra(expr->queryChild(i)), queryBodyExtra(expr));
  4965. }
  4966. void WorkflowTransformer::copyDependencies(WorkflowTransformInfo * source, WorkflowTransformInfo * dest)
  4967. {
  4968. if(!source) return;
  4969. UnsignedArray const & dependencies = source->queryDependencies();
  4970. ForEachItemIn(idx, dependencies)
  4971. dest->addDependency(dependencies.item(idx));
  4972. }
  4973. void WorkflowTransformer::copySetValueDependencies(WorkflowTransformInfo * source, IHqlExpression * expr)
  4974. {
  4975. node_operator op = expr->getOperator();
  4976. if (op == no_compound || op==no_actionlist)
  4977. {
  4978. copySetValueDependencies(source, expr->queryChild(expr->numChildren()-1));
  4979. inheritDependencies(expr);
  4980. }
  4981. else
  4982. copyDependencies(source, queryBodyExtra(expr));
  4983. }
  4984. unsigned WorkflowTransformer::ensureWorkflowAction(IHqlExpression * expr)
  4985. {
  4986. if (isWorkflowAction(expr))
  4987. return (unsigned)getIntValue(expr->queryChild(0));
  4988. unsigned wfid = ++wfidCount;
  4989. Owned<IWorkflowItem> wf = addWorkflowToWorkunit(wfid, WFTypeNormal, WFModeNormal, queryDirectDependencies(expr), rootCluster);
  4990. workflowOut->append(*createWorkflowItem(expr, wfid, no_actionlist));
  4991. return wfid;
  4992. }
  4993. //-- first pass - extracting workflow
  4994. unsigned WorkflowTransformer::splitValue(IHqlExpression * value)
  4995. {
  4996. GlobalAttributeInfo info("spill::wf", "wf", value);
  4997. info.sequence.setown(getLocalSequenceNumber());
  4998. info.setOp = no_setresult;
  4999. info.persistOp = no_global;
  5000. OwnedHqlExpr setValue;
  5001. info.checkFew(translator);
  5002. info.splitGlobalDefinition(value->queryType(), value, wu, setValue, 0, (translator.getTargetClusterType() == RoxieCluster));
  5003. inheritDependencies(setValue);
  5004. unsigned wfid = ++wfidCount;
  5005. workflowOut->append(*createWorkflowItem(setValue, wfid, no_global));
  5006. return wfid;
  5007. }
  5008. WorkflowItem * WorkflowTransformer::findWorkflowItem(unsigned wfid)
  5009. {
  5010. ForEachItemIn(i, *workflowOut)
  5011. {
  5012. WorkflowItem & cur = workflowOut->item(i);
  5013. if (cur.wfid == wfid)
  5014. return &cur;
  5015. }
  5016. return NULL;
  5017. }
  5018. void WorkflowTransformer::extractDependentInputs(UnsignedArray & visited, DependenciesUsed & dependencies, const UnsignedArray & wfids)
  5019. {
  5020. ForEachItemIn(i, wfids)
  5021. {
  5022. unsigned wfid = wfids.item(i);
  5023. if (wfid == trivialStoredWfid)
  5024. continue;
  5025. if (visited.contains(wfid))
  5026. continue;
  5027. visited.append(wfid);
  5028. const WorkflowItem * match = findWorkflowItem(wfid);
  5029. assertex(match);
  5030. switch (match->workflowOp)
  5031. {
  5032. case no_persist:
  5033. if (expandPersistInputDependencies)
  5034. break;
  5035. continue;
  5036. case no_stored:
  5037. continue;
  5038. }
  5039. extractDependentInputs(visited, dependencies, match->dependencies);
  5040. ForEachItemIn(iExpr, match->exprs)
  5041. gatherDependencies(&match->exprs.item(iExpr), dependencies, GatherAll);
  5042. }
  5043. }
  // Processes a workflow (no_colon) expression.  Child 0 of 'expr' is the value and the remaining
  // children are the workflow actions (persist/stored/checkpoint/independent/once, success/failure/
  // recovery contingencies, when/priority scheduling, plus an optional _original_ attribute).
  //
  // untransformed - the expression before transformation; remembered so a later duplicate
  //                 definition can be mapped onto the first occurrence.
  // expr          - the transformed no_colon expression.
  //
  // Registers the required workflow items with the workunit and appends them to workflowOut.
  // Returns (linked) the expression to use in place of the original: the "get" expression with
  // a dependency on the new wfid, or a null expression when the value is scheduled.
  // Throws for conflicting duplicate definitions, for PERSIST when roxie restrictions are checked,
  // and for unexpected workflow operators.
  5044. IHqlExpression * WorkflowTransformer::extractWorkflow(IHqlExpression * untransformed, IHqlExpression * expr)
  5045. {
  5046. IHqlExpression * value = expr->queryChild(0);
  5047. GlobalAttributeInfo info("spill::wf", "wf", value);
  5048. info.sequence.setown(getLocalSequenceNumber());
  5049. OwnedHqlExpr scheduleActions;
  5050. HqlExprArray actions;
  5051. unwindChildren(actions, expr, 1);
  // codehash is only created when an _original_ attribute is present: the CRC of the original
  // definition plus PERSIST_VERSION.  It is passed to extractStoredInfo() and the persist check.
  5052. IHqlExpression * originalAttr = queryAttribute(_original_Atom, actions);
  5053. OwnedHqlExpr codehash;
  5054. if (originalAttr)
  5055. {
  5056. unsigned crc = getExpressionCRC(originalAttr->queryChild(0)) + PERSIST_VERSION;
  5057. codehash.setown(getSizetConstant(crc));
  5058. }
  5059. //First check for duplicate expressions, and cope with the weird case where they are identical except for the annotations.
  5060. //Do it before wfid is allocated to make life simpler
  5061. ForEachItemIn(iCheck, actions)
  5062. {
  5063. IHqlExpression & cur = actions.item(iCheck);
  5064. node_operator curOp = cur.getOperator();
  5065. switch (curOp)
  5066. {
  5067. case no_persist:
  5068. case no_checkpoint:
  5069. case no_stored:
  5070. info.extractStoredInfo(&cur, codehash, isRoxie, multiplePersistInstances);
  5071. OwnedHqlExpr id = info.getStoredKey();
  5072. unsigned match = alreadyProcessed.find(*id);
  5073. if (match == NotFound)
  5074. break;
  5075. //Compare the definitions - not the expressions, otherwise the original attribute can create false negatives
  5076. IHqlExpression * prevValue = alreadyProcessedExpr.item(match).queryChild(0);
  5077. if(prevValue->queryBody() != value->queryBody())
  5078. {
  5079. StringBuffer s;
  5080. getStoredDescription(s, info.sequence, info.originalLabel, true);
  // A different type is always an error; otherwise a differing body is either downgraded to a
  // warning (allowStoredDuplicate) or is an error unless the location-independent forms match.
  5081. if(prevValue->queryType() != value->queryBody()->queryType())
  5082. {
  5083. #ifdef _DEBUG
  5084. debugFindFirstDifference(alreadyProcessedExpr.item(match).queryBody(), expr->queryBody());
  5085. #endif
  5086. if (curOp == no_stored)
  5087. throwError1(HQLERR_DuplicateStoredDiffType, s.str());
  5088. else
  5089. throwError1(HQLERR_DuplicateDefinitionDiffType, s.str());
  5090. }
  5091. else if (translator.queryOptions().allowStoredDuplicate) // only here as a temporary workaround
  5092. translator.reportWarning(queryActiveLocation(expr), HQLERR_DuplicateDefinition, HQLERR_DuplicateDefinition_Text, s.str());
  5093. else
  5094. {
  5095. if (queryLocationIndependent(prevValue) != queryLocationIndependent(value))
  5096. {
  5097. EclIR::dbglogIR(2, queryLocationIndependent(prevValue), queryLocationIndependent(value));
  5098. if (curOp == no_stored)
  5099. throwError1(HQLERR_DuplicateStoredDefinition, s.str());
  5100. else
  5101. throwError1(HQLERR_DuplicateDefinition, s.str());
  5102. }
  5103. }
  5104. }
  5105. //If the body was essentially the same, call transform on the previous value - so the duplicate resolves to whatever the first occurrence was transformed to
  5106. return transform(&alreadyProcessedUntransformed.item(match));
  5107. }
  5108. }
  // Not a duplicate: allocate the wfid for this item, then walk the actions again to gather the
  // contingency and schedule information and to record persist/checkpoint/stored keys.
  5109. ContingencyData conts;
  5110. ScheduleData sched;
  5111. unsigned wfid = ++wfidCount;
  5112. unsigned schedWfid = 0;
  5113. ForEachItemIn(idx, actions)
  5114. {
  5115. IHqlExpression & cur = actions.item(idx);
  5116. node_operator curOp = cur.getOperator();
  5117. switch (curOp)
  5118. {
  5119. case no_persist:
  5120. if (isRoxie && translator.getCheckRoxieRestrictions())
  5121. {
  5122. StringBuffer s;
  5123. IHqlExpression * name = cur.queryChild(0);
  5124. OwnedHqlExpr seq = getGlobalSequenceNumber();
  5125. getStoredDescription(s, seq, name, true);
  5126. throwError1(HQLERR_NotSupportInRoxie, s.str());
  5127. }
  5128. //fall through
  5129. case no_checkpoint:
  5130. case no_stored:
  5131. {
  // The three arrays are appended together so an index from alreadyProcessed.find() can be
  // used on the other two (see the duplicate check above).
  5132. info.extractStoredInfo(&cur, codehash, isRoxie, multiplePersistInstances);
  5133. OwnedHqlExpr id = info.getStoredKey();
  5134. alreadyProcessed.append(*id.getClear());
  5135. alreadyProcessedExpr.append(*LINK(expr));
  5136. alreadyProcessedUntransformed.append(*LINK(untransformed));
  5137. }
  5138. break;
  5139. case no_independent:
  5140. case no_once:
  5141. info.extractStoredInfo(&cur, codehash, isRoxie, multiplePersistInstances);
  5142. break;
  // Each contingency action is split into its own workflow item (splitValue) and registered as
  // a contingency of wfid.
  5143. case no_success:
  5144. {
  5145. OwnedHqlExpr successExpr = transformSequentialEtc(cur.queryChild(0));
  5146. conts.success = splitValue(successExpr);
  5147. Owned<IWorkflowItem> wf = addWorkflowContingencyToWorkunit(conts.success, WFTypeSuccess, WFModeNormal, queryDirectDependencies(successExpr), NULL, wfid);
  5148. info.extractStoredInfo(&cur, codehash, isRoxie, multiplePersistInstances);
  5149. break;
  5150. }
  5151. case no_failure:
  5152. {
  5153. OwnedHqlExpr failureExpr = transformSequentialEtc(cur.queryChild(0));
  5154. conts.failure = splitValue(failureExpr);
  5155. Owned<IWorkflowItem> wf = addWorkflowContingencyToWorkunit(conts.failure, WFTypeFailure, WFModeNormal, queryDirectDependencies(failureExpr), NULL, wfid);
  5156. info.extractStoredInfo(&cur, codehash, isRoxie, multiplePersistInstances);
  5157. break;
  5158. }
  5159. case no_recovery:
  5160. {
  // NOTE(review): unlike success/failure, the recovery action is not passed through
  // transformSequentialEtc() before being split - confirm this is intentional.
  5161. conts.recovery = splitValue(cur.queryChild(0));
  5162. conts.retries = (unsigned)getIntValue(cur.queryChild(1), 0);
  5163. Owned<IWorkflowItem> wf = addWorkflowContingencyToWorkunit(conts.recovery, WFTypeRecovery, WFModeNormal, queryDirectDependencies(cur.queryChild(0)), NULL, wfid);
  5164. info.extractStoredInfo(&cur, codehash, isRoxie, multiplePersistInstances);
  5165. break;
  5166. }
  5167. case no_attr:
  5168. assertex(cur.queryName() == _original_Atom);
  5169. break;
  5170. case no_when:
  5171. {
  // The event is folded first; the name and optional filter are then read via queryValue(),
  // which presumably requires them to have folded to constants.  A missing filter becomes "*".
  5172. OwnedHqlExpr folded = foldHqlExpression(&cur);
  5173. IHqlExpression * event = folded->queryChild(0);
  5174. IHqlExpression * eventFilter = event->queryChild(1);
  5175. sched.now = false;
  5176. event->queryChild(0)->queryValue()->getStringValue(sched.eventName);
  5177. if (eventFilter)
  5178. eventFilter->queryValue()->getStringValue(sched.eventText);
  5179. else
  5180. sched.eventText.append("*");
  5181. if(cur.numChildren()>1)
  5182. {
  5183. sched.counting = true;
  5184. sched.count = (unsigned)getIntValue(folded->queryChild(1));
  5185. }
  5186. sched.independent = true;
  5187. }
  5188. break;
  5189. case no_priority:
  5190. {
  5191. sched.priority = (int)getIntValue(cur.queryChild(0));
  5192. sched.independent = true;
  5193. break;
  5194. }
  5195. default:
  5196. throwUnexpectedOp(curOp);
  5197. }
  5198. }
  // Either the value is stored/persisted etc. (info.setOp set - split it into set/get expressions)
  // or it is purely scheduled (when/priority), in which case the value itself becomes the
  // scheduled item and reuses the wfid allocated above.
  5199. OwnedHqlExpr setValue;
  5200. OwnedHqlExpr getValue;
  5201. bool done = false;
  5202. if (info.setOp != no_none)
  5203. {
  5204. assertex(!sched.independent); // should have been enforced by the tree normalization
  5205. ITypeInfo * type = expr->queryType();
  5206. info.checkFew(translator);
  5207. info.splitGlobalDefinition(type, value, wu, setValue, &getValue, isRoxie);
  5208. copySetValueDependencies(queryBodyExtra(value), setValue);
  5209. }
  5210. else
  5211. {
  5212. assertex(sched.independent);
  5213. getValue.set(value);
  5214. done = true;
  5215. schedWfid = wfid;
  5216. }
  // Items with no schedule and no contingencies may be folded into a shared workflow item:
  // the single ONCE item, or the combined "trivial stored" item.
  5217. if(!sched.independent && !conts.success && !conts.failure && !conts.recovery)
  5218. {
  5219. bool combine = false;
  5220. if (combineAllStored && !hasNonTrivialDependencies(setValue))
  5221. {
  5222. switch (getResultSequenceValue(setValue))
  5223. {
  5224. case ResultSequenceStored:
  5225. combine = true;
  5226. break;
  5227. case ResultSequenceInternal:
  5228. combine = insideStored;
  5229. break;
  5230. }
  5231. }
  5232. if (info.persistOp == no_once)
  5233. {
  5234. //MORE: Error if refers to stored or persist
  5235. if (queryDirectDependencies(setValue).ordinality())
  5236. translator.ERRORAT(queryLocation(untransformed), HQLERR_OnceCannotAccessStored);
  // All ONCE values share onceWfid; if it already exists the wfid allocated above is handed back.
  5237. if (onceWfid == 0)
  5238. {
  5239. onceWfid = wfid;
  5240. }
  5241. else
  5242. {
  5243. wfid = onceWfid;
  5244. wfidCount--;
  5245. }
  5246. if (!onceExprs.contains(*setValue))
  5247. onceExprs.append(*LINK(setValue));
  5248. done = true;
  5249. }
  5250. if (combineTrivialStored && isTrivialStored(setValue))
  5251. combine = true;
  5252. if (combine)
  5253. {
  // Same sharing scheme as ONCE, using trivialStoredWfid.
  // NOTE(review): if a ONCE value also reaches this branch, wfidCount is decremented a second
  // time for the same allocation - confirm the two cases are mutually exclusive.
  5254. if (trivialStoredWfid == 0)
  5255. {
  5256. trivialStoredWfid = wfid;
  5257. storedWfids.append(wfid);
  5258. }
  5259. else
  5260. {
  5261. wfid = trivialStoredWfid;
  5262. wfidCount--;
  5263. }
  5264. if (trivialStoredExprs.find(*setValue) == NotFound)
  5265. trivialStoredExprs.append(*LINK(setValue));
  5266. done = true;
  5267. }
  5268. }
  // Not combined and not purely scheduled: create a dedicated workflow item for setValue.
  5269. if (!done)
  5270. {
  5271. if (info.persistOp == no_stored)
  5272. storedWfids.append(wfid);
  5273. //If you really want side effects within a no_persist to be processed in the correct sequence
  5274. //you need to use persist(failure(independent, f(independent))
  5275. //It generally makes worse code (and incorrect in jbellow.xhql) if they are expanded.
  5276. //because there is no ensure result in the expected wfid.
  5277. if ((info.persistOp != no_persist) && expr->isAction())
  5278. setValue.setown(transformSequentialEtc(setValue));
  5279. if(info.persistOp == no_persist)
  5280. {
  // A persist uses two workflow items: wfid (WFModePersist) evaluates the value, and
  // persistWfid runs a no_persist_check built from the files/results read, the code hash
  // and the persist name.
  5281. StringBuffer persistName;
  5282. info.storedName->queryValue()->getStringValue(persistName);
  5283. unsigned persistWfid = ++wfidCount;
  5284. Owned<IWorkflowItem> wf = addWorkflowToWorkunit(wfid, WFTypeNormal, WFModePersist, queryDirectDependencies(setValue), conts, info.queryCluster());
  5285. setWorkflowPersist(wf, persistName.str(), persistWfid, info.queryMaxPersistCopies());
  5286. DependenciesUsed dependencies(false);
  5287. UnsignedArray visited;
  5288. extractDependentInputs(visited, dependencies, queryDirectDependencies(setValue));
  5289. gatherDependencies(setValue, dependencies, GatherAll);
  5290. dependencies.removeInternalReads();
  5291. HqlExprArray checkArgs;
  5292. checkArgs.append(*createExprAttribute(_files_Atom, dependencies.tablesRead));
  5293. inheritDependencies(&checkArgs.item(0));
  5294. if (dependencies.resultsRead.ordinality())
  5295. {
  5296. checkArgs.append(*createExprAttribute(_results_Atom, dependencies.resultsRead));
  5297. inheritDependencies(&checkArgs.item(1));
  5298. }
  5299. checkArgs.append(*createAttribute(_codehash_Atom, LINK(codehash)));
  5300. checkArgs.append(*createAttribute(namedAtom, LINK(info.storedName)));
  5301. if (expr->isDataset())
  5302. checkArgs.append(*createAttribute(fileAtom));
  5303. OwnedHqlExpr check = createValue(no_persist_check, makeVoidType(), checkArgs);
  5304. inheritDependencies(check);
  5305. workflowOut->append(*createWorkflowItem(check, persistWfid, no_actionlist));
  5306. workflowOut->append(*createWorkflowItem(setValue, wfid, no_persist));
  5307. Owned<IWorkflowItem> wfPersist = addWorkflowToWorkunit(persistWfid, WFTypeNormal, WFModeNormal, queryDirectDependencies(check), NULL);
  5308. }
  5309. else
  5310. {
  // When a cluster is specified the set expression is wrapped in a no_cluster node.
  5311. if (info.queryCluster())
  5312. {
  5313. OwnedHqlExpr cluster = createValue(no_cluster, makeVoidType(), LINK(setValue), LINK(info.queryCluster()));
  5314. inheritDependencies(cluster);
  5315. setValue.set(cluster);
  5316. }
  5317. Owned<IWorkflowItem> wf = addWorkflowToWorkunit(wfid, WFTypeNormal, WFModeNormal, queryDirectDependencies(setValue), conts, info.queryCluster());
  5318. workflowOut->append(*createWorkflowItem(setValue, wfid, info.persistOp));
  5319. }
  5320. }
  // Scheduled items get their own workflow entry and are replaced by a null expression in the
  // main query; everything else returns the get expression with a dependency on wfid.
  5321. if(sched.independent)
  5322. {
  5323. if (schedWfid == 0)
  5324. schedWfid = ++wfidCount;
  5325. Owned<IWorkflowItem> wf = addWorkflowToWorkunit(schedWfid, WFTypeNormal, WFModeNormal, queryDirectDependencies(getValue), info.queryCluster());
  5326. setWorkflowSchedule(wf, sched);
  5327. workflowOut->append(*createWorkflowItem(getValue, schedWfid, no_none));
  5328. getValue.setown(createNullExpr(expr->queryType()));
  5329. }
  5330. else
  5331. queryBodyExtra(getValue.get())->addDependency(wfid);
  5332. return getValue.getClear();
  5333. }
  5334. IHqlExpression * WorkflowTransformer::extractCommonWorkflow(IHqlExpression * expr, IHqlExpression * transformed)
  5335. {
  5336. if (!transformed->queryDataset())
  5337. return LINK(transformed);
  5338. WorkflowTransformInfo * extra = queryBodyExtra(expr);
  5339. if (!extra->isCommonUpCandidate() || !isWorthHoisting(transformed, false))
  5340. return LINK(transformed);
  5341. if (isContextDependent(transformed) || !isIndependentOfScope(transformed))
  5342. return LINK(transformed);
  5343. StringBuffer s;
  5344. IHqlExpression * location = activeLocations.ordinality() ? &activeLocations.tos() : NULL;
  5345. if (!translator.queryOptions().performWorkflowCse)
  5346. {
  5347. s.appendf("AutoWorkflow: Try adding ': INDEPENDENT' to %s ", getOpString(expr->getOperator()));
  5348. if (expr->queryName())
  5349. s.append("[").append(expr->queryName()).append("] ");
  5350. s.append(" to common up code between workflow items");
  5351. DBGLOG("%s", s.str());
  5352. translator.addWorkunitException(ExceptionSeverityInformation, HQLWRN_TryAddingIndependent, s.str(), location);
  5353. if (!translator.queryOptions().performWorkflowCse)
  5354. return LINK(transformed);
  5355. }
  5356. //This code would need a lot more work for it to be enabled by default.
  5357. // e.g., ensure it really is worth commoning up, the expressions aren't to be evaluated on different clusters etc. etc.
  5358. unsigned wfid = ++wfidCount;
  5359. s.appendf("AutoWorkflow: Spotted %s ", getOpString(expr->getOperator()));
  5360. if (expr->queryName())
  5361. s.append("[").append(expr->queryName()).append("] ");
  5362. s.append(" to common up between workflow items [").append(wfid).append("]");
  5363. DBGLOG("%s", s.str());
  5364. translator.addWorkunitException(ExceptionSeverityInformation, 0, s.str(), location);
  5365. GlobalAttributeInfo info("spill::wfa", "wfa", transformed);
  5366. info.extractGlobal(NULL, translator.getTargetClusterType()); // should really be a slightly different function
  5367. OwnedHqlExpr setValue;
  5368. OwnedHqlExpr getValue;
  5369. ContingencyData conts;
  5370. WorkflowTransformInfo * transformedExtra = queryBodyExtra(transformed);
  5371. info.splitGlobalDefinition(transformed->queryType(), transformed, wu, setValue, &getValue, isRoxie);
  5372. copySetValueDependencies(transformedExtra, setValue);
  5373. Owned<IWorkflowItem> wf = addWorkflowToWorkunit(wfid, WFTypeNormal, WFModeNormal, queryDirectDependencies(setValue), conts, NULL);
  5374. workflowOut->append(*createWorkflowItem(setValue, wfid, no_actionlist));
  5375. queryBodyExtra(getValue.get())->addDependency(wfid);
  5376. return getValue.getClear();
  5377. }
  5378. IHqlExpression * WorkflowTransformer::transformInternalFunction(IHqlExpression * newFuncDef)
  5379. {
  5380. IHqlExpression * body = newFuncDef->queryChild(0);
  5381. if (body->getOperator() != no_outofline)
  5382. return LINK(newFuncDef);
  5383. IHqlExpression * ecl = body->queryChild(0);
  5384. StringBuffer funcname;
  5385. funcname.append("user").append(++nextInternalFunctionId);
  5386. if (translator.queryOptions().debugGeneratedCpp)
  5387. funcname.append("_").append(newFuncDef->queryName()).toLowerCase();
  5388. OwnedHqlExpr funcNameExpr = createConstant(funcname);
  5389. IHqlExpression * formals = newFuncDef->queryChild(1);
  5390. OwnedHqlExpr newFormals = mapInternalFunctionParameters(formals);
  5391. HqlExprArray bodyArgs;
  5392. bodyArgs.append(*replaceParameters(ecl, formals, newFormals));
  5393. unwindChildren(bodyArgs, body, 1);
  5394. bodyArgs.append(*createLocalAttribute());
  5395. bodyArgs.append(*createExprAttribute(entrypointAtom, LINK(funcNameExpr)));
  5396. OwnedHqlExpr newBody = body->clone(bodyArgs);
  5397. inheritDependencies(newBody);
  5398. HqlExprArray funcdefArgs;
  5399. funcdefArgs.append(*LINK(newBody));
  5400. funcdefArgs.append(*LINK(newFormals));
  5401. unwindChildren(funcdefArgs, newFuncDef, 2);
  5402. OwnedHqlExpr namedFuncDef = newFuncDef->clone(funcdefArgs);
  5403. inheritDependencies(namedFuncDef);
  5404. if (ecl->getOperator() == no_embedbody)
  5405. return namedFuncDef.getClear();
  5406. WorkflowItem * item = new WorkflowItem(namedFuncDef);
  5407. workflowOut->append(*item);
  5408. return createExternalFuncdefFromInternal(namedFuncDef);
  5409. }
  5410. IHqlExpression * WorkflowTransformer::transformInternalCall(IHqlExpression * transformed)
  5411. {
  5412. IHqlExpression * funcDef = transformed->queryDefinition();
  5413. Owned<IHqlExpression> newFuncDef = transform(funcDef);
  5414. HqlExprArray paramters;
  5415. unwindChildren(paramters, transformed);
  5416. OwnedHqlExpr rebound = createReboundFunction(newFuncDef, paramters);
  5417. inheritDependencies(rebound);
  5418. return rebound.getClear();
  5419. }
  5420. IHqlExpression * WorkflowTransformer::createTransformed(IHqlExpression * expr)
  5421. {
  5422. //Could short-circuit if doesn't contain workflow, but it also modifies outputs/buildindex...
  5423. //Force record to be transformed - so any stored values in record (ifblock!!) are hoisted.
  5424. node_operator op = expr->getOperator();
  5425. if (op == no_param)
  5426. return LINK(expr);
  5427. if (op == no_transform || op == no_newtransform)
  5428. ::Release(transform(expr->queryRecord()));
  5429. IHqlExpression * body = expr->queryBody(true);
  5430. if (expr != body)
  5431. {
  5432. switch (expr->getAnnotationKind())
  5433. {
  5434. case annotate_location:
  5435. case annotate_symbol:
  5436. activeLocations.append(*expr);
  5437. break;
  5438. }
  5439. OwnedHqlExpr transformedBody = transform(body);
  5440. switch (expr->getAnnotationKind())
  5441. {
  5442. case annotate_location:
  5443. case annotate_symbol:
  5444. activeLocations.pop();
  5445. break;
  5446. }
  5447. OwnedHqlExpr transformed = (transformedBody == body) ? LINK(expr) : expr->cloneAnnotation(transformedBody);
  5448. //more: this really shouldn't be needed
  5449. inheritDependencies(transformed);
  5450. return transformed.getClear();
  5451. }
  5452. bool wasInsideStored = insideStored;
  5453. if ((op == no_colon) && queryOperatorInList(no_stored, expr->queryChild(1)))
  5454. insideStored = true;
  5455. OwnedHqlExpr transformed = NewHqlTransformer::createTransformed(expr);
  5456. insideStored = wasInsideStored;
  5457. inheritDependencies(transformed);
  5458. switch (op)
  5459. {
  5460. #if 0
  5461. //MORE: Workflow in user functions doesn't work for roxie at the moment
  5462. case no_call:
  5463. transformed.setown(transformCall(transformed));
  5464. inheritDependencies(transformed);
  5465. copyDependencies(queryBodyExtra(transformed->queryExternalDefinition()), queryBodyExtra(transformed));
  5466. break;
  5467. case no_externalcall:
  5468. transformed.setown(transformExternalCall(transformed));
  5469. inheritDependencies(transformed);
  5470. copyDependencies(queryExtra(transformed->queryExternalDefinition()), queryExtra(transformed));
  5471. break;
  5472. #endif
  5473. case no_colon:
  5474. if (translator.insideLibrary())
  5475. {
  5476. SCMStringBuffer libraryName;
  5477. StringBuffer colonText(" (");
  5478. getOutputLibraryName(libraryName, wu);
  5479. getExprECL(expr, colonText);
  5480. colonText.append(")");
  5481. throwError2(HQLERR_LibraryCannotContainWorkflow, libraryName.str(), colonText.str());
  5482. }
  5483. transformed.setown(extractWorkflow(expr, transformed));
  5484. break;
  5485. case no_output:
  5486. case no_buildindex:
  5487. {
  5488. IHqlExpression * updateAttr = transformed->queryAttribute(updateAtom);
  5489. if (updateAttr)
  5490. {
  5491. DependenciesUsed dependencies(false);
  5492. gatherDependencies(transformed->queryChild(0), dependencies, GatherAll);
  5493. dependencies.removeInternalReads();
  5494. bool canEvaluateFilenames = true;
  5495. HqlExprArray updateArgs;
  5496. unwindChildren(updateArgs, updateAttr);
  5497. if (dependencies.tablesRead.ordinality())
  5498. {
  5499. OwnedHqlExpr attr = createExprAttribute(_files_Atom, dependencies.tablesRead);
  5500. if (!isIndependentOfScope(attr) || isContextDependent(attr))
  5501. {
  5502. if (!updateAttr->hasAttribute(alwaysAtom))
  5503. throwError(HQLERR_InputsAreTooComplexToUpdate);
  5504. canEvaluateFilenames = false;
  5505. }
  5506. else
  5507. updateArgs.append(*attr.getClear());
  5508. }
  5509. if (dependencies.resultsRead.ordinality())
  5510. updateArgs.append(*createExprAttribute(_results_Atom, dependencies.resultsRead));
  5511. HqlExprArray args;
  5512. unwindChildren(args, transformed);
  5513. args.zap(*updateAttr);
  5514. if (canEvaluateFilenames)
  5515. args.append(*createExprAttribute(updateAtom, updateArgs));
  5516. transformed.setown(transformed->clone(args));
  5517. inheritDependencies(transformed);
  5518. }
  5519. break;
  5520. }
  5521. case no_funcdef:
  5522. transformed.setown(transformInternalFunction(transformed));
  5523. break;
  5524. case no_call:
  5525. transformed.setown(transformInternalCall(transformed));
  5526. break;
  5527. }
  5528. return extractCommonWorkflow(expr, transformed);
  5529. }
  5530. //-- second pass - sort out sequential etc.
  5531. /*
  5532. This is very tricky... The problem is we only want to create workflow actions for sequential/parallel and conditions if they
  5533. are necessary. In particular.
  5534. o workflow items are only executed once per invocation
  5535. o create them for sequential if the dependencies haven't already been evaluated
  5536. o create them for conditions if the non-intersection of the dependencies for the branches haven't already been evaluated
  5537. o create if a workflow action has been created for a child action.
  5538. o can't rely on createTransform() updating the dependencies so-far because the transform() may be cached.
  5539. o Need to be careful that dependencies done so far are set up correctly before each call to transform()
  5540. */
  5541. UnsignedArray const & WorkflowTransformer::queryDependencies(unsigned wfid)
  5542. {
  5543. if (wfid == trivialStoredWfid)
  5544. return emptyDependencies;
  5545. ForEachItemIn(i, *workflowOut)
  5546. {
  5547. WorkflowItem & cur = workflowOut->item(i);
  5548. if (cur.wfid == wfid)
  5549. return cur.dependencies;
  5550. }
  5551. throwUnexpected();
  5552. }
  5553. void WorkflowTransformer::gatherIndirectDependencies(UnsignedArray & result, IHqlExpression * expr)
  5554. {
  5555. if (isWorkflowAction(expr))
  5556. {
  5557. unsigned wfid = (unsigned)getIntValue(expr->queryChild(0));
  5558. ::inheritDependencies(result, queryDependencies(wfid));
  5559. }
  5560. else
  5561. {
  5562. const UnsignedArray & direct = queryBodyExtra(expr)->queryDependencies();
  5563. ForEachItemIn(i, direct)
  5564. {
  5565. unsigned wfid = direct.item(i);
  5566. if (addDependency(result, wfid))
  5567. ::inheritDependencies(result, queryDependencies(wfid));
  5568. }
  5569. }
  5570. }
  5571. bool WorkflowTransformer::hasNonTrivialDependencies(IHqlExpression * expr)
  5572. {
  5573. UnsignedArray const & dependencies = queryDirectDependencies(expr);
  5574. ForEachItemIn(i, dependencies)
  5575. {
  5576. unsigned cur = dependencies.item(i);
  5577. if ((cur != trivialStoredWfid) && (cur != onceWfid))
  5578. return true;
  5579. }
  5580. return false;
  5581. }
  5582. UnsignedArray const & WorkflowTransformer::queryDirectDependencies(IHqlExpression * expr)
  5583. {
  5584. return queryBodyExtra(expr)->queryDependencies();
  5585. }
  5586. void WorkflowTransformer::cacheWorkflowDependencies(unsigned wfid, UnsignedArray & extra)
  5587. {
  5588. WorkflowItem * item = new WorkflowItem(wfid, no_actionlist);
  5589. ForEachItemIn(i, extra)
  5590. {
  5591. unsigned wfid = extra.item(i);
  5592. item->dependencies.append(wfid);
  5593. ::inheritDependencies(item->dependencies, queryDependencies(wfid));
  5594. }
  5595. workflowOut->append(*item);
  5596. }
  5597. IHqlExpression * WorkflowTransformer::createWorkflowAction(unsigned wfid)
  5598. {
  5599. //NB: Needs to include wfid as an argument otherwise inherited dependencies get messed up
  5600. OwnedHqlExpr transformed = createValue(no_workflow_action, makeVoidType(), getSizetConstant(wfid));
  5601. queryBodyExtra(transformed)->addDependency(wfid);
  5602. return transformed.getClear();
  5603. }
  5604. void WorkflowTransformer::ensureWorkflowAction(UnsignedArray & dependencies, IHqlExpression * expr)
  5605. {
  5606. unsigned wfid = ensureWorkflowAction(expr);
  5607. addDependency(dependencies, wfid);
  5608. }
  5609. //Create a sequential workflow action if any of the branches contains a workflow action
  5610. IHqlExpression * WorkflowTransformer::createCompoundWorkflow(IHqlExpression * expr)
  5611. {
  5612. HqlExprArray pendingBranches;
  5613. UnsignedArray childWfid;
  5614. ForEachChild(i, expr)
  5615. {
  5616. IHqlExpression * cur = expr->queryChild(i);
  5617. unsigned mark = markDependencies();
  5618. OwnedHqlExpr transformed = transformRootAction(cur);
  5619. restoreDependencies(mark);
  5620. if (isWorkflowAction(transformed))
  5621. {
  5622. if (pendingBranches.ordinality())
  5623. {
  5624. OwnedHqlExpr branch = createActionList(pendingBranches);
  5625. inheritDependencies(branch);
  5626. ensureWorkflowAction(childWfid, branch);
  5627. pendingBranches.kill();
  5628. }
  5629. ensureWorkflowAction(childWfid, transformed);
  5630. }
  5631. else
  5632. {
  5633. pendingBranches.append(*LINK(transformed));
  5634. }
  5635. gatherIndirectDependencies(cumulativeDependencies, transformed);
  5636. }
  5637. if (childWfid.ordinality())
  5638. {
  5639. if (pendingBranches.ordinality())
  5640. {
  5641. OwnedHqlExpr branch = createActionList(pendingBranches);
  5642. inheritDependencies(branch);
  5643. ensureWorkflowAction(childWfid, branch);
  5644. }
  5645. unsigned wfid = ++wfidCount;
  5646. Owned<IWorkflowItem> wf = addWorkflowToWorkunit(wfid, WFTypeNormal, WFModeSequential, childWfid, rootCluster);
  5647. cacheWorkflowDependencies(wfid, childWfid);
  5648. return createWorkflowAction(wfid);
  5649. }
  5650. return LINK(expr);
  5651. }
  5652. //Create a sequential workflow action if any of the branches introduce new dependencies/or creates a workflow item (e.g., wait!)
  5653. IHqlExpression * WorkflowTransformer::createSequentialWorkflow(IHqlExpression * expr)
  5654. {
  5655. OwnedHqlExpr nextBranch;
  5656. UnsignedArray childWfid;
  5657. ForEachChild(i, expr)
  5658. {
  5659. IHqlExpression * cur = expr->queryChild(i);
  5660. unsigned mark = markDependencies();
  5661. OwnedHqlExpr transformed = transformRootAction(cur);
  5662. restoreDependencies(mark);
  5663. UnsignedArray dependencies;
  5664. gatherIndirectDependencies(dependencies, transformed);
  5665. if (hasExtraDependencies(cumulativeDependencies, dependencies, storedWfids) || isWorkflowAction(transformed))
  5666. {
  5667. if (nextBranch)
  5668. {
  5669. ensureWorkflowAction(childWfid, nextBranch);
  5670. nextBranch.clear();
  5671. }
  5672. ::inheritDependencies(cumulativeDependencies, dependencies);
  5673. if (isWorkflowAction(transformed))
  5674. ensureWorkflowAction(childWfid, transformed);
  5675. else
  5676. nextBranch.set(transformed);
  5677. }
  5678. else
  5679. {
  5680. if (nextBranch)
  5681. nextBranch.setown(createValue(expr->getOperator(), nextBranch.getClear(), LINK(transformed)));
  5682. else
  5683. nextBranch.set(transformed);
  5684. inheritDependencies(nextBranch);
  5685. }
  5686. }
  5687. if (childWfid.ordinality())
  5688. {
  5689. if (nextBranch)
  5690. ensureWorkflowAction(childWfid, nextBranch);
  5691. unsigned wfid = ++wfidCount;
  5692. Owned<IWorkflowItem> wf = addWorkflowToWorkunit(wfid, WFTypeNormal, WFModeSequential, childWfid, rootCluster);
  5693. cacheWorkflowDependencies(wfid, childWfid);
  5694. return createWorkflowAction(wfid);
  5695. }
  5696. return LINK(expr);
  5697. }
  5698. // Create a parallel workflow action if any of the child actions are workflow actions
  5699. IHqlExpression * WorkflowTransformer::createParallelWorkflow(IHqlExpression * expr)
  5700. {
  5701. HqlExprArray branches;
  5702. UnsignedArray childWfid;
  5703. unsigned mark = markDependencies();
  5704. ForEachChild(i, expr)
  5705. {
  5706. IHqlExpression * cur = expr->queryChild(i);
  5707. OwnedHqlExpr transformed = transformRootAction(cur);
  5708. if (isWorkflowAction(transformed))
  5709. ensureWorkflowAction(childWfid, transformed);
  5710. else
  5711. branches.append(*LINK(transformed));
  5712. restoreDependencies(mark);
  5713. }
  5714. if (childWfid.ordinality())
  5715. {
  5716. if (branches.ordinality())
  5717. {
  5718. OwnedHqlExpr branch = createActionList(branches);
  5719. inheritDependencies(branch);
  5720. ensureWorkflowAction(childWfid, branch);
  5721. }
  5722. unsigned wfid = ++wfidCount;
  5723. Owned<IWorkflowItem> wf = addWorkflowToWorkunit(wfid, WFTypeNormal, WFModeParallel, childWfid, rootCluster);
  5724. cacheWorkflowDependencies(wfid, childWfid);
  5725. return createWorkflowAction(wfid);
  5726. }
  5727. return LINK(expr);
  5728. }
  5729. IHqlExpression * WorkflowTransformer::createIfWorkflow(IHqlExpression * expr)
  5730. {
  5731. IHqlExpression * cond = expr->queryChild(0);
  5732. IHqlExpression * trueExpr = expr->queryChild(1);
  5733. IHqlExpression * falseExpr = expr->queryChild(2);
  5734. OwnedHqlExpr newCond = LINK(cond);
  5735. gatherIndirectDependencies(cumulativeDependencies, cond);
  5736. //more: inherit dependencies?
  5737. UnsignedArray trueDepends, falseDepends;
  5738. unsigned mark = markDependencies();
  5739. OwnedHqlExpr newTrueExpr = transformRootAction(trueExpr);
  5740. restoreDependencies(mark);
  5741. OwnedHqlExpr newFalseExpr = falseExpr ? transformRootAction(falseExpr) : NULL;
  5742. restoreDependencies(mark);
  5743. //Need to turn a conditional action into a conditional workflow item if
  5744. //i) it has a workflow action as a child.
  5745. //ii) the true/false branches are dependent on something that hasn't already been evaluated
  5746. // (and isn't shared between both branches)
  5747. bool needToCreateWorkflow = false;
  5748. if (hasDependencies(newTrueExpr) || (newFalseExpr && hasDependencies(newFalseExpr)))
  5749. {
  5750. needToCreateWorkflow = isWorkflowAction(newTrueExpr) || isWorkflowAction(newFalseExpr);
  5751. if (!needToCreateWorkflow)
  5752. {
  5753. //Failures are assumed to be exceptional, so don't worry about extra dependencies
  5754. if (!isFailAction(newTrueExpr) && !isFailAction(newFalseExpr))
  5755. {
  5756. UnsignedArray newTrueDepends;
  5757. gatherIndirectDependencies(newTrueDepends, newTrueExpr);
  5758. if (!falseExpr)
  5759. needToCreateWorkflow = hasExtraDependencies(cumulativeDependencies, newTrueDepends, storedWfids);
  5760. else
  5761. {
  5762. UnsignedArray newFalseDepends;
  5763. gatherIndirectDependencies(newFalseDepends, newFalseExpr);
  5764. UnsignedArray diff;
  5765. diffDependencies(diff, newTrueDepends, newFalseDepends);
  5766. needToCreateWorkflow = hasExtraDependencies(cumulativeDependencies, diff, storedWfids);
  5767. }
  5768. }
  5769. }
  5770. if (needToCreateWorkflow)
  5771. {
  5772. //Represent as wfid(cond-wfid, true-wfid, false-wfid)
  5773. UnsignedArray dependencies;
  5774. OwnedHqlExpr setCondExpr = createValue(no_setworkflow_cond, makeVoidType(), LINK(cond));
  5775. inheritDependencies(setCondExpr);
  5776. ensureWorkflowAction(dependencies, setCondExpr);
  5777. ensureWorkflowAction(dependencies, newTrueExpr);
  5778. if (newFalseExpr)
  5779. ensureWorkflowAction(dependencies, newFalseExpr);
  5780. unsigned wfid = ++wfidCount;
  5781. Owned<IWorkflowItem> wf = addWorkflowToWorkunit(wfid, WFTypeNormal, WFModeCondition, dependencies, rootCluster);
  5782. WorkflowItem * item = new WorkflowItem(wfid, no_if);
  5783. cloneDependencies(item->dependencies, dependencies);
  5784. if (falseExpr)
  5785. {
  5786. UnsignedArray newTrueDepends;
  5787. UnsignedArray newFalseDepends;
  5788. gatherIndirectDependencies(newTrueDepends, newTrueExpr);
  5789. gatherIndirectDependencies(newFalseDepends, newFalseExpr);
  5790. intersectDependencies(item->dependencies, newTrueDepends, newFalseDepends);
  5791. }
  5792. workflowOut->append(*item);
  5793. return createWorkflowAction(wfid);
  5794. }
  5795. }
  5796. return LINK(expr);
  5797. }
  5798. IHqlExpression * WorkflowTransformer::createWaitWorkflow(IHqlExpression * expr)
  5799. {
  5800. //First create a EndWait workflow item which has a when clause of the wait criteria
  5801. OwnedHqlExpr folded = foldHqlExpression(expr);
  5802. IHqlExpression * event = folded->queryChild(0);
  5803. IHqlExpression * eventFilter = event->queryChild(1);
  5804. ScheduleData sched;
  5805. sched.now = false;
  5806. getStringValue(sched.eventName, event->queryChild(0));
  5807. if (eventFilter)
  5808. getStringValue(sched.eventText, eventFilter);
  5809. else
  5810. sched.eventText.append("*");
  5811. sched.counting = true;
  5812. sched.count = 0;
  5813. sched.independent = true;
  5814. unsigned endWaitWfid = ++wfidCount;
  5815. UnsignedArray noDependencies;
  5816. Owned<IWorkflowItem> wf = addWorkflowToWorkunit(endWaitWfid, WFTypeNormal, WFModeWait, noDependencies, rootCluster);
  5817. setWorkflowSchedule(wf, sched);
  5818. OwnedHqlExpr doNothing = createValue(no_null, makeVoidType());
  5819. workflowOut->append(*createWorkflowItem(doNothing, endWaitWfid, no_wait));
  5820. //Now create a wait entry, with the EndWait as the dependency
  5821. UnsignedArray dependencies;
  5822. dependencies.append(endWaitWfid);
  5823. unsigned beginWaitWfid = ++wfidCount;
  5824. Owned<IWorkflowItem> wfWait = addWorkflowToWorkunit(beginWaitWfid, WFTypeNormal, WFModeBeginWait, dependencies, rootCluster);
  5825. cacheWorkflowDependencies(beginWaitWfid, dependencies);
  5826. return createWorkflowAction(beginWaitWfid);
  5827. }
  5828. IHqlExpression * WorkflowTransformer::transformRootAction(IHqlExpression * expr)
  5829. {
  5830. node_operator op = expr->getOperator();
  5831. switch (op)
  5832. {
  5833. case no_compound:
  5834. if (expr->isAction())
  5835. return createCompoundWorkflow(expr);
  5836. break;
  5837. case no_parallel:
  5838. return createParallelWorkflow(expr);
  5839. case no_sequential:
  5840. case no_orderedactionlist:
  5841. return createSequentialWorkflow(expr);
  5842. case no_actionlist:
  5843. return createCompoundWorkflow(expr);
  5844. case no_if:
  5845. if (expr->isAction())
  5846. return createIfWorkflow(expr);
  5847. break;
  5848. case no_wait:
  5849. return createWaitWorkflow(expr);
  5850. case no_ensureresult:
  5851. {
  5852. IHqlExpression * value = expr->queryChild(0);
  5853. if (!value->isAction())
  5854. break;
  5855. OwnedHqlExpr transformed = transformRootAction(value);
  5856. if (value == transformed)
  5857. break;
  5858. HqlExprArray args;
  5859. args.append(*transformed.getClear());
  5860. unwindChildren(args, expr, 1);
  5861. OwnedHqlExpr ret = expr->clone(args);
  5862. inheritDependencies(ret);
  5863. return ret.getClear();
  5864. }
  5865. }
  5866. return LINK(expr);
  5867. }
  5868. IHqlExpression * WorkflowTransformer::transformSequentialEtc(IHqlExpression * expr)
  5869. {
  5870. unsigned mark = markDependencies();
  5871. //Ignore differences in access to trivial stored variables.
  5872. if (trivialStoredWfid)
  5873. cumulativeDependencies.append(trivialStoredWfid);
  5874. if (onceWfid)
  5875. cumulativeDependencies.append(onceWfid);
  5876. OwnedHqlExpr ret = transformRootAction(expr);
  5877. restoreDependencies(mark);
  5878. return ret.getClear();
  5879. }
  5880. void WorkflowTransformer::percolateScheduledIds(WorkflowArray & workflow)
  5881. {
  5882. ForEachItemIn(i, workflow)
  5883. {
  5884. WorkflowItem & cur = workflow.item(i);
  5885. Owned<IWorkflowItem> wf = lookupWorkflowItem(cur.queryWfid());
  5886. if (wf && wf->isScheduledNow())
  5887. {
  5888. ForEachItemIn(i2, cur.dependencies)
  5889. {
  5890. Owned<IWorkflowItem> child = lookupWorkflowItem(cur.dependencies.item(i2));
  5891. if (child->queryMode() == WFModeWait)
  5892. child->setScheduledWfid(cur.queryWfid());
  5893. }
  5894. }
  5895. }
  5896. }
  5897. ///- workflow processing
  5898. void WorkflowTransformer::analyseExpr(IHqlExpression * expr)
  5899. {
  5900. WorkflowTransformInfo * extra = queryBodyExtra(expr);
  5901. if (extra->noteWorkflow(activeWfid, isConditional))
  5902. return;
  5903. switch (expr->getOperator())
  5904. {
  5905. case no_allnodes:
  5906. //MORE: Do I need to recurse and explicitly disable hoisting?
  5907. return;
  5908. case no_if:
  5909. {
  5910. bool wasConditional = isConditional;
  5911. analyseExpr(expr->queryChild(0));
  5912. isConditional = true;
  5913. analyseExpr(expr->queryChild(1));
  5914. if (expr->queryChild(2))
  5915. analyseExpr(expr->queryChild(2));
  5916. isConditional = wasConditional;
  5917. return;
  5918. }
  5919. case no_colon:
  5920. {
  5921. if (!isIndependentOfScope(expr->queryChild(0)))
  5922. {
  5923. StringBuffer s;
  5924. if (expr->queryName())
  5925. s.appendf(" '%s'", expr->queryName()->str());
  5926. //MORE: Better if we also kept nested track of locations
  5927. translator.WARNINGAT1(queryActiveLocation(expr), HQLWRN_WorkflowSeemsToBeDependent, s.str());
  5928. }
  5929. unsigned prevWfid = activeWfid;
  5930. activeWfid = ++wfidCount;
  5931. analyseExpr(expr->queryChild(0));
  5932. activeWfid = prevWfid;
  5933. return;
  5934. }
  5935. }
  5936. NewHqlTransformer::analyseExpr(expr);
  5937. }
  5938. void WorkflowTransformer::analyseAll(const HqlExprArray & in)
  5939. {
  5940. activeWfid = ++wfidCount;
  5941. analyseArray(in, 0);
  5942. wfidCount = 0;
  5943. }
  5944. void WorkflowTransformer::transformRoot(const HqlExprArray & in, WorkflowArray & out)
  5945. {
  5946. wfidCount = 0;
  5947. workflowOut = &out;
  5948. HqlExprArray transformed;
  5949. WorkflowTransformInfo globalInfo(NULL);
  5950. ForEachItemIn(idx, in)
  5951. {
  5952. OwnedHqlExpr ret = transform(&in.item(idx));
  5953. copyDependencies(queryBodyExtra(ret), &globalInfo);
  5954. //ignore results that do nothing, but still collect the dependencies...
  5955. if (ret->getOperator() != no_null)
  5956. transformed.append(*ret.getClear());
  5957. }
  5958. if (onceExprs.length())
  5959. {
  5960. //By definition they don't have any dependencies, so no need to call inheritDependencies.
  5961. OwnedHqlExpr onceExpr = createActionList(onceExprs);
  5962. Owned<IWorkflowItem> wf = addWorkflowToWorkunit(onceWfid, WFTypeNormal, WFModeOnce, queryDirectDependencies(onceExpr), NULL);
  5963. wf->setScheduledNow();
  5964. out.append(*createWorkflowItem(onceExpr, onceWfid, no_once));
  5965. }
  5966. if (trivialStoredExprs.length())
  5967. {
  5968. //By definition they don't have any dependencies, so no need to call inheritDependencies.
  5969. OwnedHqlExpr trivialStoredExpr = createActionList(trivialStoredExprs);
  5970. Owned<IWorkflowItem> wf = addWorkflowToWorkunit(trivialStoredWfid, WFTypeNormal, WFModeNormal, queryDirectDependencies(trivialStoredExpr), NULL);
  5971. out.append(*createWorkflowItem(trivialStoredExpr, trivialStoredWfid, no_stored));
  5972. }
  5973. if (transformed.ordinality())
  5974. {
  5975. //Handle sequential etc.
  5976. OwnedHqlExpr combined = createActionList(transformed);
  5977. OwnedHqlExpr result = transformSequentialEtc(combined);
  5978. transformed.kill();
  5979. transformed.append(*result.getClear());
  5980. }
  5981. UnsignedArray const & dependencies = globalInfo.queryDependencies();
  5982. if(transformed.ordinality() || dependencies.ordinality())
  5983. {
  5984. if ((transformed.ordinality() == 0) && (dependencies.ordinality() == 1))
  5985. {
  5986. Owned<IWorkflowItem> wf = lookupWorkflowItem(dependencies.item(0));
  5987. wf->setScheduledNow();
  5988. }
  5989. else
  5990. {
  5991. Owned<IHqlExpression> combinedItems = createComma(transformed);
  5992. if (!combinedItems)
  5993. combinedItems.setown(createValue(no_null, makeVoidType()));
  5994. unsigned wfid;
  5995. if (!isWorkflowAction(combinedItems))
  5996. {
  5997. wfid = ++wfidCount;
  5998. ScheduleData sched;
  5999. Owned<IWorkflowItem> wf = addWorkflowToWorkunit(wfid, WFTypeNormal, WFModeNormal, dependencies, NULL);
  6000. setWorkflowSchedule(wf, sched);
  6001. out.append(*createWorkflowItem(combinedItems, wfid, no_actionlist));
  6002. }
  6003. else
  6004. wfid = ensureWorkflowAction(combinedItems);
  6005. Owned<IWorkflowItem> wf = lookupWorkflowItem(wfid);
  6006. wf->setScheduledNow();
  6007. }
  6008. }
  6009. workflowOut = NULL;
  6010. percolateScheduledIds(out);
  6011. }
  6012. void extractWorkflow(HqlCppTranslator & translator, HqlExprArray & exprs, WorkflowArray & out)
  6013. {
  6014. WorkflowTransformer transformer(translator.wu(), translator);
  6015. if (translator.queryOptions().performWorkflowCse || translator.queryOptions().notifyWorkflowCse)
  6016. transformer.analyseAll(exprs);
  6017. transformer.transformRoot(exprs, out);
  6018. }
  6019. //------------------------------------------------------------------------
  6020. enum { SIKnone, SIKhole, SIKagent, SIKthor };
  6021. class StatementInfo : public CInterface
  6022. {
  6023. public:
  6024. StatementInfo(IHqlExpression * _expr);
  6025. void calcDependencies();
  6026. bool canSwapOrder(StatementInfo & other)
  6027. {
  6028. return queryDependencies().canSwapOrder(other.queryDependencies());
  6029. }
  6030. inline bool isConditional() { return expr->getOperator() == no_if; }
  6031. inline bool isThorQuery() { return category == SIKthor; }
  6032. DependenciesUsed & queryDependencies()
  6033. {
  6034. if (!hasDependencies)
  6035. {
  6036. calcDependencies();
  6037. hasDependencies = true;
  6038. }
  6039. return dependencies;
  6040. }
  6041. public:
  6042. HqlExprAttr expr;
  6043. protected:
  6044. DependenciesUsed dependencies;
  6045. bool hasDependencies;
  6046. unsigned category;
  6047. };
  6048. StatementInfo::StatementInfo(IHqlExpression * _expr) : dependencies(true)
  6049. {
  6050. expr.set(_expr);
  6051. if (expr->getOperator() == no_thor)
  6052. category = SIKthor;
  6053. else
  6054. category = SIKagent;
  6055. hasDependencies = false;
  6056. }
  6057. void StatementInfo::calcDependencies()
  6058. {
  6059. gatherDependencies(expr, dependencies, GatherAll);
  6060. }
  6061. void groupThorGraphs(HqlExprArray & in)
  6062. {
  6063. //Gather information about the statements...
  6064. bool hadThor = false;
  6065. bool lastWasThor = false;
  6066. bool couldImprove = false;
  6067. CIArrayOf<StatementInfo> stmts;
  6068. ForEachItemIn(idx, in)
  6069. {
  6070. StatementInfo & cur = *new StatementInfo(&in.item(idx));
  6071. stmts.append(cur);
  6072. if (cur.isThorQuery())
  6073. {
  6074. if (hadThor && !lastWasThor)
  6075. couldImprove = true;
  6076. hadThor = true;
  6077. lastWasThor = true;
  6078. }
  6079. else
  6080. lastWasThor = false;
  6081. }
  6082. //If no thor queries are split by other queries, then may as well keep in the same order...
  6083. if (!couldImprove)
  6084. return;
  6085. //Need to work out the best order to generate the statements in. We want
  6086. //to move non thor queries to the front, so we do a insertion sort on them
  6087. CopyCIArrayOf<StatementInfo> sorted;
  6088. ForEachItemIn(idx1, stmts)
  6089. {
  6090. StatementInfo & cur = stmts.item(idx1);
  6091. bool curIsThor = cur.isThorQuery();
  6092. unsigned insertPos = sorted.ordinality();
  6093. ForEachItemInRev(idx2, sorted)
  6094. {
  6095. StatementInfo & compare = sorted.item(idx2);
  6096. if (compare.isThorQuery() == curIsThor)
  6097. {
  6098. insertPos = idx2+1;
  6099. break;
  6100. }
  6101. if (!compare.canSwapOrder(cur))
  6102. break;
  6103. }
  6104. sorted.add(cur, insertPos);
  6105. }
  6106. //Finally see if there is any merit in moving an initial block of thor queries down to
  6107. //merge with a subsequent one.
  6108. StatementInfo & first = sorted.item(0);
  6109. if (first.isThorQuery())
  6110. {
  6111. unsigned max = sorted.ordinality();
  6112. unsigned numToMove;
  6113. for (numToMove = 1; numToMove < max; numToMove++)
  6114. {
  6115. if (!(sorted.item(numToMove)).isThorQuery())
  6116. break;
  6117. }
  6118. for (unsigned i=numToMove; i < max; i++)
  6119. {
  6120. StatementInfo & compare = sorted.item(i);
  6121. if (compare.isThorQuery())
  6122. {
  6123. for (unsigned j=0; j < numToMove; j++)
  6124. sorted.rotateL(0, i-1);
  6125. break;
  6126. }
  6127. for (unsigned j=0; j < numToMove; j++)
  6128. {
  6129. if (!compare.canSwapOrder(sorted.item(j)))
  6130. {
  6131. i = max - 1;
  6132. break;
  6133. }
  6134. }
  6135. }
  6136. }
  6137. in.kill();
  6138. ForEachItemIn(idxSorted, sorted)
  6139. {
  6140. StatementInfo & cur = sorted.item(idxSorted);
  6141. in.append(*cur.expr.getLink());
  6142. }
  6143. }
  6144. //------------------------------------------------------------------------
  6145. //We will generate better code if conditional statements precede unconditional statements because globals can
  6146. //be commoned up better.
  6147. bool moveUnconditionalEarlier(HqlExprArray & in)
  6148. {
  6149. //Gather information about the statements...
  6150. unsigned numConditionals = 0;
  6151. unsigned firstConditional = NotFound;
  6152. bool couldImprove = false;
  6153. CIArrayOf<StatementInfo> stmts;
  6154. ForEachItemIn(idx, in)
  6155. {
  6156. StatementInfo & cur = *new StatementInfo(&in.item(idx));
  6157. stmts.append(cur);
  6158. if (cur.isConditional())
  6159. {
  6160. if (numConditionals == 0)
  6161. firstConditional = idx;
  6162. numConditionals++;
  6163. }
  6164. else if (numConditionals)
  6165. couldImprove = true;
  6166. }
  6167. //If no unconditionals follow a conditional, and no conditionals to be combined, then keep in the same order...
  6168. if (!couldImprove && numConditionals <= 1)
  6169. return false;
  6170. //For each block of unconditional statements which follow a conditional statement, see if they can be moved over the conditional statements.
  6171. //(copies with no overhead if couldImprove is false)
  6172. CopyCIArrayOf<StatementInfo> sorted;
  6173. unsigned max = stmts.ordinality();
  6174. for (unsigned idx1 = 0; idx1 < max;)
  6175. {
  6176. StatementInfo & cur = stmts.item(idx1);
  6177. bool isConditional = cur.isConditional();
  6178. unsigned cnt = 1;
  6179. if (isConditional || idx1 < firstConditional)
  6180. {
  6181. sorted.append(cur);
  6182. }
  6183. else
  6184. {
  6185. //calculate the number of contiguous unconditional statements
  6186. for (cnt=1; idx1+cnt < max; cnt++)
  6187. {
  6188. if (stmts.item(idx1+cnt).isConditional())
  6189. break;
  6190. }
  6191. unsigned movePosition = 0;
  6192. for (unsigned iBlock = 0; iBlock < cnt; iBlock++)
  6193. {
  6194. StatementInfo & curBlock = stmts.item(idx1+iBlock);
  6195. unsigned bestPosition = NotFound; // best position to add block.
  6196. unsigned prev = idx1;
  6197. while (prev-- > firstConditional)
  6198. {
  6199. StatementInfo & compare = sorted.item(prev);
  6200. if (!compare.canSwapOrder(curBlock))
  6201. break;
  6202. if (prev == firstConditional)
  6203. bestPosition = prev;
  6204. else if (compare.isConditional() && !sorted.item(prev-1).isConditional())
  6205. bestPosition = prev;
  6206. }
  6207. if (bestPosition == NotFound)
  6208. {
  6209. //can't move this element in the block => append the items to the list.
  6210. movePosition = sorted.ordinality();
  6211. break;
  6212. }
  6213. //Intersection of the best positions to provide earliest we can move the block
  6214. if (movePosition < bestPosition)
  6215. movePosition = bestPosition;
  6216. }
  6217. for (unsigned iBlock2 = 0; iBlock2 < cnt; iBlock2++)
  6218. sorted.add(stmts.item(idx1+iBlock2), movePosition+iBlock2);
  6219. }
  6220. idx1 += cnt;
  6221. }
  6222. //See if moving conditional statements could make some conditions next to each other
  6223. //Now see if any of the conditional statements can be combined.
  6224. //Finally replace the array
  6225. in.kill();
  6226. ForEachItemIn(idxSorted, sorted)
  6227. {
  6228. StatementInfo & cur = (StatementInfo &)sorted.item(idxSorted);
  6229. in.append(*cur.expr.getLink());
  6230. }
  6231. return true;
  6232. }
  6233. //------------------------------------------------------------------------
  6234. static void mergeThorGraphs(HqlExprArray & exprs, bool resourceConditionalActions, bool resourceSequential);
  6235. static IHqlExpression * mergeThorGraphs(IHqlExpression * expr, bool resourceConditionalActions, bool resourceSequential)
  6236. {
  6237. HqlExprArray args;
  6238. expr->unwindList(args, no_actionlist);
  6239. mergeThorGraphs(args, resourceConditionalActions, resourceSequential);
  6240. return createActionList(args);
  6241. }
  6242. static void mergeThorGraphs(HqlExprArray & exprs, bool resourceConditionalActions, bool resourceSequential)
  6243. {
  6244. HqlExprArray thorActions;
  6245. HqlExprArray combined;
  6246. ForEachItemIn(idx, exprs)
  6247. {
  6248. IHqlExpression * original = &exprs.item(idx);
  6249. LinkedHqlExpr cur = original;
  6250. const node_operator op = cur->getOperator();
  6251. switch (op)
  6252. {
  6253. case no_compound:
  6254. {
  6255. OwnedHqlExpr replace = mergeThorGraphs(cur->queryChild(0), resourceConditionalActions, resourceSequential);
  6256. cur.setown(replaceChild(cur, 0, replace));
  6257. break;
  6258. }
  6259. case no_if:
  6260. if (cur->isAction())
  6261. {
  6262. IHqlExpression * left = cur->queryChild(1);
  6263. IHqlExpression * right = cur->queryChild(2);
  6264. OwnedHqlExpr newLeft = mergeThorGraphs(left, resourceConditionalActions, resourceSequential);
  6265. OwnedHqlExpr newRight = right ? mergeThorGraphs(right, resourceConditionalActions, resourceSequential) : NULL;
  6266. if (left != newLeft || right != newRight)
  6267. {
  6268. HqlExprArray args;
  6269. unwindChildren(args, cur);
  6270. //Not sure about this - the test condition may not be evaluatable inside thor
  6271. if (resourceConditionalActions && ((newLeft->getOperator() == no_thor) && (!newRight || newRight->getOperator() == no_thor)))
  6272. {
  6273. args.replace(*LINK(newLeft->queryChild(0)), 1);
  6274. if (newRight)
  6275. args.replace(*LINK(newRight->queryChild(0)), 2);
  6276. cur.setown(createValue(no_thor, makeVoidType(), cur->clone(args)));
  6277. }
  6278. else
  6279. {
  6280. args.replace(*LINK(newLeft), 1);
  6281. if (newRight)
  6282. args.replace(*LINK(newRight), 2);
  6283. cur.setown(cur->clone(args));
  6284. }
  6285. }
  6286. }
  6287. break;
  6288. case no_parallel:
  6289. if (false)
  6290. {
  6291. HqlExprArray args;
  6292. bool allThor = true;
  6293. ForEachChild(i, cur)
  6294. {
  6295. IHqlExpression * merged = mergeThorGraphs(cur->queryChild(i), resourceConditionalActions, resourceSequential);
  6296. args.append(*merged);
  6297. if (merged->getOperator() != no_thor)
  6298. allThor = false;
  6299. }
  6300. if (allThor)
  6301. {
  6302. ForEachItemIn(i, args)
  6303. args.replace(*LINK(args.item(i).queryChild(0)), i);
  6304. cur.setown(cur->clone(args));
  6305. cur.setown(createValue(no_thor, makeVoidType(), cur.getClear()));
  6306. }
  6307. else
  6308. cur.setown(cur->clone(args));
  6309. break;
  6310. }
  6311. //fall through
  6312. case no_actionlist:
  6313. case no_orderedactionlist:
  6314. {
  6315. HqlExprArray args;
  6316. cur->unwindList(args, op);
  6317. mergeThorGraphs(args, resourceConditionalActions, resourceSequential);
  6318. cur.setown(cur->clone(args));
  6319. break;
  6320. }
  6321. case no_sequential:
  6322. {
  6323. HqlExprArray args;
  6324. bool allThor = true;
  6325. ForEachChild(i, cur)
  6326. {
  6327. IHqlExpression * merged = mergeThorGraphs(cur->queryChild(i), resourceConditionalActions, resourceSequential);
  6328. args.append(*merged);
  6329. if (merged->getOperator() != no_thor)
  6330. allThor = false;
  6331. }
  6332. if (resourceSequential && allThor)
  6333. {
  6334. ForEachItemIn(i, args)
  6335. args.replace(*LINK(args.item(i).queryChild(0)), i);
  6336. cur.setown(cur->clone(args));
  6337. cur.setown(createValue(no_thor, makeVoidType(), cur.getClear()));
  6338. }
  6339. else
  6340. cur.setown(cur->clone(args));
  6341. break;
  6342. }
  6343. case no_ensureresult:
  6344. {
  6345. HqlExprArray args;
  6346. unwindChildren(args, cur);
  6347. args.replace(*mergeThorGraphs(cur->queryChild(0), resourceConditionalActions, resourceSequential), 0);
  6348. cur.setown(cloneOrLink(cur, args));
  6349. break;
  6350. }
  6351. }
  6352. if (cur->getOperator() == no_thor)
  6353. {
  6354. thorActions.append(*LINK(cur->queryChild(0)));
  6355. }
  6356. else
  6357. {
  6358. if (thorActions.ordinality())
  6359. {
  6360. combined.append(*createValue(no_thor, makeVoidType(), createActionList(thorActions)));
  6361. thorActions.kill();
  6362. }
  6363. combined.append(*cur.getClear());
  6364. }
  6365. }
  6366. if (thorActions.ordinality())
  6367. combined.append(*createValue(no_thor, makeVoidType(), createActionList(thorActions)));
  6368. replaceArray(exprs, combined);
  6369. }
  6370. void mergeThorGraphs(WorkflowItem & workflow, bool resourceConditionalActions, bool resourceSequential)
  6371. {
  6372. groupThorGraphs(workflow.queryExprs());
  6373. mergeThorGraphs(workflow.queryExprs(), resourceConditionalActions, resourceSequential);
  6374. }
  6375. //------------------------------------------------------------------------
  6376. //#define NEW_SCALAR_CODE
  6377. //I think NEW_SCALAR_CODE should be better - but in practice it seems to be worse.....
  6378. inline bool isTypeToHoist(ITypeInfo * type)
  6379. {
  6380. return isSingleValuedType(type);// || (type && type->getTypeCode() == type_set);
  6381. }
  6382. static HqlTransformerInfo scalarGlobalTransformerInfo("ScalarGlobalTransformer");
  6383. ScalarGlobalTransformer::ScalarGlobalTransformer(HqlCppTranslator & _translator)
  6384. : HoistingHqlTransformer(scalarGlobalTransformerInfo, CTFtraverseallnodes), translator(_translator)
  6385. {
  6386. okToHoist = true;
  6387. neverHoist = false;
  6388. }
  6389. void ScalarGlobalTransformer::analyseExpr(IHqlExpression * expr)
  6390. {
  6391. ScalarGlobalExtra * extra = queryBodyExtra(expr);
  6392. analyseThis(expr);
  6393. #ifdef NEW_SCALAR_CODE
  6394. if (++extra->numUses > 1)
  6395. {
  6396. if (!extra->candidate)
  6397. return;
  6398. if (extra->couldHoist || extra->alreadyGlobal)
  6399. return;
  6400. }
  6401. extra->candidate = !containsAnyDataset(expr) && !expr->isConstant() && !isContextDependent(expr);
  6402. extra->couldHoist = extra->candidate && isTypeToHoist(expr->queryType()) && canCreateTemporary(expr) && expr->isPure();
  6403. #else
  6404. if (++extra->numUses > 1)
  6405. {
  6406. if (!okToHoist)
  6407. {
  6408. if (!neverHoist || extra->neverHoist)
  6409. return;
  6410. }
  6411. if (extra->couldHoist)
  6412. {
  6413. if (extra->createGlobal)
  6414. return;
  6415. //Allow a global to be created inside a global marked from somewhere else.
  6416. if (containsAnyDataset(expr) || expr->isConstant() || isContextDependent(expr) || !expr->isIndependentOfScope())
  6417. return;
  6418. }
  6419. }
  6420. extra->couldHoist = okToHoist;
  6421. if (!okToHoist && !neverHoist && !isTypeToHoist(expr->queryType()))
  6422. okToHoist = true;
  6423. #endif
  6424. extra->neverHoist = neverHoist;
  6425. doAnalyseExpr(expr);
  6426. okToHoist = extra->couldHoist;
  6427. neverHoist = extra->neverHoist;
  6428. }
  6429. void ScalarGlobalTransformer::doAnalyseExpr(IHqlExpression * expr)
  6430. {
  6431. switch (expr->getOperator())
  6432. {
  6433. case no_attr:
  6434. case no_constant:
  6435. case no_attr_link:
  6436. case no_null:
  6437. case no_all:
  6438. return;
  6439. case no_persist_check:
  6440. //No point spotting global within this since it will not create a subquery..
  6441. return;
  6442. case no_attr_expr:
  6443. {
  6444. IAtom * name = expr->queryName();
  6445. if ((name == _selectors_Atom) || (name == keyedAtom))
  6446. return;
  6447. analyseChildren(expr);
  6448. return;
  6449. }
  6450. case no_getresult:
  6451. case no_libraryinput:
  6452. queryBodyExtra(expr)->alreadyGlobal = true;
  6453. break;
  6454. case no_globalscope:
  6455. case no_setresult:
  6456. case no_ensureresult:
  6457. {
  6458. queryBodyExtra(expr)->alreadyGlobal = true; // don't tag again - even if opt flag is present
  6459. queryBodyExtra(expr->queryChild(0))->alreadyGlobal = true;
  6460. okToHoist = false;
  6461. break;
  6462. }
  6463. }
  6464. #ifndef NEW_SCALAR_CODE
  6465. // Commented line has problems with SELF used in HOLE definition, and explosion in thumphrey7 etc.
  6466. // if (okToHoist && isIndependentOfScope(expr) && !expr->isConstant() && !isContextDependent(expr) && expr->isPure())
  6467. if (okToHoist && !containsAnyDataset(expr) && !expr->isConstant() && !isContextDependent(expr) && expr->isPure() && expr->isIndependentOfScope())
  6468. {
  6469. ITypeInfo * type = expr->queryType();
  6470. if (isTypeToHoist(type))
  6471. {
  6472. if (canCreateTemporary(expr))
  6473. {
  6474. queryBodyExtra(expr)->createGlobal = true;
  6475. okToHoist = false;
  6476. }
  6477. }
  6478. }
  6479. #endif
  6480. HoistingHqlTransformer::doAnalyseExpr(expr);
  6481. }
  6482. /*
  6483. Try and decide what is trivial enough to serialise, and what should remain. It is more trial an error than particularly logical
  6484. o Better to store smaller objects because they will serialize smaller.
  6485. o If something is used more than once then probably worth serializing regardless - since calculation will be commoned up.
  6486. o Don't really want to serialize 'x' and 'x <> ''' to the same function - but much better to serialize 'x <> ''' rather than 'x' if only one used.
  6487. */
  6488. bool ScalarGlobalTransformer::isComplex(IHqlExpression * expr, bool checkGlobal)
  6489. {
  6490. ScalarGlobalExtra * extra = queryBodyExtra(expr);
  6491. if (checkGlobal)
  6492. {
  6493. //If something else has turned this into a global then no point.
  6494. if (extra->alreadyGlobal)
  6495. return false;
  6496. }
  6497. switch (expr->getOperator())
  6498. {
  6499. case no_workunit_dataset:
  6500. case no_getresult:
  6501. return expr->hasAttribute(wuidAtom);
  6502. case no_constant:
  6503. case no_globalscope:
  6504. case no_libraryinput:
  6505. return false;
  6506. case no_cast:
  6507. case no_implicitcast:
  6508. //serialize if the cast reduces the size of the item, otherwise check argument.
  6509. if (expr->queryType()->getSize() <= expr->queryChild(0)->queryType()->getSize())
  6510. return true;
  6511. //If used a lot then save lots of duplicated work.
  6512. if (extra->numUses > 2)
  6513. return true;
  6514. break;
  6515. case no_eq:
  6516. case no_ne:
  6517. case no_lt:
  6518. case no_gt:
  6519. case no_le:
  6520. case no_ge:
  6521. //Accessed more than once-> probably worth commoning up
  6522. if (extra->numUses > 1)
  6523. return true;
  6524. break;
  6525. //f[1..length(trim(x))] = x is very common, and if the length(trim)) was serialized separately then
  6526. //the generated code would be worse.
  6527. case no_trim:
  6528. case no_charlen:
  6529. case no_sorted:
  6530. case no_not:
  6531. break;
  6532. case no_substring:
  6533. //single character substring - don't create separate items just for this, since likely to have many of them.
  6534. if (!expr->queryChild(1)->queryValue())
  6535. return true;
  6536. break;
  6537. default:
  6538. if (expr->isConstant())
  6539. return false;
  6540. return true;
  6541. }
  6542. ForEachChild(i, expr)
  6543. {
  6544. if (isComplex(expr->queryChild(i), true))
  6545. return true;
  6546. }
  6547. return false;
  6548. }
  6549. IHqlExpression * ScalarGlobalTransformer::createTransformed(IHqlExpression * expr)
  6550. {
  6551. IHqlExpression * ret = queryTransformAnnotation(expr);
  6552. if (ret)
  6553. return ret;
  6554. OwnedHqlExpr transformed = HoistingHqlTransformer::createTransformed(expr);
  6555. ScalarGlobalExtra * extra = queryBodyExtra(expr);
  6556. #ifdef NEW_SCALAR_CODE
  6557. if (extra->numUses > 1 && extra->couldHoist && !extra->alreadyGlobal && isComplex(expr, false))
  6558. #else
  6559. if (extra->createGlobal && !extra->alreadyGlobal && isComplex(expr, false))
  6560. #endif
  6561. {
  6562. #ifdef _DEBUG
  6563. translator.traceExpression("Mark as global", expr);
  6564. #endif
  6565. //mark as global, so isComplex() can take it into account.
  6566. extra->alreadyGlobal = true;
  6567. if (expr->getOperator() == no_createset)
  6568. transformed.setown(projectCreateSetDataset(transformed));
  6569. return createValue(no_globalscope, transformed->getType(), LINK(transformed));
  6570. }
  6571. return transformed.getClear();
  6572. }
  6573. //------------------------------------------------------------------------
  6574. static HqlTransformerInfo explicitGlobalTransformerInfo("ExplicitGlobalTransformer");
  6575. ExplicitGlobalTransformer::ExplicitGlobalTransformer(IWorkUnit * _wu, HqlCppTranslator & _translator)
  6576. : HoistingHqlTransformer(explicitGlobalTransformerInfo, CTFnoteifactions|CTFtraverseallnodes), translator(_translator)
  6577. {
  6578. wu = _wu;
  6579. isRoxie = (translator.getTargetClusterType() == RoxieCluster);
  6580. seenGlobalScope = false;
  6581. seenLocalGlobalScope = false;
  6582. }
  6583. void ExplicitGlobalTransformer::doAnalyseExpr(IHqlExpression * expr)
  6584. {
  6585. node_operator op = expr->getOperator();
  6586. switch (op)
  6587. {
  6588. case no_nothor:
  6589. if (expr->isAction())
  6590. break;
  6591. //fall through
  6592. case no_globalscope:
  6593. //Try and avoid transforms (especially nested ones) as much as possible.
  6594. seenGlobalScope = true;
  6595. //If local attribute is present on a global, then an independent transform may cause an extra
  6596. //transformation because it may become unconditional, when previously conditional
  6597. if (expr->hasAttribute(localAtom))
  6598. seenLocalGlobalScope = true;
  6599. break;
  6600. }
  6601. HoistingHqlTransformer::doAnalyseExpr(expr);
  6602. }
  6603. IHqlExpression * ExplicitGlobalTransformer::createTransformed(IHqlExpression * expr)
  6604. {
  6605. if (expr->isConstant())
  6606. return LINK(expr);
  6607. IHqlExpression * ret = queryTransformAnnotation(expr);
  6608. if (ret)
  6609. return ret;
  6610. OwnedHqlExpr transformed = HoistingHqlTransformer::createTransformed(expr);
  6611. node_operator op = expr->getOperator();
  6612. switch (op)
  6613. {
  6614. case no_nothor:
  6615. if (transformed->isAction())
  6616. break;
  6617. //fall through
  6618. case no_globalscope:
  6619. {
  6620. IHqlExpression * value = transformed->queryChild(0);
  6621. if (expr->hasAttribute(optAtom))
  6622. {
  6623. if (!isIndependentOfScope(value))
  6624. return LINK(value);
  6625. }
  6626. if (!expr->hasAttribute(localAtom) || isUsedUnconditionally(expr))
  6627. {
  6628. if (!isIndependentOfScope(value))
  6629. {
  6630. IHqlExpression * symbol = queryActiveSymbol();
  6631. StringBuffer s;
  6632. if (symbol && symbol->queryBody() == expr)
  6633. s.appendf(" '%s'", symbol->queryName()->str());
  6634. else
  6635. {
  6636. s.append(" ").append(getOpString(value->getOperator()));
  6637. if (symbol)
  6638. s.append(" in ").append(symbol->queryName());
  6639. }
  6640. if (op == no_nothor)
  6641. translator.reportWarning(queryActiveLocation(expr), ECODETEXT(HQLWRN_NoThorContextDependent), s.str());
  6642. else
  6643. translator.reportWarning(queryActiveLocation(expr), ECODETEXT(HQLWRN_GlobalDoesntSeemToBe), s.str());
  6644. }
  6645. if (value->getOperator() == no_createset)
  6646. {
  6647. OwnedHqlExpr createset = projectCreateSetDataset(value);
  6648. IHqlExpression * ds = createset->queryChild(0);
  6649. HqlExprArray outArgs, setArgs;
  6650. outArgs.append(*LINK(ds));
  6651. outArgs.append(*createAttribute(sequenceAtom, getLocalSequenceNumber()));
  6652. outArgs.append(*createAttribute(namedAtom, createNextStringValue(value)));
  6653. IHqlExpression * setResult = createValue(no_output, makeVoidType(), outArgs);
  6654. appendToTarget(*setResult);
  6655. transformed.setown(createGetResultFromSetResult(setResult, expr->queryType()));
  6656. }
  6657. else
  6658. {
  6659. GlobalAttributeInfo info("spill::global","gl", transformed);
  6660. if (op == no_nothor)
  6661. info.extractGlobal(NULL, RoxieCluster);
  6662. else
  6663. info.extractGlobal(transformed, translator.getTargetClusterType());
  6664. OwnedHqlExpr getResult, setResult;
  6665. info.checkFew(translator);
  6666. info.splitGlobalDefinition(transformed->queryType(), value, wu, setResult, &getResult, isRoxie);
  6667. if (op == no_nothor)
  6668. setResult.setown(createValue(no_nothor, makeVoidType(), LINK(setResult)));
  6669. IHqlExpression * cluster = queryRealChild(transformed, 1);
  6670. if (cluster && !isBlankString(cluster))
  6671. setResult.setown(createValue(no_cluster, makeVoidType(), LINK(setResult), LINK(cluster)));
  6672. appendToTarget(*setResult.getClear());
  6673. transformed.setown(getResult.getClear());
  6674. }
  6675. break;
  6676. }
  6677. }
  6678. }
  6679. return transformed.getClear();
  6680. }
  6681. static HqlTransformerInfo optGlobalTransformerInfo("OptGlobalTransformer");
  6682. OptGlobalTransformer::OptGlobalTransformer() : NewHqlTransformer(optGlobalTransformerInfo)
  6683. {
  6684. seenOptGlobal = false;
  6685. }
  6686. void OptGlobalTransformer::analyseExpr(IHqlExpression * expr)
  6687. {
  6688. if (alreadyVisited(expr))
  6689. return;
  6690. node_operator op = expr->getOperator();
  6691. switch (op)
  6692. {
  6693. case no_globalscope:
  6694. if (expr->hasAttribute(optAtom))
  6695. seenOptGlobal = true;
  6696. break;
  6697. }
  6698. NewHqlTransformer::analyseExpr(expr);
  6699. }
  6700. IHqlExpression * OptGlobalTransformer::createTransformed(IHqlExpression * expr)
  6701. {
  6702. OwnedHqlExpr transformed = NewHqlTransformer::createTransformed(expr);
  6703. node_operator op = transformed->getOperator();
  6704. switch (op)
  6705. {
  6706. case no_globalscope:
  6707. {
  6708. if (transformed->hasAttribute(optAtom))
  6709. {
  6710. IHqlExpression * value = transformed->queryChild(0);
  6711. if (!isIndependentOfScope(value))
  6712. return LINK(value);
  6713. return removeAttribute(transformed, optAtom);
  6714. }
  6715. break;
  6716. }
  6717. }
  6718. return transformed.getClear();
  6719. }
  6720. //------------------------------------------------------------------------
  6721. static HqlTransformerInfo scopeIndependentActionCheckerInfo("ScopeIndependentActionChecker");
  6722. class ScopeIndependentActionChecker : public NewHqlTransformer
  6723. {
  6724. public:
  6725. ScopeIndependentActionChecker(HqlCppTranslator & _translator) : NewHqlTransformer(scopeIndependentActionCheckerInfo), translator(_translator)
  6726. {
  6727. }
  6728. protected:
  6729. void analyseExpr(IHqlExpression * expr)
  6730. {
  6731. switch (expr->getOperator())
  6732. {
  6733. case no_parallel:
  6734. case no_if:
  6735. case no_sequential:
  6736. case no_compound:
  6737. case no_actionlist:
  6738. case no_orderedactionlist:
  6739. NewHqlTransformer::analyseExpr(expr);
  6740. break;
  6741. case no_output:
  6742. if (!isIndependentOfScope(expr))
  6743. {
  6744. IHqlExpression * filename = queryRealChild(expr, 2);
  6745. if (!filename)
  6746. filename = expr->queryAttribute(namedAtom);
  6747. StringBuffer s;
  6748. if (filename)
  6749. getExprECL(filename, s);
  6750. translator.WARNINGAT1(queryActiveLocation(expr), HQLWRN_OutputDependendOnScope, s.str());
  6751. #if 0
  6752. checkIndependentOfScope(expr);
  6753. #endif
  6754. }
  6755. break;
  6756. default:
  6757. if (!isIndependentOfScope(expr))
  6758. {
  6759. translator.WARNINGAT(queryActiveLocation(expr), HQLWRN_GlobalActionDependendOnScope);
  6760. #if 0
  6761. checkIndependentOfScope(expr);
  6762. #endif
  6763. }
  6764. break;
  6765. }
  6766. }
  6767. private:
  6768. HqlCppTranslator & translator;
  6769. };
  6770. void checkGlobalActionsIndependentOfScope(HqlCppTranslator & translator, const HqlExprArray & exprs)
  6771. {
  6772. ScopeIndependentActionChecker checker(translator);
  6773. checker.analyseArray(exprs, 0);
  6774. }
  6775. //------------------------------------------------------------------------
  6776. IHqlDataset * queryRootDataset(IHqlExpression * dataset)
  6777. {
  6778. return dataset->queryDataset()->queryRootTable();
  6779. }
  6780. //roxie only executes outputs to temporaries if they are required, or if not all references are from within the graph
  6781. //therefore, there is no need to special case if actions. Thor on the other hand will cause it to be executed unnecessarily.
  6782. static HqlTransformerInfo newScopeMigrateTransformerInfo("NewScopeMigrateTransformer");
  6783. NewScopeMigrateTransformer::NewScopeMigrateTransformer(IWorkUnit * _wu, HqlCppTranslator & _translator)
  6784. : HoistingHqlTransformer(newScopeMigrateTransformerInfo, 0), translator(_translator)
  6785. {
  6786. wu = _wu;
  6787. isRoxie = translator.targetRoxie();
  6788. if (!isRoxie && !_translator.queryOptions().resourceConditionalActions)
  6789. setFlags(CTFnoteifactions);
  6790. minimizeWorkunitTemporaries = translator.queryOptions().minimizeWorkunitTemporaries;
  6791. #ifdef REMOVE_GLOBAL_ANNOTATION
  6792. activityDepth = 0; // should be 0 to actually have any effect - but causes problems...
  6793. #else
  6794. activityDepth = 999; // should be 0 to actually have any effect - but causes problems...
  6795. #endif
  6796. }
  6797. void NewScopeMigrateTransformer::analyseExpr(IHqlExpression * expr)
  6798. {
  6799. ScopeMigrateInfo * extra = queryBodyExtra(expr);
  6800. if (activityDepth > extra->maxActivityDepth)
  6801. {
  6802. if (extra->maxActivityDepth == 0)
  6803. extra->setUnvisited(); // so we walk children again
  6804. extra->maxActivityDepth = activityDepth;
  6805. }
  6806. unsigned savedActivityDepth = activityDepth;
  6807. node_operator op = expr->getOperator();
  6808. switch (op)
  6809. {
  6810. case NO_AGGREGATE:
  6811. case no_createset:
  6812. case NO_ACTION_REQUIRES_GRAPH:
  6813. case no_extractresult:
  6814. case no_distributer:
  6815. case no_within:
  6816. case no_notwithin:
  6817. case no_soapaction_ds:
  6818. case no_returnresult:
  6819. activityDepth++;
  6820. break;
  6821. case no_setresult:
  6822. if (expr->queryChild(0)->isDataset())
  6823. activityDepth++;
  6824. break;
  6825. case no_select:
  6826. if (expr->hasAttribute(newAtom))
  6827. activityDepth++;
  6828. break;
  6829. }
  6830. HoistingHqlTransformer::analyseExpr(expr);
  6831. activityDepth = savedActivityDepth;
  6832. }
  6833. IHqlExpression * NewScopeMigrateTransformer::hoist(IHqlExpression * expr, IHqlExpression * hoisted)
  6834. {
  6835. if (minimizeWorkunitTemporaries)
  6836. return createWrapper(no_globalscope, LINK(hoisted));
  6837. IHqlExpression * setResult = createSetResult(hoisted);
  6838. IHqlExpression * seqAttr = setResult->queryAttribute(sequenceAtom);
  6839. IHqlExpression * aliasAttr = setResult->queryAttribute(namedAtom);
  6840. appendToTarget(*setResult);
  6841. return createGetResultFromSetResult(setResult);
  6842. }
  6843. IHqlExpression * NewScopeMigrateTransformer::createTransformed(IHqlExpression * expr)
  6844. {
  6845. if (expr->isConstant())
  6846. return LINK(expr);
  6847. IHqlExpression * ret = queryTransformAnnotation(expr);
  6848. if (ret)
  6849. return ret;
  6850. OwnedHqlExpr transformed = HoistingHqlTransformer::createTransformed(expr);
  6851. ScopeMigrateInfo * extra = queryBodyExtra(expr);
  6852. node_operator op = expr->getOperator();
  6853. switch (op)
  6854. {
  6855. case no_createset:
  6856. {
  6857. if (isUsedUnconditionally(expr))
  6858. {
  6859. if (isIndependentOfScope(transformed) && !isContextDependent(expr))
  6860. {
  6861. OwnedHqlExpr createset = projectCreateSetDataset(transformed);
  6862. if (minimizeWorkunitTemporaries)
  6863. return createWrapper(no_globalscope, LINK(createset));
  6864. //MORE: This is only temporary until child datasets come into existence, then it will need improving
  6865. //Save it as a temporary dataset in the wu, and retrieve it as a getresult(set)
  6866. IHqlExpression * ds = createset->queryChild(0);
  6867. HqlExprArray outArgs, setArgs;
  6868. outArgs.append(*LINK(ds));
  6869. outArgs.append(*createAttribute(sequenceAtom, getLocalSequenceNumber()));
  6870. outArgs.append(*createAttribute(namedAtom, createNextStringValue(expr)));
  6871. IHqlExpression * setResult = createValue(no_output, makeVoidType(), outArgs);
  6872. appendToTarget(*setResult);
  6873. transformed.setown(createGetResultFromSetResult(setResult, expr->queryType()));
  6874. }
  6875. }
  6876. break;
  6877. }
  6878. case no_select:
  6879. {
  6880. bool isNew;
  6881. IHqlExpression * row = querySelectorDataset(transformed, isNew);
  6882. if (isNew)
  6883. {
  6884. if (isUsedUnconditionally(expr))
  6885. {
  6886. if (extra->maxActivityDepth != 0)
  6887. {
  6888. node_operator rowOp = row->getOperator();
  6889. if (rowOp == no_selectnth)
  6890. {
  6891. node_operator dsOp = row->queryChild(0)->getOperator();
  6892. if ((dsOp == no_workunit_dataset) || (dsOp == no_inlinetable))
  6893. break;
  6894. }
  6895. if (rowOp == no_createrow || rowOp == no_getresult)
  6896. break;
  6897. if (!isInlineTrivialDataset(row) && !isContextDependent(row) && !transformed->isDataset() && !transformed->isDictionary())
  6898. {
  6899. if (isIndependentOfScope(row))
  6900. return hoist(expr, transformed);
  6901. }
  6902. }
  6903. }
  6904. }
  6905. }
  6906. break;
  6907. case NO_AGGREGATE:
  6908. {
  6909. if (isUsedUnconditionally(expr))
  6910. {
  6911. if (extra->maxActivityDepth != 0)
  6912. {
  6913. IHqlExpression * datasetExpr = transformed->queryChild(0);
  6914. IHqlDataset * rootDataset = queryRootDataset(datasetExpr);
  6915. if (!rootDataset)
  6916. {
  6917. //Something like a+b+c
  6918. rootDataset = datasetExpr->queryDataset()->queryTable();
  6919. if (!rootDataset)
  6920. break;
  6921. }
  6922. //Don't do anything with child datasets....
  6923. IHqlExpression * rootDatasetExpr = queryExpression(rootDataset);
  6924. node_operator rootOp = rootDatasetExpr->getOperator();
  6925. if ((rootOp == no_select) || (rootOp == no_field))
  6926. break;
  6927. if (isIndependentOfScope(datasetExpr) && !isContextDependent(expr))
  6928. {
  6929. return hoist(expr, transformed);
  6930. }
  6931. }
  6932. }
  6933. break;
  6934. }
  6935. }
  6936. return transformed.getClear();
  6937. }
  6938. void migrateExprToNaturalLevel(WorkflowItem & cur, IWorkUnit * wu, HqlCppTranslator & translator)
  6939. {
  6940. const HqlCppOptions & options = translator.queryOptions();
  6941. HqlExprArray & exprs = cur.queryExprs();
  6942. if (translator.queryOptions().moveUnconditionalActions)
  6943. moveUnconditionalEarlier(exprs);
  6944. translator.checkNormalized(exprs);
  6945. if (options.hoistSimpleGlobal)
  6946. {
  6947. ScalarGlobalTransformer transformer(translator);
  6948. HqlExprArray results;
  6949. transformer.analyseArray(exprs, 0);
  6950. transformer.transformRoot(exprs, results);
  6951. replaceArray(exprs, results);
  6952. translator.checkNormalized(exprs);
  6953. }
  6954. translator.traceExpressions("m0", exprs);
  6955. checkGlobalActionsIndependentOfScope(translator, exprs);
  6956. if (options.workunitTemporaries)
  6957. {
  6958. ExplicitGlobalTransformer transformer(wu, translator);
  6959. transformer.analyseArray(exprs, 0);
  6960. if (transformer.needToTransform())
  6961. {
  6962. HqlExprArray results;
  6963. transformer.transformRoot(exprs, results);
  6964. replaceArray(exprs, results);
  6965. }
  6966. }
  6967. else
  6968. {
  6969. OptGlobalTransformer transformer;
  6970. transformer.analyseArray(exprs, 0);
  6971. if (transformer.needToTransform())
  6972. {
  6973. HqlExprArray results;
  6974. transformer.transformRoot(exprs, results);
  6975. replaceArray(exprs, results);
  6976. }
  6977. }
  6978. translator.checkNormalized(exprs);
  6979. translator.traceExpressions("m1", exprs);
  6980. if (options.allowScopeMigrate) // && !options.minimizeWorkunitTemporaries)
  6981. {
  6982. NewScopeMigrateTransformer transformer(wu, translator);
  6983. HqlExprArray results;
  6984. transformer.analyseArray(exprs, 0);
  6985. transformer.transformRoot(exprs, results);
  6986. replaceArray(exprs, results);
  6987. translator.checkNormalized(exprs);
  6988. }
  6989. translator.traceExpressions("m2", exprs);
  6990. }
  6991. void expandGlobalDatasets(WorkflowArray & array, IWorkUnit * wu, HqlCppTranslator & translator)
  6992. {
  6993. }
  6994. //---------------------------------------------------------------------------
  6995. bool AutoScopeMigrateInfo::addGraph(unsigned graph)
  6996. {
  6997. if (graph == lastGraph)
  6998. return false;
  6999. if (lastGraph)
  7000. manyGraphs = true;
  7001. lastGraph = graph;
  7002. return true;
  7003. }
  7004. bool AutoScopeMigrateInfo::doAutoHoist(IHqlExpression * transformed, bool minimizeWorkunitTemporaries)
  7005. {
  7006. if (useCount == 0)
  7007. return false;
  7008. node_operator op = original->getOperator();
  7009. switch (op)
  7010. {
  7011. case no_fail:
  7012. return false;
  7013. }
  7014. if (firstUseIsConditional && firstUseIsSequential)
  7015. return false;
  7016. if (firstUseIsSequential && !manyGraphs)
  7017. return false;
  7018. // The following *should* generate better code, but there are currently a couple of exceptions (cmaroney29, jholt20) which need investigation
  7019. // if (!manyGraphs)
  7020. // return false;
  7021. if (globalInsideChild && !minimizeWorkunitTemporaries)// && !transformed->isDataset() && !transformed->isDatarow())
  7022. return true;
  7023. if (!manyGraphs)
  7024. return false;
  7025. if (!original->isDataset())
  7026. {
  7027. switch (op)
  7028. {
  7029. case NO_AGGREGATE:
  7030. break;
  7031. default:
  7032. return false;
  7033. }
  7034. }
  7035. if (!isWorthHoisting(transformed, false))
  7036. return false;
  7037. if (isContextDependent(transformed))
  7038. return false;
  7039. return isIndependentOfScope(original);
  7040. }
  7041. static HqlTransformerInfo autoScopeMigrateTransformerInfo("AutoScopeMigrateTransformer");
  7042. AutoScopeMigrateTransformer::AutoScopeMigrateTransformer(IWorkUnit * _wu, HqlCppTranslator & _translator)
  7043. : NewHqlTransformer(autoScopeMigrateTransformerInfo), translator(_translator)
  7044. {
  7045. wu = _wu;
  7046. isRoxie = (translator.getTargetClusterType() == RoxieCluster);
  7047. isConditional = false;
  7048. isSequential = false;
  7049. hasCandidate = false;
  7050. activityDepth = 0;
  7051. curGraph = 1;
  7052. }
  7053. void AutoScopeMigrateTransformer::analyseExpr(IHqlExpression * expr)
  7054. {
  7055. AutoScopeMigrateInfo * extra = queryBodyExtra(expr);
  7056. if (isConditional)
  7057. extra->condUseCount++;
  7058. else
  7059. extra->useCount++;
  7060. bool newGraph = extra->addGraph(curGraph);
  7061. if (!newGraph)
  7062. return;
  7063. if (extra->doAutoHoist(expr, translator.queryOptions().minimizeWorkunitTemporaries))
  7064. {
  7065. hasCandidate = true;
  7066. return;
  7067. }
  7068. unsigned savedDepth = activityDepth;
  7069. doAnalyseExpr(expr);
  7070. activityDepth = savedDepth;
  7071. }
  7072. void AutoScopeMigrateTransformer::doAnalyseExpr(IHqlExpression * expr)
  7073. {
  7074. AutoScopeMigrateInfo * extra = queryBodyExtra(expr);
  7075. if (activityDepth && expr->isDataset())
  7076. {
  7077. if (isWorthHoisting(expr, true) && isIndependentOfScope(expr) && !isContextDependent(expr))
  7078. {
  7079. #ifdef _DEBUG
  7080. isWorthHoisting(expr, true);
  7081. #endif
  7082. extra->globalInsideChild = true;
  7083. hasCandidate = true;
  7084. activityDepth = 0;
  7085. }
  7086. }
  7087. extra->firstUseIsConditional = isConditional;
  7088. extra->firstUseIsSequential = isSequential;
  7089. switch (expr->getOperator())
  7090. {
  7091. case no_allnodes:
  7092. case no_keyedlimit:
  7093. case no_nothor:
  7094. return;
  7095. case no_sequential:
  7096. return;
  7097. case no_if:
  7098. case no_choose:
  7099. {
  7100. if (expr->isAction())
  7101. {
  7102. bool wasConditional = isConditional;
  7103. analyseExpr(expr->queryChild(0));
  7104. isConditional = true;
  7105. ForEachChildFrom(i, expr, 1)
  7106. analyseExpr(expr->queryChild(i));
  7107. isConditional = wasConditional;
  7108. return;
  7109. }
  7110. break;
  7111. }
  7112. case no_newtransform:
  7113. case no_transform:
  7114. if (curGraph)
  7115. {
  7116. activityDepth++;
  7117. NewHqlTransformer::analyseExpr(expr);
  7118. activityDepth--;
  7119. return;
  7120. }
  7121. break;
  7122. case no_thor:
  7123. //ignore thor attribute on a dataset..
  7124. if (expr->queryType())
  7125. {
  7126. curGraph++;
  7127. NewHqlTransformer::analyseExpr(expr);
  7128. curGraph++; // don't restore - new pseudo graph to aid cse between global branches separated by graphs
  7129. return;
  7130. }
  7131. break;
  7132. }
  7133. NewHqlTransformer::analyseExpr(expr);
  7134. }
  7135. IHqlExpression * AutoScopeMigrateTransformer::createTransformed(IHqlExpression * expr)
  7136. {
  7137. switch (expr->getOperator())
  7138. {
  7139. case no_allnodes:
  7140. case no_keyedlimit:
  7141. case no_libraryscope:
  7142. case no_nothor:
  7143. case no_sequential:
  7144. return LINK(expr);
  7145. case no_thor:
  7146. {
  7147. IHqlExpression * actions = expr->queryChild(0);
  7148. if (actions)
  7149. {
  7150. //MORE: Simplify this???? or remove the special case all together?
  7151. //OwnedHqlExpr newActions = transform(actions);
  7152. HqlExprArray args;
  7153. unwindCommaCompound(args, actions);
  7154. ForEachItemIn(i, args)
  7155. graphActions.append(*transform(&args.item(i)));
  7156. OwnedHqlExpr newActions = createActionList(graphActions);
  7157. graphActions.kill();
  7158. if (actions == newActions)
  7159. return LINK(expr);
  7160. return createWrapper(no_thor, newActions.getClear());
  7161. }
  7162. break;
  7163. }
  7164. }
  7165. OwnedHqlExpr transformed = NewHqlTransformer::createTransformed(expr);
  7166. updateOrphanedSelectors(transformed, expr);
  7167. AutoScopeMigrateInfo * extra = queryBodyExtra(expr);
  7168. if (extra->doAutoHoist(transformed, translator.queryOptions().minimizeWorkunitTemporaries))
  7169. {
  7170. StringBuffer s;
  7171. s.appendf("AutoGlobal: Spotted %s ", getOpString(expr->getOperator()));
  7172. if (expr->queryName())
  7173. s.append("[").append(expr->queryName()).append("] ");
  7174. s.append("as an item to hoist");
  7175. DBGLOG("%s", s.str());
  7176. GlobalAttributeInfo info("spill::auto","auto", transformed);
  7177. info.extractGlobal(NULL, translator.getTargetClusterType());
  7178. if (translator.targetThor() && extra->globalInsideChild)
  7179. info.preventDiskSpill();
  7180. OwnedHqlExpr getResult, setResult;
  7181. info.checkFew(translator);
  7182. info.splitGlobalDefinition(transformed->queryType(), transformed, wu, setResult, &getResult, isRoxie);
  7183. //If the first use is conditional, then hoist the expression globally (it can't have any dependents)
  7184. //else hoist it within the current graph, otherwise it can get hoisted before globals on datasets that
  7185. //it is dependent on.
  7186. if (extra->firstUseIsConditional)
  7187. globalTarget->append(*createWrapper(no_thor, setResult.getClear()));
  7188. else
  7189. graphActions.append(*setResult.getClear());
  7190. transformed.setown(getResult.getClear());
  7191. }
  7192. return transformed.getClear();
  7193. }
  7194. void AutoScopeMigrateTransformer::transformRoot(const HqlExprArray & in, HqlExprArray & out)
  7195. {
  7196. globalTarget = &out;
  7197. NewHqlTransformer::transformRoot(in, out);
  7198. globalTarget = NULL;
  7199. }
  7200. //---------------------------------------------------------------------------
  7201. static HqlTransformerInfo trivialGraphRemoverInfo("TrivialGraphRemover");
  7202. TrivialGraphRemover::TrivialGraphRemover() : NewHqlTransformer(trivialGraphRemoverInfo)
  7203. {
  7204. hasCandidate = false;
  7205. }
  7206. void TrivialGraphRemover::analyseExpr(IHqlExpression * expr)
  7207. {
  7208. if (hasCandidate || alreadyVisited(expr))
  7209. return;
  7210. if (expr->getOperator() == no_thor)
  7211. {
  7212. if (isTrivialGraph(expr->queryChild(0)))
  7213. hasCandidate = true;
  7214. return;
  7215. }
  7216. NewHqlTransformer::analyseExpr(expr);
  7217. }
  7218. IHqlExpression * TrivialGraphRemover::createTransformed(IHqlExpression * expr)
  7219. {
  7220. switch (expr->getOperator())
  7221. {
  7222. case no_thor:
  7223. {
  7224. IHqlExpression * child = expr->queryChild(0);
  7225. if (child && isTrivialGraph(child))
  7226. return LINK(child);
  7227. return LINK(expr);
  7228. }
  7229. }
  7230. return NewHqlTransformer::createTransformed(expr);
  7231. }
  7232. bool TrivialGraphRemover::isTrivialGraph(IHqlExpression * expr)
  7233. {
  7234. if (!expr)
  7235. return false;
  7236. if (expr->getOperator() == no_setresult)
  7237. {
  7238. IHqlExpression * value = expr->queryChild(0);
  7239. if (value->getOperator() != no_getresult)
  7240. return false;
  7241. return true;
  7242. }
  7243. else if (expr->getOperator() == no_output)
  7244. return isTrivialInlineOutput(expr);
  7245. else
  7246. return false;
  7247. }
  7248. void removeTrivialGraphs(WorkflowItem & curWorkflow)
  7249. {
  7250. HqlExprArray & exprs = curWorkflow.queryExprs();
  7251. TrivialGraphRemover transformer;
  7252. transformer.analyseArray(exprs, 0);
  7253. if (transformer.worthTransforming())
  7254. {
  7255. HqlExprArray simplified;
  7256. transformer.transformRoot(exprs, simplified);
  7257. replaceArray(exprs, simplified);
  7258. }
  7259. }
  7260. //==============================================================================================================
  7261. class FilterCloner
  7262. {
  7263. public:
  7264. FilterCloner(IHqlExpression * _ds) { ds.set(_ds); matched = false; lockTransformMutex(); }
  7265. ~FilterCloner() { unlockTransformMutex(); }
  7266. void addMappings(IHqlExpression * expr, IHqlExpression * addDs);
  7267. IHqlExpression * inheritFilters(IHqlExpression * expr);
  7268. inline bool hasMappings() { return matched; }
  7269. protected:
  7270. void doAddMappings(IHqlExpression * expr);
  7271. bool isMatchingSelector(IHqlExpression * expr);
  7272. void setMapping(IHqlExpression * selector, IHqlExpression * value);
  7273. protected:
  7274. HqlExprAttr ds;
  7275. bool matched;
  7276. };
  7277. void FilterCloner::setMapping(IHqlExpression * selector, IHqlExpression * value)
  7278. {
  7279. selector->setTransformExtra(value);
  7280. matched = true;
  7281. }
  7282. void FilterCloner::doAddMappings(IHqlExpression * expr)
  7283. {
  7284. loop
  7285. {
  7286. switch (expr->getOperator())
  7287. {
  7288. case no_and:
  7289. doAddMappings(expr->queryChild(0));
  7290. expr = expr->queryChild(1);
  7291. continue;
  7292. case no_in:
  7293. case no_notin:
  7294. {
  7295. IHqlExpression * lhs = expr->queryChild(0);
  7296. IHqlExpression * rhs = expr->queryChild(1);
  7297. if (isMatchingSelector(lhs) && !containsActiveDataset(rhs))
  7298. setMapping(lhs, expr);
  7299. break;
  7300. }
  7301. case no_between:
  7302. case no_notbetween:
  7303. {
  7304. IHqlExpression * lhs = expr->queryChild(0);
  7305. if (isMatchingSelector(lhs) && !containsActiveDataset(expr->queryChild(1)) && !containsActiveDataset(expr->queryChild(2)))
  7306. setMapping(lhs, expr);
  7307. break;
  7308. }
  7309. case no_eq:
  7310. case no_ne:
  7311. case no_lt:
  7312. case no_gt:
  7313. case no_ge:
  7314. case no_le:
  7315. {
  7316. IHqlExpression * lhs = expr->queryChild(0);
  7317. IHqlExpression * rhs = expr->queryChild(1);
  7318. if (isMatchingSelector(lhs) && !containsActiveDataset(rhs))
  7319. setMapping(lhs, expr);
  7320. else if (isMatchingSelector(rhs) && !containsActiveDataset(lhs))
  7321. setMapping(rhs, expr);
  7322. break;
  7323. }
  7324. }
  7325. return;
  7326. }
  7327. }
  7328. void FilterCloner::addMappings(IHqlExpression * expr, IHqlExpression * addDs)
  7329. {
  7330. if (!expr) return;
  7331. OwnedHqlExpr replaced = replaceSelector(expr, addDs, ds);
  7332. doAddMappings(replaced);
  7333. }
  7334. bool FilterCloner::isMatchingSelector(IHqlExpression * expr)
  7335. {
  7336. return queryDatasetCursor(expr) == ds;
  7337. }
  7338. IHqlExpression * FilterCloner::inheritFilters(IHqlExpression * expr)
  7339. {
  7340. switch (expr->getOperator())
  7341. {
  7342. case no_and:
  7343. case no_assertkeyed:
  7344. case no_assertstepped:
  7345. {
  7346. HqlExprArray args;
  7347. ForEachChild(i, expr)
  7348. args.append(*inheritFilters(expr->queryChild(i)));
  7349. return cloneOrLink(expr, args);
  7350. }
  7351. case no_eq:
  7352. {
  7353. IHqlExpression * lhs = expr->queryChild(0);
  7354. IHqlExpression * rhs = expr->queryChild(1);
  7355. IHqlExpression * lhsExtra = (IHqlExpression *)lhs->queryTransformExtra();
  7356. if (lhsExtra)
  7357. {
  7358. DBGLOG("Inheriting filter condition");
  7359. IHqlExpression * cond = replaceExpression(lhsExtra, lhs, rhs);
  7360. return createValue(no_and, LINK(expr), cond);
  7361. }
  7362. IHqlExpression * rhsExtra = (IHqlExpression *)rhs->queryTransformExtra();
  7363. if (rhsExtra)
  7364. {
  7365. DBGLOG("Inheriting filter condition");
  7366. IHqlExpression * cond = replaceExpression(rhsExtra, rhs, lhs);
  7367. return createValue(no_and, LINK(expr), cond);
  7368. }
  7369. break;
  7370. }
  7371. case no_in:
  7372. case no_notin:
  7373. case no_between:
  7374. case no_notbetween:
  7375. {
  7376. IHqlExpression * lhs = expr->queryChild(0);
  7377. IHqlExpression * lhsExtra = (IHqlExpression *)lhs->queryTransformExtra();
  7378. if (lhsExtra)
  7379. {
  7380. DBGLOG("Inheriting filter condition");
  7381. return createValue(no_and, LINK(expr), LINK(lhsExtra));
  7382. }
  7383. break;
  7384. }
  7385. }
  7386. return LINK(expr);
  7387. }
  7388. static IHqlExpression * optimizeJoinFilter(IHqlExpression * expr)
  7389. {
  7390. //NB: Not a member function because we use a different transform mutex, and don't want to accidentally interfere with caller's use
  7391. IHqlExpression * index = expr->queryChild(1);
  7392. if (!index->hasAttribute(filteredAtom) && !index->hasAttribute(_filtered_Atom) && (index->getOperator() == no_newkeyindex))
  7393. return LINK(expr);
  7394. if (expr->hasAttribute(keyedAtom))
  7395. return LINK(expr); //MORE!
  7396. OwnedHqlExpr rhs = createSelector(no_right, index, querySelSeq(expr));
  7397. FilterCloner processor(rhs);
  7398. while (index->getOperator() != no_newkeyindex)
  7399. {
  7400. switch (index->getOperator())
  7401. {
  7402. case no_filter:
  7403. //case no_filtered:
  7404. //MORE: This might be useful in the future
  7405. UNIMPLEMENTED;
  7406. }
  7407. index = index->queryChild(0);
  7408. }
  7409. processor.addMappings(queryAttributeChild(index, filteredAtom, 0), queryActiveTableSelector());
  7410. processor.addMappings(queryAttributeChild(index, _filtered_Atom, 0), queryActiveTableSelector());
  7411. if (!processor.hasMappings())
  7412. return LINK(expr);
  7413. HqlExprArray exprs;
  7414. expr->queryChild(2)->unwindList(exprs, no_and);
  7415. bool keyedExplicitly = false;
  7416. ForEachItemIn(i1, exprs)
  7417. {
  7418. IHqlExpression & cur = exprs.item(i1);
  7419. switch (cur.getOperator())
  7420. {
  7421. case no_assertkeyed:
  7422. case no_assertwild:
  7423. keyedExplicitly = true;
  7424. exprs.replace(*processor.inheritFilters(&cur), i1);
  7425. break;
  7426. }
  7427. }
  7428. if (!keyedExplicitly)
  7429. {
  7430. ForEachItemIn(i2, exprs)
  7431. {
  7432. IHqlExpression & cur = exprs.item(i2);
  7433. switch (cur.getOperator())
  7434. {
  7435. case no_assertkeyed:
  7436. case no_assertwild:
  7437. case no_attr:
  7438. case no_attr_link:
  7439. case no_attr_expr:
  7440. break;
  7441. default:
  7442. exprs.replace(*processor.inheritFilters(&cur), i2);
  7443. break;
  7444. }
  7445. }
  7446. }
  7447. HqlExprArray args;
  7448. unwindChildren(args, expr);
  7449. args.replace(*createBalanced(no_and, queryBoolType(), exprs), 2);
  7450. return expr->clone(args);
  7451. }
  7452. static HqlTransformerInfo filteredIndexOptimizerInfo("FilteredIndexOptimizer");
  7453. FilteredIndexOptimizer::FilteredIndexOptimizer(bool _processJoins, bool _processReads)
  7454. : NewHqlTransformer(filteredIndexOptimizerInfo)
  7455. {
  7456. processJoins = _processJoins;
  7457. processReads = _processReads;
  7458. }
  7459. IHqlExpression * FilteredIndexOptimizer::createTransformed(IHqlExpression * expr)
  7460. {
  7461. OwnedHqlExpr transformed = NewHqlTransformer::createTransformed(expr);
  7462. if (processJoins && isKeyedJoin(transformed))
  7463. transformed.setown(optimizeJoinFilter(transformed));
  7464. if (processReads)
  7465. {
  7466. switch (transformed->getOperator())
  7467. {
  7468. case no_compound_indexread:
  7469. case no_compound_indexnormalize:
  7470. case no_compound_indexaggregate:
  7471. case no_compound_indexcount:
  7472. case no_compound_indexgroupaggregate:
  7473. //MORE:
  7474. break;
  7475. }
  7476. }
  7477. return transformed.getClear();
  7478. }
  7479. //==============================================================================================================
  7480. static HqlTransformerInfo localUploadTransformerInfo("LocalUploadTransformer");
  7481. LocalUploadTransformer::LocalUploadTransformer(IWorkUnit * _wu) : NewHqlTransformer(localUploadTransformerInfo)
  7482. {
  7483. wu = _wu;
  7484. }
  7485. IHqlExpression * LocalUploadTransformer::createTransformed(IHqlExpression * expr)
  7486. {
  7487. OwnedHqlExpr transformed = NewHqlTransformer::createTransformed(expr);
  7488. if (transformed->hasAttribute(localUploadAtom))
  7489. {
  7490. assertex(transformed->getOperator() == no_table);
  7491. IHqlExpression * filename = transformed->queryChild(0);
  7492. IHqlExpression * mode = transformed->queryChild(2);
  7493. assertex(filename->getOperator() == no_constant);
  7494. StringBuffer sourceName,localName;
  7495. filename->queryValue()->getStringValue(sourceName);
  7496. getUniqueId(localName.append("local"));
  7497. LocalFileUploadType uploadType = UploadTypeWUResult;
  7498. switch (mode->getOperator())
  7499. {
  7500. case no_csv:
  7501. uploadType = UploadTypeWUResultCsv;
  7502. break;
  7503. case no_xml:
  7504. uploadType = UploadTypeWUResultXml;
  7505. break;
  7506. }
  7507. wu->addLocalFileUpload(uploadType, sourceName, localName, NULL);
  7508. HqlExprArray args;
  7509. args.append(*LINK(expr->queryRecord()));
  7510. args.append(*createAttribute(nameAtom, createConstant(localName.str())));
  7511. args.append(*createAttribute(sequenceAtom, getStoredSequenceNumber()));
  7512. return createDataset(no_workunit_dataset, args);
  7513. }
  7514. return transformed.getClear();
  7515. }
  7516. //==============================================================================================================
  7517. /*
  7518. The following code converts expressions of the form a.b where a and b are datasets into a normalized form including
  7519. an explicit normalize activity. It follows the following rules:
1) A filter/project/table on a.b logically has access to fields in a and a.b.
  7521. 2) An operation with a transform (or record defining the output) other than a PROJECT/TABLE, implicitly loses access to fields in a for all
  7522. subsequent operations.
3) When a dataset is used in an output or outer context (e.g., call parameter) then only the fields in the child dataset are passed.
  7524. This is done by converting f(a.b) to normalize(a, f(LEFT.b)) at the appropriate place.
  7525. These rules mean we maintain the HOLe semantics which allow computed fields to be implemented by projecting fields from the parent dataset,
  7526. but also mean that we avoid problems with parent datasets e.g., join(a.b, c.d)
  7527. To make this efficient we also need to implement the following in the code generator:
  7528. 1) aggregate-normalize(x).
  7529. 2) normalize-source.
  7530. 3) aggregate-normalize-source
  7531. 4) inline processing of normalize.
  7532. After this transform there should be no existing datasets of the form <a.b>[new]. There is a new function to ensure this is correct.
  7533. For the first version, it only generates normalizes around datasets - and assumes that parent fields don't need to be mapped into the denormalize.
  7534. This makes it simpler, but means that parse statements that take a record which could theoretically access parent dataset
  7535. fields won't work. The fix would require a new kind of child normalize that took a no_newtransform, and would require
  7536. analysis of which parent fields are used in the child's no_newtransform.
  7537. */
  7538. inline void getDatasetRange(IHqlExpression * expr, unsigned & first, unsigned & max)
  7539. {
  7540. first = 0;
  7541. switch (getChildDatasetType(expr))
  7542. {
  7543. case childdataset_many_noscope:
  7544. case childdataset_many:
  7545. max = expr->numChildren();
  7546. break;
  7547. case childdataset_if:
  7548. first = 1;
  7549. max = expr->numChildren();
  7550. break;
  7551. default:
  7552. max = getNumChildTables(expr);
  7553. break;
  7554. }
  7555. }
  7556. static HqlTransformerInfo nestedSelectorNormalizerInfo("NestedSelectorNormalizer");
  7557. NestedSelectorNormalizer::NestedSelectorNormalizer() : NewHqlTransformer(nestedSelectorNormalizerInfo)
  7558. {
  7559. spottedCandidate = false;
  7560. }
  7561. void NestedSelectorNormalizer::analyseExpr(IHqlExpression * expr)
  7562. {
  7563. if (alreadyVisited(expr))
  7564. return;
  7565. NewHqlTransformer::analyseExpr(expr);
  7566. if (expr->isDataset())
  7567. {
  7568. bool childrenAreDenormalized = false;
  7569. unsigned first, max;
  7570. getDatasetRange(expr, first, max);
  7571. for (unsigned i=0; i < max; i++)
  7572. {
  7573. if (queryBodyExtra(expr->queryChild(i))->isDenormalized)
  7574. childrenAreDenormalized = true;
  7575. }
  7576. NestedSelectorInfo * extra = queryBodyExtra(expr);
  7577. switch (expr->getOperator())
  7578. {
  7579. case no_select:
  7580. if (isNewSelector(expr))
  7581. {
  7582. childrenAreDenormalized = true;
  7583. spottedCandidate = true;
  7584. }
  7585. break;
  7586. case no_hqlproject:
  7587. case no_usertable:
  7588. break;
  7589. default:
  7590. //Follow test effectively checks whether parent dataset is active beyond this point
  7591. if (expr->queryBody() == expr->queryNormalizedSelector())
  7592. {
  7593. if (childrenAreDenormalized)
  7594. {
  7595. extra->insertDenormalize = true;
  7596. childrenAreDenormalized = false;
  7597. }
  7598. }
  7599. break;
  7600. }
  7601. extra->isDenormalized = childrenAreDenormalized;
  7602. }
  7603. }
  7604. static IHqlExpression * splitSelector(IHqlExpression * expr, SharedHqlExpr & oldDataset)
  7605. {
  7606. assertex(expr->getOperator() == no_select);
  7607. IHqlExpression * ds = expr->queryChild(0);
  7608. if (expr->hasAttribute(newAtom))
  7609. {
  7610. oldDataset.set(ds);
  7611. OwnedHqlExpr left = createSelector(no_left, ds, querySelSeq(expr));
  7612. return createSelectExpr(left.getClear(), LINK(expr->queryChild(1)));
  7613. }
  7614. HqlExprArray args;
  7615. args.append(*splitSelector(ds, oldDataset));
  7616. unwindChildren(args, expr, 1);
  7617. return expr->clone(args);
  7618. }
  7619. IHqlExpression * NestedSelectorNormalizer::createNormalized(IHqlExpression * expr)
  7620. {
  7621. IHqlExpression * root = queryRoot(expr);
  7622. assertex(root && root->getOperator() == no_select && isNewSelector(root));
  7623. OwnedHqlExpr selSeq = createSelectorSequence();
  7624. OwnedHqlExpr oldDataset;
  7625. OwnedHqlExpr newSelector = splitSelector(root, oldDataset);
  7626. OwnedHqlExpr right = createSelector(no_right, expr, selSeq);
  7627. HqlExprArray args;
  7628. args.append(*LINK(oldDataset));
  7629. args.append(*replaceExpression(expr, root, newSelector));
  7630. args.append(*createTransformFromRow(right));
  7631. args.append(*LINK(selSeq));
  7632. OwnedHqlExpr ret = createDataset(no_normalize, args);
  7633. return expr->cloneAllAnnotations(ret);
  7634. }
  7635. IHqlExpression * NestedSelectorNormalizer::createTransformed(IHqlExpression * expr)
  7636. {
  7637. OwnedHqlExpr transformed = NewHqlTransformer::createTransformed(expr);
  7638. NestedSelectorInfo * extra = queryBodyExtra(expr);
  7639. bool denormalizeInputs = false;
  7640. if (extra->insertDenormalize)
  7641. {
  7642. denormalizeInputs = true;
  7643. }
  7644. else
  7645. {
  7646. switch (expr->getOperator())
  7647. {
  7648. case NO_AGGREGATE:
  7649. case no_joined:
  7650. case no_buildindex:
  7651. case no_apply:
  7652. case no_distribution:
  7653. case no_distributer:
  7654. case no_within:
  7655. case no_notwithin:
  7656. case no_output:
  7657. case no_createset:
  7658. case no_soapaction_ds:
  7659. case no_newsoapaction_ds:
  7660. case no_returnresult:
  7661. case no_setgraphresult:
  7662. case no_setgraphloopresult:
  7663. case no_keydiff:
  7664. case no_rowdiff:
  7665. case no_extractresult:
  7666. // case no_setresult:
  7667. case no_blob2id:
  7668. case no_selectnth:
  7669. case no_keypatch:
  7670. case no_assign:
  7671. case no_lt:
  7672. case no_le:
  7673. case no_gt:
  7674. case no_ge:
  7675. case no_ne:
  7676. case no_eq:
  7677. case no_order:
  7678. case no_keyed:
  7679. case no_loopbody:
  7680. case no_rowvalue:
  7681. case no_setmeta:
  7682. case no_typetransfer:
  7683. case no_subgraph:
  7684. denormalizeInputs = true;
  7685. break;
  7686. }
  7687. }
  7688. if (denormalizeInputs)
  7689. {
  7690. bool same = true;
  7691. HqlExprArray args;
  7692. unwindChildren(args, transformed);
  7693. unsigned first, max;
  7694. getDatasetRange(expr, first, max);
  7695. for (unsigned i = first; i < max; i++)
  7696. {
  7697. if (queryBodyExtra(expr->queryChild(i))->isDenormalized)
  7698. {
  7699. args.replace(*createNormalized(&args.item(i)), i);
  7700. same = false;
  7701. }
  7702. }
  7703. if (!same)
  7704. return transformed->clone(args);
  7705. }
  7706. return transformed.getClear();
  7707. }
  7708. //==============================================================================================================
  7709. /*
  7710. Code to spot ambiguous LEFT dataset references....
  7711. */
  7712. static HqlTransformerInfo leftRightSelectorNormalizerInfo("LeftRightSelectorNormalizer");
  7713. LeftRightSelectorNormalizer::LeftRightSelectorNormalizer(bool _allowAmbiguity) : NewHqlTransformer(leftRightSelectorNormalizerInfo)
  7714. {
  7715. allowAmbiguity = _allowAmbiguity;
  7716. isAmbiguous = false;
  7717. }
  7718. void LeftRightSelectorNormalizer::checkAmbiguity(const HqlExprCopyArray & inScope, IHqlExpression * selector)
  7719. {
  7720. node_operator selectOp = selector->getOperator();
  7721. ForEachItemIn(i, inScope)
  7722. {
  7723. IHqlExpression & cur = inScope.item(i);
  7724. if ((&cur != selector) && (cur.getOperator() == selectOp) && (cur.queryRecord() == selector->queryRecord()))
  7725. {
  7726. isAmbiguous = true;
  7727. if (!allowAmbiguity)
  7728. {
  7729. StringBuffer ecl;
  7730. getExprECL(selector, ecl);
  7731. throwError1(HQLERR_AmbiguousLeftRight, ecl.str());
  7732. }
  7733. }
  7734. }
  7735. }
  7736. void LeftRightSelectorNormalizer::analyseExpr(IHqlExpression * expr)
  7737. {
  7738. if (alreadyVisited(expr))
  7739. return;
  7740. IHqlExpression * selSeq = querySelSeq(expr);
  7741. if (selSeq)
  7742. {
  7743. HqlExprCopyArray inScope;
  7744. switch (getChildDatasetType(expr))
  7745. {
  7746. case childdataset_none:
  7747. case childdataset_many_noscope:
  7748. case childdataset_many:
  7749. case childdataset_map:
  7750. case childdataset_dataset_noscope:
  7751. case childdataset_if:
  7752. case childdataset_case:
  7753. case childdataset_dataset:
  7754. case childdataset_evaluate:
  7755. break;
  7756. case childdataset_datasetleft:
  7757. case childdataset_left:
  7758. {
  7759. IHqlExpression * dataset = expr->queryChild(0);
  7760. OwnedHqlExpr left = createSelector(no_left, dataset, selSeq);
  7761. gatherChildTablesUsed(NULL, &inScope, expr, 1);
  7762. checkAmbiguity(inScope, left);
  7763. break;
  7764. }
  7765. case childdataset_leftright:
  7766. {
  7767. OwnedHqlExpr left = createSelector(no_left, expr->queryChild(0), selSeq);
  7768. OwnedHqlExpr right = createSelector(no_right, expr->queryChild(1), selSeq);
  7769. gatherChildTablesUsed(NULL, &inScope, expr, 2);
  7770. checkAmbiguity(inScope, left);
  7771. checkAmbiguity(inScope, right);
  7772. break;
  7773. }
  7774. case childdataset_same_left_right:
  7775. case childdataset_top_left_right:
  7776. case childdataset_nway_left_right:
  7777. {
  7778. IHqlExpression * dataset = expr->queryChild(0);
  7779. OwnedHqlExpr left = createSelector(no_left, dataset, selSeq);
  7780. OwnedHqlExpr right = createSelector(no_right, dataset, selSeq);
  7781. gatherChildTablesUsed(NULL, &inScope, expr, 1);
  7782. checkAmbiguity(inScope, left);
  7783. checkAmbiguity(inScope, right);
  7784. break;
  7785. }
  7786. default:
  7787. UNIMPLEMENTED;
  7788. }
  7789. }
  7790. NewHqlTransformer::analyseExpr(expr);
  7791. }
  7792. IHqlExpression * LeftRightSelectorNormalizer::createTransformed(IHqlExpression * expr)
  7793. {
  7794. if (expr->isAttribute() && expr->queryName() == _selectorSequence_Atom)
  7795. return createDummySelectorSequence();
  7796. return NewHqlTransformer::createTransformed(expr);
  7797. }
  7798. IHqlExpression * LeftRightSelectorNormalizer::createTransformedSelector(IHqlExpression * expr)
  7799. {
  7800. node_operator op = expr->getOperator();
  7801. switch (op)
  7802. {
  7803. case no_left:
  7804. case no_right:
  7805. return transform(expr);
  7806. }
  7807. return NewHqlTransformer::createTransformedSelector(expr);
  7808. }
  7809. //==============================================================================================================
  7810. static HqlTransformerInfo forceLocalTransformerInfo("ForceLocalTransformer");
  7811. ForceLocalTransformer::ForceLocalTransformer(ClusterType _targetClusterType) : NewHqlTransformer(forceLocalTransformerInfo)
  7812. {
  7813. targetClusterType = _targetClusterType;
  7814. insideForceLocal = false;
  7815. allNodesDepth = 0;
  7816. }
  7817. IHqlExpression * ForceLocalTransformer::createTransformed(IHqlExpression * expr)
  7818. {
  7819. node_operator op = expr->getOperator();
  7820. switch (expr->getOperator())
  7821. {
  7822. case no_forcelocal:
  7823. case no_forcenolocal:
  7824. {
  7825. bool wasLocal = insideForceLocal;
  7826. insideForceLocal = (op == no_forcelocal) && (targetClusterType != HThorCluster);
  7827. IHqlExpression * ret = transform(expr->queryChild(0));
  7828. insideForceLocal = wasLocal;
  7829. return ret;
  7830. }
  7831. case no_thisnode:
  7832. if ((targetClusterType != HThorCluster) && (allNodesDepth == 0))
  7833. throwError(HQLERR_ThisNodeNotInsideAllNodes);
  7834. //fall through
  7835. case no_allnodes:
  7836. {
  7837. if (targetClusterType != HThorCluster)
  7838. {
  7839. unsigned oldDepth = allNodesDepth;
  7840. if (op == no_allnodes)
  7841. allNodesDepth++;
  7842. else
  7843. allNodesDepth--;
  7844. IHqlExpression * ret = NewHqlTransformer::createTransformed(expr);
  7845. allNodesDepth = oldDepth;
  7846. return ret;
  7847. }
  7848. else
  7849. return transform(expr->queryChild(0));
  7850. }
  7851. case no_globalscope:
  7852. case no_colon:
  7853. {
  7854. bool wasLocal = insideForceLocal;
  7855. unsigned oldDepth = allNodesDepth;
  7856. insideForceLocal = false;
  7857. allNodesDepth = 0;
  7858. IHqlExpression * ret = NewHqlTransformer::createTransformed(expr);
  7859. insideForceLocal = wasLocal;
  7860. allNodesDepth = oldDepth;
  7861. return ret;
  7862. }
  7863. }
  7864. OwnedHqlExpr ret = NewHqlTransformer::createTransformed(expr);
  7865. if (!insideForceLocal || !localChangesActivity(expr) || expr->hasAttribute(noLocalAtom))
  7866. return ret.getClear();
  7867. return appendLocalAttribute(ret);
  7868. }
  7869. ANewTransformInfo * ForceLocalTransformer::createTransformInfo(IHqlExpression * expr)
  7870. {
  7871. return CREATE_NEWTRANSFORMINFO(ForceLocalTransformInfo, expr);
  7872. }
  7873. IHqlExpression * ForceLocalTransformer::queryAlreadyTransformed(IHqlExpression * expr)
  7874. {
  7875. ForceLocalTransformInfo * extra = queryExtra(expr);
  7876. return extra->localTransformed[boolToInt(insideForceLocal)][boolToInt(insideAllNodes())];
  7877. }
  7878. void ForceLocalTransformer::setTransformed(IHqlExpression * expr, IHqlExpression * transformed)
  7879. {
  7880. ForceLocalTransformInfo * extra = queryExtra(expr);
  7881. extra->localTransformed[boolToInt(insideForceLocal)][boolToInt(insideAllNodes())].set(transformed);
  7882. }
  7883. IHqlExpression * ForceLocalTransformer::queryAlreadyTransformedSelector(IHqlExpression * expr)
  7884. {
  7885. ForceLocalTransformInfo * extra = queryExtra(expr);
  7886. return extra->localTransformedSelector[boolToInt(insideForceLocal)][boolToInt(insideAllNodes())];
  7887. }
  7888. void ForceLocalTransformer::setTransformedSelector(IHqlExpression * expr, IHqlExpression * transformed)
  7889. {
  7890. ForceLocalTransformInfo * extra = queryExtra(expr);
  7891. extra->localTransformedSelector[boolToInt(insideForceLocal)][boolToInt(insideAllNodes())].set(transformed);
  7892. }
  7893. //---------------------------------------------------------------------------
  7894. /*
  7895. This transform is responsible for ensuring that all datasets get converted to link counted datasets. Because all records of the same type get converted it is necessary to
  7896. transform the index and table definitions - otherwise a single record would need to be transformed in multiple different ways, which would require a much more
  7897. complicated transformation. However there are some exceptions:
a) weird field dataset syntax which dates back to hole days is not transformed.
  7899. b) add a project before a build index to ensure the code is more efficient. (It will be later moved over any intervening sorts).
  7900. c) ensure input and out from a pipe is serialized (so that sizeof(inputrow) returns a sensible value)
  7901. */
  7902. static HqlTransformerInfo hqlLinkedChildRowTransformerInfo("HqlLinkedChildRowTransformer");
  7903. HqlLinkedChildRowTransformer::HqlLinkedChildRowTransformer(bool _implicitLinkedChildRows) : QuickHqlTransformer(hqlLinkedChildRowTransformerInfo, NULL)
  7904. {
  7905. implicitLinkedChildRows = _implicitLinkedChildRows;
  7906. }
  7907. IHqlExpression * HqlLinkedChildRowTransformer::ensureInputSerialized(IHqlExpression * expr)
  7908. {
  7909. LinkedHqlExpr dataset = expr->queryChild(0);
  7910. IHqlExpression * record = dataset->queryRecord();
  7911. OwnedHqlExpr serializedRecord = getSerializedForm(record, diskAtom);
  7912. //If the dataset requires serialization, it is much more efficient to serialize before the sort, than to serialize after.
  7913. if (record == serializedRecord)
  7914. return LINK(expr);
  7915. OwnedHqlExpr selSeq = createSelectorSequence();
  7916. //The expression/transform has references to the in-memory selector, but the selector provided to transform will be serialised.
  7917. //so create a mapping <unserialized> := f(serialized)
  7918. //and then use it to expand references to the unserialized format
  7919. IHqlExpression * selector = dataset->queryNormalizedSelector();
  7920. OwnedHqlExpr mapTransform = createRecordMappingTransform(no_transform, serializedRecord, selector);
  7921. OwnedHqlExpr newDataset = createDatasetF(no_newusertable, LINK(dataset), LINK(serializedRecord), LINK(mapTransform), LINK(selSeq), NULL);
  7922. NewProjectMapper2 mapper;
  7923. mapper.setMapping(mapTransform);
  7924. HqlExprArray oldArgs, expandedArgs, newArgs;
  7925. newArgs.append(*LINK(newDataset));
  7926. unwindChildren(oldArgs, expr, 1);
  7927. mapper.expandFields(expandedArgs, oldArgs, dataset, newDataset, selector);
  7928. //Finally replace any remaining selectors - so that sizeof(ds) gets mapped correctly. Obscure..... but could possibly be used in a pipe,repeat
  7929. ForEachItemIn(i, expandedArgs)
  7930. newArgs.append(*replaceSelector(&expandedArgs.item(i), selector, newDataset->queryNormalizedSelector()));
  7931. return expr->clone(newArgs);
  7932. }
  7933. IHqlExpression * HqlLinkedChildRowTransformer::transformBuildIndex(IHqlExpression * expr)
  7934. {
  7935. return ensureInputSerialized(expr);
  7936. }
  7937. IHqlExpression * HqlLinkedChildRowTransformer::transformPipeThrough(IHqlExpression * expr)
  7938. {
  7939. //serialize input to pipe through, so that if they happen to use sizeof(ds) on the input it will give the
  7940. //non serialized format. No major need to ensure output is not serialized.
  7941. return ensureInputSerialized(expr);
  7942. }
  7943. IHqlExpression * HqlLinkedChildRowTransformer::createTransformedBody(IHqlExpression * expr)
  7944. {
  7945. switch (expr->getOperator())
  7946. {
  7947. case no_field:
  7948. {
  7949. ITypeInfo * type = expr->queryType();
  7950. switch (type->getTypeCode())
  7951. {
  7952. case type_dictionary:
  7953. case type_table:
  7954. case type_groupedtable:
  7955. if (expr->hasAttribute(embeddedAtom))
  7956. {
  7957. OwnedHqlExpr transformed = QuickHqlTransformer::createTransformedBody(expr);
  7958. return removeAttribute(transformed, embeddedAtom);
  7959. }
  7960. if (implicitLinkedChildRows && !expr->hasAttribute(_linkCounted_Atom))
  7961. {
  7962. //Don't use link counted rows for weird HOLe style dataset attributes
  7963. if (expr->hasAttribute(countAtom) || expr->hasAttribute(sizeofAtom))
  7964. break;
  7965. //add the attribute first so a no linked field doesn't contain a record that requires it
  7966. OwnedHqlExpr modified = appendOwnedOperand(expr, getLinkCountedAttr());
  7967. return transform(modified);
  7968. }
  7969. break;
  7970. }
  7971. break;
  7972. }
  7973. case no_buildindex:
  7974. {
  7975. OwnedHqlExpr transformed = QuickHqlTransformer::createTransformedBody(expr);
  7976. return transformBuildIndex(transformed);
  7977. }
  7978. case no_pipe:
  7979. if (expr->queryRecord())
  7980. {
  7981. return QuickHqlTransformer::createTransformedBody(expr);
  7982. }
  7983. case no_output:
  7984. //would this be a good idea for output to file? output to pipe?
  7985. if (false)
  7986. {
  7987. IHqlExpression * filename = queryRealChild(expr, 2);
  7988. if (filename)
  7989. {
  7990. OwnedHqlExpr transformed = QuickHqlTransformer::createTransformedBody(expr);
  7991. return ensureInputSerialized(transformed);
  7992. }
  7993. }
  7994. break;
  7995. case no_embedbody:
  7996. //Don't change the type of an embed body - otherwise result it will become link counted when not expected.
  7997. return LINK(expr);
  7998. }
  7999. return QuickHqlTransformer::createTransformedBody(expr);
  8000. }
  8001. //---------------------------------------------------------------------------
  8002. HqlScopeTaggerInfo::HqlScopeTaggerInfo(IHqlExpression * _expr) : MergingTransformInfo(_expr)
  8003. {
  8004. if (!onlyTransformOnce() && isIndependentOfScope(_expr))
  8005. {
  8006. //If the node doesn't have any active selectors then it isn't going to be context dependent
  8007. setOnlyTransformOnce(true);
  8008. }
  8009. }
  8010. ANewTransformInfo * HqlScopeTagger::createTransformInfo(IHqlExpression * expr)
  8011. {
  8012. return CREATE_NEWTRANSFORMINFO(HqlScopeTaggerInfo, expr);
  8013. }
  8014. /*
  8015. Details of the scope tagging.
  8016. no_select
  8017. if the lhs dataset is in scope then no flags are attached. if the lhs is not in scope, then a newAtom attribute is
  8018. attached to select node. That may contain the following attributes:
  8019. global - is an outer level activity
  8020. noTable - no other tables are active at this point e.g., global[1].field;
  8021. relatedTable - I think this could be deprecated in favour of the flag above.
  8022. Datasets
  8023. In other situations where a dataset/datarow is ambiguous whether it is new or in scope then a no_activerow
  8024. is inserted to indicate it is in scope.
  8025. Note:
  8026. new attributes aren't created for subfield e.g, myDataset.level1.level2.
This means that if (myDataset.level1) is expanded to some expression then a newAtom will need to be created on the new select
  8028. */
  8029. static HqlTransformerInfo hqlScopeTaggerInfo("HqlScopeTagger");
  8030. HqlScopeTagger::HqlScopeTagger(IErrorReceiver * _errors)
  8031. : ScopedDependentTransformer(hqlScopeTaggerInfo), errors(_errors)
  8032. {
  8033. }
  8034. bool HqlScopeTagger::isValidNormalizeSelector(IHqlExpression * expr)
  8035. {
  8036. loop
  8037. {
  8038. switch (expr->getOperator())
  8039. {
  8040. case no_filter:
  8041. break;
  8042. case no_usertable:
  8043. if (isAggregateDataset(expr))
  8044. return false;
  8045. return true;
  8046. case no_hqlproject:
  8047. if (isCountProject(expr))
  8048. return false;
  8049. return true;
  8050. case no_select:
  8051. {
  8052. IHqlExpression * ds = expr->queryChild(0);
  8053. if (isDatasetActive(ds))
  8054. return true;
  8055. //Not really sure what the following should do. Avoid a.b.c[1].d.e
  8056. if (ds->isDatarow() && (ds->getOperator() != no_select))
  8057. return false;
  8058. break;
  8059. }
  8060. case no_table:
  8061. return (queryTableMode(expr) == no_flat);
  8062. case no_keyindex:
  8063. case no_newkeyindex:
  8064. case no_rows:
  8065. return true;
  8066. default:
  8067. return false;
  8068. }
  8069. expr = expr->queryChild(0);
  8070. }
  8071. }
  8072. static const char * getECL(IHqlExpression * expr, StringBuffer & s)
  8073. {
  8074. toUserECL(s, expr, false);
  8075. if (s.length() > 2)
  8076. s.setLength(s.length()-2);
  8077. return s.str();
  8078. }
  8079. void HqlScopeTagger::checkActiveRow(IHqlExpression * expr)
  8080. {
  8081. if (!isDatasetActive(expr))
  8082. {
  8083. StringBuffer exprText;
  8084. getECL(expr, exprText);
  8085. elideString(exprText, 20);
  8086. VStringBuffer msg("ROW(%s) - dataset argument is not in scope. Did you mean dataset[1]?", exprText.str());
  8087. reportError(msg);
  8088. }
  8089. }
  8090. void HqlScopeTagger::reportSelectorError(IHqlExpression * selector, IHqlExpression * expr)
  8091. {
  8092. ScopeInfo * scope = innerScope;
  8093. if (innerScope && innerScope->isEmpty() && scopeStack.ordinality() > 1)
  8094. scope = &scopeStack.item(scopeStack.ordinality()-2);
  8095. StringBuffer exprText, datasetName, scopeName;
  8096. StringBuffer msg;
  8097. if (scope && scope->dataset)
  8098. {
  8099. IHqlExpression * topScope = scope->dataset;
  8100. msg.appendf("%s - Table %s is not related to %s",
  8101. getECL(expr, exprText),
  8102. getExprIdentifier(datasetName, selector).str(), getExprIdentifier(scopeName, topScope).str());
  8103. }
  8104. else if (scope && scope->left)
  8105. {
  8106. msg.appendf("%s - no active row for Table %s inside transform (use LEFT?)",
  8107. getECL(expr, exprText),
  8108. getExprIdentifier(datasetName, selector).str());
  8109. }
  8110. else
  8111. {
  8112. msg.appendf("%s - no specified row for Table %s", getECL(expr, exprText),
  8113. getExprIdentifier(datasetName, selector).str());
  8114. }
  8115. reportError(msg);
  8116. }
  8117. IHqlExpression * HqlScopeTagger::transformSelect(IHqlExpression * expr)
  8118. {
  8119. IHqlExpression * ds = expr->queryChild(0);
  8120. if (isDatasetActive(ds))
  8121. {
  8122. if (!innerScope && scopeStack.ordinality() == 0)
  8123. {
  8124. ds = queryDatasetCursor(ds);
  8125. switch (ds->getOperator())
  8126. {
  8127. case no_left:
  8128. case no_right:
  8129. StringBuffer exprText, datasetName;
  8130. VStringBuffer msg("%s - %s not in scope, possibly passed into a global/workflow definition", getECL(expr, exprText), getExprIdentifier(datasetName, ds).str());
  8131. reportError(msg);
  8132. break;
  8133. }
  8134. }
  8135. return Parent::createTransformed(expr); // this will call transformSelector() on lhs since new is not present
  8136. }
  8137. IHqlExpression * cursor = queryDatasetCursor(ds);
  8138. if (cursor->isDataset())
  8139. {
  8140. if (expr->isDictionary())
  8141. {
  8142. StringBuffer exprText;
  8143. VStringBuffer msg("dictionary %s must be explicitly NORMALIZED", getECL(expr, exprText));
  8144. reportError(msg, false);
  8145. }
  8146. else if (expr->isDataset())
  8147. {
  8148. if (!isValidNormalizeSelector(cursor))
  8149. {
  8150. StringBuffer exprText;
  8151. VStringBuffer msg("dataset %s may not be supported without using NORMALIZE", getECL(expr, exprText));
  8152. reportError(msg, true);
  8153. }
  8154. }
  8155. else
  8156. {
  8157. if (!isDatasetARow(ds))
  8158. reportSelectorError(ds, expr);
  8159. }
  8160. }
  8161. pushScope();
  8162. OwnedHqlExpr newDs = transformNewDataset(ds, false);
  8163. popScope();
  8164. IHqlExpression * field = expr->queryChild(1);
  8165. if (ds->isDataset())
  8166. {
  8167. if (!expr->isDataset() && !expr->isDatarow() && !expr->isDictionary())
  8168. {
  8169. //If the left is a dataset, and the right isn't a dataset or a datarow then this doesn't make sense - it is an illegal
  8170. return createSelectExpr(newDs.getClear(), LINK(field));
  8171. }
  8172. }
  8173. //MORE: What about child datasets - should really be tagged as
  8174. //if (!isNewDataset && field->isDataset() && !containsSelf(ds) && !isDatasetActive(ds)) isNew = true;
  8175. if ((newDs->getOperator() == no_select) && newDs->isDatarow())
  8176. return createSelectExpr(newDs.getClear(), LINK(field));
  8177. return createNewSelectExpr(newDs.getClear(), LINK(field));
  8178. }
  8179. IHqlExpression * HqlScopeTagger::transformSelectorsAttr(IHqlExpression * expr)
  8180. {
  8181. HqlExprArray args;
  8182. HqlExprArray transformedArgs;
  8183. unwindChildren(args, expr);
  8184. ForEachItemIn(i, args)
  8185. {
  8186. IHqlExpression & cur = args.item(i);
  8187. assertex(cur.getOperator() == no_select);
  8188. //Only retain selectors for datasets which are in scope.
  8189. if (isDatasetActive(cur.queryChild(0)))
  8190. transformedArgs.append(*transformSelector(&cur));
  8191. }
  8192. return expr->clone(transformedArgs);
  8193. }
  8194. IHqlExpression * HqlScopeTagger::transformNewDataset(IHqlExpression * expr, bool isActiveOk)
  8195. {
  8196. node_operator op = expr->getOperator();
  8197. switch (op)
  8198. {
  8199. case no_activerow:
  8200. {
  8201. IHqlExpression * arg0 = expr->queryChild(0);
  8202. checkActiveRow(arg0);
  8203. OwnedHqlExpr transformedArg = transformSelector(arg0);
  8204. return ensureActiveRow(transformedArg);
  8205. }
  8206. }
  8207. OwnedHqlExpr transformed = transform(expr);
  8208. switch (op)
  8209. {
  8210. //MORE: I'm still not quite sure the active tagging of rows is right to have these as exceptions...
  8211. case no_left:
  8212. case no_right:
  8213. case no_matchattr:
  8214. return transformed.getClear();
  8215. case no_select:
  8216. if (isDatasetActive(expr))
  8217. {
  8218. IHqlExpression * ds = expr->queryChild(0);
  8219. if (!isAlwaysActiveRow(ds))
  8220. {
  8221. StringBuffer exprText;
  8222. VStringBuffer msg("%s - Need to use active(dataset) to refer to the current row of an active dataset", getECL(expr, exprText));
  8223. reportError(msg, false);
  8224. }
  8225. }
  8226. return transformed.getClear();
  8227. }
  8228. if (isDatasetActive(expr))
  8229. {
  8230. if (!isActiveOk)
  8231. {
  8232. StringBuffer exprText;
  8233. VStringBuffer msg("%s - Need to use active(dataset) to refer to the current row of an active dataset", getECL(expr, exprText));
  8234. reportError(msg);
  8235. }
  8236. return ensureActiveRow(transformed->queryNormalizedSelector());
  8237. }
  8238. switch (op)
  8239. {
  8240. case no_if:
  8241. case no_chooseds:
  8242. {
  8243. HqlExprArray args;
  8244. args.append(*transform(expr->queryChild(0)));
  8245. args.append(*transformNewDataset(expr->queryChild(1), false));
  8246. ForEachChildFrom(i, expr, 2)
  8247. args.append(*transformNewDataset(expr->queryChild(i), false));
  8248. return expr->clone(args);
  8249. }
  8250. case no_addfiles:
  8251. case no_projectrow:
  8252. return transformAmbiguousChildren(expr);
  8253. case no_case:
  8254. case no_map:
  8255. throwUnexpected(); // should have been converted to no_if by now...
  8256. default:
  8257. return transformed.getClear();
  8258. }
  8259. }
  8260. IHqlExpression * HqlScopeTagger::transformAmbiguous(IHqlExpression * expr, bool isActiveOk)
  8261. {
  8262. ITypeInfo * type = expr->queryType();
  8263. type_t tc = type_void;
  8264. if (type)
  8265. tc = type->getTypeCode();
  8266. switch (tc)
  8267. {
  8268. case type_void:
  8269. return transformAmbiguousChildren(expr);
  8270. case type_table:
  8271. case type_groupedtable:
  8272. {
  8273. pushScope();
  8274. OwnedHqlExpr ret = transformNewDataset(expr, isActiveOk);
  8275. popScope();
  8276. return ret.getClear();
  8277. }
  8278. }
  8279. return transform(expr);
  8280. }
  8281. IHqlExpression * HqlScopeTagger::transformAmbiguousChildren(IHqlExpression * expr)
  8282. {
  8283. unsigned max = expr->numChildren();
  8284. if (max == 0)
  8285. return LINK(expr);
  8286. bool same = true;
  8287. HqlExprArray args;
  8288. args.ensure(max);
  8289. for(unsigned i=0; i < max; i++)
  8290. {
  8291. IHqlExpression * cur = expr->queryChild(i);
  8292. IHqlExpression * tr = transformAmbiguous(cur, false);
  8293. args.append(*tr);
  8294. if (cur != tr)
  8295. same = false;
  8296. }
  8297. if (same)
  8298. return LINK(expr);
  8299. return expr->clone(args);
  8300. }
  8301. IHqlExpression * HqlScopeTagger::transformSizeof(IHqlExpression * expr)
  8302. {
  8303. IHqlExpression * arg = expr->queryChild(0)->queryNormalizedSelector();
  8304. //Sizeof (dataset.somefield(<new>)) - convert to sizeof(record.somefield), so the argument doesn't get hoisted incorrectly, and don't get a scope error
  8305. OwnedHqlExpr newArg;
  8306. if (arg->getOperator() == no_select)
  8307. {
  8308. IHqlExpression * ds = arg->queryChild(0);
  8309. IHqlExpression * cursor = queryDatasetCursor(ds);
  8310. if (!isDatasetActive(cursor) && cursor->isDataset())
  8311. newArg.setown(createSelectExpr(LINK(ds->queryRecord()), LINK(arg->queryChild(1))));
  8312. }
  8313. if (!newArg)
  8314. newArg.setown(transformAmbiguous(arg, true));
  8315. HqlExprArray args;
  8316. args.append(*newArg.getClear());
  8317. return completeTransform(expr, args);
  8318. }
  8319. IHqlExpression * HqlScopeTagger::transformWithin(IHqlExpression * dataset, IHqlExpression * scope)
  8320. {
  8321. while (dataset->getOperator() == no_related)
  8322. dataset = dataset->queryChild(0);
  8323. if (dataset->getOperator() != no_select)
  8324. {
  8325. StringBuffer exprText;
  8326. VStringBuffer msg("%s - dataset filtered by WITHIN is too complex", getECL(dataset, exprText));
  8327. reportError(msg);
  8328. return transform(dataset);
  8329. }
  8330. IHqlExpression * ds = dataset->queryChild(0);
  8331. IHqlExpression * field = dataset->queryChild(1);
  8332. if (ds->queryNormalizedSelector() == scope)
  8333. {
  8334. OwnedHqlExpr newDs = transform(ds);
  8335. return createSelectExpr(newDs.getClear(), LINK(field));
  8336. }
  8337. OwnedHqlExpr newDs = transformWithin(ds, scope);
  8338. return createNewSelectExpr(newDs.getClear(), LINK(field));
  8339. }
  8340. IHqlExpression * HqlScopeTagger::transformRelated(IHqlExpression * expr)
  8341. {
  8342. IHqlExpression * ds = expr->queryChild(0);
  8343. IHqlExpression * scope = expr->queryChild(1);
  8344. if (!isDatasetActive(scope))
  8345. {
  8346. StringBuffer exprText;
  8347. VStringBuffer msg("dataset \"%s\" used in WITHIN is not in scope", getECL(scope, exprText));
  8348. reportError(msg);
  8349. }
  8350. //Check the ds is a table
  8351. IHqlDataset * scopeDs = scope->queryDataset();
  8352. if (scopeDs != scopeDs->queryTable())
  8353. {
  8354. StringBuffer exprText;
  8355. VStringBuffer msg("dataset \"%s\" used as parameter to WITHIN is too complex", getECL(expr, exprText));
  8356. reportError(msg);
  8357. }
  8358. return transformWithin(ds, scope->queryNormalizedSelector());
  8359. }
  8360. IHqlExpression * HqlScopeTagger::createTransformed(IHqlExpression * expr)
  8361. {
  8362. IHqlExpression * body = expr->queryBody(true);
  8363. if (expr != body)
  8364. {
  8365. switch (expr->getAnnotationKind())
  8366. {
  8367. case annotate_meta:
  8368. collector.processMetaAnnotation(expr);
  8369. break;
  8370. case annotate_symbol:
  8371. {
  8372. WarningProcessor::OnWarningState saved;
  8373. collector.pushSymbol(saved, expr);
  8374. OwnedHqlExpr transformedBody = transform(body);
  8375. collector.popSymbol(saved);
  8376. if (body == transformedBody)
  8377. return LINK(expr);
  8378. return expr->cloneAnnotation(transformedBody);
  8379. }
  8380. break;
  8381. case annotate_location:
  8382. {
  8383. break;
  8384. }
  8385. }
  8386. OwnedHqlExpr transformedBody = transform(body);
  8387. if (body == transformedBody)
  8388. return LINK(expr);
  8389. return expr->cloneAnnotation(transformedBody);
  8390. }
  8391. collector.checkForGlobalOnWarning(expr);
  8392. switch (expr->getOperator())
  8393. {
  8394. case no_left:
  8395. case no_right:
  8396. case no_self:
  8397. case no_top:
  8398. return LINK(expr);
  8399. case no_activerow:
  8400. checkActiveRow(expr->queryChild(0));
  8401. break;
  8402. case no_select:
  8403. return transformSelect(expr);
  8404. case no_call:
  8405. case no_externalcall:
  8406. case no_rowvalue:
  8407. // case no_addfiles:
  8408. // case no_libraryscopeinstance:??
  8409. return transformAmbiguousChildren(expr);
  8410. case no_offsetof:
  8411. case no_sizeof:
  8412. return transformSizeof(expr);
  8413. case no_attr_expr:
  8414. if (expr->queryName() == _selectors_Atom)
  8415. return transformSelectorsAttr(expr);
  8416. return transformAmbiguousChildren(expr);
  8417. case no_datasetfromrow:
  8418. {
  8419. IHqlExpression * ds = expr->queryChild(0);
  8420. if (ds->isDataset() && !isDatasetActive(ds))
  8421. {
  8422. StringBuffer exprText;
  8423. VStringBuffer msg("dataset %s mistakenly interpreted as a datarow, possibly due to missing dataset() in parameter type", getECL(ds, exprText));
  8424. reportError(msg);
  8425. }
  8426. return transformAmbiguousChildren(expr);
  8427. }
  8428. case no_temptable:
  8429. if (expr->queryChild(0)->isDatarow())
  8430. return transformAmbiguousChildren(expr);
  8431. break;
  8432. case no_related:
  8433. return transformRelated(expr);
  8434. case no_eq:
  8435. case no_ne:
  8436. case no_lt:
  8437. case no_le:
  8438. case no_gt:
  8439. case no_ge:
  8440. case no_order:
  8441. //MORE: Should check this doesn't make the comparison invalid.
  8442. return transformAmbiguousChildren(expr);
  8443. case no_assign:
  8444. {
  8445. IHqlExpression * lhs = expr->queryChild(0);
  8446. IHqlExpression * rhs = expr->queryChild(1);
  8447. OwnedHqlExpr newRhs = transformAmbiguous(rhs, false);
  8448. if (lhs->isDatarow() && newRhs->isDataset())
  8449. {
  8450. StringBuffer exprText;
  8451. VStringBuffer msg("dataset expression (%s) assigned to field '%s' with type row", getECL(rhs, exprText), lhs->queryChild(1)->queryName()->str());
  8452. reportError(msg.str());
  8453. }
  8454. if (rhs == newRhs)
  8455. return LINK(expr);
  8456. HqlExprArray children;
  8457. children.append(*LINK(expr->queryChild(0)));
  8458. children.append(*newRhs.getClear());
  8459. return completeTransform(expr, children);
  8460. }
  8461. break;
  8462. case no_evaluate:
  8463. throwUnexpected();
  8464. case no_projectrow:
  8465. {
  8466. OwnedHqlExpr transformed = Parent::createTransformed(expr);
  8467. if (transformed->queryChild(0)->isDataset())
  8468. reportError("PROJECT() row argument resolved to a dataset. Missing DATASET() from parameter type?");
  8469. return transformed.getClear();
  8470. }
  8471. case no_merge:
  8472. {
  8473. HqlExprArray children;
  8474. transformChildren(expr, children);
  8475. IHqlExpression * sorted = queryAttribute(sortedAtom, children);
  8476. if (!sorted || queryAttribute(_implicitSorted_Atom, children))
  8477. {
  8478. IHqlExpression * dataset = &children.item(0);
  8479. removeAttribute(children, _implicitSorted_Atom);
  8480. if (sorted)
  8481. children.zap(*sorted);
  8482. HqlExprArray sorts;
  8483. OwnedHqlExpr order = getExistingSortOrder(dataset, expr->hasAttribute(localAtom), true);
  8484. if (order)
  8485. unwindChildren(sorts, order);
  8486. ForEachItemInRev(i, sorts)
  8487. {
  8488. if (sorts.item(i).isAttribute())
  8489. {
  8490. reportError(HQLWRN_MergeBadSortOrder_Text, true);
  8491. sorts.remove(i);
  8492. }
  8493. }
  8494. children.append(*createExprAttribute(sortedAtom, sorts));
  8495. }
  8496. return expr->clone(children);
  8497. }
  8498. }
  8499. return Parent::createTransformed(expr);
  8500. }
  8501. void HqlScopeTagger::reportWarnings()
  8502. {
  8503. if (errors)
  8504. collector.report(*errors);
  8505. }
  8506. void HqlScopeTagger::reportError(const char * msg, bool warning)
  8507. {
  8508. IHqlExpression * location = collector.queryActiveSymbol();
  8509. //Make this an error when we are confident...
  8510. int startLine= location ? location->getStartLine() : 0;
  8511. int startColumn = location ? location->getStartColumn() : 0;
  8512. ISourcePath * sourcePath = location ? location->querySourcePath() : NULL;
  8513. Owned<IECLError> err = createECLError(!warning, ERR_ASSERT_WRONGSCOPING, msg, sourcePath->str(), startLine, startColumn, 0);
  8514. collector.report(NULL, errors, err); // will throw immediately if it is an error.
  8515. }
  8516. //---------------------------------------------------------------------------------------------------------------------
  8517. SharedTableInfo * ImplicitAliasTransformInfo::uses(IHqlExpression * tableBody) const
  8518. {
  8519. ForEachItemIn(i, sharedTables)
  8520. {
  8521. SharedTableInfo & cur = sharedTables.item(i);
  8522. if (cur.dataset == tableBody)
  8523. return &cur;
  8524. }
  8525. return NULL;
  8526. }
  8527. void ImplicitAliasTransformInfo::add(SharedTableInfo * table)
  8528. {
  8529. sharedTables.append(*LINK(table));
  8530. }
  8531. void ImplicitAliasTransformInfo::addAmbiguity(SharedTableInfo * table)
  8532. {
  8533. containsAmbiguity = true;
  8534. merge(table);
  8535. }
  8536. void ImplicitAliasTransformInfo::merge(SharedTableInfo * table)
  8537. {
  8538. ForEachItemIn(i, sharedTables)
  8539. {
  8540. SharedTableInfo & cur = sharedTables.item(i);
  8541. if (cur.dataset == table->dataset)
  8542. {
  8543. if (cur.depth < table->depth)
  8544. sharedTables.replace(*LINK(table), i);
  8545. return;
  8546. }
  8547. }
  8548. add(table);
  8549. }
  8550. void ImplicitAliasTransformInfo::inherit(const ImplicitAliasTransformInfo * other)
  8551. {
  8552. ForEachItemIn(i, other->sharedTables)
  8553. merge(&other->sharedTables.item(i));
  8554. }
  8555. static HqlTransformerInfo implicitAliasTransformerInfo("ImplicitAliasTransformer");
  8556. ImplicitAliasTransformer::ImplicitAliasTransformer() : NewHqlTransformer(implicitAliasTransformerInfo)
  8557. {
  8558. seenShared = true;
  8559. seenAmbiguity = false;
  8560. }
  8561. void ImplicitAliasTransformer::analyseExpr(IHqlExpression * _expr)
  8562. {
  8563. IHqlExpression * body = _expr->queryBody();
  8564. if (alreadyVisited(body))
  8565. {
  8566. if ((pass == 0) && body->isDataset())
  8567. {
  8568. switch (body->getOperator())
  8569. {
  8570. case no_rows:
  8571. break;
  8572. default:
  8573. seenShared = true;
  8574. queryExtra(body)->shared.setown(new SharedTableInfo(body, 0));
  8575. break;
  8576. }
  8577. }
  8578. return;
  8579. }
  8580. NewHqlTransformer::analyseExpr(body);
  8581. if (pass == 0)
  8582. return;
  8583. ImplicitAliasTransformInfo * extra = queryExtra(body);
  8584. if (extra->shared)
  8585. extra->add(extra->shared);
  8586. switch (body->getOperator())
  8587. {
  8588. case no_activerow:
  8589. case no_filepos:
  8590. case no_file_logicalname:
  8591. case no_offsetof:
  8592. case no_joined:
  8593. case no_colon:
  8594. case no_globalscope:
  8595. case no_attr:
  8596. return;
  8597. case no_select:
  8598. {
  8599. bool isNew;
  8600. IHqlExpression * ds = querySelectorDataset(body, isNew);
  8601. if (isNew)
  8602. {
  8603. ImplicitAliasTransformInfo * dsExtra = queryExtra(ds->queryBody());
  8604. extra->inherit(dsExtra);
  8605. }
  8606. return;
  8607. }
  8608. }
  8609. IHqlExpression * dataset = NULL;
  8610. switch (getChildDatasetType(body))
  8611. {
  8612. case childdataset_none:
  8613. case childdataset_many_noscope:
  8614. case childdataset_many:
  8615. case childdataset_map:
  8616. case childdataset_dataset_noscope:
  8617. case childdataset_if:
  8618. case childdataset_case:
  8619. case childdataset_evaluate:
  8620. case childdataset_left:
  8621. case childdataset_leftright:
  8622. case childdataset_same_left_right:
  8623. case childdataset_nway_left_right:
  8624. break;
  8625. case childdataset_dataset:
  8626. case childdataset_datasetleft:
  8627. case childdataset_top_left_right:
  8628. dataset = body->queryChild(0)->queryBody();
  8629. break;
  8630. default:
  8631. UNIMPLEMENTED;
  8632. }
  8633. ForEachChild(i, body)
  8634. {
  8635. IHqlExpression * cur = body->queryChild(i);
  8636. ImplicitAliasTransformInfo * childExtra = queryExtra(cur->queryBody());
  8637. //If this is one of the arguments to an operation which has an active top dataset,
  8638. //check to see if any of the contained expressions reference this item
  8639. if (dataset && (i != 0))
  8640. {
  8641. SharedTableInfo * match = childExtra->uses(dataset);
  8642. if (match)
  8643. {
  8644. seenAmbiguity = true;
  8645. SharedTableInfo * nested = createAmbiguityInfo(match->dataset, match->depth+1);
  8646. extra->addAmbiguity(nested);
  8647. }
  8648. // dbglogExpr(_expr);
  8649. // DBGLOG("Implicit nested table ambiguity spotted in expression");
  8650. }
  8651. extra->inherit(childExtra);
  8652. }
  8653. }
  8654. SharedTableInfo * ImplicitAliasTransformer::createAmbiguityInfo(IHqlExpression * dataset, unsigned depth)
  8655. {
  8656. ForEachItemIn(i, ambiguousTables)
  8657. {
  8658. SharedTableInfo & cur = ambiguousTables.item(i);
  8659. if ((cur.dataset == dataset) && (depth == cur.depth))
  8660. return &cur;
  8661. }
  8662. ambiguousTables.append(*new SharedTableInfo(dataset, depth));
  8663. return &ambiguousTables.tos();
  8664. }
  8665. ANewTransformInfo * ImplicitAliasTransformer::createTransformInfo(IHqlExpression * expr)
  8666. {
  8667. return CREATE_NEWTRANSFORMINFO(ImplicitAliasTransformInfo, expr);
  8668. }
  8669. IHqlExpression * ImplicitAliasTransformer::createTransformed(IHqlExpression * expr)
  8670. {
  8671. IHqlExpression * body = expr->queryBody();
  8672. if (expr != body)
  8673. {
  8674. OwnedHqlExpr newBody = transform(body);
  8675. return expr->cloneAllAnnotations(newBody);
  8676. }
  8677. OwnedHqlExpr transformed = NewHqlTransformer::createTransformed(expr);
  8678. updateOrphanedSelectors(transformed, expr);
  8679. ImplicitAliasTransformInfo * extra = queryExtra(body);
  8680. if (!extra->containsAmbiguity)
  8681. return transformed.getClear();
  8682. SharedTableInfo * match = extra->uses(body->queryChild(0)->queryBody());
  8683. assertex(match && match->depth != 0);
  8684. if (!match->uid)
  8685. match->uid.setown(createUniqueId());
  8686. OwnedHqlExpr aliased;
  8687. IHqlExpression * dataset = transformed->queryChild(0)->queryBody();
  8688. IHqlExpression * uid = match->uid;
  8689. aliased.setown(createDataset(no_dataset_alias, LINK(dataset), LINK(uid)));
  8690. //Replace dataset with an aliased variety, and remap all the selectors
  8691. HqlExprArray args;
  8692. args.append(*LINK(aliased));
  8693. replaceSelectors(args, transformed, 1, dataset->queryNormalizedSelector(), aliased->queryNormalizedSelector());
  8694. return transformed->clone(args);
  8695. }
  8696. void ImplicitAliasTransformer::process(HqlExprArray & exprs)
  8697. {
  8698. analyseArray(exprs, 0);
  8699. if (!seenShared)
  8700. return;
  8701. analyseArray(exprs, 1);
  8702. if (hasAmbiguity())
  8703. {
  8704. // DBGLOG("Implicit nested dataset ambiguity spotted in expression");
  8705. HqlExprArray transformed;
  8706. transformRoot(exprs, transformed);
  8707. replaceArray(exprs, transformed);
  8708. }
  8709. }
  8710. //---------------------------------------------------------------------------------------------------------------------
  8711. SharedTableInfo * LeftRightTransformInfo::uses(IHqlExpression * tableBody) const
  8712. {
  8713. ForEachItemIn(i, sharedTables)
  8714. {
  8715. SharedTableInfo & cur = sharedTables.item(i);
  8716. if (cur.dataset == tableBody)
  8717. return &cur;
  8718. }
  8719. return NULL;
  8720. }
  8721. void LeftRightTransformInfo::add(SharedTableInfo * table)
  8722. {
  8723. sharedTables.append(*LINK(table));
  8724. }
  8725. bool LeftRightTransformInfo::noteUsed(IHqlExpression * seq)
  8726. {
  8727. if (!seqs.contains(*seq))
  8728. seqs.append(*seq);
  8729. if (seqs.ordinality() > 1)
  8730. {
  8731. shared.setown(new SharedTableInfo(original, 0));
  8732. return true;
  8733. }
  8734. return false;
  8735. }
  8736. void LeftRightTransformInfo::addAmbiguity(SharedTableInfo * table)
  8737. {
  8738. containsAmbiguity = true;
  8739. merge(table);
  8740. }
  8741. void LeftRightTransformInfo::merge(SharedTableInfo * table)
  8742. {
  8743. ForEachItemIn(i, sharedTables)
  8744. {
  8745. SharedTableInfo & cur = sharedTables.item(i);
  8746. if (cur.dataset == table->dataset)
  8747. {
  8748. if (cur.depth < table->depth)
  8749. sharedTables.replace(*LINK(table), i);
  8750. return;
  8751. }
  8752. }
  8753. add(table);
  8754. }
  8755. void LeftRightTransformInfo::inherit(const LeftRightTransformInfo * other)
  8756. {
  8757. ForEachItemIn(i, other->sharedTables)
  8758. merge(&other->sharedTables.item(i));
  8759. }
  8760. static HqlTransformerInfo LeftRightTransformerInfo("LeftRightTransformer");
  8761. LeftRightTransformer::LeftRightTransformer() : NewHqlTransformer(LeftRightTransformerInfo)
  8762. {
  8763. seenShared = true;
  8764. }
  8765. void LeftRightTransformer::analyseExpr(IHqlExpression * _expr)
  8766. {
  8767. IHqlExpression * body = _expr->queryBody();
  8768. if (alreadyVisited(body))
  8769. return;
  8770. NewHqlTransformer::analyseExpr(body);
  8771. if (pass == 0)
  8772. {
  8773. //First pass gathers a list of selectors that are potentially ambiguous if the sequence was removed
  8774. IHqlExpression * left = NULL;
  8775. IHqlExpression * right = NULL;
  8776. switch (getChildDatasetType(body))
  8777. {
  8778. case childdataset_left:
  8779. case childdataset_datasetleft:
  8780. left = body->queryChild(0);
  8781. break;
  8782. case childdataset_same_left_right:
  8783. case childdataset_nway_left_right:
  8784. case childdataset_top_left_right:
  8785. left = body->queryChild(0);
  8786. right = body->queryChild(0);
  8787. break;
  8788. case childdataset_leftright:
  8789. left = body->queryChild(0);
  8790. right = body->queryChild(1);
  8791. break;
  8792. }
  8793. if (left)
  8794. {
  8795. LeftRightTransformInfo * extra = queryExtra(body);
  8796. IHqlExpression * selSeq = querySelSeq(body);
  8797. OwnedHqlExpr seq = createDummySelectorSequence();
  8798. extra->rawLeft.setown(createSelector(no_left, left, seq));
  8799. incUsage(extra->rawLeft, selSeq);
  8800. if (right)
  8801. {
  8802. //no_left is used deliberately in the following to avoid complications where right matches
  8803. //but left doesn't, causing the depths to be messed up.
  8804. extra->rawRight.setown(createSelector(no_left, right, seq));
  8805. if (extra->rawLeft != extra->rawRight)
  8806. incUsage(extra->rawRight, selSeq);
  8807. else
  8808. extra->rawRight.clear();
  8809. }
  8810. }
  8811. }
  8812. else
  8813. {
  8814. //Second pass - for each expression, gather a list of selectors that would actually be ambiguous.
  8815. LeftRightTransformInfo * extra = queryExtra(body);
  8816. IHqlExpression * rawLeft = extra->rawLeft;
  8817. IHqlExpression * rawRight = extra->rawRight;
  8818. //If LEFT is potentially ambiguous, then add it to the list of selectors used by this expression
  8819. if (rawLeft)
  8820. {
  8821. LeftRightTransformInfo * leftExtra = queryExtra(rawLeft);
  8822. if (leftExtra->shared)
  8823. extra->add(leftExtra->shared);
  8824. }
  8825. //Ditto for right
  8826. if (rawRight)
  8827. {
  8828. LeftRightTransformInfo * rightExtra = queryExtra(rawRight);
  8829. if (rightExtra->shared)
  8830. extra->add(rightExtra->shared);
  8831. }
  8832. switch (body->getOperator())
  8833. {
  8834. case no_activerow:
  8835. case no_filepos:
  8836. case no_file_logicalname:
  8837. case no_offsetof:
  8838. case no_joined:
  8839. case no_colon:
  8840. case no_globalscope:
  8841. case no_attr:
  8842. return;
  8843. case no_select:
  8844. {
  8845. bool isNew;
  8846. IHqlExpression * ds = querySelectorDataset(body, isNew);
  8847. if (isNew)
  8848. {
  8849. LeftRightTransformInfo * dsExtra = queryExtra(ds->queryBody());
  8850. extra->inherit(dsExtra);
  8851. }
  8852. return;
  8853. }
  8854. }
  8855. ForEachChild(i, body)
  8856. {
  8857. IHqlExpression * cur = body->queryChild(i);
  8858. LeftRightTransformInfo * childExtra = queryExtra(cur->queryBody());
  8859. //If this is one of the arguments to an operation which has an active top dataset,
  8860. //check to see if any of the contained expressions reference this item
  8861. if ((i != 0) && rawLeft)
  8862. {
  8863. SharedTableInfo * matchLeft = childExtra->uses(rawLeft);
  8864. SharedTableInfo * matchRight = rawRight ? childExtra->uses(rawRight) : NULL;
  8865. if (matchLeft || matchRight)
  8866. {
  8867. unsigned leftDepth = matchLeft ? matchLeft->depth : 0;
  8868. unsigned rightDepth = matchRight ? matchRight->depth : 0;
  8869. unsigned depth = leftDepth > rightDepth ? leftDepth : rightDepth;
  8870. SharedTableInfo * nested = createAmbiguityInfo(rawLeft, depth+1);
  8871. extra->addAmbiguity(nested);
  8872. if (rawRight)
  8873. {
  8874. SharedTableInfo * nested = createAmbiguityInfo(rawRight, depth+1);
  8875. extra->addAmbiguity(nested);
  8876. }
  8877. }
  8878. }
  8879. extra->inherit(childExtra);
  8880. }
  8881. }
  8882. }
  8883. void LeftRightTransformer::incUsage(IHqlExpression * expr, IHqlExpression * seq)
  8884. {
  8885. //MORE: Needs to keep track of the sequences that were used with it, so know if needs disambiguating.
  8886. LeftRightTransformInfo * extra = queryExtra(expr);
  8887. if (extra->noteUsed(seq))
  8888. seenShared = true;
  8889. }
  8890. SharedTableInfo * LeftRightTransformer::createAmbiguityInfo(IHqlExpression * dataset, unsigned depth)
  8891. {
  8892. ForEachItemIn(i, ambiguousTables)
  8893. {
  8894. SharedTableInfo & cur = ambiguousTables.item(i);
  8895. if ((cur.dataset == dataset) && (depth == cur.depth))
  8896. return &cur;
  8897. }
  8898. ambiguousTables.append(*new SharedTableInfo(dataset, depth));
  8899. return &ambiguousTables.tos();
  8900. }
  8901. ANewTransformInfo * LeftRightTransformer::createTransformInfo(IHqlExpression * expr)
  8902. {
  8903. return CREATE_NEWTRANSFORMINFO(LeftRightTransformInfo, expr);
  8904. }
  8905. IHqlExpression * LeftRightTransformer::createTransformed(IHqlExpression * expr)
  8906. {
  8907. IHqlExpression * body = expr->queryBody();
  8908. if (expr != body)
  8909. {
  8910. OwnedHqlExpr newBody = transform(body);
  8911. return expr->cloneAllAnnotations(newBody);
  8912. }
  8913. OwnedHqlExpr transformed = NewHqlTransformer::createTransformed(expr);
  8914. updateOrphanedSelectors(transformed, expr);
  8915. LeftRightTransformInfo * extra = queryExtra(body);
  8916. SharedTableInfo * matchLeft = extra->rawLeft ? extra->uses(extra->rawLeft) : NULL;
  8917. if (matchLeft)
  8918. {
  8919. childDatasetType dsType = getChildDatasetType(expr);
  8920. IHqlExpression * left = transformed->queryChild(0);
  8921. IHqlExpression * right = hasRight(dsType) ? (hasSameLeftRight(dsType) ? left : transformed->queryChild(1)) : NULL;
  8922. IHqlExpression * oldSelSeq = querySelSeq(expr);
  8923. OwnedHqlExpr newSelSeq = createSelectorSequence(matchLeft->depth);
  8924. OwnedHqlExpr oldLeft = createSelector(no_left, left, oldSelSeq);
  8925. OwnedHqlExpr newLeft = createSelector(no_left, left, newSelSeq);
  8926. //Replace dataset with an aliased variety, and remap all the selectors
  8927. HqlExprArray mapped;
  8928. replaceSelectors(mapped, transformed, 1, oldLeft, newLeft);
  8929. if (right)
  8930. {
  8931. OwnedHqlExpr oldRight = createSelector(no_right, right, oldSelSeq);
  8932. OwnedHqlExpr newRight = createSelector(no_right, right, newSelSeq);
  8933. unsigned firstArg = (hasSameLeftRight(dsType) ? 0 : 1);
  8934. replaceSelectors(mapped, firstArg, oldRight, newRight);
  8935. }
  8936. HqlExprArray args;
  8937. args.append(*LINK(left));
  8938. appendArray(args, mapped);
  8939. args.zap(*oldSelSeq);
  8940. args.append(*LINK(newSelSeq));
  8941. transformed.setown(transformed->clone(args));
  8942. }
  8943. return transformed.getClear();
  8944. }
  8945. void LeftRightTransformer::process(HqlExprArray & exprs)
  8946. {
  8947. analyseArray(exprs, 0);
  8948. if (!seenShared)
  8949. return;
  8950. analyseArray(exprs, 1);
  8951. HqlExprArray transformed;
  8952. transformRoot(exprs, transformed);
  8953. replaceArray(exprs, transformed);
  8954. }
  8955. //---------------------------------------------------------------------------------------------------------------------
  8956. /*
  8957. Common up expressions so that all references to the same expression have identical symbols, annotations.
  8958. Generally it improves the code a lot - especially when macros are used. However there are occasional problems....
  8959. a) ut.CleanCompany(ds.x) and ut.cleanCompany(left.x).
If a component of one of them is commoned up with another occurrence, then you can get differently named symbols within the expanded function. When we
  8961. then test to see if something is sorted it can incorrectly mismatch (see busheader.xhql dnb_combined_append)
  8962. */
  8963. static void unwindAnnotations(HqlExprCopyArray & unwound, IHqlExpression * expr)
  8964. {
  8965. if (expr->getAnnotationKind() == annotate_none)
  8966. return;
  8967. unwindAnnotations(unwound, expr->queryBody(true));
  8968. unwound.append(*expr);
  8969. }
  8970. IHqlExpression * AnnotationTransformInfo::cloneAnnotations(IHqlExpression * newBody)
  8971. {
  8972. if (annotations.ordinality() == 0)
  8973. return LINK(newBody);
  8974. return annotations.item(0).cloneAllAnnotations(newBody);
  8975. LinkedHqlExpr ret = newBody;
  8976. #if 1
  8977. ForEachItemIn(i, annotations)
  8978. ret.setown(annotations.item(i).cloneAllAnnotations(ret));
  8979. #else
  8980. //Code saved once we start removing duplicate annotations (e.g., locations)
  8981. HqlExprCopyArray toApply;
  8982. ForEachItemIn(i, annotations)
  8983. unwindAnnotations(toApply, &annotations.item(i));
  8984. ForEachItemIn(i2, toApply)
  8985. {
  8986. IHqlExpression & curAnnotate = toApply.item(i2);
  8987. ret.setown(curAnnotate.cloneAnnotation(ret));
  8988. }
  8989. #endif
  8990. return ret.getClear();
  8991. }
  8992. void AnnotationTransformInfo::noteAnnotation(IHqlExpression * annotation)
  8993. {
  8994. //MORE: Need more intelligence to see if this is a subset of what we already have..
  8995. annotations.append(*annotation);
  8996. }
  8997. static HqlTransformerInfo annotationNormalizerInfo("AnnotationNormalizerTransformer");
  8998. AnnotationNormalizerTransformer::AnnotationNormalizerTransformer()
  8999. : NewHqlTransformer(annotationNormalizerInfo)
  9000. {
  9001. }
  9002. ANewTransformInfo * AnnotationNormalizerTransformer::createTransformInfo(IHqlExpression * expr)
  9003. {
  9004. return CREATE_NEWTRANSFORMINFO(AnnotationTransformInfo, expr);
  9005. }
  9006. void AnnotationNormalizerTransformer::analyseExpr(IHqlExpression * expr)
  9007. {
  9008. if (alreadyVisited(expr))
  9009. return;
  9010. IHqlExpression * body = expr->queryBody();
  9011. if (expr != body)
  9012. {
  9013. queryLocationIndependentExtra(body)->noteAnnotation(expr);
  9014. //Note: expr already tested if expr == body...
  9015. if (alreadyVisited(body))
  9016. return;
  9017. }
  9018. node_operator op = body->getOperator();
  9019. switch (op)
  9020. {
  9021. case no_attr_expr:
  9022. analyseChildren(body);
  9023. return;
  9024. }
  9025. NewHqlTransformer::analyseExpr(body);
  9026. }
  9027. IHqlExpression * AnnotationNormalizerTransformer::createTransformed(IHqlExpression * expr)
  9028. {
  9029. node_operator op = expr->getOperator();
  9030. IHqlExpression * body = expr->queryBody();
  9031. switch (op)
  9032. {
  9033. case no_list:
  9034. {
  9035. if (body->numChildren() == 0)
  9036. return LINK(body);
  9037. break;
  9038. }
  9039. case no_constant:
  9040. // case no_null:
  9041. {
  9042. //AnnotationTransformInfo * extra = queryLocationIndependentExtra(body);
  9043. //Don't common up the location information for this, otherwise it gets silly! Possibly worth removing altogether if ambiguous?
  9044. //MORE: This should probably depend on whether there is more than one annotation on the constant.
  9045. return LINK(body);
  9046. }
  9047. }
  9048. if (expr != body)
  9049. return transform(body);
  9050. OwnedHqlExpr transformed = NewHqlTransformer::createTransformed(expr);
  9051. return queryLocationIndependentExtra(body)->cloneAnnotations(transformed);
  9052. }
  9053. AnnotationTransformInfo * AnnotationNormalizerTransformer::queryLocationIndependentExtra(IHqlExpression * expr)
  9054. {
  9055. return static_cast<AnnotationTransformInfo *>(queryTransformExtra(queryLocationIndependent(expr)));
  9056. }
  9057. void normalizeAnnotations(HqlCppTranslator & translator, HqlExprArray & exprs)
  9058. {
  9059. //First iterate through the expressions and call queryLocationIndependent() to avoid nested transforms (which are less efficient)
  9060. ForEachItemIn(iInit, exprs)
  9061. queryLocationIndependent(&exprs.item(iInit));
  9062. translator.traceExpressions("before annotation normalize", exprs);
  9063. unsigned time = msTick();
  9064. AnnotationNormalizerTransformer normalizer;
  9065. HqlExprArray transformed;
  9066. normalizer.analyseArray(exprs, 0);
  9067. normalizer.transformRoot(exprs, transformed);
  9068. replaceArray(exprs, transformed);
  9069. translator.updateTimer("workunit;tree transform: normalize.annotations", msTick()-time);
  9070. }
  9071. //---------------------------------------------------------------------------
  9072. static HqlTransformerInfo containsCompoundTransformerInfo("ContainsCompoundTransformer");
  9073. ContainsCompoundTransformer::ContainsCompoundTransformer()
  9074. : QuickHqlTransformer(containsCompoundTransformerInfo, NULL)
  9075. {
  9076. containsCompound = false;
  9077. }
  9078. //NB: This cannot be short circuited, because it is also gathering information about whether or
  9079. void ContainsCompoundTransformer::doAnalyseBody(IHqlExpression * expr)
  9080. {
  9081. if (containsCompound)
  9082. return;
  9083. switch (expr->getOperator())
  9084. {
  9085. case no_compound:
  9086. if (!expr->isAction())
  9087. {
  9088. containsCompound = true;
  9089. return;
  9090. }
  9091. break;
  9092. case no_colon:
  9093. case no_cluster:
  9094. case no_sequential:
  9095. case no_allnodes:
  9096. case no_thisnode:
  9097. //Need to recursively handle these
  9098. containsCompound = true;
  9099. return;
  9100. case no_record:
  9101. case no_field:
  9102. case no_attr:
  9103. case no_attr_link:
  9104. case no_left:
  9105. case no_right:
  9106. case no_self:
  9107. case no_top:
  9108. case no_workunit_dataset:
  9109. case no_assertwild:
  9110. case no_getresult:
  9111. case no_getgraphresult:
  9112. case no_activerow:
  9113. case no_newkeyindex:
  9114. return;
  9115. case no_list:
  9116. if (expr->isConstant())
  9117. return;
  9118. break;
  9119. case no_select:
  9120. // if (expr->hasAttribute(newAtom))
  9121. analyse(expr->queryChild(0));
  9122. return;
  9123. case no_assign:
  9124. analyse(expr->queryChild(1));
  9125. return;
  9126. }
  9127. QuickHqlTransformer::doAnalyseBody(expr);
  9128. }
  9129. bool containsCompound(const HqlExprArray & exprs)
  9130. {
  9131. ContainsCompoundTransformer spotter;
  9132. spotter.analyseArray(exprs);
  9133. return spotter.containsCompound;
  9134. }
  9135. bool containsCompound(IHqlExpression * expr)
  9136. {
  9137. ContainsCompoundTransformer spotter;
  9138. spotter.analyse(expr);
  9139. return spotter.containsCompound;
  9140. }
  9141. static HqlTransformerInfo nestedCompoundTransformerInfo("NestedCompoundTransformer");
  9142. NestedCompoundTransformer::NestedCompoundTransformer(HqlCppTranslator & _translator)
  9143. : HoistingHqlTransformer(nestedCompoundTransformerInfo, CTFnoteifactions), translator(_translator), translatorOptions(_translator.queryOptions())
  9144. {
  9145. }
  9146. //For the moment allow simple external calls in scalar setting
  9147. //to make logging much easier...
  9148. static bool isSimpleSideeffect(IHqlExpression * expr)
  9149. {
  9150. switch (expr->getOperator())
  9151. {
  9152. case no_externalcall:
  9153. case no_attr:
  9154. case no_attr_expr:
  9155. case no_attr_link:
  9156. return true;
  9157. case no_comma:
  9158. case no_compound:
  9159. case no_parallel:
  9160. {
  9161. ForEachChild(i, expr)
  9162. {
  9163. if (!isSimpleSideeffect(expr->queryChild(i)))
  9164. return false;
  9165. }
  9166. return true;
  9167. }
  9168. case no_if:
  9169. {
  9170. ForEachChildFrom(i, expr, 1)
  9171. {
  9172. if (!isSimpleSideeffect(expr->queryChild(i)))
  9173. return false;
  9174. }
  9175. return true;
  9176. }
  9177. }
  9178. return false;
  9179. }
  9180. static bool isCalloutSideeffect(IHqlExpression * expr)
  9181. {
  9182. if (!expr->queryType()->isScalar())
  9183. return false;
  9184. return isSimpleSideeffect(expr->queryChild(0));
  9185. }
  9186. IHqlExpression * NestedCompoundTransformer::createTransformed(IHqlExpression * expr)
  9187. {
  9188. if (expr->isConstant())
  9189. return LINK(expr);
  9190. IHqlExpression * ret = queryTransformAnnotation(expr);
  9191. if (ret)
  9192. return ret;
  9193. node_operator op = expr->getOperator();
  9194. switch (op)
  9195. {
  9196. case no_compound:
  9197. if (isUsedUnconditionally(expr) && !expr->isAction() && !isCalloutSideeffect(expr))
  9198. {
  9199. IHqlExpression * sideEffect = expr->queryChild(0);
  9200. IHqlExpression * value = expr->queryChild(1);
  9201. if (!isIndependentOfScope(sideEffect))
  9202. {
  9203. StringBuffer s;
  9204. if (sideEffect->queryName())
  9205. s.appendf(" '%s'", sideEffect->queryName()->str());
  9206. else if (value->queryName())
  9207. s.appendf(" '%s'", value->queryName()->str());
  9208. else
  9209. s.append(" ").append(getOpString(sideEffect->getOperator()));
  9210. IHqlExpression * location = queryLocation(sideEffect);
  9211. if (!location)
  9212. location = queryLocation(value);
  9213. if (!location)
  9214. location = queryActiveLocation();
  9215. if (!isSimpleSideeffect(sideEffect))
  9216. {
  9217. //MORE: This should be an error, but there are still occasional false positives e.g., OUTPUT(ds1.childds)
  9218. //so needs to stay a warning.
  9219. // translator.ERRORAT1(location, HQLERR_GlobalSideEffectDependent, s.str());
  9220. translator.WARNINGAT1(location, HQLWRN_GlobalSideEffectDependent, s.str());
  9221. }
  9222. break;
  9223. }
  9224. if (!translatorOptions.workunitTemporaries)
  9225. {
  9226. StringBuffer s;
  9227. if (expr->queryName())
  9228. s.append(expr->queryName()).append(": ");
  9229. getExprECL(sideEffect, s);
  9230. throwError1(HQLERR_LibrariesCannotContainSideEffects, s.str());
  9231. }
  9232. appendToTarget(*transform(sideEffect));
  9233. return transform(value);
  9234. }
  9235. break;
  9236. }
  9237. return HoistingHqlTransformer::createTransformed(expr);
  9238. }
  9239. //---------------------------------------------------------------------------
  9240. class LocationInfo : public CInterface
  9241. {
  9242. public:
  9243. HqlExprArray matches;
  9244. };
  9245. static HqlTransformerInfo duplicateCodeSpotterInfo("DuplicateCodeSpotter");
  9246. class DuplicateCodeSpotter : public QuickHqlTransformer
  9247. {
  9248. public:
  9249. DuplicateCodeSpotter() : QuickHqlTransformer(duplicateCodeSpotterInfo, NULL) {}
  9250. inline bool checkExpr(IHqlExpression * expr)
  9251. {
  9252. if (!expr->isDataset())
  9253. return false;
  9254. switch (expr->getOperator())
  9255. {
  9256. case no_join:
  9257. break;
  9258. default:
  9259. return false;
  9260. }
  9261. return true;
  9262. }
  9263. virtual void doAnalyse(IHqlExpression * expr)
  9264. {
  9265. if (checkExpr(expr))
  9266. {
  9267. IHqlExpression * location = queryLocation(expr);
  9268. if (location)
  9269. {
  9270. OwnedHqlExpr attr = createLocationAttr(location->querySourcePath(), location->getStartLine(), location->getStartColumn(), 0);
  9271. Linked<LocationInfo> info;
  9272. Shared<LocationInfo> * match = map.getValue(attr);
  9273. if (match)
  9274. info.set(*match);
  9275. else
  9276. {
  9277. info.setown(new LocationInfo);
  9278. map.setValue(attr, info);
  9279. }
  9280. IHqlExpression * body = expr->queryBody();
  9281. if (!info->matches.contains(*body))
  9282. {
  9283. ForEachItemIn(i, info->matches)
  9284. {
  9285. debugFindFirstDifference(body, &info->matches.item(i));
  9286. }
  9287. info->matches.append(*LINK(body));
  9288. }
  9289. }
  9290. }
  9291. QuickHqlTransformer::doAnalyse(expr);
  9292. }
  9293. MapOwnedToOwned<IHqlExpression, LocationInfo> map;
  9294. };
  9295. void spotPotentialDuplicateCode(HqlExprArray & exprs)
  9296. {
  9297. DuplicateCodeSpotter spotter;
  9298. spotter.analyseArray(exprs);
  9299. }
  9300. //---------------------------------------------------------------------------
  9301. static bool isUniqueAttributeName(IAtom * name)
  9302. {
  9303. const char * nameText = name->str();
  9304. unsigned len = strlen(nameText);
  9305. if (len > 3)
  9306. {
  9307. if ((nameText[len-2] == '_') && (nameText[len-1] == '_') && isdigit((unsigned char)nameText[len-3]))
  9308. return true;
  9309. }
  9310. return false;
  9311. }
  9312. static bool containsUpperCase(const char * s)
  9313. {
  9314. loop
  9315. {
  9316. unsigned char next = *s++;
  9317. if (!next)
  9318. return false;
  9319. if (isupper(next))
  9320. return true;
  9321. }
  9322. }
  9323. static IIdAtom * simplifySymbolName(IIdAtom * name, bool commonUniqueNameAttributes)
  9324. {
  9325. if (!commonUniqueNameAttributes)
  9326. return NULL;
  9327. //Rename all attributes __x__1234__ to __x__
  9328. const char * nameText = name->lower()->str();
  9329. size_t nameLen = strlen(nameText);
  9330. size_t len = nameLen;
  9331. if (len > 3)
  9332. {
  9333. if ((nameText[len-2] == '_') && (nameText[len-1] == '_') && isdigit((unsigned char)nameText[len-3]))
  9334. {
  9335. len -= 3;
  9336. while (len && isdigit((unsigned char)nameText[len-1]))
  9337. len--;
  9338. //Shouldn't be possible...
  9339. if (len == 0)
  9340. len = nameLen;
  9341. }
  9342. }
  9343. if (nameLen != len)
  9344. {
  9345. StringAttr truncName;
  9346. truncName.set(nameText, len);
  9347. return createIdAtom(truncName);
  9348. }
  9349. return NULL;
  9350. }
  9351. static IIdAtom * lowerCaseSymbolName(IIdAtom * name)
  9352. {
  9353. if (containsUpperCase(name->str()))
  9354. return createIdAtom(name->lower()->str());
  9355. return name;
  9356. }
  9357. static bool exprIsSelfConstant(IHqlExpression * expr)
  9358. {
  9359. if (expr->isConstant())
  9360. return true;
  9361. switch (expr->getOperator())
  9362. {
  9363. case no_select:
  9364. {
  9365. IHqlExpression * selector = expr->queryChild(0);
  9366. while (selector->getOperator() == no_select)
  9367. selector = selector->queryChild(0);
  9368. return (selector->getOperator() == no_selfref);
  9369. }
  9370. }
  9371. if (expr->isDataset() && (getNumChildTables(expr) == 0))
  9372. return false;
  9373. ForEachChild(i, expr)
  9374. {
  9375. IHqlExpression * cur = expr->queryChild(i);
  9376. if (!exprIsSelfConstant(cur))
  9377. return false;
  9378. }
  9379. return true;
  9380. }
  9381. static IAtom * queryPatUseModule(IHqlExpression * expr)
  9382. {
  9383. IHqlExpression * moduleAttr = expr->queryAttribute(moduleAtom);
  9384. if (moduleAttr)
  9385. return moduleAttr->queryChild(0)->queryBody()->queryName();
  9386. return NULL;
  9387. }
  9388. static IAtom * queryPatUseName(IHqlExpression * expr)
  9389. {
  9390. IHqlExpression * nameAttr = expr->queryAttribute(nameAtom);
  9391. return nameAttr->queryChild(0)->queryBody()->queryName();
  9392. }
  9393. void HqlTreeNormalizerInfo::noteSymbol(IHqlExpression * _symbol)
  9394. {
  9395. if (!symbol || isUniqueAttributeName(symbol->queryName()))
  9396. symbol = _symbol;
  9397. }
  9398. static HqlTransformerInfo hqlTreeNormalizerInfo("HqlTreeNormalizer");
  9399. HqlTreeNormalizer::HqlTreeNormalizer(HqlCppTranslator & _translator) : NewHqlTransformer(hqlTreeNormalizerInfo), translator(_translator)
  9400. {
  9401. seenForceLocal = false;
  9402. seenLocalUpload = false;
  9403. const HqlCppOptions & translatorOptions = translator.queryOptions();
  9404. options.assertSortedDistributed = translatorOptions.assertSortedDistributed;
  9405. options.removeAsserts = !translatorOptions.checkAsserts;
  9406. options.commonUniqueNameAttributes = translatorOptions.commonUniqueNameAttributes;
  9407. options.sortIndexPayload = translatorOptions.sortIndexPayload;
  9408. options.allowSections = translatorOptions.allowSections;
  9409. options.normalizeExplicitCasts = translatorOptions.normalizeExplicitCasts;
  9410. options.ensureRecordsHaveSymbols = translatorOptions.ensureRecordsHaveSymbols;
  9411. options.outputRowsAsDatasets = translator.targetRoxie();
  9412. options.constantFoldNormalize = translatorOptions.constantFoldNormalize;
  9413. options.allowActivityForKeyedJoin = translatorOptions.allowActivityForKeyedJoin;
  9414. options.implicitSubSort = translatorOptions.implicitBuildIndexSubSort;
  9415. errors = translator.queryErrors();
  9416. nextSequenceValue = 1;
  9417. }
  9418. ANewTransformInfo * HqlTreeNormalizer::createTransformInfo(IHqlExpression * expr)
  9419. {
  9420. return CREATE_NEWTRANSFORMINFO(HqlTreeNormalizerInfo, expr);
  9421. }
  9422. HqlTreeNormalizerInfo * HqlTreeNormalizer::queryLocationIndependentExtra(IHqlExpression * expr)
  9423. {
  9424. return static_cast<HqlTreeNormalizerInfo *>(queryTransformExtra(queryLocationIndependent(expr)));
  9425. }
  9426. void HqlTreeNormalizer::convertRecordToAssigns(HqlExprArray & assigns, IHqlExpression * oldRecord, IHqlExpression * targetSelector, bool canOmit, bool convertTempTable)
  9427. {
  9428. ForEachChild(idx, oldRecord)
  9429. {
  9430. IHqlExpression * oldField = oldRecord->queryChild(idx);
  9431. OwnedHqlExpr newField = transform(oldField);
  9432. switch (oldField->getOperator())
  9433. {
  9434. case no_ifblock:
  9435. convertRecordToAssigns(assigns, oldField->queryChild(1), targetSelector, canOmit, convertTempTable);
  9436. break;
  9437. case no_record:
  9438. convertRecordToAssigns(assigns, oldField, targetSelector, canOmit, convertTempTable);
  9439. break;
  9440. case no_field:
  9441. {
  9442. IHqlExpression * oldFieldRecord = oldField->queryRecord();
  9443. IHqlExpression * value = queryRealChild(oldField, 0);
  9444. OwnedHqlExpr newTargetSelector = createSelectExpr(LINK(targetSelector), LINK(newField));
  9445. if (oldFieldRecord && !oldField->isDataset() && !value)
  9446. {
  9447. if (convertTempTable)
  9448. convertRecordToAssigns(assigns, oldFieldRecord, newTargetSelector, canOmit, convertTempTable);
  9449. else
  9450. {
  9451. IHqlExpression * newRecord = newField->queryRecord();
  9452. OwnedHqlExpr newSelf = getSelf(newRecord);
  9453. HqlExprArray newAssigns;
  9454. convertRecordToAssigns(newAssigns, oldFieldRecord, newSelf, canOmit, convertTempTable);
  9455. IHqlExpression * transform = createValue(no_newtransform, makeTransformType(newRecord->getType()), newAssigns);
  9456. IHqlExpression * newValue = createRow(no_createrow, transform);
  9457. assigns.append(*createAssign(LINK(newTargetSelector), newValue));
  9458. }
  9459. }
  9460. else
  9461. {
  9462. assertex(value || canOmit);
  9463. if (value && (!convertTempTable || exprIsSelfConstant(value)))
  9464. assigns.append(*createAssign(LINK(newTargetSelector), transform(value)));
  9465. if (oldFieldRecord && convertTempTable)
  9466. assigns.append(*createExprAttribute(defaultAtom, createExprAttribute(defaultAtom, LINK(newTargetSelector)), convertRecordToAssigns(oldFieldRecord, canOmit, convertTempTable)));
  9467. }
  9468. break;
  9469. }
  9470. case no_attr:
  9471. case no_attr_link:
  9472. case no_attr_expr:
  9473. break;
  9474. default:
  9475. UNIMPLEMENTED;
  9476. }
  9477. }
  9478. }
  9479. IHqlExpression * HqlTreeNormalizer::convertRecordToAssigns(IHqlExpression * oldRecord, bool canOmit, bool convertTempTable)
  9480. {
  9481. OwnedHqlExpr newRecord = transform(oldRecord);
  9482. HqlExprArray assigns;
  9483. OwnedHqlExpr self = getSelf(newRecord);
  9484. convertRecordToAssigns(assigns, oldRecord, self, canOmit, convertTempTable);
  9485. return createValue(no_transform, makeTransformType(newRecord->getType()), assigns);
  9486. }
  9487. // Problems occur if a record used in a select fields is also used for some other linked purpose.
  9488. // Thankfully the only one so far is BUILDINDEX(index)
  9489. IHqlExpression * HqlTreeNormalizer::convertSelectToProject(IHqlExpression * newRecord, IHqlExpression * expr)
  9490. {
  9491. OwnedHqlExpr newDataset = transform(expr->queryChild(0));
  9492. IHqlExpression * oldRecord = expr->queryChild(1);
  9493. if (oldRecord->getOperator() == no_null)
  9494. return newDataset.getClear();
  9495. HqlExprArray assigns;
  9496. OwnedHqlExpr self = getSelf(newRecord);
  9497. convertRecordToAssigns(assigns, oldRecord, self, false, false);
  9498. OwnedHqlExpr newTransform = createValue(no_newtransform, makeTransformType(LINK(newRecord->queryRecordType())), assigns);
  9499. newTransform.setown(newRecord->cloneAllAnnotations(newTransform));
  9500. HqlExprArray args;
  9501. args.append(*newDataset.getClear());
  9502. args.append(*LINK(newRecord));
  9503. args.append(*newTransform.getClear());
  9504. unsigned numChildren = expr->numChildren();
  9505. for (unsigned idx = 2; idx < numChildren; idx++)
  9506. args.append(*transform(expr->queryChild(idx)));
  9507. node_operator op = isAggregateDataset(expr) ? no_newaggregate : no_newusertable;
  9508. OwnedHqlExpr project = createDataset(op, args);
  9509. return expr->cloneAllAnnotations(project);
  9510. }
  9511. IHqlExpression * HqlTreeNormalizer::removeDefaultsFromExpr(IHqlExpression * expr, unsigned recordChildIndex, node_operator newOp)
  9512. {
  9513. IHqlExpression * oldRecord = expr->queryChild(recordChildIndex);
  9514. OwnedHqlExpr newRecord = transform(oldRecord);
  9515. IHqlExpression * ds = expr->queryChild(0);
  9516. HqlExprArray assigns;
  9517. OwnedHqlExpr self = getSelf(newRecord);
  9518. convertRecordToAssigns(assigns, oldRecord, self, false, false);
  9519. IHqlExpression * newTransform = createValue(no_newtransform, makeTransformType(LINK(newRecord->queryRecordType())), assigns);
  9520. HqlExprArray args;
  9521. args.append(*transform(expr->queryChild(0)));
  9522. for (unsigned i= 1; i < recordChildIndex; i++)
  9523. args.append(*transform(expr->queryChild(i)));
  9524. args.append(*LINK(newRecord));
  9525. args.append(*newTransform);
  9526. unsigned numChildren = expr->numChildren();
  9527. for (unsigned idx = recordChildIndex+1; idx < numChildren; idx++)
  9528. args.append(*transform(expr->queryChild(idx)));
  9529. OwnedHqlExpr project;
  9530. if (expr->isDataset())
  9531. project.setown(createDataset(newOp, args));
  9532. else if (expr->isDatarow())
  9533. project.setown(createRow(newOp, args));
  9534. else
  9535. project.setown(createValue(newOp, makeVoidType(), args));
  9536. return expr->cloneAllAnnotations(project);
  9537. }
  9538. ITypeInfo * HqlTreeNormalizer::transformType(ITypeInfo * type)
  9539. {
  9540. switch (type->queryModifier())
  9541. {
  9542. case typemod_original:
  9543. {
  9544. switch (type->getTypeCode())
  9545. {
  9546. case type_record:
  9547. case type_transform:
  9548. case type_row:
  9549. case type_table:
  9550. case type_groupedtable:
  9551. //Strip all original annotations - they cause branches to not be commoned up
  9552. return transformType(type->queryTypeBase());
  9553. }
  9554. //But keep annotations used for typedef information, they should probably work differently
  9555. break;
  9556. }
  9557. case typemod_none:
  9558. {
  9559. //Ensure all records with the same format get the same original modifier
  9560. Owned<ITypeInfo> transformedType = NewHqlTransformer::transformType(type);
  9561. if (type->getTypeCode() == type_record)
  9562. {
  9563. IHqlExpression * record = queryExpression(type);
  9564. if (record && record->getOperator() == no_record)
  9565. {
  9566. OwnedHqlExpr transformedRecord = transform(record);
  9567. if (transformedRecord->queryBody() != transformedRecord)
  9568. return makeOriginalModifier(LINK(transformedType), LINK(transformedRecord));
  9569. }
  9570. }
  9571. return transformedType.getClear();
  9572. }
  9573. break;
  9574. }
  9575. return NewHqlTransformer::transformType(type);
  9576. }
  9577. bool isVoidOrDatasetOrList(IHqlExpression * expr)
  9578. {
  9579. ITypeInfo * type = expr->queryType();
  9580. switch (type->getTypeCode())
  9581. {
  9582. case type_void:
  9583. case type_table:
  9584. case type_row:
  9585. case type_groupedtable:
  9586. case type_set:
  9587. return true;
  9588. default:
  9589. return false;
  9590. }
  9591. }
  9592. inline IHqlExpression * createColon(IHqlExpression * l, HqlExprArray & actions)
  9593. {
  9594. HqlExprArray args;
  9595. args.append(*l);
  9596. ForEachItemIn(i, actions)
  9597. args.append(OLINK(actions.item(i)));
  9598. return createWrapper(no_colon, args);
  9599. }
  9600. void HqlTreeNormalizer::analyseExpr(IHqlExpression * expr)
  9601. {
  9602. IHqlExpression * body = expr->queryBody();
  9603. node_operator op = body->getOperator();
  9604. if ((op == no_record) && (expr != body))
  9605. {
  9606. IHqlExpression * symbol = queryNamedSymbol(expr);
  9607. if (symbol)
  9608. queryLocationIndependentExtra(body)->noteSymbol(expr);
  9609. }
  9610. if (alreadyVisited(body))
  9611. return;
  9612. //Record a list of all USE(name[,name]) so we know what needs fixing up, and all patterns with explicit defines already
  9613. switch (op)
  9614. {
  9615. case no_pat_use:
  9616. if (body->hasAttribute(nameAtom))
  9617. forwardReferences.append(*LINK(body));
  9618. break;
  9619. case no_pat_instance:
  9620. if (body->queryChild(0)->getOperator() == no_define)
  9621. defines.append(*LINK(body));
  9622. break;
  9623. case no_libraryscopeinstance:
  9624. analyseExpr(body->queryDefinition());
  9625. break;
  9626. case no_transform:
  9627. case no_call:
  9628. case no_externalcall:
  9629. {
  9630. IHqlExpression * record = queryOriginalRecord(body->queryType());
  9631. if (record)
  9632. analyseExpr(record);
  9633. break;
  9634. }
  9635. case no_attr_expr:
  9636. case no_record:
  9637. case no_ifblock:
  9638. case no_select:
  9639. analyseChildren(body);
  9640. return;
  9641. case no_field:
  9642. {
  9643. IHqlExpression * record = queryOriginalRecord(body->queryType());
  9644. if (record)
  9645. analyseExpr(record);
  9646. analyseChildren(body);
  9647. break;
  9648. }
  9649. }
  9650. Parent::analyseExpr(body);
  9651. }
  9652. IHqlExpression * HqlTreeNormalizer::makeRecursiveName(IAtom * searchModule, IAtom * searchName)
  9653. {
  9654. //If this symbol is already has a user define, use that instead of creating our own,
  9655. //because I don't cope very well with multiple defines on the same pattern instance.
  9656. ForEachItemIn(i, defines)
  9657. {
  9658. IHqlExpression & cur = defines.item(i);
  9659. IHqlExpression * moduleExpr = cur.queryChild(2);
  9660. IAtom * module = moduleExpr ? moduleExpr->queryBody()->queryName() : NULL;
  9661. IAtom * name = cur.queryChild(1)->queryBody()->queryName();
  9662. if (name == searchName && module == searchModule)
  9663. return LINK(cur.queryChild(0)->queryChild(1));
  9664. }
  9665. StringBuffer s;
  9666. s.append("$").append(searchModule).append(".").append(searchName);
  9667. return createConstant(s.str());
  9668. }
  9669. IHqlExpression * HqlTreeNormalizer::queryTransformPatternDefine(IHqlExpression * expr)
  9670. {
  9671. if (expr->queryChild(0)->getOperator() == no_define)
  9672. return NULL;
  9673. IHqlExpression * moduleExpr = expr->queryChild(2);
  9674. IAtom * module = moduleExpr ? moduleExpr->queryBody()->queryName() : NULL;
  9675. IAtom * name = expr->queryChild(1)->queryBody()->queryName();
  9676. ForEachItemIn(i, forwardReferences)
  9677. {
  9678. IHqlExpression * cur = &forwardReferences.item(i);
  9679. if ((name == queryPatUseName(cur)) && (module == queryPatUseModule(cur)))
  9680. {
  9681. IHqlExpression * base = transform(expr->queryChild(0));
  9682. HqlExprArray args;
  9683. args.append(*createValue(no_define, base->getType(), base, makeRecursiveName(module, name)));
  9684. unwindChildren(args, expr, 1);
  9685. return createValue(expr->getOperator(), transformType(expr->queryType()), args);
  9686. }
  9687. }
  9688. return NULL;
  9689. }
  9690. IHqlExpression * HqlTreeNormalizer::transformActionList(IHqlExpression * expr)
  9691. {
  9692. HqlExprArray args;
  9693. ForEachChild(i, expr)
  9694. {
  9695. IHqlExpression * cur = expr->queryChild(i);
  9696. if (cur->getOperator() != no_setmeta)
  9697. {
  9698. OwnedHqlExpr transformed = transform(cur);
  9699. if ((transformed->getOperator() != no_null) || !transformed->isAction())
  9700. args.append(*transformed.getClear());
  9701. }
  9702. }
  9703. return expr->clone(args);
  9704. }
  9705. IHqlExpression * HqlTreeNormalizer::transformCaseToIfs(IHqlExpression * expr)
  9706. {
  9707. unsigned max = numRealChildren(expr);
  9708. OwnedHqlExpr testVar = transform(expr->queryChild(0));
  9709. OwnedHqlExpr elseExpr = transform(expr->queryChild(max-1));
  9710. for (unsigned idx = max-2; idx != 0; idx--)
  9711. {
  9712. IHqlExpression * cur = expr->queryChild(idx);
  9713. IHqlExpression * curValue = cur->queryChild(0);
  9714. Owned<ITypeInfo> type = ::getPromotedECLType(testVar->queryType(), curValue->queryType());
  9715. OwnedHqlExpr castCurValue = ensureExprType(curValue, type);
  9716. OwnedHqlExpr test = createBoolExpr(no_eq, ensureExprType(testVar, type), transform(castCurValue));
  9717. if (options.constantFoldNormalize)
  9718. test.setown(foldConstantOperator(test, 0, NULL));
  9719. OwnedHqlExpr trueExpr = transform(cur->queryChild(1));
  9720. elseExpr.setown(createIf(test.getClear(), trueExpr.getClear(), elseExpr.getClear()));
  9721. if (options.constantFoldNormalize)
  9722. elseExpr.setown(foldConstantOperator(elseExpr, 0, NULL));
  9723. }
  9724. return elseExpr.getClear();
  9725. }
  9726. IHqlExpression * HqlTreeNormalizer::transformCaseToChoose(IHqlExpression * expr)
  9727. {
  9728. //For the moment only convert datasets to choose format. (Partly to test implementation.)
  9729. //Datarows are unlikely to benefit, and will cause additional work.
  9730. //Converting actions has implications for needing new activity kinds, and support in thor.
  9731. if (!expr->isDataset())
  9732. return transformCaseToIfs(expr);
  9733. unsigned max = numRealChildren(expr);
  9734. HqlExprArray branches;
  9735. OwnedHqlExpr testVar = transform(expr->queryChild(0));
  9736. HqlExprArray caseArgs;
  9737. caseArgs.append(*LINK(testVar));
  9738. bool isNullMapping = true;
  9739. for (unsigned i1=1; i1 < max-1; i1++)
  9740. {
  9741. IHqlExpression * mapto = expr->queryChild(i1);
  9742. OwnedHqlExpr key = transform(mapto->queryChild(0));
  9743. OwnedHqlExpr branch = transform(mapto->queryChild(1));
  9744. unsigned matchIndex = branches.find(*branch);
  9745. if (matchIndex == NotFound)
  9746. {
  9747. matchIndex = branches.ordinality();
  9748. branches.append(*branch.getClear());
  9749. }
  9750. OwnedHqlExpr value = getSizetConstant(matchIndex+1);
  9751. //MORE: Could calculate a delta and add/subtract it from testVar
  9752. if (!key->queryValue() || !matchesConstantValue(key, matchIndex+1))
  9753. isNullMapping = false;
  9754. caseArgs.append(*createValue(no_mapto, value->getType(), key.getClear(), LINK(value)));
  9755. }
  9756. caseArgs.append(*getSizetConstant(max-1));
  9757. HqlExprArray args;
  9758. if (isNullMapping)
  9759. args.append(*LINK(testVar));
  9760. else
  9761. args.append(*createValue(no_case, LINK(sizetType), caseArgs));
  9762. appendArray(args, branches);
  9763. args.append(*transform(expr->queryChild(max-1)));
  9764. if (expr->isDataset())
  9765. return createDataset(no_chooseds, args);
  9766. return createAction(no_choose, args);
  9767. }
  9768. IHqlExpression * HqlTreeNormalizer::transformEvaluate(IHqlExpression * expr)
  9769. {
  9770. //Evaluate causes chaos - so translate it to a different form.
  9771. //following cases supported so far:
  9772. //EVALUATE(LEFT/RIGHT, g()) -> g(LEFT)
  9773. //EVAlUATE(x, field) -> x.field;
  9774. //EVALUATE(t[n], e) -> table(t,{f1 := e})[n].f1;
  9775. IHqlExpression * ds = expr->queryChild(0);
  9776. IHqlExpression * attr = expr->queryChild(1);
  9777. OwnedHqlExpr transformed;
  9778. OwnedHqlExpr activeTable = getActiveTableSelector();
  9779. if ((attr->getOperator() == no_select) && (attr->queryChild(0) == activeTable))
  9780. {
  9781. //EVAlUATE(x, field) -> x.field;
  9782. transformed.setown(createSelectExpr(LINK(ds), LINK(attr->queryChild(1))));
  9783. }
  9784. else if (attr->isConstant())
  9785. transformed.set(attr);
  9786. else
  9787. {
  9788. switch (ds->getOperator())
  9789. {
  9790. case no_left:
  9791. case no_right:
  9792. //EVALUATE(LEFT/RIGHT, g()) -> g(LEFT)
  9793. //May change too many datasets?
  9794. transformed.setown(replaceSelector(attr, activeTable, ds));
  9795. break;
  9796. case no_select:
  9797. //EVALUATE(x.y, g()) -> EVALUATE(x, g(y))
  9798. //May change too many datasets?
  9799. transformed.setown(createValue(no_evaluate, attr->getType(), LINK(ds->queryChild(0)), replaceSelector(attr, activeTable, ds->queryChild(1))));
  9800. break;
  9801. case no_selectnth:
  9802. {
  9803. IHqlExpression * baseDs = ds->queryChild(0);
  9804. if ((attr->getOperator() == no_select) && (attr->queryChild(0)->queryNormalizedSelector() == baseDs->queryNormalizedSelector()))
  9805. {
  9806. //Special case a select same as field...
  9807. transformed.setown(createSelectExpr(LINK(ds), LINK(attr->queryChild(1))));
  9808. }
  9809. else
  9810. {
  9811. //EVALUATE(t[n], e) -> table(t,{f1 := e})[n].f1;
  9812. OwnedHqlExpr field = createField(valueId, expr->getType(), NULL);
  9813. IHqlExpression * aggregateRecord = createRecord(field);
  9814. IHqlExpression * newAttr = replaceSelector(attr, activeTable, baseDs);
  9815. IHqlExpression * assign = createAssign(createSelectExpr(getSelf(aggregateRecord), LINK(field)), newAttr);
  9816. IHqlExpression * transform = createValue(no_newtransform, makeTransformType(aggregateRecord->getType()), assign);
  9817. IHqlExpression * project = createDataset(no_newusertable, LINK(baseDs), createComma(aggregateRecord, transform));
  9818. project = createRow(no_selectnth, project, LINK(ds->queryChild(1)));
  9819. transformed.setown(createSelectExpr(project, LINK(field)));
  9820. }
  9821. break;
  9822. }
  9823. default:
  9824. UNIMPLEMENTED;
  9825. }
  9826. }
  9827. return transform(transformed);
  9828. }
  9829. IHqlExpression * HqlTreeNormalizer::transformMap(IHqlExpression * expr)
  9830. {
  9831. unsigned max = numRealChildren(expr);
  9832. OwnedHqlExpr elseExpr = transform(expr->queryChild(max-1));
  9833. for (unsigned idx = max-1; idx-- != 0; )
  9834. {
  9835. IHqlExpression * cur = expr->queryChild(idx);
  9836. elseExpr.setown(createIf(transform(cur->queryChild(0)), transform(cur->queryChild(1)), elseExpr.getClear()));
  9837. if (options.constantFoldNormalize)
  9838. elseExpr.setown(foldConstantOperator(elseExpr, 0, NULL));
  9839. }
  9840. return elseExpr.getClear();
  9841. }
  9842. class AbortingErrorReceiver : extends CInterface, implements IErrorReceiver
  9843. {
  9844. public:
  9845. AbortingErrorReceiver(IErrorReceiver * _errors)
  9846. {
  9847. errors = _errors ? _errors : &defaultReporter;
  9848. }
  9849. IMPLEMENT_IINTERFACE
  9850. virtual void reportError(int errNo, const char *msg, const char *filename, int lineno, int column, int pos)
  9851. {
  9852. errors->reportError(errNo, msg, filename, lineno, column, pos);
  9853. throw MakeStringException(HQLERR_ErrorAlreadyReported, "%s", "");
  9854. }
  9855. virtual void report(IECLError* error)
  9856. {
  9857. errors->report(error);
  9858. throw MakeStringException(HQLERR_ErrorAlreadyReported, "%s", "");
  9859. }
  9860. virtual void reportWarning(int warnNo, const char *msg, const char *filename, int lineno, int column, int pos)
  9861. {
  9862. errors->reportWarning(warnNo, msg, filename, lineno, column, pos);
  9863. }
  9864. virtual size32_t errCount()
  9865. {
  9866. return errors->errCount();
  9867. }
  9868. virtual size32_t warnCount()
  9869. {
  9870. return errors->warnCount();
  9871. }
  9872. protected:
  9873. IErrorReceiver * errors;
  9874. ThrowingErrorReceiver defaultReporter;
  9875. };
  9876. IHqlExpression * HqlTreeNormalizer::transformTempRow(IHqlExpression * expr)
  9877. {
  9878. ECLlocation dummyLocation(0, 0, 0, NULL);
  9879. AbortingErrorReceiver errorReporter(errors);
  9880. OwnedHqlExpr createRow = convertTempRowToCreateRow(&errorReporter, dummyLocation, expr);
  9881. return transform(createRow);
  9882. }
  9883. IHqlExpression * HqlTreeNormalizer::transformTempTable(IHqlExpression * expr)
  9884. {
  9885. ECLlocation dummyLocation(0, 0, 0, NULL);
  9886. AbortingErrorReceiver errorReporter(errors);
  9887. OwnedHqlExpr inlineTable = convertTempTableToInlineTable(errorReporter, dummyLocation, expr);
  9888. if (expr != inlineTable)
  9889. return transform(inlineTable);
  9890. IHqlExpression * oldValues = expr->queryChild(0);
  9891. IHqlExpression * oldRecord = expr->queryChild(1);
  9892. OwnedHqlExpr values = normalizeListCasts(oldValues);
  9893. OwnedHqlExpr newRecord = transform(oldRecord);
  9894. node_operator valueOp = values->getOperator();
  9895. if ((valueOp != no_recordlist) && (valueOp != no_list))
  9896. {
  9897. if (queryRealChild(expr, 2))
  9898. return Parent::createTransformed(expr);
  9899. HqlExprArray children;
  9900. children.append(*transform(oldValues));
  9901. children.append(*LINK(newRecord));
  9902. children.append(*convertRecordToAssigns(oldRecord, true, true));
  9903. return expr->clone(children);
  9904. }
  9905. //should have already been handled by convertTempTableToInlineTable();
  9906. throwUnexpected();
  9907. }
  9908. IHqlExpression * HqlTreeNormalizer::transformNewKeyIndex(IHqlExpression * expr)
  9909. {
  9910. IHqlExpression * ds = expr->queryChild(0);
  9911. //If dataset is already null, then do standard
  9912. if (ds->getOperator() == no_null)
  9913. return completeTransform(expr);
  9914. //Before we do anything replace the dataset with a null dataset. This ensures we do the minimum transformation on the rest of the tree
  9915. OwnedHqlExpr newDs = createDataset(no_null, LINK(ds->queryRecord()));
  9916. HqlExprArray args;
  9917. args.append(*ds->cloneAllAnnotations(newDs));
  9918. args.append(*LINK(expr->queryChild(1)));
  9919. args.append(*quickFullReplaceExpression(expr->queryChild(2), ds->queryNormalizedSelector(), newDs));
  9920. unwindChildren(args, expr, 3);
  9921. OwnedHqlExpr ret = expr->clone(args);
  9922. return transform(ret);
  9923. }
  9924. IHqlExpression * HqlTreeNormalizer::transformKeyIndex(IHqlExpression * expr)
  9925. {
  9926. //Before we do anything replace the dataset with a null dataset. This ensures we do the minimum transformation on the rest of the tree
  9927. IHqlExpression * ds = expr->queryChild(0);
  9928. OwnedHqlExpr newDs = createDataset(no_null, LINK(ds->queryRecord()));
  9929. HqlExprArray args;
  9930. args.append(*ds->cloneAllAnnotations(newDs));
  9931. args.append(*quickFullReplaceExpression(expr->queryChild(1), ds->queryNormalizedSelector(), newDs));
  9932. unwindChildren(args, expr, 2);
  9933. OwnedHqlExpr normalized = expr->clone(args);
  9934. //Now convert from the no_keyindex format to the no_newkeyindex format.
  9935. //force the 1st argument to be processed..
  9936. OwnedHqlExpr transformed = completeTransform(normalized);
  9937. HqlExprArray assigns;
  9938. OwnedHqlExpr self = getSelf(transformed);
  9939. convertRecordToAssigns(assigns, normalized->queryChild(1), self, true, false); // fpos may not have a value...
  9940. args.kill();
  9941. unwindChildren(args, transformed);
  9942. args.add(*createValue(no_newtransform, makeTransformType(transformed->queryChild(1)->getType()), assigns), 2);
  9943. OwnedHqlExpr ret = createDataset(no_newkeyindex, args);
  9944. //MORE: This would be the place to add a FILTERED() attribute derived from any filters applied to the dataset
  9945. return expr->cloneAllAnnotations(ret);
  9946. }
  9947. IHqlExpression * HqlTreeNormalizer::transformMerge(IHqlExpression * expr)
  9948. {
  9949. HqlExprArray children;
  9950. transformChildren(expr, children);
  9951. HqlExprArray args;
  9952. reorderAttributesToEnd(args, children);
  9953. return expr->clone(args);
  9954. }
  9955. IHqlExpression * HqlTreeNormalizer::transformPatNamedUse(IHqlExpression * expr)
  9956. {
  9957. OwnedHqlExpr define = makeRecursiveName(queryPatUseModule(expr), queryPatUseName(expr));
  9958. HqlExprArray args;
  9959. ForEachChild(i, expr)
  9960. {
  9961. IHqlExpression * cur = queryRealChild(expr, i);
  9962. if (cur)
  9963. args.append(*LINK(cur));
  9964. }
  9965. args.append(*define.getClear());
  9966. ITypeInfo * type = expr->queryType();
  9967. return createValue(no_pat_use, transformType(type), args);
  9968. }
  9969. IHqlExpression * HqlTreeNormalizer::transformPatCheckIn(IHqlExpression * expr)
  9970. {
  9971. OwnedHqlExpr set = transform(expr->queryChild(1));
  9972. //because this is a check pattern, we are free to remove any instance tags - they can't be used for matching.
  9973. while (set->getOperator() == no_pat_instance)
  9974. set.set(set->queryChild(0));
  9975. if (set->getOperator() == no_pat_set)
  9976. {
  9977. IHqlExpression * notAttr = expr->queryAttribute(notAtom);
  9978. if (!notAttr)
  9979. return LINK(set);
  9980. HqlExprArray args;
  9981. unwindChildren(args, set);
  9982. if (args.find(*notAttr) == NotFound)
  9983. args.append(*LINK(notAttr));
  9984. else
  9985. args.zap(*notAttr);
  9986. return set->clone(args);
  9987. }
  9988. HqlExprArray values, newValues;
  9989. set->unwindList(values, no_pat_or);
  9990. ForEachItemIn(idx, values)
  9991. {
  9992. IHqlExpression * cur = &values.item(idx);
  9993. while (cur->getOperator() == no_pat_instance)
  9994. cur = cur->queryChild(0);
  9995. if (cur->getOperator() != no_pat_const)
  9996. return NULL;
  9997. IValue * value = cur->queryChild(0)->queryValue();
  9998. if (!value)
  9999. return NULL;
  10000. ITypeInfo * type = value->queryType();
  10001. if (type->getStringLen() != 1)
  10002. return NULL;
  10003. switch (type->getTypeCode())
  10004. {
  10005. case type_string:
  10006. newValues.append(*createConstant((int)*(const byte *)value->queryValue()));
  10007. break;
  10008. case type_unicode:
  10009. newValues.append(*createConstant((int)*(const UChar *)value->queryValue()));
  10010. break;
  10011. case type_utf8:
  10012. newValues.append(*createConstant((int)rtlUtf8Char((const char *)value->queryValue())));
  10013. break;
  10014. default:
  10015. return NULL;
  10016. }
  10017. }
  10018. if (expr->hasAttribute(notAtom))
  10019. newValues.append(*createAttribute(notAtom));
  10020. return createValue(no_pat_set, makePatternType(), newValues);
  10021. }
  10022. IHqlExpression * HqlTreeNormalizer::transformTable(IHqlExpression * untransformed)
  10023. {
  10024. //Convert DATASET('xx', rec, PIPE('z'))
  10025. //DATASET('xx', rec, THOR) | PIPE('z')
  10026. OwnedHqlExpr transformed = completeTransform(untransformed);
  10027. IHqlExpression * mode = transformed->queryChild(2);
  10028. if (mode->getOperator() != no_pipe)
  10029. return transformed.getClear();
  10030. IHqlExpression * filename = transformed->queryChild(0);
  10031. StringBuffer s;
  10032. if (getStringValue(s, filename, NULL).length() == 0)
  10033. return transformed.getClear();
  10034. OwnedHqlExpr modeThor = createValue(no_thor);
  10035. IHqlExpression * diskRead = replaceChild(transformed, 2, modeThor);
  10036. HqlExprArray args;
  10037. args.append(*diskRead);
  10038. unwindChildren(args, mode);
  10039. return createDataset(no_pipe, args);
  10040. }
  10041. IHqlExpression * HqlTreeNormalizer::optimizeAssignSkip(HqlExprArray & children, IHqlExpression * expr, IHqlExpression * cond, unsigned depth)
  10042. {
  10043. if (!containsSkip(expr))
  10044. return LINK(expr);
  10045. switch (expr->getOperator())
  10046. {
  10047. case no_skip:
  10048. children.append(*createValue(no_skip, makeVoidType(), LINK(cond)));
  10049. return NULL;
  10050. case no_cast:
  10051. case no_implicitcast:
  10052. {
  10053. bool same= true;
  10054. HqlExprArray args;
  10055. ForEachChild(i, expr)
  10056. {
  10057. IHqlExpression * cur = expr->queryChild(i);
  10058. IHqlExpression * ret = optimizeAssignSkip(children, cur, cond, depth);
  10059. if (!ret)
  10060. return NULL;
  10061. args.append(*ret);
  10062. if (cur != ret)
  10063. same = false;
  10064. }
  10065. if (same)
  10066. return LINK(expr);
  10067. return expr->clone(args);
  10068. }
  10069. //could try and handle map/case/choose, but less common, and more complicated.
  10070. case no_if:
  10071. {
  10072. //For the moment only hoist SKIPS within a single level of IF() conditions.
  10073. //Multi level rarely occur, and don't significantly improve the code
  10074. if (depth != 0)
  10075. return LINK(expr);
  10076. IHqlExpression * thisCond = expr->queryChild(0);
  10077. IHqlExpression * left = expr->queryChild(1);
  10078. IHqlExpression * right = expr->queryChild(2);
  10079. if (!right)
  10080. return LINK(expr);
  10081. OwnedHqlExpr leftCond = extendConditionOwn(no_and, LINK(cond), LINK(thisCond));
  10082. OwnedHqlExpr inverseCond = createValue(no_not, makeBoolType(), LINK(thisCond));
  10083. OwnedHqlExpr rightCond = extendConditionOwn(no_and, LINK(cond), LINK(inverseCond));
  10084. OwnedHqlExpr newLeft = optimizeAssignSkip(children, left, leftCond, depth+1);
  10085. OwnedHqlExpr newRight = optimizeAssignSkip(children, right, rightCond, depth+1);
  10086. if (!newLeft && !newRight)
  10087. return NULL;
  10088. //if cond is true, then it will skip => no need to check the condition
  10089. if (!newLeft)
  10090. return LINK(newRight);
  10091. if (!newRight)
  10092. return LINK(newLeft);
  10093. if (left == newLeft && right == newRight)
  10094. return LINK(expr);
  10095. HqlExprArray args;
  10096. unwindChildren(args, expr);
  10097. args.replace(*newLeft.getClear(), 1);
  10098. args.replace(*newRight.getClear(), 2);
  10099. return expr->clone(args);
  10100. }
  10101. default:
  10102. return LINK(expr);
  10103. }
  10104. }
  10105. bool HqlTreeNormalizer::transformTransform(HqlExprArray & children, IHqlExpression * expr)
  10106. {
  10107. bool same = true;
  10108. ForEachChild(i, expr)
  10109. {
  10110. IHqlExpression * cur = expr->queryChild(i);
  10111. switch (cur->getOperator())
  10112. {
  10113. case no_assignall:
  10114. transformTransform(children, cur);
  10115. same = false; // assign all is removed and assigns expanded in its place
  10116. break;
  10117. case no_assign:
  10118. {
  10119. OwnedHqlExpr assign = transform(cur);
  10120. if (cur->getInfoFlags() & HEFcontainsSkip)
  10121. {
  10122. IHqlExpression * rhs = assign->queryChild(1);
  10123. OwnedHqlExpr newRhs = optimizeAssignSkip(children, rhs, NULL, 0);
  10124. if (rhs != newRhs)
  10125. {
  10126. IHqlExpression * lhs = assign->queryChild(0);
  10127. if (!newRhs)
  10128. newRhs.setown(createNullExpr(rhs));
  10129. assign.setown(createAssign(LINK(lhs), newRhs.getClear()));
  10130. }
  10131. }
  10132. if (assign != cur)
  10133. same = false;
  10134. children.append(*assign.getClear());
  10135. break;
  10136. }
  10137. default:
  10138. {
  10139. IHqlExpression * next = transform(cur);
  10140. children.append(*next);
  10141. if (next != cur)
  10142. same = false;
  10143. }
  10144. break;
  10145. }
  10146. }
  10147. return same;
  10148. }
  10149. IHqlExpression * HqlTreeNormalizer::transformTransform(IHqlExpression * expr)
  10150. {
  10151. HqlExprArray children;
  10152. IHqlExpression * oldRecord = queryOriginalRecord(expr);
  10153. OwnedHqlExpr newRecord = transform(oldRecord);
  10154. bool same = transformTransform(children, expr);
  10155. if ((oldRecord != newRecord) || !same)
  10156. {
  10157. ITypeInfo * newRecordType = createRecordType(newRecord);
  10158. OwnedHqlExpr ret = createValue(expr->getOperator(), makeTransformType(newRecordType), children);
  10159. return expr->cloneAllAnnotations(ret);
  10160. }
  10161. return LINK(expr);
  10162. }
  10163. IHqlExpression * HqlTreeNormalizer::transformIfAssert(node_operator newOp, IHqlExpression * expr)
  10164. {
  10165. unsigned max = expr->numChildren();
  10166. HqlExprArray children;
  10167. bool same = transformChildren(expr, children);
  10168. if ((expr->hasAttribute(assertAtom) || (options.assertSortedDistributed && (newOp != no_assertgrouped))) && !options.removeAsserts)
  10169. {
  10170. OwnedHqlExpr ret = createDataset(newOp, children);
  10171. return expr->cloneAllAnnotations(ret);
  10172. }
  10173. if (!same)
  10174. return expr->clone(children);
  10175. return LINK(expr);
  10176. }
  10177. IHqlExpression * HqlTreeNormalizer::transformExecuteWhen(IHqlExpression * expr)
  10178. {
  10179. OwnedHqlExpr transformedAction = transform(expr->queryChild(1));
  10180. if ((transformedAction->getOperator() == no_setmeta) ||
  10181. ((transformedAction->getOperator() == no_null) && transformedAction->isAction()))
  10182. return transform(expr->queryChild(0));
  10183. HqlExprArray children;
  10184. if (translator.queryOptions().convertWhenExecutedToCompound && !expr->queryChild(2))
  10185. {
  10186. //For the moment, for maximal compatibility, convert no_executewhen to a no_compound
  10187. children.append(*transformedAction.getClear());
  10188. children.append(*transform(expr->queryChild(0)));
  10189. OwnedHqlExpr ret = createCompound(children);
  10190. return expr->cloneAllAnnotations(ret);
  10191. }
  10192. //Need to create a unique id to differentiate the different side effects.
  10193. transformChildren(expr, children);
  10194. assertex(!expr->hasAttribute(_uid_Atom));
  10195. children.append(*createUniqueId());
  10196. return expr->clone(children);
  10197. }
  10198. IHqlExpression * HqlTreeNormalizer::transformWithinFilter(IHqlExpression * expr)
  10199. {
  10200. OwnedHqlExpr ds = transform(expr->queryChild(0));
  10201. HqlExprArray children;
  10202. children.append(*LINK(ds));
  10203. ForEachChildFrom(i, expr, 1)
  10204. {
  10205. IHqlExpression * filter = expr->queryChild(i);
  10206. if (filter->getOperator() == no_within)
  10207. {
  10208. IHqlExpression * scope = filter->queryChild(0);
  10209. while (scope->getOperator() == no_filter)
  10210. {
  10211. ForEachChildFrom(i2, scope, 1)
  10212. children.append(*transform(scope->queryChild(i2)));
  10213. scope = scope->queryChild(0);
  10214. }
  10215. ds.setown(createDataset(no_related, LINK(ds), transform(scope)));
  10216. }
  10217. else
  10218. children.append(*transform(filter));
  10219. }
  10220. if (children.ordinality() == 1)
  10221. return ds.getClear();
  10222. children.replace(*ds.getClear(), 0);
  10223. return expr->clone(children);
  10224. }
  10225. IHqlExpression * HqlTreeNormalizer::validateKeyedJoin(IHqlExpression * expr)
  10226. {
  10227. //Transform join(x, local(key), ....) to join(x, key, ...., local);
  10228. HqlExprArray children;
  10229. transformChildren(expr, children);
  10230. unsigned prevChildren = children.ordinality();
  10231. IHqlExpression * rhs = &children.item(1);
  10232. loop
  10233. {
  10234. node_operator op = rhs->getOperator();
  10235. if (op == no_forcelocal)
  10236. children.append(*createLocalAttribute());
  10237. else if (op == no_forcenolocal)
  10238. children.append(*createAttribute(noLocalAtom));
  10239. else if ((op == no_section) || (op == no_sectioninput))
  10240. {
  10241. //remove the section
  10242. }
  10243. else
  10244. break;
  10245. rhs = rhs->queryChild(0);
  10246. }
  10247. if (prevChildren != children.ordinality())
  10248. {
  10249. if (isKey(rhs))
  10250. children.replace(*LINK(rhs), 1);
  10251. else
  10252. children.trunc(prevChildren);
  10253. }
  10254. //Now check that a join marked as keyed has a key as the rhs.
  10255. IHqlExpression * keyed = expr->queryAttribute(keyedAtom);
  10256. if (!keyed || keyed->queryChild(0) || isKey(rhs))
  10257. return expr->clone(children);
  10258. if (options.allowActivityForKeyedJoin)
  10259. {
  10260. children.append(*createAttribute(_complexKeyed_Atom));
  10261. return expr->clone(children);
  10262. }
  10263. StringBuffer s;
  10264. if (expr->queryName())
  10265. s.append(" (").append(expr->queryName()).append(")");
  10266. throwError1(HQLERR_RhsKeyedNotKey, s.str());
  10267. return NULL;
  10268. }
  10269. //A bit of a nasty dependency - this should match the capabilities of the code in hqlsource for finding selectors
  10270. static void gatherPotentialSelectors(HqlExprArray & args, IHqlExpression * expr)
  10271. {
  10272. node_operator op = expr->getOperator();
  10273. switch (op)
  10274. {
  10275. case no_and:
  10276. case no_or:
  10277. case no_eq:
  10278. case no_ne:
  10279. case no_gt:
  10280. case no_lt:
  10281. case no_ge:
  10282. case no_le:
  10283. gatherPotentialSelectors(args, expr->queryChild(0));
  10284. gatherPotentialSelectors(args, expr->queryChild(1));
  10285. break;
  10286. case no_if:
  10287. case no_case:
  10288. case no_map:
  10289. case no_mapto:
  10290. {
  10291. ForEachChild(i, expr)
  10292. gatherPotentialSelectors(args, expr->queryChild(i));
  10293. break;
  10294. }
  10295. case no_assertkeyed:
  10296. case no_assertstepped:
  10297. case no_not:
  10298. case no_between:
  10299. case no_notbetween:
  10300. case no_cast:
  10301. case no_implicitcast:
  10302. case no_notin:
  10303. case no_in:
  10304. case no_add:
  10305. case no_sub:
  10306. case no_substring:
  10307. gatherPotentialSelectors(args, expr->queryChild(0));
  10308. break;
  10309. case no_select:
  10310. {
  10311. IHqlExpression * selector = expr->queryNormalizedSelector();
  10312. if (!args.contains(*selector))
  10313. args.append(*LINK(selector));
  10314. break;
  10315. }
  10316. }
  10317. }
  10318. IHqlExpression * HqlTreeNormalizer::transformChildrenNoAnnotations(IHqlExpression * expr)
  10319. {
  10320. HqlExprArray args;
  10321. ForEachChild(i, expr)
  10322. {
  10323. OwnedHqlExpr newChild = transform(expr->queryChild(i)->queryBody());
  10324. args.append(*LINK(newChild->queryBody()));
  10325. }
  10326. return completeTransform(expr, args);
  10327. }
  10328. //The following symbol removal code works, but I'm not sure I want to do it at the moment because of the changes to the HOLe queries
  10329. //Remove as many named symbols as we can - try and keep for datasets and statements so can go in the tree.
  10330. IHqlExpression * HqlTreeNormalizer::createTransformed(IHqlExpression * expr)
  10331. {
  10332. IHqlExpression * body = expr->queryBody(true);
  10333. node_operator op = expr->getOperator();
  10334. if (expr != body)
  10335. {
  10336. OwnedHqlExpr transformedBody;
  10337. try
  10338. {
  10339. transformedBody.setown(transform(body));
  10340. }
  10341. catch (IException * e)
  10342. {
  10343. if (dynamic_cast<IECLError *>(e))
  10344. throw;
  10345. IHqlExpression * location = queryLocation(expr);
  10346. if (location)
  10347. {
  10348. IECLError * error = annotateExceptionWithLocation(e, location);
  10349. e->Release();
  10350. throw error;
  10351. }
  10352. throw;
  10353. }
  10354. //Don't retain any annotations on records - except for a symbol which maybe added in the createTransform()
  10355. //code. Otherwise expressions that would otherwise be commoned up are treated as different.
  10356. if (op == no_record)
  10357. return transformedBody.getClear();
  10358. switch (expr->getAnnotationKind())
  10359. {
  10360. case annotate_warning:
  10361. case annotate_parsemeta:
  10362. return transformedBody.getClear();
  10363. case annotate_javadoc:
  10364. return expr->cloneAnnotation(transformedBody);
  10365. case annotate_meta:
  10366. {
  10367. HqlExprArray preservedMeta;
  10368. IHqlExpression * cur;
  10369. bool changed = false;
  10370. for (unsigned i=0; (cur = expr->queryAnnotationParameter(i)) != 0; i++)
  10371. {
  10372. IAtom * name = cur->queryName();
  10373. bool keep = true;
  10374. if (name == deprecatedAtom)
  10375. keep = false;
  10376. else if (!options.allowSections && (name == sectionAtom))
  10377. keep = false;
  10378. if (keep)
  10379. preservedMeta.append(*LINK(cur));
  10380. else
  10381. changed = true;
  10382. }
  10383. if (changed)
  10384. {
  10385. if (preservedMeta.ordinality() == 0)
  10386. return transformedBody.getClear();
  10387. return createMetaAnnotation(transformedBody.getClear(), preservedMeta);
  10388. }
  10389. break; // default action
  10390. }
  10391. case annotate_symbol:
  10392. {
  10393. if (hasNamedSymbol(transformedBody))
  10394. return transformedBody.getClear();
  10395. IIdAtom * id = expr->queryId();
  10396. IIdAtom * simpleId = simplifySymbolName(id, options.commonUniqueNameAttributes);
  10397. if (simpleId)
  10398. return cloneSymbol(expr, simpleId, transformedBody, NULL, NULL);
  10399. break;
  10400. }
  10401. } // switch(kind)
  10402. if (body == transformedBody)
  10403. return LINK(expr);
  10404. return expr->cloneAnnotation(transformedBody);
  10405. }
  10406. //MORE: Types of all pattern attributes should also be normalized. Currently they aren't which causes discrepancies between types
  10407. //for ghoogle.hql. It could conceivably cause problems later on.
  10408. if (forwardReferences.ordinality())
  10409. {
  10410. if (op == no_pat_use && expr->hasAttribute(nameAtom))
  10411. return transformPatNamedUse(expr);
  10412. if (op == no_pat_instance)
  10413. {
  10414. OwnedHqlExpr ret = queryTransformPatternDefine (expr);
  10415. if (ret)
  10416. return ret.getClear();
  10417. }
  10418. }
  10419. IHqlExpression * sideEffects = expr->queryAttribute(_sideEffect_Atom);
  10420. if (sideEffects)
  10421. {
  10422. HqlExprArray args;
  10423. unwindChildren(args, expr);
  10424. args.zap(*sideEffects);
  10425. OwnedHqlExpr next = createCompound(LINK(sideEffects->queryChild(0)), expr->clone(args));
  10426. return transform(next);
  10427. }
  10428. if (!options.constantFoldNormalize)
  10429. return createTransformedBody(expr);
  10430. switch (op)
  10431. {
  10432. case no_if:
  10433. {
  10434. OwnedHqlExpr cond = transform(expr->queryChild(0));
  10435. IValue * condValue = cond->queryValue();
  10436. if (condValue)
  10437. {
  10438. unsigned idx = condValue->getBoolValue() ? 1 : 2;
  10439. IHqlExpression * branch = expr->queryChild(idx);
  10440. if (branch)
  10441. return transform(branch);
  10442. assertex(expr->isAction());
  10443. return createValue(no_null, makeVoidType());
  10444. }
  10445. break;
  10446. }
  10447. case no_choose:
  10448. case no_chooseds:
  10449. {
  10450. OwnedHqlExpr cond = transform(expr->queryChild(0));
  10451. IValue * condValue = cond->queryValue();
  10452. if (condValue)
  10453. {
  10454. unsigned idx = (unsigned)condValue->getIntValue();
  10455. IHqlExpression * branch = queryRealChild(expr, idx);
  10456. if (branch)
  10457. return transform(branch);
  10458. IHqlExpression * defaultExpr = queryLastNonAttribute(expr);
  10459. return transform(defaultExpr);
  10460. }
  10461. break;
  10462. }
  10463. case no_and:
  10464. {
  10465. IHqlExpression * left = expr->queryChild(0);
  10466. IHqlExpression * right = expr->queryChild(1);
  10467. OwnedHqlExpr simpleRight = transformSimpleConst(right);
  10468. if (simpleRight->queryValue())
  10469. {
  10470. if (simpleRight->queryValue()->getBoolValue())
  10471. return transform(left);
  10472. return simpleRight.getClear();
  10473. }
  10474. OwnedHqlExpr newLeft = transform(left);
  10475. IValue * leftValue = newLeft->queryValue();
  10476. if (leftValue)
  10477. {
  10478. if (!leftValue->getBoolValue())
  10479. return newLeft.getClear();
  10480. return transform(right);
  10481. }
  10482. break;
  10483. }
  10484. case no_or:
  10485. {
  10486. IHqlExpression * left = expr->queryChild(0);
  10487. IHqlExpression * right = expr->queryChild(1);
  10488. OwnedHqlExpr simpleRight = transformSimpleConst(right);
  10489. if (simpleRight->queryValue())
  10490. {
  10491. if (!simpleRight->queryValue()->getBoolValue())
  10492. return transform(left);
  10493. return simpleRight.getClear();
  10494. }
  10495. OwnedHqlExpr newLeft = transform(left);
  10496. IValue * leftValue = newLeft->queryValue();
  10497. if (leftValue)
  10498. {
  10499. if (leftValue->getBoolValue())
  10500. return newLeft.getClear();
  10501. return transform(right);
  10502. }
  10503. break;
  10504. }
  10505. case no_attr:
  10506. if (expr->queryName() == _original_Atom)
  10507. return LINK(expr);
  10508. break;
  10509. }
  10510. OwnedHqlExpr transformed = createTransformedBody(expr);
  10511. return foldConstantOperator(transformed, 0, NULL);
  10512. }
  // Builds the normalized replacement for a single expression node.
  //
  // The function dispatches on expr->getOperator().  A case that fully handles the node
  // returns its result directly; a case that ends in `break` (or an operator with no case
  // at all) falls through to the generic code after the switch.  That generic code
  // transforms every child, rebuilds pattern/rule expressions whose type changed, clones
  // the node if any child changed, and otherwise returns LINK(expr).
  //
  // expr   : the node to normalize; it is only queried here, never released.
  // return : a linked expression (every return path goes through LINK(), getClear() or a
  //          create*/clone call) - presumably owned by the caller; confirm against the base transformer.
  // throws : several cases raise errors (throwError/throwError1, MakeStringException, assertex).
  10513. IHqlExpression * HqlTreeNormalizer::createTransformedBody(IHqlExpression * expr)
  10514. {
  10515. node_operator op = expr->getOperator();
  10516. switch (op)
  10517. {
  10518. case no_constant:
  10519. return LINK(expr); // avoid creating an array in default code...
  // CASE/MAP expressions for which isVoidOrDatasetOrList() or isDictionary() is true are
  // rewritten by dedicated helpers; all others get the generic child transform below.
  10520. case no_case:
  10521. if (isVoidOrDatasetOrList(expr) || expr->isDictionary())
  10522. return transformCaseToChoose(expr);
  10523. break;
  10524. case no_map:
  10525. if (isVoidOrDatasetOrList(expr) || expr->isDictionary())
  10526. return transformMap(expr);
  10527. break;
  10528. case no_transform:
  10529. //optimize location of skips
  10530. return transformTransform(expr);
  10531. case no_getresult:
  10532. case no_newtransform:
  10533. {
  10534. IHqlExpression * record = queryOriginalRecord(expr);
  // The transformed record is released straight away: the call is made only for its
  // side effect (presumably so the record's mapping is already cached - TODO confirm).
  10535. if (record)
  10536. ::Release(transform(record));
  10537. LinkedHqlExpr cleaned = expr;
  10538. //remove any no_assignall children... could really do for no_transform as well... would reduce clarity of graph ecl
  10539. if ((op == no_newtransform) && queryChildOperator(no_assignall, expr))
  10540. {
  10541. HqlExprArray args;
  10542. ForEachChild(i, expr)
  10543. expr->queryChild(i)->unwindList(args, no_assignall);
  10544. cleaned.setown(expr->clone(args));
  10545. }
  10546. return Parent::createTransformed(cleaned);
  10547. }
  // Both forms are converted by convertSelectToProject() using the transformed child 1
  // (named newRecord here, so presumably the record argument).
  10548. case no_usertable:
  10549. case no_selectfields:
  10550. {
  10551. OwnedHqlExpr newRecord = transform(expr->queryChild(1));
  10552. return convertSelectToProject(newRecord, expr);
  10553. }
  // The following operators are each mapped onto their no_new* counterpart by
  // removeDefaultsFromExpr(); the meaning of the numeric argument is not visible here.
  10554. case no_parse:
  10555. return removeDefaultsFromExpr(expr, 3, no_newparse);
  10556. case no_xmlparse:
  10557. return removeDefaultsFromExpr(expr, 2, no_newxmlparse);
  10558. case no_soapcall:
  10559. return removeDefaultsFromExpr(expr, 2, no_newsoapcall);
  10560. case no_soapcall_ds:
  10561. return removeDefaultsFromExpr(expr, 3, no_newsoapcall_ds);
  10562. case no_soapaction_ds:
  10563. return removeDefaultsFromExpr(expr, 3, no_newsoapaction_ds);
  10564. #ifdef OPTIMIZE_IMPLICIT_CAST
  10565. //Following is a good idea, but makes some things worse because of the way we currently spot table invariants.
  10566. case no_implicitcast:
  10567. {
  10568. OwnedHqlExpr transformed = NewHqlTransformer::createTransformed(expr);
  10569. return ensureExprType(transformed->queryChild(0), transformed->queryType());
  10570. }
  10571. #endif
  10572. case no_record:
  10573. {
  10574. OwnedHqlExpr transformed = completeTransform(expr);
  10575. if (transformed->hasAttribute(packedAtom))
  10576. transformed.setown(getPackedRecord(transformed));
  10577. if (options.ensureRecordsHaveSymbols)
  10578. {
  10579. //Ensure all records only have a single unique name, and transform it here so that record types also map to that unique name
  10580. IHqlExpression * recordSymbol = queryLocationIndependentExtra(expr)->symbol;
  10581. if (recordSymbol)
  10582. {
  10583. IIdAtom * id = recordSymbol->queryId();
  // Use the simplified name when simplifySymbolName() returns one, else keep the
  // original id; the chosen id is then lower-cased before creating the symbol.
  10584. IIdAtom * simpleId = simplifySymbolName(id, options.commonUniqueNameAttributes);
  10585. IIdAtom * newid = simpleId ? simpleId : id;
  10586. IIdAtom * lowerId = lowerCaseSymbolName(newid);
  10587. return createSymbol(lowerId, transformed.getClear(), ob_private);
  10588. }
  10589. }
  10590. return transformed.getClear();
  10591. }
  10592. case no_left:
  10593. case no_right:
  10594. case no_top:
  10595. case no_self:
  10596. {
  10597. IHqlExpression * record = expr->queryChild(0);
  10598. //If no record argument then make sure the type is transformed
  10599. if (!record)
  10600. break;
  10601. HqlExprArray children;
  10602. //Ensure that the first parameter to one of these nodes is the body of the record, not a named symbol
  10603. OwnedHqlExpr transformedRecord = transform(record);
  10604. children.append(*LINK(transformedRecord->queryBody()));
  10605. return completeTransform(expr, children);
  10606. }
  10607. case no_field:
  10608. {
  10609. //Remove the default values...
  // Only attribute children are carried over (after being transformed).  Any
  // non-attribute child is dropped, which clears `same` and so forces a rebuild.
  10610. HqlExprArray children;
  10611. bool same = true;
  10612. ForEachChild(idx, expr)
  10613. {
  10614. IHqlExpression * cur = expr->queryChild(idx);
  10615. if (cur->isAttribute())
  10616. {
  10617. IHqlExpression * transformed = transform(cur);
  10618. children.append(*transformed);
  10619. if (cur != transformed)
  10620. same = false;
  10621. }
  10622. else
  10623. same = false;
  10624. }
  10625. ITypeInfo * type = expr->queryType();
  10626. OwnedITypeInfo newType = transformType(type);
  10627. IIdAtom * id = expr->queryId();
  10628. IIdAtom * newId = lowerCaseSymbolName(id);
  // A changed type or id needs a brand new field; otherwise reuse or clone the original.
  10629. if ((type != newType) || (newId != id))
  10630. return createField(newId, newType.getClear(), children);
  10631. if (same)
  10632. return LINK(expr);
  10633. return expr->clone(children);
  10634. }
  10635. case no_trim:
  10636. //TRIM(x,RIGHT) should be represented the same way as TRIM(x) - and it's more efficient
  10637. if ((expr->numChildren() == 2) && (expr->queryChild(1)->queryName() == rightAtom))
  10638. {
  10639. HqlExprArray children;
  10640. children.append(*transform(expr->queryChild(0)));
  10641. return expr->clone(children);
  10642. }
  10643. break;
  // NOTE(review): child 1 is returned linked but untransformed - confirm that is intended.
  10644. case no_pat_pattern:
  10645. return LINK(expr->queryChild(1));
  10646. case no_temptable:
  10647. return transformTempTable(expr);
  10648. case no_temprow:
  10649. return transformTempRow(expr);
  10650. case no_keyindex:
  10651. return transformKeyIndex(expr);
  10652. case no_newkeyindex:
  10653. // seenIndex = true;
  10654. return transformNewKeyIndex(expr);
  10655. case no_table:
  // Remember that a local-upload table was seen; the flag is a member read elsewhere.
  10656. if (expr->hasAttribute(localUploadAtom))
  10657. seenLocalUpload = true;
  10658. return transformTable(expr);
  10659. case no_pat_checkin:
  10660. if (expr->queryChild(0)->getOperator() == no_pat_anychar)
  10661. {
  // transformPatCheckIn() may return NULL, in which case the default handling applies.
  10662. IHqlExpression * transformed = transformPatCheckIn(expr);
  10663. if (transformed)
  10664. return transformed;
  10665. }
  10666. break;
  10667. case no_denormalize:
  10668. case no_denormalizegroup:
  10669. {
  10670. OwnedHqlExpr transformed = validateKeyedJoin(expr);
  10671. //Explicitly add a left outer flag to a denormalize if no other join type is specified.
  10672. //Do here rather than in parser so crc for persists isn't changed.
  10673. if (!transformed->hasAttribute(innerAtom) &&
  10674. !transformed->hasAttribute(leftonlyAtom) && !transformed->hasAttribute(leftouterAtom) &&
  10675. !transformed->hasAttribute(rightonlyAtom) && !transformed->hasAttribute(rightouterAtom) &&
  10676. !transformed->hasAttribute(fullonlyAtom) && !transformed->hasAttribute(fullouterAtom))
  10677. {
  10678. return appendOwnedOperand(transformed, createAttribute(leftouterAtom));
  10679. }
  10680. return transformed.getClear();
  10681. }
  10682. case no_colon:
  10683. {
  // Splits the workflow items attached to a no_colon:
  //  - no_global and a `define` attribute wrap the current value and are removed from the list;
  //  - no_when / no_priority are moved into a separate, outer no_colon;
  //  - no_persist stays in the list but causes the original child 0 to be recorded.
  10684. OwnedHqlExpr transformed = Parent::createTransformed(expr);
  10685. LinkedHqlExpr value = transformed->queryChild(0);
  10686. bool same = true;
  10687. bool needToPreserveOriginal = false;
  10688. HqlExprArray actions, scheduleActions;
  10689. unwindChildren(actions, transformed, 1);
  // Walk the list backwards so that actions.remove(i) cannot shift entries still to be visited.
  10690. ForEachItemInRev(i, actions)
  10691. {
  10692. IHqlExpression & cur = actions.item(i);
  10693. IHqlExpression * replacement = NULL;
  10694. switch (cur.getOperator())
  10695. {
  10696. case no_global:
  10697. {
  10698. HqlExprArray scopeArgs;
  10699. scopeArgs.append(*LINK(value));
  10700. unwindChildren(scopeArgs, &cur);
  10701. replacement = createWrapper(no_globalscope, scopeArgs);
  10702. break;
  10703. }
  10704. case no_persist:
  10705. {
  10706. needToPreserveOriginal = true;
  10707. same = false;
  10708. break;
  10709. }
  10710. case no_attr:
  10711. case no_attr_expr:
  10712. case no_attr_link:
  10713. if (cur.queryName() == defineAtom)
  10714. replacement = createValue(no_define, transformed->getType(), LINK(value), LINK(cur.queryChild(0)));
  10715. break;
  10716. //Separate scheduled items into a separate no_colon.
  10717. case no_when:
  10718. case no_priority:
  10719. scheduleActions.append(OLINK(cur));
  10720. actions.remove(i);
  10721. same = false;
  10722. break;
  10723. }
  // A replacement wraps the current value and becomes the new value for later iterations.
  10724. if (replacement)
  10725. {
  10726. value.setown(replacement);
  10727. actions.remove(i);
  10728. same = false;
  10729. }
  10730. }
  10731. if (same)
  10732. return transformed.getClear();
  // The untransformed child 0 is attached as an _original_ attribute when a persist is present.
  10733. if (needToPreserveOriginal)
  10734. actions.append(*createAttribute(_original_Atom, LINK(expr->queryChild(0))));
  10735. OwnedHqlExpr result;
  10736. if (actions.ordinality() == 0)
  10737. result.set(value);
  10738. else
  10739. result.setown(createColon(LINK(value), actions));
  10740. if (scheduleActions.ordinality())
  10741. result.setown(createColon(result.getClear(), scheduleActions));
  10742. return result.getClear();
  10743. }
  10744. case no_evaluate:
  10745. return transformEvaluate(expr);
  10746. case no_selectnth:
  10747. {
  10748. IHqlExpression * ds = expr->queryChild(0);
  // A grouped input is wrapped in a no_group node that has no further arguments, and
  // the rebuilt expression is then transformed again from the top.
  10749. if (isGrouped(ds))
  10750. {
  10751. OwnedHqlExpr newChild = createDataset(no_group, LINK(ds));
  10752. OwnedHqlExpr mapped = replaceChild(expr, 0, newChild);
  10753. return transform(mapped);
  10754. }
  10755. break;
  10756. }
  // For the next two cases the wrapper node is dropped and replaced by its transformed
  // child 0 when the corresponding option says so.
  10757. case no_assert_ds:
  10758. if (options.removeAsserts)
  10759. return transform(expr->queryChild(0));
  10760. break;
  10761. case no_section:
  10762. case no_sectioninput:
  10763. if (!options.allowSections)
  10764. return transform(expr->queryChild(0));
  10765. break;
  10766. case no_type:
  10767. return transformAlienType(expr);
  10768. case no_param:
  10769. {
  10770. //no_param may be retained by library call definitions + they need the type transforming for consistency
  10771. ITypeInfo * type = expr->queryType();
  10772. OwnedITypeInfo newType = transformType(type);
  10773. if (type != newType)
  10774. {
  10775. //Attributes shouldn't need transforming, but simplest
  10776. HqlExprArray attrs;
  10777. transformChildren(expr, attrs);
  10778. return createParameter(expr->queryId(), (unsigned)expr->querySequenceExtra(), newType.getClear(), attrs);
  10779. }
  10780. break;
  10781. }
  10782. case no_libraryscope:
  10783. {
  10784. OwnedHqlExpr ret = transformScope(expr);
  // When targeting hThor the transformed scope is tagged with a _noStreaming_ attribute.
  10785. if (translator.targetHThor())
  10786. return appendOwnedOperand(ret, createAttribute(_noStreaming_Atom));
  10787. return ret.getClear();
  10788. }
  10789. case no_virtualscope:
  10790. return transformScope(expr);
  10791. case no_libraryscopeinstance:
  10792. {
  10793. IHqlExpression * oldFunction = expr->queryDefinition();
  10794. OwnedHqlExpr newFunction = transform(oldFunction);
  10795. HqlExprArray children;
  10796. bool same = true;
  10797. ForEachChild(i, expr)
  10798. {
  10799. LinkedHqlExpr cur = expr->queryChild(i);
  // Virtual scopes passed as arguments must be made concrete first; an argument that
  // is still pure virtual is reported as an error.
  10800. if (cur->getOperator() == no_virtualscope)
  10801. {
  10802. cur.setown(checkCreateConcreteModule(NULL, cur, cur->queryAttribute(_location_Atom)));
  10803. assertex(cur->getOperator() != no_virtualscope);
  10804. same = false;
  10805. }
  10806. else if (cur->getOperator() == no_purevirtual)
  10807. {
  10808. IAtom * name = cur->queryName();
  10809. throwError1(HQLERR_LibraryMemberArgNotDefined, name ? name->str() : "");
  10810. }
  10811. IHqlExpression * transformed = transform(cur);
  10812. children.append(*transformed);
  10813. if (cur != transformed)
  10814. same = false;
  10815. }
  10816. if (same && (oldFunction == newFunction))
  10817. return LINK(expr);
  10818. return createLibraryInstance(newFunction.getClear(), children);
  10819. }
  // Both operators are rebuilt as a no_hqlproject over the transformed children and the
  // result is transformed again.
  10820. case no_transformascii:
  10821. case no_transformebcdic:
  10822. {
  10823. HqlExprArray children;
  10824. transformChildren(expr, children);
  10825. OwnedHqlExpr transformed = createDataset(no_hqlproject, children);
  10826. return transform(transformed);
  10827. }
  10828. case no_join:
  10829. {
  10830. OwnedHqlExpr transformed = validateKeyedJoin(expr);
  // NOTE(review): isSelfJoin() and the _ordered_ check inspect the original expr, while
  // isKeyedJoin() inspects the validated result - confirm the difference is intentional.
  10831. if (isSelfJoin(expr))
  10832. {
  10833. HqlExprArray children;
  10834. unwindChildren(children, transformed);
  10835. children.replace(*createAttribute(_selfJoinPlaceholder_Atom), 1); // replace the 1st dataset with an attribute so parameters are still in the same place.
  10836. return createDataset(no_selfjoin, children);
  10837. }
  10838. if (isKeyedJoin(transformed) && translator.targetRoxie() && !expr->hasAttribute(_ordered_Atom))
  10839. return appendOwnedOperand(transformed, createAttribute(_ordered_Atom));
  10840. return transformed.getClear();
  10841. }
  10842. case no_projectrow:
  10843. {
  10844. //Work around a problem where left is ambiguous - either outer LEFT, or left within this ROW
  10845. //Not a full solution since PROJECT(PROJECT(LEFT),t(LEFT)) where project(LEFT) doesn't change types
  10846. //would suffer from the same problem.
  10847. //Remove as many instances of PROJECT(row, transform) as we can since ROW(transform) is handled more efficiently.
  10848. HqlExprArray children;
  10849. OwnedHqlExpr ds = transform(expr->queryChild(0));
  10850. node_operator dsOp = ds->getOperator();
  10851. if (dsOp == no_left)
  10852. // if (isAlwaysActiveRow(ds))
  10853. {
  10854. //MORE: The call to replaceExpression below isn't actually correct unless selectors are unique
  10855. //this optimization may have to move elsewhere.
  10856. OwnedHqlExpr newTransform = transform(expr->queryChild(1));
  10857. OwnedHqlExpr newSel = transform(querySelSeq(expr));
  10858. OwnedHqlExpr myLeft = createSelector(no_left, ds, newSel);
  10859. OwnedHqlExpr replaced = quickFullReplaceExpression(newTransform, myLeft, ds);
  10860. return createRow(no_createrow, LINK(replaced));
  10861. }
  10862. children.append(*ds.getClear());
  10863. return completeTransform(expr, children);
  10864. }
  10865. case no_sorted:
  10866. return transformIfAssert(no_assertsorted, expr);
  10867. case no_grouped:
  10868. return transformIfAssert(no_assertgrouped, expr);
  10869. case no_distributed:
  10870. //remove distributed(x)
  10871. if (expr->hasAttribute(unknownAtom))
  10872. return transform(expr->queryChild(0));
  10873. return transformIfAssert(no_assertdistributed, expr);
  10874. #if defined(MAP_PROJECT_TO_USERTABLE)
  10875. case no_hqlproject:
  10876. if (!isCountProject(expr))
  10877. {
  10878. HqlExprArray children;
  10879. transformChildren(expr, children);
  10880. IHqlExpression * ds = &children.item(0);
  10881. OwnedHqlExpr left = createSelector(no_left, ds, querySelSeq(expr));
  10882. OwnedHqlExpr mapped = replaceExpression(&children.item(1), left, ds->queryNormalizedSelector());
  10883. children.add(*LINK(mapped->queryRecord()), 1);
  10884. HqlExprArray assigns;
  10885. unwindChildren(assigns, mapped);
  10886. children.replace(*createValue(no_newtransform, mapped->getType(), assigns), 2);
  10887. OwnedHqlExpr transformed = createDataset(no_newusertable, children);
  10888. return transform(transformed);
  10889. }
  10890. break;
  10891. #endif
  10892. case no_comma:
  10893. case no_compound:
  // A leading no_setmeta is discarded: only the transformed second operand is kept.
  10894. if (expr->queryChild(0)->getOperator() == no_setmeta)
  10895. return transform(expr->queryChild(1));
  // An action-valued no_compound chain is flattened into a single no_actionlist.
  10896. if ((op == no_compound) && expr->isAction())
  10897. {
  10898. HqlExprArray args;
  10899. expr->unwindList(args, no_compound);
  10900. OwnedHqlExpr compound = createAction(no_actionlist, args);
  10901. return transform(compound);
  10902. }
  10903. break;
  10904. case no_actionlist:
  10905. case no_orderedactionlist:
  10906. return transformActionList(expr);
  // These operators only set a flag here; the node itself gets the default handling
  // after the switch.
  10907. case no_forcelocal:
  10908. case no_forcenolocal:
  10909. case no_allnodes:
  10910. case no_thisnode:
  10911. seenForceLocal = true;
  10912. break;
  10913. case no_enth:
  10914. {
  10915. HqlExprArray children;
  10916. bool same = transformChildren(expr, children);
  // When there is no real child 2 and no `local` attribute, COUNT(<transformed child 0>)
  // is inserted at index 2.
  10917. IHqlExpression * denom = queryRealChild(expr, 2);
  10918. if (!denom && !expr->queryAttribute(localAtom))
  10919. {
  10920. children.add(*createValue(no_count, LINK(defaultIntegralType), LINK(&children.item(0))), 2);
  10921. same = false;
  10922. }
  10923. if (!same)
  10924. return expr->clone(children);
  10925. return LINK(expr);
  10926. }
  10927. case no_assertconstant:
  10928. {
  10929. IHqlExpression * child = expr->queryChild(0);
  10930. OwnedHqlExpr ret = transform(child);
  10931. OwnedHqlExpr folded = foldHqlExpression(ret, NULL, HFOforcefold);
  // An error is raised at the location held in child 1 if folding does not produce a
  // constant; the folded expression is what gets returned (assuming ERRORAT1 returns).
  10932. if (!folded->isConstant())
  10933. {
  10934. StringBuffer s;
  10935. getExprECL(child, s);
  10936. translator.ERRORAT1(expr->queryChild(1), HQLERR_ExpectedConstant, s.str());
  10937. }
  10938. return folded.getClear();
  10939. }
  10940. case no_assertconcrete:
  10941. {
  // This case never returns normally: it reports the abstract module and then throws
  // an "already reported" exception.
  10942. ECLlocation errpos;
  10943. errpos.extractLocationAttr(expr->queryChild(1));
  10944. reportAbstractModule(translator.queryErrors(), expr->queryChild(0), errpos);
  10945. throw MakeStringException(HQLERR_ErrorAlreadyReported, "%s", "");
  10946. }
  10947. case no_pat_instance:
  10948. {
  // A no_pat_instance whose transformed child is itself a no_pat_instance carrying the
  // `temp` attribute is collapsed onto that child's own child 0.
  10949. OwnedHqlExpr child = transform(expr->queryChild(0));
  10950. if (child->getOperator() == no_pat_instance && child->hasAttribute(tempAtom))
  10951. return createValue(no_pat_instance, child->getType(), LINK(child->queryChild(0)));
  10952. //default action
  10953. break;
  10954. }
  10955. case no_if:
  10956. {
  10957. //Parameters are being used a lot to select between two items inside a function/module
  10958. //so much better if we trim the tree earlier....
  // Only applies when child 0 already has a constant value and the IF is not an action.
  // If the selected branch is absent the node is left to the default handling.
  10959. IValue * value = expr->queryChild(0)->queryValue();
  10960. if (value && !expr->isAction())
  10961. {
  10962. unsigned branch = value->getBoolValue() ? 1 : 2;
  10963. IHqlExpression * arg = expr->queryChild(branch);
  10964. if (arg)
  10965. return transform(arg);
  10966. }
  10967. break;
  10968. }
  10969. case no_stored:
  10970. {
  // The first child is transformed and then passed through lowerCaseHqlExpr().
  10971. HqlExprArray children;
  10972. OwnedHqlExpr name = transform(expr->queryChild(0));
  10973. children.append(*lowerCaseHqlExpr(name));
  10974. return completeTransform(expr, children);
  10975. }
  10976. case no_merge:
  10977. return transformMerge(expr);
  10978. //yuk: Sets of datasets need special casing because their type isn't implicitly calculated from their inputs.
  10979. case no_datasetlist:
  10980. case no_rowset:
  10981. {
  10982. HqlExprArray children;
  10983. transformChildren(expr, children);
  10984. OwnedITypeInfo setType = makeSetType(children.item(0).getType());
  10985. return createValue(op, setType.getClear(), children);
  10986. }
  10987. case no_rowsetrange:
  10988. {
  // Unlike the case above, the result type is the type of the first transformed child as is.
  10989. HqlExprArray children;
  10990. transformChildren(expr, children);
  10991. OwnedITypeInfo setType = children.item(0).getType();
  10992. return createValue(op, setType.getClear(), children);
  10993. }
  10994. case no_buildindex:
  10995. {
  10996. //Normalize the index build by splitting out the sort here, so that constant percolating
  10997. //is also done on these parameters
  10998. OwnedHqlExpr transformed = Parent::createTransformed(expr);
  // Keep applying normalizeIndexBuild() until it returns NULL, then return the last result.
  10999. loop
  11000. {
  11001. IHqlExpression * ret = normalizeIndexBuild(transformed, options.sortIndexPayload, !translator.targetThor(), options.implicitSubSort);
  11002. if (!ret)
  11003. return LINK(transformed);
  11004. transformed.setown(ret);
  11005. }
  11006. }
  11007. case no_keyed:
  11008. {
  11009. HqlExprArray args;
  11010. bool same = transformChildren(expr, args);
  // A section wrapper directly beneath the keyed node is stripped, leaving its child 0.
  11011. IHqlExpression * ds = &args.item(0);
  11012. if ((ds->getOperator() == no_section) || (ds->getOperator() == no_sectioninput))
  11013. {
  11014. args.replace(*LINK(ds->queryChild(0)), 0);
  11015. same = false;
  11016. }
  11017. if (!same)
  11018. return expr->clone(args);
  11019. return LINK(expr);
  11020. }
  11021. case no_eclcrc:
  11022. {
  // Replaced by a constant: the CRC of the transformed grandchild (child 0 of child 0),
  // cast to this expression's type.
  11023. OwnedHqlExpr arg = transform(expr->queryChild(0)->queryChild(0));
  11024. return createConstant(expr->queryType()->castFrom(true, (__int64)getExpressionCRC(arg)));
  11025. }
  11026. case no_cast:
  // With normalizeExplicitCasts set, an explicit cast is rebuilt as a no_implicitcast
  // with the transformed type and argument.
  11027. if (options.normalizeExplicitCasts)
  11028. {
  11029. Owned<ITypeInfo> type = transformType(expr->queryType());
  11030. OwnedHqlExpr arg = transform(expr->queryChild(0));
  11031. return createValue(no_implicitcast, type.getClear(), arg.getClear());
  11032. }
  11033. break;
  11034. #if 0
  11035. //This code adds a assertsorted activity after an nary-join, but I don't think it is actually correct, so removed. I may revisit.
  11036. case no_nwayjoin:
  11037. if (expr->hasAttribute(assertAtom) && !removeAsserts)
  11038. {
  11039. OwnedHqlExpr transformed = NewHqlTransformer::createTransformed(expr);;
  11040. IHqlExpression * ds = transformed->queryChild(0);
  11041. IHqlExpression * selSeq = querySelSeq(transformed);
  11042. OwnedHqlExpr left = createSelector(no_left, ds, selSeq);
  11043. IHqlExpression * sortOrder = transformed->queryChild(3);
  11044. HqlExprArray args;
  11045. args.append(*LINK(transformed));
  11046. //MORE: Need fixing once join can have a different output format
  11047. args.append(*replaceSelector(sortOrder, left, transformed->queryNormalizedSelector()));
  11048. return createDataset(no_assertsorted, args);
  11049. }
  11050. break;
  11051. #endif
  11052. case no_attr:
  11053. case no_attr_link:
  11054. case no_attr_expr:
  11055. {
  11056. IAtom * name = expr->queryName();
  11057. if ((name == _uid_Atom) && (expr->numChildren() > 0))
  11058. {
  11059. //Make sure we ignore any line number information on the parameters mangled with the uid - otherwise
  11060. //they may create too many unique ids.
  11061. IHqlExpression * normalForm = queryLocationIndependent(expr);
  11062. if (normalForm != expr)
  11063. return transform(normalForm);
  11064. return ::createUniqueId();
  11065. }
  11066. #ifdef USE_SELSEQ_UID
  11067. if (name == _selectorSequence_Atom)
  11068. {
  11069. //Ensure parameterised sequences generate a unique sequence number...
  11070. //Not sure the following is really necessary, but will reduce in memory tree size....
  11071. //also saves complications from having weird attributes in the tree
  11072. if (expr->numChildren() > 0)
  11073. {
  11074. //Make sure we ignore any line number information on the parameters mangled with the uid - otherwise
  11075. //they may create too many unique ids.
  11076. OwnedHqlExpr transformed = Parent::createTransformed(expr);
  11077. IHqlExpression * normalForm = queryLocationIndependent(transformed);
  11078. OwnedHqlExpr ret;
  11079. if (normalForm != expr)
  11080. {
  11081. IHqlExpression * mapped = queryAlreadyTransformed(normalForm);
  11082. if (!mapped)
  11083. {
  11084. ret.setown(createSelectorSequence());
  11085. setTransformed(normalForm, ret);
  11086. }
  11087. else
  11088. ret.set(mapped);
  11089. }
  11090. else
  11091. ret.setown(createSelectorSequence());
  11092. return ret.getClear();
  11093. }
  11094. }
  11095. #endif
  11096. //If a named symbol is used as an argument to maxlength you can end up with a situation
  11097. //where records are identical except for that named symbol.
  11098. //If f(a + b) is then optimized to b this can lead to incompatible selectors, since
  11099. //a and b are "compatible", but not identical.
  11100. //This then causes chaos, so strip them as a precaution... but it is only a partial solution.
  11101. if (name == maxLengthAtom || name == maxCountAtom)
  11102. return transformChildrenNoAnnotations(expr);
  11103. break;
  11104. }
  11105. case no_call:
  11106. {
  11107. IHqlExpression * oldFuncdef = expr->queryFunctionDefinition();
  // A call through a no_delayedselect is reported via reportAbstractModule() and then
  // aborted with an "already reported" exception.
  11108. if (oldFuncdef->getOperator() == no_delayedselect)
  11109. {
  11110. IHqlExpression * module = oldFuncdef->queryChild(1);
  11111. ECLlocation errpos(module);
  11112. //errpos.extractLocationAttr(expr->queryChild(1));
  11113. reportAbstractModule(translator.queryErrors(), module, errpos);
  11114. throw MakeStringException(HQLERR_ErrorAlreadyReported, "%s", "");
  11115. }
  11116. assertex(oldFuncdef->getOperator() == no_funcdef);
  11117. return transformCall(expr);
  11118. }
  11119. case no_externalcall:
  11120. //Yuk.... Because we ensure that all records have a name, we need to make sure that external functions that return records
  11121. //also have their return value normalized - otherwise (jtolbert2.xhql) you can create an ambiguity
  11122. //We could also want to do this for user functions - but better would be to have a different node type.
  11123. if (options.ensureRecordsHaveSymbols)
  11124. {
  11125. if (expr->queryRecord())
  11126. return transformExternalCall(expr);
  11127. }
  11128. break;
  11129. case no_external:
  11130. {
  11131. ITypeInfo * type = expr->queryType();
  11132. OwnedITypeInfo newType = transformType(type);
  11133. HqlExprArray args;
  11134. bool same = transformChildren(expr, args);
  11135. if (same && (type == newType))
  11136. return LINK(expr);
  11137. return createExternalReference(expr->queryId(), newType.getClear(), args);
  11138. }
  11139. case no_outputscalar:
  // With outputRowsAsDatasets set, outputting a row becomes a no_output of a dataset
  // built from that row.
  11140. if (options.outputRowsAsDatasets && expr->queryChild(0)->isDatarow())
  11141. {
  11142. HqlExprArray args;
  // NOTE(review): `same` is assigned here but never read in this case.
  11143. bool same = transformChildren(expr, args);
  11144. args.replace(*createDatasetFromRow(LINK(&args.item(0))), 0);
  11145. return createValue(no_output, makeVoidType(), args);
  11146. }
  11147. break;
  11148. case no_nameof:
  11149. {
  // The name is taken from a fixed child of the transformed argument: child 3 of a
  // no_newkeyindex, child 0 of a no_table.  Any other operator is an error.
  11150. OwnedHqlExpr newChild = transform(expr->queryChild(0));
  11151. switch (newChild->getOperator())
  11152. {
  11153. case no_newkeyindex:
  11154. return LINK(newChild->queryChild(3));
  11155. case no_table:
  11156. return LINK(newChild->queryChild(0));
  11157. default:
  11158. throwError(HQLERR_CannotDeduceNameDataset);
  11159. }
  // NOTE(review): only reachable if throwError() returns, which it presumably does not.
  11160. break;
  11161. }
  11162. case no_typedef:
  11163. {
  11164. HqlExprArray children;
  11165. transformChildren(expr, children);
  11166. OwnedITypeInfo newType = transformType(expr->queryType());
  11167. return createValue(op, newType.getClear(), children);
  11168. }
  11169. case no_assertkeyed:
  11170. {
  11171. //Ensure assertkeyed is tagged with the selectors of each of the fields that are keyed, otherwise
  11172. //when expressions are constant folded, the information about keyed fields is lost.
  11173. HqlExprArray children;
  11174. transformChildren(expr, children);
  // The selectors are gathered from the original expr and the attribute holding them
  // is transformed as a whole before being appended.
  11175. HqlExprArray args;
  11176. gatherPotentialSelectors(args, expr);
  11177. OwnedHqlExpr selectors = createExprAttribute(_selectors_Atom, args);
  11178. children.append(*transform(selectors));
  11179. return expr->clone(children);
  11180. }
  // Each no_sequence node is replaced by the next value of a running member counter.
  11181. case no_sequence:
  11182. return getSizetConstant(nextSequenceValue++);
  11183. case no_filter:
  11184. return transformWithinFilter(expr);
  11185. case no_executewhen:
  11186. return transformExecuteWhen(expr);
  11187. case no_funcdef:
  11188. {
  11189. HqlExprArray children;
  11190. if (transformChildren(expr, children))
  11191. return LINK(expr);
  11192. return createFunctionDefinition(expr->queryId(), children);
  11193. }
  11194. case no_debug_option_value:
  11195. {
  // Every option except "targetClusterType" is resolved here through getDebugValueExpr();
  // that one option is left to the default handling.
  11196. if (!matchesConstantString(expr->queryChild(0), "targetClusterType", true))
  11197. return getDebugValueExpr(translator.wu(), expr);
  11198. break;
  11199. }
  11200. case no_loop:
  11201. {
  11202. OwnedHqlExpr transformed = NewHqlTransformer::createTransformed(expr);
  11203. IHqlExpression * loopCond = queryRealChild(transformed, 3);
  11204. if (loopCond)
  11205. {
  11206. //Create a firstCond attribute so that the condition for whether to execute the loop
  11207. //the first time will be efficiently optimized.
  11208. IHqlExpression * dataset = transformed->queryChild(0);
  11209. IHqlExpression * filter = queryRealChild(transformed, 2);
  11210. IHqlExpression * rowsid = transformed->queryAttribute(_rowsid_Atom);
  11211. IHqlExpression * selSeq = querySelSeq(transformed);
  11212. IHqlExpression * counter = queryAttributeChild(transformed, _countProject_Atom, 0);
  11213. OwnedHqlExpr left = createSelector(no_left, dataset, selSeq);
  11214. OwnedHqlExpr rowsExpr = createDataset(no_rows, LINK(left), LINK(rowsid));
  11215. OwnedHqlExpr initialLoopDataset = LINK(dataset);
  11216. if (filter)
  11217. {
  11218. //If there is a loop filter then the global condition is applied to dataset filtered by that.
  11219. OwnedHqlExpr mappedFilter = replaceSelector(filter, left, dataset);
  11220. initialLoopDataset.setown(createDataset(no_filter, initialLoopDataset.getClear(), LINK(mappedFilter)));
  11221. }
  // The first-iteration condition is the loop condition with the rows expression
  // replaced by the (optionally filtered) input dataset.
  11222. OwnedHqlExpr firstCond = replaceExpression(loopCond, rowsExpr, initialLoopDataset);
  11223. if (counter)
  11224. {
  11225. //Whether to evaluate the 1st time round the loop requires COUNTER=1
  11226. OwnedHqlExpr one = createConstant(createIntValue(1, counter->getType()));
  11227. firstCond.setown(replaceExpression(firstCond, counter, one));
  11228. }
  11229. return appendOwnedOperand(transformed, createExprAttribute(_loopFirst_Atom, firstCond.getClear()));
  11230. }
  11231. return transformed.getClear();
  11232. }
  // Unreachable: both paths through the no_loop case above return.
  11233. break;
  11234. }
  // Default handling, reached by every case that did not return above.
  // Pattern and rule expressions must also have their type checked, even if they have
  // no children.
  11235. ITypeInfo * type = expr->queryType();
  11236. bool checkType = false;
  11237. if (type)
  11238. {
  11239. switch (type->getTypeCode())
  11240. {
  11241. case type_pattern:
  11242. case type_rule:
  11243. checkType = true;
  11244. break;
  11245. }
  11246. }
  // A leaf whose type does not need checking cannot change, so it is returned as is.
  11247. unsigned max = expr->numChildren();
  11248. if ((max == 0) && !checkType)
  11249. return LINK(expr);
  11250. bool same = true;
  11251. HqlExprArray children;
  11252. children.ensure(max);
  11253. for (unsigned idx=0;idx<max;idx++)
  11254. {
  11255. IHqlExpression * child = expr->queryChild(idx);
  11256. IHqlExpression * tchild = transform(child);
  11257. children.append(*tchild);
  11258. if (child != tchild)
  11259. same = false;
  11260. }
  // A changed pattern/rule type forces a rebuild with the new type, whether or not any
  // child changed.
  11261. if (checkType)
  11262. {
  11263. OwnedITypeInfo newType = transformType(type);
  11264. if (type != newType)
  11265. return createWrapper(op, newType, children);
  11266. }
  11267. if (!same)
  11268. return expr->clone(children);
  11269. return LINK(expr);
  11270. }
  11271. IHqlExpression * HqlTreeNormalizer::createTransformedSelector(IHqlExpression * expr)
  11272. {
  11273. throwUnexpected();
  11274. }
  11275. IHqlExpression * normalizeRecord(HqlCppTranslator & translator, IHqlExpression * record)
  11276. {
  11277. HqlTreeNormalizer normalizer(translator);
  11278. HqlExprArray transformed;
  11279. return normalizer.transformRoot(record);
  11280. }
  11281. void normalizeHqlTree(HqlCppTranslator & translator, HqlExprArray & exprs)
  11282. {
  11283. bool seenForceLocal;
  11284. bool seenLocalUpload;
  11285. {
  11286. //First iterate through the expressions and call queryLocationIndependent() to avoid nested transforms (which are less efficient)
  11287. // ForEachItemIn(iInit, exprs)
  11288. // queryLocationIndependent(&exprs.item(iInit));
  11289. unsigned time = msTick();
  11290. HqlTreeNormalizer normalizer(translator);
  11291. HqlExprArray transformed;
  11292. normalizer.analyseArray(exprs, 0);
  11293. normalizer.transformRoot(exprs, transformed);
  11294. // logTreeStats(exprs);
  11295. // logTreeStats(transformed);
  11296. // DBGLOG("Before normalize %u unique expressions, after normalize %u unique expressions", getNumUniqueExpressions(exprs), getNumUniqueExpressions(transformed));
  11297. replaceArray(exprs, transformed);
  11298. seenForceLocal = normalizer.querySeenForceLocal();
  11299. seenLocalUpload = normalizer.querySeenLocalUpload();
  11300. translator.updateTimer("workunit;tree transform: normalize.initial", msTick()-time);
  11301. }
  11302. if (translator.queryOptions().constantFoldPostNormalize)
  11303. {
  11304. unsigned time = msTick();
  11305. HqlExprArray transformed;
  11306. quickFoldExpressions(transformed, exprs, NULL, 0);
  11307. replaceArray(exprs, transformed);
  11308. translator.updateTimer("workunit;tree transform: normalize.fold", msTick()-time);
  11309. }
  11310. translator.traceExpressions("before scope tag", exprs);
  11311. {
  11312. unsigned time = msTick();
  11313. HqlScopeTagger normalizer(translator.queryErrors());
  11314. HqlExprArray transformed;
  11315. normalizer.transformRoot(exprs, transformed);
  11316. replaceArray(exprs, transformed);
  11317. translator.updateTimer("workunit;tree transform: normalize.scope", msTick()-time);
  11318. normalizer.reportWarnings();
  11319. }
  11320. if (translator.queryOptions().normalizeLocations)
  11321. normalizeAnnotations(translator, exprs);
  11322. translator.traceExpressions("after scope tag", exprs);
  11323. {
  11324. unsigned time = msTick();
  11325. HqlLinkedChildRowTransformer transformer(translator.queryOptions().implicitLinkedChildRows);
  11326. HqlExprArray transformed;
  11327. transformer.transformArray(exprs, transformed);
  11328. replaceArray(exprs, transformed);
  11329. translator.updateTimer("workunit;tree transform: normalize.linkedChildRows", msTick()-time);;
  11330. }
  11331. if (seenLocalUpload)
  11332. {
  11333. LocalUploadTransformer transformer(translator.wu());
  11334. HqlExprArray transformed;
  11335. transformer.transformRoot(exprs, transformed);
  11336. replaceArray(exprs, transformed);
  11337. }
  11338. if (seenForceLocal)
  11339. {
  11340. //Add ,local to all sources, so that count(x) inside local() is differentiated from a global count(x)
  11341. ForceLocalTransformer localizer(translator.getTargetClusterType());
  11342. HqlExprArray transformed;
  11343. localizer.transformRoot(exprs, transformed);
  11344. replaceArray(exprs, transformed);
  11345. }
  11346. #ifdef USE_SELSEQ_UID
  11347. if (translator.queryOptions().detectAmbiguousSelector || translator.queryOptions().allowAmbiguousSelector)
  11348. {
  11349. LeftRightSelectorNormalizer transformer(translator.queryOptions().allowAmbiguousSelector);
  11350. transformer.analyseArray(exprs, 0);
  11351. if (!transformer.containsAmbiguity())
  11352. {
  11353. HqlExprArray transformed;
  11354. transformer.transformRoot(exprs, transformed);
  11355. replaceArray(exprs, transformed);
  11356. }
  11357. }
  11358. #endif
  11359. if (false)
  11360. {
  11361. NestedSelectorNormalizer transformer;
  11362. transformer.analyseArray(exprs, 0);
  11363. if (transformer.requiresTransforming())
  11364. {
  11365. HqlExprArray transformed;
  11366. transformer.transformRoot(exprs, transformed);
  11367. replaceArray(exprs, transformed);
  11368. }
  11369. }
  11370. #if 0
  11371. if (seenIndex)
  11372. {
  11373. FilteredIndexOptimizer transformer(true, false);
  11374. HqlExprArray transformed;
  11375. transformer.transformRoot(exprs, transformed);
  11376. replaceArray(exprs, transformed);
  11377. }
  11378. #endif
  11379. #ifdef _DEBUG
  11380. //spotPotentialDuplicateCode(exprs);
  11381. #endif
  11382. }
  11383. IHqlExpression * normalizeHqlTree(HqlCppTranslator & translator, IHqlExpression * expr)
  11384. {
  11385. HqlExprArray exprs;
  11386. expr->unwindList(exprs, no_comma);
  11387. normalizeHqlTree(translator, exprs);
  11388. return createComma(exprs);
  11389. }
  11390. void hoistNestedCompound(HqlCppTranslator & translator, HqlExprArray & exprs)
  11391. {
  11392. if (containsCompound(exprs))
  11393. {
  11394. NestedCompoundTransformer normalizer(translator);
  11395. normalizer.analyseArray(exprs, 0);
  11396. HqlExprArray transformed;
  11397. normalizer.transformRoot(exprs, transformed);
  11398. replaceArray(exprs, transformed);
  11399. }
  11400. }
  11401. void hoistNestedCompound(HqlCppTranslator & translator, WorkflowArray & workflow)
  11402. {
  11403. ForEachItemIn(i, workflow)
  11404. hoistNestedCompound(translator, workflow.item(i).queryExprs());
  11405. }
  11406. //---------------------------------------------------------------------------
  11407. static IHqlExpression * substituteClusterSize(unsigned numNodes, IHqlExpression * expr, ICodegenContextCallback * ctxCallback, IWorkUnit * wu);
  11408. static HqlTransformerInfo clusterSubstitueTransformerInfo("ClusterSubstitueTransformer");
  11409. class ClusterSubstitueTransformer : public NewHqlTransformer
  11410. {
  11411. public:
  11412. ClusterSubstitueTransformer(unsigned size, ICodegenContextCallback * _ctxCallback, IWorkUnit * _wu)
  11413. : NewHqlTransformer(clusterSubstitueTransformerInfo)
  11414. {
  11415. ctxCallback = _ctxCallback;
  11416. wu = _wu;
  11417. OwnedHqlExpr clusterSizeExpr = createValue(no_clustersize, makeIntType(4, false));
  11418. if (size)
  11419. clusterSizeValue.setown(getSizetConstant(size));
  11420. setTransformed(clusterSizeExpr, clusterSizeValue);
  11421. }
  11422. protected:
  11423. IHqlExpression * createTransformed(IHqlExpression * expr)
  11424. {
  11425. if (expr->isConstant())
  11426. return LINK(expr);
  11427. switch (expr->getOperator())
  11428. {
  11429. case no_clustersize:
  11430. //Cope if CLUSTERSIZE is assigned to a named symbol
  11431. if (clusterSizeValue)
  11432. return LINK(clusterSizeValue);
  11433. break;
  11434. case no_cluster:
  11435. return createSubstitutedChild(expr, expr->queryChild(1));
  11436. case no_colon:
  11437. {
  11438. ForEachChild(i, expr)
  11439. {
  11440. IHqlExpression * child = expr->queryChild(i);
  11441. if (child->getOperator() == no_persist)
  11442. {
  11443. IHqlExpression * cluster = queryRealChild(child, 1);
  11444. if (cluster && !isBlankString(cluster))
  11445. return createSubstitutedChild(expr, cluster);
  11446. }
  11447. else if (child->getOperator() == no_global)
  11448. {
  11449. IHqlExpression * cluster = queryRealChild(child, 0);
  11450. if (cluster && !isBlankString(cluster))
  11451. return createSubstitutedChild(expr, cluster);
  11452. }
  11453. }
  11454. break;
  11455. }
  11456. }
  11457. return NewHqlTransformer::createTransformed(expr);
  11458. }
  11459. IHqlExpression * createSubstitutedChild(IHqlExpression * expr, IHqlExpression * cluster)
  11460. {
  11461. StringBuffer clusterText;
  11462. getStringValue(clusterText, cluster);
  11463. if (clusterText.length())
  11464. ctxCallback->noteCluster(clusterText.str());
  11465. #if 0
  11466. Owned<IConstWUClusterInfo> clusterInfo = wu->getClusterInfo(clusterText.str());
  11467. if (clusterInfo)
  11468. {
  11469. unsigned numNodes = clusterInfo->getSize();
  11470. if (numNodes == 0) numNodes = 1;
  11471. HqlExprArray args;
  11472. unwindChildren(args, expr);
  11473. args.replace(*substituteClusterSize(numNodes, &args.item(0), ctxCallback, wu), 0);
  11474. return expr->clone(args);
  11475. }
  11476. #endif
  11477. return LINK(expr);
  11478. }
  11479. protected:
  11480. ICodegenContextCallback * ctxCallback;
  11481. IWorkUnit * wu;
  11482. OwnedHqlExpr clusterSizeValue;
  11483. };
  11484. IHqlExpression * substituteClusterSize(unsigned numNodes, IHqlExpression * expr, ICodegenContextCallback * ctxCallback, IWorkUnit * wu)
  11485. {
  11486. ClusterSubstitueTransformer transformer(numNodes, ctxCallback, wu);
  11487. return transformer.transformRoot(expr);
  11488. }
  11489. void HqlCppTranslator::substituteClusterSize(HqlExprArray & exprs)
  11490. {
  11491. unsigned numNodes = options.specifiedClusterSize;
  11492. ClusterSubstitueTransformer transformer(numNodes, ctxCallback, wu());
  11493. HqlExprArray transformed;
  11494. ForEachItemIn(i, exprs)
  11495. transformed.append(*transformer.transformRoot(&exprs.item(i)));
  11496. replaceArray(exprs, transformed);
  11497. }
  11498. IHqlExpression * HqlCppTranslator::separateLibraries(IHqlExpression * query, HqlExprArray & internalLibraries)
  11499. {
  11500. HqlExprArray exprs;
  11501. query->unwindList(exprs, no_comma);
  11502. traceExpressions("before transform graph for generation", exprs);
  11503. //Remove any meta entries from the tree.
  11504. ForEachItemInRev(i, exprs)
  11505. if (exprs.item(i).getOperator() == no_setmeta)
  11506. exprs.remove(i);
  11507. processEmbeddedLibraries(exprs, internalLibraries, isLibraryScope(query));
  11508. return createComma(exprs);
  11509. }
  11510. bool HqlCppTranslator::transformGraphForGeneration(HqlQueryContext & query, WorkflowArray & workflow)
  11511. {
  11512. HqlExprArray exprs;
  11513. if (isLibraryScope(query.expr))
  11514. outputLibrary->mapLogicalToImplementation(exprs, query.expr);
  11515. else
  11516. query.expr->unwindList(exprs, no_comma);
  11517. //Ensure the incoming query will be freed up when no longer used
  11518. query.expr.clear();
  11519. traceExpressions("before transform graph for generation", exprs);
  11520. //Don't change the engine if libraries are involved, otherwise things will get very confused.
  11521. unsigned timeCall = msTick();
  11522. expandDelayedFunctionCalls(queryErrors(), exprs);
  11523. updateTimer("workunit;tree transform: expand delayed calls", msTick()-timeCall);
  11524. unsigned time1 = msTick();
  11525. traceExpressions("before normalize", exprs);
  11526. normalizeHqlTree(*this, exprs);
  11527. updateTimer("workunit;tree transform: normalize", msTick()-time1);
  11528. if (wu()->getDebugValueBool("dumpIR", false))
  11529. EclIR::dbglogIR(exprs);
  11530. checkNormalized(exprs);
  11531. #ifdef PICK_ENGINE_EARLY
  11532. if (options.pickBestEngine)
  11533. pickBestEngine(exprs);
  11534. #endif
  11535. allocateSequenceNumbers(exprs); // Added to all expressions/output statements etc.
  11536. traceExpressions("allocate Sequence", exprs);
  11537. checkNormalized(exprs);
  11538. if (options.generateLogicalGraph || options.generateLogicalGraphOnly)
  11539. {
  11540. LogicalGraphCreator creator(wu());
  11541. creator.createLogicalGraph(exprs);
  11542. if (options.generateLogicalGraphOnly)
  11543. return false;
  11544. curActivityId = creator.queryMaxActivityId();
  11545. }
  11546. traceExpressions("begin transformGraphForGeneration", exprs);
  11547. checkNormalized(exprs);
  11548. {
  11549. unsigned startTime = msTick();
  11550. substituteClusterSize(exprs);
  11551. updateTimer("workunit;tree transform: substituteClusterSize", msTick()-startTime);
  11552. }
  11553. {
  11554. unsigned startTime = msTick();
  11555. HqlExprArray folded;
  11556. unsigned foldOptions = DEFAULT_FOLD_OPTIONS;
  11557. if (options.foldConstantDatasets) foldOptions |= HFOconstantdatasets;
  11558. if (options.percolateConstants) foldOptions |= HFOpercolateconstants;
  11559. if (options.percolateFilters) foldOptions |= HFOpercolatefilters;
  11560. if (options.optimizeMax) foldOptions |= HFOx_op_not_x;
  11561. if (options.globalFoldOptions != (unsigned)-1)
  11562. foldOptions = options.globalFoldOptions;
  11563. foldHqlExpression(folded, exprs, foldOptions);
  11564. replaceArray(exprs, folded);
  11565. updateTimer("workunit;tree transform: global fold", msTick()-startTime);
  11566. }
  11567. traceExpressions("after global fold", exprs);
  11568. checkNormalized(exprs);
  11569. if (options.globalOptimize)
  11570. {
  11571. unsigned startTime = msTick();
  11572. HqlExprArray folded;
  11573. optimizeHqlExpression(folded, exprs, HOOfold);
  11574. replaceArray(exprs, folded);
  11575. updateTimer("workunit;tree transform: global optimize", msTick()-startTime);
  11576. }
  11577. traceExpressions("alloc", exprs);
  11578. checkNormalized(exprs);
  11579. modifyOutputLocations(exprs);
  11580. if (exprs.ordinality() == 0)
  11581. return false; // No action needed
  11582. unsigned time4 = msTick();
  11583. ::extractWorkflow(*this, exprs, workflow);
  11584. traceExpressions("workflow", workflow);
  11585. checkNormalized(workflow);
  11586. updateTimer("workunit;tree transform: stored results", msTick()-time4);
  11587. if (outputLibrary && workflow.ordinality() > 1)
  11588. {
  11589. unsigned cnt = 0;
  11590. ForEachItemIn(i, workflow)
  11591. {
  11592. if (!workflow.item(i).isFunction())
  11593. cnt++;
  11594. }
  11595. if (cnt > 1)
  11596. {
  11597. SCMStringBuffer libraryName;
  11598. getOutputLibraryName(libraryName, wu());
  11599. throwError2(HQLERR_LibraryCannotContainWorkflow, libraryName.str(), "");
  11600. }
  11601. }
  11602. ForEachItemIn(i, workflow)
  11603. {
  11604. WorkflowItem & curWorkflow = workflow.item(i);
  11605. #ifdef USE_SELSEQ_UID
  11606. if (options.normalizeSelectorSequence)
  11607. {
  11608. unsigned time = msTick();
  11609. LeftRightTransformer normalizer;
  11610. normalizer.process(curWorkflow.queryExprs());
  11611. updateTimer("workunit;tree transform: left right", msTick()-time);
  11612. //traceExpressions("after implicit alias", workflow);
  11613. }
  11614. #endif
  11615. if (queryOptions().createImplicitAliases)
  11616. {
  11617. unsigned time = msTick();
  11618. ImplicitAliasTransformer normalizer;
  11619. normalizer.process(curWorkflow.queryExprs());
  11620. updateTimer("workunit;tree transform: implicit alias", msTick()-time);
  11621. //traceExpressions("after implicit alias", workflow);
  11622. }
  11623. {
  11624. unsigned startTime = msTick();
  11625. hoistNestedCompound(*this, curWorkflow.queryExprs());
  11626. updateTimer("workunit;tree transform: hoist nested compound", msTick()-startTime);
  11627. }
  11628. if (options.optimizeNestedConditional)
  11629. {
  11630. cycle_t time = msTick();
  11631. optimizeNestedConditional(curWorkflow.queryExprs());
  11632. updateTimer("workunit;optimize nested conditional", msTick()-time);
  11633. traceExpressions("nested", curWorkflow);
  11634. checkNormalized(curWorkflow);
  11635. }
  11636. checkNormalized(curWorkflow);
  11637. //sort(x)[n] -> topn(x, n)[]n, count(x)>n -> count(choosen(x,n+1)) > n and possibly others
  11638. {
  11639. unsigned startTime = msTick();
  11640. optimizeActivities(curWorkflow.queryExprs(), !targetThor(), options.optimizeNonEmpty);
  11641. updateTimer("workunit;tree transform: optimize activities", msTick()-startTime);
  11642. }
  11643. checkNormalized(curWorkflow);
  11644. unsigned time5 = msTick();
  11645. migrateExprToNaturalLevel(curWorkflow, wu(), *this); // Ensure expressions are evaluated at the best level - e.g., counts moved to most appropriate level.
  11646. updateTimer("workunit;tree transform: migrate", msTick()-time5);
  11647. //transformToAliases(exprs);
  11648. traceExpressions("migrate", curWorkflow);
  11649. checkNormalized(curWorkflow);
  11650. unsigned time2 = msTick();
  11651. markThorBoundaries(curWorkflow); // work out which engine is going to perform which operation.
  11652. updateTimer("workunit;tree transform: thor hole", msTick()-time2);
  11653. traceExpressions("boundary", curWorkflow);
  11654. checkNormalized(curWorkflow);
  11655. if (options.optimizeGlobalProjects)
  11656. {
  11657. cycle_t time = msTick();
  11658. insertImplicitProjects(*this, curWorkflow.queryExprs());
  11659. updateTimer("workunit;global implicit projects", msTick()-time);
  11660. traceExpressions("implicit", curWorkflow);
  11661. checkNormalized(curWorkflow);
  11662. }
  11663. unsigned time3 = msTick();
  11664. normalizeResultFormat(curWorkflow, options);
  11665. updateTimer("workunit;tree transform: normalize result", msTick()-time3);
  11666. traceExpressions("results", curWorkflow);
  11667. checkNormalized(curWorkflow);
  11668. optimizePersists(curWorkflow.queryExprs());
  11669. traceExpressions("per", curWorkflow);
  11670. checkNormalized(curWorkflow);
  11671. // flattenDatasets(workflow);
  11672. // traceExpressions("flatten", workflow);
  11673. {
  11674. unsigned startTime = msTick();
  11675. mergeThorGraphs(curWorkflow, options.resourceConditionalActions, options.resourceSequential); // reduces number of graphs sent to thor
  11676. updateTimer("workunit;tree transform: merge thor", msTick()-startTime);
  11677. }
  11678. traceExpressions("merged", curWorkflow);
  11679. checkNormalized(curWorkflow);
  11680. if (queryOptions().normalizeLocations)
  11681. normalizeAnnotations(*this, curWorkflow.queryExprs());
  11682. spotGlobalCSE(curWorkflow); // spot CSE within those graphs, and create some more
  11683. checkNormalized(curWorkflow);
  11684. //expandGlobalDatasets(workflow, wu(), *this);
  11685. {
  11686. unsigned startTime = msTick();
  11687. mergeThorGraphs(curWorkflow, options.resourceConditionalActions, options.resourceSequential);
  11688. updateTimer("workunit;tree transform: merge thor", msTick()-startTime);
  11689. }
  11690. checkNormalized(curWorkflow);
  11691. removeTrivialGraphs(curWorkflow);
  11692. checkNormalized(curWorkflow);
  11693. }
  11694. #ifndef PICK_ENGINE_EARLY
  11695. if (options.pickBestEngine)
  11696. pickBestEngine(workflow);
  11697. #endif
  11698. updateClusterType();
  11699. ForEachItemIn(i2, workflow)
  11700. {
  11701. WorkflowItem & curWorkflow = workflow.item(i2);
  11702. traceExpressions("before convert to logical", curWorkflow);
  11703. convertLogicalToActivities(curWorkflow); // e.g., merge disk reads, transform group, all to sort etc.
  11704. #ifndef _DEBUG
  11705. if (options.regressionTest)
  11706. #endif
  11707. {
  11708. unsigned startTime = msTick();
  11709. checkDependencyConsistency(curWorkflow.queryExprs());
  11710. updateTimer("workunit;tree transform: check dependency", msTick()-startTime);
  11711. }
  11712. traceExpressions("end transformGraphForGeneration", curWorkflow);
  11713. checkNormalized(curWorkflow);
  11714. }
  11715. return true;
  11716. }
  11717. //---------------------------------------------------------------------------
  11718. /*
  11719. Different transformers:
  11720. merge: required if a child gets removed or merged with a parent of a non-table dataset.
  11721. adding is not a problem (unless tables are inserted) because all refs to ds.x will remain valid.
  11722. but if tables can be deleted/modified then it will cause problems, since ds.x needs to be translated to ds'.x within the scope
  11723. It is also ok if only scalars are transformed.
  11724. Transformer base merge dependants [should be]
  11725. filterExtractor simple N
  11726. resource Scoped (Y) complex - could possibly derive from merging...? why scoped?
  11727. HqlThorBoundary New N
  11728. HqlResult New N isConditional,insideThor,insideCondition
  11729. creation of getresult only done on scalars, so I think it is ok.
  11730. could possibly remove the scoping if count() etc. were tagged as outer level or not
  11731. ThorHql Merging Y Again scoped because of no_count etc.
  11732. CompoundSource New add* Either added, or non-tables(limits) are cloned, so no merging issues.
  11733. CompoundActivity Merging Y When limit merged into a dataset. [ need a new way? ]
  11734. Workflow New [inTransform] I think no for the same reason as above, or it adds layers.
  11735. NewScopeMigrate New ? I think it might be ok, because doesn't modify any tables, only scalars
  11736. ThorCount New N no issues.
  11737. Cse New ? Probably, but ok, if only done on scalars.
  11738. HqlTreeNormalizer Scoped * I don't think it does any, but need to be careful none are introduced.
  11739. */
  11740. /*
  11741. NOTES:
  11742. Consider adding a hqlmeta.hpp that defines all the characteristics of an IHqlExpression node - e.g.,
  11743. is it constant, number of child files, text, what filenames does it generate? what does it read,
  11744. what results does it read/write.
  11745. Dependency code:
  11746. 1. In the resourcer
  11747. 2. TableDependencies In hqlttcpp to stop reordering when not valid.
  11748. ?Is GetResultHash called on globals that haven't been calculated???
  11749. */