hqlmeta.cpp 118 KB


  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "jliball.hpp"
  14. #include "hql.hpp"
  15. #include "eclrtl.hpp"
  16. #include "platform.h"
  17. #include "jlib.hpp"
  18. #include "jmisc.hpp"
  19. #include "jstream.ipp"
  20. #include "hql.hpp"
  21. #include "hqlexpr.hpp"
  22. #include "hqlutil.hpp"
  23. #include "hqlpmap.hpp"
  24. #include "hqlfold.hpp"
  25. #include "hqlerrors.hpp"
  26. #include "hqltrans.ipp"
  27. #include "hqlthql.hpp"
  28. #include "deffield.hpp"
  29. #include "workunit.hpp"
  30. #include "jencrypt.hpp"
  31. #include "hqlattr.hpp"
  32. #include "hqlerror.hpp"
  33. #include "hqlmeta.hpp"
  34. //#define OPTIMIZATION2
  35. static IHqlExpression * cacheGroupedElement;
  36. static IHqlExpression * cacheUnknownAttribute;
  37. static IHqlExpression * cacheIndeterminateAttribute;
  38. static IHqlExpression * cacheUnknownSortlist;
  39. static IHqlExpression * cacheIndeterminateSortlist;
  40. static IHqlExpression * cacheMatchGroupOrderSortlist;
  41. static IHqlExpression * cached_omitted_Attribute;
  42. static IHqlExpression * cacheAnyAttribute;
  43. static IHqlExpression * cacheAnyOrderSortlist;
  44. static CHqlMetaProperty * nullMetaProperty;
  45. static CHqlMetaProperty * nullGroupedMetaProperty;
  46. MODULE_INIT(INIT_PRIORITY_STANDARD)
  47. {
  48. IAtom * groupedOrderAtom = createAtom("{group-order}");
  49. IAtom * anyOrderAtom = createAtom("{any}");
  50. cacheGroupedElement = createAttribute(groupedOrderAtom);
  51. cacheUnknownAttribute = createAttribute(unknownAtom);
  52. cacheIndeterminateAttribute = createAttribute(indeterminateAtom);
  53. cacheAnyAttribute = createAttribute(anyOrderAtom);
  54. cached_omitted_Attribute = createAttribute(_omitted_Atom);
  55. cacheUnknownSortlist = createValue(no_sortlist, makeSortListType(NULL), LINK(cacheUnknownAttribute));
  56. cacheIndeterminateSortlist = createValue(no_sortlist, makeSortListType(NULL), LINK(cacheIndeterminateAttribute));
  57. cacheMatchGroupOrderSortlist = createValue(no_sortlist, makeSortListType(NULL), LINK(cacheGroupedElement));
  58. cacheAnyOrderSortlist = createValue(no_sortlist, makeSortListType(NULL), LINK(cacheAnyAttribute));
  59. nullMetaProperty = new CHqlMetaProperty;
  60. nullGroupedMetaProperty = new CHqlMetaProperty;
  61. nullGroupedMetaProperty->meta.setUnknownGrouping();
  62. return true;
  63. }
  64. MODULE_EXIT()
  65. {
  66. nullGroupedMetaProperty->Release();
  67. nullMetaProperty->Release();
  68. cached_omitted_Attribute->Release();
  69. cacheAnyOrderSortlist->Release();
  70. cacheMatchGroupOrderSortlist->Release();
  71. cacheAnyAttribute->Release();
  72. cacheIndeterminateSortlist->Release();
  73. cacheUnknownSortlist->Release();
  74. cacheIndeterminateAttribute->Release();
  75. cacheUnknownAttribute->Release();
  76. cacheGroupedElement->Release();
  77. }
  78. /*
  79. This file contains the logic for maintaining the dataset information about the distribution, sort order and grouping of a dataset.
  80. It is called from createDataset() to set up the type information, and from various optimization routines to determine the best way to
  81. implement an activity - e.g, grouping depending on the current distribution.
  82. The following information is maintained:
  83. Distribution
  84. - Either the expression that was used in the DISTRIBUTE() activity, or a sorted attribute with the sort order (no_sortlist) as a child
  85. - or an unknownAttribute if unknown.
  86. Global sort order
  87. - The sort order maintained across all nodes.
  88. - Should never have trailing unknown.
  89. Local ungrouped sort order
  90. - The order of the records on each individual node, excluding anything within a grouping condition
  91. - Can have trailing unknown if a mapping is lost and the dataset is grouped.
  92. - can be set to a special grouping sort list if grouped and couldn't split out the grouping.
  93. Grouping
  94. - Which expressions was the dataset grouped by.
  95. - contains unknown attributes in the grouping elements???
  96. Group sort order
  97. - What is the current order within the group.
  98. - Should never have a trailing unknown attribute
  99. The types are processed with the following in mind:
  100. Projects:
  101. * Projections (or any other transforms) can cause fields referenced in the meta information to be modified
  102. - It can be mapped to an expression.
  103. - It could be mapped to a constant
  104. - The field cannot be mapped.
  105. Distributions:
  106. * A global sort activity ensures that each row with the same sort component fields will end up on the same node. This means it is important to know
  107. if we have lost track of one of the sort components, and not just lose it. Otherwise we might assume the dataset is distributed by (x,y) when it
  108. is actually distributed by (x,y,z) - meaning some values of (x,y) may be on different nodes.
  109. It may still be more efficient to group by (x,y) though instead of redistributing by (x,y).
  110. * A distribute activity similarly ensures that all rows with the same distribute expression end up on the same node.
  111. * For both of these it means that a pre-existing distribution is sufficient if it is a subset of a following required distribution.
  112. Sort orders
  113. * It is fine to lose trailing sort-orders that are unknown, but need to be very careful that the order is correctly preserved when a grouped dataset is ungrouped.
  114. * I assert there is no operation which can modify the local sort order without modifying the global sort order. (The global order is never going to be better
  115. than the local order). This means hthor/roxie only need to look at the local sort order, instead of the global sort order.
  116. * The local and global sort orders are tracked excluding any sort order
  117. Grouping
  118. - All fields that are grouped are maintained, not just the leading ones. This is so that when a record is ungrouped, all leading sort components can be preserved??
  119. A single row is always sorted and distributed as you would like.
  120. Problems of constants, unknown attributes and duplicates:
  121. - Duplicates are always removed from sort lists, and group lists before comparing.
  122. - Constant percolation can sometimes cause constants to appear in sort lists. They should also ideally be removed, but that may
  123. cause us to miss matching sort orders.
  124. =>
  125. - Distributions are either fully mapped, unknown or sorted(list of components, terminated by unknown). This latter may possibly be useful for
  126. knowing we could use group to subsort instead of resorting.
  127. - Global sort lists do not include any trailing unknown attributes.
  128. - Local sort lists may contain a trailing unknown attribute if they are partial.
  129. - Grouped sort lists do not include unknown attributes.
  130. - Grouping information contains unknown attributes for each unknown item.
  131. - Constants and duplicates are always removed (both from meta information and before matching)
  132. The following are a sample of some of the key activities and how they affect the meta information:
  133. * CHOOSEN
  134. - Although this is sequential, it actually preserves all of the information since no records are moved.
  135. * GLOBAL SORT(x,y)
  136. - Changes the distribution to sorted([x,y])
  137. - Changes global and local sort orders to [x,y]
  138. * LOCAL SORT(x,y)
  139. - Changes the local sort order to [x,y]
  140. - Changes the global sort order to the leading intersection of the current global sort order and the new local sort order.
  141. * GROUPED SORT(x,y)
  142. - Modifies the grouped sort order
  143. - If the local order is grouped attribute, set it to unknown.
  144. - Truncate the global sort order at the last component which is found in the current grouping.
  145. - May indirectly affect the global/local sort orders since they are derived by combining with the grouped order.
  146. * GROUP (x,y)
  147. - If non local then the distribution is invalidated.
  148. - Global sort order is preserved as-is.
  149. - Local sort order is split in two.
  150. The last sort component which is included in the grouping condition (and any trailing constants) marks the end of the local order.
  151. The trailing components become the grouped sort order.
  152. - If none of the sort components occur in the grouping condition, then set the local order to a "grouping attribute".
  153. * UNGROUP (x,y)
  154. - Distribution and global sort order are preserved.
  155. - If the local sort order is a grouping attribute, set it to the grouped sort order.
  156. - If the local sort order ends with an unknown attribute, truncate it, otherwise merged with the grouped order.
  157. * DISTRIBUTE (x)
  158. - set distribution to x
  159. - invalidate local and global sort orders
  160. * DISTRIBUTE (x,MERGE)
  161. - set distribution to x
  162. - invalidate global sort order
  163. - set local sort order to merge criteria.
  164. * PROJECT
  165. - Distribution: If a distribution fails to be mapped fully it is replaced with a unknownAttribute.
  166. - Sortlist: Any components after the first component that fails to be mapped are replaced with an unknownAttribute
  167. - Grouping: All grouping elements that fail to be matched are replaced with an unknownAttribute. (But trailing components are preserved).
  168. Optimizations:
  169. Grouping:
  170. - A global group can be done locally if
  171. i) The distribution was a sort, and the trailing component of the sort is included in the grouping criteria.
  172. ii) The distribution was a non-sort and all fields in the distribution are included in the grouping criteria.
  173. iii) The dataset was previously grouped by a subset of the new grouping fields.
  174. - A group,all (local or global) can become a group if (following above)
  175. i) Each of the grouping criteria (reordered) match the leading components of the appropriate sort (including extra constants).
  176. This also has implications for re-ordering join conditions to ensure they are done in the best order.
  177. - Sometimes for an implicit grouping (e.g., group aggregate), if the distribution partially matches it should still be used.
  178. Sort:
  179. - A local sort can potentially be optimized to a group sort if
  180. i) Leading components of the sorts match, and are significant enough.
  181. codistribute (e.g., cogroup):
  182. - Make use of any existing distributions to minimize the transfer.
  183. *** Implement cogroup() as an example.
  184. COGROUP(a,b)
  185. Check a first, and then b:
  186. If a is sufficiently distributed:
  187. i) If distributed by a sort, generate a cosort, optionally locally sort a, and then local merge.
  188. ii) If keyed distributed then could do a key distr, then local cogroup
  189. iii) If distributed
  190. a) If a and b are locally sorted use distribute,merge on b, then local merge.
  191. b) if a is locally sorted, use distribute(b)->localsort->localmerge
  192. c) If neither sorted use (a+distribute(b))->local sort
  193. NOTE: If one side is distributed(f(x,y)),sorted(x,y); other is distributed(f(x),sorted(x)) It will be best to use the second since we could use a merge distribute.
  194. iv) Take account of the expected number of rows - ignore if expect to have few rows.
  195. Local cogroup:
  196. if (left and right sufficiently sorted)
  197. pick the shortest (but not a single row)
  198. ensure other side is sorted the same
  199. perform a local merge.
  200. if (either side sufficiently sorted and not a singlerow)
  201. ensure other side is sorted the same
  202. perform a local merge.
  203. else
  204. append and locally sort.
  205. Also should have
  206. NORMALIZE(ds, group, f(rows(left)));
  207. which can be used for the result of a cogroup to perform a similar function to REDUCE in hadoop etc.
  208. Implement at the same time as the child query variety of NORMALIZE
  209. */
  210. IHqlExpression * queryUnknownAttribute() { return cacheUnknownAttribute; }
  211. IHqlExpression * queryMatchGroupOrderSortlist() { return cacheMatchGroupOrderSortlist; }
  212. IHqlExpression * queryUnknownSortlist() { return cacheUnknownSortlist; }
  213. IHqlExpression * getUnknownAttribute() { return LINK(cacheUnknownAttribute); }
  214. IHqlExpression * getMatchGroupOrderSortlist() { return LINK(cacheMatchGroupOrderSortlist); }
  215. IHqlExpression * getUnknownSortlist() { return LINK(cacheUnknownSortlist); }
  216. IHqlExpression * queryAnyOrderSortlist() { return cacheAnyOrderSortlist; }
  217. IHqlExpression * queryAnyDistributionAttribute() { return cacheAnyAttribute; }
  218. CHqlMetaProperty * queryNullMetaProperty(bool isGrouped) { return isGrouped ? nullGroupedMetaProperty : nullMetaProperty; }
  219. bool hasKnownSortlist(IHqlExpression * sortlist)
  220. {
  221. if (!sortlist)
  222. return false;
  223. unsigned max = sortlist->numChildren();
  224. if (max == 0)
  225. return false;
  226. return (sortlist->queryChild(max-1)->queryName() != unknownAtom);
  227. }
  228. bool CHqlMetaInfo::appearsToBeSorted(bool isLocal, bool ignoreGrouping)
  229. {
  230. if (isLocal)
  231. return localUngroupedSortOrder != NULL;
  232. if (!ignoreGrouping && grouping)
  233. return groupSortOrder != NULL;
  234. return globalSortOrder != NULL;
  235. }
  236. void CHqlMetaInfo::clearGrouping()
  237. {
  238. if (grouping)
  239. {
  240. grouping.clear();
  241. groupSortOrder.clear();
  242. }
  243. }
  244. void CHqlMetaInfo::ensureAppearsSorted(bool isLocal, bool ignoreGrouping)
  245. {
  246. if (!appearsToBeSorted(isLocal, false))
  247. {
  248. IHqlExpression * unknownOrder = queryUnknownSortlist();
  249. if (isGrouped())
  250. applyGroupSort(unknownOrder);
  251. else if (isLocal)
  252. applyLocalSort(unknownOrder);
  253. else
  254. {
  255. globalSortOrder.set(unknownOrder);
  256. localUngroupedSortOrder.set(unknownOrder);
  257. }
  258. }
  259. }
  260. bool CHqlMetaInfo::hasKnownSortGroupDistribution(bool isLocal) const
  261. {
  262. if (!isLocal)
  263. {
  264. if (!distribution || (distribution->queryName() == unknownAtom))
  265. return false;
  266. if (!hasKnownSortlist(globalSortOrder))
  267. return false;
  268. }
  269. else
  270. {
  271. if (!hasKnownSortlist(localUngroupedSortOrder))
  272. return false;
  273. }
  274. if (!grouping)
  275. return true;
  276. if (grouping->queryName() == unknownAtom)
  277. return false;
  278. if (!hasKnownSortlist(groupSortOrder))
  279. return false;
  280. return true;
  281. }
  282. bool CHqlMetaInfo::hasUsefulInformation() const
  283. {
  284. return (distribution && containsActiveDataset(distribution)) ||
  285. (globalSortOrder && containsActiveDataset(globalSortOrder)) ||
  286. (localUngroupedSortOrder && containsActiveDataset(localUngroupedSortOrder)) ||
  287. (grouping && containsActiveDataset(grouping)) ||
  288. (groupSortOrder && containsActiveDataset(groupSortOrder));
  289. }
  290. bool CHqlMetaInfo::matches(const CHqlMetaInfo & other) const
  291. {
  292. return (distribution == other.distribution) &&
  293. (globalSortOrder == other.globalSortOrder) &&
  294. (localUngroupedSortOrder == other.localUngroupedSortOrder) &&
  295. (grouping == other.grouping) &&
  296. (groupSortOrder == other.groupSortOrder);
  297. }
  298. void CHqlMetaInfo::preserveGrouping(IHqlExpression * dataset)
  299. {
  300. if (::isGrouped(dataset))
  301. setUnknownGrouping();
  302. else
  303. grouping.clear();
  304. }
  305. void CHqlMetaInfo::removeAllAndUngroup(bool isLocal)
  306. {
  307. if (!isLocal)
  308. distribution.clear();
  309. globalSortOrder.clear();
  310. localUngroupedSortOrder.clear();
  311. clearGrouping();
  312. }
  313. void CHqlMetaInfo::removeAllKeepGrouping()
  314. {
  315. distribution.clear();
  316. globalSortOrder.clear();
  317. localUngroupedSortOrder.clear();
  318. if (grouping)
  319. {
  320. grouping.setown(getUnknownSortlist());
  321. groupSortOrder.clear();
  322. }
  323. }
  324. void CHqlMetaInfo::removeAllSortOrders()
  325. {
  326. globalSortOrder.clear();
  327. localUngroupedSortOrder.clear();
  328. groupSortOrder.clear();
  329. }
  330. void CHqlMetaInfo::removeDistribution()
  331. {
  332. distribution.clear();
  333. }
  334. void CHqlMetaInfo::set(const CHqlMetaInfo & other)
  335. {
  336. distribution.set(other.distribution);
  337. globalSortOrder.set(other.globalSortOrder);
  338. localUngroupedSortOrder.set(other.localUngroupedSortOrder);
  339. grouping.set(other.grouping);
  340. groupSortOrder.set(other.groupSortOrder);
  341. }
  342. void CHqlMetaInfo::setMatchesAny()
  343. {
  344. distribution.set(queryAnyDistributionAttribute());
  345. globalSortOrder.set(queryAnyOrderSortlist());
  346. localUngroupedSortOrder.set(queryAnyOrderSortlist());
  347. }
  348. void CHqlMetaInfo::setUnknownDistribution()
  349. {
  350. distribution.setown(getUnknownAttribute());
  351. }
  352. void CHqlMetaInfo::setUnknownGrouping()
  353. {
  354. grouping.setown(getUnknownSortlist());
  355. }
  356. //---------------------------------------------------------------------------------------------------------------------
  357. class JoinEqualityMapper
  358. {
  359. public:
  360. inline JoinEqualityMapper(IHqlExpression * joinExpr)
  361. {
  362. left = joinExpr->queryChild(0);
  363. right = joinExpr->queryChild(1);
  364. selSeq = querySelSeq(joinExpr);
  365. }
  366. IHqlExpression * mapEqualities(IHqlExpression * expr, IHqlExpression * cond)
  367. {
  368. if (cond->getOperator() == no_assertkeyed)
  369. cond = cond->queryChild(0);
  370. if (cond->getOperator() == no_and)
  371. {
  372. OwnedHqlExpr mapped = mapEqualities(expr, cond->queryChild(0));
  373. return mapEqualities(mapped, cond->queryChild(1));
  374. }
  375. else if (cond->getOperator() == no_eq)
  376. {
  377. IHqlExpression * lhs = cond->queryChild(0);
  378. IHqlExpression * rhs = cond->queryChild(1);
  379. if (lhs->queryType() == rhs->queryType())
  380. {
  381. IHqlExpression * leftSelect = queryDatasetCursor(lhs);
  382. IHqlExpression * rightSelect = queryDatasetCursor(rhs);
  383. if (isLeft(leftSelect) && isRight(rightSelect))
  384. return replaceExpression(expr, rhs, lhs);
  385. if (isRight(leftSelect) && isLeft(rightSelect))
  386. return replaceExpression(expr, lhs, rhs);
  387. }
  388. }
  389. return LINK(expr);
  390. }
  391. protected:
  392. inline bool isMatch(IHqlExpression * expr, node_operator op, IHqlExpression * side)
  393. {
  394. return (expr->getOperator() == op) &&
  395. (expr->queryRecord()->queryBody() == side->queryRecord()->queryBody()) &&
  396. (expr->queryChild(1) == selSeq);
  397. }
  398. inline bool isLeft(IHqlExpression * expr) { return isMatch(expr, no_left, left); }
  399. inline bool isRight(IHqlExpression * expr) { return isMatch(expr, no_right, right); }
  400. protected:
  401. IHqlExpression * left;
  402. IHqlExpression * right;
  403. IHqlExpression * selSeq;
  404. };
  405. //---------------------------------------------------------------------------------------------------------------------
  406. inline bool matchesGroupOrder(IHqlExpression * expr) { return expr == cacheMatchGroupOrderSortlist; }
  407. bool hasTrailingGroupOrder(IHqlExpression * expr)
  408. {
  409. if (expr)
  410. {
  411. unsigned max = expr->numChildren();
  412. if (max)
  413. return expr->queryChild(max-1) == cacheGroupedElement;
  414. }
  415. return false;
  416. }
  417. //---------------------------------------------------------------------------------------------
  418. // Helper functions for processing the basic lists
  419. //return true if identical
  420. bool intersectList(HqlExprArray & target, const HqlExprArray & left, const HqlExprArray & right)
  421. {
  422. unsigned max = left.ordinality();
  423. for (unsigned i= 0; i < max; i++)
  424. {
  425. if (!right.isItem(i))
  426. return false;
  427. IHqlExpression & cur = left.item(i);
  428. if (&cur != &right.item(i))
  429. return false;
  430. target.append(OLINK(cur));
  431. }
  432. if (right.isItem(max))
  433. return false;
  434. return true;
  435. }
  436. IHqlExpression * createSubSortlist(IHqlExpression * sortlist, unsigned from, unsigned to, IHqlExpression * subsetAttr)
  437. {
  438. if (from == to)
  439. return NULL;
  440. if ((from == 0) && (to == sortlist->numChildren()))
  441. return LINK(sortlist);
  442. HqlExprArray components;
  443. unwindChildren(components, sortlist, from, to);
  444. if (subsetAttr)
  445. components.append(*LINK(subsetAttr));
  446. return createSortList(components);
  447. }
  448. void removeDuplicates(HqlExprArray & components)
  449. {
  450. unsigned max = components.ordinality();
  451. if (max == 0)
  452. return;
  453. for (unsigned i=max-1; i != 0; i--)
  454. {
  455. IHqlExpression & cur = components.item(i);
  456. unsigned match = components.find(cur);
  457. if (match != i)
  458. components.remove(i);
  459. }
  460. }
  461. static bool hasUnknownComponent(HqlExprArray & components)
  462. {
  463. if (components.ordinality())
  464. {
  465. IHqlExpression & last = components.tos();
  466. return last.isAttribute() && last.queryName() == unknownAtom;
  467. }
  468. return false;
  469. }
  470. void unwindNormalizeSortlist(HqlExprArray & args, IHqlExpression * src, bool removeAttributes)
  471. {
  472. if (!src)
  473. return;
  474. ForEachChild(i, src)
  475. {
  476. IHqlExpression * cur = src->queryChild(i);
  477. if (!cur->queryValue() && !(removeAttributes && cur->isAttribute()))
  478. args.append(*LINK(cur->queryBody()));
  479. }
  480. }
  481. void normalizeComponents(HqlExprArray & args, const HqlExprArray & src)
  482. {
  483. ForEachItemIn(i, src)
  484. {
  485. IHqlExpression * cur = &src.item(i);
  486. if (!cur->queryValue())
  487. args.append(*LINK(cur->queryBody()));
  488. }
  489. removeDuplicates(args);
  490. }
  491. IHqlExpression * getIntersectingSortlist(IHqlExpression * left, IHqlExpression * right, IHqlExpression * subsetAttr)
  492. {
  493. if (!left || !right)
  494. return NULL;
  495. if (left == queryAnyOrderSortlist())
  496. return LINK(right);
  497. if (right == queryAnyOrderSortlist())
  498. return LINK(left);
  499. ForEachChild(i, left)
  500. {
  501. //This test also covers the case where one list is longer than the other...
  502. if (left->queryChild(i) != right->queryChild(i))
  503. return createSubSortlist(left, 0, i, subsetAttr);
  504. }
  505. return LINK(left);
  506. }
  507. //Find the intersection between left and (localOrder+groupOrder)
  508. IHqlExpression * getModifiedGlobalOrder(IHqlExpression * globalOrder, IHqlExpression * localOrder, IHqlExpression * groupOrder)
  509. {
  510. if (!globalOrder || !localOrder)
  511. return NULL;
  512. unsigned max1=0;
  513. if (!matchesGroupOrder(localOrder))
  514. {
  515. ForEachChild(i1, localOrder)
  516. {
  517. //This test also covers the case where one list is longer than the other...
  518. IHqlExpression * curLocal = localOrder->queryChild(i1);
  519. if (globalOrder->queryChild(i1) != curLocal)
  520. {
  521. if (curLocal == cacheGroupedElement)
  522. break;
  523. return createSubSortlist(globalOrder, 0, i1, NULL);
  524. }
  525. }
  526. max1 = localOrder->numChildren();
  527. }
  528. unsigned max2 = 0;
  529. if (groupOrder)
  530. {
  531. ForEachChild(i2, groupOrder)
  532. {
  533. //This test also covers the case where one list is longer than the other...
  534. if (globalOrder->queryChild(i2+max1) != groupOrder->queryChild(i2))
  535. return createSubSortlist(globalOrder, 0, i2+max1, NULL);
  536. }
  537. max2 = groupOrder->numChildren();
  538. }
  539. return createSubSortlist(globalOrder, 0, max1+max2, NULL);
  540. }
  541. static IHqlExpression * normalizeSortlist(IHqlExpression * sortlist)
  542. {
  543. if (!sortlist)
  544. return NULL;
  545. HqlExprArray components;
  546. unwindNormalizeSortlist(components, sortlist, false);
  547. removeDuplicates(components);
  548. //This never returns NULL if the input was non-null
  549. return createSortList(components);
  550. }
  551. inline IHqlExpression * normalizeSortlist(IHqlExpression * sortlist, IHqlExpression * dataset)
  552. {
  553. if (!sortlist)
  554. return NULL;
  555. OwnedHqlExpr mapped = replaceSelector(sortlist, dataset, queryActiveTableSelector());
  556. return normalizeSortlist(mapped);
  557. }
  558. IHqlExpression * normalizeDistribution(IHqlExpression * distribution)
  559. {
  560. return LINK(distribution);
  561. }
  562. static bool sortComponentMatches(IHqlExpression * curNew, IHqlExpression * curExisting)
  563. {
  564. IHqlExpression * newBody = curNew->queryBody();
  565. IHqlExpression * existingBody = curExisting->queryBody();
  566. if (newBody == existingBody)
  567. return true;
  568. ITypeInfo * newType = curNew->queryType();
  569. ITypeInfo * existingType = curExisting->queryType();
  570. //A local sort by (string)qstring is the same as by qstring....
  571. if (isCast(curNew) && (curNew->queryChild(0)->queryBody() == existingBody))
  572. {
  573. if (preservesValue(newType, existingType) && preservesOrder(newType, existingType))
  574. return true;
  575. }
  576. // a sort by qstring is the same as by (string)qstring.
  577. if (isCast(curExisting) && (newBody == curExisting->queryChild(0)->queryBody()))
  578. {
  579. if (preservesValue(existingType, newType) && preservesOrder(existingType, newType))
  580. return true;
  581. }
  582. // (cast:z)x should match (implicit-cast:z)x
  583. if (isCast(curNew) && isCast(curExisting) && (newType==existingType))
  584. if (curNew->queryChild(0)->queryBody() == curExisting->queryChild(0)->queryBody())
  585. return true;
  586. return false;
  587. }
  588. //---------------------------------------------------------------------------------------------
  589. bool isKnownDistribution(IHqlExpression * distribution)
  590. {
  591. return distribution && (distribution != queryUnknownAttribute());
  592. }
  593. bool isSortedDistribution(IHqlExpression * distribution)
  594. {
  595. return distribution && (distribution->queryName() == sortedAtom);
  596. }
  597. bool isPersistDistribution(IHqlExpression * distribution)
  598. {
  599. return isKnownDistribution(distribution) && (distribution->getOperator() == no_bxor);
  600. }
  601. void extractMeta(CHqlMetaInfo & meta, IHqlExpression * expr)
  602. {
  603. CHqlMetaProperty * match = queryMetaProperty(expr);
  604. meta.set(match->meta);
  605. }
  606. IHqlExpression * queryGrouping(IHqlExpression * expr)
  607. {
  608. if (!expr->isDataset())
  609. return NULL;
  610. return queryMetaProperty(expr)->meta.grouping;
  611. }
  612. IHqlExpression * queryDistribution(IHqlExpression * expr)
  613. {
  614. if (!expr->isDataset())
  615. return NULL;
  616. return queryMetaProperty(expr)->meta.distribution;
  617. }
  618. IHqlExpression * queryGlobalSortOrder(IHqlExpression * expr)
  619. {
  620. if (!expr->isDataset())
  621. return NULL;
  622. return queryMetaProperty(expr)->meta.globalSortOrder;
  623. }
  624. IHqlExpression * queryLocalUngroupedSortOrder(IHqlExpression * expr)
  625. {
  626. if (!expr->isDataset())
  627. return NULL;
  628. return queryMetaProperty(expr)->meta.localUngroupedSortOrder;
  629. }
  630. IHqlExpression * queryGroupSortOrder(IHqlExpression * expr)
  631. {
  632. if (!expr->isDataset())
  633. return NULL;
  634. return queryMetaProperty(expr)->meta.groupSortOrder;
  635. }
  636. //What is the actual local sort order at the moment - ignoring any grouping.
  637. IHqlExpression * CHqlMetaInfo::getLocalSortOrder() const
  638. {
  639. IHqlExpression * localOrder = localUngroupedSortOrder;
  640. if (!isGrouped())
  641. return LINK(localOrder);
  642. if (!localOrder)
  643. return NULL;
  644. IHqlExpression * groupOrder = groupSortOrder;
  645. if (matchesGroupOrder(localOrder))
  646. return LINK(groupOrder);
  647. HqlExprArray components;
  648. unwindChildren(components, localOrder);
  649. if (!hasUnknownComponent(components))
  650. {
  651. if (components.length() && (&components.tos() == cacheGroupedElement))
  652. components.pop();
  653. if (groupOrder)
  654. {
  655. unwindChildren(components, groupOrder);
  656. if (hasUnknownComponent(components))
  657. components.pop();
  658. }
  659. }
  660. else
  661. components.pop();
  662. if (components.ordinality())
  663. {
  664. removeDuplicates(components);
  665. return createSortList(components);
  666. }
  667. return NULL;
  668. }
  669. inline IHqlExpression * getLocalSortOrder(IHqlExpression * expr)
  670. {
  671. CHqlMetaProperty * metaProp = queryMetaProperty(expr);
  672. return metaProp->meta.getLocalSortOrder();
  673. }
  674. inline IHqlExpression * getGlobalSortOrder(IHqlExpression * expr)
  675. {
  676. CHqlMetaProperty * metaProp = queryMetaProperty(expr);
  677. return LINK(metaProp->meta.globalSortOrder);
  678. }
  679. //---------------------------------------------------------------------------------------------
  680. // Helper functions for handling field projection
  681. extern HQL_API IHqlExpression * mapJoinDistribution(TableProjectMapper & mapper, IHqlExpression * distribution, IHqlExpression * side)
  682. {
  683. bool doneAll = false;
  684. IHqlExpression * activeSelector = queryActiveTableSelector();
  685. OwnedHqlExpr mapped = mapper.collapseFields(distribution, activeSelector, activeSelector, side, &doneAll);
  686. if (doneAll)
  687. return mapped.getClear();
  688. return NULL;
  689. }
  690. extern HQL_API IHqlExpression * mapDistribution(IHqlExpression * distribution, TableProjectMapper & mapper)
  691. {
  692. if (!distribution)
  693. return NULL;
  694. bool matchedAll = false;
  695. IHqlExpression * activeSelector = queryActiveTableSelector();
  696. OwnedHqlExpr mapped = mapper.collapseFields(distribution, activeSelector, activeSelector, &matchedAll);
  697. if (matchedAll)
  698. return mapped.getClear();
  699. return getUnknownAttribute();
  700. }
  701. extern HQL_API IHqlExpression * mapSortOrder(IHqlExpression * order, TableProjectMapper & mapper, bool appendUnknownIfTruncated)
  702. {
  703. if (!order)
  704. return NULL;
  705. IHqlExpression * activeSelector = queryActiveTableSelector();
  706. HqlExprArray newComponents;
  707. ForEachChild(idx, order)
  708. {
  709. bool matchedAll;
  710. IHqlExpression * cur = order->queryChild(idx);
  711. OwnedHqlExpr mapped = mapper.collapseFields(cur, activeSelector, activeSelector, &matchedAll);
  712. if (!matchedAll)
  713. {
  714. //If sorted by x,y,z and grouped by x, need to retain knowledge that it was sorted outside the
  715. //group, otherwise a subsequent local sort will appear as a gloabal sort after a degroup.
  716. if (appendUnknownIfTruncated)
  717. newComponents.append(*getUnknownAttribute());
  718. break;
  719. }
  720. newComponents.append(*mapped.getClear());
  721. }
  722. if (newComponents.ordinality() == 0)
  723. return NULL;
  724. HqlExprArray normalizedComponents;
  725. normalizeComponents(normalizedComponents, newComponents);
  726. //?return NULL if no elements??
  727. return order->clone(normalizedComponents);
  728. }
  729. extern HQL_API IHqlExpression * mapGroup(IHqlExpression * grouping, TableProjectMapper & mapper)
  730. {
  731. if (!grouping)
  732. return grouping;
  733. assertex(grouping->getOperator() == no_sortlist);
  734. IHqlExpression * activeSelector = queryActiveTableSelector();
  735. HqlExprArray newGrouping;
  736. ForEachChild(idx, grouping)
  737. {
  738. bool matchedAll;
  739. IHqlExpression * cur = grouping->queryChild(idx);
  740. OwnedHqlExpr mapped = mapper.collapseFields(cur, activeSelector, activeSelector, &matchedAll);
  741. //If the group fields don't translate, replace each with a dummy grouping so still recognised as grouped
  742. if (!matchedAll)
  743. newGrouping.append(*getUnknownAttribute());
  744. else
  745. newGrouping.append(*mapped.getClear());
  746. }
  747. return grouping->clone(newGrouping);
  748. }
  749. //---------------------------------------------------------------------------------------------
  750. // functions used by the creation functions to create a modified type
  751. // They should be optimized to do the minimal work depending on whether the input is grouped.
  752. // any parameters should be mapped so they only refer to active tables
  753. void CHqlMetaInfo::removeGroup()
  754. {
  755. if (grouping)
  756. {
  757. localUngroupedSortOrder.setown(getLocalSortOrder());
  758. grouping.clear();
  759. groupSortOrder.clear();
  760. }
  761. }
  762. static bool matchesGroupBy(IHqlExpression * groupBy, IHqlExpression * cur)
  763. {
  764. if (sortComponentMatches(groupBy, cur))
  765. return true;
  766. if (cur->getOperator() == no_negate)
  767. return sortComponentMatches(groupBy, cur->queryChild(0));
  768. return false;
  769. }
  770. static bool withinGroupBy(const HqlExprArray & groupBy, IHqlExpression * cur)
  771. {
  772. ForEachItemIn(i, groupBy)
  773. {
  774. if (matchesGroupBy(&groupBy.item(i), cur))
  775. return true;
  776. }
  777. return false;
  778. }
  779. static bool groupByWithinSortOrder(IHqlExpression * groupBy, IHqlExpression * order)
  780. {
  781. ForEachChild(i, order)
  782. {
  783. if (matchesGroupBy(groupBy, order->queryChild(i)))
  784. return true;
  785. }
  786. return false;
  787. }
  788. //NB: This does not handle ALL groups that is handled in createDataset()
  789. void CHqlMetaInfo::applyGroupBy(IHqlExpression * groupBy, bool isLocal)
  790. {
  791. removeGroup();
  792. OwnedHqlExpr newGrouping = normalizeSortlist(groupBy);
  793. IHqlExpression * localOrder = localUngroupedSortOrder;
  794. OwnedHqlExpr newLocalOrder;
  795. OwnedHqlExpr newGroupOrder;
  796. if (localOrder)
  797. {
  798. HqlExprArray groupBy;
  799. if (newGrouping)
  800. unwindChildren(groupBy, newGrouping);
  801. //The local sort order is split into two.
  802. //Where depends on whether all the grouping conditions match sort elements.
  803. //MORE: Is there a good way to accomplish this withit iterating both ways round?
  804. bool allGroupingMatch = true;
  805. ForEachItemIn(i, groupBy)
  806. {
  807. IHqlExpression * groupElement = &groupBy.item(i);
  808. if (!groupByWithinSortOrder(groupElement, localOrder))
  809. {
  810. allGroupingMatch = false;
  811. break;
  812. }
  813. }
  814. unsigned max = localOrder->numChildren();
  815. unsigned firstGroup;
  816. if (allGroupingMatch)
  817. {
  818. //All grouping conditions match known sorts. Therefore the last local order component that is included in
  819. //the grouping condition is important. The order of all elements before that will be preserved if the
  820. //group is sorted.
  821. firstGroup = 0;
  822. for (unsigned i=max;i--!= 0;)
  823. {
  824. IHqlExpression * cur = localOrder->queryChild(i);
  825. if (withinGroupBy(groupBy, cur))
  826. {
  827. firstGroup = i+1;
  828. break;
  829. }
  830. }
  831. }
  832. else
  833. {
  834. //If one of the grouping conditions is not included in the sort order, and if the group is subsequently
  835. //sorted then the the state of the first element that doesn't match the grouping condition will be unknown.
  836. firstGroup = max;
  837. for (unsigned i=0;i<max;i++)
  838. {
  839. IHqlExpression * cur = localOrder->queryChild(i);
  840. if (!withinGroupBy(groupBy, cur))
  841. {
  842. firstGroup = i;
  843. break;
  844. }
  845. }
  846. }
  847. if (firstGroup == 0)
  848. {
  849. //mark the local ungrouped sort order with a special value so we can restore if order doesn't change.
  850. newLocalOrder.set(queryMatchGroupOrderSortlist());
  851. newGroupOrder.set(localOrder);
  852. }
  853. else
  854. {
  855. //Add a marker to the end of the first order if it the rest will become invalidated by a group sort
  856. IHqlExpression * subsetAttr = (!allGroupingMatch && (firstGroup != max)) ? cacheGroupedElement : NULL;
  857. newLocalOrder.setown(createSubSortlist(localOrder, 0, firstGroup, subsetAttr));
  858. newGroupOrder.setown(createSubSortlist(localOrder, firstGroup, max, NULL));
  859. }
  860. }
  861. if (!isLocal)
  862. distribution.clear();
  863. localUngroupedSortOrder.setown(newLocalOrder.getClear());
  864. grouping.setown(newGrouping.getClear());
  865. groupSortOrder.setown(newGroupOrder.getClear());
  866. }
  867. void CHqlMetaInfo::applyGlobalSort(IHqlExpression * sortOrder)
  868. {
  869. OwnedHqlExpr newSortOrder = normalizeSortlist(sortOrder);
  870. distribution.setown(createExprAttribute(sortedAtom, LINK(newSortOrder))); //, createUniqueId());
  871. globalSortOrder.set(newSortOrder);
  872. localUngroupedSortOrder.set(newSortOrder);
  873. }
  874. void CHqlMetaInfo::applyLocalSort(IHqlExpression * sortOrder)
  875. {
  876. clearGrouping();
  877. localUngroupedSortOrder.setown(normalizeSortlist(sortOrder));
  878. //The global sort order is maintained as the leading components that match.
  879. globalSortOrder.setown(getIntersectingSortlist(globalSortOrder, localUngroupedSortOrder, NULL));
  880. }
  881. void CHqlMetaInfo::applyGroupSort(IHqlExpression * sortOrder)
  882. {
  883. assertex(isGrouped());
  884. OwnedHqlExpr groupedOrder = normalizeSortlist(sortOrder);
  885. // if (groupedOrder == queryGroupSortOrder(prev))
  886. // return LINK(prev);
  887. IHqlExpression * globalOrder = globalSortOrder;
  888. IHqlExpression * localUngroupedOrder = localUngroupedSortOrder;
  889. //Group sort => make sure we no longer track it as the localsort
  890. OwnedHqlExpr newLocalUngroupedOrder;
  891. if (localUngroupedOrder && !matchesGroupOrder(localUngroupedOrder))
  892. {
  893. if (hasTrailingGroupOrder(localUngroupedOrder))
  894. {
  895. HqlExprArray components;
  896. unwindChildren(components, localUngroupedOrder);
  897. components.pop();
  898. components.append(*getUnknownAttribute());
  899. newLocalUngroupedOrder.setown(localUngroupedOrder->clone(components));
  900. }
  901. else
  902. newLocalUngroupedOrder.set(localUngroupedOrder);
  903. }
  904. OwnedHqlExpr newGlobalOrder = getModifiedGlobalOrder(globalOrder, newLocalUngroupedOrder, groupedOrder);
  905. globalSortOrder.setown(newGlobalOrder.getClear());
  906. localUngroupedSortOrder.setown(newLocalUngroupedOrder.getClear());
  907. groupSortOrder.setown(groupedOrder.getClear());
  908. }
  909. void CHqlMetaInfo::removeActiveSort()
  910. {
  911. if (isGrouped())
  912. applyGroupSort(NULL);
  913. else
  914. removeAllSortOrders();
  915. }
  916. void CHqlMetaInfo::applyDistribute(IHqlExpression * newDistribution, IHqlExpression * optMergeOrder)
  917. {
  918. distribution.setown(normalizeDistribution(newDistribution));
  919. localUngroupedSortOrder.setown(optMergeOrder ? normalizeSortlist(optMergeOrder) : NULL);
  920. //Theoretically a keyed distribute may create a global sort order if merging also specified.
  921. }
  922. //Used when there is an alternative - either left or right.
  923. //As long as the identical cases fall out fairly well it is probably not worth spending lots of time
  924. //getting it very accurate.
  925. void CHqlMetaInfo::getIntersection(const CHqlMetaInfo & rightMeta)
  926. {
  927. IHqlExpression * rightDist = rightMeta.distribution;
  928. if ((distribution == rightDist) || (rightDist == queryAnyDistributionAttribute()))
  929. {
  930. //keep existing distribution
  931. }
  932. else if (distribution == queryAnyDistributionAttribute())
  933. distribution.set(rightDist);
  934. else if (distribution && rightDist)
  935. distribution.set(queryUnknownAttribute());
  936. else
  937. distribution.clear();
  938. globalSortOrder.setown(getIntersectingSortlist(globalSortOrder, rightMeta.globalSortOrder, NULL));
  939. IHqlExpression * leftLocalOrder = localUngroupedSortOrder;
  940. IHqlExpression * rightLocalOrder = rightMeta.localUngroupedSortOrder;
  941. IHqlExpression * leftGrouping = grouping;
  942. IHqlExpression * rightGrouping = rightMeta.grouping;
  943. if (leftGrouping == queryAnyOrderSortlist())
  944. leftGrouping = rightGrouping;
  945. else if (rightGrouping == queryAnyOrderSortlist())
  946. rightGrouping = leftGrouping;
  947. OwnedHqlExpr newLocalOrder;
  948. OwnedHqlExpr newGrouping = (leftGrouping || rightGrouping) ? getUnknownSortlist() : NULL;
  949. if (leftGrouping == rightGrouping)
  950. newGrouping.set(leftGrouping);
  951. OwnedHqlExpr newGroupSortOrder;
  952. if (leftLocalOrder == rightLocalOrder)
  953. {
  954. newLocalOrder.set(leftLocalOrder);
  955. if (leftGrouping == rightGrouping)
  956. {
  957. newGroupSortOrder.setown(getIntersectingSortlist(groupSortOrder, rightMeta.groupSortOrder, NULL));
  958. }
  959. else
  960. {
  961. //Don't intersect the grouping - that may cause false results, and ignore any group ordering.
  962. }
  963. }
  964. else
  965. {
  966. //intersect local order - not worth doing anything else
  967. if (!matchesGroupOrder(leftLocalOrder) && !matchesGroupOrder(rightLocalOrder))
  968. {
  969. IHqlExpression * extraAttr = newGrouping ? queryUnknownAttribute() : NULL;
  970. newLocalOrder.setown(getIntersectingSortlist(leftLocalOrder, rightLocalOrder, extraAttr));
  971. }
  972. }
  973. //MORE: This could be cleaned up
  974. localUngroupedSortOrder.setown(newLocalOrder.getClear());
  975. grouping.setown(newGrouping.getClear());
  976. groupSortOrder.setown(newGroupSortOrder.getClear());
  977. }
  978. //Distribute is all or nothing
  979. //Global sort retains as main as significant
  980. //Local sort needs a trailing unknown marker if dataset is grouped
  981. //Grouping retains attributes in place of grouping elements
  982. //Group sort retains as much as possible.
  983. void CHqlMetaInfo::applyProject(TableProjectMapper & mapper)
  984. {
  985. distribution.setown(mapDistribution(distribution, mapper));
  986. globalSortOrder.setown(mapSortOrder(globalSortOrder, mapper, false));
  987. localUngroupedSortOrder.setown(mapSortOrder(localUngroupedSortOrder, mapper, (grouping != NULL)));
  988. if (grouping)
  989. {
  990. grouping.setown(mapGroup(grouping, mapper));
  991. groupSortOrder.setown(mapSortOrder(groupSortOrder, mapper, false));
  992. }
  993. }
  994. void extractMetaFromMetaAttr(CHqlMetaInfo & meta, IHqlExpression * attr, unsigned firstChild)
  995. {
  996. meta.distribution.set(queryRemoveOmitted(attr->queryChild(firstChild+0)));
  997. meta.globalSortOrder.set(queryRemoveOmitted(attr->queryChild(firstChild+1)));
  998. meta.localUngroupedSortOrder.set(queryRemoveOmitted(attr->queryChild(firstChild+2)));
  999. meta.grouping.set(queryRemoveOmitted(attr->queryChild(firstChild+3)));
  1000. meta.groupSortOrder.set(queryRemoveOmitted(attr->queryChild(firstChild+4)));
  1001. }
  1002. void CHqlMetaInfo::applySubSort(IHqlExpression * groupBy, IHqlExpression * sortOrder, bool isLocal)
  1003. {
  1004. applyGroupBy(groupBy, isLocal);
  1005. applyGroupSort(sortOrder);
  1006. removeGroup();
  1007. }
  1008. //---------------------------------------------------------------------------------------------
  1009. bool appearsToBeSorted(IHqlExpression * expr, bool isLocal, bool ignoreGrouping)
  1010. {
  1011. CHqlMetaProperty * metaProp = queryMetaProperty(expr);
  1012. return metaProp->meta.appearsToBeSorted(isLocal, ignoreGrouping);
  1013. }
  1014. //---------------------------------------------------------------------------------------------
  1015. // Helper functions for optimizing grouping operations
  1016. bool isSortedForGroup(IHqlExpression * table, IHqlExpression *sortList, bool isLocal)
  1017. {
  1018. assertex(sortList->getOperator()==no_sortlist);
  1019. OwnedHqlExpr existingOrder = isLocal ? getLocalSortOrder(table) : getGlobalSortOrder(table);
  1020. OwnedHqlExpr normalizedGroupList = normalizeSortlist(sortList, table);
  1021. unsigned numToGroup = normalizedGroupList->numChildren();
  1022. if (numToGroup == 0)
  1023. return true;
  1024. if (!existingOrder)
  1025. return false;
  1026. unsigned numExistingOrder = existingOrder->numChildren();
  1027. if (numToGroup > numExistingOrder)
  1028. return false;
  1029. bool allowReordering = false;
  1030. if (!allowReordering)
  1031. {
  1032. // Each of the leading components of the sort criteria need to match the elements in the group.
  1033. ForEachChild(i, normalizedGroupList)
  1034. {
  1035. if (existingOrder->queryChild(i) != normalizedGroupList->queryChild(i))
  1036. return false;
  1037. }
  1038. return true;
  1039. }
  1040. //The leading elements of the sort criteria need to match the leading elements of the group, but the order can be changed.
  1041. //NOTE: The lists cannot contain any duplicates => only need to check for existance.
  1042. HqlExprCopyArray existingComponents;
  1043. unwindChildren(existingComponents, existingOrder);
  1044. ForEachChild(i, normalizedGroupList)
  1045. {
  1046. IHqlExpression * cur = normalizedGroupList->queryChild(i);
  1047. unsigned match = existingComponents.find(*cur);
  1048. if ((match == NotFound) || (match >= numToGroup))
  1049. return false;
  1050. }
  1051. return true;
  1052. }
  1053. IHqlExpression * ensureSortedForGroup(IHqlExpression * table, IHqlExpression *sortList, bool isLocal, bool alwaysLocal, bool allowSubSort)
  1054. {
  1055. if (isSortedForGroup(table, sortList, isLocal||alwaysLocal))
  1056. return LINK(table);
  1057. IHqlExpression * attr = isLocal ? createLocalAttribute() : NULL;
  1058. IHqlExpression * ds = LINK(table);
  1059. if (isGrouped(ds))
  1060. ds = createDataset(no_group, ds, NULL);
  1061. return createDatasetF(no_sort, ds, LINK(sortList), attr, NULL);
  1062. }
  1063. //If this gets too complex we would need to make sure we don't explode traversing the expression tree.
  1064. static bool includesFieldsOutsideGrouping(IHqlExpression * distribution, const HqlExprCopyArray & groups)
  1065. {
  1066. if (groups.find(*distribution->queryBody()) != NotFound)
  1067. return false;
  1068. switch (distribution->getOperator())
  1069. {
  1070. case no_hash:
  1071. case no_hash32:
  1072. case no_hash64:
  1073. case no_hashmd5:
  1074. case no_add:
  1075. case no_xor:
  1076. case no_bxor:
  1077. case no_sortlist:
  1078. case no_cast:
  1079. case no_implicitcast:
  1080. case no_negate:
  1081. break;
  1082. case no_field:
  1083. case no_select:
  1084. case no_sortpartition:
  1085. return true;
  1086. case no_constant:
  1087. return false;
  1088. case no_trim:
  1089. if (distribution->hasAttribute(leftAtom) || distribution->hasAttribute(allAtom))
  1090. return false;
  1091. return includesFieldsOutsideGrouping(distribution->queryChild(0), groups);
  1092. case no_attr:
  1093. //may be flags on hash32,trim etc.
  1094. return (distribution->queryName() == unknownAtom);
  1095. default:
  1096. return true;
  1097. }
  1098. ForEachChild(idx, distribution)
  1099. {
  1100. if (includesFieldsOutsideGrouping(distribution->queryChild(idx), groups))
  1101. return true;
  1102. }
  1103. return false;
  1104. }
  1105. bool isPartitionedForGroup(IHqlExpression * table, IHqlExpression *grouping, bool isGroupAll)
  1106. {
  1107. IHqlExpression * distribution = queryDistribution(table);
  1108. if (!isKnownDistribution(distribution) || !distribution->isPure())
  1109. return false;
  1110. OwnedHqlExpr normalizedGrouping = normalizeSortlist(grouping, table);
  1111. unsigned numToGroup = normalizedGrouping->numChildren();
  1112. if (numToGroup == 0)
  1113. return false; // since they all need transferring to a single node!
  1114. HqlExprCopyArray groupingElements;
  1115. unwindChildren(groupingElements, normalizedGrouping);
  1116. // MORE: Could possibly check if the trailing field of the previous grouping lies in the new grouping fields.
  1117. // If so it implies the new grouping will already be split between nodes, so it can be done locally.
  1118. // But it may have been grouped locally so assumption isn't correct
  1119. if (isSortDistribution(distribution))
  1120. {
  1121. IHqlExpression * sortlist = distribution->queryChild(0);
  1122. if (!isGroupAll)
  1123. {
  1124. //The distribution was a sort, ok if the trailing component of the sort is included in the grouping criteria.
  1125. //If a trailing component is a constant we can test the preceding one since a single valued field can't
  1126. //have been split over multiple nodes.
  1127. unsigned numElements = sortlist->numChildren();
  1128. while (numElements != 0)
  1129. {
  1130. IHqlExpression * element = sortlist->queryChild(numElements-1)->queryBody();
  1131. if (!element->isConstant())
  1132. return groupingElements.contains(*element);
  1133. numElements--;
  1134. }
  1135. return false;
  1136. }
  1137. //For a group,all all the fields in the sort need to be in the grouping condition.
  1138. distribution = sortlist;
  1139. }
  1140. //The distribution was a non-sort, so ok if all fields in the distribution are included in the grouping criteria.
  1141. return !includesFieldsOutsideGrouping(distribution, groupingElements);
  1142. }
  1143. bool isPartitionedForGroup(IHqlExpression * table, const HqlExprArray & grouping, bool isGroupAll)
  1144. {
  1145. OwnedHqlExpr sortlist = createValueSafe(no_sortlist, makeSortListType(NULL), grouping);
  1146. return isPartitionedForGroup(table, sortlist, isGroupAll);
  1147. }
  1148. //---------------------------------------------------------------------------
  1149. // Helper functions for optimizing sorting...
  1150. IHqlExpression * getExistingSortOrder(IHqlExpression * dataset, bool isLocal, bool ignoreGrouping)
  1151. {
  1152. if (isLocal)
  1153. return getLocalSortOrder(dataset);
  1154. if (ignoreGrouping || !isGrouped(dataset))
  1155. return getGlobalSortOrder(dataset);
  1156. return LINK(queryGroupSortOrder(dataset));
  1157. }
  1158. static bool isCorrectDistributionForSort(IHqlExpression * dataset, IHqlExpression * normalizedSortOrder, bool isLocal, bool ignoreGrouping)
  1159. {
  1160. if (isLocal || (isGrouped(dataset) && !ignoreGrouping))
  1161. return true;
  1162. IHqlExpression * distribution = queryDistribution(dataset);
  1163. if (distribution == queryAnyDistributionAttribute())
  1164. return true;
  1165. if (!isSortDistribution(distribution))
  1166. return false;
  1167. IHqlExpression * previousOrder = distribution->queryChild(0); // Already normalized when it was created.
  1168. //MORE: We should possibly loosen this test to allow compatible casts etc.
  1169. //return isCompatibleSortOrder(existingOrder, normalizedSortOrder)
  1170. return (previousOrder == normalizedSortOrder);
  1171. }
  1172. //--------------------------------------------------------------------------------------------------------------------
  1173. static bool isCompatibleSortOrder(IHqlExpression * existingOrder, IHqlExpression * normalizedOrder)
  1174. {
  1175. if (normalizedOrder->numChildren() == 0)
  1176. return true;
  1177. if (!existingOrder)
  1178. return false;
  1179. if (existingOrder == queryAnyOrderSortlist())
  1180. return true;
  1181. if (normalizedOrder->numChildren() > existingOrder->numChildren())
  1182. return false;
  1183. ForEachChild(i, normalizedOrder)
  1184. {
  1185. if (!sortComponentMatches(normalizedOrder->queryChild(i), existingOrder->queryChild(i)))
  1186. return false;
  1187. }
  1188. return true;
  1189. }
  1190. static bool normalizedIsAlreadySorted(IHqlExpression * dataset, IHqlExpression * normalizedOrder, bool isLocal, bool ignoreGrouping)
  1191. {
  1192. #ifdef OPTIMIZATION2
  1193. if (hasNoMoreRowsThan(dataset, 1))
  1194. return true;
  1195. #endif
  1196. if (!isCorrectDistributionForSort(dataset, normalizedOrder, isLocal, ignoreGrouping))
  1197. return false;
  1198. //Constant items and duplicates should have been removed already.
  1199. OwnedHqlExpr existingOrder = getExistingSortOrder(dataset, isLocal, ignoreGrouping);
  1200. return isCompatibleSortOrder(existingOrder, normalizedOrder);
  1201. }
  1202. bool isAlreadySorted(IHqlExpression * dataset, IHqlExpression * order, bool isLocal, bool ignoreGrouping)
  1203. {
  1204. #ifdef OPTIMIZATION2
  1205. if (hasNoMoreRowsThan(dataset, 1))
  1206. return true;
  1207. #endif
  1208. OwnedHqlExpr normalizedOrder = normalizeSortlist(order, dataset);
  1209. return normalizedIsAlreadySorted(dataset, normalizedOrder, isLocal, ignoreGrouping);
  1210. }
  1211. //Elements in the exprarray have already been mapped;
  1212. bool isAlreadySorted(IHqlExpression * dataset, const HqlExprArray & newSort, bool isLocal, bool ignoreGrouping)
  1213. {
  1214. HqlExprArray components;
  1215. normalizeComponents(components, newSort);
  1216. OwnedHqlExpr normalizedOrder = createSortList(components);
  1217. return normalizedIsAlreadySorted(dataset, normalizedOrder, isLocal, ignoreGrouping);
  1218. }
  1219. //--------------------------------------------------------------------------------------------------------------------
  1220. static unsigned numCompatibleSortElements(IHqlExpression * existingOrder, IHqlExpression * normalizedOrder)
  1221. {
  1222. if (!existingOrder)
  1223. return 0;
  1224. unsigned numExisting = existingOrder->numChildren();
  1225. unsigned numRequired = normalizedOrder->numChildren();
  1226. unsigned numToCompare = (numRequired > numExisting) ? numExisting : numRequired;
  1227. for (unsigned i=0; i < numToCompare; i++)
  1228. {
  1229. if (!sortComponentMatches(normalizedOrder->queryChild(i), existingOrder->queryChild(i)))
  1230. return i;
  1231. }
  1232. return numToCompare;
  1233. }
  1234. static unsigned normalizedNumSortedElements(IHqlExpression * dataset, IHqlExpression * normalizedOrder, bool isLocal, bool ignoreGrouping)
  1235. {
  1236. #ifdef OPTIMIZATION2
  1237. if (hasNoMoreRowsThan(dataset, 1))
  1238. return true;
  1239. #endif
  1240. if (!isCorrectDistributionForSort(dataset, normalizedOrder, isLocal, ignoreGrouping))
  1241. return false;
  1242. //Constant items and duplicates should have been removed already.
  1243. OwnedHqlExpr existingOrder = getExistingSortOrder(dataset, isLocal, ignoreGrouping);
  1244. return numCompatibleSortElements(existingOrder, normalizedOrder);
  1245. }
  1246. static unsigned numElementsAlreadySorted(IHqlExpression * dataset, IHqlExpression * order, bool isLocal, bool ignoreGrouping)
  1247. {
  1248. #ifdef OPTIMIZATION2
  1249. if (hasNoMoreRowsThan(dataset, 1))
  1250. return order->numChildren();
  1251. #endif
  1252. OwnedHqlExpr normalizedOrder = normalizeSortlist(order, dataset);
  1253. return normalizedNumSortedElements(dataset, normalizedOrder, isLocal, ignoreGrouping);
  1254. }
  1255. //Elements in the exprarray have already been mapped;
  1256. static unsigned numElementsAlreadySorted(IHqlExpression * dataset, const HqlExprArray & newSort, bool isLocal, bool ignoreGrouping)
  1257. {
  1258. HqlExprArray components;
  1259. normalizeComponents(components, newSort);
  1260. OwnedHqlExpr normalizedOrder = createSortList(components);
  1261. return normalizedNumSortedElements(dataset, normalizedOrder, isLocal, ignoreGrouping);
  1262. }
  1263. bool isWorthShuffling(IHqlExpression * dataset, IHqlExpression * order, bool isLocal, bool ignoreGrouping)
  1264. {
  1265. //MORE: Should this look at the cardinality of the already-sorted fields, and not transform if below a certain threshold?
  1266. return numElementsAlreadySorted(dataset, order, isLocal, ignoreGrouping) != 0;
  1267. }
  1268. bool isWorthShuffling(IHqlExpression * dataset, const HqlExprArray & newSort, bool isLocal, bool ignoreGrouping)
  1269. {
  1270. //MORE: Should this look at the cardinality of the already-sorted fields, and not transform if below a certain threshold?
  1271. return numElementsAlreadySorted(dataset, newSort, isLocal, ignoreGrouping) != 0;
  1272. }
  1273. //--------------------------------------------------------------------------------------------------------------------
  1274. //Convert SUBSORT(ds, <sort>, <grouping>, ?LOCAL, options) to
  1275. //g := GROUP(ds, grouping, ?LOCAL); s := SORT(g, <sort>, options); GROUP(s);
  1276. IHqlExpression * convertSubSortToGroupedSort(IHqlExpression * expr)
  1277. {
  1278. IHqlExpression * dataset = expr->queryChild(0);
  1279. IHqlExpression * newOrder = expr->queryChild(1);
  1280. IHqlExpression * grouping = expr->queryChild(2);
  1281. assertex(!isGrouped(dataset) || expr->hasAttribute(globalAtom));
  1282. OwnedHqlExpr attr = isLocalActivity(expr) ? createLocalAttribute() : NULL;
  1283. OwnedHqlExpr grouped = createDatasetF(no_group, LINK(dataset), LINK(grouping), LINK(attr), NULL);
  1284. HqlExprArray args;
  1285. args.append(*grouped.getClear());
  1286. args.append(*LINK(newOrder));
  1287. unwindChildren(args, expr, 3);
  1288. removeAttribute(args, localAtom);
  1289. OwnedHqlExpr sorted = createDataset(no_sort, args);
  1290. return createDataset(no_group, sorted.getClear());
  1291. }
  1292. static IHqlExpression * createSubSorted(IHqlExpression * dataset, IHqlExpression * order, bool isLocal, bool ignoreGrouping, bool alwaysLocal)
  1293. {
  1294. bool isGroupedSubSort = !ignoreGrouping && isGrouped(dataset);
  1295. unsigned sortedElements = numElementsAlreadySorted(dataset, order, isLocal||alwaysLocal, ignoreGrouping);
  1296. if ((sortedElements == 0) || isGroupedSubSort)
  1297. return NULL;
  1298. HqlExprArray components;
  1299. unwindNormalizeSortlist(components, order, false);
  1300. removeDuplicates(components);
  1301. if (components.ordinality() == sortedElements)
  1302. return LINK(dataset);
  1303. OwnedHqlExpr alreadySorted = createValueSafe(no_sortlist, makeSortListType(NULL), components, 0, sortedElements);
  1304. OwnedHqlExpr newOrder = createValueSafe(no_sortlist, makeSortListType(NULL), components, sortedElements, components.ordinality());
  1305. const bool removeGrouping = ignoreGrouping && isGrouped(dataset);
  1306. OwnedHqlExpr attr = isLocal ? createLocalAttribute() : (isGrouped(dataset) && ignoreGrouping) ? createAttribute(globalAtom) : NULL;
  1307. OwnedHqlExpr input = removeGrouping ? createDataset(no_group, LINK(dataset)) : LINK(dataset);
  1308. OwnedHqlExpr subsort = createDatasetF(no_subsort, LINK(input), LINK(newOrder), LINK(alreadySorted), LINK(attr), NULL);
  1309. //Grouped subsorts never generated, global subsorts (if generated) get converted to a global group
  1310. if (!isLocal && !alwaysLocal)
  1311. subsort.setown(convertSubSortToGroupedSort(subsort));
  1312. assertex(isAlreadySorted(subsort, order, isLocal||alwaysLocal, ignoreGrouping));
  1313. return subsort.getClear();
  1314. }
  1315. IHqlExpression * getSubSort(IHqlExpression * dataset, const HqlExprArray & order, bool isLocal, bool ignoreGrouping, bool alwaysLocal)
  1316. {
  1317. if (isAlreadySorted(dataset, order, isLocal||alwaysLocal, ignoreGrouping))
  1318. return NULL;
  1319. OwnedHqlExpr sortlist = createValueSafe(no_sortlist, makeSortListType(NULL), order);
  1320. OwnedHqlExpr mappedSortlist = replaceSelector(sortlist, queryActiveTableSelector(), dataset);
  1321. return createSubSorted(dataset, mappedSortlist, isLocal, ignoreGrouping, alwaysLocal);
  1322. }
  1323. IHqlExpression * getSubSort(IHqlExpression * dataset, IHqlExpression * order, bool isLocal, bool ignoreGrouping, bool alwaysLocal)
  1324. {
  1325. if (isAlreadySorted(dataset, order, isLocal||alwaysLocal, ignoreGrouping))
  1326. return NULL;
  1327. return createSubSorted(dataset, order, isLocal, ignoreGrouping, alwaysLocal);
  1328. }
  1329. //--------------------------------------------------------------------------------------------------------------------
  1330. IHqlExpression * ensureSorted(IHqlExpression * dataset, IHqlExpression * order, bool isLocal, bool ignoreGrouping, bool alwaysLocal, bool allowSubSort)
  1331. {
  1332. if (isAlreadySorted(dataset, order, isLocal||alwaysLocal, ignoreGrouping))
  1333. return LINK(dataset);
  1334. if (allowSubSort && (isLocal || alwaysLocal))
  1335. {
  1336. if (isWorthShuffling(dataset, order, isLocal||alwaysLocal, ignoreGrouping))
  1337. {
  1338. OwnedHqlExpr subsorted = createSubSorted(dataset, order, isLocal, ignoreGrouping, alwaysLocal);
  1339. if (subsorted)
  1340. return subsorted.getClear();
  1341. }
  1342. }
  1343. IHqlExpression * attr = isLocal ? createLocalAttribute() : (isGrouped(dataset) && ignoreGrouping) ? createAttribute(globalAtom) : NULL;
  1344. return createDatasetF(no_sort, LINK(dataset), LINK(order), attr, NULL);
  1345. }
  1346. //-------------------------------
  1347. // Join optimization - can the join order be changed so there is no need to resort.
  1348. bool reorderMatchExistingLocalSort(HqlExprArray & sortedLeft, HqlExprArray & reorderedRight, IHqlExpression * dataset, const HqlExprArray & left, const HqlExprArray & right)
  1349. {
  1350. OwnedHqlExpr existingOrder = getLocalSortOrder(dataset);
  1351. unsigned maxLeft = left.ordinality();
  1352. if (!existingOrder || (existingOrder->numChildren() < maxLeft))
  1353. return false;
  1354. for (unsigned i1=0; i1 < maxLeft; i1++)
  1355. {
  1356. IHqlExpression * search = existingOrder->queryChild(i1);
  1357. unsigned matched = NotFound;
  1358. ForEachItemIn(i2, left)
  1359. {
  1360. if (sortComponentMatches(&left.item(i2), search))
  1361. {
  1362. matched = i2;
  1363. break;
  1364. }
  1365. }
  1366. if (matched == NotFound)
  1367. return false;
  1368. //Play very safe on joins on same field, or expressions that could match the same sort order.
  1369. if (sortedLeft.contains(left.item(matched)) || reorderedRight.contains(right.item(matched)))
  1370. return false;
  1371. sortedLeft.append(OLINK(left.item(matched)));
  1372. reorderedRight.append(OLINK(right.item(matched)));
  1373. }
  1374. return true;
  1375. }
  1376. bool matchDedupDistribution(IHqlExpression * distn, const HqlExprArray & equalities)
  1377. {
  1378. //Could probably use reinterpret cast, but that would be nasty
  1379. HqlExprCopyArray cloned;
  1380. appendArray(cloned, equalities);
  1381. return !includesFieldsOutsideGrouping(distn, cloned);
  1382. }
  1383. bool matchesAnyDistribution(IHqlExpression * distn)
  1384. {
  1385. return distn == queryAnyDistributionAttribute();
  1386. }
  1387. //---------------------------------------------------------------------------
  1388. /*
  1389. For a join to be able to be optimized to a local join we need:
  1390. a) The distribution function to have exactly the same form on each side.
  1391. b) All references to fields from the dataset must match the join element
  1392. */
  1393. static bool checkDistributedCoLocally(IHqlExpression * distribute1, IHqlExpression * distribute2, const HqlExprArray & sort1, const HqlExprArray & sort2)
  1394. {
  1395. unsigned match1 = sort1.find(*distribute1->queryBody());
  1396. unsigned match2 = sort2.find(*distribute2->queryBody());
  1397. if ((match1 != NotFound) || (match2 != NotFound))
  1398. return match1 == match2;
  1399. node_operator op = distribute1->getOperator();
  1400. if (op != distribute2->getOperator())
  1401. return false;
  1402. unsigned max = distribute1->numChildren();
  1403. if (max != distribute2->numChildren())
  1404. return false;
  1405. switch (op)
  1406. {
  1407. case no_select:
  1408. case no_field:
  1409. {
  1410. //recurse?
  1411. return false;
  1412. }
  1413. }
  1414. if (max == 0)
  1415. return true;
  1416. for (unsigned idx = 0; idx < max; idx++)
  1417. {
  1418. if (!checkDistributedCoLocally(distribute1->queryChild(idx), distribute2->queryChild(idx), sort1, sort2))
  1419. return false;
  1420. }
  1421. return true;
  1422. }
  1423. //Convert a function of fields referenced in oldSort, to fields referenced in newSort.
  1424. IHqlExpression * createMatchingDistribution(IHqlExpression * expr, const HqlExprArray & oldSort, const HqlExprArray & newSort)
  1425. {
  1426. unsigned match = oldSort.find(*expr->queryBody());
  1427. if (match != NotFound)
  1428. return LINK(&newSort.item(match));
  1429. node_operator op = expr->getOperator();
  1430. switch (op)
  1431. {
  1432. case no_hash:
  1433. case no_hash32:
  1434. case no_hash64:
  1435. case no_hashmd5:
  1436. case no_add:
  1437. case no_xor:
  1438. case no_bxor:
  1439. case no_sortlist:
  1440. case no_cast:
  1441. case no_implicitcast:
  1442. case no_negate:
  1443. case no_trim:
  1444. break;
  1445. case no_field:
  1446. case no_select:
  1447. case no_sortpartition:
  1448. return NULL;
  1449. case no_constant:
  1450. break;
  1451. case no_attr:
  1452. case no_attr_expr:
  1453. {
  1454. IAtom * name = expr->queryName();
  1455. if (name == internalAtom)
  1456. {
  1457. //HASH,internal - only valid if the types of the old and new sorts match exactly
  1458. ForEachItemIn(i, oldSort)
  1459. {
  1460. if (oldSort.item(i).queryType() != newSort.item(i).queryType())
  1461. return NULL;
  1462. }
  1463. }
  1464. else if (expr == cacheAnyAttribute)
  1465. return NULL;
  1466. break;
  1467. }
  1468. default:
  1469. return NULL;
  1470. }
  1471. unsigned max = expr->numChildren();
  1472. if (max == 0)
  1473. return LINK(expr);
  1474. HqlExprArray args;
  1475. args.ensure(max);
  1476. ForEachChild(i, expr)
  1477. {
  1478. IHqlExpression * mapped = createMatchingDistribution(expr->queryChild(i), oldSort, newSort);
  1479. if (!mapped)
  1480. return NULL;
  1481. args.append(*mapped);
  1482. }
  1483. return expr->clone(args);
  1484. }
  1485. static IHqlExpression * queryColocalDataset(IHqlExpression * expr)
  1486. {
  1487. loop
  1488. {
  1489. switch (expr->getOperator())
  1490. {
  1491. case no_filter:
  1492. break;
  1493. default:
  1494. return expr->queryBody();
  1495. }
  1496. expr = expr->queryChild(0);
  1497. }
  1498. }
  1499. //Check if the distribution functions are essentially identical, except for the places
  1500. bool isDistributedCoLocally(IHqlExpression * dataset1, IHqlExpression * dataset2, const HqlExprArray & sort1, const HqlExprArray & sort2)
  1501. {
  1502. IHqlExpression * distribute1 = queryDistribution(dataset1);
  1503. IHqlExpression * distribute2 = queryDistribution(dataset2);
  1504. //Check the distribution functions are equivalent - by walking through in parallel, and don't contain any
  1505. //references to fields not in the join conditions
  1506. if (isKnownDistribution(distribute1) && distribute1->isPure() &&
  1507. isKnownDistribution(distribute2) && distribute2->isPure())
  1508. {
  1509. //If sorted they are only going to be codistributed if they came from the same sort
  1510. //We could only determine that by making the sort orders unique - by appending a uid
  1511. //But that hits problems with the values going out of sync. (needs more investigation why)
  1512. if (isSortedDistribution(distribute1) || isSortedDistribution(distribute2))
  1513. {
  1514. if (distribute1 != distribute2)
  1515. return false;
  1516. //Self join is guaranteed to be from the same sort
  1517. //NOTE: IF/NONEMPTY return sitribution if the same - but since still only
  1518. //one or the other the dataset will be distributed ok.
  1519. if (queryColocalDataset(dataset1) != queryColocalDataset(dataset2))
  1520. return false;
  1521. }
  1522. if (checkDistributedCoLocally(distribute1, distribute2, sort1, sort2))
  1523. return true;
  1524. }
  1525. return false;
  1526. }
  1527. //---------------------------------------------------------------------------
  1528. inline IHqlExpression * ensureNonNull(IHqlExpression * expr)
  1529. {
  1530. if (expr)
  1531. return LINK(expr);
  1532. return LINK(cached_omitted_Attribute);
  1533. }
  1534. ITypeInfo * createDatasetType(ITypeInfo * recordType, bool isGrouped)
  1535. {
  1536. ITypeInfo * rowType = makeRowType(LINK(recordType));
  1537. Owned<ITypeInfo> type = makeTableType(rowType);
  1538. if (isGrouped)
  1539. return makeGroupedTableType(type.getClear());
  1540. return type.getClear();
  1541. }
  1542. static IHqlExpression * createPreserveTableInfo(IHqlExpression * newTable, IHqlExpression * original, bool loseDistribution, IHqlExpression * persistName)
  1543. {
  1544. CHqlMetaInfo meta;
  1545. if (original->isDataset())
  1546. extractMeta(meta, original);
  1547. LinkedHqlExpr distribution = loseDistribution ? NULL : meta.distribution;
  1548. IHqlExpression * globalSort = meta.globalSortOrder;
  1549. IHqlExpression * localSort = meta.localUngroupedSortOrder;
  1550. IHqlExpression * grouping = meta.grouping;
  1551. IHqlExpression * groupSort = meta.groupSortOrder;
  1552. if (persistName && isKnownDistribution(distribution))
  1553. {
  1554. if (!distribution->isAttribute())
  1555. {
  1556. //Cluster size may not match so generate a unique modifier. Needs to modify enough distribute no longer a nop,
  1557. //but not too much to not get hoisted, or introduce extra dependencies.
  1558. //At the moment bxor with a sequence number since I can't see anyone ever doing that.
  1559. __int64 seq = getExpressionCRC(persistName);
  1560. OwnedHqlExpr uid = createConstant(distribution->queryType()->castFrom(true, seq));
  1561. distribution.setown(createValue(no_bxor, distribution->getType(), LINK(distribution), LINK(uid)));
  1562. }
  1563. else if (isSortDistribution(distribution))
  1564. {
  1565. //Sort distribution is still ok to preserve - since the assumption that trailing elements are not
  1566. //split over nodes still holds.
  1567. }
  1568. else
  1569. {
  1570. //keyed - assume the worst
  1571. distribution.clear();
  1572. }
  1573. }
  1574. LinkedHqlExpr ret = newTable;
  1575. if (distribution || globalSort || localSort || grouping || groupSort)
  1576. ret.setown(createDatasetF(no_preservemeta, LINK(newTable), ensureNonNull(distribution), ensureNonNull(globalSort), ensureNonNull(localSort), ensureNonNull(grouping), ensureNonNull(groupSort), NULL));
  1577. return original->cloneAllAnnotations(ret);
  1578. }
  1579. //Convert preservemeta(wuread) to wuread(,preservemeta), primarily because the optimizer moves datasets
  1580. //over the metadata, causing items not to be commoned up in child queries (e.g., jholt20.xhql).
  1581. //Really needs a better solution
  1582. static IHqlExpression * optimizePreserveMeta(IHqlExpression * expr)
  1583. {
  1584. if (expr->getOperator() != no_preservemeta)
  1585. return LINK(expr);
  1586. IHqlExpression * ds = expr->queryChild(0);
  1587. switch (ds->getOperator())
  1588. {
  1589. case no_workunit_dataset:
  1590. case no_getgraphresult:
  1591. break;
  1592. default:
  1593. return LINK(expr);
  1594. }
  1595. HqlExprArray args;
  1596. unwindChildren(args, expr, 1);
  1597. //Could be an exprAttribute, but if so, I would need to make sure the activeSelector() is removed from the active tables
  1598. OwnedHqlExpr metaAttr = createAttribute(_metadata_Atom, args);
  1599. OwnedHqlExpr ret = appendOwnedOperand(ds, metaAttr.getClear());
  1600. return expr->cloneAllAnnotations(ret);
  1601. }
  1602. IHqlExpression * preserveTableInfo(IHqlExpression * newTable, IHqlExpression * original, bool loseDistribution, IHqlExpression * persistName)
  1603. {
  1604. OwnedHqlExpr preserved = createPreserveTableInfo(newTable, original, loseDistribution, persistName);
  1605. return optimizePreserveMeta(preserved);
  1606. }
  1607. //---------------------------------------------------------------------------------------------------------------------
  1608. static void getMetaIntersection(CHqlMetaInfo & meta, IHqlExpression * other)
  1609. {
  1610. CHqlMetaProperty * otherMetaProp = queryMetaProperty(other);
  1611. meta.getIntersection(otherMetaProp->meta);
  1612. }
  1613. static void calculateProjectMeta(CHqlMetaInfo & meta, IHqlExpression * parent, IHqlExpression * transform, IHqlExpression * selSeq)
  1614. {
  1615. OwnedHqlExpr leftSelect = createSelector(no_left, parent, selSeq);
  1616. TableProjectMapper mapper;
  1617. mapper.setMapping(transform, leftSelect);
  1618. extractMeta(meta, parent);
  1619. meta.applyProject(mapper);
  1620. }
  1621. CHqlMetaProperty * querySimpleDatasetMeta(IHqlExpression * expr)
  1622. {
  1623. node_operator op = expr->getOperator();
  1624. switch (op)
  1625. {
  1626. case no_field:
  1627. case no_selectnth:
  1628. case no_inlinetable:
  1629. case no_dataset_from_transform:
  1630. case no_xmlproject:
  1631. case no_temptable:
  1632. case no_id2blob:
  1633. case no_embedbody:
  1634. case no_httpcall:
  1635. case no_soapcall:
  1636. case no_newsoapcall:
  1637. case no_datasetfromrow:
  1638. case no_datasetfromdictionary:
  1639. return nullMetaProperty;
  1640. case no_rowsetindex:
  1641. case no_rowsetrange:
  1642. case no_translated:
  1643. return queryNullMetaProperty(isGrouped(expr->queryChild(0)));
  1644. case no_param:
  1645. return queryNullMetaProperty(isGrouped(expr));
  1646. case no_pipe:
  1647. return queryNullMetaProperty(expr->hasAttribute(groupAtom));
  1648. case no_alias_project:
  1649. case no_alias_scope:
  1650. case no_cachealias:
  1651. case no_cloned:
  1652. case no_globalscope:
  1653. case no_comma:
  1654. case no_filter:
  1655. case no_keyed:
  1656. case no_nofold:
  1657. case no_nohoist:
  1658. case no_section:
  1659. case no_sectioninput:
  1660. case no_sub:
  1661. case no_thor:
  1662. case no_nothor:
  1663. case no_compound_indexread:
  1664. case no_compound_diskread:
  1665. case no_compound_disknormalize:
  1666. case no_compound_diskaggregate:
  1667. case no_compound_diskcount:
  1668. case no_compound_diskgroupaggregate:
  1669. case no_compound_indexnormalize:
  1670. case no_compound_indexaggregate:
  1671. case no_compound_indexcount:
  1672. case no_compound_indexgroupaggregate:
  1673. case no_compound_childread:
  1674. case no_compound_childnormalize:
  1675. case no_compound_childaggregate:
  1676. case no_compound_childcount:
  1677. case no_compound_childgroupaggregate:
  1678. case no_compound_selectnew:
  1679. case no_compound_inline:
  1680. case no_metaactivity:
  1681. case no_split:
  1682. case no_spill:
  1683. case no_readspill:
  1684. case no_commonspill:
  1685. case no_writespill:
  1686. case no_throughaggregate:
  1687. case no_limit:
  1688. case no_catchds:
  1689. case no_keyedlimit:
  1690. case no_compound_fetch:
  1691. case no_preload:
  1692. case no_alias:
  1693. case no_catch:
  1694. case no_activerow:
  1695. case no_newrow:
  1696. case no_assert_ds:
  1697. case no_spillgraphresult:
  1698. case no_cluster:
  1699. case no_forcenolocal:
  1700. case no_thisnode:
  1701. case no_forcelocal:
  1702. case no_filtergroup:
  1703. case no_forcegraph:
  1704. case no_related:
  1705. case no_executewhen:
  1706. case no_outofline:
  1707. case no_fieldmap:
  1708. case no_owned_ds:
  1709. case no_dataset_alias:
  1710. case no_funcdef:
  1711. return queryMetaProperty(expr->queryChild(0));
  1712. case no_compound:
  1713. case no_select:
  1714. case no_mapto:
  1715. return queryMetaProperty(expr->queryChild(1));
  1716. case no_delayedselect:
  1717. case no_libraryselect:
  1718. case no_unboundselect:
  1719. return queryMetaProperty(expr->queryChild(2));
  1720. }
  1721. return NULL;
  1722. }
  1723. void calculateDatasetMeta(CHqlMetaInfo & meta, IHqlExpression * expr)
  1724. {
  1725. node_operator op = expr->getOperator();
  1726. IHqlExpression * dataset = expr->queryChild(0);
  1727. //Following need to be filled in ready for type creation at the end...
  1728. //gather all the type rules together so we don't get inconsistencies.
  1729. switch (op)
  1730. {
  1731. case no_field:
  1732. case no_selectnth:
  1733. case no_inlinetable:
  1734. case no_dataset_from_transform:
  1735. case no_xmlproject:
  1736. case no_temptable:
  1737. case no_id2blob:
  1738. case no_embedbody:
  1739. case no_httpcall:
  1740. case no_soapcall:
  1741. case no_newsoapcall:
  1742. case no_datasetfromrow:
  1743. case no_datasetfromdictionary:
  1744. break;
  1745. case no_rowsetindex:
  1746. case no_rowsetrange:
  1747. case no_translated:
  1748. if (isGrouped(expr->queryChild(0)))
  1749. meta.setUnknownGrouping();
  1750. break;
  1751. case no_param:
  1752. if (isGrouped(expr))
  1753. meta.setUnknownGrouping();
  1754. break;
  1755. case no_pipe:
  1756. if (expr->queryAttribute(groupAtom))
  1757. meta.setUnknownGrouping();
  1758. break;
  1759. case no_alias_project:
  1760. case no_alias_scope:
  1761. case no_cachealias:
  1762. case no_cloned:
  1763. case no_globalscope:
  1764. case no_comma:
  1765. case no_filter:
  1766. case no_keyed:
  1767. case no_nofold:
  1768. case no_nohoist:
  1769. case no_section:
  1770. case no_sectioninput:
  1771. case no_sub:
  1772. case no_thor:
  1773. case no_nothor:
  1774. case no_compound_indexread:
  1775. case no_compound_diskread:
  1776. case no_compound_disknormalize:
  1777. case no_compound_diskaggregate:
  1778. case no_compound_diskcount:
  1779. case no_compound_diskgroupaggregate:
  1780. case no_compound_indexnormalize:
  1781. case no_compound_indexaggregate:
  1782. case no_compound_indexcount:
  1783. case no_compound_indexgroupaggregate:
  1784. case no_compound_childread:
  1785. case no_compound_childnormalize:
  1786. case no_compound_childaggregate:
  1787. case no_compound_childcount:
  1788. case no_compound_childgroupaggregate:
  1789. case no_compound_selectnew:
  1790. case no_compound_inline:
  1791. case no_metaactivity:
  1792. case no_split:
  1793. case no_spill:
  1794. case no_readspill:
  1795. case no_commonspill:
  1796. case no_writespill:
  1797. case no_throughaggregate:
  1798. case no_limit:
  1799. case no_catchds:
  1800. case no_keyedlimit:
  1801. case no_compound_fetch:
  1802. case no_preload:
  1803. case no_alias:
  1804. case no_catch:
  1805. case no_activerow:
  1806. case no_newrow:
  1807. case no_assert_ds:
  1808. case no_spillgraphresult:
  1809. case no_cluster:
  1810. case no_forcenolocal:
  1811. case no_thisnode:
  1812. case no_forcelocal:
  1813. case no_filtergroup:
  1814. case no_forcegraph:
  1815. case no_related:
  1816. case no_executewhen:
  1817. case no_outofline:
  1818. case no_fieldmap:
  1819. case no_owned_ds:
  1820. case no_dataset_alias:
  1821. case no_funcdef:
  1822. extractMeta(meta, dataset);
  1823. break;
  1824. case no_compound:
  1825. case no_select:
  1826. case no_mapto:
  1827. extractMeta(meta, expr->queryChild(1));
  1828. break;
  1829. case no_delayedselect:
  1830. case no_libraryselect:
  1831. case no_unboundselect:
  1832. extractMeta(meta, expr->queryChild(2));
  1833. break;
  1834. case no_table:
  1835. {
  1836. IHqlExpression * grouping = expr->queryAttribute(groupedAtom);
  1837. if (grouping)
  1838. meta.grouping.set(queryUnknownSortlist());
  1839. break;
  1840. }
  1841. case no_null:
  1842. case no_fail:
  1843. case no_anon:
  1844. case no_pseudods:
  1845. case no_skip:
  1846. case no_all:
  1847. case no_workunit_dataset:
  1848. case no_getgraphresult:
  1849. case no_getgraphloopresult:
  1850. case no_getresult:
  1851. case no_rows:
  1852. case no_internalselect:
  1853. case no_purevirtual:
  1854. case no_libraryinput:
  1855. {
  1856. IHqlExpression * metadata = expr->queryAttribute(_metadata_Atom);
  1857. if (!metadata)
  1858. {
  1859. IHqlExpression * distributed = expr->queryAttribute(distributedAtom);
  1860. IHqlExpression * distribution = distributed ? distributed->queryChild(0) : NULL;
  1861. meta.distribution.set(distribution);
  1862. IHqlExpression * grouped = expr->queryAttribute(groupedAtom);
  1863. if (grouped)
  1864. {
  1865. IHqlExpression * groupExpr = grouped->queryChild(0);
  1866. assertex(!groupExpr || groupExpr->getOperator() == no_sortlist);
  1867. if (!groupExpr)
  1868. groupExpr = queryUnknownSortlist();
  1869. meta.grouping.set(groupExpr);
  1870. }
  1871. }
  1872. else
  1873. extractMetaFromMetaAttr(meta, metadata, 0);
  1874. if ((op == no_null) || (op == no_fail))
  1875. {
  1876. meta.setMatchesAny();
  1877. if (meta.grouping)
  1878. meta.grouping.set(queryAnyOrderSortlist());
  1879. }
  1880. break;
  1881. }
  1882. case no_combine:
  1883. case no_combinegroup:
  1884. {
  1885. calculateProjectMeta(meta, expr->queryChild(0), expr->queryChild(2), expr->queryAttribute(_selectorSequence_Atom));
  1886. //Not at all sure about this weird case
  1887. break;
  1888. }
  1889. case no_process:
  1890. {
  1891. calculateProjectMeta(meta, expr->queryChild(0), expr->queryChild(2), expr->queryAttribute(_selectorSequence_Atom));
  1892. break;
  1893. }
  1894. case no_fetch:
  1895. {
  1896. //Currently fetch is assumed to preserve nothing (since I suspect thor doesn't)
  1897. break;
  1898. }
  1899. case no_denormalize:
  1900. case no_denormalizegroup:
  1901. case no_join:
  1902. case no_selfjoin:
  1903. case no_joincount:
  1904. {
  1905. IHqlExpression * transform = expr->queryChild(3);
  1906. //JOIN does not preserve sort order or grouping.
  1907. //It does preserve distribution if distribution fields are projected
  1908. bool isLookupJoin = expr->queryAttribute(lookupAtom) != NULL;
  1909. bool isAllJoin = expr->queryAttribute(allAtom) != NULL;
  1910. bool isHashJoin = expr->queryAttribute(hashAtom) != NULL;
  1911. bool isSmartJoin = expr->queryAttribute(smartAtom) != NULL;
  1912. bool isStreamedJoin = expr->queryAttribute(streamedAtom) != NULL;
  1913. bool isKeyedJoin = !isAllJoin && !isLookupJoin && !isSmartJoin && !isStreamedJoin && (expr->queryAttribute(keyedAtom) || isKey(expr->queryChild(1)));
  1914. bool isLocal = (expr->queryAttribute(localAtom) != NULL);
  1915. bool fo = expr->queryAttribute(fullonlyAtom) || expr->queryAttribute(fullouterAtom);
  1916. bool createDefaultLeft = fo || expr->queryAttribute(rightonlyAtom) || expr->queryAttribute(rightouterAtom);
  1917. bool createDefaultRight = fo || expr->queryAttribute(leftonlyAtom) || expr->queryAttribute(leftouterAtom);
  1918. OwnedHqlExpr leftSelect = createSelector(no_left, expr->queryChild(0), expr->queryAttribute(_selectorSequence_Atom));
  1919. if (isKeyedJoin || isAllJoin || isLookupJoin)
  1920. {
  1921. CHqlMetaInfo & parentMeta = queryMetaProperty(dataset)->meta;
  1922. //If default left hand records created, then can't preserve distn etc.
  1923. meta.set(parentMeta);
  1924. if (!createDefaultLeft)
  1925. {
  1926. bool preservesOrder = !expr->queryAttribute(unorderedAtom);
  1927. if (isKeyedJoin)
  1928. preservesOrder = expr->queryAttribute(_ordered_Atom) != NULL;
  1929. if (!preservesOrder)
  1930. meta.removeAllSortOrders();
  1931. LinkedHqlExpr mapTransform = transform;
  1932. // If there is a join equality LEFT.x = RIGHT.x, and the transform contains a reference to RIGHT.x
  1933. // substitute LEFT.x instead.
  1934. // The modified transform is used for mapping the sort/grouping information
  1935. // which means that information about sort orders are more likely to be preserved (for keyed joins).
  1936. if (!createDefaultRight && ((op == no_join) || (op == no_selfjoin)))
  1937. {
  1938. //Only bother to modify the transform if something useful is going to be mapped in
  1939. //the meta information - otherwise this can be expensive for no gain.
  1940. if (meta.hasUsefulInformation())
  1941. {
  1942. JoinEqualityMapper mapper(expr);
  1943. mapTransform.setown(mapper.mapEqualities(transform, expr->queryChild(2)));
  1944. }
  1945. }
  1946. TableProjectMapper mapper;
  1947. mapper.setMapping(mapTransform, leftSelect);
  1948. meta.applyProject(mapper);
  1949. //For no_denormalize information is only preserved if it is the same whether or not the transform was called.
  1950. if (op == no_denormalize)
  1951. meta.getIntersection(parentMeta);
  1952. }
  1953. else
  1954. meta.removeAllKeepGrouping();
  1955. //The grouping fields could be mapped using the transform to provide more information, but it is
  1956. //unlikely to provide scope for other optimizations, and it will soon be replaced with the expanded
  1957. //implementation which will track map the information.
  1958. if (expr->queryAttribute(groupAtom))
  1959. meta.setUnknownGrouping();
  1960. }
  1961. else if (isLocal)
  1962. {
  1963. CHqlMetaInfo ungroupedMeta;
  1964. CHqlMetaInfo parentMeta = queryMetaProperty(dataset)->meta;
  1965. ungroupedMeta.set(parentMeta);
  1966. ungroupedMeta.removeGroup();
  1967. //local operation so try and preserve the current distribution, no clue about the following sort order,
  1968. //and result is never grouped.
  1969. if (expr->queryAttribute(_lightweight_Atom) && !createDefaultLeft)
  1970. {
  1971. //Implementation detail: lightweight joins preserve the lhs sort order
  1972. //Can be very useful for converting subsequent joins to lightweight joins.
  1973. TableProjectMapper mapper;
  1974. mapper.setMapping(transform, leftSelect);
  1975. meta.set(ungroupedMeta);
  1976. if (expr->hasAttribute(unorderedAtom))
  1977. meta.removeAllSortOrders();
  1978. meta.applyProject(mapper);
  1979. }
  1980. else
  1981. {
  1982. IHqlExpression * leftDistributeInfo = queryDistribution(expr->queryChild(0));
  1983. IHqlExpression * rightDistributeInfo = queryDistribution(expr->queryChild(1));
  1984. IHqlExpression * newDistributeInfo = NULL;
  1985. if (isKnownDistribution(leftDistributeInfo) || isKnownDistribution(rightDistributeInfo))
  1986. {
  1987. TableProjectMapper mapper;
  1988. mapper.setMapping(transform, NULL);
  1989. if (isKnownDistribution(leftDistributeInfo) && !createDefaultLeft)
  1990. newDistributeInfo = mapJoinDistribution(mapper, leftDistributeInfo, leftSelect);
  1991. if (!newDistributeInfo && isKnownDistribution(rightDistributeInfo) && !createDefaultRight)
  1992. {
  1993. OwnedHqlExpr rightSelect = createSelector(no_right, expr->queryChild(1), expr->queryAttribute(_selectorSequence_Atom));
  1994. newDistributeInfo = mapJoinDistribution(mapper, rightDistributeInfo, rightSelect);
  1995. }
  1996. }
  1997. if (!newDistributeInfo)
  1998. newDistributeInfo = getUnknownAttribute();
  1999. meta.distribution.setown(newDistributeInfo);
  2000. }
  2001. //For no_denormalize information is only preserved if it is the same whether or not the transform was called.
  2002. if (op == no_denormalize)
  2003. meta.getIntersection(ungroupedMeta);
  2004. }
  2005. else if (isHashJoin)
  2006. {
  2007. meta.distribution.setown(getUnknownAttribute());
  2008. }
  2009. else
  2010. {
  2011. //Nothing known
  2012. }
  2013. break;
  2014. }
  2015. case no_dedup:
  2016. {
  2017. extractMeta(meta, dataset);
  2018. bool isLocal = expr->hasAttribute(localAtom);
  2019. bool hasAll = expr->hasAttribute(hashAtom) || expr->hasAttribute(allAtom);
  2020. if (!meta.isGrouped())
  2021. {
  2022. //dedup,all kills the sort order, and global removes the distribution
  2023. if (hasAll)
  2024. {
  2025. meta.removeAllAndUngroup(isLocal);
  2026. }
  2027. else if (!isLocal)
  2028. meta.removeDistribution();
  2029. }
  2030. else
  2031. {
  2032. //Some implementations of Dedup all within a group can kill the group sort order
  2033. if (hasAll)
  2034. meta.removeActiveSort();
  2035. }
  2036. break;
  2037. }
  2038. case no_group:
  2039. case no_grouped:
  2040. case no_assertgrouped:
  2041. {
  2042. extractMeta(meta, dataset);
  2043. IHqlExpression * grouping = queryRealChild(expr, 1);
  2044. if (grouping)
  2045. {
  2046. OwnedHqlExpr mappedGrouping = replaceSelector(grouping, dataset, queryActiveTableSelector());
  2047. bool isLocal = expr->queryAttribute(localAtom) != NULL;
  2048. if (expr->queryAttribute(allAtom))
  2049. {
  2050. //group,all destroys any previous sort order, may destroy distribution.
  2051. meta.removeAllAndUngroup(isLocal);
  2052. }
  2053. meta.applyGroupBy(mappedGrouping, isLocal);
  2054. }
  2055. else
  2056. meta.removeGroup();
  2057. break;
  2058. }
  2059. case no_distribute:
  2060. case no_distributed:
  2061. case no_assertdistributed:
  2062. {
  2063. if (expr->queryAttribute(skewAtom))
  2064. {
  2065. meta.setUnknownDistribution();
  2066. }
  2067. else
  2068. {
  2069. OwnedHqlExpr mappedDistributeInfo = replaceSelector(expr->queryChild(1), dataset, queryActiveTableSelector());
  2070. IHqlExpression * sorted = expr->queryAttribute(mergeAtom);
  2071. if (sorted)
  2072. {
  2073. OwnedHqlExpr mappedSorted = replaceSelector(sorted->queryChild(0), dataset, queryActiveTableSelector());
  2074. meta.applyDistribute(mappedDistributeInfo, mappedSorted);
  2075. }
  2076. else
  2077. meta.applyDistribute(mappedDistributeInfo, NULL);
  2078. }
  2079. break;
  2080. }
  2081. case no_preservemeta:
  2082. {
  2083. meta.distribution.set(queryRemoveOmitted(expr->queryChild(1)));
  2084. meta.globalSortOrder.set(queryRemoveOmitted(expr->queryChild(2)));
  2085. meta.localUngroupedSortOrder.set(queryRemoveOmitted(expr->queryChild(3)));
  2086. meta.grouping.set(queryRemoveOmitted(expr->queryChild(4)));
  2087. meta.groupSortOrder.set(queryRemoveOmitted(expr->queryChild(5)));
  2088. break;
  2089. }
  2090. case no_keyeddistribute:
  2091. {
  2092. //destroy grouping and sort order, sets new distribution info
  2093. //to be usable this needs to really save a reference to the actual index used.
  2094. OwnedHqlExpr newDistribution = createAttribute(keyedAtom, replaceSelector(expr->queryChild(1), dataset, queryActiveTableSelector()));
  2095. meta.applyDistribute(newDistribution, NULL);
  2096. break;
  2097. }
  2098. case no_subsort:
  2099. {
  2100. bool isLocal = expr->queryAttribute(localAtom) != NULL;
  2101. OwnedHqlExpr normalizedSortOrder = replaceSelector(expr->queryChild(1), dataset, queryActiveTableSelector());
  2102. OwnedHqlExpr mappedGrouping = replaceSelector(expr->queryChild(2), dataset, queryActiveTableSelector());
  2103. extractMeta(meta, dataset);
  2104. meta.applySubSort(mappedGrouping, normalizedSortOrder, isLocal);
  2105. break;
  2106. }
  2107. case no_cosort:
  2108. case no_sort:
  2109. case no_sorted:
  2110. case no_assertsorted:
  2111. case no_topn:
  2112. case no_stepped: // stepped implies the sort order matches the stepped criteria
  2113. {
  2114. OwnedHqlExpr normalizedSortOrder;
  2115. bool isLocal = expr->hasAttribute(localAtom);
  2116. bool hasGlobal = expr->hasAttribute(globalAtom);
  2117. IHqlExpression * sortOrder = queryRealChild(expr, 1);
  2118. if (sortOrder)
  2119. normalizedSortOrder.setown(replaceSelector(sortOrder, dataset, queryActiveTableSelector()));
  2120. if (!isLocal && (hasGlobal || !isGrouped(dataset)))
  2121. {
  2122. meta.applyGlobalSort(normalizedSortOrder);
  2123. if ((op == no_topn) || (op == no_sorted))
  2124. meta.removeDistribution();
  2125. }
  2126. else
  2127. {
  2128. extractMeta(meta, dataset);
  2129. if (isLocal || expr->queryAttribute(globalAtom))
  2130. meta.removeGroup();
  2131. if (meta.isGrouped())
  2132. meta.applyGroupSort(normalizedSortOrder);
  2133. else if (isLocal)
  2134. meta.applyLocalSort(normalizedSortOrder);
  2135. else
  2136. throwUnexpected(); // should have been handled by the global branch aboce
  2137. }
  2138. break;
  2139. }
  2140. case no_iterate:
  2141. case no_transformebcdic:
  2142. case no_transformascii:
  2143. case no_hqlproject:
  2144. case no_normalize:
  2145. case no_rollup:
  2146. case no_newparse:
  2147. case no_newxmlparse:
  2148. case no_rollupgroup:
  2149. {
  2150. //These may lose the sort order because they may modify the sort fields or change the
  2151. //projected fields. Grouping also translated, but remains even if no mapping.
  2152. TableProjectMapper mapper;
  2153. IHqlExpression * transform;
  2154. bool globalActivityTransfersRows = false;
  2155. OwnedHqlExpr leftSelect = createSelector(no_left, dataset, expr->queryAttribute(_selectorSequence_Atom));
  2156. switch (op)
  2157. {
  2158. case no_rollup:
  2159. transform = expr->queryChild(2);
  2160. globalActivityTransfersRows = true;
  2161. mapper.setMapping(transform, leftSelect);
  2162. break;
  2163. case no_normalize:
  2164. transform = expr->queryChild(2);
  2165. mapper.setMapping(transform, leftSelect);
  2166. break;
  2167. case no_newxmlparse:
  2168. transform = expr->queryChild(3);
  2169. mapper.setMapping(transform, leftSelect);
  2170. break;
  2171. case no_newparse:
  2172. transform = expr->queryChild(4);
  2173. mapper.setMapping(transform, leftSelect);
  2174. break;
  2175. case no_iterate:
  2176. {
  2177. OwnedHqlExpr rightSelect = createSelector(no_right, dataset, expr->queryAttribute(_selectorSequence_Atom));
  2178. transform = expr->queryChild(1);
  2179. globalActivityTransfersRows = true;
  2180. mapper.setMapping(transform, rightSelect); // only if keep from right will it be preserved.
  2181. break;
  2182. }
  2183. case no_transformebcdic:
  2184. case no_transformascii:
  2185. case no_hqlproject:
  2186. case no_rollupgroup:
  2187. transform = expr->queryChild(1);
  2188. mapper.setMapping(transform, leftSelect);
  2189. break;
  2190. default:
  2191. throwUnexpected();
  2192. }
  2193. extractMeta(meta, dataset);
  2194. if (globalActivityTransfersRows && !expr->queryAttribute(localAtom) && !meta.isGrouped())
  2195. meta.removeDistribution();
  2196. if (op == no_rollupgroup)
  2197. meta.removeGroup();
  2198. meta.applyProject(mapper);
  2199. //Tag a count project as sorted in some way, we could spot which field (if any) was initialised with it.
  2200. if ((op == no_hqlproject) && expr->hasAttribute(_countProject_Atom))
  2201. {
  2202. bool isLocal = expr->hasAttribute(localAtom);
  2203. meta.ensureAppearsSorted(isLocal, false);
  2204. }
  2205. break;
  2206. }
  2207. case no_keyindex:
  2208. case no_newkeyindex:
  2209. {
  2210. if (expr->queryAttribute(sortedAtom))
  2211. {
  2212. IHqlExpression * record = expr->queryChild(1);
  2213. HqlExprArray sortExprs;
  2214. if (expr->queryAttribute(sort_KeyedAtom))
  2215. {
  2216. IHqlExpression * payloadAttr = expr->queryAttribute(_payload_Atom);
  2217. bool hasFileposition = getBoolAttribute(expr, filepositionAtom, true);
  2218. unsigned payloadCount = payloadAttr ? (unsigned)getIntValue(payloadAttr->queryChild(0), 1) : hasFileposition ? 1 : 0;
  2219. unsigned payloadIndex = firstPayloadField(record, payloadCount);
  2220. unwindRecordAsSelects(sortExprs, record, queryActiveTableSelector(), payloadIndex);
  2221. }
  2222. else
  2223. unwindRecordAsSelects(sortExprs, record, queryActiveTableSelector());
  2224. OwnedHqlExpr sortOrder = createSortList(sortExprs);
  2225. if (expr->queryAttribute(noRootAtom))
  2226. meta.applyLocalSort(sortOrder);
  2227. else
  2228. meta.applyGlobalSort(sortOrder);
  2229. }
  2230. break;
  2231. }
  2232. case no_soapcall_ds:
  2233. case no_newsoapcall_ds:
  2234. meta.preserveGrouping(dataset);
  2235. break;
  2236. case no_parse:
  2237. case no_xmlparse:
  2238. {
  2239. //Assume we can't work out anything about the sort order/grouping/distribution.
  2240. //Not strictly true, but it will do for the moment.
  2241. meta.preserveGrouping(dataset);
  2242. break;
  2243. }
  2244. case no_selectfields:
  2245. case no_aggregate:
  2246. case no_newaggregate:
  2247. case no_newusertable:
  2248. case no_usertable:
  2249. {
  2250. IHqlExpression * record = expr->queryChild(1);
  2251. if (record->getOperator() == no_null)
  2252. {
  2253. extractMeta(meta, dataset);
  2254. break;
  2255. }
  2256. TableProjectMapper mapper;
  2257. record = record->queryRecord();
  2258. IHqlExpression * grouping = NULL;
  2259. IHqlExpression * mapping = NULL;
  2260. LinkedHqlExpr selector = dataset;
  2261. switch (op)
  2262. {
  2263. case no_usertable:
  2264. case no_selectfields:
  2265. mapping = record;
  2266. grouping = expr->queryChild(2);
  2267. break;
  2268. case no_aggregate:
  2269. selector.setown(createSelector(no_left, dataset, expr->queryAttribute(_selectorSequence_Atom)));
  2270. if (!expr->hasAttribute(mergeTransformAtom))
  2271. mapping = expr->queryChild(2);
  2272. grouping = expr->queryChild(3);
  2273. break;
  2274. case no_newaggregate:
  2275. case no_newusertable:
  2276. mapping = expr->queryChild(2);
  2277. grouping = expr->queryChild(3);
  2278. break;
  2279. }
  2280. if (!mapping)
  2281. mapper.setUnknownMapping();
  2282. else
  2283. mapper.setMapping(mapping, selector);
  2284. if (grouping && grouping->isAttribute())
  2285. grouping = NULL;
  2286. extractMeta(meta, dataset);
  2287. if (grouping)
  2288. {
  2289. if (expr->hasAttribute(groupedAtom))
  2290. {
  2291. //A grouped hash aggregate - the sort order within the groups will be lost.
  2292. meta.removeActiveSort();
  2293. }
  2294. else
  2295. {
  2296. //grouping causes the sort order (and distribution) to be lost - because it might be done by a hash aggregate.
  2297. meta.removeAllSortOrders();
  2298. if (!expr->queryAttribute(localAtom))
  2299. meta.setUnknownDistribution(); // will be distributed by some function of the grouping fields
  2300. //Aggregation removes grouping, unless explicitly marked as a grouped operation
  2301. meta.removeGroup();
  2302. }
  2303. }
  2304. else
  2305. {
  2306. //Aggregation removes grouping
  2307. if (op == no_newaggregate || op == no_aggregate || (mapping && mapping->isGroupAggregateFunction()))
  2308. meta.removeGroup();
  2309. }
  2310. //Now map any fields that we can.
  2311. meta.applyProject(mapper);
  2312. break;
  2313. }
  2314. case no_nonempty:
  2315. {
  2316. //We can take the intersection of the input types for non empty since only one is read.
  2317. extractMeta(meta, dataset);
  2318. ForEachChildFrom(i, expr, 1)
  2319. {
  2320. IHqlExpression * cur = expr->queryChild(i);
  2321. if (!cur->isAttribute())
  2322. getMetaIntersection(meta, cur);
  2323. }
  2324. break;
  2325. }
  2326. case no_addfiles:
  2327. case no_regroup:
  2328. case no_cogroup:
  2329. {
  2330. // Note Concatenation destroys sort order
  2331. // If all the source files have the same distribution then preserve it, else just mark as distributed...
  2332. bool sameDistribution = true;
  2333. bool allInputsIdentical = true;
  2334. IHqlExpression * distributeInfo = queryDistribution(dataset);
  2335. bool allGrouped = isGrouped(dataset);
  2336. ForEachChildFrom(i, expr, 1)
  2337. {
  2338. IHqlExpression * cur = expr->queryChild(i);
  2339. if (!cur->isAttribute())
  2340. {
  2341. if (cur != dataset)
  2342. allInputsIdentical = false;
  2343. IHqlExpression * curDist = queryDistribution(cur);
  2344. if (!curDist)
  2345. distributeInfo = NULL;
  2346. else if (curDist != distributeInfo)
  2347. sameDistribution = false;
  2348. if (!isGrouped(cur))
  2349. allGrouped = false;
  2350. }
  2351. }
  2352. IHqlExpression * newDistribution = NULL;
  2353. if (distributeInfo)
  2354. {
  2355. //sort distributions are not identical - except in the unusual case where
  2356. //the inputs are identical (e.g., x + x). Can change if uids get created for sorts.
  2357. if (sameDistribution && (!isSortedDistribution(distributeInfo) || allInputsIdentical))
  2358. newDistribution = LINK(distributeInfo);
  2359. else
  2360. newDistribution = getUnknownAttribute();
  2361. }
  2362. meta.distribution.setown(newDistribution);
  2363. if (allGrouped || (op == no_cogroup))
  2364. meta.setUnknownGrouping();
  2365. break;
  2366. }
  2367. case no_chooseds:
  2368. case no_datasetlist:
  2369. case no_case:
  2370. case no_map:
  2371. {
  2372. //Activities that pick one of the inputs => the meta is the intersection
  2373. unsigned firstDataset = ((op == no_chooseds) || (op == no_case)) ? 1 : 0;
  2374. ForEachChildFrom(i, expr, firstDataset)
  2375. {
  2376. IHqlExpression * cur = expr->queryChild(i);
  2377. if (i == firstDataset)
  2378. extractMeta(meta, cur);
  2379. else
  2380. getMetaIntersection(meta, cur);
  2381. }
  2382. break;
  2383. }
  2384. case no_normalizegroup:
  2385. {
  2386. //Not very nice - it is hard to track anything through a group normalize.
  2387. if (queryDistribution(dataset))
  2388. meta.setUnknownDistribution();
  2389. meta.preserveGrouping(dataset);
  2390. break;
  2391. }
  2392. case no_if:
  2393. {
  2394. IHqlExpression * left = expr->queryChild(1);
  2395. IHqlExpression * right = expr->queryChild(2);
  2396. if (left->getOperator() == no_null)
  2397. extractMeta(meta, right);
  2398. else if (right->getOperator() == no_null)
  2399. extractMeta(meta, left);
  2400. else
  2401. {
  2402. extractMeta(meta, left);
  2403. getMetaIntersection(meta, right);
  2404. }
  2405. break;
  2406. }
  2407. case no_merge:
  2408. {
  2409. HqlExprArray components;
  2410. IHqlExpression * order= expr->queryAttribute(sortedAtom); // already uses no_activetable to refer to dataset
  2411. assertex(order);
  2412. unwindChildren(components, order);
  2413. OwnedHqlExpr sortlist = createSortList(components);
  2414. if (expr->queryAttribute(localAtom))
  2415. {
  2416. extractMeta(meta, dataset);
  2417. meta.applyLocalSort(sortlist);
  2418. }
  2419. else
  2420. {
  2421. meta.globalSortOrder.set(sortlist);
  2422. meta.localUngroupedSortOrder.set(sortlist);
  2423. }
  2424. break;
  2425. }
  2426. case no_mergejoin:
  2427. case no_nwayjoin:
  2428. case no_nwaymerge:
  2429. {
  2430. IHqlExpression * order = NULL;
  2431. switch (op)
  2432. {
  2433. case no_mergejoin:
  2434. order = expr->queryChild(2);
  2435. break;
  2436. case no_nwayjoin:
  2437. order = expr->queryChild(3);
  2438. break;
  2439. case no_nwaymerge:
  2440. order = expr->queryChild(1);
  2441. break;
  2442. }
  2443. assertex(order);
  2444. IHqlExpression * selSeq = expr->queryAttribute(_selectorSequence_Atom);
  2445. OwnedHqlExpr selector = createSelector(no_left, expr->queryChild(0), selSeq);
  2446. OwnedHqlExpr normalizedOrder = replaceSelector(order, selector, queryActiveTableSelector());
  2447. HqlExprArray components;
  2448. unwindChildren(components, normalizedOrder);
  2449. OwnedHqlExpr sortlist = createSortList(components);
  2450. //These are all currently implemented as local activities, need to change following if no longer true
  2451. extractMeta(meta, expr->queryChild(0));
  2452. meta.applyLocalSort(sortlist);
  2453. break;
  2454. }
  2455. case no_choosen:
  2456. case no_choosesets:
  2457. case no_enth:
  2458. case no_sample:
  2459. //grouped preserves everything
  2460. //otherwise it preserves distribution, global and local order (no data is transferred even for global), but not the grouping.
  2461. if (expr->queryAttribute(groupedAtom))
  2462. {
  2463. extractMeta(meta, dataset);
  2464. }
  2465. else
  2466. {
  2467. extractMeta(meta, dataset);
  2468. meta.removeGroup();
  2469. }
  2470. break;
  2471. case no_allnodes:
  2472. {
  2473. IHqlExpression * merge = expr->queryAttribute(mergeAtom);
  2474. if (merge)
  2475. {
  2476. //more - sort order defined
  2477. }
  2478. break;
  2479. }
  2480. case no_colon:
  2481. //Persist shouldn't preserve the distribution, since can't guarantee done on same width cluster.
  2482. if (queryOperatorInList(no_persist, expr->queryChild(1)))
  2483. {
  2484. extractMeta(meta, dataset);
  2485. meta.setUnknownDistribution();
  2486. if (isGrouped(dataset))
  2487. meta.setUnknownGrouping();
  2488. }
  2489. else if (queryOperatorInList(no_stored, expr->queryChild(1)))
  2490. {
  2491. meta.preserveGrouping(dataset);
  2492. }
  2493. else
  2494. extractMeta(meta, dataset);
  2495. break;
  2496. case no_loop:
  2497. {
  2498. IHqlExpression * body = expr->queryChild(4);
  2499. extractMeta(meta, dataset);
  2500. getMetaIntersection(meta, body->queryChild(0));
  2501. break;
  2502. }
  2503. case no_graphloop:
  2504. {
  2505. IHqlExpression * body = expr->queryChild(2);
  2506. extractMeta(meta, dataset);
  2507. getMetaIntersection(meta, body->queryChild(0));
  2508. break;
  2509. }
  2510. case no_serialize:
  2511. {
  2512. meta.preserveGrouping(dataset);
  2513. break;
  2514. }
  2515. case no_deserialize:
  2516. {
  2517. meta.preserveGrouping(dataset);
  2518. break;
  2519. }
  2520. case no_call:
  2521. //MORE: ?
  2522. if (isGrouped(expr))
  2523. meta.setUnknownGrouping();
  2524. break;
  2525. case no_externalcall:
  2526. case no_external:
  2527. if (isGrouped(expr))
  2528. meta.setUnknownGrouping();
  2529. //No support for grouping?
  2530. break;
  2531. default:
  2532. if (expr->isDataset())
  2533. UNIMPLEMENTED_XY("Type meta for dataset operator", getOpString(op));
  2534. break;
  2535. }
  2536. assertex(isGrouped(expr) == (meta.grouping != NULL));
  2537. #ifdef _DEBUG
  2538. assertex(!meta.grouping || meta.grouping->getOperator() == no_sortlist);
  2539. assertex(!meta.globalSortOrder || meta.globalSortOrder->getOperator() == no_sortlist);
  2540. #endif
  2541. }
  2542. ITypeInfo * calculateDatasetType(node_operator op, const HqlExprArray & parms)
  2543. {
  2544. IHqlExpression * dataset = NULL;
  2545. ITypeInfo * datasetType = NULL;
  2546. ITypeInfo * childType = NULL;
  2547. ITypeInfo * recordType = NULL;
  2548. switch (op)
  2549. {
  2550. case no_activetable:
  2551. throwUnexpected();
  2552. case no_table:
  2553. case no_temptable:
  2554. case no_inlinetable:
  2555. case no_xmlproject:
  2556. case no_null:
  2557. case no_anon:
  2558. case no_pseudods:
  2559. case no_all:
  2560. case no_workunit_dataset:
  2561. case no_getgraphresult:
  2562. case no_getgraphloopresult:
  2563. case no_fail:
  2564. case no_skip:
  2565. case no_datasetfromrow:
  2566. case no_datasetfromdictionary:
  2567. case no_if:
  2568. case no_translated:
  2569. case no_rows:
  2570. break;
  2571. case no_mergejoin:
  2572. case no_nwayjoin:
  2573. case no_nwaymerge:
  2574. datasetType = parms.item(0).queryType()->queryChildType();
  2575. break;
  2576. case no_chooseds:
  2577. case no_compound:
  2578. case no_select:
  2579. dataset = &parms.item(1);
  2580. break;
  2581. default:
  2582. dataset = &parms.item(0);
  2583. break;
  2584. }
  2585. if (dataset)
  2586. datasetType = dataset->queryType();
  2587. if (datasetType)
  2588. {
  2589. childType = datasetType->queryChildType();
  2590. ITypeInfo * rowType = NULL;
  2591. switch (datasetType->getTypeCode())
  2592. {
  2593. case type_groupedtable:
  2594. rowType = childType->queryChildType();
  2595. break;
  2596. case type_row:
  2597. rowType = datasetType;
  2598. break;
  2599. case type_record:
  2600. recordType = datasetType;
  2601. break;
  2602. default:
  2603. rowType = childType;
  2604. break;
  2605. }
  2606. if (rowType)
  2607. recordType = rowType->queryChildType();
  2608. }
  2609. //Following need to be filled in ready for type creation at the end...
  2610. //gather all the type rules together so we don't get inconsistencies.
  2611. Owned<ITypeInfo> type;
  2612. Owned<ITypeInfo> newRecordType;
  2613. unsigned recordArg = NotFound;
  2614. bool linkCounted = false;
  2615. bool streamed = false;
  2616. bool nowGrouped = false;
  2617. switch (op)
  2618. {
  2619. case no_table:
  2620. {
  2621. assertex(parms.isItem(1));
  2622. Linked<IHqlExpression> recorddef = &parms.item(1);
  2623. newRecordType.setown(createRecordType(recorddef));
  2624. nowGrouped = hasAttribute(groupedAtom, parms);
  2625. break;
  2626. }
  2627. case no_null:
  2628. case no_fail:
  2629. case no_anon:
  2630. case no_pseudods:
  2631. case no_skip:
  2632. case no_all:
  2633. case no_workunit_dataset:
  2634. case no_getgraphresult:
  2635. case no_getgraphloopresult:
  2636. case no_getresult:
  2637. case no_rows:
  2638. case no_internalselect:
  2639. case no_delayedselect:
  2640. case no_unboundselect:
  2641. case no_libraryselect:
  2642. case no_purevirtual:
  2643. case no_libraryinput:
  2644. {
  2645. IHqlExpression * record = parms.item(0).queryRecord();
  2646. recordArg = 0;
  2647. nowGrouped = hasAttribute(groupedAtom, parms);
  2648. linkCounted = (hasAttribute(_linkCounted_Atom, parms) || recordRequiresLinkCount(record));
  2649. break;
  2650. }
  2651. case no_translated:
  2652. type.setown(parms.item(0).getType());
  2653. assertex(parms.ordinality()>1 || hasStreamedModifier(type)); // should have a count or a length
  2654. break;
  2655. case no_inlinetable:
  2656. case no_dataset_from_transform:
  2657. case no_xmlproject:
  2658. case no_temptable:
  2659. case no_id2blob:
  2660. case no_embedbody:
  2661. newRecordType.setown(createRecordType(&parms.item(1)));
  2662. linkCounted = hasAttribute(_linkCounted_Atom, parms);
  2663. streamed = hasAttribute(streamedAtom, parms);
  2664. break;
  2665. case no_pipe:
  2666. {
  2667. nowGrouped=hasAttribute(groupAtom, parms);
  2668. if (parms.isItem(2) && (parms.item(2).getOperator() == no_record))
  2669. newRecordType.setown(createRecordType(&parms.item(2)));
  2670. else
  2671. newRecordType.set(recordType);
  2672. break;
  2673. }
  2674. //Records providing the format, can hopefully combine with the transforms soon.
  2675. case no_keyindex:
  2676. case no_newkeyindex:
  2677. {
  2678. recordArg = 1;
  2679. break;
  2680. }
  2681. case no_selectfields:
  2682. case no_aggregate:
  2683. case no_newaggregate:
  2684. case no_newusertable:
  2685. case no_usertable:
  2686. {
  2687. IHqlExpression * record = &parms.item(1);
  2688. if (record->getOperator() == no_null)
  2689. {
  2690. type.set(datasetType);
  2691. break;
  2692. }
  2693. IHqlExpression * grouping = NULL;
  2694. IHqlExpression * mapping = NULL;
  2695. switch (op)
  2696. {
  2697. case no_usertable:
  2698. case no_selectfields:
  2699. mapping = record;
  2700. if (parms.isItem(2))
  2701. grouping = &parms.item(2);
  2702. break;
  2703. case no_aggregate:
  2704. if (!hasAttribute(mergeTransformAtom, parms))
  2705. mapping = &parms.item(2);
  2706. if (parms.isItem(3))
  2707. grouping = &parms.item(3);
  2708. break;
  2709. case no_newaggregate:
  2710. case no_newusertable:
  2711. mapping = &parms.item(2);
  2712. if (parms.isItem(3))
  2713. grouping = &parms.item(3);
  2714. break;
  2715. }
  2716. recordArg = 1;
  2717. if (grouping && grouping->isAttribute())
  2718. grouping = NULL;
  2719. if (grouping)
  2720. {
  2721. if (hasAttribute(groupedAtom, parms))
  2722. {
  2723. nowGrouped = isGrouped(datasetType);
  2724. }
  2725. else
  2726. {
  2727. nowGrouped = false;
  2728. }
  2729. }
  2730. else
  2731. {
  2732. //Aggregation removes grouping
  2733. if (op == no_newaggregate || op == no_aggregate || (mapping && mapping->isGroupAggregateFunction()))
  2734. nowGrouped=false;
  2735. else
  2736. nowGrouped = isGrouped(datasetType);
  2737. }
  2738. break;
  2739. }
  2740. case no_httpcall:
  2741. case no_soapcall:
  2742. recordArg = 3;
  2743. break;
  2744. case no_newsoapcall:
  2745. recordArg = 4;
  2746. break;
  2747. case no_soapcall_ds:
  2748. recordArg = 4;
  2749. nowGrouped = isGrouped(datasetType);
  2750. break;
  2751. case no_newsoapcall_ds:
  2752. recordArg = 5;
  2753. nowGrouped = isGrouped(datasetType);
  2754. break;
  2755. case no_parse:
  2756. recordArg = 3;
  2757. nowGrouped = isGrouped(datasetType);
  2758. break;
  2759. case no_xmlparse:
  2760. recordArg = 2;
  2761. nowGrouped = isGrouped(datasetType);
  2762. break;
  2763. //Transforms providing the format, can hopefully combine with the transforms soon.
  2764. case no_iterate:
  2765. case no_transformebcdic:
  2766. case no_transformascii:
  2767. case no_hqlproject:
  2768. recordArg = 1;
  2769. nowGrouped = isGrouped(datasetType);
  2770. break;
  2771. case no_rollupgroup:
  2772. recordArg = 1;
  2773. break;
  2774. case no_combine:
  2775. case no_combinegroup:
  2776. case no_process:
  2777. case no_normalize:
  2778. case no_rollup:
  2779. recordArg = 2;
  2780. nowGrouped = isGrouped(datasetType);
  2781. break;
  2782. case no_denormalize:
  2783. case no_denormalizegroup:
  2784. case no_join:
  2785. case no_selfjoin:
  2786. case no_joincount:
  2787. {
  2788. bool isLookupJoin = queryAttribute(lookupAtom, parms) != NULL;
  2789. bool isAllJoin = queryAttribute(allAtom, parms) != NULL;
  2790. bool isSmartJoin = queryAttribute(smartAtom, parms) != NULL;
  2791. bool isStreamedJoin = queryAttribute(streamedAtom, parms) != NULL;
  2792. bool isKeyedJoin = !isAllJoin && !isLookupJoin && !isSmartJoin && !isStreamedJoin && (queryAttribute(keyedAtom, parms) || isKey(&parms.item(1)));
  2793. recordArg = 3;
  2794. if (queryAttribute(groupAtom, parms))
  2795. nowGrouped = true;
  2796. else if (isKeyedJoin || isAllJoin || isLookupJoin)
  2797. nowGrouped = isGrouped(datasetType);
  2798. else
  2799. nowGrouped = false;
  2800. break;
  2801. }
  2802. case no_newxmlparse:
  2803. recordArg = 3;
  2804. nowGrouped = isGrouped(datasetType);
  2805. break;
  2806. case no_fetch:
  2807. recordArg = 3;
  2808. nowGrouped = false; // Is this really correct?
  2809. break;
  2810. case no_newparse:
  2811. recordArg = 4;
  2812. nowGrouped = isGrouped(datasetType);
  2813. break;
  2814. case no_addfiles:
  2815. case no_regroup:
  2816. case no_cogroup:
  2817. case no_chooseds:
  2818. {
  2819. unsigned max = parms.ordinality();
  2820. bool allGrouped = isGrouped(datasetType);
  2821. unsigned firstDataset = (op == no_chooseds) ? 1 : 0;
  2822. for (unsigned i=firstDataset+1; i < max; i++)
  2823. {
  2824. IHqlExpression & cur = parms.item(i);
  2825. if (!cur.isAttribute() && !isGrouped(&cur))
  2826. allGrouped = false;
  2827. }
  2828. newRecordType.set(recordType);
  2829. nowGrouped = (allGrouped || (op == no_cogroup));
  2830. break;
  2831. }
  2832. case no_normalizegroup:
  2833. {
  2834. //Not very nice - it is hard to track anything through a group normalize.
  2835. recordArg = 1;
  2836. nowGrouped = isGrouped(dataset);
  2837. break;
  2838. }
  2839. case no_if:
  2840. {
  2841. recordArg = 1;
  2842. IHqlExpression * left = &parms.item(1);
  2843. IHqlExpression * right = &parms.item(2);
  2844. if (isNull(left))
  2845. nowGrouped = isGrouped(right);
  2846. else if (isNull(right))
  2847. nowGrouped = isGrouped(left);
  2848. else
  2849. nowGrouped = isGrouped(left) || isGrouped(right);
  2850. break;
  2851. }
  2852. case no_case:
  2853. case no_mapto:
  2854. //following is wrong, but they get removed pretty quickly so I don't really care
  2855. type.set(parms.item(1).queryType());
  2856. break;
  2857. case no_map:
  2858. //following is wrong, but they get removed pretty quickly so I don't really care
  2859. type.set(parms.item(0).queryType());
  2860. break;
  2861. case no_merge:
  2862. newRecordType.set(recordType);
  2863. nowGrouped = false;
  2864. break;
  2865. case no_mergejoin:
  2866. case no_nwayjoin:
  2867. case no_nwaymerge:
  2868. newRecordType.set(recordType);
  2869. nowGrouped = false;
  2870. break;
  2871. case no_choosen:
  2872. case no_choosesets:
  2873. case no_enth:
  2874. case no_sample:
  2875. newRecordType.set(recordType);
  2876. if (hasAttribute(groupedAtom, parms))
  2877. nowGrouped = isGrouped(dataset);
  2878. break;
  2879. case no_allnodes:
  2880. newRecordType.set(recordType);
  2881. //grouped not currently supported - ensure this is consistent with the meta.
  2882. break;
  2883. case no_colon:
  2884. case no_alias_project:
  2885. case no_alias_scope:
  2886. case no_cachealias:
  2887. case no_cloned:
  2888. case no_globalscope:
  2889. case no_comma:
  2890. case no_compound:
  2891. case no_filter:
  2892. case no_keyed:
  2893. case no_nofold:
  2894. case no_nohoist:
  2895. case no_section:
  2896. case no_sectioninput:
  2897. case no_sub:
  2898. case no_thor:
  2899. case no_nothor:
  2900. case no_compound_indexread:
  2901. case no_compound_diskread:
  2902. case no_compound_disknormalize:
  2903. case no_compound_diskaggregate:
  2904. case no_compound_diskcount:
  2905. case no_compound_diskgroupaggregate:
  2906. case no_compound_indexnormalize:
  2907. case no_compound_indexaggregate:
  2908. case no_compound_indexcount:
  2909. case no_compound_indexgroupaggregate:
  2910. case no_compound_childread:
  2911. case no_compound_childnormalize:
  2912. case no_compound_childaggregate:
  2913. case no_compound_childcount:
  2914. case no_compound_childgroupaggregate:
  2915. case no_compound_selectnew:
  2916. case no_compound_inline:
  2917. case no_field:
  2918. case no_metaactivity:
  2919. case no_split:
  2920. case no_spill:
  2921. case no_readspill:
  2922. case no_commonspill:
  2923. case no_writespill:
  2924. case no_throughaggregate:
  2925. case no_limit:
  2926. case no_catchds:
  2927. case no_keyedlimit:
  2928. case no_compound_fetch:
  2929. case no_preload:
  2930. case no_alias:
  2931. case no_catch:
  2932. case no_activerow:
  2933. case no_newrow:
  2934. case no_assert_ds:
  2935. case no_spillgraphresult:
  2936. case no_cluster:
  2937. case no_forcenolocal:
  2938. case no_thisnode:
  2939. case no_forcelocal:
  2940. case no_filtergroup:
  2941. case no_forcegraph:
  2942. case no_related:
  2943. case no_executewhen:
  2944. case no_outofline:
  2945. case no_fieldmap:
  2946. case no_owned_ds:
  2947. case no_dataset_alias:
  2948. case no_dedup:
  2949. case no_assertgrouped:
  2950. case no_preservemeta:
  2951. case no_keyeddistribute:
  2952. case no_subsort:
  2953. case no_select:
  2954. type.setown(dataset->getType());
  2955. break;
  2956. case no_distribute:
  2957. case no_distributed:
  2958. case no_assertdistributed:
  2959. newRecordType.set(recordType);
  2960. break;
  2961. case no_cosort:
  2962. case no_sort:
  2963. case no_sorted:
  2964. case no_assertsorted:
  2965. case no_topn:
  2966. case no_stepped: // stepped implies the sort order matches the stepped criteria
  2967. case no_nonempty:
  2968. newRecordType.set(recordType);
  2969. if (!hasAttribute(localAtom, parms) && !hasAttribute(globalAtom, parms))
  2970. nowGrouped = isGrouped(dataset);
  2971. break;
  2972. case no_group:
  2973. case no_grouped:
  2974. newRecordType.set(recordType);
  2975. nowGrouped = (parms.isItem(1) && !parms.item(1).isAttribute());
  2976. break;
  2977. case no_loop:
  2978. {
  2979. newRecordType.set(recordType);
  2980. IHqlExpression & body = parms.item(4);
  2981. nowGrouped = isGrouped(dataset) || isGrouped(body.queryChild(0));
  2982. break;
  2983. }
  2984. case no_graphloop:
  2985. {
  2986. newRecordType.set(recordType);
  2987. IHqlExpression & body = parms.item(2);
  2988. nowGrouped = isGrouped(dataset) || isGrouped(body.queryChild(0));
  2989. break;
  2990. }
  2991. case no_serialize:
  2992. {
  2993. assertex(parms.ordinality() >= 2);
  2994. IHqlExpression & form = parms.item(1);
  2995. assertex(form.isAttribute());
  2996. type.setown(getSerializedForm(datasetType, form.queryName()));
  2997. break;
  2998. }
  2999. case no_deserialize:
  3000. {
  3001. assertex(parms.ordinality() >= 3);
  3002. IHqlExpression & record = parms.item(1);
  3003. IHqlExpression & form = parms.item(2);
  3004. assertex(form.isAttribute());
  3005. ITypeInfo * recordType = record.queryType();
  3006. OwnedITypeInfo rowType = makeRowType(LINK(recordType));
  3007. assertex(record.getOperator() == no_record);
  3008. if (isGrouped(datasetType))
  3009. {
  3010. ITypeInfo * childType = datasetType->queryChildType();
  3011. OwnedITypeInfo newChildType = replaceChildType(childType, rowType);
  3012. type.setown(replaceChildType(datasetType, newChildType));
  3013. }
  3014. else
  3015. type.setown(replaceChildType(datasetType, rowType));
  3016. //MORE: The distribution etc. won't be correct....
  3017. type.setown(setLinkCountedAttr(type, true));
  3018. #ifdef _DEBUG
  3019. OwnedITypeInfo serializedType = getSerializedForm(type, form.queryName());
  3020. assertex(recordTypesMatch(serializedType, datasetType));
  3021. #endif
  3022. break;
  3023. }
  3024. case no_rowsetindex:
  3025. case no_rowsetrange:
  3026. type.set(childType);
  3027. break;
  3028. case no_datasetfromrow:
  3029. case no_datasetfromdictionary:
  3030. recordArg = 0;
  3031. break;
  3032. default:
  3033. UNIMPLEMENTED_XY("Type calculation for dataset operator", getOpString(op));
  3034. break;
  3035. }
  3036. if (!type)
  3037. {
  3038. if (!newRecordType)
  3039. {
  3040. assertex(recordArg != NotFound);
  3041. IHqlExpression * record = parms.item(recordArg).queryRecord();
  3042. newRecordType.setown(createRecordType(record));
  3043. }
  3044. type.setown(createDatasetType(newRecordType, nowGrouped));
  3045. if (linkCounted)
  3046. type.setown(setLinkCountedAttr(type, true));
  3047. if (streamed)
  3048. type.setown(setStreamedAttr(type, true));
  3049. }
  3050. return type.getClear();
  3051. }
  3052. extern HQL_API bool hasSameSortGroupDistribution(IHqlExpression * expr, IHqlExpression * other)
  3053. {
  3054. CHqlMetaInfo & left = queryMetaProperty(expr)->meta;
  3055. CHqlMetaInfo & right = queryMetaProperty(other)->meta;
  3056. return left.matches(right);
  3057. }
  3058. extern HQL_API bool hasKnownSortGroupDistribution(IHqlExpression * expr, bool isLocal)
  3059. {
  3060. return queryMetaProperty(expr)->meta.hasKnownSortGroupDistribution(isLocal);
  3061. }