hqlusage.cpp 17 KB


  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "jlib.hpp"
  14. #include "hqltrans.ipp"
  15. #include "hqlusage.hpp"
  16. #include "hqlutil.hpp"
  17. #include "hqlpmap.hpp"
  18. #include "hqlthql.hpp"
  19. //#define SEARCH_FILENAME "xxx"
  20. //#define SEARCH_FIELD "field1"
  21. #ifdef SEARCH_FIELD
  //Debugging aid: called from noteSelect() when the field/file being searched for is noted.
  //The body is intentionally trivial - it only provides somewhere to place a breakpoint.
  static void breakOnMatchField()
  {
      (void)strlen("");
  }
  26. #endif
  27. class ExpressionStatsInfo
  28. {
  29. public:
  30. enum { MaxOperands = 17 }; //An arbitrary upper limit to the distinct number of operands being counted
  31. public:
  32. ExpressionStatsInfo() { count = 0; _clear(numOperands); countMax = 0; sumMax = 0; }
  33. void trace()
  34. {
  35. DBGLOG("numUnique %u", count);
  36. for (unsigned i=0; i < MaxOperands; i++)
  37. DBGLOG(" %u operands: %u", i, numOperands[i]);
  38. DBGLOG(" %u expressions total %u operands", countMax, sumMax);
  39. }
  40. unsigned count;
  41. unsigned numOperands[MaxOperands];
  42. unsigned countMax;
  43. unsigned sumMax;
  44. };
  45. static void calcNumUniqueExpressions(IHqlExpression * expr, ExpressionStatsInfo & info)
  46. {
  47. if (expr->queryTransformExtra())
  48. return;
  49. expr->setTransformExtraUnlinked(expr);
  50. //use head recursion
  51. loop
  52. {
  53. info.count++;
  54. unsigned max = expr->numChildren();
  55. if (max >= ExpressionStatsInfo::MaxOperands)
  56. {
  57. info.countMax++;
  58. info.sumMax += max;
  59. }
  60. else
  61. info.numOperands[max]++;
  62. if (max == 0)
  63. return;
  64. for (unsigned idx=1; idx < max; idx++)
  65. calcNumUniqueExpressions(expr->queryChild(idx), info);
  66. expr = expr->queryChild(0);
  67. }
  68. }
  69. unsigned getNumUniqueExpressions(IHqlExpression * expr)
  70. {
  71. TransformMutexBlock block;
  72. ExpressionStatsInfo info;
  73. calcNumUniqueExpressions(expr, info);
  74. return info.count;
  75. }
  76. unsigned getNumUniqueExpressions(const HqlExprArray & exprs)
  77. {
  78. TransformMutexBlock block;
  79. ExpressionStatsInfo info;
  80. ForEachItemIn(i, exprs)
  81. calcNumUniqueExpressions(&exprs.item(i),info);
  82. return info.count;
  83. }
  84. //------------------------------------------------------------------------------------------------
  85. static HqlTransformerInfo quickExpressionCounterInfo("QuickExpressionCounter");
  86. class HQL_API QuickExpressionCounter : public QuickHqlTransformer
  87. {
  88. public:
  89. QuickExpressionCounter(IHqlExpression * _search, unsigned _limit)
  90. : QuickHqlTransformer(quickExpressionCounterInfo, NULL), search(_search), limit(_limit)
  91. {
  92. matches = 0;
  93. }
  94. void analyse(IHqlExpression * expr)
  95. {
  96. if (expr == search)
  97. matches++;
  98. if (matches >= limit)
  99. return;
  100. QuickHqlTransformer::analyse(expr);
  101. }
  102. bool limitReached() const { return matches >= limit; }
  103. unsigned numMatches() const { return matches; }
  104. protected:
  105. HqlExprAttr search;
  106. unsigned matches;
  107. unsigned limit;
  108. };
  109. extern HQL_API unsigned getNumOccurences(HqlExprArray & exprs, IHqlExpression * search, unsigned limit)
  110. {
  111. QuickExpressionCounter counter(search, limit);
  112. ForEachItemIn(i, exprs)
  113. counter.analyse(&exprs.item(i));
  114. return counter.numMatches();
  115. }
  116. extern HQL_API void logTreeStats(IHqlExpression * expr)
  117. {
  118. TransformMutexBlock block;
  119. ExpressionStatsInfo info;
  120. calcNumUniqueExpressions(expr,info);
  121. info.trace();
  122. }
  123. extern HQL_API void logTreeStats(const HqlExprArray & exprs)
  124. {
  125. TransformMutexBlock block;
  126. ExpressionStatsInfo info;
  127. ForEachItemIn(i, exprs)
  128. calcNumUniqueExpressions(&exprs.item(i),info);
  129. info.trace();
  130. }
  131. //---------------------------------------------------------------------------
  132. HQL_API bool containsSelector(IHqlExpression * expr, IHqlExpression * selector)
  133. {
  134. return exprReferencesDataset(expr, selector);
  135. }
  136. //---------------------------------------------------------------------------
  137. static HqlTransformerInfo hqlSelectorAnywhereLocatorInfo("HqlSelectorAnywhereLocator");
  138. HqlSelectorAnywhereLocator::HqlSelectorAnywhereLocator(IHqlExpression * _selector) : NewHqlTransformer(hqlSelectorAnywhereLocatorInfo)
  139. {
  140. selector.set(_selector);
  141. foundSelector = false;
  142. }
  143. void HqlSelectorAnywhereLocator::analyseExpr(IHqlExpression * expr)
  144. {
  145. if (foundSelector || alreadyVisited(expr))
  146. return;
  147. NewHqlTransformer::analyseExpr(expr);
  148. }
  149. void HqlSelectorAnywhereLocator::analyseSelector(IHqlExpression * expr)
  150. {
  151. if (expr == selector)
  152. {
  153. foundSelector = true;
  154. return;
  155. }
  156. NewHqlTransformer::analyseSelector(expr);
  157. }
  158. bool HqlSelectorAnywhereLocator::containsSelector(IHqlExpression * expr)
  159. {
  160. foundSelector = false;
  161. analyse(expr, 0);
  162. return foundSelector;
  163. }
  164. HQL_API bool containsSelectorAnywhere(IHqlExpression * expr, IHqlExpression * selector)
  165. {
  166. HqlSelectorAnywhereLocator locator(selector);
  167. return locator.containsSelector(expr);
  168. }
  169. //------------------------------------------------------------------------------------------------
  170. static HqlTransformerInfo selectCollectingTransformerInfo("SelectCollectingTransformer");
  171. class SelectCollectingTransformer : public NewHqlTransformer
  172. {
  173. public:
  174. SelectCollectingTransformer(HqlExprArray & _found)
  175. : NewHqlTransformer(selectCollectingTransformerInfo), found(_found)
  176. {
  177. }
  178. virtual void analyseExpr(IHqlExpression * expr)
  179. {
  180. if (alreadyVisited(expr))
  181. return;
  182. if (expr->getOperator() == no_select)
  183. {
  184. if (!found.contains(*expr))
  185. found.append(*LINK(expr));
  186. return;
  187. }
  188. NewHqlTransformer::analyseExpr(expr);
  189. }
  190. protected:
  191. HqlExprArray & found;
  192. };
  193. void gatherSelectExprs(HqlExprArray & target, IHqlExpression * expr)
  194. {
  195. SelectCollectingTransformer collector(target);
  196. collector.analyse(expr, 0);
  197. }
  198. //------------------------------------------------------------------------------------------------
  199. static HqlTransformerInfo fieldAccessAnalyserInfo("FieldAccessAnalyser");
  200. FieldAccessAnalyser::FieldAccessAnalyser(IHqlExpression * _selector) : NewHqlTransformer(fieldAccessAnalyserInfo), selector(_selector)
  201. {
  202. unwindFields(fields, selector->queryRecord());
  203. numAccessed = 0;
  204. accessed.setown(createBitSet());
  205. }
  206. IHqlExpression * FieldAccessAnalyser::queryLastFieldAccessed() const
  207. {
  208. if (numAccessed == 0)
  209. return NULL;
  210. if (accessedAll())
  211. return &fields.tos();
  212. ForEachItemInRev(i, fields)
  213. {
  214. if (accessed->test(i))
  215. return &fields.item(i);
  216. }
  217. throwUnexpected();
  218. }
  219. void FieldAccessAnalyser::analyseExpr(IHqlExpression * expr)
  220. {
  221. if (accessedAll() || alreadyVisited(expr))
  222. return;
  223. if (expr == selector)
  224. {
  225. setAccessedAll();
  226. return;
  227. }
  228. if (expr->getOperator() == no_select)
  229. {
  230. if (expr->queryChild(0) == selector)
  231. {
  232. unsigned match = fields.find(*expr->queryChild(1));
  233. assertex(match != NotFound);
  234. if (!accessed->test(match))
  235. {
  236. accessed->set(match);
  237. numAccessed++;
  238. }
  239. return;
  240. }
  241. }
  242. NewHqlTransformer::analyseExpr(expr);
  243. }
  244. void FieldAccessAnalyser::analyseSelector(IHqlExpression * expr)
  245. {
  246. if (expr == selector)
  247. {
  248. setAccessedAll();
  249. return;
  250. }
  251. if (expr->getOperator() == no_select)
  252. {
  253. if (expr->queryChild(0) == selector)
  254. {
  255. unsigned match = fields.find(*expr->queryChild(1));
  256. assertex(match != NotFound);
  257. if (!accessed->test(match))
  258. {
  259. accessed->set(match);
  260. numAccessed++;
  261. }
  262. return;
  263. }
  264. }
  265. NewHqlTransformer::analyseSelector(expr);
  266. }
  267. //------------------------------------------------------------------------------------------------
  268. static void expandSelectText(StringBuffer & s, IHqlExpression * expr)
  269. {
  270. if (expr->getOperator() != no_field)
  271. {
  272. assertex(expr->getOperator() == no_select);
  273. IHqlExpression * ds = expr->queryChild(0);
  274. IHqlExpression * field = expr->queryChild(1);
  275. if (ds != queryActiveTableSelector())
  276. {
  277. expandSelectText(s, ds);
  278. s.append(".");
  279. }
  280. s.append(field->queryName());
  281. }
  282. else
  283. s.append(expr->queryName());
  284. }
  285. static IPropertyTree * addSelect(IPropertyTree * xml, IHqlExpression * expr, bool isUsed)
  286. {
  287. StringBuffer text;
  288. expandSelectText(text, expr);
  289. Owned<IPropertyTree> field = createPTree(isUsed ? "field" : "unused");
  290. field->setProp("@name", text.str());
  291. const char * tag = field->queryName();
  292. return xml->addPropTree(tag, field.getClear());
  293. }
  294. SourceFieldUsage::SourceFieldUsage(IHqlExpression * _source)
  295. : source(_source)
  296. {
  297. usedAll = false;
  298. usedFilepos = false;
  299. }
  300. void SourceFieldUsage::noteSelect(IHqlExpression * select, IHqlExpression * selector)
  301. {
  302. #ifdef SEARCH_FIELD
  303. if (select->queryChild(1)->queryName() == createAtom(SEARCH_FIELD))
  304. {
  305. if (matchesConstantString(queryFilename(), SEARCH_FILENAME, true))
  306. {
  307. breakOnMatchField();
  308. }
  309. }
  310. #endif
  311. //MORE: For simple selectors may be more efficient to search before replacing the selector.
  312. OwnedHqlExpr mapped = replaceSelector(select, selector, queryActiveTableSelector());
  313. //MORE: May need to use a hash table.
  314. if (!selects.contains(*mapped))
  315. selects.append(*mapped.getClear());
  316. }
  317. IHqlExpression * SourceFieldUsage::queryFilename() const
  318. {
  319. switch (source->getOperator())
  320. {
  321. case no_newkeyindex:
  322. return source->queryChild(3);
  323. case no_table:
  324. return source->queryChild(0);
  325. }
  326. throwUnexpected();
  327. return NULL;
  328. }
  329. const char * SourceFieldUsage::queryFilenameText() const
  330. {
  331. if (!cachedFilenameEcl.get())
  332. {
  333. IHqlExpression * filename = queryFilename();
  334. assertex(filename);
  335. StringBuffer nameText;
  336. getExprECL(filename, nameText);
  337. cachedFilenameEcl.set(nameText.str(), nameText.length());
  338. }
  339. return cachedFilenameEcl.get();
  340. }
  341. IPropertyTree * SourceFieldUsage::createReport(bool includeFieldDetail, const IPropertyTree * exclude) const
  342. {
  343. bool sourceIsKey = isKey(source);
  344. const char * type = sourceIsKey ? "index" : "dataset";
  345. const char * nameText = queryFilenameText();
  346. if (exclude)
  347. {
  348. StringBuffer xpath;
  349. xpath.append(type).append("[@name=\"");
  350. xpath.append(nameText);
  351. xpath.append("\"]");
  352. if (exclude->hasProp(xpath))
  353. return NULL;
  354. }
  355. Owned<IPropertyTree> entry = createPTree(type);
  356. entry->setProp("@name", nameText);
  357. unsigned numFields = 0;
  358. unsigned numFieldsUsed = 0;
  359. expandSelects(entry, source->queryRecord(), queryActiveTableSelector(), usedAll, includeFieldDetail, numFields, numFieldsUsed);
  360. if (isKey(source))
  361. {
  362. IHqlExpression * original = queryAttributeChild(source, _original_Atom, 0);
  363. if (!original)
  364. original = source;
  365. IHqlExpression * lastField = queryLastField(original->queryRecord());
  366. if (getBoolAttribute(source, filepositionAtom, true))
  367. {
  368. if (usedFilepos || !lastField->hasAttribute(_implicitFpos_Atom))
  369. {
  370. numFields++;
  371. if (usedFilepos || usedAll)
  372. {
  373. if (includeFieldDetail)
  374. addSelect(entry, lastField, true);
  375. numFieldsUsed++;
  376. }
  377. }
  378. }
  379. else
  380. {
  381. assertex(!usedFilepos);
  382. }
  383. }
  384. entry->setPropInt("@numFields", numFields);
  385. entry->setPropInt("@numFieldsUsed", numFieldsUsed);
  386. return entry.getClear();
  387. }
  388. void SourceFieldUsage::expandSelects(IPropertyTree * xml, IHqlExpression * record, IHqlExpression * selector, bool allUsed, bool includeFieldDetail, unsigned & numFields, unsigned & numFieldsUsed) const
  389. {
  390. bool seenAll = true;
  391. ForEachChild(i, record)
  392. {
  393. IHqlExpression * cur = record->queryChild(i);
  394. switch (cur->getOperator())
  395. {
  396. case no_field:
  397. {
  398. OwnedHqlExpr selected = createSelectExpr(LINK(selector), LINK(cur));
  399. bool thisUsed = allUsed || selects.contains(*selected);
  400. if (cur->isDatarow())
  401. {
  402. expandSelects(xml, cur->queryRecord(), selected, thisUsed, includeFieldDetail, numFields, numFieldsUsed);
  403. }
  404. else
  405. {
  406. numFields++;
  407. if (thisUsed)
  408. {
  409. if (includeFieldDetail)
  410. addSelect(xml, selected, thisUsed);
  411. numFieldsUsed++;
  412. }
  413. else
  414. {
  415. //could have an option to output unused fields, with code like the following:
  416. //addSelect(xml, selected, thisUsed);
  417. seenAll = false;
  418. }
  419. }
  420. break;
  421. }
  422. case no_record:
  423. expandSelects(xml, cur, selector, allUsed, includeFieldDetail, numFields, numFieldsUsed);
  424. break;
  425. case no_ifblock:
  426. //MORE: Theoretically if any of the fields within the ifblock are used, then the fields
  427. //used in the ifblock condition are also used. Needs to be handled by a preprocessing step.
  428. expandSelects(xml, cur->queryChild(1), selector, allUsed, includeFieldDetail, numFields, numFieldsUsed);
  429. break;
  430. }
  431. }
  432. }
  433. //------------------------------------------------------------------------------------------------
  434. static HqlTransformerInfo sourceFieldTrackerInfo("SourceFieldTracker");
  435. class SourceFieldTracker : public NewHqlTransformer
  436. {
  437. public:
  438. SourceFieldTracker(SourceFieldUsage * _fieldUsage, IHqlExpression * _selector)
  439. : NewHqlTransformer(sourceFieldTrackerInfo), fieldUsage(_fieldUsage), selector(_selector)
  440. {
  441. }
  442. virtual void analyseExpr(IHqlExpression * expr);
  443. protected:
  444. bool isSelected(IHqlExpression * expr) const;
  445. protected:
  446. SourceFieldUsage * fieldUsage;
  447. IHqlExpression * selector;
  448. };
  449. void SourceFieldTracker::analyseExpr(IHqlExpression * expr)
  450. {
  451. if (fieldUsage->seenAll() || alreadyVisited(expr))
  452. return;
  453. if (expr == selector)
  454. {
  455. fieldUsage->noteAll();
  456. return;
  457. }
  458. if (isSelected(expr))
  459. {
  460. fieldUsage->noteSelect(expr->queryNormalizedSelector(), selector);
  461. return;
  462. }
  463. switch (expr->getOperator())
  464. {
  465. case no_filepos:
  466. if (expr->queryChild(0) == selector)
  467. {
  468. fieldUsage->noteFilepos();
  469. return;
  470. }
  471. break;
  472. }
  473. NewHqlTransformer::analyseExpr(expr);
  474. }
  475. bool SourceFieldTracker::isSelected(IHqlExpression * expr) const
  476. {
  477. loop
  478. {
  479. if (expr->getOperator() != no_select)
  480. return false;
  481. IHqlExpression * ds = expr->queryChild(0);
  482. if (ds->queryNormalizedSelector() == selector)
  483. return true;
  484. expr = ds;
  485. }
  486. }
  487. //------------------------------------------------------------------------------------------------
  488. void gatherFieldUsage(SourceFieldUsage * fieldUsage, IHqlExpression * expr, IHqlExpression * selector)
  489. {
  490. unsigned first = getNumChildTables(expr);
  491. SourceFieldTracker tracker(fieldUsage, selector);
  492. ForEachChildFrom(i, expr, first)
  493. tracker.analyse(expr->queryChild(i), 0);
  494. }
  495. void gatherParentFieldUsage(SourceFieldUsage * fieldUsage, IHqlExpression * expr)
  496. {
  497. bool hasDs = false;
  498. bool hasLeft = false;
  499. bool hasRight = false;
  500. switch (getChildDatasetType(expr))
  501. {
  502. case childdataset_dataset:
  503. hasDs = true;
  504. break;
  505. case childdataset_datasetleft:
  506. hasDs = true;
  507. hasLeft = true;
  508. break;
  509. case childdataset_left:
  510. hasLeft = true;
  511. break;
  512. case childdataset_same_left_right:
  513. hasLeft = true;
  514. hasRight = true;
  515. break;
  516. case childdataset_top_left_right:
  517. hasDs = true;
  518. hasLeft = true;
  519. hasRight = true;
  520. break;
  521. case childdataset_leftright: // e.g., no_aggregate
  522. hasLeft = true;
  523. break;
  524. }
  525. IHqlExpression * ds = expr->queryChild(0);
  526. IHqlExpression * selSeq = querySelSeq(expr);
  527. //MORE: Do all this in a single pass
  528. if (hasDs)
  529. gatherFieldUsage(fieldUsage, expr, ds->queryNormalizedSelector());
  530. if (hasLeft)
  531. {
  532. OwnedHqlExpr left = createSelector(no_left, ds, selSeq);
  533. gatherFieldUsage(fieldUsage, expr, left);
  534. }
  535. if (hasRight)
  536. {
  537. OwnedHqlExpr right = createSelector(no_right, ds, selSeq);
  538. gatherFieldUsage(fieldUsage, expr, right);
  539. }
  540. }