123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625 |
- /*##############################################################################
- HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ############################################################################## */
- #include "jlib.hpp"
- #include "hqltrans.ipp"
- #include "hqlusage.hpp"
- #include "hqlutil.hpp"
- #include "hqlpmap.hpp"
- #include "hqlthql.hpp"
- //#define SEARCH_FILENAME "xxx"
- //#define SEARCH_FIELD "field1"
#ifdef SEARCH_FIELD
//Debugging aid: gives a fixed place to set a breakpoint.  It is called from
//SourceFieldUsage::noteSelect() when the field named SEARCH_FIELD is selected from SEARCH_FILENAME.
static void breakOnMatchField()
{
    //The call has no useful effect - it only exists so there is a statement to break on.
    (void)strlen("");
}
#endif
- class ExpressionStatsInfo
- {
- public:
- enum { MaxOperands = 17 }; //An arbitrary upper limit to the distinct number of operands being counted
- public:
- ExpressionStatsInfo() { count = 0; _clear(numOperands); countMax = 0; sumMax = 0; }
- void trace()
- {
- DBGLOG("numUnique %u", count);
- for (unsigned i=0; i < MaxOperands; i++)
- DBGLOG(" %u operands: %u", i, numOperands[i]);
- DBGLOG(" %u expressions total %u operands", countMax, sumMax);
- }
- unsigned count;
- unsigned numOperands[MaxOperands];
- unsigned countMax;
- unsigned sumMax;
- };
- static void calcNumUniqueExpressions(IHqlExpression * expr, ExpressionStatsInfo & info)
- {
- if (expr->queryTransformExtra())
- return;
- expr->setTransformExtraUnlinked(expr);
- //use head recursion
- loop
- {
- info.count++;
- unsigned max = expr->numChildren();
- if (max >= ExpressionStatsInfo::MaxOperands)
- {
- info.countMax++;
- info.sumMax += max;
- }
- else
- info.numOperands[max]++;
- if (max == 0)
- return;
- for (unsigned idx=1; idx < max; idx++)
- calcNumUniqueExpressions(expr->queryChild(idx), info);
- expr = expr->queryChild(0);
- }
- }
- unsigned getNumUniqueExpressions(IHqlExpression * expr)
- {
- TransformMutexBlock block;
- ExpressionStatsInfo info;
- calcNumUniqueExpressions(expr, info);
- return info.count;
- }
- unsigned getNumUniqueExpressions(const HqlExprArray & exprs)
- {
- TransformMutexBlock block;
- ExpressionStatsInfo info;
- ForEachItemIn(i, exprs)
- calcNumUniqueExpressions(&exprs.item(i),info);
- return info.count;
- }
- //------------------------------------------------------------------------------------------------
- static HqlTransformerInfo quickExpressionCounterInfo("QuickExpressionCounter");
- class HQL_API QuickExpressionCounter : public QuickHqlTransformer
- {
- public:
- QuickExpressionCounter(IHqlExpression * _search, unsigned _limit)
- : QuickHqlTransformer(quickExpressionCounterInfo, NULL), search(_search), limit(_limit)
- {
- matches = 0;
- }
- void analyse(IHqlExpression * expr)
- {
- if (expr == search)
- matches++;
- if (matches >= limit)
- return;
- QuickHqlTransformer::analyse(expr);
- }
- bool limitReached() const { return matches >= limit; }
- unsigned numMatches() const { return matches; }
- protected:
- HqlExprAttr search;
- unsigned matches;
- unsigned limit;
- };
- extern HQL_API unsigned getNumOccurences(HqlExprArray & exprs, IHqlExpression * search, unsigned limit)
- {
- QuickExpressionCounter counter(search, limit);
- ForEachItemIn(i, exprs)
- counter.analyse(&exprs.item(i));
- return counter.numMatches();
- }
- extern HQL_API void logTreeStats(IHqlExpression * expr)
- {
- TransformMutexBlock block;
- ExpressionStatsInfo info;
- calcNumUniqueExpressions(expr,info);
- info.trace();
- }
- extern HQL_API void logTreeStats(const HqlExprArray & exprs)
- {
- TransformMutexBlock block;
- ExpressionStatsInfo info;
- ForEachItemIn(i, exprs)
- calcNumUniqueExpressions(&exprs.item(i),info);
- info.trace();
- }
- //---------------------------------------------------------------------------
- HQL_API bool containsSelector(IHqlExpression * expr, IHqlExpression * selector)
- {
- return exprReferencesDataset(expr, selector);
- }
- //---------------------------------------------------------------------------
- static HqlTransformerInfo hqlSelectorAnywhereLocatorInfo("HqlSelectorAnywhereLocator");
- HqlSelectorAnywhereLocator::HqlSelectorAnywhereLocator(IHqlExpression * _selector) : NewHqlTransformer(hqlSelectorAnywhereLocatorInfo)
- {
- selector.set(_selector);
- foundSelector = false;
- }
- void HqlSelectorAnywhereLocator::analyseExpr(IHqlExpression * expr)
- {
- if (foundSelector || alreadyVisited(expr))
- return;
- NewHqlTransformer::analyseExpr(expr);
- }
- void HqlSelectorAnywhereLocator::analyseSelector(IHqlExpression * expr)
- {
- if (expr == selector)
- {
- foundSelector = true;
- return;
- }
- NewHqlTransformer::analyseSelector(expr);
- }
- bool HqlSelectorAnywhereLocator::containsSelector(IHqlExpression * expr)
- {
- foundSelector = false;
- analyse(expr, 0);
- return foundSelector;
- }
- HQL_API bool containsSelectorAnywhere(IHqlExpression * expr, IHqlExpression * selector)
- {
- HqlSelectorAnywhereLocator locator(selector);
- return locator.containsSelector(expr);
- }
- //------------------------------------------------------------------------------------------------
- static HqlTransformerInfo selectCollectingTransformerInfo("SelectCollectingTransformer");
- class SelectCollectingTransformer : public NewHqlTransformer
- {
- public:
- SelectCollectingTransformer(HqlExprArray & _found)
- : NewHqlTransformer(selectCollectingTransformerInfo), found(_found)
- {
- }
- virtual void analyseExpr(IHqlExpression * expr)
- {
- if (alreadyVisited(expr))
- return;
- if (expr->getOperator() == no_select)
- {
- if (!found.contains(*expr))
- found.append(*LINK(expr));
- return;
- }
- NewHqlTransformer::analyseExpr(expr);
- }
- protected:
- HqlExprArray & found;
- };
- void gatherSelectExprs(HqlExprArray & target, IHqlExpression * expr)
- {
- SelectCollectingTransformer collector(target);
- collector.analyse(expr, 0);
- }
- //------------------------------------------------------------------------------------------------
- static HqlTransformerInfo fieldAccessAnalyserInfo("FieldAccessAnalyser");
- FieldAccessAnalyser::FieldAccessAnalyser(IHqlExpression * _selector) : NewHqlTransformer(fieldAccessAnalyserInfo), selector(_selector)
- {
- unwindFields(fields, selector->queryRecord());
- numAccessed = 0;
- accessed.setown(createBitSet());
- }
- IHqlExpression * FieldAccessAnalyser::queryLastFieldAccessed() const
- {
- if (numAccessed == 0)
- return NULL;
- if (accessedAll())
- return &fields.tos();
- ForEachItemInRev(i, fields)
- {
- if (accessed->test(i))
- return &fields.item(i);
- }
- throwUnexpected();
- }
- void FieldAccessAnalyser::analyseExpr(IHqlExpression * expr)
- {
- if (accessedAll() || alreadyVisited(expr))
- return;
- if (expr == selector)
- {
- setAccessedAll();
- return;
- }
- if (expr->getOperator() == no_select)
- {
- if (expr->queryChild(0) == selector)
- {
- unsigned match = fields.find(*expr->queryChild(1));
- assertex(match != NotFound);
- if (!accessed->test(match))
- {
- accessed->set(match);
- numAccessed++;
- }
- return;
- }
- }
- NewHqlTransformer::analyseExpr(expr);
- }
- void FieldAccessAnalyser::analyseSelector(IHqlExpression * expr)
- {
- if (expr == selector)
- {
- setAccessedAll();
- return;
- }
- if (expr->getOperator() == no_select)
- {
- if (expr->queryChild(0) == selector)
- {
- unsigned match = fields.find(*expr->queryChild(1));
- assertex(match != NotFound);
- if (!accessed->test(match))
- {
- accessed->set(match);
- numAccessed++;
- }
- return;
- }
- }
- NewHqlTransformer::analyseSelector(expr);
- }
- //------------------------------------------------------------------------------------------------
- static void expandSelectText(StringBuffer & s, IHqlExpression * expr)
- {
- if (expr->getOperator() != no_field)
- {
- assertex(expr->getOperator() == no_select);
- IHqlExpression * ds = expr->queryChild(0);
- IHqlExpression * field = expr->queryChild(1);
- if (ds != queryActiveTableSelector())
- {
- expandSelectText(s, ds);
- s.append(".");
- }
- s.append(field->queryName());
- }
- else
- s.append(expr->queryName());
- }
- static IPropertyTree * addSelect(IPropertyTree * xml, IHqlExpression * expr, bool isUsed)
- {
- StringBuffer text;
- expandSelectText(text, expr);
- Owned<IPropertyTree> field = createPTree(isUsed ? "field" : "unused");
- field->setProp("@name", text.str());
- const char * tag = field->queryName();
- return xml->addPropTree(tag, field.getClear());
- }
- SourceFieldUsage::SourceFieldUsage(IHqlExpression * _source)
- : source(_source)
- {
- usedAll = false;
- usedFilepos = false;
- }
- void SourceFieldUsage::noteSelect(IHqlExpression * select, IHqlExpression * selector)
- {
- #ifdef SEARCH_FIELD
- if (select->queryChild(1)->queryName() == createAtom(SEARCH_FIELD))
- {
- if (matchesConstantString(queryFilename(), SEARCH_FILENAME, true))
- {
- breakOnMatchField();
- }
- }
- #endif
- //MORE: For simple selectors may be more efficient to search before replacing the selector.
- OwnedHqlExpr mapped = replaceSelector(select, selector, queryActiveTableSelector());
- //MORE: May need to use a hash table.
- if (!selects.contains(*mapped))
- selects.append(*mapped.getClear());
- }
- IHqlExpression * SourceFieldUsage::queryFilename() const
- {
- switch (source->getOperator())
- {
- case no_newkeyindex:
- return source->queryChild(3);
- case no_table:
- return source->queryChild(0);
- }
- throwUnexpected();
- return NULL;
- }
- const char * SourceFieldUsage::queryFilenameText() const
- {
- if (!cachedFilenameEcl.get())
- {
- IHqlExpression * filename = queryFilename();
- assertex(filename);
- StringBuffer nameText;
- getExprECL(filename, nameText);
- cachedFilenameEcl.set(nameText.str(), nameText.length());
- }
- return cachedFilenameEcl.get();
- }
- IPropertyTree * SourceFieldUsage::createReport(bool includeFieldDetail, const IPropertyTree * exclude) const
- {
- bool sourceIsKey = isKey(source);
- const char * type = sourceIsKey ? "index" : "dataset";
- const char * nameText = queryFilenameText();
- if (exclude)
- {
- StringBuffer xpath;
- xpath.append(type).append("[@name=\"");
- xpath.append(nameText);
- xpath.append("\"]");
- if (exclude->hasProp(xpath))
- return NULL;
- }
- Owned<IPropertyTree> entry = createPTree(type);
- entry->setProp("@name", nameText);
- unsigned numFields = 0;
- unsigned numFieldsUsed = 0;
- expandSelects(entry, source->queryRecord(), queryActiveTableSelector(), usedAll, includeFieldDetail, numFields, numFieldsUsed);
- if (isKey(source))
- {
- IHqlExpression * original = queryAttributeChild(source, _original_Atom, 0);
- if (!original)
- original = source;
- IHqlExpression * lastField = queryLastField(original->queryRecord());
- if (getBoolAttribute(source, filepositionAtom, true))
- {
- if (usedFilepos || !lastField->hasAttribute(_implicitFpos_Atom))
- {
- numFields++;
- if (usedFilepos || usedAll)
- {
- if (includeFieldDetail)
- addSelect(entry, lastField, true);
- numFieldsUsed++;
- }
- }
- }
- else
- {
- assertex(!usedFilepos);
- }
- }
- entry->setPropInt("@numFields", numFields);
- entry->setPropInt("@numFieldsUsed", numFieldsUsed);
- return entry.getClear();
- }
- void SourceFieldUsage::expandSelects(IPropertyTree * xml, IHqlExpression * record, IHqlExpression * selector, bool allUsed, bool includeFieldDetail, unsigned & numFields, unsigned & numFieldsUsed) const
- {
- bool seenAll = true;
- ForEachChild(i, record)
- {
- IHqlExpression * cur = record->queryChild(i);
- switch (cur->getOperator())
- {
- case no_field:
- {
- OwnedHqlExpr selected = createSelectExpr(LINK(selector), LINK(cur));
- bool thisUsed = allUsed || selects.contains(*selected);
- if (cur->isDatarow())
- {
- expandSelects(xml, cur->queryRecord(), selected, thisUsed, includeFieldDetail, numFields, numFieldsUsed);
- }
- else
- {
- numFields++;
- if (thisUsed)
- {
- if (includeFieldDetail)
- addSelect(xml, selected, thisUsed);
- numFieldsUsed++;
- }
- else
- {
- //could have an option to output unused fields, with code like the following:
- //addSelect(xml, selected, thisUsed);
- seenAll = false;
- }
- }
- break;
- }
- case no_record:
- expandSelects(xml, cur, selector, allUsed, includeFieldDetail, numFields, numFieldsUsed);
- break;
- case no_ifblock:
- //MORE: Theoretically if any of the fields within the ifblock are used, then the fields
- //used in the ifblock condition are also used. Needs to be handled by a preprocessing step.
- expandSelects(xml, cur->queryChild(1), selector, allUsed, includeFieldDetail, numFields, numFieldsUsed);
- break;
- }
- }
- }
- //------------------------------------------------------------------------------------------------
- static HqlTransformerInfo sourceFieldTrackerInfo("SourceFieldTracker");
- class SourceFieldTracker : public NewHqlTransformer
- {
- public:
- SourceFieldTracker(SourceFieldUsage * _fieldUsage, IHqlExpression * _selector)
- : NewHqlTransformer(sourceFieldTrackerInfo), fieldUsage(_fieldUsage), selector(_selector)
- {
- }
- virtual void analyseExpr(IHqlExpression * expr);
- protected:
- bool isSelected(IHqlExpression * expr) const;
- protected:
- SourceFieldUsage * fieldUsage;
- IHqlExpression * selector;
- };
- void SourceFieldTracker::analyseExpr(IHqlExpression * expr)
- {
- if (fieldUsage->seenAll() || alreadyVisited(expr))
- return;
- if (expr == selector)
- {
- fieldUsage->noteAll();
- return;
- }
- if (isSelected(expr))
- {
- fieldUsage->noteSelect(expr->queryNormalizedSelector(), selector);
- return;
- }
- switch (expr->getOperator())
- {
- case no_filepos:
- if (expr->queryChild(0) == selector)
- {
- fieldUsage->noteFilepos();
- return;
- }
- break;
- }
- NewHqlTransformer::analyseExpr(expr);
- }
- bool SourceFieldTracker::isSelected(IHqlExpression * expr) const
- {
- loop
- {
- if (expr->getOperator() != no_select)
- return false;
- IHqlExpression * ds = expr->queryChild(0);
- if (ds->queryNormalizedSelector() == selector)
- return true;
- expr = ds;
- }
- }
- //------------------------------------------------------------------------------------------------
- void gatherFieldUsage(SourceFieldUsage * fieldUsage, IHqlExpression * expr, IHqlExpression * selector)
- {
- unsigned first = getNumChildTables(expr);
- SourceFieldTracker tracker(fieldUsage, selector);
- ForEachChildFrom(i, expr, first)
- tracker.analyse(expr->queryChild(i), 0);
- }
- void gatherParentFieldUsage(SourceFieldUsage * fieldUsage, IHqlExpression * expr)
- {
- bool hasDs = false;
- bool hasLeft = false;
- bool hasRight = false;
- switch (getChildDatasetType(expr))
- {
- case childdataset_dataset:
- hasDs = true;
- break;
- case childdataset_datasetleft:
- hasDs = true;
- hasLeft = true;
- break;
- case childdataset_left:
- hasLeft = true;
- break;
- case childdataset_same_left_right:
- hasLeft = true;
- hasRight = true;
- break;
- case childdataset_top_left_right:
- hasDs = true;
- hasLeft = true;
- hasRight = true;
- break;
- case childdataset_leftright: // e.g., no_aggregate
- hasLeft = true;
- break;
- }
- IHqlExpression * ds = expr->queryChild(0);
- IHqlExpression * selSeq = querySelSeq(expr);
- //MORE: Do all this in a single pass
- if (hasDs)
- gatherFieldUsage(fieldUsage, expr, ds->queryNormalizedSelector());
- if (hasLeft)
- {
- OwnedHqlExpr left = createSelector(no_left, ds, selSeq);
- gatherFieldUsage(fieldUsage, expr, left);
- }
- if (hasRight)
- {
- OwnedHqlExpr right = createSelector(no_right, ds, selSeq);
- gatherFieldUsage(fieldUsage, expr, right);
- }
- }
|