12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
7377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978397939803981398239833984398539863987398839893990399139923993399439953996399739983999400040014002400340044005400640074008400940104011401240134014401540164017401840194020402140224023 |
- /*##############################################################################
- HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ############################################################################## */
- #include "hqlopt.ipp"
- #include "hqlpmap.hpp"
- #include "jexcept.hpp"
- #include "jlog.hpp"
- #include "hqlutil.hpp"
- #include "hqlfold.hpp"
- #include "hqlthql.hpp"
- #include "hqlerror.hpp"
- #include "hqlerrors.hpp"
- #include "hqlexpr.ipp" // Not needed, but without it I don't see the symbols in the debugger.
- #include "hqlattr.hpp"
- #include "hqlmeta.hpp"
- #define MIGRATE_JOIN_CONDITIONS // This works, but I doubt it is generally worth the effort. - maybe on a flag.
- //#define TRACE_USAGE
- /*
- Notes:
- * Need to carefully keep track of usage counts after the expression tree has been transformed, otherwise activities end up being duplicated.
- o The usage count of the current expression doesn't matter since it won't be referenced any more...
- o Any replacement nodes need to inherit the link count of the item they are replacing.
- o Link counts for new children need to be incremented (they may already exist so don't set to 1).
- o Link counts for children that are no longer used should be decremented. However since items are not
- combined if the children are shared they will no longer be referenced, so it won't be a disaster if
- that doesn't happen (note aggregate child stripping is an exception).
- o If removal of a node causes other child expressions to no longer be linked, the whole branch needs removing.
- (I don't think we currently have any examples).
- o I try and track new datasets created when projects are expanded.
- o Moving a filter over a project doesn't change the normalized inputs, so the selectorSequence doesn't need changing.
- Known issues:
- o The usage counts are done at a global level, whilst the transformations are dependent on the context. That means it might be possible
- to decrement a link count too many times, causing activities to appear unshared when in reality they are.
- o Sometimes the order the graph is traversed in produces a non-optimal result. For instance filter2(filter1(project1(x))) and filter1(project2(x))
- would best be converted to project1(filter2([filter1(x)])) and project2([filter1(x)]) where [filter1(x)] is shared. However it is just as likely to produce:
- project1(filter2,1(x)) and project2(filter1(x)) because the filters are also combined.
- o Similarly nodes can become unshared if
- i) an unshared node is optimized
- ii) a different (shared) node is then optimized to generate the same expression as the original.
- Because the second version is marked as shared it won't get transformed, but the first instance will have been.
- This has been worked around to a certain extent by moving some of the code into the null transformer.
- o Sharing between subqueries is too aggressive. This is worked around by reoptimizing the subqueries.
- o Constant folding can create new datasets with no associated usage. The code is now structured to allow the constant fold to
- be included, but I suspect it makes it too inefficient, and I don't know of any examples causing problems.
- */
- //---------------------------------------------------------------------------
- IHqlExpression * createFilterCondition(const HqlExprArray & conds)
- {
- if (conds.ordinality() == 0)
- return createConstant(true);
- OwnedITypeInfo boolType = makeBoolType();
- return createBalanced(no_and, boolType, conds);
- }
- IHqlExpression * createFilterCondition(const HqlExprArray & conds, IHqlExpression * oldDataset, IHqlExpression * newDataset)
- {
- OwnedHqlExpr mapped = createFilterCondition(conds);
- return replaceSelector(mapped, oldDataset->queryNormalizedSelector(), newDataset->queryNormalizedSelector());
- }
- bool optimizeFilterConditions(IErrorReceiver & errorProcessor, HqlExprArray & conds)
- {
- ForEachItemInRev(i, conds)
- {
- IHqlExpression & cur = conds.item(i);
- if (cur.isConstant())
- {
- OwnedHqlExpr folded = foldHqlExpression(errorProcessor, &cur);
- IValue * value = folded->queryValue();
- if (value)
- {
- if (!value->getBoolValue())
- {
- conds.kill();
- conds.append(*folded.getClear());
- return true;
- }
- conds.remove(i);
- }
- }
- }
- return conds.ordinality() == 0;
- }
- //---------------------------------------------------------------------------
- ExpandMonitor::~ExpandMonitor()
- {
- if (!complex)
- {
- unsigned max = datasetsChanged.ordinality();
- for (unsigned i=0; i < max; i+= 2)
- {
- IHqlExpression & newValue = datasetsChanged.item(i);
- IHqlExpression & oldValue = datasetsChanged.item(i+1);
- if (newValue.queryBody() != oldValue.queryBody())// && oldValue->queryTransformExtra())
- optimizer.inheritUsage(&newValue, &oldValue);
- }
- }
- }
- IHqlExpression * ExpandMonitor::onExpandSelector()
- {
- //SELF.someField := LEFT
- complex = true;
- return NULL;
- }
- void ExpandMonitor::onDatasetChanged(IHqlExpression * newValue, IHqlExpression * oldValue)
- {
- //NB: Cannot call inheritUsage here because a different transform is in operation
- datasetsChanged.append(*LINK(newValue));
- datasetsChanged.append(*LINK(oldValue));
- }
- //MORE: This needs improving... especially caching. Probably stored in the expressions and used for filter scoring
- //(cardinality, cost, ...) - investigate some schemes + review hole implementation
- static bool isComplexExpansion(IHqlExpression * expr)
- {
- switch (expr->getOperator())
- {
- case no_select:
- {
- bool isNew;
- IHqlExpression * ds = querySelectorDataset(expr, isNew);
- //A select from a create row is likely to be optimized
- return isNew && (ds->getOperator() != no_createrow);
- }
- case NO_AGGREGATE:
- case no_call:
- case no_externalcall:
- case no_rowdiff:
- return true;
- case no_constant:
- return false;
- }
- ForEachChild(i, expr)
- if (isComplexExpansion(expr->queryChild(i)))
- return true;
- return false;
- }
- void ExpandComplexityMonitor::analyseTransform(IHqlExpression * transform)
- {
- ForEachChild(i, transform)
- {
- IHqlExpression * cur = transform->queryChild(i);
- switch (cur->getOperator())
- {
- case no_assignall:
- analyseTransform(cur);
- break;
- case no_assign:
- onExpand(cur->queryChild(0), cur->queryChild(1));
- break;
- case no_skip:
- if (isComplexExpansion(cur->queryChild(0)))
- complex = true;
- break;
- }
- if (complex)
- break;
- }
- }
- void ExpandComplexityMonitor::onExpand(IHqlExpression * select, IHqlExpression * newValue)
- {
- if (complex)
- return;
- if (select->isDataset())
- {
- switch (newValue->getOperator())
- {
- case no_null:
- case no_select:
- case no_getresult:
- case no_getgraphresult:
- case no_id2blob:
- //MORE: Should be a common list somewhere...
- break;
- default:
- complex = true;
- return;
- }
- }
- if (!newValue->isPure())
- complex = true;
- else if (isComplexExpansion(newValue))
- complex = true;
- }
- //---------------------------------------------------------------------------
- static HqlTransformerInfo cTreeOptimizerInfo("CTreeOptimizer");
- CTreeOptimizer::CTreeOptimizer(IErrorReceiver & _errorProcessor, unsigned _options) : PARENT(cTreeOptimizerInfo), errorProcessor(_errorProcessor)
- {
- options = _options;
- optimizeFlags |= TCOtransformNonActive;
- }
- IHqlExpression * CTreeOptimizer::extractFilterDs(HqlExprArray & conds, IHqlExpression * expr)
- {
- if (expr->getOperator() != no_filter || isShared(expr))
- return expr;
- IHqlExpression * ds = extractFilterDs(conds, expr->queryChild(0));
- unsigned max = expr->numChildren();
- for (unsigned i = 1; i < max; i++)
- {
- IHqlExpression * cur = queryRealChild(expr, i);
- if (cur)
- cur->unwindList(conds, no_and);
- }
- return ds;
- }
- inline IHqlExpression * makeChildList(IHqlExpression * expr)
- {
- IHqlExpression * exprList = NULL;
- unsigned num = expr->numChildren();
- for (unsigned i=1; i<num; i++)
- exprList = createComma(exprList, LINK(expr->queryChild(i)));
- return exprList;
- }
- IHqlExpression * CTreeOptimizer::removeChildNode(IHqlExpression * expr)
- {
- IHqlExpression * child = expr->queryChild(0);
- DBGLOG("Optimizer: Node %s remove child: %s", queryNode0Text(expr), queryNode1Text(child));
- noteUnused(child);
- return replaceChild(expr, child->queryChild(0));
- }
- IHqlExpression * CTreeOptimizer::removeParentNode(IHqlExpression * expr)
- {
- IHqlExpression * child = expr->queryChild(0);
- DBGLOG("Optimizer: Node %s remove self (now %s)", queryNode0Text(expr), queryNode1Text(child));
- // Need to dec link count of child because it is just about to inherited the link count from the parent
- decUsage(child);
- return LINK(child);
- }
- IHqlExpression * CTreeOptimizer::swapNodeWithChild(IHqlExpression * parent)
- {
- IHqlExpression * child = parent->queryChild(0);
- DBGLOG("Optimizer: Swap %s and %s", queryNode0Text(parent), queryNode1Text(child));
- OwnedHqlExpr newParent = swapDatasets(parent);
- //if this is the only reference to the child (almost certainly true) then no longer refd, so don't inc usage for child.
- noteUnused(child);
- if (!alreadyHasUsage(newParent))
- incUsage(newParent->queryChild(0));
- return newParent.getClear();
- }
- IHqlExpression * CTreeOptimizer::forceSwapNodeWithChild(IHqlExpression * parent)
- {
- OwnedHqlExpr swapped = swapNodeWithChild(parent);
- queryBodyExtra(swapped)->setStopHoist();
- return swapped.getClear();
- }
- IHqlExpression * CTreeOptimizer::swapNodeWithChild(IHqlExpression * parent, unsigned childIndex)
- {
- IHqlExpression * child = parent->queryChild(0);
- DBGLOG("Optimizer: Swap %s and %s", queryNode0Text(parent), queryNode1Text(child));
- OwnedHqlExpr newChild = replaceChildDataset(parent, child->queryChild(childIndex), 0);
- OwnedHqlExpr swapped = insertChildDataset(child, newChild, childIndex);
- if (!alreadyHasUsage(swapped))
- incUsage(newChild);
- noteUnused(child);
- return swapped.getClear();
- }
- IHqlExpression * CTreeOptimizer::swapIntoIf(IHqlExpression * expr, bool force)
- {
- IHqlExpression * child = expr->queryChild(0);
- //Can't optimize over a condition once a graph has been resourced, otherwise the activities aren't found.
- if (child->hasAttribute(_resourced_Atom))
- return LINK(expr);
- IHqlExpression * body = expr->queryBody();
- IHqlExpression * cond = child->queryChild(0);
- IHqlExpression * left = child->queryChild(1);
- IHqlExpression * right = child->queryChild(2);
- OwnedHqlExpr newLeft = replaceChildDataset(body, left, 0);
- OwnedHqlExpr newRight = replaceChildDataset(body, right, 0);
- HqlExprArray args;
- args.append(*LINK(cond));
- args.append(*LINK(newLeft));
- args.append(*LINK(newRight));
- OwnedHqlExpr newIf = child->clone(args);
- if (!alreadyHasUsage(newIf))
- {
- incUsage(newLeft);
- incUsage(newRight);
- }
- OwnedHqlExpr transformedIf = transform(newIf);
- if (force || (newIf != transformedIf))
- {
- //Need to call dec on all expressions that are no longer used... left and right still used by newLeft/newRight
- if (!alreadyHasUsage(newIf))
- {
- //This may possibly leave left/right linked once if transformed(newLeft) doesn't use left any more.
- //But a recursiveDecUsage could cause too much to be decremented.
- if (newLeft != transformedIf->queryChild(1))
- decUsage(newLeft);
- if (newRight != transformedIf->queryChild(2))
- decUsage(newRight);
- }
- noteUnused(child);
- DBGLOG("Optimizer: Swap %s and %s", queryNode0Text(expr), queryNode1Text(child));
- return transformedIf.getClear();
- }
- if (!alreadyHasUsage(newIf))
- {
- decUsage(newLeft);
- decUsage(newRight);
- }
- return LINK(expr);
- }
- //NB: Similar logic to swapIntoIf()
- IHqlExpression * CTreeOptimizer::swapIntoAddFiles(IHqlExpression * expr, bool force)
- {
- IHqlExpression * child = expr->queryChild(0);
- IHqlExpression * body = expr->queryBody();
- bool changed = false;
- HqlExprArray replacedArgs;
- HqlExprArray transformedArgs;
- ForEachChild(idx, child)
- {
- IHqlExpression * in = child->queryChild(idx);
- if (!in->isDataset() && !in->isDatarow())
- {
- replacedArgs.append(*LINK(in));
- transformedArgs.append(*LINK(in));
- }
- else
- {
- IHqlExpression * next = replaceChild(body, in);
- replacedArgs.append(*next);
- //MORE: Will be linked too many times if changed and item already exists
- incUsage(next); //Link so values get correctly inherited if they are transformed.
- IHqlExpression * transformed = transform(next);
- transformedArgs.append(*transformed);
- if (transformed != next)
- changed = true;
- }
- }
- if (force || changed)
- {
- ForEachItemIn(i, replacedArgs)
- {
- if (&replacedArgs.item(i) != &transformedArgs.item(i))
- decUsage(&replacedArgs.item(i)); //If they are the same then inheritUsage wont't have been called, so don't decrement.
- }
- //Need to call dec on all expressions that are no longer used... grand children should not be decremented
- noteUnused(child);
- //And create the new funnel
- DBGLOG("Optimizer: Swap %s and %s", queryNode0Text(expr), queryNode1Text(child));
- return child->clone(transformedArgs);
- }
- //Note, replaced == args so no need to call decUsage on args
- ForEachItemIn(i, replacedArgs)
- {
- IHqlExpression & cur = replacedArgs.item(i);
- if (!cur.isAttribute())
- decUsage(&cur); //If they are the same then inheritUsage wont't have been called, so don't decrement.
- }
- return LINK(expr);
- }
- IHqlExpression * CTreeOptimizer::moveFilterOverSelect(IHqlExpression * expr)
- {
- IHqlExpression * select = expr->queryChild(0);
- if (!isNewSelector(select))
- return NULL;
- IHqlExpression * ds = select->queryChild(0);
- //MORE: If ds is a row then the filter needs to be moved to the root dataset
- if (!ds->isDataset())
- return NULL;
- IHqlExpression * newScope = select->queryNormalizedSelector();
- HqlExprArray args, hoisted, notHoisted;
- HqlExprCopyArray inScope;
- unwindFilterConditions(args, expr);
- ForEachItemIn(i, args)
- {
- IHqlExpression & cur = args.item(i);
- inScope.kill();
- cur.gatherTablesUsed(NULL, &inScope);
- if (inScope.find(*newScope) == NotFound)
- hoisted.append(OLINK(cur));
- else
- notHoisted.append(OLINK(cur));
- }
- if (hoisted.ordinality() == 0)
- return NULL;
- DBGLOG("Optimizer: Move filter over select (%d/%d)", hoisted.ordinality(), args.ordinality());
- //Create a filtered dataset
- IHqlExpression * inDs = LINK(ds);
- if (inDs->isDatarow())
- inDs = createDatasetFromRow(inDs);
- hoisted.add(*inDs, 0);
- OwnedHqlExpr newDs = expr->clone(hoisted);
- //Now a select on that
- args.kill();
- unwindChildren(args, select);
- args.replace(*LINK(newDs), 0);
- OwnedHqlExpr newSelect = select->clone(args);
- if (!alreadyHasUsage(newSelect))
- incUsage(newDs);
- if (notHoisted.ordinality())
- {
- notHoisted.add(*LINK(select), 0);
- OwnedHqlExpr unhoistedFilter = expr->clone(notHoisted);
- OwnedHqlExpr ret = replaceChild(unhoistedFilter, newSelect);
- if (!alreadyHasUsage(ret))
- incUsage(newSelect);
- return ret.getClear();
- }
- return newSelect.getClear();
- }
- IHqlExpression * CTreeOptimizer::optimizeAggregateUnsharedDataset(IHqlExpression * expr, bool isSimpleCount)
- { /* Recursively simplifies the chain of activities that feeds an aggregate.
-    * The walk stops (returning expr unchanged) at the first node that is shared, that does not have
-    * exactly one child table, or whose operator is not handled by the first switch below.
-    *   expr          - the dataset currently being examined.
-    *   isSimpleCount - supplied by the caller; LIMIT/CHOOSEN/TOPN are only walked through when it is true,
-    *                   and projects are only removed when it is true and HOOinsidecompound is set.
-    * Returns a linked expression: expr itself when nothing changed, otherwise its replacement. */
- if (isShared(expr) || (getNumChildTables(expr) != 1))
- return LINK(expr);
- //Don't include any operations which rely on the order/distribution:
- bool childIsSimpleCount = isSimpleCount;
- node_operator op = expr->getOperator();
- IHqlExpression * ds = expr->queryChild(0);
- switch (op)
- {
- case no_filter:
- case no_aggregate:
- childIsSimpleCount = false; // the recursion below a filter/aggregate is never treated as a simple count
- break;
- case no_hqlproject:
- case no_newusertable:
- case no_newaggregate:
- case no_sort:
- case no_subsort:
- case no_distribute:
- case no_keyeddistribute:
- case no_fetch:
- case no_transformebcdic:
- case no_transformascii:
- if (childIsSimpleCount && !isPureActivity(expr)) // an impure activity also clears the flag for the recursion
- childIsSimpleCount = false;
- break;
- case no_compound_indexread:
- case no_compound_diskread:
- case no_keyedlimit:
- break;
- case no_limit:
- if (expr->hasAttribute(onFailAtom)) // a LIMIT with an ONFAIL attribute is never walked through
- return LINK(expr);
- //fall through
- case no_choosen:
- case no_topn:
- if (isSimpleCount)
- break;
- return LINK(expr);
- default:
- return LINK(expr);
- }
- OwnedHqlExpr optimizedDs = optimizeAggregateUnsharedDataset(ds, childIsSimpleCount); // simplify the input first
- //Remove items that are really inefficient and unnecessary, but don't for the moment remove projects or anything that changes the
- //record structure.
- switch (op)
- {
- case no_sort:
- case no_subsort:
- case no_distribute:
- case no_keyeddistribute:
- noteUnused(expr); // the node is dropped: its (optimized) input is returned in its place
- return optimizedDs.getClear();
- case no_topn:
- {
- assertex(isSimpleCount); // the first switch only lets a TOPN through when isSimpleCount is set
- noteUnused(expr);
- OwnedHqlExpr ret = createDataset(no_choosen, optimizedDs.getClear(), LINK(expr->queryChild(2))); // TOPN becomes CHOOSEN, reusing child 2 of the TOPN
- incUsage(ret);
- return expr->cloneAllAnnotations(ret);
- }
- case no_hqlproject:
- case no_newusertable:
- if (isSimpleCount && (options & HOOinsidecompound))
- {
- if (expr->hasAttribute(_countProject_Atom) || expr->hasAttribute(prefetchAtom))
- break;
- if (isPureActivity(expr) && !isAggregateDataset(expr))
- {
- noteUnused(expr); // the project is dropped and its input returned instead
- return optimizedDs.getClear();
- }
- }
- break;
- }
- if (ds == optimizedDs) // input unchanged => keep this node exactly as it is
- return LINK(expr);
- OwnedHqlExpr replaced = replaceChild(expr, optimizedDs);
- incUsage(replaced);
- noteUnused(expr);
- return replaced.getClear();
- }
- IHqlExpression * CTreeOptimizer::optimizeAggregateDataset(IHqlExpression * transformed)
- { /* Attempts to make the aggregate `transformed` read from a dataset further down the tree.
-    * Starting at child 0 it repeatedly steps over the activities handled in the switch below
-    * (projects whose fields can be expanded into the aggregate's other arguments, fetch, group,
-    * sorts, distributes, preload, iterate).  Unless a shared node was crossed, the remaining
-    * dataset is then passed to optimizeAggregateUnsharedDataset() (not done for no_aggregate).
-    * Returns LINK(transformed) when the input dataset is unchanged, otherwise a clone of
-    * transformed built from the updated children. */
- HqlExprArray children;
- unwindChildren(children, transformed);
- IHqlExpression * root = &children.item(0); // the aggregate's original input dataset
- HqlExprAttr ds = root;
- IHqlExpression * wrapper = NULL; // set when a no_preload is stepped over; it is re-created around the result
- node_operator aggOp = transformed->getOperator();
- bool insideShared = false;
- bool isScalarAggregate = (aggOp != no_newaggregate) && (aggOp != no_aggregate);
- bool isSimpleCount = isSimpleCountExistsAggregate(transformed, false, true);
- loop
- {
- node_operator dsOp = ds->getOperator();
- IHqlExpression * next = NULL; // dataset to step down to; left NULL when ds cannot be skipped
- switch (dsOp)
- {
- case no_hqlproject:
- case no_newusertable:
- if (ds->hasAttribute(prefetchAtom))
- break;
- //MORE: If the record is empty then either remove the project if no SKIP, or convert the SKIP to a filter
- //Don't remove projects for the moment because they can make counts of disk reads much less
- //efficient. Delete the following lines once we have a count-diskread activity
- if (!isScalarAggregate && !(options & (HOOcompoundproject|HOOinsidecompound)) && !ds->hasAttribute(_countProject_Atom) )
- break;
- if (isPureActivity(ds) && !isAggregateDataset(ds))
- {
- OwnedMapper mapper = getMapper(ds);
- ExpandSelectorMonitor expandMonitor(*this);
- HqlExprArray newChildren;
- unsigned num = children.ordinality();
- LinkedHqlExpr oldDs = ds;
- LinkedHqlExpr newDs = ds->queryChild(0);
- if (transformed->getOperator() == no_aggregate)
- {
- oldDs.setown(createSelector(no_left, ds, querySelSeq(transformed))); // for no_aggregate the mapping is done on LEFT selectors
- newDs.setown(createSelector(no_left, newDs, querySelSeq(transformed)));
- }
- for (unsigned idx = 1; idx < num; idx++) // children 1.. are rewritten in terms of the project's input
- {
- OwnedHqlExpr mapped = expandFields(mapper, &children.item(idx), oldDs, newDs, &expandMonitor);
- if (containsCounter(mapped))
- expandMonitor.setComplex();
- newChildren.append(*mapped.getClear());
- }
- if (!expandMonitor.isComplex()) // only commit the rewritten arguments if none was flagged complex
- {
- for (unsigned idx = 1; idx < num; idx++)
- children.replace(OLINK(newChildren.item(idx-1)), idx);
- next = ds->queryChild(0);
- }
- }
- break;
- case no_fetch:
- if (isSimpleCount && !containsSkip(ds->queryChild(3)))
- next = ds->queryChild(1); // note: steps to child 1 of the fetch, not child 0
- break;
- case no_group:
- if (isScalarAggregate)
- next = ds->queryChild(0);
- break;
- case no_sort:
- case no_subsort:
- case no_sorted:
- //MORE: Allowed if the transform is commutative for no_aggregate
- if (aggOp != no_aggregate)
- next = ds->queryChild(0);
- break;
- case no_distribute:
- case no_distributed:
- case no_keyeddistribute:
- case no_preservemeta:
- if (isScalarAggregate || !isGrouped(ds->queryChild(0)))
- next = ds->queryChild(0);
- break;
- case no_preload:
- wrapper = ds;
- next = ds->queryChild(0);
- break;
- case no_iterate:
- if (isSimpleCount && !containsSkip(ds->queryChild(1)))
- next = ds->queryChild(0);
- break;
- }
- if (!next)
- break;
- if (!insideShared) // usage is only released for skipped nodes up to and including the first shared one
- {
- insideShared = isShared(ds);
- noteUnused(ds);
- }
- ds.set(next);
- }
- //Not completely sure about usageCounting being maintained correctly
- if (!insideShared)
- {
- OwnedHqlExpr newDs = (aggOp != no_aggregate) ? optimizeAggregateUnsharedDataset(ds, isSimpleCount) : LINK(ds);
- if (newDs != ds)
- {
- HqlMapTransformer mapper; // remaps references to the old dataset in every child onto the new one
- mapper.setMapping(ds, newDs);
- mapper.setSelectorMapping(ds, newDs);
- ForEachItemIn(i, children)
- children.replace(*mapper.transformRoot(&children.item(i)), i);
- ds.set(newDs);
- }
- }
- if (ds == root) // nothing was skipped or simplified
- return LINK(transformed);
- if (wrapper)
- {
- if (ds == root->queryChild(0)) // only the wrapper was stepped over, so there is nothing to gain
- {
- incUsage(root); // NOTE(review): presumably balances the noteUnused() applied to root in the loop - confirm
- return LINK(transformed);
- }
- }
- //A different node is now shared between the graphs
- if (insideShared)
- incUsage(ds);
- if (wrapper)
- {
- HqlExprArray args; // rebuild the wrapper with the new dataset as child 0 and its other children unchanged
- args.append(*ds.getClear());
- unwindChildren(args, wrapper, 1);
- ds.setown(wrapper->clone(args));
- incUsage(ds);
- }
- DBGLOG("Optimizer: Aggregate replace %s with %s", queryNode0Text(root), queryNode1Text(ds));
- children.replace(*ds.getClear(), 0);
- return transformed->clone(children);
- }
- //Steps through any chain of no_dataset_alias nodes and returns the first expression that is not one.
- //The result is not linked.
- static IHqlExpression * skipMetaAliases(IHqlExpression * expr)
- {
-     IHqlExpression * cursor = expr;
-     while (cursor->getOperator() == no_dataset_alias)
-         cursor = cursor->queryChild(0);
-     return cursor;
- }
- IHqlExpression * CTreeOptimizer::optimizeDatasetIf(IHqlExpression * transformed)
- { /* Converts an IF whose two branches are filters of the same underlying dataset into a single
-    * filter of that dataset.  The branches are compared after extractFilterDs() has removed their
-    * filters and skipMetaAliases() has removed any dataset aliases.
-    * Returns a linked filter carrying the annotations of transformed, or LINK(transformed) when the
-    * branches do not share the same body. */
- //if(cond, ds(filt1), ds(filt2)) => ds(if(cond,filt1,filt2))
- HqlExprArray leftFilter, rightFilter;
- IHqlExpression * unfilteredLeft = extractFilterDs(leftFilter, transformed->queryChild(1));
- IHqlExpression * unfilteredRight = extractFilterDs(rightFilter, transformed->queryChild(2));
- IHqlExpression * left = skipMetaAliases(unfilteredLeft);
- IHqlExpression * right = skipMetaAliases(unfilteredRight);
- if (left->queryBody() == right->queryBody())
- {
- //If one (or both) of the datasets are aliases then ensure that one of the
- //aliases is used in the replacement.
- IHqlExpression * baseDataset = unfilteredLeft;
- if (right->queryNormalizedSelector() != unfilteredRight->queryNormalizedSelector())
- baseDataset = unfilteredRight;
- HqlExprArray args;
- args.append(*LINK(baseDataset));
- OwnedHqlExpr leftCond = createFilterCondition(leftFilter, unfilteredLeft, baseDataset);
- OwnedHqlExpr rightCond = createFilterCondition(rightFilter, unfilteredRight, baseDataset);
- if (leftCond == rightCond) // identical conditions: the IF condition is not needed at all
- {
- args.append(*leftCond.getClear());
- }
- else
- {
- IHqlExpression * cond = transformed->queryChild(0);
- args.append(*createValue(no_if, cond->getType(), LINK(cond), leftCond.getClear(), rightCond.getClear()));
- }
- OwnedHqlExpr ret = createDataset(no_filter, args);
- DBGLOG("Optimizer: Convert %s to a filter", queryNode0Text(transformed));
- //NOTE: left and right never walk over any shared nodes, so don't need to decrement usage for
- //child(1), child(2) or intermediate nodes to left/right, since not referenced any more.
- if (baseDataset == left)
- noteUnused(right); // dataset is now used one less time
- else
- noteUnused(left);
- return transformed->cloneAllAnnotations(ret);
- }
- return LINK(transformed);
- }
- //Returns true if the two branches of an IF can be treated as the same expression.
- //  options - optimizer option flags; only HOOexpensive is examined here.
- //  left    - the true branch.
- //  right   - the false branch.
- //The branches match if they have the same body, or if they are both no_hqlproject/no_newusertable nodes
- //with the same number of children whose only difference is the selector sequence attribute.  When
- //HOOexpensive is set a no_hqlproject transform is also accepted if it becomes identical to the other
- //branch's transform once the selector sequence has been substituted.
- static bool branchesMatch(unsigned options, IHqlExpression * left, IHqlExpression * right)
- {
-     if (left->queryBody() == right->queryBody())
-         return true;
-     node_operator leftOp = left->getOperator();
-     if (leftOp != right->getOperator())
-         return false;
-     switch (leftOp)
-     {
-     case no_hqlproject:
-     case no_newusertable:
-         break;
-     default:
-         return false;
-     }
-     if (left->numChildren() != right->numChildren())
-         return false;
-     //Check for the situation where the only difference between two projects is the selector sequence
-     ForEachChild(i, left)
-     {
-         IHqlExpression * curLeft = left->queryChild(i);
-         if (curLeft->isAttribute() && (curLeft->queryName() == _selectorSequence_Atom))
-             continue;
-         IHqlExpression * curRight = right->queryChild(i);
-         if (curLeft->queryBody() != curRight->queryBody())
-         {
-             //The following code allows LEFT to be referred to within the transform.  It is only enabled by
-             //HOOexpensive because of the potential cost of replacing the selseq within the transform.
-             if (!(options & HOOexpensive))
-                 return false;
-             if ((leftOp != no_hqlproject) || !curLeft->isTransform())
-                 return false;
-             if (!recordTypesMatch(curLeft,curRight))
-                 return false;
-             OwnedHqlExpr newTransform = replaceExpression(curLeft, querySelSeq(left), querySelSeq(right));
-             if (newTransform->queryBody() != curRight->queryBody())
-                 return false;
-             //The transforms are equivalent once the selector sequence is substituted, so carry on with
-             //the remaining children.  (Previously control fell into an unconditional "return false" here,
-             //which meant the checks above could never produce a match.)
-         }
-     }
-     return true;
- }
- IHqlExpression * CTreeOptimizer::optimizeIf(IHqlExpression * expr)
- { /* Simplifies an IF expression (child 0 = condition, child 1 = true branch, child 2 = false branch).
-    *  - If both branches match (see branchesMatch) the true branch is returned.
-    *  - If the condition has a constant value the selected branch is returned.
-    *  - For datasets only, an unshared nested IF that repeats one of the outer branches is merged
-    *    into the outer IF by combining the two conditions.
-    * Returns a linked replacement expression, or NULL when no rewrite applies (including when
-    * there is no false branch). */
- IHqlExpression * trueExpr = expr->queryChild(1);
- IHqlExpression * falseExpr = expr->queryChild(2);
- if (!falseExpr)
- return NULL;
- if (branchesMatch(options, trueExpr, falseExpr))
- {
- noteUnused(trueExpr); // inherit usage() will increase the usage again
- noteUnused(falseExpr);
- return LINK(trueExpr);
- }
- IHqlExpression * cond = expr->queryChild(0);
- IValue * condValue = cond->queryValue(); // non-NULL only when the condition is a constant value
- if (condValue)
- {
- if (condValue->getBoolValue())
- {
- recursiveDecUsage(falseExpr); // the discarded branch is no longer referenced from here
- decUsage(trueExpr); // inherit usage() will increase the usage again
- return LINK(trueExpr);
- }
- else
- {
- recursiveDecUsage(trueExpr);
- decUsage(falseExpr); // inherit usage() will increase the usage again
- return LINK(falseExpr);
- }
- }
- //Usage counts aren't handled correctly for datarows, so only optimize datasets, otherwise it can get bigger.
- if (!expr->isDataset())
- return NULL;
- //if(c1, if(c2, x, y), z) y==z => if(c1 && c2, x, z)
- //if(c1, if(c2, x, y), z) x==z => if(c1 && !c2, y, z)
- //if(c1, z, if(c2, x, y)) x==z => if(c1 || c2, z, y)
- //if(c1, z, if(c2, x, y)) y==z => if(c1 || !c2, z, x)
- //Only do these changes if c2 has no additional dependencies than c1
- HqlExprArray args; // stays empty unless one of the four patterns above is recognised
- if ((trueExpr->getOperator() == no_if) && !isShared(trueExpr))
- {
- IHqlExpression * childCond = trueExpr->queryChild(0);
- if (introducesNewDependencies(cond, childCond)) // gives up completely - the false branch is not examined
- return NULL;
- IHqlExpression * childTrue = trueExpr->queryChild(1);
- IHqlExpression * childFalse = trueExpr->queryChild(2);
- if (falseExpr->queryBody() == childFalse->queryBody())
- {
- args.append(*createBoolExpr(no_and, LINK(cond), LINK(childCond)));
- args.append(*LINK(childTrue));
- args.append(*LINK(falseExpr));
- }
- else if (falseExpr->queryBody() == childTrue->queryBody())
- {
- args.append(*createBoolExpr(no_and, LINK(cond), getInverse(childCond)));
- args.append(*LINK(childFalse));
- args.append(*LINK(falseExpr));
- }
- if (args.ordinality())
- {
- DBGLOG("Optimizer: Merge %s and %s", queryNode0Text(expr), queryNode1Text(trueExpr));
- noteUnused(falseExpr); // the duplicated branch is now referenced once instead of twice
- }
- }
- if (args.empty() && (falseExpr->getOperator() == no_if) && !isShared(falseExpr))
- {
- IHqlExpression * childCond = falseExpr->queryChild(0);
- if (introducesNewDependencies(cond, childCond))
- return NULL;
- IHqlExpression * childTrue = falseExpr->queryChild(1);
- IHqlExpression * childFalse = falseExpr->queryChild(2);
- if (trueExpr->queryBody() == childTrue->queryBody())
- {
- args.append(*createBoolExpr(no_or, LINK(cond), LINK(childCond)));
- args.append(*LINK(trueExpr));
- args.append(*LINK(childFalse));
- }
- else if (trueExpr->queryBody() == childFalse->queryBody())
- {
- args.append(*createBoolExpr(no_or, LINK(cond), getInverse(childCond)));
- args.append(*LINK(trueExpr));
- args.append(*LINK(childTrue));
- }
- if (args.ordinality())
- {
- DBGLOG("Optimizer: Merge %s and %s", queryNode0Text(expr), queryNode1Text(falseExpr));
- noteUnused(trueExpr);
- }
- }
- if (args.ordinality())
- return expr->clone(args);
- return NULL;
- }
- bool CTreeOptimizer::expandFilterCondition(HqlExprArray & expanded, HqlExprArray & unexpanded, IHqlExpression * filter, bool moveOver, bool onlyKeyed)
- { /* Splits the conditions of `filter` into those that can be re-expressed through the mapping of
-    * its child dataset (appended to `expanded`) and those that cannot (the original condition is
-    * appended to `unexpanded`).
-    *   moveOver  - selects between this class's expandFields() and mapper->expandFields(); the
-    *               difference between the two is not visible in this function.
-    *   onlyKeyed - when set, only conditions for which containsAssertKeyed() is true are added to
-    *               `expanded`; the others are only examined at all if HOOfiltersharedproject is set.
-    * If a condition folds to constant false, `expanded` is reset to just that condition and true is
-    * returned immediately.  Otherwise returns true when `expanded` is not empty. */
- HqlExprArray conds;
- unwindFilterConditions(conds, filter);
- IHqlExpression * child = filter->queryChild(0);
- IHqlExpression * grandchild = child->queryChild(0);
- OwnedMapper mapper = getMapper(child);
- ForEachItemIn(i, conds)
- {
- IHqlExpression * cur = &conds.item(i);
- bool isKeyed = containsAssertKeyed(cur);
- if (!onlyKeyed || isKeyed || (options & HOOfiltersharedproject) )
- {
- ExpandComplexityMonitor expandMonitor(*this);
- OwnedHqlExpr expandedFilter;
- if (moveOver)
- expandedFilter.setown(expandFields(mapper, cur, child, grandchild, &expandMonitor));
- else
- expandedFilter.setown(mapper->expandFields(cur, child, grandchild, grandchild, &expandMonitor));
- if (expandedFilter->isConstant())
- {
- expandedFilter.setown(foldHqlExpression(errorProcessor, expandedFilter));
- IValue * value = expandedFilter->queryValue();
- if (value && !value->getBoolValue()) // one constant-false condition makes the whole filter false
- {
- if (onlyKeyed)
- DBGLOG("Optimizer: Merging filter over shared project always false");
- expanded.kill();
- expanded.append(*LINK(expandedFilter));
- return true;
- }
- }
- if ((!onlyKeyed || isKeyed) && !expandMonitor.isComplex())
- expanded.append(*LINK(expandedFilter));
- else
- unexpanded.append(*LINK(cur));
- }
- else
- unexpanded.append(*LINK(cur));
- }
- return expanded.ordinality() != 0;
- }
- IHqlExpression * CTreeOptimizer::hoistMetaOverProject(IHqlExpression * expr)
- { /* Swaps `expr` with the project that is its child: expr is re-created over the project's input,
-    * with its remaining children expanded through the project's mapping, and the project is then
-    * re-created on top of it.
-    * Returns the new (linked) project, or NULL if the child has an unknown transform or an
-    * IException is thrown while building the replacement. */
- IHqlExpression * child = expr->queryChild(0);
- if (hasUnknownTransform(child))
- return NULL;
- IHqlExpression * grandchild = child->queryChild(0);
- IHqlExpression * active = queryActiveTableSelector();
- try
- {
- OwnedMapper mapper = getMapper(child);
- HqlExprArray args;
- args.append(*LINK(grandchild));
- ForEachChildFrom(i, expr, 1)
- {
- IHqlExpression * cur = expr->queryChild(i);
- args.append(*expandFields(mapper, cur, active, active, NULL));
- }
- OwnedHqlExpr newPreserve = expr->clone(args);
- OwnedHqlExpr newProject = replaceChild(child, newPreserve);
- decUsage(child);
- if (!alreadyHasUsage(newProject)) // only count the new inner node if the new project is not already counted
- incUsage(newPreserve);
- return newProject.getClear();
- }
- catch (IException * e)
- {
- //Can possibly occur if the field has been optimized away. (see bug #76896)
- e->Release();
- return NULL;
- }
- }
- IHqlExpression * CTreeOptimizer::hoistFilterOverProject(IHqlExpression * transformed, bool onlyKeyed)
- { /* Moves as many conditions of the filter `transformed` as possible below the project that is
-    * its child, so that the filter is applied to the project's input.
-    *   onlyKeyed - passed through to expandFilterCondition().
-    * Returns NULL if the child is a count project, has a prefetch attribute, is an aggregate
-    * dataset, has an unknown transform, or no condition could be expanded.  Otherwise returns a
-    * linked expression: the project over the new filter, wrapped in a filter holding any conditions
-    * that could not be moved; or the result of getOptimizedFilter() when
-    * optimizeFilterConditions() returns true. */
- IHqlExpression * child = transformed->queryChild(0);
- //Should be able to move filters over count projects, as long as not filtering on the count fields.
- //Would need to add a containsCounter() test in the expandFields code - cannot just test filterExpr
- //because counter may be there (e.g., countindex3.hql)
- if (child->hasAttribute(_countProject_Atom) || child->hasAttribute(prefetchAtom) || isAggregateDataset(child))
- return NULL;
- if (hasUnknownTransform(child))
- return NULL;
- HqlExprArray expanded, unexpanded;
- if (expandFilterCondition(expanded, unexpanded, transformed, true, onlyKeyed))
- {
- if (optimizeFilterConditions(errorProcessor, expanded))
- return getOptimizedFilter(transformed, expanded);
- OwnedHqlExpr filterExpr = createFilterCondition(expanded);
- if (unexpanded.ordinality())
- DBGLOG("Optimizer: Move %d/%d filters over %s", expanded.ordinality(), expanded.ordinality()+unexpanded.ordinality(), queryNode1Text(child));
- else
- DBGLOG("Optimizer: Swap %s and %s", queryNode0Text(transformed), queryNode1Text(child));
- IHqlExpression * newGrandchild = child->queryChild(0);
- OwnedHqlExpr newFilter = createDataset(no_filter, LINK(newGrandchild), LINK(filterExpr));
- newFilter.setown(transformed->cloneAllAnnotations(newFilter));
- OwnedHqlExpr ret = replaceChild(child, newFilter); // the project re-created over the new filter
- if (!alreadyHasUsage(ret))
- incUsage(newFilter);
- noteUnused(child);
- if (unexpanded.ordinality() == 0)
- return ret.getClear();
- unexpanded.add(*LINK(child), 0); // child 0 of the residual filter; replaced by ret two lines below
- OwnedHqlExpr unhoistedFilter = transformed->clone(unexpanded);
- OwnedHqlExpr newUnhoistedFilter = replaceChild(unhoistedFilter, ret);
- if (!alreadyHasUsage(newUnhoistedFilter))
- incUsage(ret);
- return newUnhoistedFilter.getClear();
- }
- return NULL;
- }
- IHqlExpression * CTreeOptimizer::getHoistedFilter(IHqlExpression * transformed, bool canHoistLeft, bool canMergeLeft, bool canHoistRight, bool canMergeRight, unsigned conditionIndex)
- { /* Pushes the conditions of the filter `transformed` into its two-input child (left = child 0,
-    * right = queryJoinRhs(child)).  Each condition is expanded through the child's mapping and then:
-    *   - if it only refers to LEFT (or RIGHT) it becomes a filter on that input when
-    *     canHoistLeft (canHoistRight) is set, or else is merged into the child's condition when
-    *     canMergeLeft (canMergeRight) is set and conditionIndex != NotFound;
-    *   - if it refers to both sides it is merged only when both canMerge flags are set and
-    *     conditionIndex != NotFound;
-    *   - otherwise it is kept in a filter above the child.
-    *   conditionIndex - index of the child's condition argument that merged conditions are ANDed into.
-    * Returns NULL when nothing can be moved, the result of getOptimizedFilter(transformed, false)
-    * when a condition folds to false, getOptimizedFilter(transformed, true) when every condition
-    * folds to true, and otherwise the linked replacement expression. */
- HqlExprArray conds;
- unwindFilterConditions(conds, transformed);
- IHqlExpression * child = transformed->queryChild(0);
- IHqlExpression * left = child->queryChild(0);
- IHqlExpression * right = queryJoinRhs(child);
- IHqlExpression * seq = querySelSeq(child);
- OwnedHqlExpr leftSelector = createSelector(no_left, left, seq);
- OwnedHqlExpr rightSelector = createSelector(no_right, right, seq);
- OwnedHqlExpr activeLeft = ensureActiveRow(left);
- OwnedHqlExpr activeRight = ensureActiveRow(right);
- OwnedMapper mapper = getMapper(child);
- HqlExprArray expanded, unexpanded, leftFilters, rightFilters;;
- ForEachItemIn(i, conds)
- {
- ExpandComplexityMonitor expandMonitor(*this);
- IHqlExpression * cur = &conds.item(i);
- OwnedHqlExpr expandedFilter = mapper->expandFields(cur, child, NULL, NULL, &expandMonitor);
- bool matched = false; // true once the condition has been dealt with and need not stay above the child
- if (expandedFilter->isConstant())
- {
- expandedFilter.setown(foldHqlExpression(errorProcessor, expandedFilter));
- IValue * value = expandedFilter->queryValue();
- if (value)
- {
- if (!value->getBoolValue())
- return getOptimizedFilter(transformed, false);
- else
- matched = true; // a constant-true condition is simply dropped
- }
- }
- if (!matched && !expandMonitor.isComplex())
- {
- OwnedHqlExpr leftMappedFilter = replaceSelector(expandedFilter, leftSelector, activeLeft);
- OwnedHqlExpr rightMappedFilter = replaceSelector(expandedFilter, rightSelector, activeRight);
- //MORE: Could also take join conditions into account to sent filter up both sides;
- if (rightMappedFilter==expandedFilter) // replacing RIGHT changed nothing, so RIGHT is not referenced
- {
- //Only contains LEFT.
- if (canHoistLeft)
- {
- leftFilters.append(*LINK(leftMappedFilter));
- matched = true;
- }
- else if (canMergeLeft && (conditionIndex != NotFound))
- {
- expanded.append(*LINK(expandedFilter));
- matched = true;
- }
- //If the filter expression is invariant of left and right then hoist up both paths.
- if (leftMappedFilter==expandedFilter && canHoistRight)
- {
- rightFilters.append(*LINK(expandedFilter));
- matched = true;
- }
- }
- else if (leftMappedFilter==expandedFilter) // replacing LEFT changed nothing, so LEFT is not referenced
- {
- //Only contains RIGHT.
- if (canHoistRight)
- {
- rightFilters.append(*LINK(rightMappedFilter));
- matched = true;
- }
- else if (canMergeRight && (conditionIndex != NotFound))
- {
- expanded.append(*LINK(expandedFilter));
- matched = true;
- }
- }
- else if (canMergeLeft && canMergeRight && conditionIndex != NotFound)
- {
- expanded.append(*LINK(expandedFilter));
- matched = true;
- }
- }
- if (!matched)
- unexpanded.append(*LINK(cur));
- }
- if (leftFilters.ordinality() || rightFilters.ordinality() || expanded.ordinality())
- {
- LinkedHqlExpr ret = child;
- //first insert filters on the left/right branches
- if (leftFilters.ordinality())
- ret.setown(createHoistedFilter(ret, leftFilters, 0, conds.ordinality()));
- if (rightFilters.ordinality())
- ret.setown(createHoistedFilter(ret, rightFilters, 1, conds.ordinality()));
- //extend the join condition where appropriate
- if (expanded.ordinality())
- {
- DBGLOG("Optimizer: Merge filters(%d/%d) into %s condition", expanded.ordinality(), conds.ordinality(), queryNode1Text(child));
- OwnedITypeInfo boolType = makeBoolType();
- HqlExprArray args;
- unwindChildren(args, ret);
- expanded.add(OLINK(args.item(conditionIndex)), 0); // the existing condition goes first in the AND
- args.replace(*createBalanced(no_and, boolType, expanded), conditionIndex);
- ret.setown(ret->clone(args));
- }
- if (ret != child)
- noteUnused(child);
- //Now add the item that couldn't be hoisted.
- if (unexpanded.ordinality())
- {
- if (ret != child)
- incUsage(ret);
- unexpanded.add(*LINK(child), 0); // child 0 of the residual filter; replaced by ret below
- OwnedHqlExpr unhoistedFilter = transformed->clone(unexpanded);
- ret.setown(replaceChild(unhoistedFilter, ret));
- }
- return ret.getClear();
- }
- else if (unexpanded.ordinality() == 0)
- //All filters expanded to true => remove the filter
- return getOptimizedFilter(transformed, true) ;
- return NULL;
- }
- IHqlExpression * CTreeOptimizer::createHoistedFilter(IHqlExpression * expr, HqlExprArray & conditions, unsigned childIndex, unsigned maxConditions)
- { /* Inserts a filter built from `conditions` in front of child `childIndex` of `expr`.
-    *   conditions    - the filter conditions; NOTE the array is modified: the child dataset is
-    *                   inserted at position 0 so the array can be used as the filter's arguments.
-    *   maxConditions - only used in the trace message.
-    * Returns the linked expression produced by insertChildDataset(). */
- IHqlExpression * grand = expr->queryChild(childIndex);
- DBGLOG("Optimizer: Hoisting filter(%d/%d) over %s.%d", conditions.ordinality(), maxConditions, queryNode0Text(expr), childIndex);
- conditions.add(*LINK(grand), 0); // must follow the trace above, which reports the number of conditions only
- OwnedHqlExpr hoistedFilter = createDataset(no_filter, conditions);
- OwnedHqlExpr ret = insertChildDataset(expr, hoistedFilter, childIndex);
- if (!alreadyHasUsage(ret))
- incUsage(hoistedFilter);
- return ret.getClear();
- }
- IHqlExpression * CTreeOptimizer::queryPromotedFilter(IHqlExpression * expr, node_operator side, unsigned childIndex)
- { /* Tries to move conditions of the filter `expr` so that they are applied to child `childIndex`
-    * of the filter's child dataset instead.  A condition is moved when mapper->collapseFields()
-    * reports success for it.
-    *   side       - selector operator used to build the selector the conditions are collapsed against.
-    *   childIndex - which input of the child dataset receives the moved conditions.
-    * Returns NULL if no condition could be moved; otherwise a linked expression - the new child, wrapped in
-    * a filter holding the remaining conditions if there are any. */
- IHqlExpression * child = expr->queryChild(0);
- IHqlExpression * grand = child->queryChild(childIndex);
- OwnedMapper mapper = getMapper(child);
- HqlExprArray conds;
- unwindFilterConditions(conds, expr);
- HqlExprArray hoisted, unhoisted;
- OwnedHqlExpr mapParent = createSelector(side, grand, querySelSeq(child));
- ForEachItemIn(i1, conds)
- {
- IHqlExpression & cur = conds.item(i1);
- bool ok = false;
- OwnedHqlExpr collapsed = mapper->collapseFields(&cur, child, grand, mapParent, &ok);
- if (ok)
- hoisted.append(*collapsed.getClear());
- else
- unhoisted.append(OLINK(cur));
- }
- if (hoisted.ordinality() == 0)
- return NULL;
- DBGLOG("Optimizer: Hoisting filter(%d/%d) over %s", hoisted.ordinality(), hoisted.ordinality()+unhoisted.ordinality(), queryNode0Text(child));
- OwnedHqlExpr newChild = createHoistedFilter(child, hoisted, childIndex, conds.ordinality());
- noteUnused(child);
- if (unhoisted.ordinality() == 0)
- return newChild.getLink();
- unhoisted.add(*LINK(child), 0); // child 0 of the residual filter; replaced by newChild below
- OwnedHqlExpr unhoistedFilter = createDataset(no_filter, unhoisted);
- OwnedHqlExpr newUnhoistedFilter = replaceChild(unhoistedFilter, newChild);
- if (!alreadyHasUsage(newUnhoistedFilter))
- incUsage(newChild);
- return newUnhoistedFilter.getClear();
- }
- bool CTreeOptimizer::extractSingleFieldTempTable(IHqlExpression * expr, SharedHqlExpr & retField, SharedHqlExpr & retValues)
- { /* Recognises a dataset whose record contains exactly one plain field and extracts that field
-    * together with the values it takes.
-    * Fails (returns false) if the record contains a no_record or no_ifblock, more than one no_field,
-    * a field that itself has a record, or no field at all; or if the values (child 0 of expr after
-    * normalizeListCasts) are not no_null, a no_recordlist made only of no_rowvalue entries, or an
-    * expression of set type.
-    * On success returns true with retField set to the field and retValues to the values; a
-    * no_recordlist is converted to a no_list whose elements are cast to the field's type. */
- IHqlExpression * record = expr->queryRecord();
- IHqlExpression * field = NULL;
- ForEachChild(i, record)
- {
- IHqlExpression * cur = record->queryChild(i);
- switch (cur->getOperator())
- {
- case no_record:
- case no_ifblock:
- return false;
- case no_field:
- if (cur->queryRecord() || field) // a field with a record of its own, or a second field
- return false;
- field = cur;
- break;
- }
- }
- if (!field)
- return false;
- OwnedHqlExpr values = normalizeListCasts(expr->queryChild(0));
- switch (values->getOperator())
- {
- case no_null:
- break;
- case no_recordlist:
- {
- HqlExprArray args;
- ITypeInfo * fieldType = field->queryType();
- ForEachChild(i, values)
- {
- IHqlExpression * cur = values->queryChild(i);
- if (cur->getOperator() != no_rowvalue)
- return false;
- args.append(*ensureExprType(cur->queryChild(0), fieldType));
- }
- values.setown(createValue(no_list, makeSetType(LINK(fieldType)), args));
- }
- break;
- default:
- if (values->queryType()->getTypeCode() != type_set)
- return false;
- break;
- }
- retField.set(field);
- retValues.setown(values.getClear());
- return true;
- }
- IHqlExpression * mapJoinConditionToFilter(IHqlExpression * expr, IHqlExpression * search, IHqlExpression * replace)
- { /* Rewrites a condition so that equality tests against `search` become membership tests in `replace`.
-    *   - no_and / no_or : rebuilt from the recursively mapped children; fails if any child fails.
-    *   - no_eq          : "x = search" or "search = x" becomes "x IN replace".
-    *   - anything else  : returned unchanged, provided replaceExpression() shows that it contains
-    *                      no reference to `search`.
-    * Returns a linked expression, or NULL if `search` is used in a way that cannot be mapped. */
- switch (expr->getOperator())
- {
- case no_and:
- case no_or:
- {
- HqlExprArray args;
- ForEachChild(i, expr)
- {
- IHqlExpression * mapped = mapJoinConditionToFilter(expr->queryChild(i), search, replace);
- if (!mapped)
- return NULL;
- args.append(*mapped);
- }
- return expr->clone(args);
- }
- case no_eq:
- {
- IHqlExpression * l = expr->queryChild(0);
- IHqlExpression * r = expr->queryChild(1);
- if (l == search)
- return createValue(no_in, makeBoolType(), LINK(r), LINK(replace));
- if (r == search)
- return createValue(no_in, makeBoolType(), LINK(l), LINK(replace));
- break;
- }
- }
- OwnedHqlExpr temp = replaceExpression(expr, search, replace); // used only to detect a reference to search
- if (temp != expr)
- return NULL;
- return LINK(expr);
- }
-
- IHqlExpression * splitJoinFilter(IHqlExpression * expr, HqlExprArray * leftOnly, HqlExprArray * rightOnly)
- { /* Separates a join condition into the parts that depend on a single side and the remainder.
-    * no_and and no_assertkeyed nodes are processed recursively.  Any other term for which
-    * gatherTablesUsed() reports exactly one scope is appended to *leftOnly when that scope is a
-    * no_left, or to *rightOnly when it is a no_right.
-    *   leftOnly / rightOnly - may be NULL, in which case terms for that side stay in the result.
-    * Returns the linked residual condition, or NULL when the term was moved to one of the arrays
-    * or no real (non-attribute) arguments remain. */
- node_operator op = expr->getOperator();
- switch (op)
- {
- case no_assertkeyed:
- case no_and:
- {
- HqlExprArray args;
- ForEachChild(i, expr)
- {
- IHqlExpression * next = splitJoinFilter(expr->queryChild(i), leftOnly, rightOnly);
- if (next)
- args.append(*next);
- }
- unsigned numRealArgs = args.ordinality() - numAttributes(args);
- if (numRealArgs == 0)
- return NULL;
- if ((numRealArgs == 1) && (op == no_and)) // an AND with one remaining term collapses to that term
- return LINK(&args.item(0));
- return cloneOrLink(expr, args);
- }
- }
- HqlExprCopyArray scopeUsed;
- expr->gatherTablesUsed(NULL, &scopeUsed);
- if (scopeUsed.ordinality() == 1)
- {
- node_operator scopeOp = scopeUsed.item(0).getOperator();
- if (leftOnly && scopeOp == no_left)
- {
- leftOnly->append(*LINK(expr));
- return NULL;
- }
- if (rightOnly && scopeOp == no_right)
- {
- rightOnly->append(*LINK(expr));
- return NULL;
- }
- }
- return LINK(expr);
- }
- IHqlExpression * CTreeOptimizer::optimizeJoinCondition(IHqlExpression * expr)
- { /* Moves the parts of a join condition (child 2) that only depend on LEFT or only on RIGHT out
-    * of the join, turning them into filters on the corresponding input.
-    * Returns NULL if the join is not a simple inner join, has a keyed or atmost attribute, or has
-    * nothing to move.  Otherwise returns a clone of the join with the filtered inputs, the
-    * remaining condition (constant true if nothing is left) and a _conditionFolded_ attribute. */
- //Look at the join condition and move any conditions just on left/right further up the tree
- //can help after other constant folding....
- if (!isSimpleInnerJoin(expr) || expr->hasAttribute(keyedAtom) || expr->hasAttribute(atmostAtom))
- return NULL;
- IHqlExpression * cond = expr->queryChild(2);
- IHqlExpression * seq = querySelSeq(expr);
- HqlExprArray leftOnly, rightOnly;
- OwnedHqlExpr newCond = splitJoinFilter(cond, &leftOnly, isKeyedJoin(expr) ? (HqlExprArray *)NULL : &rightOnly); // RIGHT-only terms are not extracted for a keyed join
- if ((leftOnly.ordinality() == 0) && (rightOnly.ordinality() == 0))
- return NULL;
- HqlExprArray args;
- unwindChildren(args, expr);
- if (leftOnly.ordinality())
- {
- DBGLOG("Optimizer: Hoist %d LEFT conditions out of %s", leftOnly.ordinality(), queryNode0Text(expr));
- IHqlExpression * lhs = expr->queryChild(0);
- OwnedHqlExpr left = createSelector(no_left, lhs, seq);
- OwnedHqlExpr leftFilter = createFilterCondition(leftOnly);
- OwnedHqlExpr newFilter = replaceSelector(leftFilter, left, lhs->queryNormalizedSelector()); // LEFT.x becomes lhs.x
- args.replace(*createDataset(no_filter, LINK(lhs), LINK(newFilter)), 0);
- incUsage(&args.item(0));
- }
- if (rightOnly.ordinality())
- {
- DBGLOG("Optimizer: Hoist %d RIGHT conditions out of %s", rightOnly.ordinality(), queryNode0Text(expr));
- IHqlExpression * rhs = expr->queryChild(1);
- OwnedHqlExpr right = createSelector(no_right, rhs, seq);
- OwnedHqlExpr rightFilter = createFilterCondition(rightOnly);
- OwnedHqlExpr newFilter = replaceSelector(rightFilter, right, rhs->queryNormalizedSelector());
- args.replace(*createDataset(no_filter, LINK(rhs), LINK(newFilter)), 1);
- incUsage(&args.item(1));
- }
- if (!newCond)
- newCond.setown(createConstant(true));
- if (!queryAttribute(_conditionFolded_Atom, args))
- args.append(*createAttribute(_conditionFolded_Atom));
- args.replace(*newCond.getClear(), 2);
- return expr->clone(args);
- }
- //DISTRIBUTE(DEDUP(ds, x, y, all), hash(trim(x)))
- //It is likely that the following would be better since it removes one distribute:
- //DEDUP(DISTRIBUTE(ds, hash(trim(x))), x, y, all, LOCAL)
- IHqlExpression * CTreeOptimizer::optimizeDistributeDedup(IHqlExpression * expr)
- { /* expr is the distribute and its child 0 the dedup.
-    * Returns NULL unless the dedup has the ALL attribute, is not LOCAL, is not grouped, has at
-    * least one equality (according to DedupInfoExtractor) and matchDedupDistribution() accepts
-    * the distribution expression (child 1 of expr) against those equalities.
-    * Otherwise returns a linked local hash dedup over the distribute.  When the distribute has
-    * the MANY attribute, a local hash dedup is also kept beneath the distribute. */
- IHqlExpression * child = expr->queryChild(0);
- if (!child->hasAttribute(allAtom) || child->hasAttribute(localAtom) || isGrouped(child))
- return NULL;
- DedupInfoExtractor info(child);
- if (info.equalities.ordinality() == 0)
- return NULL;
- IHqlExpression * dist = expr->queryChild(1);
- if (!matchDedupDistribution(dist, info.equalities))
- return NULL;
- DBGLOG("Optimizer: Swap %s and %s", queryNode0Text(expr), queryNode1Text(child));
- 
- 
- OwnedHqlExpr distn;
- if (expr->hasAttribute(manyAtom))
- {
- //DEDUP(DISTRIBUTE(DEDUP(ds, x, y, all, local), hash(trim(x))), x, y, all, LOCAL)
- HqlExprArray localDedupArgs;
- unwindChildren(localDedupArgs, child);
- localDedupArgs.append(*createLocalAttribute());
- localDedupArgs.append(*createAttribute(hashAtom));
- OwnedHqlExpr localDedup = child->clone(localDedupArgs);
- distn.setown(replaceChildDataset(expr, localDedup, 0));
- }
- else
- {
- //DEDUP(DISTRIBUTE(ds, hash(trim(x))), x, y, all, LOCAL)
- distn.setown(replaceChildDataset(expr, child->queryChild(0), 0));
- }
- HqlExprArray args;
- args.append(*LINK(distn));
- unwindChildren(args, child, 1); // the dedup's own arguments, excluding its dataset
- args.append(*createLocalAttribute());
- //We would have generated a global hash dedup, so adding hash to the local dedup makes sense.
- args.append(*createAttribute(hashAtom));
- OwnedHqlExpr ret = child->clone(args);
- if (!alreadyHasUsage(ret))
- incUsage(distn);
- return ret.getClear();
- }
- //Merges a project (no_hqlproject or no_newusertable) into the inline dataset that is its child, by
- //applying the project's transform to each row of the inline dataset.
- //  transformed       - the project.
- //  childrenAreShared - when set the merge is only done if the inline dataset is constant.
- //Returns NULL if the merge is not possible: the child is not a pure inline dataset, the project has a
- //prefetch attribute, the transform is not a known transform, an expansion fails or is flagged as complex,
- //or (for datasets that do not have exactly one row) constant folding is not enabled or does not produce
- //constant transforms.  Otherwise returns the new inline dataset with the annotations of transformed.
- IHqlExpression * CTreeOptimizer::optimizeProjectInlineTable(IHqlExpression * transformed, bool childrenAreShared)
- {
-     IHqlExpression * child = transformed->queryChild(0);
-     IHqlExpression * values = child->queryChild(0);
-     //MORE If trivial projection then might be worth merging with multiple items, but unlikely to occur in practice
-     if (!isPureInlineDataset(child) || transformed->hasAttribute(prefetchAtom))
-         return NULL;
-     bool onlyFoldConstant = false;
-     if (values->numChildren() != 1)
-     {
-         if (options & HOOfoldconstantdatasets)
-         {
-             if (!isConstantDataset(child))
-                 return NULL;
-             onlyFoldConstant = true;
-         }
-         else
-             return NULL;
-     }
-     if (childrenAreShared)
-     {
-         if (!isConstantDataset(child))
-             return NULL;
-     }
-     IHqlExpression * transformedCountProject = transformed->queryAttribute(_countProject_Atom);
-     IHqlExpression * seq = querySelSeq(transformed);
-     node_operator projectOp = transformed->getOperator();
-     OwnedHqlExpr oldSelector = (projectOp == no_hqlproject) ? createSelector(no_left, child, seq) : LINK(child->queryNormalizedSelector());
-     IHqlExpression * curTransform = queryNewColumnProvider(transformed);
-     if (!isKnownTransform(curTransform))
-         return NULL;
-     ExpandSelectorMonitor monitor(*this);
-     HqlExprArray newValues;
-     ForEachChild(i, values)
-     {
-         TableProjectMapper mapper;
-         mapper.setMapping(values->queryChild(i), NULL);
-         OwnedHqlExpr next = expandFields(&mapper, curTransform, oldSelector, NULL, &monitor);
-         //Check the expansion before it is used: the counter substitution below dereferences next, and
-         //does not involve the monitor, so testing here rejects exactly the same cases as testing afterwards.
-         if (!next || monitor.isComplex())
-             return NULL;
-         //Expand counter inline!
-         if (transformedCountProject)
-         {
-             OwnedHqlExpr counter = createConstant(createIntValue(i+1, 8, false));
-             next.setown(replaceExpression(next, transformedCountProject->queryChild(0), counter));
-             if (!next)
-                 return NULL;
-         }
-         if (onlyFoldConstant)
-         {
-             next.setown(foldScopedHqlExpression(errorProcessor, NULL, next));
-             if (!isConstantTransform(next))
-                 return NULL;
-         }
-         newValues.append(*ensureTransformType(next, no_transform));
-     }
-     DBGLOG("Optimizer: Merge %s and %s", queryNode0Text(transformed), queryNode1Text(child));
-     HqlExprArray args;
-     args.append(*createValue(no_transformlist, makeNullType(), newValues));
-     if (projectOp == no_newusertable)
-         args.append(*LINK(transformed->queryChild(1)));
-     else
-         args.append(*LINK(transformed->queryRecord()));
-     //Remaining arguments of the inline dataset (from index 2 onwards) are kept unchanged.
-     unwindChildren(args, child, 2);
-     noteUnused(child);
-     OwnedHqlExpr ret = child->clone(args);
-     return transformed->cloneAllAnnotations(ret);
- }
- void CTreeOptimizer::analyseExpr(IHqlExpression * expr)
- { /* Analysis pass that records how often each expression is used.
-    * The usage count of expr is incremented; if it had already been counted its children are
-    * not visited again.  Some operators are not descended into at all, and for no_table only
-    * child 0 is analysed. */
- if (incUsage(expr)) // incUsage() returns true when the expression had been counted before
- return;
- switch (expr->getOperator())
- {
- case no_filepos:
- case no_file_logicalname:
- case no_sizeof:
- case no_offsetof:
- return;
- case no_table:
- //only look at the filename - not the parent files.
- analyseExpr(expr->queryChild(0));
- return;
- }
- PARENT::analyseExpr(expr);
- }
- //Records that expr is referenced one less time.  Currently this is simply decUsage(), whose result
- //is passed back to the caller.
- bool CTreeOptimizer::noteUnused(IHqlExpression * expr)
- {
-     const bool decResult = decUsage(expr);
-     return decResult;
- }
- //Decrements the use count held for expr, never taking it below zero.
- //Returns true only if this call took the count from one to zero.
- bool CTreeOptimizer::decUsage(IHqlExpression * expr)
- {
-     OptTransformInfo * info = queryBodyExtra(expr);
- #ifdef TRACE_USAGE
-     if (expr->isDataset() || expr->isDatarow())
-         DBGLOG("%lx dec %d [%s]", (unsigned)expr, info->useCount, queryNode0Text(expr));
- #endif
-     if (!info->useCount)
-         return false;
-     const bool wasLastUse = (info->useCount == 1);
-     info->useCount--;
-     return wasLastUse;
- }
- //Returns true if the use count held for expr is non-zero.
- bool CTreeOptimizer::alreadyHasUsage(IHqlExpression * expr)
- {
-     return queryBodyExtra(expr)->useCount != 0;
- }
- //Increments the use count held for expr.
- //Returns true if the count was already non-zero before this call.
- bool CTreeOptimizer::incUsage(IHqlExpression * expr)
- {
-     OptTransformInfo * info = queryBodyExtra(expr);
- #ifdef TRACE_USAGE
-     if (expr->isDataset() || expr->isDatarow())
-         DBGLOG("%lx inc %d [%s]", (unsigned)expr, info->useCount, queryNode0Text(expr));
- #endif
-     const bool usedBefore = (info->useCount != 0);
-     info->useCount++;
-     return usedBefore;
- }
- IHqlExpression * CTreeOptimizer::inheritUsage(IHqlExpression * newExpr, IHqlExpression * oldExpr)
- { /* Transfers the bookkeeping of oldExpr onto the expression that replaces it: the stop-hoist
-    * flag is copied if it is set, and oldExpr's use count is added to newExpr's.
-    * Returns newExpr itself (no LINK is applied here). */
- OptTransformInfo * newExtra = queryBodyExtra(newExpr);
- OptTransformInfo * oldExtra = queryBodyExtra(oldExpr);
- if (oldExtra->getStopHoist())
- newExtra->setStopHoist();
- #ifdef TRACE_USAGE
- if (newExpr->isDataset() || newExpr->isDatarow())
- DBGLOG("%lx inherit %d,%d (from %lx) [%s]", (unsigned)newExpr, newExtra->useCount, oldExtra->useCount, (unsigned)oldExpr, queryNode0Text(newExpr));
- //assertex(extra->useCount);
- if ((oldExtra->useCount == 0) && (newExpr->isDataset() || newExpr->isDatarow()))
- DBGLOG("Inherit0: %lx inherit %d,%d (from %lx)", (unsigned)newExpr, newExtra->useCount, oldExtra->useCount, (unsigned)oldExpr);
- #endif
- newExtra->useCount += oldExtra->useCount; // counts are added, not overwritten
- return newExpr;
- }
- //Run an ExpandComplexityMonitor over the transform and report its verdict.
- bool CTreeOptimizer::isComplexTransform(IHqlExpression * transform)
- {
-     ExpandComplexityMonitor complexity(*this);
-     complexity.analyseTransform(transform);
-     const bool complex = complexity.isComplex();
-     return complex;
- }
- //Try to fold 'transform' into the activity 'child' by expanding it through child's field mapping.
- //  child         - activity whose transform (and onFail transform, if present) is replaced in the result
- //  transform     - the transform to be expanded
- //  childSelector - passed to expandFields() as the old dataset selector
- //  expr          - only used for the trace message
- //Returns a clone of child containing the expanded transform(s), or NULL if the merge was not done.
- IHqlExpression * CTreeOptimizer::expandProjectedDataset(IHqlExpression * child, IHqlExpression * transform, IHqlExpression * childSelector, IHqlExpression * expr)
- {
- //Give up immediately if hasUnknownTransform() reports true for child.
- if (hasUnknownTransform(child))
- return NULL;
- OwnedMapper mapper = getMapper(child);
- //The same monitor observes both expansions below; its isComplex() verdict is checked once afterwards.
- ExpandSelectorMonitor monitor(*this);
- OwnedHqlExpr expandedTransform = expandFields(mapper, transform, childSelector, NULL, &monitor);
- IHqlExpression * onFail = child->queryAttribute(onFailAtom);
- OwnedHqlExpr newOnFail;
- if (onFail)
- {
- //The onFail attribute carries its own transform, so 'transform' is expanded a second time through a mapper built from it.
- IHqlExpression * oldFailTransform = onFail->queryChild(0);
- OwnedMapper onFailMapper = createProjectMapper(oldFailTransform, NULL);
- OwnedHqlExpr onFailTransform = expandFields(onFailMapper, transform, childSelector, NULL, &monitor);
- if (onFailTransform)
- newOnFail.setown(createExprAttribute(onFailAtom, ensureTransformType(onFailTransform, oldFailTransform->getOperator())));
- }
- //Only merge if the main expansion succeeded, any onFail was also expanded, and the monitor did not flag complexity.
- if (expandedTransform && (!onFail || newOnFail) && !monitor.isComplex())
- {
- unsigned transformIndex = queryTransformIndex(child);
- IHqlExpression * oldTransform = child->queryChild(transformIndex);
- //Keep the operator of the transform being replaced.
- expandedTransform.setown(ensureTransformType(expandedTransform, oldTransform->getOperator()));
- DBGLOG("Optimizer: Merge %s and %s", queryNode0Text(expr), queryNode1Text(child));
- HqlExprArray args;
- unwindChildren(args, child);
- //Ownership of the new transform (and new onFail) passes to args.
- args.replace(*expandedTransform.getClear(), transformIndex);
- if (onFail)
- args.replace(*newOnFail.getClear(), args.find(*onFail));
- //The original child is no longer referenced by the result.
- noteUnused(child);
- return child->clone(args);
- }
- return NULL;
- }
- //Convert an aggregate of a compound source into the matching compound (group)aggregate activity.
- //Returns the new dataset, or NULL if the combination is not handled.
- IHqlExpression * CTreeOptimizer::optimizeAggregateCompound(IHqlExpression * transformed)
- {
-     //Keep in sync with code in CompoundSourceTransformer
-     IHqlExpression * child = transformed->queryChild(0);
-     if (isLimitedDataset(child, true))
-         return NULL;
-     IHqlExpression * tableExpr = queryRoot(transformed);
-     const node_operator modeOp = queryTableMode(tableExpr);
-     if ((modeOp == no_csv) || (modeOp == no_xml))
-         return NULL;
-     if (isLimitedDataset(child) && !isSimpleCountExistsAggregate(transformed, true, false))
-         return NULL;
-     //A real fourth child selects the "group aggregate" flavour of each compound activity.
-     const bool grouped = (queryRealChild(transformed, 3) != NULL);
-     node_operator newOp = no_none;
-     switch (child->getOperator())
-     {
-     case no_compound_diskread:
-     case no_compound_disknormalize:
-         newOp = grouped ? no_compound_diskgroupaggregate : no_compound_diskaggregate;
-         break;
-     case no_compound_indexread:
-     case no_compound_indexnormalize:
-         newOp = grouped ? no_compound_indexgroupaggregate : no_compound_indexaggregate;
-         break;
-     case no_compound_childread:
-     case no_compound_childnormalize:
-         newOp = grouped ? no_compound_childgroupaggregate : no_compound_childaggregate;
-         break;
-     case no_compound_inline:
-         //Only the ungrouped form is converted for inline compounds.
-         if (!grouped)
-             newOp = no_compound_inline;
-         break;
-     }
-     if (!newOp)
-         return NULL;
-     return createDataset(newOp, removeChildNode(transformed));
- }
- //Report whether any dataset input of expr is shared (as decided by isShared()).
- //Some child-dataset kinds always report true, which stops them being folded.
- bool CTreeOptimizer::childrenAreShared(IHqlExpression * expr)
- {
-     if (expr->isDataset() || expr->isDatarow())
-     {
-         switch (getChildDatasetType(expr))
-         {
-         case childdataset_none:
-             return false;
-         case childdataset_dataset:
-         case childdataset_datasetleft:
-         case childdataset_left:
-         case childdataset_same_left_right:
-         case childdataset_top_left_right:
-         case childdataset_dataset_noscope:
-             //Don't restrict the items that can be combined with no_null.
-             return isShared(expr->queryChild(0));
-         case childdataset_leftright:
-             if (isShared(expr->queryChild(0)))
-                 return true;
-             return isShared(expr->queryChild(1));
-         case childdataset_evaluate:
-         case childdataset_if:
-         case childdataset_case:
-         case childdataset_map:
-         case childdataset_nway_left_right:
-             return true; // stop any folding of these...
-         case childdataset_many_noscope:
-         case childdataset_many:
-             {
-                 //Attributes are ignored; any other shared child is enough.
-                 const unsigned numInputs = expr->numChildren();
-                 for (unsigned i = 0; i < numInputs; i++)
-                 {
-                     IHqlExpression * input = expr->queryChild(i);
-                     if (input->isAttribute())
-                         continue;
-                     if (isShared(input))
-                         return true;
-                 }
-                 return false;
-             }
-         default:
-             UNIMPLEMENTED;
-         }
-     }
-     switch (expr->getOperator())
-     {
-     case no_select:
-         //Only a "new" selector counts its dataset as an input.
-         return isNewSelector(expr) && isShared(expr->queryChild(0));
-     case NO_AGGREGATE:
-         return isShared(expr->queryChild(0));
-     }
-     return false;
- }
- //Decide whether 'project' should be moved below the limit-like activities underneath it.
- //Starting at the project's first child, steps down through no_limit/no_keyedlimit/no_choosen nodes and
- //returns a verdict based on the first other operator reached.
- //Returns false straight away if the project has its stop-hoist flag set, and also if a node reached by
- //stepping down through a limit is shared.
- bool CTreeOptimizer::isWorthMovingProjectOverLimit(IHqlExpression * project)
- {
- if (queryBodyExtra(project)->getStopHoist())
- return false;
- IHqlExpression * expr = project->queryChild(0);
- loop
- {
- switch (expr->getOperator())
- {
- case no_limit:
- case no_keyedlimit:
- case no_choosen:
- //Step down to the input; the break leaves the switch so the shared check below is applied to that input.
- expr = expr->queryChild(0);
- break;
- case no_compound_diskread:
- case no_compound_disknormalize:
- case no_compound_indexread:
- case no_compound_indexnormalize:
- case no_compound_childread:
- case no_compound_childnormalize:
- case no_compound_selectnew:
- case no_compound_inline:
- //if (options & HOOcompoundproject)
- return true;
- case no_join:
- if (isKeyedJoin(expr))
- return false;
- //fallthrough - a join that is not keyed is treated like the activities below.
- case no_selfjoin:
- case no_fetch:
- case no_normalize:
- case no_newparse:
- case no_newxmlparse:
- return true;
- case no_null:
- return true;
- case no_newusertable:
- if (isAggregateDataset(expr))
- return false;
- //fallthrough.
- case no_hqlproject:
- //Only worthwhile for a pure project that has neither a counter nor a prefetch attribute.
- if (!isPureActivity(expr) || expr->hasAttribute(_countProject_Atom) || expr->hasAttribute(prefetchAtom))
- return false;
- return true;
- default:
- return false;
- }
- //Only reached after stepping through a limit-like node (every other case returns).
- if (isShared(expr))
- return false;
- }
- }
- //Swap a projection (transformed) with its child activity, so the projection is applied to the grandchild
- //and the child activity is re-created on top of the new projection.
- //Each non-attribute argument of the child is rewritten via mapper->collapseFields() so it refers to the new projection.
- //  noMoveIfFail - if an argument cannot be collapsed, return 'transformed' unchanged (linked)
- //  errorIfFail  - if an argument cannot be collapsed, throw HQLERR_BadProjectOfStepping instead
- //If neither flag is set, an argument that cannot be collapsed is replaced by an "unknown" placeholder.
- IHqlExpression * CTreeOptimizer::moveProjectionOverSimple(IHqlExpression * transformed, bool noMoveIfFail, bool errorIfFail)
- {
- IHqlExpression * child = transformed->queryChild(0);
- IHqlExpression * grandchild = child->queryChild(0);
- IHqlExpression * newProject = replaceChild(transformed, grandchild);
- HqlExprArray args;
- //args takes over the reference to newProject returned above.
- args.append(*newProject);
- OwnedMapper mapper = getMapper(transformed);
- ForEachChild(idx, child)
- {
- //Child 0 is the dataset, which has already been replaced by newProject.
- if (idx != 0)
- {
- bool ok = true;
- IHqlExpression * cur = child->queryChild(idx);
- IHqlExpression * collapsed;
- //NB: Attributes are generally independent of the input dataset, so they shouldn't be reverse mapped,
- //otherwise if a input-invariant expression is projected it can cause problems (jholt44.eclxml)
- if (cur->isAttribute())
- collapsed = LINK(cur);
- else
- collapsed = mapper->collapseFields(cur, grandchild, newProject, &ok);
- if (!ok)
- {
- //Discard the result of the failed collapse before deciding how to recover.
- ::Release(collapsed);
- if (errorIfFail)
- {
- //For a sort list, find the first element that fails to collapse so it can be named in the error.
- StringBuffer cause;
- if (cur->getOperator() == no_sortlist)
- {
- ForEachChild(i, cur)
- {
- IHqlExpression * elem = cur->queryChild(i);
- OwnedHqlExpr collapsed = mapper->collapseFields(elem, grandchild, newProject, &ok);
- if (!ok)
- {
- cause.append(" expression: ");
- getExprECL(elem, cause);
- break;
- }
- }
- }
- throwError1(HQLERR_BadProjectOfStepping, cause.str());
- }
- if (noMoveIfFail)
- return LINK(transformed);
- //NB: Always succeed for distributed/sorted/grouped, because it is needed for the disk read/index read processing.
- if (cur->getOperator() == no_sortlist)
- collapsed = createValue(no_sortlist, makeSortListType(NULL), createAttribute(unknownAtom));
- else
- collapsed = createAttribute(unknownAtom);
- }
- args.append(*collapsed);
- }
- }
- 
- DBGLOG("Optimizer: Swap %s and %s", queryNode0Text(transformed), queryNode1Text(child));
- OwnedHqlExpr swapped = child->clone(args);
- //Usage bookkeeping: count the new project unless the swapped node already has a use count, then drop one use of the old child.
- if (!alreadyHasUsage(swapped))
- incUsage(newProject);
- noteUnused(child);
- return swapped.getClear();
- }
- //Swap a projection (transformed) with the limit-style activity that is its child, so the projection is
- //applied to the grandchild and the child activity is re-created above it.
- //An onFail attribute on the child has its transform rewritten by expanding the projection's transform through it.
- //Returns 'transformed' unchanged (linked) if an onFail transform is not a known transform, or if the
- //expansion was flagged as complex by the monitor.
- IHqlExpression * CTreeOptimizer::moveProjectionOverLimit(IHqlExpression * transformed)
- {
- IHqlExpression * child = transformed->queryChild(0);
- IHqlExpression * grandchild = child->queryChild(0);
- IHqlExpression * newProject = replaceChild(transformed, grandchild);
- HqlExprArray args;
- //args takes over the reference to newProject returned above.
- args.append(*newProject);
- ExpandSelectorMonitor monitor(*this);
- //Copy the remaining arguments of the child (child 0 has been replaced by newProject).
- ForEachChildFrom(idx, child, 1)
- {
- IHqlExpression * cur = child->queryChild(idx);
- if (cur->isAttribute() && cur->queryName() == onFailAtom)
- {
- IHqlExpression * oldFailTransform = cur->queryChild(0);
- if (!isKnownTransform(oldFailTransform))
- return LINK(transformed);
- //Expand the projection's transform through a mapper built from the old onFail transform.
- OwnedMapper onFailMapper = createProjectMapper(oldFailTransform, NULL);
- IHqlExpression * projectionTransformer = queryNewColumnProvider(transformed);
- OwnedHqlExpr parentSelector = getParentDatasetSelector(transformed);
- OwnedHqlExpr onFailTransform = expandFields(onFailMapper, projectionTransformer, parentSelector, NULL, &monitor);
- args.append(*createExprAttribute(onFailAtom, ensureTransformType(onFailTransform, oldFailTransform->getOperator())));
- }
- else
- args.append(*LINK(cur));
- }
- //Abandon the swap if the monitor flagged the expansion as complex.
- if (monitor.isComplex())
- return LINK(transformed);
- DBGLOG("Optimizer: Swap %s and %s", queryNode0Text(transformed), queryNode1Text(child));
- OwnedHqlExpr swapped = child->clone(args);
- //Usage bookkeeping: count the new project unless the swapped node already has a use count, then drop one use of the old child.
- if (!alreadyHasUsage(swapped))
- incUsage(newProject);
- noteUnused(child);
- return swapped.getClear();
- }
- //Convenience wrapper: insertChildDataset() applied at dataset position 0.
- IHqlExpression * CTreeOptimizer::insertChild(IHqlExpression * expr, IHqlExpression * newChild)
- {
-     const unsigned datasetIndex = 0;
-     return insertChildDataset(expr, newChild, datasetIndex);
- }
- //Convenience wrapper: replaceChildDataset() applied at dataset position 0.
- IHqlExpression * CTreeOptimizer::replaceChild(IHqlExpression * expr, IHqlExpression * newChild)
- {
-     const unsigned datasetIndex = 0;
-     return replaceChildDataset(expr, newChild, datasetIndex);
- }
- //Append every child of expr to args, after mapping expr's first child (and selectors on it) to newChild.
- void CTreeOptimizer::unwindReplaceChild(HqlExprArray & args, IHqlExpression * expr, IHqlExpression * newChild)
- {
-     IHqlExpression * oldChild = expr->queryChild(0);
-     HqlMapTransformer mapper;
-     mapper.setMapping(oldChild, newChild);
-     mapper.setSelectorMapping(oldChild, newChild);
-     ForEachChild(idx, expr)
-     {
-         IHqlExpression * cur = expr->queryChild(idx);
-         args.append(*mapper.transformRoot(cur));
-     }
- }
- //Create the per-expression information object used by this transformer (an OptTransformInfo).
- ANewTransformInfo * CTreeOptimizer::createTransformInfo(IHqlExpression * expr)
- {
-     ANewTransformInfo * info = CREATE_NEWTRANSFORMINFO(OptTransformInfo, expr);
-     return info;
- }
- //Expand expr through the mapper, replacing references to oldDataset; all the work is delegated to the mapper.
- IHqlExpression * CTreeOptimizer::expandFields(TableProjectMapper * mapper, IHqlExpression * expr, IHqlExpression * oldDataset, IHqlExpression * newDataset, IExpandCallback * _expandCallback)
- {
-     //There used to be code to constant fold filters here - but it can cause dataset expressions to become duplicated
-     //causing code to be duplicated.  Only fold expressions that are reduced to constants.
-     OwnedHqlExpr expanded = mapper->expandFields(expr, oldDataset, newDataset, _expandCallback);
-     return expanded.getClear();
- }
- //Copy any no_skip children of oldTransform (with oldSelector replaced by newSelector) to the front of newTransform.
- //Returns newTransform (linked) when oldTransform contains no skips, otherwise a clone with the skips prepended.
- IHqlExpression * CTreeOptimizer::inheritSkips(IHqlExpression * newTransform, IHqlExpression * oldTransform, IHqlExpression * oldSelector, IHqlExpression * newSelector)
- {
-     HqlExprArray args;
-     const unsigned numAssigns = oldTransform->numChildren();
-     for (unsigned i = 0; i < numAssigns; i++)
-     {
-         IHqlExpression * cur = oldTransform->queryChild(i);
-         if (cur->getOperator() != no_skip)
-             continue;
-         args.append(*replaceSelector(cur, oldSelector, newSelector));
-     }
-     if (args.ordinality() != 0)
-     {
-         //The skips come first, followed by everything already in the new transform.
-         unwindChildren(args, newTransform);
-         return newTransform->clone(args);
-     }
-     return LINK(newTransform);
- }
- //Transform a single expression: default-transform the children, apply the optimizations in doCreateTransformed(),
- //and re-transform the result if the optimizer produced something new.
- IHqlExpression * CTreeOptimizer::createTransformed(IHqlExpression * expr)
- {
-     //Fields and records are returned as they are.
-     const node_operator op = expr->getOperator();
-     if ((op == no_field) || (op == no_record))
-         return LINK(expr);
-     //Do this first, so that any references to a child dataset that changes are correctly updated, before proceeding any further.
-     OwnedHqlExpr dft = defaultCreateTransformed(expr);
-     updateOrphanedSelectors(dft, expr);
-     OwnedHqlExpr ret = doCreateTransformed(dft, expr);
-     if (ret->queryBody() != expr->queryBody())
-     {
-         //The body changed, so the new expression takes on the usage of the original.
-         inheritUsage(ret, expr);
-         const bool changedByOptimizer = !(ret == dft);
-         if (changedByOptimizer)
-             return transform(ret);
-     }
-     return ret.getClear();
- }
- //Replace a filter whose outcome is known: if it is always true the filter node is removed,
- //otherwise the whole expression is replaced with a null dataset.
- IHqlExpression * CTreeOptimizer::getOptimizedFilter(IHqlExpression * transformed, bool alwaysTrue)
- {
-     if (alwaysTrue)
-         return removeParentNode(transformed);
-     //The input is no longer referenced by the replacement.
-     noteUnused(transformed->queryChild(0));
-     //MORE: Really wants to walk down the entire chain until we hit something that is shared.
-     IHqlExpression * nullDataset = createNullDataset(transformed);
-     DBGLOG("Optimizer: Replace %s with %s", queryNode0Text(transformed), queryNode1Text(nullDataset));
-     return nullDataset;
- }
- //Overload taking the list of remaining filter conditions: an empty list means the filter is always true.
- IHqlExpression * CTreeOptimizer::getOptimizedFilter(IHqlExpression * transformed, HqlExprArray const & filters)
- {
-     const bool noConditionsLeft = (filters.ordinality() == 0);
-     return getOptimizedFilter(transformed, noConditionsLeft);
- }
- //Drop one use of expr; if that was its last use, also drop the uses it held on its own inputs.
- void CTreeOptimizer::recursiveDecUsage(IHqlExpression * expr)
- {
-     const bool lastUse = decUsage(expr);
-     if (!lastUse)
-         return;
-     recursiveDecChildUsage(expr);
- }
- //Drop one use from each dataset input of expr (recursively, via recursiveDecUsage()).
- //Which children count as inputs is determined by getChildDatasetType(expr).
- //Fix: the left/right case used to decrement child 0 twice and never child 1, leaving the left input
- //under-counted and the right input with a stale use.  childrenAreShared() treats the same kind as children 0 and 1.
- void CTreeOptimizer::recursiveDecChildUsage(IHqlExpression * expr)
- {
-     switch (getChildDatasetType(expr))
-     {
-     case childdataset_none:
-         break;
-     case childdataset_dataset:
-     case childdataset_datasetleft:
-     case childdataset_left:
-     case childdataset_same_left_right:
-     case childdataset_top_left_right:
-     case childdataset_dataset_noscope:
-         //Single input in child 0.
-         recursiveDecUsage(expr->queryChild(0));
-         break;
-     case childdataset_leftright:
-         //Two separate inputs: left is child 0, right is child 1 - each loses one use.
-         recursiveDecUsage(expr->queryChild(0));
-         recursiveDecUsage(expr->queryChild(1));
-         break;
-     case childdataset_if:
-         //Child 0 is the condition; the branches are child 1 and (optionally) child 2.
-         recursiveDecUsage(expr->queryChild(1));
-         if (expr->queryChild(2))
-             recursiveDecUsage(expr->queryChild(2));
-         break;
-     case childdataset_evaluate:
-     case childdataset_case:
-     case childdataset_map:
-     case childdataset_nway_left_right:
-         break; // who knows?
-     case childdataset_many_noscope:
-     case childdataset_many:
-         {
-             ForEachChild(i, expr)
-                 recursiveDecUsage(expr->queryChild(i));
-             break;
-         }
-     default:
-         UNIMPLEMENTED;
-     }
- }
- //Replace transformed with the null expression created by createNullExpr(), releasing the uses it held on its inputs.
- IHqlExpression * CTreeOptimizer::replaceWithNull(IHqlExpression * transformed)
- {
-     IHqlExpression * nullExpr = createNullExpr(transformed);
-     DBGLOG("Optimizer: Replace %s with %s", queryNode0Text(transformed), queryNode1Text(nullExpr));
-     //The replacement no longer references any of the original inputs.
-     recursiveDecChildUsage(transformed);
-     return nullExpr;
- }
- //Replace expr with a no_null row of the same record, releasing the uses it held on its inputs.
- IHqlExpression * CTreeOptimizer::replaceWithNullRow(IHqlExpression * expr)
- {
-     IHqlExpression * nullRow = createRow(no_null, LINK(expr->queryRecord()));
-     DBGLOG("Optimizer: Replace %s with %s", queryNode0Text(expr), queryNode1Text(nullRow));
-     //The replacement no longer references any of the original inputs.
-     recursiveDecChildUsage(expr);
-     return nullRow;
- }
- //Replace expr with a dataset built from a single no_null row of the same record, releasing the uses it
- //held on its inputs.  Asserts that expr is not grouped.
- IHqlExpression * CTreeOptimizer::replaceWithNullRowDs(IHqlExpression * expr)
- {
-     assertex(!isGrouped(expr));
-     IHqlExpression * nullRow = createRow(no_null, LINK(expr->queryRecord()));
-     IHqlExpression * nullRowDs = createDatasetFromRow(nullRow);
-     DBGLOG("Optimizer: Replace %s with %s", queryNode0Text(expr), queryNode1Text(nullRowDs));
-     //The replacement no longer references any of the original inputs.
-     recursiveDecChildUsage(expr);
-     return nullRowDs;
- }
- //Transform an expanded expression - simply forwards to transform().
- IHqlExpression * CTreeOptimizer::transformExpanded(IHqlExpression * expr)
- {
-     IHqlExpression * result = transform(expr);
-     return result;
- }
- //Try to move a keyed operation (transformed) down past its first child.
- //Dispatches on the child's operator: some children are swapped with directly, no_if/add-files style children
- //have the node pushed into their branches, and for a further set of activities the child itself is first
- //moved (recursively) and 'transformed' is then re-created on top of the moved child.
- //Returns the rewritten expression, or NULL if the child's operator is not handled here or the recursive move returned NULL.
- IHqlExpression * CTreeOptimizer::queryMoveKeyedExpr(IHqlExpression * transformed)
- {
- //Need to swap with these, regardless of whether the input is shared, because the keyed limit only makes sense
- //inside a compound source
- IHqlExpression * child = transformed->queryChild(0);
- node_operator childOp = child->getOperator();
- switch(childOp)
- {
- case no_compound_indexread:
- case no_compound_diskread:
- case no_assertsorted:
- case no_assertdistributed:
- case no_section: // no so sure...
- case no_sectioninput:
- case no_executewhen:
- return swapNodeWithChild(transformed);
- case no_compound:
- //Here swapNodeWithChild() is given child index 1 rather than the default.
- return swapNodeWithChild(transformed, 1);
- case no_if:
- return swapIntoIf(transformed, true);
- case no_nonempty:
- case no_addfiles:
- case no_chooseds:
- return swapIntoAddFiles(transformed, true);
- //Force the child to be keyed if it is surrounded by something that needs to be keyed, to ensure both migrate up the tree
- case no_hqlproject:
- case no_newusertable:
- case no_aggregate:
- case no_newaggregate:
- case no_choosen:
- case no_limit:
- case no_keyedlimit:
- case no_sorted:
- case no_stepped:
- case no_distributed:
- case no_preservemeta:
- case no_grouped:
- case no_nofold:
- case no_nohoist:
- case no_filter:
- {
- //Recursively move the child first; a NULL result falls out of the switch to the final return NULL.
- OwnedHqlExpr newChild = queryMoveKeyedExpr(child);
- if (newChild)
- {
- OwnedHqlExpr moved = replaceChildDataset(transformed, newChild, 0);
- //Usage bookkeeping: the old child loses a use; the new child gains one unless 'moved' already has a use count.
- decUsage(child);
- if (!alreadyHasUsage(moved))
- incUsage(newChild);
- return moved.getClear();
- }
- }
- }
- return NULL;
- }
- IHqlExpression * CTreeOptimizer::doCreateTransformed(IHqlExpression * transformed, IHqlExpression * _expr)
- {
- OwnedHqlExpr folded = foldNullDataset(transformed);
- if (folded && folded != transformed)
- return folded.getClear();
- node_operator op = transformed->getOperator();
- IHqlExpression * child = transformed->queryChild(0);
- //Any optimizations that remove the current node, or modify the current node don't need to check if the children are shared
- //Removing child nodes could be included, but it may create more spillers/spliters - which may be significant in thor.
- switch (op)
- {
- case no_if:
- {
- OwnedHqlExpr ret = optimizeIf(transformed);
- if (ret)
- return ret.getClear();
- //Processed hereThis won't split shared nodes, but one of the children may be shared - so proce
- if (transformed->isDataset())
- return optimizeDatasetIf(transformed);
- break;
- }
- case no_keyedlimit:
- {
- IHqlExpression * ret = queryMoveKeyedExpr(transformed);
- if (ret)
- return ret;
- break;
- }
- case no_filter:
- if (filterIsKeyed(transformed))
- {
- IHqlExpression * ret = queryMoveKeyedExpr(transformed);
- if (ret)
- return ret;
- }
- break;
- case no_hqlproject:
- {
- IHqlExpression * counterAttr = transformed->queryAttribute(_countProject_Atom);
- if (counterAttr && !transformContainsCounter(transformed->queryChild(1), counterAttr->queryChild(0)))
- return removeAttribute(transformed, _countProject_Atom);
- //fallthrough
- }
- case no_newusertable:
- if (transformed->hasAttribute(keyedAtom))
- {
- IHqlExpression * ret = queryMoveKeyedExpr(transformed);
- if (ret)
- return ret;
- }
- break;
- case no_join:
- {
- #ifdef MIGRATE_JOIN_CONDITIONS
- OwnedHqlExpr ret = optimizeJoinCondition(transformed);
- if (ret)
- return ret.getClear();
- #endif
- //Unfortunately you cannot convert a keyed join to an index read because the input dataset could contain duplicates
- //That would generate duplicates in the output which would be missing from a index read.
- //MORE:
- //If left outer join, and transform doesn't reference RIGHT, and only one rhs record could match each lhs record (e.g., it was rolled
- //up, or a non-many lookup join, then the join could be converted into a project
- //Can occur once fields get implicitly removed from transforms etc. - e.g., bc10.xhql, although that code has since been fixed.
- //There is no point in distributing the rhs of a global lookup join => remove it.
- if (transformed->hasAttribute(lookupAtom) && !transformed->hasAttribute(localAtom))
- {
- IHqlExpression * rhs = transformed->queryChild(1);
- if (rhs->getOperator() == no_distribute)
- {
- DBGLOG("Optimizer: Remove %s from RHS of global LOOKUP JOIN", queryNode0Text(rhs));
- return ::replaceChild(transformed, 1, rhs->queryChild(0));
- }
- }
- break;
- }
- case no_dedup:
- {
- node_operator childOp = child->getOperator();
- switch(childOp)
- {
- case no_dedup:
- {
- DedupInfoExtractor dedup1(transformed); // slightly costly to create
- DedupInfoExtractor dedup2(child);
- switch (dedup1.compareWith(dedup2))
- {
- //In roxie this would probably be better, in thor it may create extra spills
- //case DedupInfoExtractor::DedupDoesAll:
- // return removeChildNode(transformed);
- case DedupInfoExtractor::DedupDoesNothing:
- return removeParentNode(transformed);
- }
- break;
- }
- }
- break;
- }
- case no_aggregate:
- case no_newaggregate:
- {
- node_operator childOp = child->getOperator();
- if (transformed->hasAttribute(keyedAtom))
- {
- IHqlExpression * moved = NULL;
- switch(childOp)
- {
- case no_compound_diskread:
- case no_compound_disknormalize:
- case no_compound_indexread:
- case no_compound_indexnormalize:
- case no_compound_childread:
- case no_compound_childnormalize:
- if (!isGrouped(queryRoot(child)) && (options & HOOhascompoundaggregate))
- moved = optimizeAggregateCompound(transformed);
- break;
- default:
- moved = queryMoveKeyedExpr(transformed);
- break;
- }
- if (moved)
- return moved;
- }
- IHqlExpression * folded = NULL;
- switch(childOp)
- {
- case no_thisnode:
- return swapNodeWithChild(transformed);
- case no_inlinetable:
- if ((options & HOOfoldconstantdatasets) && isPureInlineDataset(child))
- folded = queryOptimizeAggregateInline(transformed, child->queryChild(0)->numChildren());
- break;
- default:
- if ((options & HOOfoldconstantdatasets) && hasSingleRow(child))
- folded = queryOptimizeAggregateInline(transformed, 1);
- break;
- }
- if (folded)
- {
- recursiveDecUsage(child);
- return folded;
- }
- //MORE: The OHOinsidecompound isn't really good enough - because might remove projects from
- //nested child aggregates which could benifit from them. Probably not as long as all compound
- //activities support aggregation. In fact test should be removable everywhere once all
- //engines support the new activities.
- if (isGrouped(transformed->queryChild(0)) || (queryRealChild(transformed, 3) && !(options & HOOinsidecompound)))
- break;
- OwnedHqlExpr ret = optimizeAggregateDataset(transformed);
- if (ret != transformed)
- return ret.getClear();
- break;
- }
- case NO_AGGREGATE:
- return optimizeAggregateDataset(transformed);
- case no_selectnth:
- {
- node_operator childOp = child->getOperator();
- switch(childOp)
- {
- case no_inlinetable:
- {
- __int64 index = getIntValue(transformed->queryChild(1), -1);
- if (index == -1)
- break;
- IHqlExpression * values = child->queryChild(0);
- if (!values->isPure())
- break;
- if (index < 1 || index > values->numChildren())
- return replaceWithNull(transformed);
-
- //MORE If trivial projection then might be worth merging with multiple items, but unlikely to occur in practice
- OwnedHqlExpr ret = createRow(no_createrow, LINK(values->queryChild((unsigned)index-1)));
- noteUnused(child);
- DBGLOG("Optimizer: Replace %s with %s", queryNode0Text(transformed), queryNode1Text(ret));
- return ret.getClear();
- }
- case no_datasetfromrow:
- {
- __int64 index = getIntValue(transformed->queryChild(1), -1);
- if (index == -1)
- break;
- if (index != 1)
- return replaceWithNull(transformed);
-
- IHqlExpression * ret = child->queryChild(0);
- noteUnused(child);
- decUsage(ret); // will inherit later
- DBGLOG("Optimizer: Replace %s with %s", queryNode0Text(transformed), queryNode1Text(ret));
- return LINK(ret);
- }
- #if 0
- //This works (with either condition used), but I don't tink it is worth the cycles..
- case no_choosen:
- {
- __int64 index = getIntValue(transformed->queryChild(1), -1);
- __int64 choosenMax = getIntValue(child->queryChild(1), -1);
- //choosen(x,<n>)[m] == x[m] iff n >= m
- // if ((index == 1) && (choosenMax == 1) && !queryRealChild(child, 2))
- if ((index > 0) && (choosenMax >= index) && !queryRealChild(child, 2) && !isGrouped(child->queryChild(0)))
- return removeChildNode(transformed);
- }
- break;
- #endif
- }
- break;
- }
- case no_select:
- {
- if (transformed->hasAttribute(newAtom))
- {
- node_operator childOp = child->getOperator();
- switch (childOp)
- {
- case no_createrow:
- {
- OwnedHqlExpr match = getExtractSelect(child->queryChild(0), transformed->queryChild(1), false);
- if (match)
- {
- IHqlExpression * cur = match;
- while (isCast(cur))
- cur = cur->queryChild(0);
- if (cur->isPure())
- {
- //This test should not be required, but it avoids problems with elements from rows
- //being used conditionally within transforms. See HPCC-11018 for details.
- if (isIndependentOfScope(match))
- {
- DBGLOG("Optimizer: Extract value %s from %s", queryNode0Text(cur), queryNode1Text(transformed));
- noteUnused(child);
- return match.getClear();
- }
- switch (cur->getOperator())
- {
- case no_createrow:
- case no_constant:
- case no_select:
- case no_null:
- case no_getresult:
- case no_getgraphresult:
- DBGLOG("Optimizer: Extract value %s from %s", queryNode0Text(match), queryNode1Text(transformed));
- noteUnused(child);
- return match.getClear();
- }
- }
- }
- }
- break;
- case no_datasetfromrow:
- {
- HqlExprArray args;
- args.append(*LINK(child->queryChild(0)));
- unwindChildren(args, transformed, 1);
- noteUnused(child);
- return transformed->clone(args);
- }
- break;
- case no_inlinetable:
- {
- IHqlExpression * values = child->queryChild(0);
- if (values->numChildren() == 1)
- {
- IHqlExpression * transform = values->queryChild(0);
- OwnedHqlExpr match = getExtractSelect(transform, transformed->queryChild(1), false);
- if (match)
- {
- IHqlExpression * cur = match;
- while (isCast(cur))
- cur = cur->queryChild(0);
- switch (cur->getOperator())
- {
- case no_constant:
- case no_select:
- case no_null:
- case no_getresult:
- case no_getgraphresult:
- case no_inlinetable:
- case no_left:
- case no_right:
- {
- DBGLOG("Optimizer: Extract value %s from %s", queryNode0Text(match), queryNode1Text(transformed));
- noteUnused(child);
- return match.getClear();
- }
- }
- }
- }
- }
- break;
- }
- }
- }
- break;
- case no_extractresult:
- {
- //Very similar to the transform above, but needs to be done separately because of the new representation of no_extractresult.
- //extract(inline-table(single-row), somefield) -> single-row.somefield if simple valued.
- node_operator childOp = child->getOperator();
- switch (childOp)
- {
- case no_inlinetable:
- {
- IHqlExpression * extracted = transformed->queryChild(1);
- if ((extracted->getOperator() == no_select) && (extracted->queryChild(0) == child->queryNormalizedSelector()))
- {
- IHqlExpression * values = child->queryChild(0);
- if (values->numChildren() == 1)
- {
- IHqlExpression * transform = values->queryChild(0);
- OwnedHqlExpr match = getExtractSelect(transform, extracted->queryChild(1), false);
- if (match)
- {
- IHqlExpression * cur = match;
- while (isCast(cur))
- cur = cur->queryChild(0);
- switch (cur->getOperator())
- {
- case no_constant:
- case no_select:
- case no_null:
- case no_getresult:
- case no_getgraphresult:
- {
- DBGLOG("Optimizer: Extract value %s from %s", queryNode0Text(match), queryNode1Text(transformed));
- noteUnused(child);
- HqlExprArray args;
- args.append(*match.getClear());
- unwindChildren(args, transformed, 2);
- return createValue(no_setresult, makeVoidType(), args);
- }
- }
- }
- }
- }
- }
- break;
- }
- }
- break;
- case no_keyeddistribute:
- case no_distribute:
- {
- if (transformed->hasAttribute(skewAtom))
- break;
- //If distribution matches existing and grouped then don't distribute, but still remove grouping.
- IHqlExpression * distn = queryDistribution(transformed);
- if (distn == queryDistribution(child))
- {
- assertex(isGrouped(child)); // not grouped handled already.
- OwnedHqlExpr ret = createDataset(no_group, LINK(child));
- DBGLOG("Optimizer: replace %s with %s", queryNode0Text(transformed), queryNode1Text(ret));
- return transformed->cloneAllAnnotations(ret);
- }
- break;
- }
- case no_choosen:
- {
- IValue * num = transformed->queryChild(1)->queryValue();
- if (num && (num->getIntValue() >= 1) && !queryRealChild(transformed, 2))
- {
- if (hasNoMoreRowsThan(child, 1))
- return removeParentNode(transformed);
- }
- break;
- }
- case no_preservemeta:
- {
- node_operator childOp = child->getOperator();
- switch(childOp)
- {
- case no_hqlproject:
- case no_newusertable:
- {
- IHqlExpression * ret = hoistMetaOverProject(transformed);
- if (ret)
- return ret;
- break;
- }
- //more; iterate, join? others?
- case no_compound_diskread:
- case no_compound_disknormalize:
- case no_compound_indexread:
- case no_compound_indexnormalize:
- case no_compound_childread:
- case no_compound_childnormalize:
- case no_compound_selectnew:
- case no_compound_inline:
- return swapNodeWithChild(transformed);
- }
- break;
- }
- case no_temptable:
- {
- if (child->getOperator() == no_list)
- {
- ECLlocation dummyLocation(0, 0, 0, NULL);
- OwnedHqlExpr inlineTable = convertTempTableToInlineTable(errorProcessor, dummyLocation, transformed);
- if (transformed != inlineTable)
- return inlineTable.getClear();
- }
- break;
- }
- case no_normalize:
- //Convert NORMALIZE(ds, 0, t(LEFT, COUNTER)) to empty dataset
- if (matchesConstantValue(transformed->queryChild(1), 0))
- return replaceWithNull(transformed);
- //Convert NORMALIZE(ds, 1, t(LEFT, COUNTER)) to PROJECT(ds, t(LEFT, 1));
- if (matchesConstantValue(transformed->queryChild(1), 1))
- {
- IHqlExpression * counter = queryAttributeChild(transformed, _countProject_Atom, 0);
- HqlExprArray args;
- unwindChildren(args, transformed, 0, 1);
- IHqlExpression * transform = transformed->queryChild(2);
- if (counter)
- {
- OwnedHqlExpr one = createConstant(counter->queryType()->castFrom(false, I64C(1)));
- //Remove the annotations from the transform, otherwise it may say t(LEFT,COUNTER) which is confusing.
- args.append(*replaceExpression(transform->queryBody(), counter, one));
- }
- else
- args.append(*LINK(transform));
- DBGLOG("Optimizer: Convert %s(,1) into PROJECT", queryNode0Text(transformed));
- unwindChildren(args, transformed, 3);
- //This is not a count project.. so remove the attribute.
- removeAttribute(args, _countProject_Atom);
- return createDataset(no_hqlproject, args);
- }
- break;
- case no_split:
- node_operator childOp = child->getOperator();
- if (childOp == no_split)
- {
- //Don't convert an unbalanced splitter into a balanced splitter
- //- best would be to set unbalanced on the child, but that would require more complication.
- if (transformed->hasAttribute(balancedAtom) || !child->hasAttribute(balancedAtom))
- return removeParentNode(transformed);
- }
- //This would remove splits only used once, but dangerous if we ever get the usage counting wrong...
- //if (queryBodyExtra(transformed)->useCount == 1)
- // return removeParentNode(transformed);
- break;
- }
- bool shared = childrenAreShared(transformed);
- if (shared)
- {
- bool okToContinue = false;
- switch (op)
- {
- case no_filter:
- {
- node_operator childOp = child->getOperator();
- switch(childOp)
- {
- case no_hqlproject:
- case no_newusertable:
- {
- IHqlExpression * ret = hoistFilterOverProject(transformed, true);
- if (ret)
- return ret;
- break;
- }
- case no_inlinetable:
- //shared is checked within the code below....
- okToContinue = true;
- break;
- }
- break;
- }
- case no_hqlproject:
- {
- node_operator childOp = child->getOperator();
- switch(childOp)
- {
- case no_inlinetable:
- okToContinue = true;
- break;
- }
- break;
- }
- case no_addfiles:
- //It is generally worth always combining inlinetable + inlinetable because it opens the scope
- //for more optimizations (e.g., filters on inlinetables) and the counts also become a known constant.
- okToContinue = true;
- break;
- }
- if (!okToContinue)
- return LINK(transformed);
- }
- switch (op)
- {
- case no_choosen:
- {
- //worth moving a choosen over an activity that doesn't read a record at a time.
- //also worth moving if it brings two projects closer togther, if
- //that doesn't mess up a projected disk read.
- IHqlExpression * const1 = transformed->queryChild(1);
- IValue * val1 = const1->queryValue();
- if (val1)
- {
- __int64 limit = val1->getIntValue();
- if ((limit == CHOOSEN_ALL_LIMIT) && !transformed->queryChild(2))
- return removeParentNode(transformed);
- //if (limit == 0)
- //.,..
- }
- node_operator childOp = child->getOperator();
- switch(childOp)
- {
- case no_choosen:
- {
- //Too complicated to process the grouped variants.
- if (isGrouped(child) || isGrouped(transformed))
- break;
- if (transformed->queryChild(2) || child->queryChild(2))
- {
- //choosen(choosen(x, a, b), c, d))
- //could generate choosen(x, (b+d-1), min(c, a)) but I doubt it is worth it....
- break;
- }
- IHqlExpression * const2 = child->queryChild(1);
- IValue * val2 = const2->queryValue();
- if (val1 && val2)
- {
- __int64 ival1 = val1->getIntValue();
- __int64 ival2 = val2->getIntValue();
- IHqlExpression * newLimit;
- if (ival1 < ival2)
- newLimit = const1;
- else
- newLimit = const2;
- DBGLOG("Optimizer: Merge %s and %s", queryNode0Text(transformed), queryNode1Text(child));
- return createDataset(no_choosen, LINK(child->queryChild(0)), LINK(newLimit));
- //don't bother to transform
- }
- break;
- }
- //This can be done, but I think it makes matters worse. The choosen() will short circuit the reading anyway,
- //so no advantage of swapping with the project, and makes things worse, since stops projects commoning up.
- case no_hqlproject:
- case no_newusertable:
- case no_transformascii:
- case no_transformebcdic:
- {
- if (isPureActivity(child) && !isAggregateDataset(child))
- {
- //Don't move a choosen with a start value over a count project - we could if we also adjust the counter
- if (child->queryAttribute(_countProject_Atom))
- {
- //Don't swap with a grouped project with counter - it changes the meaning of the counter
- if (!isGrouped(child) && !queryRealChild(transformed, 2))
- return forceSwapNodeWithChild(transformed);
- }
- else
- return forceSwapNodeWithChild(transformed);
- }
- break;
- }
- case no_fetch: //NB: Not filtered fetch
- {
- if (!containsSkip(child->queryChild(3)))
- return swapNodeWithChild(transformed, 1);
- break;
- }
- case no_if:
- return swapIntoIf(transformed);
- case no_nonempty:
- case no_chooseds:
- return swapIntoAddFiles(transformed);
- case no_sort:
- //If the sort is grouped then this can't be converted to a topn.
- if (!isGrouped(child))
- {
- unsigned __int64 topNLimit = 1000;
- OwnedHqlExpr topn = queryConvertChoosenNSort(transformed, topNLimit);
- if (topn)
- {
- noteUnused(child);
- return topn.getClear();
- }
- }
- break;
- }
- break;
- }
- case no_limit:
- {
- node_operator childOp = child->getOperator();
- switch(childOp)
- {
- case no_hqlproject:
- case no_newusertable:
- {
- if (isPureActivity(child) && !isAggregateDataset(child) && !transformed->hasAttribute(onFailAtom))
- return forceSwapNodeWithChild(transformed);
- break;
- }
- case no_fetch:
- {
- if (isPureActivity(child))
- return swapNodeWithChild(transformed, 1);
- break;
- }
- case no_if:
- return swapIntoIf(transformed);
- case no_nonempty:
- case no_chooseds:
- return swapIntoAddFiles(transformed);
- case no_limit:
- {
- //Could be cleverer... but this is safer
- if (transformed->queryAttribute(skipAtom) != child->queryAttribute(skipAtom))
- break;
- if (transformed->queryAttribute(onFailAtom) != child->queryAttribute(onFailAtom))
- break;
- OwnedHqlExpr parentLimit = foldHqlExpression(errorProcessor, transformed->queryChild(1));
- OwnedHqlExpr childLimit = foldHqlExpression(errorProcessor, child->queryChild(1));
- if (parentLimit == childLimit)
- return removeParentNode(transformed);
- IValue * parentLimitValue = parentLimit->queryValue();
- IValue * childLimitValue = childLimit->queryValue();
- if (parentLimitValue && childLimitValue)
- {
- if (parentLimitValue->getIntValue() <= childLimitValue->getIntValue())
- return removeParentNode(transformed);
- }
- break;
- }
- case no_compound_indexread:
- case no_compound_diskread:
- if (!isLimitedDataset(child))
- {
- if (transformed->hasAttribute(skipAtom) || transformed->hasAttribute(onFailAtom))
- {
- //only merge if roxie
- }
- else
- {
- if ((options & HOOnoclonelimit) || ((options & HOOnocloneindexlimit) && (childOp == no_compound_indexread)))
- return swapNodeWithChild(transformed);
- OwnedHqlExpr childLimit = ::replaceChild(transformed, 0, child->queryChild(0));
- OwnedHqlExpr localLimit = appendLocalAttribute(childLimit);
- OwnedHqlExpr newCompound = ::replaceChild(child, 0, localLimit);
- incUsage(localLimit);
- incUsage(newCompound);
- decUsage(child);
- return ::replaceChild(transformed, 0, newCompound);
- }
- }
- break;
- case no_choosen:
- {
- OwnedHqlExpr parentLimit = foldHqlExpression(errorProcessor, transformed->queryChild(1));
- OwnedHqlExpr childLimit = foldHqlExpression(errorProcessor, child->queryChild(1));
- if (getIntValue(parentLimit, 0) > getIntValue(childLimit, I64C(0x7fffffffffffffff)))
- return removeParentNode(transformed);
- break;
- }
- case no_topn:
- {
- OwnedHqlExpr parentLimit = foldHqlExpression(errorProcessor, transformed->queryChild(1));
- OwnedHqlExpr childLimit = foldHqlExpression(errorProcessor, child->queryChild(2));
- if (getIntValue(parentLimit, 0) > getIntValue(childLimit, I64C(0x7fffffffffffffff)))
- return removeParentNode(transformed);
- break;
- }
- }
- break;
- }
- case no_dedup:
- {
- node_operator childOp = child->getOperator();
- switch(childOp)
- {
- case no_dedup:
- {
- DedupInfoExtractor dedup1(transformed); // slightly costly to create
- DedupInfoExtractor dedup2(child);
- switch (dedup1.compareWith(dedup2))
- {
- case DedupInfoExtractor::DedupDoesAll:
- return removeChildNode(transformed);
- }
- break;
- }
- }
- break;
- }
- case no_filter:
- {
- node_operator childOp = child->getOperator();
- switch(childOp)
- {
- case no_filter:
- {
- DBGLOG("Optimizer: Merge %s and %s", queryNode0Text(transformed), queryNode1Text(child));
- HqlExprArray args;
- unwindChildren(args, child);
- unwindChildren(args, transformed, 1);
- OwnedHqlExpr combined = child->clone(args);
- return transformed->cloneAllAnnotations(combined);
- }
- case no_hqlproject:
- case no_newusertable:
- {
- IHqlExpression * ret = hoistFilterOverProject(transformed, false);
- if (ret)
- return ret;
- break;
- }
- //more; iterate, join? others?
- case no_compound_diskread:
- case no_compound_disknormalize:
- case no_compound_indexread:
- case no_compound_indexnormalize:
- case no_compound_childread:
- case no_compound_childnormalize:
- case no_compound_selectnew:
- case no_compound_inline:
- if (!isLimitedDataset(child))
- return swapNodeWithChild(transformed);
- break;
- case no_sorted:
- case no_stepped:
- case no_distributed:
- case no_distribute:
- case no_group:
- case no_grouped:
- case no_keyeddistribute:
- case no_sort:
- case no_subsort:
- case no_preload:
- case no_assertsorted:
- case no_assertgrouped:
- case no_assertdistributed:
- return swapNodeWithChild(transformed);
- case no_keyedlimit:
- {
- //It is ugly this is forced.... but ensures filters get combined
- OwnedHqlExpr ret = swapNodeWithChild(transformed);
- //Need to add the filter as a skip on the onFail() transform
- IHqlExpression * onFail = ret->queryAttribute(onFailAtom);
- if (!onFail)
- return ret.getClear();
- IHqlExpression * limitTransform = onFail->queryChild(0);
- if (!isKnownTransform(limitTransform))
- return ret.getClear();
- NewProjectMapper2 mapper;
- mapper.setMapping(limitTransform);
- HqlExprArray filterArgs;
- unwindChildren(filterArgs, transformed, 1);
- OwnedITypeInfo boolType = makeBoolType();
- OwnedHqlExpr cond = createBalanced(no_and, boolType, filterArgs);
- OwnedHqlExpr skipFilter = mapper.expandFields(cond, child, NULL, NULL, NULL);
- OwnedHqlExpr skip = createValue(no_skip, makeVoidType(), getInverse(skipFilter));
- OwnedHqlExpr newTransform = appendOwnedOperand(limitTransform, skip.getClear());
- OwnedHqlExpr newOnFail = createExprAttribute(onFailAtom, newTransform.getClear());
- return replaceOwnedAttribute(ret, newOnFail.getClear());
- }
- case no_if:
- return swapIntoIf(transformed);
- case no_nonempty:
- case no_chooseds:
- return swapIntoAddFiles(transformed);
- case no_fetch:
- if (isPureActivity(child) && !hasUnknownTransform(child))
- {
- IHqlExpression * ret = getHoistedFilter(transformed, false, false, true, true, NotFound);
- if (ret)
- return ret;
- }
- break;
- case no_iterate:
- //Should be possible to move a filter over an iterate, but only really safe if the filter fields match the grouping criteria
- #if 0
- if (isPureActivity(child))
- {
- OwnedHqlExpr ret = queryPromotedFilter(transformed, no_right, 0);
- if (ret)
- return ret.getClear();
- }
- #endif
- break;
- case no_rollup:
- //I don't think you can move a filter over a rollup because it might affect the records rolled up.
- //unless the filter fields match the grouping criteria
- #if 0
- if (isPureActivity(child))
- {
- OwnedHqlExpr ret = queryPromotedFilter(transformed, no_left, 0);
- if (ret)
- return ret.getClear();
- }
- #endif
- break;
- case no_selfjoin:
- if (isPureActivity(child) && !hasUnknownTransform(child) && !isLimitedJoin(child) && !child->hasAttribute(fullouterAtom) && !child->hasAttribute(fullonlyAtom) && !child->hasAttribute(_countProject_Atom))
- {
- //Strictly speaking, we could hoist conditions that can be hoisted for left only (or even full) joins etc. if the fields that are filtered
- //are based on equalities in the join condition. However, that can wait.... (same for join below...)
- bool canHoistLeft = !child->hasAttribute(rightouterAtom) && !child->hasAttribute(rightonlyAtom) &&
- !child->hasAttribute(leftouterAtom) && !child->hasAttribute(leftonlyAtom);
- bool canMergeLeft = isInnerJoin(child);
- bool canHoistRight = false;
- bool canMergeRight = canMergeLeft;
- IHqlExpression * ret = getHoistedFilter(transformed, canHoistLeft, canMergeLeft, canHoistRight, canMergeRight, 2);
- if (ret)
- return ret;
- }
- break;
- case no_join:
- if (isPureActivity(child) && !hasUnknownTransform(child) && !isLimitedJoin(child) && !child->hasAttribute(fullouterAtom) && !child->hasAttribute(fullonlyAtom) && !child->hasAttribute(_countProject_Atom))
- {
- bool canHoistLeft = !child->hasAttribute(rightouterAtom) && !child->hasAttribute(rightonlyAtom);
- bool canMergeLeft = isInnerJoin(child);
- bool canHoistRight = !child->hasAttribute(leftouterAtom) && !child->hasAttribute(leftonlyAtom) && !isKeyedJoin(child);
- bool canMergeRight = canMergeLeft;
- IHqlExpression * ret = getHoistedFilter(transformed, canHoistLeft, canMergeLeft, canHoistRight, canMergeRight, 2);
- if (ret)
- return ret;
- }
- break;
- case no_select:
- {
- IHqlExpression * ret = moveFilterOverSelect(transformed);
- if (ret)
- return ret;
- }
- break;
- case no_inlinetable:
- if (options & HOOfoldconstantdatasets)
- {
- HqlExprArray conditions;
- unwindChildren(conditions, transformed, 1);
- OwnedITypeInfo boolType = makeBoolType();
- OwnedHqlExpr filterCondition = createBalanced(no_and, boolType, conditions);
- HqlExprArray filtered;
- IHqlExpression * values = child->queryChild(0);
- unsigned numValues = values->numChildren();
- unsigned numOk = 0;
- //A vague rule of thumb for the maximum proportion to retain if the dataset is shared.
- unsigned maxSharedFiltered = (numValues >= 10) ? numValues / 10 : 1;
- ForEachChild(i, values)
- {
- IHqlExpression * curTransform = values->queryChild(i);
- if (!isKnownTransform(curTransform))
- break;
- NewProjectMapper2 mapper;
- mapper.setMapping(curTransform);
- OwnedHqlExpr expandedFilter = mapper.expandFields(filterCondition, child, NULL, NULL);
- //This can prematurely ignore some expressions e.g., x and (' ' = ' '), but saves lots of
- //additional constant folding on non constant expressions, so worthwhile.
- if (!expandedFilter->isConstant())
- break;
- OwnedHqlExpr folded = foldHqlExpression(errorProcessor, expandedFilter);
- IValue * value = folded->queryValue();
- if (!value)
- break;
- if (value->getBoolValue())
- {
- filtered.append(*LINK(curTransform));
- //Only break sharing on an inline dataset if it generates something significantly smaller.
- if (shared && (filtered.ordinality() > maxSharedFiltered))
- break;
- }
- numOk++;
- }
- if (numOk == numValues)
- {
- if (filtered.ordinality() == 0)
- return replaceWithNull(transformed);
- if (filtered.ordinality() == values->numChildren())
- return removeParentNode(transformed);
- DBGLOG("Optimizer: Node %s reduce values in child: %s from %d to %d", queryNode0Text(transformed), queryNode1Text(child), values->numChildren(), filtered.ordinality());
- HqlExprArray args;
- args.append(*values->clone(filtered));
- unwindChildren(args, child, 1);
- decUsage(child);
- return child->clone(args);
- }
- }
- break;
- }
- break;
- }
- case no_keyedlimit:
- {
- node_operator childOp = child->getOperator();
- switch(childOp)
- {
- case no_distributed:
- case no_sorted:
- case no_stepped:
- case no_limit:
- case no_choosen:
- case no_compound_indexread:
- case no_compound_diskread:
- case no_assertsorted:
- case no_assertdistributed:
- return swapNodeWithChild(transformed);
- case no_if:
- return swapIntoIf(transformed);
- case no_nonempty:
- case no_chooseds:
- return swapIntoAddFiles(transformed);
- }
- break;
- }
- case no_hqlproject:
- {
- node_operator childOp = child->getOperator();
- IHqlExpression * transformedCountProject = transformed->queryAttribute(_countProject_Atom);
- if (transformed->hasAttribute(prefetchAtom))
- break; // play safe
- IHqlExpression * transformKeyed = transformed->queryAttribute(keyedAtom);
- IHqlExpression * transform = transformed->queryChild(1);
- switch(childOp)
- {
- case no_if:
- if (isComplexTransform(transform))
- break;
- return swapIntoIf(transformed);
- case no_nonempty:
- case no_chooseds:
- if (isComplexTransform(transform))
- break;
- return swapIntoAddFiles(transformed);
- case no_newusertable:
- if (isAggregateDataset(child))
- break;
- case no_hqlproject:
- {
- if (!isPureActivityIgnoringSkip(child) || hasUnknownTransform(child))
- break;
- IHqlExpression * childTransform = queryNewColumnProvider(child);
- if (assignsContainSkip(childTransform))
- break;
- IHqlExpression * childCountProject = child->queryAttribute(_countProject_Atom);
- //Don't merge two count projects - unless we go through and replace counter instances.
- if (transformedCountProject && childCountProject)
- break;
- IHqlExpression * childKeyed = child->queryAttribute(keyedAtom);
- if (childKeyed && !transformKeyed)
- break;
- OwnedMapper mapper = getMapper(child);
- IHqlExpression * transformedSeq = querySelSeq(transformed);
- OwnedHqlExpr oldLeft = createSelector(no_left, child, transformedSeq);
- OwnedHqlExpr newLeft = createSelector(no_left, child->queryChild(0), transformedSeq);
- ExpandSelectorMonitor monitor(*this);
- OwnedHqlExpr expandedTransform = expandFields(mapper, transform, oldLeft, newLeft, &monitor);
- if (expandedTransform && !monitor.isComplex())
- {
- expandedTransform.setown(inheritSkips(expandedTransform, child->queryChild(1), mapper->queryTransformSelector(), newLeft));
- DBGLOG("Optimizer: Merge %s and %s", queryNode0Text(transformed), queryNode1Text(child));
- //NB: Merging a project with a count project can actually remove the count project..
- IHqlExpression * countProjectAttr = transformedCountProject;
- if (childCountProject && transformContainsCounter(expandedTransform, childCountProject->queryChild(0)))
- countProjectAttr = childCountProject;
- noteUnused(child);
- HqlExprArray args;
- args.append(*LINK(child->queryChild(0)));
- args.append(*expandedTransform.getClear());
- if (countProjectAttr)
- args.append(*LINK(countProjectAttr));
- args.append(*LINK(transformedSeq));
- if (transformKeyed)
- args.append(*LINK(transformKeyed));
- unwindHintAttrs(args, transformed);
- unwindHintAttrs(args, child);
- OwnedHqlExpr ret = createDataset(op, args);
- ret.setown(child->cloneAllAnnotations(ret));
- return transformed->cloneAllAnnotations(ret);
- }
- break;
- }
- case no_join:
- if (isKeyedJoin(child))
- break;
- //fall through
- case no_selfjoin:
- case no_fetch:
- case no_normalize:
- case no_newparse:
- case no_newxmlparse:
- case no_rollupgroup:
- {
- if (!isPureActivity(child) || !isPureActivity(transformed) || transformedCountProject)
- break;
- IHqlExpression * transformedSeq = querySelSeq(transformed);
- OwnedHqlExpr oldLeft = createSelector(no_left, child, transformedSeq);
- IHqlExpression * ret = expandProjectedDataset(child, transform, oldLeft, transformed);
- if (ret)
- return ret;
- break;
- }
- case no_preload:
- if (!transformedCountProject)
- return swapNodeWithChild(transformed);
- break;
- case no_sort:
- case no_subsort:
- if (transformedCountProject)
- break;
- if (increasesRowSize(transformed))
- break;
- return moveProjectionOverSimple(transformed, true, false);
- case no_distribute:
- //Cannot move a count project over anything that changes the order of the records.
- if (transformedCountProject)
- break;
- if (increasesRowSize(transformed))
- break;
- return moveProjectionOverSimple(transformed, true, false);
- case no_distributed:
- case no_sorted:
- case no_grouped:
- if (transformedCountProject)
- break;
- return moveProjectionOverSimple(transformed, false, false);
- case no_stepped:
- return moveProjectionOverSimple(transformed, true, false);
- case no_keyedlimit:
- if (isWorthMovingProjectOverLimit(transformed))
- {
- if (child->hasAttribute(onFailAtom))
- return moveProjectionOverLimit(transformed);
- return swapNodeWithChild(transformed);
- }
- break;
- case no_catchds:
- //could treat like a limit, but not at the moment
- break;
- case no_limit:
- case no_choosen:
- if (isWorthMovingProjectOverLimit(transformed))
- {
- //MORE: Later this is going to be worth moving aggregates.... when we have a compound aggregates.
- if (isPureActivity(transformed) && !isAggregateDataset(transformed) && !transformedCountProject)
- {
- if (child->hasAttribute(onFailAtom))
- return moveProjectionOverLimit(transformed);
- return swapNodeWithChild(transformed);
- }
- }
- break;
- case no_inlinetable:
- {
- if (transformContainsSkip(transform))
- break;
- IHqlExpression * ret = optimizeProjectInlineTable(transformed, shared);
- if (ret)
- return ret;
- break;
- }
- case no_compound_diskread:
- case no_compound_disknormalize:
- case no_compound_indexread:
- case no_compound_indexnormalize:
- case no_compound_childread:
- case no_compound_childnormalize:
- case no_compound_selectnew:
- case no_compound_inline:
- if (!transformedCountProject)
- return swapNodeWithChild(transformed);
- break;
- case no_addfiles:
- if (transformedCountProject || isComplexTransform(transform))
- break;
- return swapIntoAddFiles(transformed);
- }
- break;
- }
- case no_projectrow:
- {
- node_operator childOp = child->getOperator();
- switch(childOp)
- {
- case no_if:
- if (isComplexTransform(transformed->queryChild(1)))
- break;
- return swapIntoIf(transformed);
- case no_createrow:
- case no_projectrow:
- {
- if (!isPureActivity(child) || !isPureActivity(transformed) || hasUnknownTransform(child))
- break;
- IHqlExpression * transform = transformed->queryChild(1);
- IHqlExpression * transformedSeq = querySelSeq(transformed);
- OwnedHqlExpr oldLeft = createSelector(no_left, child, transformedSeq);
- OwnedMapper mapper = getMapper(child);
- ExpandSelectorMonitor monitor(*this);
- OwnedHqlExpr expandedTransform = expandFields(mapper, transform, oldLeft, NULL, &monitor);
- if (expandedTransform && !monitor.isComplex())
- {
- DBGLOG("Optimizer: Merge %s and %s", queryNode0Text(transformed), queryNode1Text(child));
- HqlExprArray args;
- unwindChildren(args, child);
- args.replace(*expandedTransform.getClear(), queryTransformIndex(child));
- noteUnused(child);
- return createRow(child->getOperator(), args);
- }
- break;
- }
- }
- break;
- }
- case no_selectfields:
- case no_usertable:
- //shouldn't really have any, because we can't really process them properly.
- break;
- case no_newusertable:
- {
- node_operator childOp = child->getOperator();
- switch(childOp)
- {
- case no_if:
- if (isComplexTransform(transformed->queryChild(2)))
- break;
- return swapIntoIf(transformed);
- case no_nonempty:
- case no_chooseds:
- if (isComplexTransform(transformed->queryChild(2)))
- break;
- return swapIntoAddFiles(transformed);
- case no_newusertable:
- if (isAggregateDataset(child))
- break;
- //fallthrough.
- case no_hqlproject:
- {
- if (!isPureActivity(child) || hasUnknownTransform(child))
- break;
- if (child->hasAttribute(_countProject_Atom) || child->hasAttribute(prefetchAtom))
- break;
-
- IHqlExpression * transformKeyed = transformed->queryAttribute(keyedAtom);
- IHqlExpression * childKeyed = child->queryAttribute(keyedAtom);
- if (childKeyed && !transformKeyed)
- break;
- IHqlExpression * grandchild = child->queryChild(0);
- OwnedMapper mapper = getMapper(child);
- HqlExprArray args;
- args.append(*LINK(grandchild));
- args.append(*LINK(transformed->queryChild(1)));
-
- ExpandSelectorMonitor monitor(*this);
- IHqlExpression * transformExpr = transformed->queryChild(2);
- HqlExprArray assigns;
- ForEachChild(idxt, transformExpr)
- {
- IHqlExpression * cur = transformExpr->queryChild(idxt);
- if (cur->getOperator() == no_assign)
- {
- IHqlExpression * tgt = cur->queryChild(0);
- IHqlExpression * src = cur->queryChild(1);
- assigns.append(*createAssign(LINK(tgt), expandFields(mapper, src, child, grandchild, &monitor)));
- }
- else
- {
- assigns.append(*LINK(cur));
- }
- }
- OwnedHqlExpr expandedTransform = transformExpr->clone(assigns);
- args.append(*LINK(expandedTransform));
- unsigned max = transformed->numChildren();
- for(unsigned idx=3; idx < max; idx++)
- args.append(*expandFields(mapper, transformed->queryChild(idx), child, grandchild, &monitor));
- if (!monitor.isComplex())
- {
- DBGLOG("Optimizer: Merge %s and %s", queryNode0Text(transformed), queryNode1Text(child));
- removeAttribute(args, _internal_Atom);
- noteUnused(child);
- return transformed->clone(args);
- }
- break;
- }
- case no_join:
- if (isKeyedJoin(child))
- break;
- //fall through
- case no_selfjoin:
- case no_fetch:
- case no_normalize:
- case no_newparse:
- case no_newxmlparse:
- case no_rollupgroup:
- {
- if (!isPureActivity(child) || !isPureActivity(transformed))
- break;
- IHqlExpression * transform = transformed->queryChild(2);
- IHqlExpression * ret = expandProjectedDataset(child, transform, child, transformed);
- if (ret)
- return ret;
- break;
- }
- case no_preload:
- return swapNodeWithChild(transformed);
- case no_distribute:
- case no_sort:
- case no_subsort:
- if (increasesRowSize(transformed))
- break;
- return moveProjectionOverSimple(transformed, true, false);
- case no_distributed:
- case no_sorted:
- case no_grouped:
- return moveProjectionOverSimple(transformed, false, false);
- case no_stepped:
- return moveProjectionOverSimple(transformed, false, true);
- case no_keyedlimit:
- case no_limit:
- case no_choosen:
- if (isWorthMovingProjectOverLimit(transformed))
- {
- if (isPureActivity(transformed) && !isAggregateDataset(transformed))
- {
- if (child->hasAttribute(onFailAtom))
- return moveProjectionOverLimit(transformed);
- return swapNodeWithChild(transformed);
- }
- }
- break;
- case no_compound_diskread:
- case no_compound_disknormalize:
- case no_compound_indexread:
- case no_compound_indexnormalize:
- case no_compound_childread:
- case no_compound_childnormalize:
- case no_compound_selectnew:
- case no_compound_inline:
- if (!isAggregateDataset(transformed))
- return swapNodeWithChild(transformed);
- break;
- case no_addfiles:
- if (isComplexTransform(transformed->queryChild(2)))
- break;
- return swapIntoAddFiles(transformed);
- case no_inlinetable:
- {
- IHqlExpression * ret = optimizeProjectInlineTable(transformed, shared);
- if (ret)
- return ret;
- break;
- }
- }
- break;
- }
- case no_group:
- {
- switch (child->getOperator())
- {
- case no_group:
- {
- IHqlExpression * newChild = child;
- bool isLocal = transformed->hasAttribute(localAtom);
- while (newChild->getOperator() == no_group)
- {
- if (newChild->queryAttribute(allAtom))
- break;
- if (queryRealChild(newChild, 1))
- {
- //Don't allow local groups to remove non-local groups.
- if (isLocal && !newChild->hasAttribute(localAtom))
- break;
- }
- noteUnused(newChild);
- newChild = newChild->queryChild(0);
- }
- if (child == newChild)
- break;
- if (queryGrouping(transformed) == queryGrouping(newChild))
- {
- decUsage(newChild); // since will inherit usage on return
- return LINK(newChild);
- }
- return replaceChild(transformed, newChild);
- }
- case no_hqlproject:
- case no_newusertable:
- //Move ungroups() over projects to increase the likelihood of combining projects and removing groups
- // if (!queryRealChild(transformed, 1) && !child->hasAttribute(_countProject_Atom) && !isAggregateDataset(child))
- // return swapNodeWithChild(transformed);
- break;
- }
- break;
- }
- //GH->Ilka no_enth now has a different format, may want to do something with that as well.
- case no_sample:
- {
- IValue * const1 = transformed->queryChild(1)->queryValue();
- if (const1)
- {
- __int64 val1 = const1->getIntValue();
- if (val1 == 1)
- return removeParentNode(transformed);
- node_operator childOp = child->getOperator();
- switch(childOp)
- {
- case no_hqlproject:
- case no_newusertable:
- if (isPureActivity(child) && !child->hasAttribute(_countProject_Atom) && !child->hasAttribute(prefetchAtom) && !isAggregateDataset(child))
- return swapNodeWithChild(transformed);
- break;
- }
- }
- break;
- }
- case no_sort:
- {
- switch(child->getOperator())
- {
- case no_sort:
- case no_subsort:
- if (!isLocalActivity(transformed) || isLocalActivity(child))
- return removeChildNode(transformed);
- break;
- case no_distributed:
- case no_distribute:
- case no_keyeddistribute:
- if (!isLocalActivity(transformed))
- return removeChildNode(transformed); // no transform()
- break;
- }
- break;
- }
- case no_subsort:
- {
- switch(child->getOperator())
- {
- case no_sort:
- {
- if (isGrouped(transformed))
- break;
- //Convert subsort(sort) back into a single sort. Do not convert if it would change the distribution.
- if (!isAlwaysLocal() && (!isLocalActivity(transformed) || !isLocalActivity(child)))
- break;
- OwnedHqlExpr sortOrder = getExistingSortOrder(transformed, true, true);
- //A weird user defined SUBSORT could create an unknown sort order
- if (!sortOrder)
- break;
- OwnedHqlExpr newOrder = replaceSelector(sortOrder, queryActiveTableSelector(), child->queryNormalizedSelector());
- decUsage(child);
- DBGLOG("Optimizer: Merge %s and %s", queryNode0Text(transformed), queryNode1Text(child));
- return ::replaceChild(child, 1, newOrder);
- }
- case no_subsort:
- //This should almost certainly be improved, but it might be a bit tricky!
- break;
- }
- break;
- }
- case no_keyeddistribute:
- case no_distribute:
- {
- if (transformed->hasAttribute(skewAtom))
- break;
- //If distribution matches existing and grouped then don't distribute, but still remove grouping.
- IHqlExpression * distn = queryDistribution(transformed);
- switch(child->getOperator())
- {
- case no_distributed:
- case no_distribute:
- case no_keyeddistribute:
- case no_sort:
- case no_subsort:
- if (!transformed->hasAttribute(mergeAtom))
- return removeChildNode(transformed);
- break;
- case no_dedup:
- {
- IHqlExpression * ret = optimizeDistributeDedup(transformed);
- if (ret)
- return ret;
- break;
- }
- case no_addfiles:
- if ((distn == queryDistribution(child->queryChild(0))) ||
- (distn == queryDistribution(child->queryChild(1))))
- return swapIntoAddFiles(transformed);
- break;
- }
- break;
- }
- case no_distributed:
- {
- switch(child->getOperator())
- {
- case no_distribute:
- case no_distributed:
- if (transformed->queryChild(1) == child->queryChild(1))
- return removeParentNode(transformed);
- break;
- case no_compound_diskread:
- case no_compound_disknormalize:
- case no_compound_indexread:
- case no_compound_indexnormalize:
- return swapNodeWithChild(transformed);
- }
- break;
- }
- case no_sorted:
- {
- switch(child->getOperator())
- {
- case no_compound_diskread:
- case no_compound_disknormalize:
- case no_compound_indexread:
- case no_compound_indexnormalize:
- return swapNodeWithChild(transformed);
- }
- break;
- }
- case no_aggregate:
- case no_newaggregate:
- {
- node_operator childOp = child->getOperator();
- switch(childOp)
- {
- case no_if:
- return swapIntoIf(transformed);
- case no_nonempty:
- case no_chooseds:
- return swapIntoAddFiles(transformed);
- case no_compound_diskread:
- case no_compound_disknormalize:
- case no_compound_indexread:
- case no_compound_indexnormalize:
- case no_compound_childread:
- case no_compound_childnormalize:
- if (!isGrouped(child) && (options & HOOhascompoundaggregate) && !transformed->hasAttribute(localAtom))
- {
- IHqlExpression * ret = optimizeAggregateCompound(transformed);
- if (ret)
- return ret;
- }
- break;
- case no_thisnode:
- return swapNodeWithChild(transformed);
- }
- //MORE: The OHOinsidecompound isn't really good enough - because might remove projects from
- //nested child aggregates which could benefit from them. Probably not as long as all compound
- //activities support aggregation. In fact test should be removable everywhere once all
- //engines support the new activities.
- if (isGrouped(transformed->queryChild(0)) || (queryRealChild(transformed, 3) && !(options & HOOinsidecompound)))
- break;
- return optimizeAggregateDataset(transformed);
- }
- case NO_AGGREGATE:
- return optimizeAggregateDataset(transformed);
- case no_fetch:
- {
- //NB: Required for fetch implementation
- node_operator childOp = child->getOperator();
- switch(childOp)
- {
- case no_newusertable:
- if (isAggregateDataset(child))
- break;
- //fallthrough.
- case no_hqlproject:
- if (!hasUnknownTransform(child))
- {
- OwnedMapper mapper = getMapper(child);
- IHqlExpression * selSeq = querySelSeq(transformed);
- OwnedHqlExpr oldLeft = createSelector(no_left, child, selSeq);
- OwnedHqlExpr newLeft = createSelector(no_left, child->queryChild(0), selSeq);
- IHqlExpression * expanded = expandFields(mapper, transformed->queryChild(3), oldLeft, newLeft);
- if (expanded)
- {
- DBGLOG("Optimizer: Merge %s and %s", queryNode0Text(transformed), queryNode1Text(child));
- HqlExprArray args;
- args.append(*LINK(child->queryChild(0)));
- args.append(*LINK(transformed->queryChild(1)));
- args.append(*LINK(transformed->queryChild(2)));
- args.append(*expanded);
- args.append(*LINK(selSeq));
- return transformed->clone(args);
- }
- }
- break;
- }
- break;
- }
- case no_addfiles:
- {
- //MORE: This is possibly worth doing even if the children are shared.
- HqlExprArray allTransforms;
- bool ok = true;
- ForEachChild(i, transformed)
- {
- IHqlExpression * cur = transformed->queryChild(i);
- if (!cur->isAttribute())
- {
- if (cur->getOperator() != no_inlinetable)
- {
- ok = false;
- break;
- }
- cur->queryChild(0)->unwindList(allTransforms, no_transformlist);
- }
- }
- if (!ok)
- break;
- DBGLOG("Optimizer: Merge inline tables for %s", queryNode0Text(transformed));
- HqlExprArray args;
- args.append(*createValue(no_transformlist, makeNullType(), allTransforms));
- args.append(*LINK(child->queryRecord()));
- ForEachChild(i2, transformed)
- {
- IHqlExpression * cur = transformed->queryChild(i2);
- if (!cur->isAttribute())
- decUsage(cur);
- }
- OwnedHqlExpr ret = createDataset(no_inlinetable, args);
- return transformed->cloneAllAnnotations(ret);
- }
- #if 0
- //Something like the following might theoretically be useful, but seems to cause problems not commoning up
- case no_select:
- if (transformed->hasAttribute(newAtom) && !childrenAreShared(child))
- {
- OwnedHqlExpr ret = transformTrivialSelectProject(transformed);
- if (ret)
- {
- DBGLOG("Optimizer: Select %s from %s optimized", ret->queryChild(1)->queryName()->str(), queryNode1Text(child));
- noteUnused(child);
- return ret.getClear();
- }
- }
- break;
- #endif
- case no_datasetfromrow:
- {
- node_operator childOp = child->getOperator();
- switch (childOp)
- {
- case no_createrow:
- {
- DBGLOG("Optimizer: Merge %s and %s to Inline table", queryNode0Text(transformed), queryNode1Text(child));
- HqlExprArray args;
- args.append(*createValue(no_transformlist, makeNullType(), LINK(child->queryChild(0))));
- args.append(*LINK(child->queryRecord()));
- OwnedHqlExpr ret = createDataset(no_inlinetable, args);
- ret.setown(child->cloneAllAnnotations(ret));
- return transformed->cloneAllAnnotations(ret);
- }
- }
- break;
- }
- case no_join:
- {
- if (isKeyedJoin(transformed) || transformed->hasAttribute(lookupAtom))
- {
- node_operator childOp = child->getOperator();
- switch (childOp)
- {
- case no_newusertable:
- case no_hqlproject:
- {
- if (!isPureActivity(child) || child->queryAttribute(_countProject_Atom) || child->hasAttribute(prefetchAtom))
- break;
- IHqlExpression * transform = queryNewColumnProvider(child);
- if (transformContainsSkip(transform) || !isSimpleTransformToMergeWith(transform))
- break;
- OwnedMapper mapper = getMapper(child);
- IHqlExpression * transformedSeq = querySelSeq(transformed);
- OwnedHqlExpr oldLeft = createSelector(no_left, child, transformedSeq);
- OwnedHqlExpr newLeft = createSelector(no_left, child->queryChild(0), transformedSeq);
- bool ok = true;
- HqlExprArray args;
- args.append(*LINK(child->queryChild(0)));
- args.append(*LINK(transformed->queryChild(1)));
- ExpandSelectorMonitor monitor(*this);
- ForEachChildFrom(i, transformed, 2)
- {
- OwnedHqlExpr expanded = expandFields(mapper, transformed->queryChild(i), oldLeft, newLeft, &monitor);
- if (expanded && !monitor.isComplex())
- {
- args.append(*expanded.getClear());
- }
- else
- {
- ok = false;
- break;
- }
- }
- if (ok)
- {
- //If expanding the project removed all references to left (very silly join....) make it an all join
- if (transformed->hasAttribute(lookupAtom) && !exprReferencesDataset(&args.item(2), newLeft))
- args.append(*createAttribute(allAtom));
- DBGLOG("Optimizer: Merge %s and %s", queryNode0Text(transformed), queryNode1Text(child));
- noteUnused(child);
- OwnedHqlExpr merged = transformed->clone(args);
- //Substituting constants into LEFT join expression can cause problems for the ATMOST join
- //Only keyed joins currently support it.
- if (transformed->hasAttribute(atmostAtom) && !isKeyedJoin(transformed))
- {
- if (joinHasRightOnlyHardMatch(merged, false))
- merged.clear();
- }
- if (merged)
- return merged.getClear();
- }
- break;
- }
- }
- }
- break;
- }
- case no_selectnth:
- {
- node_operator childOp = child->getOperator();
- switch(childOp)
- {
- case no_sort:
- {
- IHqlExpression * index = transformed->queryChild(1);
- if (getIntValue(index, 99999) <= 100 && !isGrouped(child))
- {
- HqlExprArray topnArgs;
- unwindChildren(topnArgs, child);
- topnArgs.add(*LINK(index), 2);
- OwnedHqlExpr topn = createDataset(no_topn, topnArgs);
- incUsage(topn);
- DBGLOG("Optimizer: Replace %s with %s", queryNode0Text(child), queryNode1Text(topn));
- HqlExprArray selectnArgs;
- selectnArgs.append(*child->cloneAllAnnotations(topn));
- unwindChildren(selectnArgs, transformed, 1);
- return transformed->clone(selectnArgs);
- }
- break;
- }
- case no_compound_indexread:
- {
- //If we reach here the index read isn't shared, so different indices won't duplicate the index read.
- if (!isLimitedDataset(child))
- {
- //Add a choosen() within the index read to minimize the records returned remotely - convert ir[1] to choosen(ir,1)[1]
- //Make it local because that is the thor semantics (roxie is happy with local or non local)
- OwnedHqlExpr limited = createDataset(no_choosen, LINK(child->queryChild(0)), createComma(LINK(transformed->queryChild(1)), createLocalAttribute()));
- OwnedHqlExpr newIndexRead = replaceChild(child, limited);
- return replaceChild(transformed, newIndexRead);
- }
- break;
- }
- }
- }
- }
- return LINK(transformed);
- }
- //Fallback path: forward expr to PARENT::createTransformed() and hand back whatever it produces.
- IHqlExpression * CTreeOptimizer::defaultCreateTransformed(IHqlExpression * expr)
- {
-     IHqlExpression * parentResult = PARENT::createTransformed(expr);
-     return parentResult;
- }
- //Allocate a new TableProjectMapper for expr.
- //The object comes straight from new, so the caller takes over the returned pointer
- //(callers in this file store it in an OwnedMapper).
- TableProjectMapper * CTreeOptimizer::getMapper(IHqlExpression * expr)
- {
-     TableProjectMapper * mapper = new TableProjectMapper(expr);
-     return mapper;
- }
- //Decide whether expr is treated as shared:
- //  * no_null                                   => never shared
- //  * spill/split/through-aggregate operators   => always shared
- //  * anything else                             => shared when its recorded useCount is greater than one
- bool CTreeOptimizer::isShared(IHqlExpression * expr)
- {
-     const node_operator op = expr->getOperator();
-     if (op == no_null)
-         return false;
-     switch (op)
-     {
-     case no_spillgraphresult:
-     case no_spill:
-     case no_split:
-     case no_throughaggregate:
-     case no_commonspill:
-         return true;
-     default:
-         break;
-     }
-     //The usage information is only looked up once the operator-based answers have been ruled out
-     OptTransformInfo * extra = queryBodyExtra(expr);
-     return (extra->useCount > 1);
- }
- //Like isShared(), but any useCount other than exactly one counts as shared.
- //NOTE(review): a count of zero presumably means the usage is not known - confirm against the analysis pass.
- bool CTreeOptimizer::isSharedOrUnknown(IHqlExpression * expr)
- {
-     bool sharedOrUnknown;
-     switch (expr->getOperator())
-     {
-     case no_null:
-         sharedOrUnknown = false;
-         break;
-     case no_spillgraphresult:
-     case no_spill:
-     case no_split:
-     case no_throughaggregate:
-     case no_commonspill:
-         sharedOrUnknown = true;
-         break;
-     default:
-         //Only the general case needs to consult the per-expression usage information
-         sharedOrUnknown = (queryBodyExtra(expr)->useCount != 1);
-         break;
-     }
-     return sharedOrUnknown;
- }
- //Optimize a single expression and return the result.
- //The expression is unwound into its comma/compound components, each component is optimized via the
- //array overload, and the results are recombined with createActionList().  If the recombined expression
- //differs from the input it is additionally passed through foldHqlExpression().
- IHqlExpression * optimizeHqlExpression(IErrorReceiver & errorProcessor, IHqlExpression * expr, unsigned options)
- {
-     //no_compound can be very deeply nested, so flatten it first to avoid deep stack traversal.
-     //(Ideally no_compound would be n-ary.)
-     HqlExprArray actions;
-     unwindCommaCompound(actions, expr);
-     HqlExprArray optimizedActions;
-     optimizeHqlExpression(errorProcessor, optimizedActions, actions, options);
-     OwnedHqlExpr optimized = createActionList(optimizedActions);
-     const bool unchanged = (expr == optimized);
-     if (!unchanged)
-     {
-         //An optimized graph is highly likely to expose new constant folding opportunities
-         return foldHqlExpression(errorProcessor, optimized);
-     }
-     return optimized.getClear();
- }
- //Array overload: build a CTreeOptimizer with the supplied options, run analyseArray() (pass 0) over
- //source, then call transformRoot() with source as input and target as output.
- void optimizeHqlExpression(IErrorReceiver & errorProcessor, HqlExprArray & target, HqlExprArray & source, unsigned options)
- {
-     CTreeOptimizer treeOptimizer(errorProcessor, options);
-     //The analysis must run before the transformation
-     treeOptimizer.analyseArray(source, 0);
-     treeOptimizer.transformRoot(source, target);
- }
- /*
- Implementation issues:
- 1. References to transformed items.
- x := project(w, ...);
- y := filter(x, ...);
- z := distribute(y, x.fx);
- when x and y are switched, all references to x need to be replaced by x'
- y' := filter(w, ...);
- x' := project(y', ...);
- z := distribute(x', x'.fx);
- Need to map a selector, where selector->queryNormalized() == oldDataset->queryNormalized(), and replace it with newDataset->queryNormalized()
- However, the mapping is context dependent - it depends on what the parent dataset is.
- Could either have transformed[parentDataset] or could post process the transformed expression.
- So to process efficiently, we need:
- a) transformedSelector[parentCtx];
- b) transformed[parentCtx]
- c) on dataset transform, set dataset->queryNormalizedSelector()->transformedSelector[ctx] to newDataset->queryNormalizedSelector();
- d) on mapping, replace with i) queryTransformed(x) or ii) queryNormalizedSelector()->transformedSelector[ctx];
- Could either have
- expr->queryExtra()->transformedSelector[parentCtx]
- or
- ::transformSelector[parentCtx, expr]
- First is not likely to affect many nodes - since only will be set on datasets.
- Second is likely to use much less memory, and probably as quick - trading an extra indirection+construction time with an assign to a structure.
- Have a noComma(top-ds, prev-ctx) to mark the current context.
- *** Only need to change if dataset is visible inside the arguments to the ECL syntax ***
- Use an array of ctx, where the top of stack (tos) is the current context. Don't seed it with a dummy value, because that will cause commas to be created
- The idea of the transformedSelector should also be generalized:
- if (!transformed) try transformedSelector, and set transformedSelector to result.
- - should we replace the boolean flags in CHqlExpression with a mask?
- i) would make anding/oring more efficient.
- ii) would make adding code generator helpers much less painful - use 32bits and allocate from top down for the code generator.
-
- Useful flags
- - context free - not getresults or access to fields in unrelated tables.
- - unconditional?
- - look at transforms and see what causes pain.
- 2. optimizing shared items.
- * When is it worthwhile?
- o removing duplicate sorts?
- o when it only removes a node e.g., count(project).
- o when would enable operation to be done more efficiently. ??Eg.
- * Need to differentiate between a use and a reference - only link count the former.
- */
|