hqlattr.cpp 110 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "jmisc.hpp"
  14. #include "jfile.hpp"
  15. #include "jiter.ipp"
  16. #include "jexcept.hpp"
  17. #include "jmutex.hpp"
  18. #include "jsort.hpp"
  19. #include "jutil.hpp"
  20. #include "hql.hpp"
  21. #include "hqlexpr.ipp"
  22. #include "hqlgram.hpp"
  23. #include "hqlfold.hpp"
  24. #include "hqlthql.hpp"
  25. #include "hqlpmap.hpp"
  26. #include <math.h>
  27. #include "hqlerrors.hpp"
  28. #include "hqlerror.hpp"
  29. #include "hqlplugins.hpp"
  30. #include "hqltrans.ipp"
  31. #include "hqlutil.hpp"
  32. #include "eclrtl.hpp"
  33. #include "hqlattr.hpp"
  34. #include "hqlmeta.hpp"
  35. static CriticalSection * attributeCS;
  36. MODULE_INIT(INIT_PRIORITY_HQLINTERNAL)
  37. {
  38. attributeCS = new CriticalSection;
  39. return true;
  40. }
  41. MODULE_EXIT()
  42. {
  43. delete attributeCS;
  44. }
  45. // This file should contain most of the derived attribute calculation for nodes in the expression tree,
  46. // Other candidates are
  47. // checkConstant, getChilddatasetType(), getNumChildTables
  48. // queryHasRows, definesColumnList(), queryTransformIndex
  49. // initFlagsBefore(), updateFlagsAfter()
  50. // getCachedEclCRC(), cacheTablesUsed(), isIndependentOfScope()
  51. // logic inside createDataset
  52. //Originally the idea was to have a class instance for each kind of opcode, and to call opcode[op]->evaluateAttrXXXXXX(this);
  53. //to evaluate the attribute. However because there are so many opcodes I'm not convinced this is the best way.
  54. //Better may be to model it more on the way queryRecordCount() is implemented.
  55. //This switch statement provides an entry for each opcode, grouped according to its function.
  56. //Within each group, try to maintain alphabetical ordering.
  57. unsigned getOperatorMetaFlags(node_operator op)
  58. {
  59. switch (op)
  60. {
  61. case no_none:
  62. case no_nobody:
  63. //Records/types
  64. case no_field:
  65. case no_record:
  66. case no_type:
  67. case no_ifblock:
  68. case no_enum:
  69. case no_selfref:
  70. case no_typedef:
  71. //Simple arithmetic expressions with no children:
  72. case no_constant:
  73. case no_variable:
  74. case no_quoted: // codegen only
  75. case no_getresult:
  76. case no_matched:
  77. case no_matchtext:
  78. case no_matchlength:
  79. case no_matchposition:
  80. case no_failcode:
  81. case no_failmessage:
  82. case no_id2blob:
  83. case no_blob2id:
  84. case no_clustersize:
  85. case no_loopcounter:
  86. case no_callback:
  87. case no_assertwild:
  88. case no_eventname:
  89. case no_eventextra:
  90. case no_debug_option_value:
  91. //Arithmetic operators
  92. case no_mul:
  93. case no_div:
  94. case no_modulus:
  95. case no_negate:
  96. case no_add:
  97. case no_sub:
  98. case no_exp:
  99. case no_power:
  100. case no_round:
  101. case no_roundup:
  102. case no_ln:
  103. case no_log10:
  104. case no_sin:
  105. case no_cos:
  106. case no_tan:
  107. case no_asin:
  108. case no_acos:
  109. case no_atan:
  110. case no_atan2:
  111. case no_sinh:
  112. case no_cosh:
  113. case no_tanh:
  114. case no_sqrt:
  115. case no_truncate:
  116. case no_cast:
  117. case no_implicitcast:
  118. case no_abs:
  119. case no_charlen:
  120. case no_sizeof:
  121. case no_offsetof:
  122. case no_nameof:
  123. case no_band:
  124. case no_bor:
  125. case no_bxor:
  126. case no_bnot:
  127. case no_order: //?? also a comparison
  128. case no_rank:
  129. case no_ranked:
  130. case no_hash:
  131. case no_typetransfer:
  132. case no_lshift:
  133. case no_rshift:
  134. case no_crc:
  135. case no_random:
  136. case no_counter:
  137. case no_address:
  138. case no_hash32:
  139. case no_hash64:
  140. case no_wuid:
  141. case no_countdict:
  142. case no_existslist:
  143. case no_countlist:
  144. case no_maxlist:
  145. case no_minlist:
  146. case no_sumlist:
  147. case no_unicodeorder:
  148. case no_assertkeyed:
  149. case no_hashmd5:
  150. case no_pure:
  151. case no_sequence:
  152. case no_getenv:
  153. //Selection operators - could be arithmetic, string, dataset etc.
  154. case no_map:
  155. case no_if:
  156. case no_choose:
  157. case no_which:
  158. case no_rejected:
  159. case no_mapto:
  160. case no_case:
  161. //String operators:
  162. case no_concat:
  163. case no_substring:
  164. case no_asstring:
  165. case no_intformat:
  166. case no_realformat:
  167. case no_trim:
  168. case no_fromunicode:
  169. case no_tounicode:
  170. case no_keyunicode:
  171. case no_rowdiff:
  172. case no_xmltext:
  173. case no_xmlunicode:
  174. case no_xmldecode:
  175. case no_xmlencode:
  176. case no_matchunicode:
  177. case no_matchutf8:
  178. case no_regex_find:
  179. case no_regex_replace:
  180. case no_toxml:
  181. //Boolean operators:
  182. case no_eq:
  183. case no_ne:
  184. case no_lt:
  185. case no_le:
  186. case no_gt:
  187. case no_ge:
  188. case no_not:
  189. case no_notnot:
  190. case no_and:
  191. case no_or:
  192. case no_xor:
  193. case no_notin:
  194. case no_in:
  195. case no_notbetween:
  196. case no_between:
  197. case no_is_valid:
  198. case no_indict:
  199. //Lists/Sets etc.
  200. case no_list:
  201. case no_all:
  202. case no_addsets:
  203. case no_createset:
  204. case no_rowset:
  205. case no_rowsetindex:
  206. case no_rowsetrange:
  207. case no_sortlist:
  208. case no_recordlist:
  209. case no_datasetlist:
  210. case no_transformlist:
  211. //Aggregate operators
  212. case no_count:
  213. case no_exists:
  214. case no_existsdict:
  215. case no_max:
  216. case no_min:
  217. case no_sum:
  218. case no_ave:
  219. case no_variance:
  220. case no_covariance:
  221. case no_correlation:
  222. case no_countgroup:
  223. case no_existsgroup:
  224. case no_maxgroup:
  225. case no_mingroup:
  226. case no_sumgroup:
  227. case no_avegroup:
  228. case no_vargroup:
  229. case no_covargroup:
  230. case no_corrgroup:
  231. case no_within:
  232. case no_notwithin:
  233. case no_countcompare:
  234. //Selectors
  235. case no_left:
  236. case no_right:
  237. case no_self:
  238. case no_activetable:
  239. case no_activerow:
  240. case no_top:
  241. //Transforms
  242. case no_transform:
  243. case no_assign:
  244. case no_assignall:
  245. case no_newtransform:
  246. //Rows
  247. case no_selectmap:
  248. case no_selectnth:
  249. case no_matchrow:
  250. case no_matchattr: // and scalar
  251. case no_projectrow:
  252. case no_createrow:
  253. case no_newrow:
  254. case no_temprow:
  255. //Dictionaries
  256. case no_createdictionary:
  257. //Datasets [see also selection operators]
  258. case no_rollup:
  259. case no_iterate:
  260. case no_hqlproject:
  261. case no_group:
  262. case no_cogroup:
  263. case no_cosort:
  264. case no_index:
  265. case no_table:
  266. case no_keyindex:
  267. case no_temptable:
  268. case no_usertable:
  269. case no_choosen:
  270. case no_filter:
  271. case no_fetch:
  272. case no_join:
  273. case no_sort:
  274. case no_subsort:
  275. case no_sorted:
  276. case no_dedup:
  277. case no_enth:
  278. case no_sample:
  279. case no_selectfields:
  280. case no_addfiles:
  281. case no_distribute:
  282. case no_normalize:
  283. case no_distributed:
  284. case no_preservemeta:
  285. case no_grouped:
  286. case no_denormalize:
  287. case no_newusertable:
  288. case no_newaggregate:
  289. case no_aggregate:
  290. case no_choosesets:
  291. case no_workunit_dataset:
  292. case no_split:
  293. case no_spill:
  294. case no_readspill:
  295. case no_writespill:
  296. case no_commonspill:
  297. case no_parse:
  298. case no_newparse:
  299. case no_throughaggregate:
  300. case no_compound_diskread:
  301. case no_compound_disknormalize:
  302. case no_compound_diskaggregate:
  303. case no_compound_diskcount:
  304. case no_compound_diskgroupaggregate:
  305. case no_compound_indexread:
  306. case no_compound_indexnormalize:
  307. case no_compound_indexaggregate:
  308. case no_compound_indexcount:
  309. case no_compound_indexgroupaggregate:
  310. case no_compound_childread:
  311. case no_compound_childnormalize:
  312. case no_compound_childaggregate:
  313. case no_compound_childcount:
  314. case no_compound_childgroupaggregate:
  315. case no_compound_inline:
  316. case no_getgraphresult:
  317. case no_compound_fetch:
  318. case no_topn:
  319. case no_newxmlparse:
  320. case no_httpcall:
  321. case no_soapcall:
  322. case no_soapcall_ds:
  323. case no_newsoapcall:
  324. case no_newsoapcall_ds:
  325. case no_nonempty:
  326. case no_filtergroup:
  327. case no_limit:
  328. case no_catchds:
  329. case no_loop:
  330. case no_forcenolocal:
  331. case no_allnodes:
  332. case no_selfjoin:
  333. case no_process:
  334. case no_thisnode:
  335. case no_getgraphloopresult:
  336. case no_graphloop:
  337. case no_assertstepped:
  338. case no_assertsorted:
  339. case no_assertgrouped:
  340. case no_assertdistributed:
  341. case no_mergejoin:
  342. case no_nwayjoin:
  343. case no_nwaymerge:
  344. case no_stepped:
  345. case no_datasetfromrow:
  346. case no_datasetfromdictionary:
  347. case no_assert_ds:
  348. case no_combine:
  349. case no_rollupgroup:
  350. case no_regroup:
  351. case no_combinegroup:
  352. case no_inlinetable:
  353. case no_denormalizegroup:
  354. case no_xmlproject:
  355. case no_spillgraphresult:
  356. case no_rows:
  357. case no_keyedlimit:
  358. case no_compound_selectnew:
  359. case no_getgraphloopresultset:
  360. case no_preload:
  361. case no_merge:
  362. case no_keyeddistribute:
  363. case no_newkeyindex:
  364. case no_anon:
  365. case no_pseudods:
  366. case no_deserialize:
  367. case no_serialize:
  368. case no_forcegraph:
  369. case no_related:
  370. case no_executewhen:
  371. case no_callsideeffect:
  372. case no_fromxml:
  373. case no_xmlparse:
  374. case no_normalizegroup:
  375. case no_owned_ds:
  376. case no_dataset_alias:
  377. case no_chooseds:
  378. //Multiple different kinds of values
  379. case no_select:
  380. case no_indirect:
  381. case no_selectindirect:
  382. case no_null:
  383. case no_globalscope:
  384. case no_nothor:
  385. case no_embedbody:
  386. case no_alias_scope:
  387. case no_evalonce:
  388. case no_forcelocal:
  389. case no_cluster:
  390. //Parser only - not in normalized expression trees
  391. case no_evaluate:
  392. case no_macro:
  393. case no_transformebcdic:
  394. case no_transformascii:
  395. case no_metaactivity:
  396. case no_loadxml:
  397. case no_fieldmap:
  398. case no_template_context:
  399. case no_processing:
  400. case no_merge_pending:
  401. case no_merge_nomatch:
  402. case no_namedactual:
  403. case no_assertconstant:
  404. case no_assertconcrete:
  405. case no_delayedscope:
  406. //Code generator only - only created once code is being generated.
  407. case no_postinc:
  408. case no_postdec:
  409. case no_preinc:
  410. case no_predec:
  411. case no_pselect:
  412. case no_deref:
  413. case no_ordered:
  414. case no_decimalstack:
  415. case no_translated:
  416. case no_filepos:
  417. case no_file_logicalname:
  418. case no_reference:
  419. case no_assign_addfiles:
  420. case no_nullptr:
  421. case no_childquery:
  422. //Workflow
  423. case no_stored:
  424. case no_failure:
  425. case no_success:
  426. case no_recovery:
  427. case no_wait:
  428. case no_event:
  429. case no_persist:
  430. case no_when:
  431. case no_setconditioncode:
  432. case no_priority:
  433. case no_colon:
  434. case no_setworkflow_cond:
  435. case no_global:
  436. case no_workflow:
  437. case no_workflow_action:
  438. case no_checkpoint:
  439. case no_define:
  440. case no_independent:
  441. case no_catch:
  442. case no_once:
  443. //Patterns
  444. case no_pat_select:
  445. case no_pat_const:
  446. case no_pat_pattern:
  447. case no_pat_follow:
  448. case no_pat_first:
  449. case no_pat_last:
  450. case no_pat_repeat:
  451. case no_pat_instance:
  452. case no_pat_anychar:
  453. case no_pat_token:
  454. case no_pat_imptoken:
  455. case no_pat_set:
  456. case no_pat_checkin:
  457. case no_pat_x_before_y:
  458. case no_pat_x_after_y:
  459. case no_pat_index:
  460. case no_pat_beginpattern:
  461. case no_pat_endpattern:
  462. case no_pat_checklength:
  463. case no_pat_featureparam:
  464. case no_pat_featureactual:
  465. case no_pat_featuredef:
  466. case no_pat_validate:
  467. case no_pat_use:
  468. case no_featuretype:
  469. case no_pat_guard:
  470. case no_penalty:
  471. case no_pat_case:
  472. case no_pat_nocase:
  473. case no_pat_before_y:
  474. case no_pat_after_y:
  475. case no_pat_production:
  476. case no_pat_or:
  477. //Pseudo-Attributes
  478. case no_csv:
  479. case no_sql:
  480. case no_thor:
  481. case no_flat:
  482. case no_pipe:
  483. case no_joined:
  484. case no_any:
  485. case no_xml:
  486. case no_distributer:
  487. case no_keyed:
  488. case no_sortpartition:
  489. //Multiple types
  490. case no_outofline:
  491. case no_create_initializer:
  492. //Actions
  493. case no_buildindex:
  494. case no_output:
  495. case no_apply:
  496. case no_fail:
  497. case no_distribution:
  498. case no_ensureresult:
  499. case no_setresult:
  500. case no_sequential:
  501. case no_parallel:
  502. case no_actionlist:
  503. case no_soapaction_ds:
  504. case no_newsoapaction_ds:
  505. case no_keydiff:
  506. case no_keypatch:
  507. case no_returnresult:
  508. case no_outputscalar:
  509. case no_evaluate_stmt:
  510. case no_return_stmt:
  511. case no_setgraphloopresult:
  512. case no_skip:
  513. case no_assert:
  514. case no_notify:
  515. case no_setgraphresult:
  516. case no_extractresult:
  517. case no_unused81:
  518. case no_definesideeffect:
  519. // Scopes etc.
  520. case no_scope:
  521. case no_forwardscope:
  522. case no_remotescope:
  523. case no_privatescope:
  524. case no_virtualscope:
  525. case no_concretescope:
  526. case no_mergedscope:
  527. //Used for representing functional attributes
  528. case no_service:
  529. case no_external:
  530. case no_funcdef:
  531. case no_externalcall:
  532. case no_libraryselect:
  533. case no_bound_func:
  534. case no_purevirtual:
  535. case no_internalselect:
  536. case no_delayedselect:
  537. case no_unboundselect:
  538. case no_libraryscope:
  539. case no_libraryscopeinstance:
  540. case no_libraryinput:
  541. case no_call:
  542. case no_attrname:
  543. // Other
  544. case no_comma:
  545. case no_uncommoned_comma:
  546. case no_compound:
  547. case no_param:
  548. case no_setmeta:
  549. case no_omitted:
  550. case no_range:
  551. case no_rangeto:
  552. case no_rangefrom:
  553. case no_rangecommon:
  554. case no_nofold:
  555. case no_nohoist:
  556. case no_section:
  557. case no_sectioninput:
  558. case no_alias:
  559. case no_unknown: // used for callbacks
  560. case no_attr:
  561. case no_attr_link:
  562. case no_attr_expr:
  563. case no_cachealias:
  564. case no_subgraph:
  565. case no_rowvalue:
  566. case no_loopbody:
  567. case no_complex:
  568. //Not implemented anywhere:
  569. case no_impure: // not really used
  570. case no_dependenton:
  571. case no_alias_project:
  572. case no_nolink:
  573. case no_joincount:
  574. case no_guard:
  575. case no_hint:
  576. case no_cloned:
  577. case no_childdataset:
  578. case no_envsymbol:
  579. case no_bound_type:
  580. case no_mix:
  581. case no_persist_check:
  582. case no_dataset_from_transform:
  583. case no_id:
  584. case no_unused6:
  585. case no_unused13: case no_unused14: case no_unused15:
  586. case no_unused24: case no_unused25: case no_unused28: case no_unused29:
  587. case no_unused30: case no_unused31: case no_unused32: case no_unused33: case no_unused34: case no_unused35: case no_unused36: case no_unused37: case no_unused38:
  588. case no_unused40: case no_unused41: case no_unused42: case no_unused43: case no_unused44: case no_unused45: case no_unused46: case no_unused47: case no_unused48: case no_unused49:
  589. case no_unused50: case no_unused52:
  590. case no_unused80: case no_unused83:
  591. case no_unused101: case no_unused102:
  592. case no_is_null:
  593. case no_position:
  594. case no_current_time:
  595. case no_current_date:
  596. case no_current_timestamp:
  597. case no_update:
  598. //The following never get created IHqlExpressions, they are used as constants in the PARSE internal structures.
  599. case no_pat_compound:
  600. case no_pat_begintoken:
  601. case no_pat_endtoken:
  602. case no_pat_begincheck:
  603. case no_pat_endcheckin:
  604. case no_pat_endchecklength:
  605. case no_pat_beginseparator:
  606. case no_pat_endseparator:
  607. case no_pat_separator:
  608. case no_pat_beginvalidate:
  609. case no_pat_endvalidate:
  610. case no_pat_dfa:
  611. case no_pat_singlechar:
  612. case no_pat_beginrecursive:
  613. case no_pat_endrecursive:
  614. case no_pat_utf8single:
  615. case no_pat_utf8lead:
  616. case no_pat_utf8follow:
  617. case no_eclcrc:
  618. return 0;
  619. case no_isomitted:
  620. return 0;
  621. default:
  622. DBGLOG("**** Missing meta flags for operator %d ***", (int)op);
  623. return 0;
  624. }
  625. }
  626. //---------------------------------------------------------------------------------
  627. inline unsigned truncMaxlength(unsigned __int64 value)
  628. {
  629. return (value > MAX_MAXLENGTH) ? MAX_MAXLENGTH : (unsigned)value;
  630. }
  631. static unsigned getMaxSize(ITypeInfo * type, IHqlExpression * maxLength, IHqlExpression * maxSize, IHqlExpression * maxCount)
  632. {
  633. unsigned size = type->getSize();
  634. if (size != UNKNOWN_LENGTH)
  635. return size;
  636. if (!maxLength) maxLength = queryPropertyChild(type, maxLengthAtom, 0);
  637. if (!maxSize) maxSize = queryPropertyChild(type, maxSizeAtom, 0);
  638. if (!maxCount) maxCount = queryPropertyChild(type, maxCountAtom, 0);
  639. if (maxSize)
  640. return (unsigned)getIntValue(maxSize, 0);
  641. if (maxLength)
  642. {
  643. unsigned __int64 len = (unsigned)getIntValue(maxLength, 0);
  644. switch (type->getTypeCode())
  645. {
  646. case type_string:
  647. case type_data:
  648. return truncMaxlength(sizeof(size32_t) + len);
  649. case type_unicode:
  650. return truncMaxlength(sizeof(size32_t) + len*sizeof(UChar));
  651. case type_qstring:
  652. return truncMaxlength(sizeof(size32_t) + rtlQStrSize((unsigned)len));
  653. case type_varstring:
  654. return truncMaxlength(len + 1);
  655. case type_varunicode:
  656. return truncMaxlength((len + 1) * sizeof(UChar));
  657. case type_utf8:
  658. return truncMaxlength(sizeof(size32_t) + (len * 4));
  659. case type_set:
  660. return truncMaxlength(len);
  661. }
  662. }
  663. if (maxCount)
  664. {
  665. unsigned __int64 count = getIntValue(maxCount, 0);
  666. switch (type->getTypeCode())
  667. {
  668. case type_set:
  669. {
  670. ITypeInfo * childType = type->queryChildType();
  671. if (!childType)
  672. break;
  673. unsigned elemSize = getMaxSize(childType, NULL, NULL, NULL);
  674. if (elemSize != UNKNOWN_LENGTH)
  675. return truncMaxlength(sizeof(bool) + sizeof(size32_t) + count * elemSize);
  676. break;
  677. }
  678. }
  679. }
  680. return UNKNOWN_LENGTH;
  681. }
  682. static unsigned getMaxSize(IHqlExpression * field)
  683. {
  684. switch (field->getOperator())
  685. {
  686. case no_select:
  687. return getMaxSize(field->queryChild(1));
  688. case no_indirect:
  689. return getMaxSize(field->queryChild(0));
  690. }
  691. ITypeInfo * type = field->queryType();
  692. IHqlExpression * maxLength = queryPropertyChild(field, maxLengthAtom, 0);
  693. IHqlExpression * maxSize = queryPropertyChild(field, maxSizeAtom, 0);
  694. IHqlExpression * maxCount = queryPropertyChild(field, maxCountAtom, 0);
  695. unsigned max = getMaxSize(type, maxLength, maxSize, maxCount);
  696. if (max != UNKNOWN_LENGTH)
  697. return max;
  698. ITypeInfo * indirect = queryModifier(type, typemod_indirect);
  699. if (indirect)
  700. {
  701. IHqlExpression * original = static_cast<IHqlExpression *>(indirect->queryModifierExtra());
  702. return getMaxSize(original);
  703. }
  704. return max;
  705. }
  706. //Some arbitrary guess at the size of a variable length string field.
  707. static double twoThirds = 2.0/3.0;
  708. static unsigned guessSize(unsigned minLen, unsigned maxLen)
  709. {
  710. if (maxLen == UNKNOWN_LENGTH)
  711. maxLen = 4096;
  712. if (maxLen < minLen)
  713. return minLen;
  714. double value = pow((double)(maxLen-minLen), twoThirds);
  715. return truncMaxlength(minLen + (unsigned __int64)ceil(value));
  716. }
  717. static IHqlExpression * querySerializedForm(IHqlExpression * expr, IAtom * variation)
  718. {
  719. if (expr)
  720. {
  721. ExprPropKind kind;
  722. if (variation == diskAtom)
  723. kind = EPdiskserializedForm;
  724. else if (variation == internalAtom)
  725. kind = EPinternalserializedForm;
  726. else
  727. throwUnexpected();
  728. IHqlExpression * attr = expr->queryAttribute(kind);
  729. if (attr)
  730. return attr;
  731. }
  732. return expr;
  733. }
  734. static HqlTransformerInfo serializedRecordCreatorInfo("SerializedRecordCreator");
  735. class SerializedRecordCreator : public QuickHqlTransformer
  736. {
  737. public:
  738. SerializedRecordCreator(IAtom * _variety) : QuickHqlTransformer(serializedRecordCreatorInfo, NULL), variety(_variety) {}
  739. virtual IHqlExpression * createTransformedBody(IHqlExpression * expr)
  740. {
  741. switch (expr->getOperator())
  742. {
  743. case no_field:
  744. {
  745. if (expr->hasProperty(_linkCounted_Atom))
  746. {
  747. OwnedHqlExpr transformed = QuickHqlTransformer::createTransformedBody(expr);
  748. return removeProperty(transformed, _linkCounted_Atom);
  749. }
  750. break;
  751. }
  752. }
  753. return QuickHqlTransformer::createTransformedBody(expr);
  754. }
  755. virtual ITypeInfo * transformType(ITypeInfo * type)
  756. {
  757. Owned<ITypeInfo> transformed = QuickHqlTransformer::transformType(type);
  758. return getSerializedForm(transformed, variety);
  759. }
  760. protected:
  761. IAtom * variety;
  762. };
  763. static IHqlExpression * evaluateSerializedRecord(IHqlExpression * expr, IAtom * variation)
  764. {
  765. SerializedRecordCreator transformer(variation);
  766. return transformer.transform(expr);
  767. }
  768. //---------------------------------------------------------------------------------
  769. class CHqlExprMeta
  770. {
  771. public:
  772. static inline IHqlExpression * addAttribute(IHqlExpression * expr, ExprPropKind kind, IHqlExpression * value)
  773. {
  774. CHqlExpression * cexpr = static_cast<CHqlExpression *>(expr);
  775. cexpr->addAttribute(kind, value);
  776. return value;
  777. }
  778. static inline IHqlExpression * queryExistingAttribute(IHqlExpression * expr, ExprPropKind kind)
  779. {
  780. CHqlExpression * cexpr = static_cast<CHqlExpression *>(expr);
  781. return cexpr->queryExistingAttribute(kind);
  782. }
  783. } meta;
  784. //-- Attribute: serialized form -------------------------------------------------------------------------------
  785. static IHqlExpression * evaluateAttrSerializedForm(IHqlExpression * expr, ExprPropKind kind, IAtom * variation)
  786. {
  787. if (expr->getOperator() == no_record || expr->getOperator() == no_field)
  788. {
  789. OwnedHqlExpr serialized = evaluateSerializedRecord(expr, variation);
  790. if (serialized != expr)
  791. {
  792. //Tag serialized form so don't re-evaluated
  793. meta.addAttribute(serialized, kind, serialized);
  794. }
  795. return meta.addAttribute(expr, kind, serialized);
  796. }
  797. return NULL;
  798. }
  799. //-- Attribute: size -------------------------------------------------------------------------------
  800. //no_field
  801. static IHqlExpression * evaluateFieldAttrSize(IHqlExpression * expr)
  802. {
  803. ITypeInfo * type = expr->queryType();
  804. unsigned minSize = UNKNOWN_LENGTH;
  805. unsigned maxSize = 0;
  806. unsigned thisSize = type->getSize();
  807. OwnedHqlExpr thisMaxSizeExpr;
  808. if (expr->hasProperty(_isBlobInIndex_Atom))
  809. {
  810. thisSize = sizeof(unsigned __int64);
  811. }
  812. else
  813. {
  814. switch (type->getTypeCode())
  815. {
  816. case type_bitfield:
  817. {
  818. thisSize = type->queryChildType()->getSize();
  819. break;
  820. }
  821. case type_record:
  822. case type_row:
  823. {
  824. if (hasReferenceModifier(type))
  825. thisSize = sizeof(void *);
  826. else
  827. {
  828. IHqlExpression * ret = expr->queryRecord()->queryAttribute(EPsize);
  829. return meta.addAttribute(expr, EPsize, ret);
  830. }
  831. break;
  832. }
  833. case type_dictionary:
  834. case type_groupedtable:
  835. case type_table:
  836. {
  837. if (expr->hasProperty(_linkCounted_Atom))
  838. {
  839. thisSize = sizeof(size32_t) + sizeof(byte * *);
  840. break;
  841. }
  842. IHqlExpression * count = NULL;
  843. IHqlExpression * size = NULL;
  844. IHqlExpression * maxLength = NULL;
  845. IHqlExpression * maxCount = NULL;
  846. ForEachChild(i, expr)
  847. {
  848. IHqlExpression * attr = expr->queryChild(i);
  849. if (attr->isAttribute())
  850. {
  851. IAtom * name = attr->queryName();
  852. if (name == countAtom)
  853. count = attr->queryChild(0);
  854. else if (name == sizeofAtom)
  855. size = attr->queryChild(0);
  856. else if (name == maxLengthAtom)
  857. maxLength = attr->queryChild(0);
  858. else if (name == maxSizeAtom)
  859. maxLength = attr->queryChild(0);
  860. else if (name == maxCountAtom)
  861. maxCount = attr->queryChild(0);
  862. else if ((name == choosenAtom) && attr->queryChild(0)->queryValue())
  863. maxCount = attr->queryChild(0);
  864. }
  865. }
  866. IHqlExpression * record = expr->queryRecord();
  867. IHqlExpression * childRecordSizeExpr = record->queryAttribute(EPsize);
  868. unsigned childExpectedSize = (unsigned)getIntValue(childRecordSizeExpr->queryChild(0));
  869. unsigned childMinimumSize = (unsigned)getIntValue(childRecordSizeExpr->queryChild(1));
  870. IHqlExpression * childMaximumSizeExpr = childRecordSizeExpr->queryChild(2);
  871. unsigned childMaximumSize = (unsigned)getIntValue(childMaximumSizeExpr, UNKNOWN_LENGTH);
  872. ITypeInfo * sizetType = childMaximumSizeExpr->queryType();
  873. if (count || size)
  874. {
  875. minSize = 0;
  876. if (size && size->queryValue())
  877. thisSize = (unsigned)getIntValue(size);
  878. else if (count && count->queryValue())
  879. {
  880. unsigned __int64 num = (unsigned)getIntValue(count);
  881. thisSize = truncMaxlength(num * childExpectedSize);
  882. minSize = truncMaxlength(num * childMinimumSize);
  883. if (childMaximumSize != UNKNOWN_LENGTH)
  884. maxSize = truncMaxlength(num * childMaximumSize);
  885. else
  886. thisMaxSizeExpr.setown(createValue(no_mul, LINK(sizetType), ensureExprType(count, sizetType), LINK(childMaximumSizeExpr)));
  887. }
  888. else
  889. {
  890. thisSize = UNKNOWN_LENGTH;
  891. if (maxLength)
  892. maxSize = (unsigned)getIntValue(maxLength);
  893. else if (maxCount)
  894. {
  895. if (childMaximumSize != UNKNOWN_LENGTH)
  896. maxSize = truncMaxlength((unsigned __int64)getIntValue(maxCount) * childMaximumSize);
  897. else
  898. thisMaxSizeExpr.setown(createValue(no_mul, LINK(sizetType), ensureExprType(maxCount, sizetType), LINK(childMaximumSizeExpr)));
  899. }
  900. else
  901. maxSize = UNKNOWN_LENGTH;
  902. }
  903. }
  904. else
  905. {
  906. minSize = sizeof(size32_t);
  907. if (maxLength)
  908. maxSize = (unsigned)getIntValue(maxLength);
  909. else if (maxCount)
  910. {
  911. if (childMaximumSize != UNKNOWN_LENGTH)
  912. maxSize = truncMaxlength(sizeof(size32_t) + (unsigned __int64)getIntValue(maxCount) * childMaximumSize);
  913. else
  914. thisMaxSizeExpr.setown(createValue(no_add, LINK(sizetType), getSizetConstant(sizeof(size32_t)),
  915. createValue(no_mul, LINK(sizetType), ensureExprType(maxCount, sizetType), LINK(childMaximumSizeExpr))));
  916. }
  917. else
  918. maxSize = UNKNOWN_LENGTH;
  919. }
  920. break;
  921. }
  922. case type_string:
  923. case type_data:
  924. case type_unicode:
  925. case type_qstring:
  926. case type_utf8:
  927. if (thisSize == UNKNOWN_LENGTH)
  928. {
  929. minSize = sizeof(size32_t);
  930. maxSize = getMaxSize(expr);
  931. }
  932. break;
  933. case type_varstring:
  934. if (thisSize == UNKNOWN_LENGTH)
  935. {
  936. minSize = 1;
  937. maxSize = getMaxSize(expr);
  938. }
  939. break;
  940. case type_varunicode:
  941. if (thisSize == UNKNOWN_LENGTH)
  942. {
  943. minSize = sizeof(UChar);
  944. maxSize = getMaxSize(expr);
  945. }
  946. break;
  947. case type_set:
  948. if (thisSize == UNKNOWN_LENGTH)
  949. {
  950. minSize = sizeof(size32_t)+sizeof(bool);
  951. maxSize = getMaxSize(expr);
  952. }
  953. break;
  954. case type_alien:
  955. {
  956. IHqlAlienTypeInfo * alien = queryAlienType(type);
  957. thisSize = alien->getPhysicalTypeSize();
  958. if (thisSize == UNKNOWN_LENGTH)
  959. {
  960. IHqlExpression * lengthAttr = queryUncastExpr(alien->queryLengthFunction());
  961. if (lengthAttr->isConstant() && !lengthAttr->isFunction())
  962. {
  963. OwnedHqlExpr folded = foldHqlExpression(lengthAttr);
  964. if (folded->queryValue())
  965. thisSize = (unsigned)getIntValue(folded);
  966. }
  967. }
  968. if (thisSize == UNKNOWN_LENGTH)
  969. {
  970. minSize = 0;
  971. IHqlExpression * maxSizeExpr = expr->queryProperty(maxSizeAtom);
  972. if (!maxSizeExpr)
  973. maxSizeExpr = expr->queryProperty(maxLengthAtom);
  974. if (maxSizeExpr)
  975. maxSize = (unsigned)getIntValue(maxSizeExpr->queryChild(0));
  976. else
  977. maxSize = alien->getMaxSize();
  978. }
  979. break;
  980. }
  981. case type_packedint:
  982. minSize = 1;
  983. maxSize = (type->queryPromotedType()->getSize()+1);
  984. thisSize = (maxSize > 2) ? 2 : 1;
  985. break;
  986. case type_any:
  987. minSize = 1;
  988. maxSize = getMaxSize(expr);
  989. break;
  990. default:
  991. assertex(thisSize != UNKNOWN_LENGTH);
  992. break;
  993. }
  994. }
  995. if (thisMaxSizeExpr)
  996. maxSize = UNKNOWN_LENGTH;
  997. if (thisSize == UNKNOWN_LENGTH)
  998. thisSize = guessSize(minSize, maxSize);
  999. else
  1000. {
  1001. if (minSize == UNKNOWN_LENGTH)
  1002. minSize = thisSize;
  1003. if (maxSize == 0)
  1004. maxSize = thisSize;
  1005. }
  1006. if ((thisSize == minSize) && (minSize == maxSize))
  1007. {
  1008. OwnedHqlExpr attr = getFixedSizeAttr(thisSize);
  1009. return meta.addAttribute(expr, EPsize, attr);
  1010. }
  1011. if (!thisMaxSizeExpr)
  1012. thisMaxSizeExpr.setown((maxSize == UNKNOWN_LENGTH) ? createAttribute(unknownSizeFieldAtom) : getSizetConstant(maxSize));
  1013. OwnedHqlExpr attr = createExprAttribute(_propSize_Atom, getSizetConstant(thisSize), getSizetConstant(minSize), thisMaxSizeExpr.getClear());
  1014. return meta.addAttribute(expr, EPsize, attr);
  1015. }
  1016. //no_ifblock
  1017. static IHqlExpression * evaluateIfBlockAttrSize(IHqlExpression * expr)
  1018. {
  1019. IHqlExpression * size = expr->queryChild(1)->queryAttribute(EPsize);
  1020. unsigned averageSize = (unsigned)getIntValue(size->queryChild(0), 0)/2;
  1021. OwnedHqlExpr attr = createExprAttribute(_propSize_Atom, getSizetConstant(averageSize), getSizetConstant(0), LINK(size->queryChild(2)));
  1022. return meta.addAttribute(expr, EPsize, attr);
  1023. }
  1024. //no_record
  1025. static IHqlExpression * evaluateRecordAttrSize(IHqlExpression * expr)
  1026. {
  1027. unsigned __int64 expectedSize = 0;
  1028. unsigned __int64 minimumSize = 0;
  1029. unsigned __int64 maximumSize = 0;
  1030. OwnedHqlExpr maximumSizeExpr;
  1031. OwnedHqlExpr derivedSizeExpr;
  1032. bool hasUnknownMaxSizeField = false;
  1033. BitfieldPacker packer;
  1034. ForEachChild(i, expr)
  1035. {
  1036. IHqlExpression * cur = expr->queryChild(i);
  1037. ITypeInfo * type = cur->queryType();
  1038. if (type && type->getTypeCode() == type_bitfield)
  1039. {
  1040. unsigned thisBitOffset, thisBits;
  1041. if (!packer.checkSpaceAvailable(thisBitOffset, thisBits, type))
  1042. {
  1043. size32_t thisSize = type->queryChildType()->getSize();
  1044. expectedSize += thisSize;
  1045. minimumSize += thisSize;
  1046. maximumSize += thisSize;
  1047. }
  1048. }
  1049. else
  1050. {
  1051. packer.reset();
  1052. IHqlExpression * size = cur->queryAttribute(EPsize);
  1053. if (size)
  1054. {
  1055. expectedSize += (size32_t)getIntValue(size->queryChild(0));
  1056. minimumSize += (size32_t)getIntValue(size->queryChild(1));
  1057. IHqlExpression * maxExpr = size->queryChild(2);
  1058. if (maxExpr->queryValue())
  1059. maximumSize += (size32_t)getIntValue(maxExpr);
  1060. else if (maxExpr->isAttribute())
  1061. {
  1062. assertex(maxExpr->queryName() == unknownSizeFieldAtom);
  1063. hasUnknownMaxSizeField = true;
  1064. }
  1065. else
  1066. extendAdd(maximumSizeExpr, maxExpr);
  1067. }
  1068. }
  1069. }
  1070. if ((minimumSize != maximumSize) || maximumSizeExpr || hasUnknownMaxSizeField)
  1071. {
  1072. IHqlExpression * maxLength = queryPropertyChild(expr, maxLengthAtom, 0);
  1073. if (maxLength)
  1074. {
  1075. if (!hasUnknownMaxSizeField)
  1076. {
  1077. if (maximumSize || !maximumSizeExpr)
  1078. {
  1079. OwnedHqlExpr maxExpr = getSizetConstant(truncMaxlength(maximumSize));
  1080. extendAdd(maximumSizeExpr, maxExpr);
  1081. }
  1082. derivedSizeExpr.set(maximumSizeExpr);
  1083. }
  1084. maximumSize = (unsigned)getIntValue(maxLength, UNKNOWN_LENGTH);
  1085. maximumSizeExpr.clear();
  1086. if (derivedSizeExpr)
  1087. {
  1088. //If not a constant then it is derived from the default maxlength, so the explicit maxlength is better
  1089. //otherwise use the minimum value
  1090. if (derivedSizeExpr->queryValue())
  1091. {
  1092. unsigned maxDerived = (unsigned)getIntValue(derivedSizeExpr);
  1093. if (maximumSize > maxDerived)
  1094. maximumSize = maxDerived;
  1095. }
  1096. }
  1097. }
  1098. else if (hasUnknownMaxSizeField)
  1099. {
  1100. OwnedHqlExpr maxExpr = getSizetConstant(truncMaxlength(maximumSize));
  1101. extendAdd(maximumSizeExpr, maxExpr);
  1102. maximumSize = 0;
  1103. //?create an expression to represent
  1104. // if (totalSize * 2 > defaultMaxRecordSize, totalSize + defaultMaxRecordSize / 2, defaultMaxRecordSize);
  1105. IHqlExpression * defaultMaxLength = queryDefaultMaxRecordLengthExpr();
  1106. OwnedHqlExpr minmax = LINK(maximumSizeExpr);
  1107. OwnedHqlExpr minMaxTimes2 = createValue(no_mul, defaultMaxLength->getType(), LINK(minmax), getSizetConstant(2));
  1108. OwnedHqlExpr cond = createBoolExpr(no_gt, LINK(minMaxTimes2), LINK(defaultMaxLength));
  1109. OwnedHqlExpr trueExpr = createValue(no_add, defaultMaxLength->getType(), LINK(minmax), createValue(no_div, defaultMaxLength->getType(), LINK(defaultMaxLength), getSizetConstant(2)));
  1110. maximumSizeExpr.setown(createValue(no_if, defaultMaxLength->getType(), LINK(cond), LINK(trueExpr), LINK(defaultMaxLength)));
  1111. }
  1112. }
  1113. if ((maximumSize == 0) && expr->hasProperty(_nonEmpty_Atom))
  1114. {
  1115. expectedSize = 1;
  1116. minimumSize = 1;
  1117. maximumSize = 1;
  1118. }
  1119. if (maximumSize || !maximumSizeExpr)
  1120. {
  1121. OwnedHqlExpr maxExpr = getSizetConstant(truncMaxlength(maximumSize));
  1122. extendAdd(maximumSizeExpr, maxExpr);
  1123. }
  1124. HqlExprArray args;
  1125. args.append(*getSizetConstant(truncMaxlength(expectedSize)));
  1126. args.append(*getSizetConstant(truncMaxlength(minimumSize)));
  1127. args.append(*LINK(maximumSizeExpr));
  1128. if (derivedSizeExpr)
  1129. args.append(*LINK(derivedSizeExpr));
  1130. OwnedHqlExpr sizeAttr = createExprAttribute(_propSize_Atom, args);
  1131. return meta.addAttribute(expr, EPsize, sizeAttr);
  1132. }
  1133. static IHqlExpression * evaluateAttrSize(IHqlExpression * expr)
  1134. {
  1135. switch (expr->getOperator())
  1136. {
  1137. case no_field:
  1138. return evaluateFieldAttrSize(expr);
  1139. case no_ifblock:
  1140. return evaluateIfBlockAttrSize(expr);
  1141. case no_record:
  1142. return evaluateRecordAttrSize(expr);
  1143. case no_transform:
  1144. case no_newtransform:
  1145. {
  1146. //MORE: This could calculate a better estimate for the size of the record by taking into account any constant values or datasets that are assigned.
  1147. IHqlExpression * record = expr->queryRecord();
  1148. IHqlExpression * recordSize = record->queryAttribute(EPsize);
  1149. return meta.addAttribute(expr, EPsize, recordSize);
  1150. }
  1151. }
  1152. IHqlExpression * record = expr->queryRecord();
  1153. if (record)
  1154. return record->queryAttribute(EPsize);
  1155. return NULL;
  1156. }
  1157. IHqlExpression * getSerializedForm(IHqlExpression * expr, IAtom * variation)
  1158. {
  1159. return LINK(querySerializedForm(expr, variation));
  1160. }
  1161. ITypeInfo * getSerializedForm(ITypeInfo * type, IAtom * variation)
  1162. {
  1163. if (!type)
  1164. return NULL;
  1165. switch (type->getTypeCode())
  1166. {
  1167. case type_record:
  1168. {
  1169. IHqlExpression * record = queryRecord(queryUnqualifiedType(type));
  1170. IHqlExpression * serializedRecord = querySerializedForm(record, variation);
  1171. if (record == serializedRecord)
  1172. return LINK(type);
  1173. return cloneModifiers(type, serializedRecord->queryType());
  1174. }
  1175. case type_row:
  1176. case type_transform:
  1177. case type_table:
  1178. case type_groupedtable:
  1179. {
  1180. //MORE: If (variant == internalAtom) consider using a format that prefixes the dataset with a count instead of a size
  1181. OwnedITypeInfo noOutOfLineType = removeModifier(type, typemod_outofline);
  1182. OwnedITypeInfo noLinkCountType = removeProperty(noOutOfLineType, _linkCounted_Atom);
  1183. ITypeInfo * childType = noLinkCountType->queryChildType();
  1184. OwnedITypeInfo newChild = getSerializedForm(childType, variation);
  1185. return replaceChildType(noLinkCountType, newChild);
  1186. }
  1187. case type_dictionary:
  1188. {
  1189. OwnedITypeInfo noOutOfLineType = removeModifier(type, typemod_outofline);
  1190. OwnedITypeInfo noLinkCountType = removeProperty(noOutOfLineType, _linkCounted_Atom);
  1191. ITypeInfo * childType = noLinkCountType->queryChildType();
  1192. OwnedITypeInfo newChild = getSerializedForm(childType, variation);
  1193. if (variation == internalAtom)
  1194. return replaceChildType(noLinkCountType, newChild);
  1195. OwnedITypeInfo datasetType = makeTableType(LINK(newChild), NULL, NULL, NULL);
  1196. return cloneModifiers(noLinkCountType, datasetType);
  1197. }
  1198. }
  1199. return LINK(type);
  1200. }
  1201. //-- Attribute: unadorned form (no annotations) -------------------------------------------------------------------------------
  1202. //Use a transformer to implement the mapping - since it contains the logic for processing types etc, but use the attributes as an extra cache.
  1203. class HqlCachedAttributeTransformer : public QuickHqlTransformer
  1204. {
  1205. public:
  1206. HqlCachedAttributeTransformer(HqlTransformerInfo & _transformInfo, ExprPropKind _propKind);
  1207. virtual IHqlExpression * transform(IHqlExpression * expr);
  1208. protected:
  1209. ExprPropKind propKind;
  1210. };
  1211. HqlCachedAttributeTransformer::HqlCachedAttributeTransformer(HqlTransformerInfo & _transformInfo, ExprPropKind _propKind)
  1212. : QuickHqlTransformer(_transformInfo, NULL), propKind(_propKind)
  1213. {
  1214. }
  1215. IHqlExpression * HqlCachedAttributeTransformer::transform(IHqlExpression * expr)
  1216. {
  1217. IHqlExpression * match = meta.queryExistingAttribute(expr, propKind);
  1218. if (match)
  1219. return LINK(match);
  1220. OwnedHqlExpr transformed = QuickHqlTransformer::transform(expr);
  1221. if (transformed != expr)
  1222. {
  1223. //Tag serialized form so don't re-evaluate
  1224. meta.addAttribute(transformed, propKind, transformed);
  1225. }
  1226. meta.addAttribute(expr, propKind, transformed);
  1227. return transformed.getClear();
  1228. }
  1229. class HqlUnadornedNormalizer : public HqlCachedAttributeTransformer
  1230. {
  1231. public:
  1232. HqlUnadornedNormalizer();
  1233. virtual IHqlExpression * createTransformed(IHqlExpression * expr);
  1234. virtual ITypeInfo * transformType(ITypeInfo * type);
  1235. };
  1236. static HqlTransformerInfo hqlUnadornedInfo("HqlUnadornedNormalizer");
  1237. HqlUnadornedNormalizer::HqlUnadornedNormalizer() : HqlCachedAttributeTransformer(hqlUnadornedInfo, EPunadorned)
  1238. {
  1239. }
  1240. ITypeInfo * HqlUnadornedNormalizer::transformType(ITypeInfo * type)
  1241. {
  1242. return HqlCachedAttributeTransformer::transformType(queryUnqualifiedType(type));
  1243. }
  1244. IHqlExpression * HqlUnadornedNormalizer::createTransformed(IHqlExpression * expr)
  1245. {
  1246. IHqlExpression * body = expr->queryBody(false);
  1247. if (expr != body)
  1248. return transform(body);
  1249. node_operator op = expr->getOperator();
  1250. switch (op)
  1251. {
  1252. case no_field:
  1253. {
  1254. //Remove the default values...
  1255. HqlExprArray children;
  1256. bool same = true;
  1257. ForEachChild(idx, expr)
  1258. {
  1259. IHqlExpression * cur = expr->queryChild(idx);
  1260. if (cur->isAttribute())
  1261. {
  1262. IHqlExpression * mapped = transform(cur);
  1263. children.append(*mapped);
  1264. if (mapped != cur)
  1265. same = false;
  1266. }
  1267. else
  1268. same = false;
  1269. }
  1270. ITypeInfo * type = expr->queryType();
  1271. OwnedITypeInfo newType = transformType(type);
  1272. IIdAtom * id = expr->queryId();
  1273. //Fields names compare case-insignificantly therefore the field name is converted to lower case so that
  1274. //equivalent fields are mapped to the same normalized expression.
  1275. IIdAtom * newid = createIdAtom(id->lower()->str());
  1276. if ((type != newType) || (id != newid))
  1277. return createField(newid, newType.getClear(), children);
  1278. if (same)
  1279. return LINK(expr);
  1280. return expr->clone(children);
  1281. }
  1282. case no_param:
  1283. {
  1284. ITypeInfo * type = expr->queryType();
  1285. OwnedITypeInfo newType = transformType(type);
  1286. HqlExprArray children;
  1287. transformChildren(expr, children); // could just unwind
  1288. return createParameter(expr->queryId(), UnadornedParameterIndex, newType.getClear(), children);
  1289. }
  1290. }
  1291. return HqlCachedAttributeTransformer::createTransformed(expr);
  1292. }
  1293. static IHqlExpression * evaluateAttrUnadorned(IHqlExpression * expr)
  1294. {
  1295. HqlUnadornedNormalizer normalizer;
  1296. //NB: Also has the side-effect of adding any missing attributes
  1297. OwnedHqlExpr dummy = normalizer.transform(expr);
  1298. return meta.queryExistingAttribute(expr, EPunadorned);
  1299. }
  1300. //---------------------------------------------------------------------------------
  1301. static unsigned queryFieldAlignment(ITypeInfo * type)
  1302. {
  1303. unsigned size = type->getSize();
  1304. type_t tc = type->getTypeCode();
  1305. switch (tc)
  1306. {
  1307. case type_int:
  1308. case type_swapint:
  1309. switch (size)
  1310. {
  1311. case 2:
  1312. case 4:
  1313. case 8:
  1314. return size;
  1315. }
  1316. return 1;
  1317. case type_boolean:
  1318. case type_real:
  1319. return size;
  1320. case type_string:
  1321. case type_varstring:
  1322. case type_data:
  1323. if (hasLinkCountedModifier(type))
  1324. return sizeof(void *);
  1325. return 1;
  1326. case type_char:
  1327. case type_decimal:
  1328. case type_packedint:
  1329. case type_set: // would be nicer if properly aligned. Even nicer if the (isall,size) were separate
  1330. case type_utf8:
  1331. case type_qstring:
  1332. case type_any:
  1333. return 1;
  1334. case type_bitfield:
  1335. case type_array:
  1336. case type_enumerated:
  1337. return queryFieldAlignment(type->queryChildType());
  1338. case type_pointer:
  1339. return sizeof(void *);
  1340. case type_table:
  1341. case type_row:
  1342. case type_groupedtable:
  1343. if (hasLinkCountedModifier(type))
  1344. return sizeof(void *);
  1345. return 1;
  1346. case type_alien:
  1347. return queryFieldAlignment(type->queryChildType());
  1348. case type_unicode:
  1349. case type_varunicode:
  1350. return sizeof(UChar);
  1351. default:
  1352. throwUnexpectedType(type);
  1353. }
  1354. }
  1355. static unsigned queryFieldAlignment(IHqlExpression * field)
  1356. {
  1357. return queryFieldAlignment(field->queryType());
  1358. }
  1359. class FieldAlignCompare : public ICompare
  1360. {
  1361. public:
  1362. FieldAlignCompare(const HqlExprCopyArray & _original) : original(_original) {}
  1363. virtual int docompare(const void * inleft, const void * inright) const
  1364. {
  1365. IInterface * pleft = (IInterface *)(inleft);
  1366. IInterface * pright = (IInterface *)(inright);
  1367. IHqlExpression * left = static_cast<IHqlExpression *>(pleft);
  1368. IHqlExpression * right = static_cast<IHqlExpression *>(pright);
  1369. IHqlExpression * leftSizeAttr = left->queryAttribute(EPsize);
  1370. IHqlExpression * rightSizeAttr = right->queryAttribute(EPsize);
  1371. bool leftIsFixedSize = leftSizeAttr->queryChild(1) == leftSizeAttr->queryChild(2);
  1372. bool rightIsFixedSize = rightSizeAttr->queryChild(1) == rightSizeAttr->queryChild(2);
  1373. if (leftIsFixedSize && rightIsFixedSize)
  1374. {
  1375. bool leftIsBitfield = (left->queryType()->getTypeCode() == type_bitfield);
  1376. bool rightIsBitfield = (right->queryType()->getTypeCode() == type_bitfield);
  1377. if (!leftIsBitfield && !rightIsBitfield)
  1378. {
  1379. //First choose the largest alignment first
  1380. unsigned leftAlign = queryFieldAlignment(left);
  1381. unsigned rightAlign = queryFieldAlignment(right);
  1382. if (leftAlign != rightAlign)
  1383. return (int)(rightAlign - leftAlign);
  1384. #if 0
  1385. //Then choose smallest item next - so access is more compact
  1386. unsigned leftSize = getIntValue(leftSizeAttr->queryChild(0));
  1387. unsigned rightSize = getIntValue(rightSizeAttr->queryChild(0));
  1388. if (leftSize != rightSize)
  1389. return (int)(leftSize - rightSize);
  1390. #endif
  1391. //fall through to default
  1392. }
  1393. else if (!leftIsBitfield)
  1394. return -1;
  1395. else if (!rightIsBitfield)
  1396. return +1;
  1397. else
  1398. {
  1399. //Two bitfields - need better handling
  1400. }
  1401. }
  1402. else if (leftIsFixedSize)
  1403. return -1;
  1404. else if (rightIsFixedSize)
  1405. return +1;
  1406. else
  1407. {
  1408. //both variable size
  1409. }
  1410. //default processing currently by name - may change to use original order
  1411. return original.find(*left) - original.find(*right);
  1412. // return stricmp(left->queryName()->str(), right->queryName()->str());
  1413. }
  1414. protected:
  1415. const HqlExprCopyArray & original;
  1416. } ;
  1417. static bool optimizeFieldOrder(HqlExprArray & out, const HqlExprCopyArray & in)
  1418. {
  1419. HqlExprCopyArray sorted;
  1420. appendArray(sorted, in);
  1421. FieldAlignCompare compare(in);
  1422. qsortvec((void * *)sorted.getArray(), sorted.ordinality(), compare);
  1423. ForEachItemIn(i, sorted)
  1424. out.append(OLINK(sorted.item(i)));
  1425. return true;
  1426. }
  1427. static IHqlExpression * evaluateAttrAligned(IHqlExpression * expr)
  1428. {
  1429. bool same = true;
  1430. HqlExprArray result;
  1431. HqlExprCopyArray reorder;
  1432. assertex(expr->getOperator() == no_record);
  1433. ForEachChild(i, expr)
  1434. {
  1435. IHqlExpression * cur = expr->queryChild(i);
  1436. switch (cur->getOperator())
  1437. {
  1438. case no_field:
  1439. reorder.append(*cur);
  1440. break;
  1441. case no_ifblock:
  1442. case no_record:
  1443. default:
  1444. if (optimizeFieldOrder(result, reorder))
  1445. same = false;
  1446. result.append(*LINK(cur));
  1447. reorder.kill();
  1448. break;
  1449. }
  1450. }
  1451. if (optimizeFieldOrder(result, reorder))
  1452. same = false;
  1453. OwnedHqlExpr newRecord = same ? LINK(expr) : expr->clone(result);
  1454. if (expr == newRecord)
  1455. return meta.addAttribute(expr, EPaligned, queryAlignedAttr());
  1456. meta.addAttribute(newRecord, EPaligned, queryAlignedAttr());
  1457. OwnedHqlExpr alignAttr = createExprAttribute(_propAligned_Atom, newRecord.getClear());
  1458. return meta.addAttribute(expr, EPaligned, alignAttr);
  1459. }
  1460. //---------------------------------------------------------------------------------
  1461. MODULE_INIT(INIT_PRIORITY_HQLMETA)
  1462. {
  1463. for (node_operator op = (node_operator)(no_none+1); op < no_last_op; op = (node_operator)(op+1))
  1464. getOperatorMetaFlags(op);
  1465. return true;
  1466. }
  1467. MODULE_EXIT()
  1468. {
  1469. }
  1470. //---------------------------------------------------------------------------------
  1471. // Functions that provide simple information about an operator, that don't require tree traversal.
  1472. bool isLocalActivity(IHqlExpression * expr)
  1473. {
  1474. switch (expr->getOperator())
  1475. {
  1476. case no_distribute:
  1477. case no_keyeddistribute:
  1478. case no_if:
  1479. case no_chooseds:
  1480. return false;
  1481. case no_forcelocal:
  1482. case no_combinegroup:
  1483. case no_regroup:
  1484. return true;
  1485. //local makes no sense for the following
  1486. case no_throughaggregate:
  1487. case no_filter:
  1488. case no_related:
  1489. return false;
  1490. case no_group:
  1491. case no_grouped:
  1492. case no_dedup:
  1493. case no_cogroup:
  1494. case no_cosort:
  1495. case no_sort:
  1496. case no_subsort:
  1497. case no_sorted:
  1498. case no_topn:
  1499. case no_iterate:
  1500. case no_rollup:
  1501. case no_newaggregate:
  1502. case no_merge:
  1503. case no_choosen:
  1504. case no_choosesets:
  1505. case no_enth:
  1506. case no_sample:
  1507. case no_buildindex:
  1508. case no_limit:
  1509. case no_catchds:
  1510. case no_newkeyindex:
  1511. case no_table:
  1512. case no_process:
  1513. case no_assertsorted:
  1514. case no_assertgrouped:
  1515. case no_nonempty:
  1516. case no_loop:
  1517. case no_graphloop:
  1518. case no_aggregate:
  1519. case no_combine:
  1520. assertex(localChangesActivity(expr));
  1521. return expr->hasProperty(localAtom);
  1522. case no_newusertable:
  1523. if (isAggregateDataset(expr))
  1524. return expr->hasProperty(localAtom);
  1525. return false;
  1526. case no_hqlproject: // count project may result in distributed output, but not be local(!)
  1527. if (expr->hasProperty(_countProject_Atom))
  1528. return expr->hasProperty(localAtom);
  1529. return false;
  1530. case no_denormalize:
  1531. case no_denormalizegroup:
  1532. case no_join:
  1533. case no_mergejoin: //???
  1534. case no_nwayjoin:
  1535. case no_nwaymerge:
  1536. case no_selfjoin:
  1537. case no_joincount:
  1538. assertex(localChangesActivity(expr));
  1539. return expr->hasProperty(localAtom);
  1540. case no_compound:
  1541. return isLocalActivity(expr->queryChild(1));
  1542. case no_compound_diskread:
  1543. case no_compound_disknormalize:
  1544. case no_compound_diskaggregate:
  1545. case no_compound_diskcount:
  1546. case no_compound_diskgroupaggregate:
  1547. case no_compound_indexread:
  1548. case no_compound_indexnormalize:
  1549. case no_compound_indexaggregate:
  1550. case no_compound_indexcount:
  1551. case no_compound_indexgroupaggregate:
  1552. {
  1553. if (expr->hasProperty(localAtom))
  1554. return true;
  1555. IHqlExpression * root = queryRoot(expr);
  1556. while (root->getOperator() == no_select)
  1557. {
  1558. bool isNew;
  1559. IHqlExpression * ds = querySelectorDataset(root, isNew);
  1560. if (!isNew)
  1561. break;
  1562. root = queryRoot(ds);
  1563. }
  1564. return isLocalActivity(root);
  1565. }
  1566. case no_compound_childread:
  1567. case no_compound_childnormalize:
  1568. case no_compound_childaggregate:
  1569. case no_compound_childcount:
  1570. case no_compound_childgroupaggregate:
  1571. case no_compound_selectnew:
  1572. case no_compound_inline:
  1573. return true;
  1574. default:
  1575. {
  1576. assertex(!localChangesActivity(expr));
  1577. ITypeInfo * exprType = expr->queryType();
  1578. if (exprType && (exprType->queryDistributeInfo() != NULL))
  1579. return !isGroupedActivity(expr);
  1580. return false;
  1581. }
  1582. }
  1583. }
  1584. bool isGroupedAggregateActivity(IHqlExpression * expr, IHqlExpression * grouping)
  1585. {
  1586. if (grouping && !grouping->isAttribute())
  1587. return expr->hasProperty(groupedAtom);
  1588. return isGrouped(expr->queryChild(0));
  1589. }
  1590. bool isGroupedActivity(IHqlExpression * expr)
  1591. {
  1592. switch (expr->getOperator())
  1593. {
  1594. case no_group:
  1595. case no_enth:
  1596. case no_distribute:
  1597. case no_fetch:
  1598. case no_keyeddistribute:
  1599. case no_merge:
  1600. case no_graphloop:
  1601. return false;
  1602. case no_denormalize:
  1603. case no_denormalizegroup:
  1604. case no_regroup:
  1605. case no_addfiles:
  1606. case no_join:
  1607. case no_mergejoin:
  1608. case no_nwayjoin:
  1609. case no_nwaymerge:
  1610. case no_selfjoin:
  1611. case no_combine:
  1612. case no_combinegroup:
  1613. case no_if:
  1614. case no_chooseds:
  1615. case no_case:
  1616. case no_map:
  1617. case no_loop:
  1618. case no_choosen:
  1619. case no_process:
  1620. case no_nonempty:
  1621. case no_related:
  1622. case no_pipe:
  1623. return isGrouped(expr->queryType());
  1624. case no_selectfields:
  1625. case no_usertable:
  1626. return isGroupedAggregateActivity(expr, expr->queryChild(2));
  1627. case no_aggregate:
  1628. case no_newaggregate:
  1629. case no_newusertable:
  1630. return isGroupedAggregateActivity(expr, expr->queryChild(3));
  1631. case no_null:
  1632. case no_anon:
  1633. case no_pseudods:
  1634. case no_fail:
  1635. case no_skip:
  1636. case no_all:
  1637. case no_workunit_dataset:
  1638. case no_getgraphresult:
  1639. case no_getgraphloopresult:
  1640. case no_getresult:
  1641. case no_rows:
  1642. case no_internalselect:
  1643. case no_delayedselect:
  1644. case no_unboundselect:
  1645. case no_libraryselect:
  1646. case no_purevirtual:
  1647. case no_libraryinput:
  1648. //All the source activities
  1649. return isGrouped(expr->queryType());
  1650. case no_compound:
  1651. return isGroupedActivity(expr->queryChild(1));
  1652. case no_output:
  1653. return expr->hasProperty(groupedAtom) && isGroupedActivity(expr->queryChild(0));
  1654. default:
  1655. if (getNumChildTables(expr) == 1)
  1656. return isGrouped(expr->queryChild(0));
  1657. return false;
  1658. }
  1659. }
  1660. bool localChangesActivityData(IHqlExpression * expr)
  1661. {
  1662. switch (expr->getOperator())
  1663. {
  1664. case no_compound_diskread:
  1665. case no_compound_disknormalize:
  1666. case no_compound_diskaggregate:
  1667. case no_compound_diskcount:
  1668. case no_compound_diskgroupaggregate:
  1669. case no_compound_indexread:
  1670. case no_compound_indexnormalize:
  1671. case no_compound_indexaggregate:
  1672. case no_compound_indexcount:
  1673. case no_compound_indexgroupaggregate:
  1674. case no_newkeyindex:
  1675. case no_table:
  1676. return true;
  1677. case no_denormalize:
  1678. case no_denormalizegroup:
  1679. case no_join:
  1680. case no_joincount:
  1681. return isKeyedJoin(expr); // keyed join, local means only look at the local key part.
  1682. //case no_fetch:////????
  1683. }
  1684. return false;
  1685. }
  1686. bool localChangesActivityAction(IHqlExpression * expr)
  1687. {
  1688. switch (expr->getOperator())
  1689. {
  1690. case no_dedup:
  1691. case no_group:
  1692. case no_grouped:
  1693. case no_cogroup:
  1694. case no_cosort:
  1695. case no_sort:
  1696. case no_subsort:
  1697. case no_sorted:
  1698. case no_topn:
  1699. case no_iterate:
  1700. case no_rollup:
  1701. case no_newaggregate:
  1702. case no_aggregate:
  1703. case no_merge:
  1704. case no_choosen:
  1705. case no_choosesets:
  1706. case no_enth:
  1707. case no_sample:
  1708. case no_buildindex:
  1709. case no_limit:
  1710. case no_catchds:
  1711. case no_compound_diskaggregate:
  1712. case no_compound_diskgroupaggregate:
  1713. case no_compound_indexaggregate:
  1714. case no_compound_indexgroupaggregate:
  1715. case no_process:
  1716. case no_assertsorted:
  1717. case no_assertgrouped:
  1718. case no_nonempty:
  1719. case no_loop:
  1720. case no_graphloop:
  1721. case no_combine:
  1722. return true;
  1723. case no_hqlproject:
  1724. return expr->hasProperty(_countProject_Atom);
  1725. case no_newusertable:
  1726. return isAggregateDataset(expr);
  1727. case no_denormalize:
  1728. case no_denormalizegroup:
  1729. case no_join:
  1730. case no_mergejoin: //???
  1731. case no_nwayjoin:
  1732. case no_nwaymerge:
  1733. case no_selfjoin:
  1734. case no_joincount:
  1735. return !isKeyedJoin(expr); // Keyed joins always
  1736. }
  1737. return false;
  1738. }
  1739. bool localChangesActivity(IHqlExpression * expr)
  1740. {
  1741. return localChangesActivityData(expr) || localChangesActivityAction(expr);
  1742. }
  1743. unsigned isStreamingActivity(IHqlExpression * expr)
  1744. {
  1745. switch (expr->getOperator())
  1746. {
  1747. case no_sort:
  1748. case no_topn:
  1749. if (isGrouped(expr))
  1750. return 0;
  1751. return 1;
  1752. case no_join:
  1753. case no_denormalize:
  1754. if (isKeyedJoin(expr))
  1755. return 0;
  1756. if (expr->hasProperty(lookupAtom))
  1757. return 2;
  1758. return 3; // ok if lhs/rhs are sorted...
  1759. case no_selfjoin:
  1760. return 1; // ok if sorted.
  1761. case no_dedup:
  1762. if (isGrouped(expr))
  1763. return 0;
  1764. if (expr->hasProperty(hashAtom) || expr->hasProperty(allAtom))
  1765. return false;
  1766. break;
  1767. case no_addfiles:
  1768. //if ordered and same item is read by lhs and rhs
  1769. // ordered addfiles?
  1770. break;
  1771. case no_libraryselect:
  1772. //????
  1773. return 1;
  1774. case no_spillgraphresult:
  1775. case no_setgraphresult:
  1776. case no_setgraphloopresult:
  1777. break; //except for default loop output because likely to be read by a child as a whole
  1778. }
  1779. return 0;
  1780. }
  1781. // More complex derived information which requires tree traversal.
  1782. bool isInlineTrivialDataset(IHqlExpression * expr)
  1783. {
  1784. loop
  1785. {
  1786. switch (expr->getOperator())
  1787. {
  1788. case no_selectnth:
  1789. switch (expr->queryChild(1)->getOperator())
  1790. {
  1791. case no_constant:
  1792. case no_counter:
  1793. break;
  1794. default:
  1795. return false;
  1796. }
  1797. expr = expr->queryChild(0);
  1798. break;
  1799. case no_workunit_dataset:
  1800. case no_getresult:
  1801. case no_null:
  1802. return true;
  1803. case no_getgraphresult:
  1804. return !expr->hasProperty(_distributed_Atom);
  1805. default:
  1806. return false;
  1807. }
  1808. }
  1809. }
  1810. bool isTrivialDataset(IHqlExpression * expr)
  1811. {
  1812. loop
  1813. {
  1814. if (isInlineTrivialDataset(expr))
  1815. return true;
  1816. switch (expr->getOperator())
  1817. {
  1818. case no_translated:
  1819. case no_null:
  1820. case no_temprow:
  1821. case no_projectrow:
  1822. case no_left:
  1823. case no_right:
  1824. case no_id2blob:
  1825. case no_activerow:
  1826. case no_typetransfer:
  1827. case no_rows:
  1828. case no_skip:
  1829. case no_matchattr:
  1830. case no_matchrow:
  1831. case no_libraryinput:
  1832. case no_workunit_dataset:
  1833. case no_activetable:
  1834. case no_top:
  1835. return true;
  1836. case no_select:
  1837. if (!isNewSelector(expr))
  1838. return false;
  1839. if (expr->isDataset())
  1840. return true;
  1841. expr = expr->queryChild(0);
  1842. break;
  1843. case no_selectnth:
  1844. case no_alias:
  1845. case no_sorted:
  1846. case no_distributed:
  1847. case no_grouped:
  1848. case no_preservemeta:
  1849. case no_dataset_alias:
  1850. case no_filter:
  1851. expr = expr->queryChild(0);
  1852. break;
  1853. case no_inlinetable:
  1854. return isConstantDataset(expr);
  1855. default:
  1856. return false;
  1857. }
  1858. }
  1859. }
  1860. static unsigned estimateRowSize(IHqlExpression * record)
  1861. {
  1862. IHqlExpression * size = record->queryAttribute(EPsize);
  1863. if (!size || !size->queryChild(2)->queryValue())
  1864. return UNKNOWN_LENGTH;
  1865. return (unsigned)getIntValue(size->queryChild(0));
  1866. }
  1867. bool reducesRowSize(IHqlExpression * expr)
  1868. {
  1869. //More: This should be improved...., but slightly tricky without doing lots more processing.
  1870. IHqlExpression * transform = queryNewColumnProvider(expr);
  1871. IHqlExpression * prevRecord = expr->queryChild(0)->queryRecord();
  1872. unsigned newRowSize = estimateRowSize(transform->queryRecord());
  1873. unsigned prevRowSize = estimateRowSize(prevRecord);
  1874. if ((newRowSize != UNKNOWN_LENGTH) && (prevRowSize != UNKNOWN_LENGTH))
  1875. return newRowSize < prevRowSize;
  1876. IHqlExpression * record = expr->queryRecord();
  1877. if (getFlatFieldCount(record) < getFlatFieldCount(prevRecord))
  1878. return true;
  1879. return false;
  1880. }
  1881. bool increasesRowSize(IHqlExpression * expr)
  1882. {
  1883. IHqlExpression * transform = queryNewColumnProvider(expr);
  1884. IHqlExpression * prevRecord = expr->queryChild(0)->queryRecord();
  1885. unsigned newRowSize = estimateRowSize(transform);
  1886. unsigned prevRowSize = estimateRowSize(prevRecord);
  1887. if ((newRowSize != UNKNOWN_LENGTH) && (prevRowSize != UNKNOWN_LENGTH))
  1888. return newRowSize > prevRowSize;
  1889. IHqlExpression * record = expr->queryRecord();
  1890. if (getFlatFieldCount(record) > getFlatFieldCount(prevRecord))
  1891. return true;
  1892. return false;
  1893. }
  1894. bool isLimitedDataset(IHqlExpression * expr, bool onFailOnly)
  1895. {
  1896. loop
  1897. {
  1898. if (expr->hasProperty(limitAtom))
  1899. return true;
  1900. switch (expr->getOperator())
  1901. {
  1902. case no_choosen:
  1903. case no_limit:
  1904. // case no_keyedlimit: // not included because it is done before everything else, so filters can be merged in
  1905. if (!onFailOnly || expr->hasProperty(onFailAtom))
  1906. return true;
  1907. break;
  1908. case no_table:
  1909. case no_newkeyindex:
  1910. return false;
  1911. default:
  1912. if (getNumChildTables(expr) != 1)
  1913. return false;
  1914. break;
  1915. }
  1916. expr = expr->queryChild(0);
  1917. }
  1918. }
  1919. bool containsAnyActions(IHqlExpression * expr)
  1920. {
  1921. switch (expr->getOperator())
  1922. {
  1923. case no_comma:
  1924. case no_compound:
  1925. case no_actionlist:
  1926. {
  1927. ForEachChild(i, expr)
  1928. {
  1929. if (containsAnyActions(expr->queryChild(i)))
  1930. return true;
  1931. }
  1932. return false;
  1933. }
  1934. case no_setmeta:
  1935. return false;
  1936. default:
  1937. return true;
  1938. }
  1939. }
  1940. //-- Attribute: record count -------------------------------------------------------------------------------
  1941. unsigned getCardinality(IHqlExpression * expr)
  1942. {
  1943. loop
  1944. {
  1945. switch (expr->getOperator())
  1946. {
  1947. case no_select:
  1948. expr = expr->queryChild(1);
  1949. break;
  1950. case no_constant:
  1951. return 1;
  1952. case no_field:
  1953. {
  1954. IHqlExpression * cardinality = queryPropertyChild(expr, cardinalityAtom, 0);
  1955. if (cardinality)
  1956. return (unsigned)getIntValue(cardinality);
  1957. }
  1958. //fall through:
  1959. default:
  1960. return expr->queryType()->getCardinality();
  1961. }
  1962. }
  1963. }
  1964. bool isSmallGrouping(IHqlExpression * sortlist)
  1965. {
  1966. unsigned __int64 totalCardinality = 1;
  1967. unsigned max = sortlist->numChildren();
  1968. for (unsigned idx = 0; idx < max; idx++)
  1969. {
  1970. IHqlExpression * cur = sortlist->queryChild(idx);
  1971. unsigned cardinality = getCardinality(cur);
  1972. if (!cardinality)
  1973. return false;
  1974. totalCardinality *= cardinality;
  1975. //don't use hash aggregation if larger than 100,000 potential elements
  1976. if (totalCardinality >= 100000)
  1977. return false;
  1978. }
  1979. return true;
  1980. }
  1981. //An estimate of the order of magnitude of the number of rows in a dataset. See function below for artificial thresholds.
  1982. const static unsigned __int64 RCtinyLimit = 10;
  1983. const static unsigned __int64 RCgroupLimit = 1000;
  1984. const static unsigned __int64 RCfewLimit = 100000;
  1985. const static unsigned __int64 RCmemoryLimit = 50000000;
  1986. const static unsigned __int64 RCclusterSizeEstimate = 5000;
  1987. enum RowCountMagnitude
  1988. {
  1989. RCMnone, // 0
  1990. RCMtiny, // < 10
  1991. RCMgroup, // < 1000
  1992. RCMfew, // < 100,000
  1993. RCMmemory, // < memory
  1994. RCMdisk, // who knows?
  1995. RCMunknown
  1996. };
  1997. const char * const magnitudeText[] = {
  1998. "empty",
  1999. "tiny",
  2000. "group",
  2001. "few",
  2002. "memory",
  2003. "disk",
  2004. "unknown"
  2005. };
  2006. inline RowCountMagnitude getRowCountMagnitude(__int64 num)
  2007. {
  2008. if (num == 0)
  2009. return RCMnone;
  2010. if (num <= RCtinyLimit)
  2011. return RCMtiny;
  2012. if (num <= RCgroupLimit)
  2013. return RCMgroup;
  2014. if (num <= RCfewLimit)
  2015. return RCMfew;
  2016. if (num <= RCmemoryLimit)
  2017. return RCMmemory;
  2018. return RCMdisk;
  2019. }
  2020. static IHqlExpression * makeConstant(__int64 value)
  2021. {
  2022. if ((value >= 0) && (size32_t)value == value)
  2023. return getSizetConstant((size32_t)value);
  2024. return createConstant(value);
  2025. }
  2026. struct HqlRowCountInfo
  2027. {
  2028. public:
  2029. HqlRowCountInfo() { setUnknown(RCMnone); }
  2030. void applyChoosen(__int64 limit, bool isLocal);
  2031. void combineAlternatives(const HqlRowCountInfo & other);
  2032. void combineBoth(const HqlRowCountInfo & other);
  2033. bool extractHint(IHqlExpression * hint);
  2034. void limitMin(__int64 value);
  2035. void setEstimate(__int64 n);
  2036. void scaleFixed(__int64 scale);
  2037. void scaleRange(__int64 scale);
  2038. void setMin(__int64 n) { min.setown(makeConstant(n)); }
  2039. void setN(__int64 n);
  2040. void setRange(__int64 low, __int64 high);
  2041. void setUnknown(RowCountMagnitude _magnitude);
  2042. void setMaxMagnitude(RowCountMagnitude _magnitude)
  2043. {
  2044. if (magnitude > _magnitude)
  2045. magnitude = _magnitude;
  2046. }
  2047. IHqlExpression * createRecordCountAttr()
  2048. {
  2049. return createExprAttribute(_propRecordCount_Atom, makeConstant(magnitude), LINK(min), LINK(max));// , LINK(estimate));
  2050. }
  2051. void extract(IHqlExpression * attr)
  2052. {
  2053. assertex(attr->queryName() == _propRecordCount_Atom);
  2054. magnitude = (RowCountMagnitude)getIntValue(attr->queryChild(0));
  2055. min.set(attr->queryChild(1));
  2056. max.set(attr->queryChild(2));
  2057. //estimate.set(attr->queryChild(3));
  2058. }
  2059. inline void setSingleRow() { setN(1); }
  2060. void getText(StringBuffer & text) const;
  2061. __int64 getMin() const { return getIntValue(min); }
  2062. inline bool isSingleRow() const
  2063. {
  2064. return matchesConstantValue(min, 1) && matchesConstantValue(max, 1);
  2065. }
  2066. inline bool alwaysHasRow() const
  2067. {
  2068. return !matchesConstantValue(min, 0);
  2069. }
  2070. public:
  2071. OwnedHqlExpr min; // Absolute minimum - can't be fewer records
  2072. OwnedHqlExpr max; // Absolute maximum - can't be more records
  2073. RowCountMagnitude magnitude; // Expected magnitude. Normally matches max, but may occasionally diverge,.
  2074. //It might be possible to calculate an estimate of the number of rows, but I'm not sure if it
  2075. //is possible to make it significantly more useful than the magnitude.
  2076. // OwnedHqlExpr estimate;
  2077. };
  2078. void HqlRowCountInfo::applyChoosen(__int64 limit, bool isLocal)
  2079. {
  2080. if (getMin() > limit)
  2081. min.setown(makeConstant(limit));
  2082. __int64 maxLimit = isLocal ? RCclusterSizeEstimate*limit : limit;
  2083. if (getIntValue(max, maxLimit+1) > maxLimit)
  2084. max.setown(makeConstant(maxLimit));
  2085. RowCountMagnitude newMagnitude = getRowCountMagnitude(maxLimit);
  2086. if (magnitude > newMagnitude)
  2087. magnitude = newMagnitude;
  2088. }
  2089. void HqlRowCountInfo::combineAlternatives(const HqlRowCountInfo & other)
  2090. {
  2091. if (other.getMin() < getMin())
  2092. min.set(other.min);
  2093. IValue * maxValue = max->queryValue();
  2094. if (maxValue)
  2095. {
  2096. IValue * otherMaxValue = other.max->queryValue();
  2097. if (!otherMaxValue || (otherMaxValue->getIntValue() > maxValue->getIntValue()))
  2098. max.set(other.max);
  2099. }
  2100. if (magnitude < other.magnitude)
  2101. magnitude = other.magnitude;
  2102. }
  2103. void HqlRowCountInfo::combineBoth(const HqlRowCountInfo & other)
  2104. {
  2105. min.setown(makeConstant(getMin()+other.getMin()));
  2106. IValue * maxValue = max->queryValue();
  2107. IValue * otherMaxValue = other.max->queryValue();
  2108. if (!otherMaxValue)
  2109. max.set(other.max);
  2110. else if (maxValue)
  2111. {
  2112. __int64 newMax = maxValue->getIntValue()+otherMaxValue->getIntValue();
  2113. max.setown(makeConstant(newMax));
  2114. }
  2115. //Appending shouldn't change to a larger magnitude.
  2116. if (magnitude < other.magnitude)
  2117. magnitude = other.magnitude;
  2118. }
  2119. bool HqlRowCountInfo::extractHint(IHqlExpression * hint)
  2120. {
  2121. IHqlExpression * arg = hint->queryChild(0);
  2122. if (!arg)
  2123. return false;
  2124. switch (arg->getOperator())
  2125. {
  2126. case no_constant:
  2127. setN(getIntValue(arg));
  2128. return true;
  2129. case no_rangeto:
  2130. setRange(0, getIntValue(arg->queryChild(0)));
  2131. return true;
  2132. case no_range:
  2133. setRange(getIntValue(arg->queryChild(0)), getIntValue(arg->queryChild(1)));
  2134. return true;
  2135. case no_attr:
  2136. {
  2137. IAtom * name = arg->queryName();
  2138. RowCountMagnitude magnitude = RCMnone;
  2139. if (name == tinyAtom)
  2140. magnitude = RCMtiny;
  2141. else if (name == groupAtom)
  2142. magnitude = RCMgroup;
  2143. else if (name == fewAtom)
  2144. magnitude = RCMfew;
  2145. else if (name == memoryAtom)
  2146. magnitude = RCMmemory;
  2147. if (magnitude != RCMnone)
  2148. {
  2149. setUnknown(magnitude);
  2150. return true;
  2151. }
  2152. break;
  2153. }
  2154. }
  2155. return false;
  2156. }
  2157. void HqlRowCountInfo::getText(StringBuffer & text) const
  2158. {
  2159. min->queryValue()->generateECL(text);
  2160. text.append("..");
  2161. if (max->queryValue())
  2162. max->queryValue()->generateECL(text);
  2163. else
  2164. text.append("?");
  2165. text.append("[").append(magnitudeText[magnitude]).append("]");
  2166. }
  2167. void HqlRowCountInfo::limitMin(__int64 value)
  2168. {
  2169. if (getMin() > value)
  2170. min.setown(makeConstant(value));
  2171. }
  2172. void HqlRowCountInfo::scaleFixed(__int64 scale)
  2173. {
  2174. __int64 minValue = getMin();
  2175. __int64 maxValue = getIntValue(max, 0);
  2176. if (maxValue)
  2177. {
  2178. setRange(minValue * scale, maxValue * scale); // MORE: Worry about 64bit overflow
  2179. }
  2180. else
  2181. {
  2182. setUnknown(RCMdisk);
  2183. setMin(minValue * scale);
  2184. }
  2185. }
  2186. void HqlRowCountInfo::scaleRange(__int64 scale)
  2187. {
  2188. scaleFixed(scale);
  2189. min.setown(makeConstant(0));
  2190. }
  2191. void HqlRowCountInfo::setEstimate(__int64 n)
  2192. {
  2193. magnitude = getRowCountMagnitude(n);
  2194. }
  2195. void HqlRowCountInfo::setN(__int64 n)
  2196. {
  2197. setMin(n);
  2198. max.set(min);
  2199. magnitude = getRowCountMagnitude(n);
  2200. }
  2201. void HqlRowCountInfo::setRange(__int64 low, __int64 high)
  2202. {
  2203. min.setown(makeConstant(low));
  2204. max.setown(makeConstant(high));
  2205. magnitude = getRowCountMagnitude(high);
  2206. }
  2207. void HqlRowCountInfo::setUnknown(RowCountMagnitude _magnitude)
  2208. {
  2209. min.setown(getSizetConstant(0));
  2210. max.setown(getUnknownAttribute());
  2211. magnitude = _magnitude;
  2212. }
  2213. //MORE: This information should be cached in an attribute, once it is working, and used in more than one place.
  2214. void retrieveRowInformation(HqlRowCountInfo & info, IHqlExpression * expr)
  2215. {
  2216. IHqlExpression * attr = expr->queryAttribute(EPrecordCount);
  2217. info.extract(attr);
  2218. }
  2219. static void calcIntersectingRowInformation(HqlRowCountInfo & info, IHqlExpression * expr, unsigned firstDs)
  2220. {
  2221. retrieveRowInformation(info, expr->queryChild(firstDs));
  2222. ForEachChildFrom(i, expr, firstDs+1)
  2223. {
  2224. IHqlExpression * cur = expr->queryChild(i);
  2225. if (!cur->isAttribute())
  2226. {
  2227. HqlRowCountInfo nextInfo;
  2228. retrieveRowInformation(nextInfo, cur);
  2229. info.combineBoth(nextInfo);
  2230. }
  2231. }
  2232. }
  2233. //MORE: This would benefit from knowing if the target is hthor/roxie (or a thor child query) so it could tell if local means
  2234. //anything. The best solution is to annotate the graph with _global_ for thor, or _single_ for the others. One day....
  2235. IHqlExpression * calcRowInformation(IHqlExpression * expr)
  2236. {
  2237. HqlRowCountInfo info;
  2238. IHqlExpression * hint = queryHint(expr, outputAtom);
  2239. if (hint && info.extractHint(hint))
  2240. return info.createRecordCountAttr();
  2241. IHqlExpression * ds = expr->queryChild(0);
  2242. node_operator op = expr->getOperator();
  2243. switch (op)
  2244. {
  2245. case no_nothor:
  2246. case no_thor:
  2247. case no_compound_diskread:
  2248. case no_compound_disknormalize:
  2249. case no_compound_diskaggregate:
  2250. case no_compound_diskcount:
  2251. case no_compound_diskgroupaggregate:
  2252. case no_compound_indexread:
  2253. case no_compound_indexnormalize:
  2254. case no_compound_indexaggregate:
  2255. case no_compound_indexcount:
  2256. case no_compound_indexgroupaggregate:
  2257. case no_compound_childread:
  2258. case no_compound_childnormalize:
  2259. case no_compound_childaggregate:
  2260. case no_compound_childcount:
  2261. case no_compound_childgroupaggregate:
  2262. case no_compound_inline:
  2263. case no_compound_selectnew:
  2264. case no_compound_fetch:
  2265. case no_alias:
  2266. case no_forcelocal:
  2267. case no_distribute:
  2268. case no_distributed:
  2269. case no_preservemeta:
  2270. case no_keyeddistribute:
  2271. case no_sorted:
  2272. case no_stepped:
  2273. case no_assertsorted:
  2274. case no_assertgrouped:
  2275. case no_assertdistributed:
  2276. case no_sort:
  2277. case no_subsort:
  2278. case no_nohoist:
  2279. case no_section:
  2280. case no_sectioninput:
  2281. case no_assert_ds:
  2282. case no_readspill:
  2283. case no_writespill:
  2284. case no_commonspill:
  2285. case no_forcegraph:
  2286. case no_split:
  2287. case no_spill:
  2288. case no_spillgraphresult:
  2289. case no_outofline:
  2290. case no_globalscope:
  2291. case no_throughaggregate:
  2292. case no_alias_scope:
  2293. case no_thisnode:
  2294. case no_preload:
  2295. case no_combine:
  2296. case no_catchds:
  2297. case no_metaactivity:
  2298. case no_cosort:
  2299. case no_serialize:
  2300. case no_deserialize:
  2301. case no_executewhen:
  2302. case no_owned_ds:
  2303. case no_dataset_alias:
  2304. {
  2305. return getRecordCountInfo(ds);
  2306. }
  2307. case no_allnodes:
  2308. {
  2309. retrieveRowInformation(info, ds);
  2310. info.scaleRange(RCclusterSizeEstimate);
  2311. break;
  2312. }
  2313. case no_limit:
  2314. case no_keyedlimit:
  2315. {
  2316. retrieveRowInformation(info, ds);
  2317. __int64 limit = getIntValue(expr->queryChild(1), 0);
  2318. if ((limit != 0) && !isGrouped(expr))
  2319. info.applyChoosen(limit, isLocalActivity(expr));
  2320. else
  2321. info.limitMin(limit);
  2322. break;
  2323. }
  2324. case no_hqlproject:
  2325. case no_iterate:
  2326. {
  2327. retrieveRowInformation(info, ds);
  2328. if (transformContainsSkip(expr->queryChild(1)))
  2329. info.limitMin(0);
  2330. break;
  2331. }
  2332. case no_fetch:
  2333. {
  2334. retrieveRowInformation(info, expr->queryChild(1));
  2335. if (transformContainsSkip(expr->queryChild(3)))
  2336. info.limitMin(0);
  2337. break;
  2338. }
  2339. case no_dedup:
  2340. {
  2341. retrieveRowInformation(info, ds);
  2342. //Only affect minimum => Grouped, local and non grouped may all reduce to 1
  2343. info.limitMin(1);
  2344. break;
  2345. }
  2346. case no_rollup:
  2347. case no_rollupgroup:
  2348. {
  2349. //rollup on a single row is a single row, rollup on non single may or may not be.
  2350. retrieveRowInformation(info, ds);
  2351. if (transformContainsSkip(queryNewColumnProvider(expr)))
  2352. info.limitMin(0);
  2353. else
  2354. info.limitMin(1);
  2355. break;
  2356. }
  2357. case no_aggregate:
  2358. case no_newaggregate:
  2359. case no_newusertable:
  2360. case no_selectfields:
  2361. case no_usertable:
  2362. {
  2363. retrieveRowInformation(info, ds);
  2364. if (isAggregateDataset(expr))
  2365. {
  2366. IHqlExpression * grouping = queryDatasetGroupBy(expr);
  2367. if (!grouping)
  2368. grouping = queryGrouping(ds);
  2369. if (grouping)
  2370. {
  2371. //Either aggregate grouped dataset, or grouping supplied. Similar semantics.
  2372. //minimum is 1 unless inputs has minimum of 0
  2373. info.limitMin(1);
  2374. if (expr->hasProperty(fewAtom))
  2375. info.setMaxMagnitude(RCMfew);
  2376. else if (isSmallGrouping(grouping))
  2377. info.setMaxMagnitude(RCMfew);
  2378. }
  2379. else if (isLocalActivity(expr))
  2380. {
  2381. info.setRange(1, RCclusterSizeEstimate); // local,ungrouped -> one per node
  2382. }
  2383. else
  2384. info.setSingleRow();
  2385. }
  2386. else
  2387. {
  2388. if (transformContainsSkip(queryNewColumnProvider(expr)))
  2389. info.limitMin(0);
  2390. }
  2391. break; // maybe a project of an aggregate
  2392. }
  2393. case no_selectnth:
  2394. case no_datasetfromrow:
  2395. case no_activerow:
  2396. {
  2397. info.setSingleRow();
  2398. break;
  2399. }
  2400. case no_rows:
  2401. {
  2402. info.setUnknown(RCMgroup);
  2403. break;
  2404. }
  2405. case no_rowsetindex:
  2406. case no_rowsetrange:
  2407. {
  2408. info.setUnknown(RCMmemory);
  2409. break;
  2410. }
  2411. case no_workunit_dataset:
  2412. case no_getgraphresult:
  2413. case no_getgraphloopresult:
  2414. case no_getresult:
  2415. {
  2416. IHqlExpression * attr = expr->queryProperty(_propRecordCount_Atom);
  2417. if (attr)
  2418. return LINK(attr);
  2419. if (expr->isDatarow() || expr->hasProperty(rowAtom))
  2420. {
  2421. info.setSingleRow();
  2422. }
  2423. else
  2424. {
  2425. if (expr->hasProperty(_distributed_Atom))
  2426. info.setUnknown(RCMdisk);
  2427. else
  2428. info.setUnknown(RCMfew);
  2429. }
  2430. break;
  2431. }
  2432. case no_table:
  2433. case no_keyindex:
  2434. case no_newkeyindex:
  2435. {
  2436. IHqlExpression * attr = expr->queryProperty(_propRecordCount_Atom);
  2437. if (attr)
  2438. return LINK(attr);
  2439. if (expr->isDatarow() || expr->hasProperty(rowAtom))
  2440. {
  2441. info.setSingleRow();
  2442. }
  2443. else
  2444. {
  2445. info.setUnknown(RCMdisk);
  2446. //Allow an annotation on a dataset to specify exact and ranges of counts.
  2447. IHqlExpression * count = queryPropertyChild(expr, countAtom, 0);
  2448. IHqlExpression * maxCount = queryPropertyChild(expr, maxCountAtom, 0);
  2449. IHqlExpression * aveCount = queryPropertyChild(expr, aveAtom, 0);
  2450. if (count)
  2451. info.setN(getIntValue(count));
  2452. else if (maxCount)
  2453. info.setRange(0, getIntValue(maxCount));
  2454. else if (aveCount)
  2455. info.setEstimate(getIntValue(aveCount));
  2456. }
  2457. break;
  2458. }
  2459. case no_filter:
  2460. case no_filtergroup:
  2461. case no_sample:
  2462. {
  2463. retrieveRowInformation(info, ds);
  2464. info.limitMin(0);
  2465. //More sample could potentially reduce the magnitude
  2466. break;
  2467. }
  2468. case no_temptable:
  2469. {
  2470. IHqlExpression * values = expr->queryChild(0);
  2471. if (values->getOperator() == no_recordlist)
  2472. info.setN(values->numChildren());
  2473. else
  2474. info.setUnknown(RCMfew);
  2475. break;
  2476. }
  2477. case no_inlinetable:
  2478. {
  2479. IHqlExpression * transforms = expr->queryChild(0);
  2480. unsigned maxValue = transforms->numChildren();
  2481. unsigned minValue = 0;
  2482. for (unsigned i=0; i < maxValue; i++)
  2483. {
  2484. if (!containsSkip(transforms->queryChild(i)))
  2485. minValue++;
  2486. }
  2487. info.setRange(minValue, maxValue);
  2488. break;
  2489. }
  2490. case no_dataset_from_transform:
  2491. {
  2492. // only if the count is a constant value
  2493. IHqlExpression * count = expr->queryChild(0);
  2494. IValue * value = count->queryValue();
  2495. if (value)
  2496. {
  2497. IHqlExpression * transform = expr->queryChild(1);
  2498. __int64 maxCount = value->getIntValue();
  2499. if (containsSkip(transform))
  2500. info.setRange(0, maxCount);
  2501. else
  2502. info.setN(maxCount);
  2503. }
  2504. // leave it be, if it's a constant expression or a variable
  2505. break;
  2506. }
  2507. case no_null:
  2508. info.setN(expr->isDatarow() ? 1 : 0);
  2509. break;
  2510. case no_fail:
  2511. info.setN(0);
  2512. break;
  2513. case no_if:
  2514. {
  2515. retrieveRowInformation(info, expr->queryChild(1));
  2516. IHqlExpression * rhs = expr->queryChild(2);
  2517. if (rhs)
  2518. {
  2519. HqlRowCountInfo rhsInfo;
  2520. retrieveRowInformation(rhsInfo, rhs);
  2521. info.combineAlternatives(rhsInfo);
  2522. }
  2523. else
  2524. {
  2525. info.min.setown(getSizetConstant(0));
  2526. }
  2527. break;
  2528. }
  2529. case no_nonempty:
  2530. {
  2531. retrieveRowInformation(info, ds);
  2532. //Go through the children so we get a sensible value for the magnitude
  2533. unsigned max = expr->numChildren();
  2534. for (unsigned i=1; i< max; i++)
  2535. {
  2536. if (!isZero(info.min))
  2537. break;
  2538. IHqlExpression * cur = expr->queryChild(i);
  2539. if (!cur->isAttribute())
  2540. {
  2541. HqlRowCountInfo nextInfo;
  2542. retrieveRowInformation(nextInfo, cur);
  2543. info.min.set(nextInfo.min);
  2544. info.combineAlternatives(nextInfo);
  2545. }
  2546. }
  2547. break;
  2548. }
  2549. case no_chooseds:
  2550. case no_regroup:
  2551. case no_combinegroup:
  2552. case no_addfiles:
  2553. case no_merge:
  2554. {
  2555. unsigned firstDataset = getFirstActivityArgument(expr);
  2556. calcIntersectingRowInformation(info, expr, firstDataset);
  2557. break;
  2558. }
  2559. case no_choosen:
  2560. {
  2561. retrieveRowInformation(info, ds);
  2562. __int64 choosenLimit = getIntValue(expr->queryChild(1), 0);
  2563. if (choosenLimit == CHOOSEN_ALL_LIMIT)
  2564. info.limitMin(0); // play safe - could be clever if second value is constant, and min/max known.
  2565. else if ((choosenLimit != 0) && !isGrouped(expr))
  2566. info.applyChoosen(choosenLimit, isLocalActivity(expr));
  2567. else
  2568. info.limitMin(choosenLimit);
  2569. }
  2570. break;
  2571. case no_topn:
  2572. {
  2573. retrieveRowInformation(info, ds);
  2574. __int64 choosenLimit = getIntValue(expr->queryChild(2), 0);
  2575. if ((choosenLimit > 0) && !isGrouped(expr))
  2576. info.applyChoosen(choosenLimit, isLocalActivity(expr));
  2577. else
  2578. info.limitMin(choosenLimit);
  2579. }
  2580. break;
  2581. case no_select:
  2582. {
  2583. bool isNew;
  2584. IHqlExpression * realDs = querySelectorDataset(expr, isNew);
  2585. if (isNew)
  2586. retrieveRowInformation(info, realDs);
  2587. else
  2588. info.setSingleRow();
  2589. if (!expr->isDatarow())
  2590. {
  2591. IHqlExpression * field = expr->queryChild(1);
  2592. __int64 count = getIntValue(queryPropertyChild(field, countAtom, 0), 0);
  2593. __int64 maxcount = getIntValue(queryPropertyChild(field, maxCountAtom, 0), 0);
  2594. if (count)
  2595. info.scaleFixed(count);
  2596. else if (maxcount)
  2597. info.scaleRange(maxcount);
  2598. else if (info.isSingleRow())
  2599. info.setUnknown(RCMfew);
  2600. else
  2601. info.setUnknown(RCMdisk);
  2602. }
  2603. break;
  2604. }
  2605. case no_normalize:
  2606. {
  2607. retrieveRowInformation(info, ds);
  2608. IValue * numRows = expr->queryChild(1)->queryValue();
  2609. if (numRows)
  2610. {
  2611. __int64 scale = numRows->getIntValue();
  2612. if (containsSkip(expr->queryChild(2)))
  2613. info.scaleRange(scale);
  2614. else
  2615. info.scaleFixed(scale);
  2616. }
  2617. else
  2618. info.setUnknown(RCMdisk);
  2619. break;
  2620. }
  2621. case no_group:
  2622. case no_grouped:
  2623. //MORE: Not completely sure how we should handle groups.
  2624. return getRecordCountInfo(ds);
  2625. case no_join:
  2626. case no_selfjoin:
  2627. {
  2628. bool maxSingleRowOut = false;
  2629. if (expr->hasProperty(leftonlyAtom))
  2630. maxSingleRowOut = true;
  2631. else if (isLeftJoin(expr) || isInnerJoin(expr))
  2632. {
  2633. IHqlExpression * keep = queryPropertyChild(expr, keepAtom, 0);
  2634. if (matchesConstantValue(keep, 1))
  2635. maxSingleRowOut = true;
  2636. }
  2637. if (maxSingleRowOut)
  2638. {
  2639. retrieveRowInformation(info, ds);
  2640. if (!expr->hasProperty(leftouterAtom) || containsSkip(expr->queryChild(3)))
  2641. info.limitMin(0);
  2642. }
  2643. else
  2644. info.setUnknown(RCMdisk);
  2645. break;
  2646. }
  2647. case no_denormalize:
  2648. case no_denormalizegroup:
  2649. {
  2650. retrieveRowInformation(info, ds);
  2651. if (containsSkip(expr->queryChild(3)))
  2652. info.limitMin(0);
  2653. break;
  2654. }
  2655. case no_mergejoin:
  2656. case no_nwayjoin:
  2657. case no_nwaymerge:
  2658. info.setUnknown(RCMdisk);
  2659. break;
  2660. case no_loop:
  2661. case no_graphloop:
  2662. case no_libraryselect:
  2663. case no_libraryinput:
  2664. case no_param:
  2665. case no_anon:
  2666. case no_nofold: // assume nothing - to stop subsequent optimizations
  2667. case no_delayedselect:
  2668. case no_unboundselect:
  2669. case no_internalselect:
  2670. info.setUnknown(RCMdisk);
  2671. break;
  2672. case no_parse:
  2673. case no_newparse:
  2674. case no_xmlparse:
  2675. case no_newxmlparse:
  2676. case no_soapcall:
  2677. case no_soapcall_ds:
  2678. case no_newsoapcall:
  2679. case no_newsoapcall_ds:
  2680. case no_httpcall:
  2681. case no_process:
  2682. case no_pipe:
  2683. case no_translated:
  2684. case no_datasetfromdictionary:
  2685. //MORE could improve each of these
  2686. info.setUnknown(RCMdisk);
  2687. break;
  2688. case no_map:
  2689. case no_case:
  2690. {
  2691. if (expr->isDatarow())
  2692. {
  2693. info.setSingleRow();
  2694. break;
  2695. }
  2696. //This is primarily implemented so the annotations in the graph look correct
  2697. unsigned start = (op == no_case) ? 1 : 0;
  2698. IHqlExpression * dft = NULL;
  2699. ForEachChildFrom(i1, expr, start)
  2700. {
  2701. IHqlExpression * cur = expr->queryChild(i1);
  2702. if (cur->getOperator() != no_mapto)
  2703. {
  2704. if (!cur->isAttribute())
  2705. dft = cur;
  2706. break;
  2707. }
  2708. }
  2709. if (dft)
  2710. retrieveRowInformation(info, dft);
  2711. else
  2712. info.setN(0);
  2713. ForEachChildFrom(i2, expr, start)
  2714. {
  2715. IHqlExpression * cur = expr->queryChild(i2);
  2716. if (cur->getOperator() == no_mapto)
  2717. {
  2718. HqlRowCountInfo rhsInfo;
  2719. retrieveRowInformation(rhsInfo, cur->queryChild(1));
  2720. info.combineAlternatives(rhsInfo);
  2721. }
  2722. }
  2723. break;
  2724. }
  2725. case no_id2blob:
  2726. case no_xmlproject:
  2727. case no_call:
  2728. case no_externalcall:
  2729. info.setUnknown(RCMfew);
  2730. break;
  2731. case no_colon:
  2732. {
  2733. IHqlExpression * workflow = expr->queryChild(1);
  2734. //For either of
  2735. if (queryOperatorInList(no_stored, workflow) || queryOperatorInList(no_recovery, workflow))
  2736. {
  2737. info.setUnknown(RCMdisk);
  2738. break;
  2739. }
  2740. //MORE: Could restrict based on few flags
  2741. return getRecordCountInfo(ds);
  2742. }
  2743. case no_choosesets:
  2744. case no_enth:
  2745. //MORE: Could sum the numbers to return
  2746. return getRecordCountInfo(ds);
  2747. case no_compound:
  2748. return getRecordCountInfo(expr->queryChild(1));
  2749. default:
  2750. if (expr->isDataset())
  2751. UNIMPLEMENTED_XY("Record count calculation for operator", getOpString(op));
  2752. if (expr->isDatarow())
  2753. info.setSingleRow();
  2754. else
  2755. info.setUnknown(RCMdisk); //Assume the worse case...
  2756. break;
  2757. }
  2758. return info.createRecordCountAttr();
  2759. }
  2760. static IHqlExpression * evaluateAttrRecordCount(IHqlExpression * expr)
  2761. {
  2762. OwnedHqlExpr info = calcRowInformation(expr);
  2763. return meta.addAttribute(expr, EPrecordCount, info);
  2764. }
  2765. void getRecordCountText(StringBuffer & result, IHqlExpression * expr)
  2766. {
  2767. HqlRowCountInfo info;
  2768. retrieveRowInformation(info, expr);
  2769. info.getText(result);
  2770. }
  2771. //---------------------------------------------------------------------------------
  2772. bool hasFewRows(IHqlExpression * expr)
  2773. {
  2774. HqlRowCountInfo info;
  2775. retrieveRowInformation(info, expr);
  2776. return (info.magnitude <= RCMfew);
  2777. }
  2778. bool spillToWorkunitNotFile(IHqlExpression * expr, ClusterType platform)
  2779. {
  2780. if (platform == RoxieCluster)
  2781. return true;
  2782. if (isThorCluster(platform))
  2783. {
  2784. //In thor, all rows will get sent to master and written to dali, and then read back on slave 0
  2785. //not likely to be more efficient unless only a single row - although the generated code accessing
  2786. //from a child query is better
  2787. return hasNoMoreRowsThan(expr, 1);
  2788. }
  2789. return hasFewRows(expr);
  2790. }
  2791. bool hasSingleRow(IHqlExpression * expr)
  2792. {
  2793. HqlRowCountInfo info;
  2794. retrieveRowInformation(info, expr);
  2795. return info.isSingleRow();
  2796. }
  2797. bool hasNoMoreRowsThan(IHqlExpression * expr, __int64 limit)
  2798. {
  2799. HqlRowCountInfo info;
  2800. retrieveRowInformation(info, expr);
  2801. return getIntValue(info.max, limit+1) <= limit;
  2802. }
  2803. // Functions for testing whether
  2804. // Functions for accessing attributes from types etc.
  2805. IHqlExpression * queryProperty(ITypeInfo * type, IAtom * search)
  2806. {
  2807. loop
  2808. {
  2809. typemod_t curModifier = type->queryModifier();
  2810. switch (curModifier)
  2811. {
  2812. case typemod_none:
  2813. return NULL;
  2814. case typemod_attr:
  2815. {
  2816. IHqlExpression * prop = static_cast<IHqlExpression *>(type->queryModifierExtra());
  2817. if (prop->queryName() == search)
  2818. return prop;
  2819. break;
  2820. }
  2821. case typemod_original:
  2822. {
  2823. IHqlExpression * original = static_cast<IHqlExpression *>(type->queryModifierExtra());
  2824. IHqlExpression * match = original->queryProperty(search);
  2825. if (match)
  2826. return match;
  2827. break;
  2828. }
  2829. }
  2830. type = type->queryTypeBase();
  2831. }
  2832. }
  2833. IHqlExpression * queryPropertyChild(ITypeInfo * type, IAtom * search, unsigned idx)
  2834. {
  2835. IHqlExpression * match = queryProperty(type, search);
  2836. if (match)
  2837. return match->queryChild(idx);
  2838. return NULL;
  2839. }
  2840. // Functions for extracting and preserving attribute information on types and fields.
  2841. void cloneFieldModifier(Shared<ITypeInfo> & type, ITypeInfo * donorType, IAtom * attr)
  2842. {
  2843. IHqlExpression * match = queryProperty(donorType, attr);
  2844. if (!match)
  2845. return;
  2846. IHqlExpression * existing = queryProperty(type, attr);
  2847. if (match == existing)
  2848. return;
  2849. type.setown(makeAttributeModifier(type.getClear(), LINK(match)));
  2850. }
  2851. ITypeInfo * cloneEssentialFieldModifiers(ITypeInfo * donor, ITypeInfo * rawtype)
  2852. {
  2853. Linked<ITypeInfo> type = rawtype;
  2854. cloneFieldModifier(type, donor, maxLengthAtom);
  2855. cloneFieldModifier(type, donor, maxSizeAtom);
  2856. cloneFieldModifier(type, donor, maxCountAtom);
  2857. return type.getClear();
  2858. }
  2859. ITypeInfo * removeProperty(ITypeInfo * t, IAtom * search)
  2860. {
  2861. typemod_t curModifier = t->queryModifier();
  2862. if (curModifier == typemod_none)
  2863. return LINK(t);
  2864. ITypeInfo * base = t->queryTypeBase();
  2865. if (curModifier == typemod_attr)
  2866. {
  2867. IHqlExpression * attr = (IHqlExpression *)t->queryModifierExtra();
  2868. if (attr->queryName() == search)
  2869. return LINK(base);
  2870. }
  2871. OwnedITypeInfo newBase = removeProperty(base, search);
  2872. if (newBase == base)
  2873. return LINK(t);
  2874. return makeModifier(newBase.getClear(), curModifier, LINK(t->queryModifierExtra()));
  2875. }
  2876. bool isUninheritedFieldAttribute(IHqlExpression * expr)
  2877. {
  2878. if (expr->isAttribute())
  2879. {
  2880. IAtom * name = expr->queryName();
  2881. //MORE: Attributes of datasets need a different representation - should probably be include in the type somehow...
  2882. if ((name == virtualAtom) || (name == countAtom))
  2883. return true;
  2884. }
  2885. return false;
  2886. }
  2887. bool hasUninheritedAttribute(IHqlExpression * field)
  2888. {
  2889. ForEachChild(i, field)
  2890. if (isUninheritedFieldAttribute(field->queryChild(i)))
  2891. return true;
  2892. return false;
  2893. }
  2894. IHqlExpression * extractFieldAttrs(IHqlExpression * field)
  2895. {
  2896. IHqlExpression * attrs = NULL;
  2897. ForEachChild(idx, field)
  2898. {
  2899. IHqlExpression * child = field->queryChild(idx);
  2900. if (child->isAttribute())
  2901. {
  2902. //MORE: Attributes of datasets need a different representation - should probably be include in the type somehow...
  2903. if (!isUninheritedFieldAttribute(child))
  2904. {
  2905. // which others should we ignore?
  2906. attrs = createComma(attrs, LINK(child));
  2907. }
  2908. }
  2909. }
  2910. return attrs;
  2911. }
  2912. IHqlExpression * extractAttrsFromExpr(IHqlExpression * value)
  2913. {
  2914. if (!value)
  2915. return NULL;
  2916. if (value->getOperator() == no_select)
  2917. value = value->queryChild(1);
  2918. if (value->getOperator() == no_field)
  2919. return extractFieldAttrs(value);
  2920. return NULL;
  2921. }
  2922. // Type processing
  2923. ITypeInfo * getPromotedECLType(ITypeInfo * lType, ITypeInfo * rType)
  2924. {
  2925. return ::getPromotedType(lType, rType);
  2926. }
  2927. ITypeInfo * getPromotedECLCompareType(ITypeInfo * lType, ITypeInfo * rType)
  2928. {
  2929. return ::getPromotedCompareType(lType, rType);
  2930. }
  2931. unsigned getMaxRecordSize(IHqlExpression * record, unsigned defaultMaxRecordSize, bool & hasKnownSize, bool & usedDefault)
  2932. {
  2933. IHqlExpression * size = record->queryAttribute(EPsize);
  2934. IHqlExpression * minSizeExpr = size->queryChild(1);
  2935. IHqlExpression * maxSizeExpr = size->queryChild(2);
  2936. unsigned maxSize = (unsigned)getIntValue(maxSizeExpr, UNKNOWN_LENGTH);
  2937. hasKnownSize = (minSizeExpr == maxSizeExpr);
  2938. if (maxSize == UNKNOWN_LENGTH)
  2939. {
  2940. OwnedHqlExpr defaultExpr = getSizetConstant(defaultMaxRecordSize);
  2941. OwnedHqlExpr value = replaceExpression(maxSizeExpr, queryDefaultMaxRecordLengthExpr(), defaultExpr);
  2942. OwnedHqlExpr folded = foldHqlExpression(value);
  2943. assertex(folded);
  2944. maxSize = (unsigned)getIntValue(folded);
  2945. unsigned minSize = getIntValue(minSizeExpr);
  2946. if (maxSize < minSize)
  2947. maxSize = minSize;
  2948. usedDefault = true;
  2949. }
  2950. else
  2951. usedDefault = false;
  2952. return maxSize;
  2953. }
  2954. size32_t getExpectedRecordSize(IHqlExpression * record)
  2955. {
  2956. IHqlExpression * size = record->queryAttribute(EPsize);
  2957. return size ? (size32_t)getIntValue(size->queryChild(0)) : 0;
  2958. }
  2959. size32_t getMinRecordSize(IHqlExpression * record)
  2960. {
  2961. IHqlExpression * size = record->queryAttribute(EPsize);
  2962. return size ? (size32_t)getIntValue(size->queryChild(1)) : 0;
  2963. }
  2964. unsigned getMaxRecordSize(IHqlExpression * record, unsigned defaultMaxRecordSize)
  2965. {
  2966. bool isKnownSize, usedDefault;
  2967. return getMaxRecordSize(record, defaultMaxRecordSize, isKnownSize, usedDefault);
  2968. }
  2969. bool maxRecordSizeUsesDefault(IHqlExpression * record)
  2970. {
  2971. IHqlExpression * maxSize = record->queryAttribute(EPsize)->queryChild(2);
  2972. return (maxSize->queryValue() == NULL);
  2973. }
  2974. bool isVariableSizeRecord(IHqlExpression * record)
  2975. {
  2976. IHqlExpression * sizeAttr = record->queryAttribute(EPsize);
  2977. return sizeAttr->queryChild(1) != sizeAttr->queryChild(2);
  2978. }
  2979. bool maxRecordSizeIsAmbiguous(IHqlExpression * record, size32_t & specifiedSize, size32_t & derivedSize)
  2980. {
  2981. IHqlExpression * sizeAttr = record->queryAttribute(EPsize);
  2982. IHqlExpression * derivedSizeExpr = sizeAttr->queryChild(3);
  2983. if (!derivedSizeExpr || !derivedSizeExpr->isConstant())
  2984. return false;
  2985. OwnedHqlExpr foldedDerivedSize = foldHqlExpression(derivedSizeExpr);
  2986. if (!foldedDerivedSize->queryValue())
  2987. return false;
  2988. IHqlExpression * maxLength = sizeAttr->queryChild(2);
  2989. OwnedHqlExpr foldedMaxLength = foldHqlExpression(maxLength);
  2990. if (!foldedMaxLength->queryValue())
  2991. return false;
  2992. specifiedSize = (size32_t)foldedMaxLength->queryValue()->getIntValue();
  2993. derivedSize = (size32_t) foldedDerivedSize->queryValue()->getIntValue();
  2994. return derivedSize != specifiedSize;
  2995. }
  2996. bool maxRecordSizeCanBeDerived(IHqlExpression * record)
  2997. {
  2998. if (!isVariableSizeRecord(record))
  2999. return true;
  3000. if (record->hasProperty(maxLengthAtom))
  3001. {
  3002. IHqlExpression * sizeAttr = record->queryAttribute(EPsize);
  3003. IHqlExpression * derivedSizeExpr = sizeAttr->queryChild(3);
  3004. return (derivedSizeExpr != NULL);
  3005. }
  3006. return !maxRecordSizeUsesDefault(record);
  3007. }
  3008. //---------------------------------------------------------------------------------
  3009. bool recordRequiresSerialization(IHqlExpression * expr, IAtom * serializeForm)
  3010. {
  3011. if (!expr)
  3012. return false;
  3013. if (querySerializedForm(expr, serializeForm) != expr)
  3014. return true;
  3015. return false;
  3016. }
  3017. bool recordRequiresDestructor(IHqlExpression * expr)
  3018. {
  3019. if (!expr)
  3020. return false;
  3021. //true if the internal serialized form is different
  3022. if (querySerializedForm(expr, internalAtom) != expr)
  3023. return true;
  3024. return false;
  3025. }
  3026. bool recordRequiresLinkCount(IHqlExpression * expr)
  3027. {
  3028. //MORE: This should strictly speaking check if any of the child fields are link counted.
  3029. //This function is a sufficient proxy at the moment
  3030. return recordRequiresDestructor(expr);
  3031. }
  3032. bool recordSerializationDiffers(IHqlExpression * expr, IAtom * serializeForm1, IAtom * serializeForm2)
  3033. {
  3034. return querySerializedForm(expr, serializeForm1) != querySerializedForm(expr, serializeForm2);
  3035. }
  3036. extern HQL_API bool typeRequiresDeserialization(ITypeInfo * type, IAtom * serializeForm)
  3037. {
  3038. Owned<ITypeInfo> serializedType = getSerializedForm(type, serializeForm);
  3039. if (queryUnqualifiedType(serializedType) == queryUnqualifiedType(type))
  3040. return false;
  3041. type_t stc = serializedType->getTypeCode();
  3042. if (stc != type->getTypeCode())
  3043. return true;
  3044. if (stc == type_table)
  3045. {
  3046. if (recordTypesMatch(serializedType, type))
  3047. return false;
  3048. return true;
  3049. }
  3050. return true;
  3051. }
  3052. //---------------------------------------------------------------------------------
  3053. IHqlExpression * queryRecordCountInfo(IHqlExpression * expr)
  3054. {
  3055. return expr->queryAttribute(EPrecordCount);
  3056. }
  3057. IHqlExpression * getRecordCountInfo(IHqlExpression * expr)
  3058. {
  3059. return LINK(expr->queryAttribute(EPrecordCount));
  3060. }
  3061. IHqlExpression * queryExpectedRecordCount(IHqlExpression * expr)
  3062. {
  3063. IHqlExpression * attr = expr->queryAttribute(EPrecordCount);
  3064. return attr ? attr->queryChild(0) : NULL;
  3065. }
  3066. IHqlExpression * getPackedRecord(IHqlExpression * expr)
  3067. {
  3068. IHqlExpression * attr = expr->queryAttribute(EPaligned);
  3069. IHqlExpression * packed = attr->queryChild(0);
  3070. if (!packed) packed = expr;
  3071. return LINK(packed);
  3072. }
  3073. /*
  3074. * This function can be called while parsing (or later) to find a "normalized" version of a record or a field.
  3075. * It ignores default values for fields, and removes named symbols/ other location specific information - so
  3076. * that identical records defined in macros etc. are treated as identical.
  3077. */
  3078. IHqlExpression * getUnadornedRecordOrField(IHqlExpression * expr)
  3079. {
  3080. if (!expr)
  3081. return NULL;
  3082. IHqlExpression * attr = expr->queryAttribute(EPunadorned);
  3083. return LINK(attr);
  3084. }
  3085. //---------------------------------------------------------------------------------
  3086. inline bool isAlwaysLocationIndependent(IHqlExpression * expr)
  3087. {
  3088. switch (expr->getOperator())
  3089. {
  3090. case no_constant:
  3091. case no_param:
  3092. case no_quoted:
  3093. case no_variable:
  3094. return true;
  3095. case no_attr:
  3096. return (expr->numChildren() == 0);
  3097. }
  3098. return false;
  3099. }
  3100. class HqlLocationIndependentNormalizer : public QuickHqlTransformer
  3101. {
  3102. public:
  3103. HqlLocationIndependentNormalizer();
  3104. virtual IHqlExpression * createTransformed(IHqlExpression * expr);
  3105. virtual ITypeInfo * transformType(ITypeInfo * type);
  3106. protected:
  3107. IHqlExpression * doCreateTransformed(IHqlExpression * expr);
  3108. };
  3109. static HqlTransformerInfo hqlLocationIndependentInfo("HqlLocationIndependentNormalizer");
  3110. HqlLocationIndependentNormalizer::HqlLocationIndependentNormalizer() : QuickHqlTransformer(hqlLocationIndependentInfo, NULL)
  3111. {
  3112. }
  3113. ITypeInfo * HqlLocationIndependentNormalizer::transformType(ITypeInfo * type)
  3114. {
  3115. switch (type->queryModifier())
  3116. {
  3117. case typemod_original:
  3118. return transformType(type->queryTypeBase());
  3119. case typemod_none:
  3120. return QuickHqlTransformer::transformType(type);
  3121. case typemod_indirect:
  3122. {
  3123. IHqlExpression * original = static_cast<IHqlExpression *>(type->queryModifierExtra());
  3124. OwnedHqlExpr transformed = transform(original);
  3125. return makeModifier(transformed->getType(), typemod_indirect, LINK(transformed));
  3126. }
  3127. default:
  3128. {
  3129. ITypeInfo * typeBase = type->queryTypeBase();
  3130. Owned<ITypeInfo> newType = transformType(typeBase);
  3131. if (typeBase == newType)
  3132. return LINK(type);
  3133. return cloneModifier(type, newType);
  3134. }
  3135. }
  3136. }
  3137. IHqlExpression * HqlLocationIndependentNormalizer::doCreateTransformed(IHqlExpression * expr)
  3138. {
  3139. node_operator op = expr->getOperator();
  3140. switch (op)
  3141. {
  3142. case no_attr:
  3143. {
  3144. //Original attributes cause chaos => remove all children from attributes
  3145. if (expr->numChildren() != 0)
  3146. return createAttribute(expr->queryName());
  3147. return LINK(expr);
  3148. }
  3149. case no_field:
  3150. {
  3151. //Remove the default values from fields since they just confuse.
  3152. HqlExprArray children;
  3153. bool same = true;
  3154. ForEachChild(idx, expr)
  3155. {
  3156. IHqlExpression * cur = expr->queryChild(idx);
  3157. if (cur->isAttribute())
  3158. {
  3159. IHqlExpression * mapped = transform(cur);
  3160. children.append(*mapped);
  3161. if (mapped != cur)
  3162. same = false;
  3163. }
  3164. else
  3165. same = false;
  3166. }
  3167. ITypeInfo * type = expr->queryType();
  3168. OwnedITypeInfo newType = transformType(type);
  3169. if (type != newType)
  3170. return createField(expr->queryId(), newType.getClear(), children);
  3171. if (same)
  3172. return LINK(expr);
  3173. return expr->clone(children);
  3174. }
  3175. }
  3176. return QuickHqlTransformer::createTransformed(expr);
  3177. }
  3178. IHqlExpression * HqlLocationIndependentNormalizer::createTransformed(IHqlExpression * expr)
  3179. {
  3180. //Remove all annotations. It is vaguely possible there are some annotations we would want to retain, but I don't know of any
  3181. IHqlExpression * body = expr->queryBody(false);
  3182. if (expr != body)
  3183. return transform(body);
  3184. if (isAlwaysLocationIndependent(expr))
  3185. return LINK(expr);
  3186. IHqlExpression * match = meta.queryExistingAttribute(expr, EPlocationIndependent);
  3187. if (match)
  3188. return LINK(match);
  3189. OwnedHqlExpr transformed = doCreateTransformed(expr);
  3190. meta.addAttribute(expr, EPlocationIndependent, transformed);
  3191. return transformed.getClear();
  3192. }
  3193. IHqlExpression * evaluateAttrLocationIndependent(IHqlExpression * expr)
  3194. {
  3195. if (isAlwaysLocationIndependent(expr))
  3196. return expr->queryBody();
  3197. //Because the transformers contain all the logic for how scopes etc. are transformed it is much better to
  3198. //use a transformer which caches the result in the expression tree instead of trying to replicate
  3199. //all the rules in some member functions.
  3200. HqlLocationIndependentNormalizer normalizer;
  3201. OwnedHqlExpr transformed = normalizer.transform(expr);
  3202. return transformed; // NB: no getClear(). Because it is cached it is guaranteed to exist even when this link is released.
  3203. }
  3204. IHqlExpression * queryLocationIndependent(IHqlExpression * expr)
  3205. {
  3206. IHqlExpression * match = expr->queryAttribute(EPlocationIndependent);
  3207. if (match)
  3208. return match;
  3209. return expr;
  3210. }
  3211. static void clonePropertyAsModifier(Owned<ITypeInfo> & type, IHqlExpression * donor, IAtom * attr)
  3212. {
  3213. if (queryProperty(type, attr))
  3214. return;
  3215. IHqlExpression * match = donor->queryProperty(attr);
  3216. if (!match)
  3217. return;
  3218. type.setown(makeAttributeModifier(type.getClear(), LINK(match)));
  3219. }
  3220. ITypeInfo * preserveTypeQualifiers(ITypeInfo * ownedType, IHqlExpression * donor)
  3221. {
  3222. //The following would be a good idea, but it won't work until we introduce a recordof() operator
  3223. //and use that whenever queryRecord() is currenly called (see bug46863)
  3224. // type = makeModifier(type, typemod_indirect, LINK(arg));
  3225. //Instead, just clone the attributes we need
  3226. IHqlExpression * field = queryFieldFromExpr(donor);
  3227. switch (field->getOperator())
  3228. {
  3229. case no_field:
  3230. // case no_record:
  3231. break;
  3232. default:
  3233. return ownedType;
  3234. }
  3235. OwnedITypeInfo type = ownedType;
  3236. clonePropertyAsModifier(type, field, maxLengthAtom);
  3237. clonePropertyAsModifier(type, field, maxSizeAtom);
  3238. clonePropertyAsModifier(type, field, maxCountAtom);
  3239. return type.getClear();
  3240. }
  3241. static bool cloneModifierAsProperty(HqlExprArray & args, ITypeInfo * donor, IAtom * attr)
  3242. {
  3243. IHqlExpression * match = queryProperty(donor, attr);
  3244. if (!match)
  3245. return true;
  3246. if (queryProperty(attr, args))
  3247. return true;
  3248. args.append(*LINK(match));
  3249. return false;
  3250. }
  3251. bool preserveTypeQualifiers(HqlExprArray & args, ITypeInfo * donor)
  3252. {
  3253. bool same = true;
  3254. same = cloneModifierAsProperty(args, donor, maxLengthAtom) && same;
  3255. same = cloneModifierAsProperty(args, donor, maxSizeAtom) && same;
  3256. same = cloneModifierAsProperty(args, donor, maxCountAtom) && same;
  3257. return same;
  3258. }
  3259. IHqlExpression * preserveTypeQualifiers(IHqlExpression * ownedField, ITypeInfo * donor)
  3260. {
  3261. OwnedHqlExpr field = ownedField;
  3262. HqlExprArray args;
  3263. unwindChildren(args, field);
  3264. if (preserveTypeQualifiers(args, donor))
  3265. return field.getClear();
  3266. return field->clone(args);
  3267. }
  3268. bool isLinkedRowset(ITypeInfo * t)
  3269. {
  3270. switch (t->getTypeCode())
  3271. {
  3272. case type_table:
  3273. case type_groupedtable:
  3274. case type_dictionary:
  3275. return hasLinkCountedModifier(t);
  3276. }
  3277. return false;
  3278. }
  3279. bool isArrayRowset(ITypeInfo * t)
  3280. {
  3281. switch (t->getTypeCode())
  3282. {
  3283. case type_table:
  3284. case type_groupedtable:
  3285. case type_array:
  3286. case type_dictionary:
  3287. {
  3288. if (hasLinkCountedModifier(t))
  3289. assertex(hasLinkCountedModifier(t->queryChildType()));
  3290. if (hasOutOfLineModifier(t) || hasLinkCountedModifier(t))
  3291. return true;
  3292. ITypeInfo * rowType = t->queryChildType();
  3293. if (hasOutOfLineModifier(rowType) || hasLinkCountedModifier(rowType))
  3294. throwUnexpected();
  3295. return false;
  3296. }
  3297. case type_row:
  3298. throwUnexpected();
  3299. }
  3300. return false;
  3301. }
  3302. bool hasLinkedRow(ITypeInfo * t)
  3303. {
  3304. switch (t->getTypeCode())
  3305. {
  3306. case type_table:
  3307. case type_groupedtable:
  3308. case type_dictionary:
  3309. return hasLinkedRow(t->queryChildType());
  3310. case type_row:
  3311. return hasLinkCountedModifier(t);
  3312. }
  3313. return false;
  3314. }
  3315. ITypeInfo * setLinkCountedAttr(ITypeInfo * _type, bool setValue)
  3316. {
  3317. Linked<ITypeInfo> type = _type;
  3318. switch (type->getTypeCode())
  3319. {
  3320. case type_table:
  3321. case type_groupedtable:
  3322. case type_dictionary:
  3323. {
  3324. ITypeInfo * rowType = type->queryChildType();
  3325. Owned<ITypeInfo> newRowType = setLinkCountedAttr(rowType, setValue);
  3326. if (rowType != newRowType)
  3327. type.setown(replaceChildType(type, newRowType));
  3328. break;
  3329. }
  3330. case type_row:
  3331. break;
  3332. default:
  3333. return type.getClear();
  3334. }
  3335. if (hasLinkCountedModifier(type))
  3336. {
  3337. if (setValue)
  3338. return LINK(type);
  3339. return removeProperty(type, _linkCounted_Atom);
  3340. }
  3341. else
  3342. {
  3343. if (setValue)
  3344. return makeAttributeModifier(LINK(type), getLinkCountedAttr());
  3345. return LINK(type);
  3346. }
  3347. }
  3348. ITypeInfo * setStreamedAttr(ITypeInfo * _type, bool setValue)
  3349. {
  3350. Linked<ITypeInfo> type = _type;
  3351. switch (type->getTypeCode())
  3352. {
  3353. case type_groupedtable:
  3354. {
  3355. ITypeInfo * dsType = type->queryChildType();
  3356. Owned<ITypeInfo> newDsType = setStreamedAttr(dsType, setValue);
  3357. if (dsType != newDsType)
  3358. type.setown(replaceChildType(type, newDsType));
  3359. break;
  3360. }
  3361. case type_table:
  3362. break;
  3363. default:
  3364. return type.getClear();
  3365. }
  3366. if (hasStreamedModifier(type))
  3367. {
  3368. if (setValue)
  3369. return LINK(type);
  3370. return removeProperty(type, streamedAtom);
  3371. }
  3372. else
  3373. {
  3374. if (setValue)
  3375. return makeAttributeModifier(LINK(type), getStreamedAttr());
  3376. return LINK(type);
  3377. }
  3378. }
  3379. //---------------------------------------------------------------------------------------------------------------------
  3380. IHqlExpression * CHqlExpression::queryExistingAttribute(ExprPropKind propKind) const
  3381. {
  3382. CriticalBlock block(*attributeCS);
  3383. CHqlDynamicAttribute * cur = attributes;
  3384. while (cur)
  3385. {
  3386. if (cur->kind == propKind)
  3387. {
  3388. IHqlExpression * value = cur->value;
  3389. if (value)
  3390. return value;
  3391. return const_cast<CHqlExpression *>(this);
  3392. }
  3393. cur = cur->next;
  3394. }
  3395. return NULL;
  3396. }
  3397. void CHqlExpression::addAttribute(ExprPropKind kind, IHqlExpression * value)
  3398. {
  3399. if (value == this)
  3400. value = NULL;
  3401. CriticalBlock block(*attributeCS);
  3402. //theoretically we should test if the attribute has already been added by another thread, but in practice there is no
  3403. //problem if the attribute is present twice.
  3404. CHqlDynamicAttribute * attr = new CHqlDynamicAttribute(kind, value);
  3405. attr->next = attributes;
  3406. attributes = attr;
  3407. }
  3408. IHqlExpression * CHqlExpression::queryAttribute(ExprPropKind kind)
  3409. {
  3410. IHqlExpression * match = queryExistingAttribute(kind);
  3411. if (match)
  3412. return match;
  3413. switch (kind)
  3414. {
  3415. case EPrecordCount:
  3416. return evaluateAttrRecordCount(this);
  3417. case EPdiskserializedForm:
  3418. return evaluateAttrSerializedForm(this, kind, diskAtom);
  3419. case EPinternalserializedForm:
  3420. return evaluateAttrSerializedForm(this, kind, internalAtom);
  3421. case EPsize:
  3422. return evaluateAttrSize(this);
  3423. case EPaligned:
  3424. return evaluateAttrAligned(this);
  3425. case EPunadorned:
  3426. return evaluateAttrUnadorned(this);
  3427. case EPlocationIndependent:
  3428. return evaluateAttrLocationIndependent(this);
  3429. }
  3430. return NULL;
  3431. }