rtldynfield.cpp 62 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "platform.h"
  14. #include <math.h>
  15. #include <stdio.h>
  16. #include "jmisc.hpp"
  17. #include "jlib.hpp"
  18. #include "eclhelper.hpp"
  19. #include "eclrtl_imp.hpp"
  20. #include "rtldynfield.hpp"
  21. #include "rtlrecord.hpp"
  22. #include "rtlembed.hpp"
  23. //#define TRACE_TRANSLATION
  24. #define VALIDATE_TYPEINFO_HASHES
  25. #define RTLTYPEINFO_FORMAT_1 80 // In case we ever want to support more than one format
  26. //---------------------------------------------------------------------------------------------------------------------
  27. const RtlTypeInfo *FieldTypeInfoStruct::createRtlTypeInfo(IThorIndexCallback *_callback) const
  28. {
  29. const RtlTypeInfo *ret = nullptr;
  30. switch (fieldType & RFTMkind)
  31. {
  32. case type_boolean:
  33. ret = new RtlBoolTypeInfo(fieldType, length);
  34. break;
  35. case type_keyedint:
  36. ret = new RtlKeyedIntTypeInfo(fieldType, length, childType);
  37. break;
  38. case type_int:
  39. ret = new RtlIntTypeInfo(fieldType, length);
  40. break;
  41. case type_filepos:
  42. ret = new RtlFileposTypeInfo(fieldType, length, childType, _callback);
  43. break;
  44. case type_real:
  45. ret = new RtlRealTypeInfo(fieldType, length);
  46. break;
  47. case type_decimal:
  48. ret = new RtlDecimalTypeInfo(fieldType, length);
  49. break;
  50. case type_string:
  51. ret = new RtlStringTypeInfo(fieldType, length);
  52. break;
  53. case type_bitfield:
  54. ret = new RtlBitfieldTypeInfo(fieldType, length);
  55. break;
  56. case type_varstring:
  57. ret = new RtlVarStringTypeInfo(fieldType, length);
  58. break;
  59. case type_data:
  60. ret = new RtlDataTypeInfo(fieldType, length);
  61. break;
  62. case type_table:
  63. assert(childType);
  64. ret = new RtlDatasetTypeInfo(fieldType, length, childType);
  65. break;
  66. case type_set:
  67. assert(childType);
  68. ret = new RtlSetTypeInfo(fieldType, length, childType);
  69. break;
  70. case type_row:
  71. assert(childType);
  72. ret = new RtlRowTypeInfo(fieldType, length, childType);
  73. break;
  74. case type_swapint:
  75. ret = new RtlSwapIntTypeInfo(fieldType, length);
  76. break;
  77. case type_packedint:
  78. ret = new RtlPackedIntTypeInfo(fieldType, length);
  79. break;
  80. case type_qstring:
  81. ret = new RtlQStringTypeInfo(fieldType, length);
  82. break;
  83. case type_unicode:
  84. ret = new RtlUnicodeTypeInfo(fieldType, length, locale);
  85. break;
  86. case type_varunicode:
  87. ret = new RtlVarUnicodeTypeInfo(fieldType, length, locale);
  88. break;
  89. case type_utf8:
  90. ret = new RtlUtf8TypeInfo(fieldType, length, locale);
  91. break;
  92. case type_record:
  93. ret = new RtlRecordTypeInfo(fieldType, length, fieldsArray);
  94. break;
  95. case type_ifblock:
  96. ret = new RtlDynamicIfBlockTypeInfo(fieldType, length, fieldsArray);
  97. break;
  98. default:
  99. throwUnexpected();
  100. }
  101. return ret;
  102. };
  103. typedef MapBetween<const RtlTypeInfo *, const RtlTypeInfo *, StringAttr, const char *> TypeNameMap;
  104. typedef MapBetween<const RtlTypeInfo *, const RtlTypeInfo *, unsigned, unsigned> TypeNumMap;
  105. /**
  106. * class CRtlFieldTypeSerializer
  107. *
  108. * Serializer class for creating json representation of a RtlTypeInfo structure.
  109. *
  110. */
  111. class CRtlFieldTypeSerializer
  112. {
  113. public:
  114. /**
  115. * Serialize a RtlTypeInfo structure to JSON
  116. *
  117. * @param out Buffer for resulting serialized string
  118. * @param type RtlTypeInfo structure to be serialized
  119. * @return Referenced to supplied buffer
  120. */
  121. static StringBuffer &serialize(StringBuffer &out, const RtlTypeInfo *type)
  122. {
  123. CRtlFieldTypeSerializer s(out, type);
  124. s.doSerialize();
  125. return out;
  126. }
  127. private:
  128. CRtlFieldTypeSerializer(StringBuffer &_out, const RtlTypeInfo *_base)
  129. : json(_out), base(_base)
  130. {
  131. }
  132. void doSerialize()
  133. {
  134. json.append("{");
  135. serializeType(base);
  136. json.append("\n}");
  137. }
  138. void serializeType(const RtlTypeInfo *type)
  139. {
  140. if (!serialized(type))
  141. {
  142. // Make sure all child types are serialized first
  143. const RtlTypeInfo *child = type->queryChildType();
  144. if (child)
  145. serializeType(child);
  146. const RtlFieldInfo * const * fields = type->queryFields();
  147. if (fields)
  148. {
  149. for (;;)
  150. {
  151. const RtlFieldInfo * child = *fields;
  152. if (!child)
  153. break;
  154. serializeType(child->type);
  155. fields++;
  156. }
  157. }
  158. // Now serialize this one
  159. if (type != base)
  160. {
  161. VStringBuffer newName("ty%d", ++nextTypeName);
  162. types.setValue(type, newName.str());
  163. startField(newName.str());
  164. serializeMe(type);
  165. closeCurly();
  166. }
  167. else
  168. serializeMe(type);
  169. }
  170. }
  171. void serializeMe(const RtlTypeInfo *type)
  172. {
  173. if (!type->canSerialize())
  174. throw makeStringException(MSGAUD_user, 1000, "DICTIONARY type structures cannot be serialized");
  175. addPropHex("fieldType", type->fieldType);
  176. addProp("length", type->length);
  177. addPropNonEmpty("locale", type->queryLocale());
  178. const RtlTypeInfo *child = type->queryChildType();
  179. if (child)
  180. addPropType("child", child);
  181. const RtlFieldInfo * const * fields = type->queryFields();
  182. if (fields)
  183. {
  184. startFields();
  185. for (;;)
  186. {
  187. const RtlFieldInfo * child = *fields;
  188. if (!child)
  189. break;
  190. newline();
  191. openCurly();
  192. addProp("name", child->name);
  193. addPropType("type", child->type);
  194. addProp("xpath", child->xpath);
  195. if (child->flags)
  196. addPropHex("flags", child->flags);
  197. // initializer is tricky - it's not (in general) a null-terminated string but the actual length is not easily available
  198. if (child->initializer)
  199. {
  200. addProp("init", child->type->size(child->initializer, nullptr), child->initializer);
  201. }
  202. closeCurly();
  203. fields++;
  204. }
  205. endFields();
  206. }
  207. }
  208. bool serialized(const RtlTypeInfo *type)
  209. {
  210. return types.find(type) != nullptr;
  211. }
  212. void startField(const char *name)
  213. {
  214. newline().appendf("\"%s\": ", name);
  215. openCurly();
  216. }
  217. void addProp(const char *propName, const char *propVal)
  218. {
  219. if (propVal)
  220. {
  221. newline();
  222. encodeJSON(json.append("\""), propName).append("\": ");
  223. encodeJSON(json.append("\""), propVal).append("\"");
  224. }
  225. }
  226. void addProp(const char *propName, size32_t propLen, const byte *propVal)
  227. {
  228. if (propVal)
  229. {
  230. newline();
  231. encodeJSON(json.append("\""), propName).append("\": \"");
  232. JBASE64_Encode(propVal, propLen, json, false);
  233. json.append("\"");
  234. }
  235. }
  236. void addPropNonEmpty(const char *propName, const char *propVal)
  237. {
  238. if (propVal && *propVal)
  239. addProp(propName, propVal);
  240. }
  241. void addProp(const char *propName, unsigned propVal)
  242. {
  243. newline().appendf("\"%s\": %u", propName, propVal);
  244. }
  245. void addPropHex(const char *propName, unsigned propVal)
  246. {
  247. newline().appendf("\"%s\": %u", propName, propVal); // Nice idea but json does not support hex constants :(
  248. }
  249. void addPropType(const char *propName, const RtlTypeInfo *type)
  250. {
  251. addProp(propName, queryTypeName(type));
  252. }
  253. const char *queryTypeName(const RtlTypeInfo *type)
  254. {
  255. StringAttr *typeName = types.getValue(type);
  256. assertex(typeName);
  257. return typeName->get();
  258. }
  259. void startFields()
  260. {
  261. newline().appendf("\"fields\": ");
  262. openCurly('[');
  263. }
  264. void endFields()
  265. {
  266. closeCurly(']');
  267. }
  268. StringBuffer &newline()
  269. {
  270. if (commaPending)
  271. json.append(',');
  272. json.appendf("\n%*s", indent, "");
  273. commaPending = true;
  274. return json;
  275. }
  276. void closeCurly(char brace = '}')
  277. {
  278. indent--;
  279. json.appendf("\n%*s%c", indent, "", brace);
  280. commaPending = true;
  281. }
  282. void openCurly(char brace = '{')
  283. {
  284. json.append(brace);
  285. indent++;
  286. commaPending = false;
  287. }
  288. TypeNameMap types;
  289. StringBuffer &json;
  290. const RtlTypeInfo *base = nullptr;
  291. unsigned indent = 1;
  292. unsigned nextTypeName = 0;
  293. bool commaPending = false;
  294. };
  295. class IndexBiasTranslator
  296. {
  297. public:
  298. IndexBiasTranslator(const RtlTypeInfo *type)
  299. {
  300. translatedType = type; // Assume no translation needed until proven otherwise
  301. if (type->getType() != type_record)
  302. return;
  303. const RtlFieldInfo * const * fields = type->queryFields();
  304. if (!fields)
  305. return;
  306. unsigned numFields;
  307. needsTranslation = false;
  308. for (numFields=0;;numFields++)
  309. {
  310. const RtlFieldInfo * child = fields[numFields];
  311. if (!child)
  312. break;
  313. switch (child->type->getType())
  314. {
  315. #if __BYTE_ORDER == __LITTLE_ENDIAN
  316. case type_swapint:
  317. if (!child->type->isUnsigned())
  318. needsTranslation = true;
  319. break;
  320. case type_int:
  321. needsTranslation = true;
  322. break;
  323. #else
  324. case type_int:
  325. if (!child->type->isUnsigned())
  326. needsTranlsation = true;
  327. break;
  328. case type_swapint:
  329. needsTranslation = true;
  330. break;
  331. #endif
  332. }
  333. }
  334. if (!needsTranslation && numFields > 1)
  335. {
  336. // Check if need last field translating to a type_filepos
  337. switch(fields[numFields]->type->getType())
  338. {
  339. case type_int:
  340. case type_swapint:
  341. case type_packedint:
  342. case type_bitfield:
  343. needsTranslation = true;
  344. }
  345. }
  346. if (needsTranslation)
  347. {
  348. translated = new bool[numFields];
  349. RtlFieldInfo * * newFields = new RtlFieldInfo * [numFields+1];
  350. newFields[numFields] = nullptr;
  351. for (unsigned idx = 0; idx < numFields; idx++)
  352. {
  353. newFields[idx] = new RtlFieldInfo(*fields[idx]);
  354. const RtlTypeInfo *newType = createBiasType(fields[idx]->type, idx > 1 && idx == numFields-1);
  355. // MORE - Is it an issue if we don't common these up?
  356. if (newType)
  357. {
  358. newFields[idx]->type = newType;
  359. translated[idx] = true;
  360. }
  361. else
  362. translated[idx] = false;
  363. }
  364. translatedType = new RtlRecordTypeInfo(type->fieldType, type->length, newFields);
  365. }
  366. }
  367. ~IndexBiasTranslator()
  368. {
  369. if (needsTranslation)
  370. {
  371. const RtlFieldInfo * const * fields = translatedType->queryFields();
  372. for (unsigned idx = 0;;idx++)
  373. {
  374. const RtlFieldInfo * child = fields[idx];
  375. if (!child)
  376. break;
  377. if (translated[idx])
  378. child->type->doDelete();
  379. delete child;
  380. }
  381. delete [] fields;
  382. translatedType->doDelete();
  383. delete [] translated;
  384. }
  385. }
  386. const RtlTypeInfo *queryTranslatedType()
  387. {
  388. return translatedType;
  389. }
  390. private:
  391. static const RtlTypeInfo *createBiasType(const RtlTypeInfo *origType, bool isLastField)
  392. {
  393. auto type = origType->getType();
  394. if (type==type_int || type==type_swapint)
  395. {
  396. unsigned flags = origType->fieldType & ~RFTMkind;
  397. unsigned length = origType->length;
  398. if (isLastField)
  399. return new RtlFileposTypeInfo(type_filepos | flags, length, origType, nullptr);
  400. #if __BYTE_ORDER == __LITTLE_ENDIAN
  401. else if (type == type_int || origType->isSigned())
  402. #else
  403. else if (type == type_swapint || origType->isSigned()) // MORE - this may not be right if compiler machine endianness does not match this machine
  404. #endif
  405. return new RtlKeyedIntTypeInfo(type_keyedint | flags, length, origType);
  406. }
  407. return nullptr;
  408. }
  409. const RtlTypeInfo *translatedType = nullptr;
  410. bool needsTranslation = false;
  411. bool *translated = nullptr;
  412. };
  413. class CRtlFieldTypeBinSerializer
  414. {
  415. public:
  416. /**
  417. * Serialize a RtlTypeInfo structure to binary
  418. *
  419. * @param out Buffer for resulting serialized string
  420. * @param type RtlTypeInfo structure to be serialized
  421. * @return Referenced to supplied buffer
  422. */
  423. static MemoryBuffer &serialize(MemoryBuffer &out, const RtlTypeInfo *type, bool applyBias)
  424. {
  425. int oldEnd = out.setEndian(__LITTLE_ENDIAN);
  426. CRtlFieldTypeBinSerializer s(out);
  427. byte format = RTLTYPEINFO_FORMAT_1;
  428. out.append(format);
  429. DelayedMarker<hash64_t> hash(out);
  430. DelayedSizeMarker size(out);
  431. size32_t pos = out.length();
  432. if (applyBias)
  433. {
  434. IndexBiasTranslator translator(type);
  435. s.serializeType(translator.queryTranslatedType());
  436. }
  437. else
  438. s.serializeType(type);
  439. size.write();
  440. hash.write(rtlHash64Data(size.size(), out.toByteArray()+pos, 0));
  441. out.setEndian(oldEnd);
  442. return out;
  443. }
  444. private:
  445. CRtlFieldTypeBinSerializer(MemoryBuffer &_out)
  446. : out(_out)
  447. {
  448. }
  449. void serializeType(const RtlTypeInfo *type)
  450. {
  451. if (!serialized(type))
  452. {
  453. // Make sure all child types are serialized first
  454. const RtlTypeInfo *child = type->queryChildType();
  455. if (child)
  456. serializeType(child);
  457. const RtlFieldInfo * const * fields = type->queryFields();
  458. if (fields)
  459. {
  460. for (unsigned idx = 0;;idx++)
  461. {
  462. const RtlFieldInfo * child = fields[idx];
  463. if (!child)
  464. break;
  465. serializeType(child->type);
  466. }
  467. }
  468. // Now serialize this one
  469. types.setValue(type, nextTypeNum++);
  470. serializeMe(type);
  471. }
  472. }
  473. void serializeMe(const RtlTypeInfo *type)
  474. {
  475. if (!type->canSerialize())
  476. throw makeStringException(MSGAUD_user, 1000, "DICTIONARY type structures cannot be serialized");
  477. unsigned fieldType = type->fieldType;
  478. const char *locale = type->queryLocale();
  479. if (locale && *locale)
  480. fieldType |= RFTMhasLocale;
  481. const RtlTypeInfo *child = type->queryChildType();
  482. if (child)
  483. fieldType |= RFTMhasChildType;
  484. const RtlFieldInfo * const * fields = type->queryFields();
  485. if (fields)
  486. fieldType |= RFTMhasFields;
  487. out.append(fieldType);
  488. out.appendPacked(type->length);
  489. if (fieldType & RFTMhasLocale)
  490. out.append(locale);
  491. if (child)
  492. out.appendPacked(queryTypeIdx(child));
  493. if (fields)
  494. {
  495. unsigned count = countFields(fields);
  496. out.appendPacked(count);
  497. for (;;)
  498. {
  499. const RtlFieldInfo * child = *fields;
  500. if (!child)
  501. break;
  502. out.append(child->name);
  503. out.appendPacked(queryTypeIdx(child->type));
  504. unsigned flags = child->flags;
  505. if (child->xpath)
  506. flags |= RFTMhasXpath;
  507. if (child->initializer)
  508. flags |= RFTMhasInitializer;
  509. out.append(flags);
  510. if (child->xpath)
  511. out.append(child->xpath);
  512. // initializer is tricky - it's not (in general) a null-terminated string but the actual length is not easily available
  513. if (child->initializer)
  514. {
  515. unsigned initLength = child->type->size(child->initializer, nullptr);
  516. out.appendPacked(initLength).append(initLength, child->initializer);
  517. }
  518. fields++;
  519. }
  520. }
  521. }
  522. bool serialized(const RtlTypeInfo *type)
  523. {
  524. return types.find(type) != nullptr;
  525. }
  526. unsigned queryTypeIdx(const RtlTypeInfo *type)
  527. {
  528. unsigned *typeNum = types.getValue(type);
  529. assertex(typeNum);
  530. return *typeNum;
  531. }
  532. TypeNumMap types;
  533. MemoryBuffer &out;
  534. unsigned nextTypeNum = 0;
  535. };
  536. /**
  537. * class CRtlFieldTypeDeserializer
  538. *
  539. * Deserializer class for creating a RtlTypeInfo structure from json representation.
  540. *
  541. * Note that the resulting RtlTypeInfo structures are owned by this object and will be
  542. * destroyed when this object is destroyed.
  543. *
  544. */
  545. class CRtlFieldTypeDeserializer : public CInterfaceOf<IRtlFieldTypeDeserializer>
  546. {
  547. public:
  548. /**
  549. * CRtlFieldTypeDeserializer constructor
  550. *
  551. * @param _callback Supplies a callback to be used for blobs/filepositions.
  552. */
  553. CRtlFieldTypeDeserializer(IThorIndexCallback *_callback)
  554. : callback(_callback)
  555. {
  556. }
  557. /**
  558. * CRtlFieldTypeDeserializer destructor
  559. * <p>
  560. * Releases all RtlTypeInfo and related structures created by this deserializer
  561. */
  562. ~CRtlFieldTypeDeserializer()
  563. {
  564. // Need some care - all the RtlTypeInfo objects I created need to be destroyed, together with anything else I had to create
  565. // Strings (other than the init strings) are preserved in the AtomTable
  566. HashIterator allTypes(types);
  567. ForEach(allTypes)
  568. {
  569. const RtlTypeInfo **type = types.mapToValue(&allTypes.query());
  570. cleanupType(*type);
  571. }
  572. cleanupType(base);
  573. }
  574. /**
  575. * Obtain the deserialized type information
  576. * <p>
  577. * Note that the RtlTypeInfo objects are not link-counted, so the lifetime of these objects
  578. * is determined by the lifetime of the deserializer. They will be released once the deserializer
  579. * that created them is deleted.
  580. * <p>
  581. * Do not call more than once.
  582. *
  583. * @param _json JSON text to be deserialized, as created by CRtlFieldTypeSerializer
  584. * @return Deserialized type object
  585. */
  586. virtual const RtlTypeInfo *deserialize(const char *json) override
  587. {
  588. assertex(!base);
  589. Owned<IPropertyTree> jsonTree = createPTreeFromJSONString(json);
  590. base = deserializeType(jsonTree, jsonTree);
  591. return base;
  592. }
  593. /**
  594. * Obtain the deserialized type information
  595. * <p>
  596. * Note that the RtlTypeInfo objects are not link-counted, so the lifetime of these objects
  597. * is determined by the lifetime of the deserializer. They will be released once the deserializer
  598. * that created them is deleted.
  599. * <p>
  600. * Do not call more than once.
  601. *
  602. * @param _jsonTree JSON property tree to be deserialized, as created by CRtlFieldTypeSerializer
  603. * @return Deserialized type object
  604. */
  605. virtual const RtlTypeInfo *deserialize(IPropertyTree &jsonTree) override
  606. {
  607. assertex(!base);
  608. base = deserializeType(&jsonTree, &jsonTree);
  609. return base;
  610. }
  611. /**
  612. * Obtain the deserialized type information
  613. * <p>
  614. * Note that the RtlTypeInfo objects are not link-counted, so the lifetime of these objects
  615. * is determined by the lifetime of the deserializer. They will be released once the deserializer
  616. * that created them is deleted.
  617. * <p>
  618. * Do not call more than once.
  619. *
  620. * @param buf Binary serialized typeinfo to be deserialized, as created by CRtlFieldTypeSerializer
  621. * @return Deserialized type object
  622. */
  623. virtual const RtlTypeInfo *deserialize(MemoryBuffer &buf) override
  624. {
  625. assertex(!base);
  626. unsigned nextTypeNum = 0;
  627. int oldEndian = buf.setEndian(__LITTLE_ENDIAN);
  628. try
  629. {
  630. byte format;
  631. buf.read(format);
  632. if (format != RTLTYPEINFO_FORMAT_1)
  633. throw MakeStringException(0, "Invalid type info (%d) in CRtlFieldTypeDeserializer::deserialize", format);
  634. hash64_t hash;
  635. buf.read(hash);
  636. size32_t size;
  637. buf.read(size);
  638. #ifdef VALIDATE_TYPEINFO_HASHES
  639. hash64_t expected = rtlHash64Data(size, buf.readDirect(0), 0);
  640. if (expected != hash)
  641. throw MakeStringException(0, "Invalid type info hash in CRtlFieldTypeDeserializer::deserialize");
  642. #endif
  643. size32_t endpos = buf.getPos() + size;
  644. while (buf.getPos() < endpos)
  645. {
  646. if (base)
  647. {
  648. addType(base, nextTypeNum++);
  649. base = nullptr; // in case of exceptions...
  650. }
  651. base = deserializeType(buf);
  652. }
  653. if (buf.getPos()!=endpos)
  654. throw MakeStringException(0, "Invalid type info (incorrect size data) in CRtlFieldTypeDeserializer::deserialize");
  655. buf.setEndian(oldEndian);
  656. return base;
  657. }
  658. catch(...)
  659. {
  660. buf.setEndian(oldEndian);
  661. throw;
  662. }
  663. }
  664. virtual const RtlTypeInfo *addType(FieldTypeInfoStruct &info, const ITypeInfo *type) override
  665. {
  666. VStringBuffer name("%p", type);
  667. const RtlTypeInfo ** found = types.getValue(name);
  668. if (found)
  669. return *found;
  670. info.locale = keep(info.locale);
  671. const RtlTypeInfo * ret = info.createRtlTypeInfo(callback);
  672. types.setValue(name, ret);
  673. return ret;
  674. }
  675. virtual const RtlTypeInfo *lookupType(const ITypeInfo *type) const override
  676. {
  677. VStringBuffer name("%p", type);
  678. const RtlTypeInfo ** found = types.getValue(name);
  679. if (found)
  680. return *found;
  681. return nullptr;
  682. }
  683. virtual const RtlFieldInfo *addFieldInfo(const char *fieldName, const char *xpath, const RtlTypeInfo *type, unsigned flags, const char *init) override
  684. {
  685. // MORE - we could hang onto this for cleanup, rather than assuming that we keep it via a later addType() call?
  686. return new RtlFieldStrInfo(keep(fieldName), keep(xpath), type, flags, init);
  687. }
  688. private:
  689. KeptAtomTable atoms; // Used to ensure proper lifetime of strings used in type structures
  690. MapStringTo<const RtlTypeInfo *> types; // Ensures structures only generated once
  691. const RtlTypeInfo *base = nullptr; // Holds the resulting type
  692. IThorIndexCallback *callback = nullptr;
  693. void cleanupType(const RtlTypeInfo *type)
  694. {
  695. if (type)
  696. {
  697. // Releases all memory for a single RtlTypeInfo object
  698. const RtlFieldInfo * const * fields = type->queryFields();
  699. if (fields)
  700. {
  701. const RtlFieldInfo * const * cur = fields;
  702. for (;;)
  703. {
  704. const RtlFieldInfo * child = *cur;
  705. if (!child)
  706. break;
  707. // We don't need to delete other strings - they are owned by atom table.
  708. // But the initializer is decoded and thus owned by me
  709. delete child->initializer;
  710. delete child;
  711. cur++;
  712. }
  713. delete [] fields;
  714. }
  715. type->doDelete();
  716. }
  717. }
  718. const RtlTypeInfo *lookupType(const char *name, IPropertyTree *all)
  719. {
  720. const RtlTypeInfo ** found = types.getValue(name);
  721. if (found)
  722. return *found;
  723. const RtlTypeInfo *type = deserializeType(all->queryPropTree(name), all);
  724. types.setValue(name, type);
  725. return type;
  726. }
  727. const RtlTypeInfo *lookupType(unsigned idx)
  728. {
  729. // Could keep an expanding array of types instead - but the hash table is already there for json support...
  730. VStringBuffer key("%u", idx);
  731. const RtlTypeInfo ** found = types.getValue(key);
  732. if (found)
  733. return *found;
  734. throw makeStringException(-1, "Invalid serialized type information");
  735. }
  736. void addType(const RtlTypeInfo *type, unsigned idx)
  737. {
  738. VStringBuffer key("%u", idx);
  739. assert(types.getValue(key)==nullptr);
  740. types.setValue(key, type);
  741. }
  742. const char *keep(const char *string)
  743. {
  744. if (string)
  745. return str(atoms.addAtom(string));
  746. else
  747. return nullptr;
  748. }
  749. const RtlTypeInfo *deserializeType(IPropertyTree *type, IPropertyTree *all)
  750. {
  751. FieldTypeInfoStruct info;
  752. info.fieldType = type->getPropInt("fieldType");
  753. info.length = type->getPropInt("length");
  754. info.locale = keep(type->queryProp("locale"));
  755. const char *child = type->queryProp("child");
  756. if (child)
  757. info.childType = lookupType(child, all);
  758. if ((info.fieldType & RFTMkind) == type_record)
  759. {
  760. unsigned numFields = type->getCount("fields");
  761. info.fieldsArray = new const RtlFieldInfo * [numFields+1];
  762. info.fieldsArray[numFields] = nullptr;
  763. Owned<IPropertyTreeIterator> fields = type->getElements("fields");
  764. unsigned n = 0;
  765. ForEach(*fields)
  766. {
  767. IPropertyTree &field = fields->query();
  768. const char *fieldTypeName = field.queryProp("type");
  769. const char *fieldName = keep(field.queryProp("name"));
  770. const char *fieldXpath = keep(field.queryProp("xpath"));
  771. unsigned flags = field.getPropInt("flags");
  772. const char *fieldInit = field.queryProp("init");
  773. if (fieldInit)
  774. {
  775. StringBuffer decoded;
  776. JBASE64_Decode(fieldInit, decoded);
  777. fieldInit = decoded.detach(); // NOTE - this gets freed in cleanupType()
  778. }
  779. info.fieldsArray[n] = new RtlFieldStrInfo(fieldName, fieldXpath, lookupType(fieldTypeName, all), flags, fieldInit);
  780. n++;
  781. }
  782. }
  783. return info.createRtlTypeInfo(callback);
  784. }
  785. const RtlTypeInfo *deserializeType(MemoryBuffer &type)
  786. {
  787. FieldTypeInfoStruct info;
  788. type.read(info.fieldType);
  789. type.readPacked(info.length);
  790. if (info.fieldType & RFTMhasLocale)
  791. {
  792. const char *locale;
  793. type.read(locale);
  794. info.locale = keep(locale);
  795. }
  796. if (info.fieldType & RFTMhasChildType)
  797. {
  798. unsigned childIdx;
  799. type.readPacked(childIdx);
  800. info.childType = lookupType(childIdx);
  801. }
  802. if (info.fieldType & RFTMhasFields)
  803. {
  804. unsigned numFields;
  805. type.readPacked(numFields);
  806. info.fieldsArray = new const RtlFieldInfo * [numFields+1];
  807. info.fieldsArray[numFields] = nullptr;
  808. for (int n = 0; n < numFields; n++)
  809. {
  810. const char *fieldName;
  811. type.read(fieldName);
  812. unsigned fieldType;
  813. type.readPacked(fieldType);
  814. unsigned fieldFlags;
  815. type.read(fieldFlags);
  816. const char *xpath = nullptr;
  817. if (fieldFlags & RFTMhasXpath)
  818. type.read(xpath);
  819. void *init = nullptr;
  820. if (fieldFlags & RFTMhasInitializer)
  821. {
  822. unsigned initLength;
  823. type.readPacked(initLength);
  824. init = malloc(initLength);
  825. memcpy(init, type.readDirect(initLength), initLength);
  826. }
  827. fieldFlags &= ~RFTMserializerFlags;
  828. info.fieldsArray[n] = new RtlFieldStrInfo(keep(fieldName), keep(xpath), lookupType(fieldType), fieldFlags, (const char *) init);
  829. }
  830. }
  831. info.fieldType &= ~RFTMserializerFlags;
  832. return info.createRtlTypeInfo(callback);
  833. }
  834. void patchIndexFilePos()
  835. {
  836. if (callback && (base->fieldType & RFTMkind) == type_record)
  837. {
  838. // Yukky hack time
  839. // Assumes that the fieldinfo is not shared...
  840. // But that is also assumed by the code that cleans them up.
  841. const RtlFieldInfo * const *fields = base->queryFields();
  842. for(;;)
  843. {
  844. const RtlFieldInfo *field = *fields++;
  845. if (!field)
  846. break;
  847. if (field->type->getType() == type_filepos) // probably blobs too?
  848. {
  849. static_cast<RtlFileposTypeInfo *>(const_cast<RtlTypeInfo *>(field->type))->setCallback(callback);
  850. }
  851. }
  852. }
  853. }
  854. };
  855. extern ECLRTL_API IRtlFieldTypeDeserializer *createRtlFieldTypeDeserializer(IThorIndexCallback *callback)
  856. {
  857. return new CRtlFieldTypeDeserializer(callback);
  858. }
  859. extern ECLRTL_API StringBuffer &dumpTypeInfo(StringBuffer &ret, const RtlTypeInfo *t)
  860. {
  861. return CRtlFieldTypeSerializer::serialize(ret, t);
  862. }
  863. extern ECLRTL_API MemoryBuffer &dumpTypeInfo(MemoryBuffer &ret, const RtlTypeInfo *t, bool useBias)
  864. {
  865. return CRtlFieldTypeBinSerializer::serialize(ret, t, useBias);
  866. }
  867. extern ECLRTL_API void serializeRecordType(size32_t & __lenResult, void * & __result, IOutputMetaData & metaVal)
  868. {
  869. MemoryBuffer ret;
  870. CRtlFieldTypeBinSerializer::serialize(ret, metaVal.queryTypeInfo(), false);
  871. __lenResult = ret.length();
  872. __result = ret.detach();
  873. }
  874. extern ECLRTL_API void dumpRecordType(size32_t & __lenResult,char * & __result,IOutputMetaData &metaVal)
  875. {
  876. StringBuffer ret;
  877. CRtlFieldTypeSerializer::serialize(ret, metaVal.queryTypeInfo());
  878. #ifdef _DEBUG
  879. StringBuffer ret2;
  880. CRtlFieldTypeDeserializer deserializer(nullptr);
  881. CRtlFieldTypeSerializer::serialize(ret2, deserializer.deserialize(ret));
  882. assert(streq(ret, ret2));
  883. MemoryBuffer out;
  884. CRtlFieldTypeBinSerializer::serialize(out, metaVal.queryTypeInfo(), false);
  885. CRtlFieldTypeDeserializer bindeserializer(nullptr);
  886. CRtlFieldTypeSerializer::serialize(ret2.clear(), bindeserializer.deserialize(out));
  887. assert(streq(ret, ret2));
  888. #endif
  889. __lenResult = ret.length();
  890. __result = ret.detach();
  891. }
  892. extern ECLRTL_API void getFieldVal(size32_t & __lenResult,char * & __result, int column, IOutputMetaData & metaVal, const byte *row)
  893. {
  894. __lenResult = 0;
  895. __result = nullptr;
  896. if (column >= 0)
  897. {
  898. const RtlRecord &r = metaVal.queryRecordAccessor(true);
  899. unsigned numOffsets = r.getNumVarFields() + 1;
  900. size_t * variableOffsets = (size_t *)alloca(numOffsets * sizeof(size_t));
  901. RtlRow offsetCalculator(r, row, numOffsets, variableOffsets);
  902. offsetCalculator.getUtf8(__lenResult, __result, column);
  903. }
  904. }
  905. extern ECLRTL_API int getFieldNum(const char *fieldName, IOutputMetaData & metaVal)
  906. {
  907. const RtlRecord r = metaVal.queryRecordAccessor(true);
  908. return r.getFieldNum(fieldName);
  909. }
  enum FieldMatchType {
      // On a field, exactly one of the below is set, but translator returns a bitmap indicating
      // which were required (and we can restrict translation to allow some types but not others)
      match_perfect   = 0,       // exact type match - use memcpy
      match_link      = 1 << 0,  // copy a nested dataset by linking
      match_move      = 1 << 1,  // at least one field has moved (set on translator)
      match_remove    = 1 << 2,  // at least one field has been removed (set on translator)
      match_truncate  = 1 << 3,  // dest is truncated copy of source - use memcpy
      match_extend    = 1 << 4,  // dest is padded version of source - use memcpy and memset
      match_typecast  = 1 << 5,  // type has changed - cast required
      match_none      = 1 << 6,  // No matching field in source - use null value
      match_recurse   = 1 << 7,  // Use recursive translator for child records/datasets
      match_fail      = 1 << 8,  // no translation possible
      // This flag may be set in conjunction with the others
      match_inifblock = 1 << 9,  // matching to a field in an ifblock - may not be present
  };
  926. StringBuffer &describeFlags(StringBuffer &out, FieldMatchType flags)
  927. {
  928. if (flags == match_perfect)
  929. return out.append("perfect");
  930. unsigned origlen = out.length();
  931. if (flags & match_link) out.append("|link");
  932. if (flags & match_move) out.append("|move");
  933. if (flags & match_remove) out.append("|remove");
  934. if (flags & match_truncate) out.append("|truncate");
  935. if (flags & match_extend) out.append("|extend");
  936. if (flags & match_typecast) out.append("|typecast");
  937. if (flags & match_none) out.append("|none");
  938. if (flags & match_recurse) out.append("|recurse");
  939. if (flags & match_inifblock) out.append("|ifblock");
  940. if (flags & match_fail) out.append("|fail");
  941. assertex(out.length() > origlen);
  942. return out.remove(origlen, 1);
  943. }
  944. inline constexpr FieldMatchType operator|(FieldMatchType a, FieldMatchType b) { return (FieldMatchType)((int)a | (int)b); }
  945. inline FieldMatchType &operator|=(FieldMatchType &a, FieldMatchType b) { return (FieldMatchType &) ((int &)a |= (int)b); }
  946. class GeneralRecordTranslator : public CInterfaceOf<IDynamicTransform>
  947. {
  948. public:
  949. GeneralRecordTranslator(const RtlRecord &_destRecInfo, const RtlRecord &_srcRecInfo)
  950. : destRecInfo(_destRecInfo), sourceRecInfo(_srcRecInfo)
  951. {
  952. matchInfo = new MatchInfo[destRecInfo.getNumFields()];
  953. createMatchInfo();
  954. }
  955. ~GeneralRecordTranslator()
  956. {
  957. delete [] matchInfo;
  958. }
  959. virtual void describe() const override
  960. {
  961. doDescribe(0);
  962. }
  963. virtual size32_t translate(ARowBuilder &builder, const byte *sourceRec) const override
  964. {
  965. dbgassertex(canTranslate());
  966. return doTranslate(builder, 0, sourceRec);
  967. }
  968. virtual bool canTranslate() const override
  969. {
  970. return (matchFlags & match_fail) == 0;
  971. }
  972. virtual bool needsTranslate() const override
  973. {
  974. return (matchFlags & ~match_link) != 0;
  975. }
  976. private:
  977. void doDescribe(unsigned indent) const
  978. {
  979. for (unsigned idx = 0; idx < destRecInfo.getNumFields(); idx++)
  980. {
  981. const char *source = destRecInfo.queryName(idx);
  982. const MatchInfo &match = matchInfo[idx];
  983. if (match.matchType == match_none)
  984. DBGLOG("%*sNo match for field %s - default value will be used", indent, "", source);
  985. else
  986. {
  987. StringBuffer matchStr;
  988. DBGLOG("%*sMatch (%s) to field %d for field %s (%x)", indent, "", describeFlags(matchStr, match.matchType).str(), match.matchIdx, source, destRecInfo.queryType(idx)->fieldType);
  989. if (match.subTrans)
  990. match.subTrans->doDescribe(indent+2);
  991. }
  992. }
  993. if (!canTranslate())
  994. DBGLOG("%*sTranslation is NOT possible", indent, "");
  995. else if (needsTranslate())
  996. {
  997. StringBuffer matchStr;
  998. DBGLOG("%*sTranslation is possible (%s)", indent, "", describeFlags(matchStr, matchFlags).str());
  999. }
  1000. else
  1001. DBGLOG("%*sTranslation is not necessary", indent, "");
  1002. }
  1003. size32_t doTranslate(ARowBuilder &builder, size32_t offset, const byte *sourceRec) const
  1004. {
  1005. unsigned numOffsets = sourceRecInfo.getNumVarFields() + 1;
  1006. size_t * variableOffsets = (size_t *)alloca(numOffsets * sizeof(size_t));
  1007. byte * destConditions = (byte *)alloca(destRecInfo.getNumIfBlocks() * sizeof(byte));
  1008. RtlRow sourceRow(sourceRecInfo, sourceRec, numOffsets, variableOffsets);
  1009. size32_t estimate = destRecInfo.getFixedSize();
  1010. if (!estimate)
  1011. {
  1012. estimate = estimateNewSize(sourceRow);
  1013. builder.ensureCapacity(offset+estimate, "record");
  1014. }
  1015. size32_t origOffset = offset;
  1016. for (unsigned idx = 0; idx < destRecInfo.getNumFields(); idx++)
  1017. {
  1018. const RtlFieldInfo *field = destRecInfo.queryField(idx);
  1019. if (field->omitable() && destRecInfo.excluded(field, builder.getSelf(), destConditions))
  1020. continue;
  1021. const RtlTypeInfo *type = field->type;
  1022. const MatchInfo &match = matchInfo[idx];
  1023. if (match.matchType == match_none || match.matchType==match_fail)
  1024. offset = type->buildNull(builder, offset, field);
  1025. else
  1026. {
  1027. unsigned matchField = match.matchIdx;
  1028. const RtlTypeInfo *sourceType = sourceRecInfo.queryType(matchField);
  1029. size_t sourceOffset = sourceRow.getOffset(matchField);
  1030. const byte *source = sourceRec + sourceOffset;
  1031. size_t copySize = sourceRow.getSize(matchField);
  1032. if (copySize == 0 && (match.matchType & match_inifblock)) // Field is missing because of an ifblock - use default value
  1033. {
  1034. offset = type->buildNull(builder, offset, field);
  1035. }
  1036. else
  1037. {
  1038. switch (match.matchType & ~match_inifblock)
  1039. {
  1040. case match_perfect:
  1041. {
  1042. // Look ahead for other perfect matches and combine the copies
  1043. while (idx < destRecInfo.getNumFields()-1)
  1044. {
  1045. const MatchInfo &nextMatch = matchInfo[idx+1];
  1046. if (nextMatch.matchType == match_perfect && nextMatch.matchIdx == matchField+1)
  1047. {
  1048. idx++;
  1049. matchField++;
  1050. }
  1051. else
  1052. break;
  1053. }
  1054. size_t copySize = sourceRow.getOffset(matchField+1) - sourceOffset;
  1055. builder.ensureCapacity(offset+copySize, field->name);
  1056. memcpy(builder.getSelf()+offset, source, copySize);
  1057. offset += copySize;
  1058. break;
  1059. }
  1060. case match_truncate:
  1061. {
  1062. assert(type->isFixedSize());
  1063. size32_t copySize = type->getMinSize();
  1064. builder.ensureCapacity(offset+copySize, field->name);
  1065. memcpy(builder.getSelf()+offset, source, copySize);
  1066. offset += copySize;
  1067. break;
  1068. }
  1069. case match_extend:
  1070. {
  1071. assert(type->isFixedSize());
  1072. size32_t destSize = type->getMinSize();
  1073. builder.ensureCapacity(offset+destSize, field->name);
  1074. memcpy(builder.getSelf()+offset, source, copySize);
  1075. offset += copySize;
  1076. unsigned fillSize = destSize - copySize;
  1077. memset(builder.getSelf()+offset, match.fillChar, fillSize);
  1078. offset += fillSize;
  1079. break;
  1080. }
  1081. case match_typecast:
  1082. offset = translateScalar(builder, offset, field, type, sourceType, source);
  1083. break;
  1084. case match_link:
  1085. {
  1086. // a 32-bit record count, and a (linked) pointer to an array of record pointers
  1087. byte *dest = builder.ensureCapacity(offset+sizeof(size32_t)+sizeof(const byte **), field->name)+offset;
  1088. *(size32_t *)dest = *(size32_t *)source;
  1089. *(const byte ***)(dest + sizeof(size32_t)) = rtlLinkRowset(*(const byte ***)(source + sizeof(size32_t)));
  1090. offset += sizeof(size32_t)+sizeof(const byte **);
  1091. break;
  1092. }
  1093. case match_recurse:
  1094. if (type->getType()==type_record)
  1095. offset = match.subTrans->doTranslate(builder, offset, source);
  1096. else if (type->isLinkCounted())
  1097. {
  1098. // a 32-bit record count, and a pointer to an array of record pointers
  1099. IEngineRowAllocator *childAllocator = builder.queryAllocator()->createChildRowAllocator(type->queryChildType());
  1100. assertex(childAllocator); // May not be available when using serialized types (but unlikely to want to create linkcounted children remotely either)
  1101. size32_t sizeInBytes = sizeof(size32_t) + sizeof(void *);
  1102. builder.ensureCapacity(offset+sizeInBytes, field->name);
  1103. size32_t numRows = 0;
  1104. const byte **childRows = nullptr;
  1105. if (sourceType->isLinkCounted())
  1106. {
  1107. // a 32-bit count, then a pointer to the source rows
  1108. size32_t childCount = *(size32_t *) source;
  1109. source += sizeof(size32_t);
  1110. const byte ** sourceRows = *(const byte***) source;
  1111. for (size32_t childRow = 0; childRow < childCount; childRow++)
  1112. {
  1113. RtlDynamicRowBuilder childBuilder(*childAllocator);
  1114. size32_t childLen = match.subTrans->doTranslate(childBuilder, 0, sourceRows[childRow]);
  1115. childRows = childAllocator->appendRowOwn(childRows, ++numRows, (void *) childBuilder.finalizeRowClear(childLen));
  1116. }
  1117. }
  1118. else
  1119. {
  1120. // a 32-bit size, then rows inline
  1121. size32_t childSize = *(size32_t *) source;
  1122. source += sizeof(size32_t);
  1123. const byte *initialSource = source;
  1124. while ((size_t)(source - initialSource) < childSize)
  1125. {
  1126. RtlDynamicRowBuilder childBuilder(*childAllocator);
  1127. size32_t childLen = match.subTrans->doTranslate(childBuilder, 0, source);
  1128. childRows = childAllocator->appendRowOwn(childRows, ++numRows, (void *) childBuilder.finalizeRowClear(childLen));
  1129. source += sourceType->queryChildType()->size(source, nullptr); // MORE - shame to repeat a calculation that the translate above almost certainly just did
  1130. }
  1131. }
  1132. // Go back in and patch the count, remembering it may have moved
  1133. rtlWriteInt4(builder.getSelf()+offset, numRows);
  1134. * ( const void * * ) (builder.getSelf()+offset+sizeof(size32_t)) = childRows;
  1135. offset += sizeInBytes;
  1136. }
  1137. else
  1138. {
  1139. size32_t countOffset = offset;
  1140. byte *dest = builder.ensureCapacity(offset+sizeof(size32_t), field->name)+offset;
  1141. offset += sizeof(size32_t);
  1142. size32_t initialOffset = offset;
  1143. *(size32_t *)dest = 0; // patched below when true figure known
  1144. if (sourceType->isLinkCounted())
  1145. {
  1146. // a 32-bit count, then a pointer to the source rows
  1147. size32_t childCount = *(size32_t *) source;
  1148. source += sizeof(size32_t);
  1149. const byte ** sourceRows = *(const byte***) source;
  1150. for (size32_t childRow = 0; childRow < childCount; childRow++)
  1151. {
  1152. offset = match.subTrans->doTranslate(builder, offset, sourceRows[childRow]);
  1153. }
  1154. }
  1155. else
  1156. {
  1157. // a 32-bit size, then rows inline
  1158. size32_t childSize = *(size32_t *) source;
  1159. source += sizeof(size32_t);
  1160. const byte *initialSource = source;
  1161. while ((size_t)(source - initialSource) < childSize)
  1162. {
  1163. offset = match.subTrans->doTranslate(builder, offset, source);
  1164. source += sourceType->queryChildType()->size(source, nullptr); // MORE - shame to repeat a calculation that the translate above almost certainly just did
  1165. }
  1166. }
  1167. dest = builder.getSelf() + countOffset; // Note - may have been moved by reallocs since last calculated
  1168. *(size32_t *)dest = offset - initialOffset;
  1169. }
  1170. break;
  1171. default:
  1172. throwUnexpected();
  1173. }
  1174. }
  1175. }
  1176. }
  1177. if (estimate && offset-origOffset != estimate)
  1178. {
  1179. // Note - ifblocks make this assertion invalid. We do not account for potentially omitted fields
  1180. // when estimating target record size.
  1181. if (!destRecInfo.getNumIfBlocks())
  1182. assert(offset-origOffset > estimate); // Estimate is always supposed to be conservative
  1183. #ifdef TRACE_TRANSLATION
  1184. DBGLOG("Wrote %u bytes to record (estimate was %u)\n", offset-origOffset, estimate);
  1185. #endif
  1186. }
  1187. return offset;
  1188. }
  1189. inline FieldMatchType match() const
  1190. {
  1191. return matchFlags;
  1192. }
  1193. const RtlRecord &destRecInfo;
  1194. const RtlRecord &sourceRecInfo;
  1195. unsigned fixedDelta = 0; // total size of all fixed-size source fields that are not matched
  1196. UnsignedArray unmatched; // List of all variable-size source fields that are unmatched
  1197. FieldMatchType matchFlags = match_perfect;
  1198. struct MatchInfo
  1199. {
  1200. unsigned matchIdx = 0;
  1201. FieldMatchType matchType = match_fail;
  1202. char fillChar = 0;
  1203. GeneralRecordTranslator *subTrans = nullptr;
  1204. ~MatchInfo()
  1205. {
  1206. delete subTrans;
  1207. }
  1208. } *matchInfo;
  1209. static size32_t translateScalar(ARowBuilder &builder, size32_t offset, const RtlFieldInfo *field, const RtlTypeInfo *destType, const RtlTypeInfo *sourceType, const byte *source)
  1210. {
  1211. // This code COULD move into rtlfield.cpp?
  1212. switch(destType->getType())
  1213. {
  1214. case type_filepos:
  1215. case type_boolean:
  1216. case type_int:
  1217. case type_swapint:
  1218. case type_packedint:
  1219. offset = destType->buildInt(builder, offset, field, sourceType->getInt(source));
  1220. break;
  1221. case type_real:
  1222. offset = destType->buildReal(builder, offset, field, sourceType->getReal(source));
  1223. break;
  1224. case type_decimal: // Go via string - not common enough to special-case
  1225. case type_data:
  1226. case type_string:
  1227. case type_varstring:
  1228. case type_qstring:
  1229. {
  1230. size32_t size;
  1231. rtlDataAttr text;
  1232. sourceType->getString(size, text.refstr(), source);
  1233. offset = destType->buildString(builder, offset, field, size, text.getstr());
  1234. break;
  1235. }
  1236. case type_unicode:
  1237. case type_varunicode:
  1238. case type_utf8:
  1239. {
  1240. size32_t utf8chars;
  1241. rtlDataAttr utf8Text;
  1242. sourceType->getUtf8(utf8chars, utf8Text.refstr(), source);
  1243. offset = destType->buildUtf8(builder, offset, field, utf8chars, utf8Text.getstr());
  1244. break;
  1245. }
  1246. case type_set:
  1247. {
  1248. bool isAll = *(bool *) source;
  1249. source+= sizeof(bool);
  1250. byte *dest = builder.ensureCapacity(offset+sizeof(bool)+sizeof(size32_t), field->name)+offset;
  1251. *(size32_t *) (dest + sizeof(bool)) = 0; // Patch later when size known
  1252. offset += sizeof(bool) + sizeof(size32_t);
  1253. if (isAll)
  1254. {
  1255. *(bool*) dest = true;
  1256. }
  1257. else
  1258. {
  1259. *(bool*) dest = false;
  1260. size32_t sizeOffset = offset - sizeof(size32_t); // Where we need to patch
  1261. size32_t childSize = *(size32_t *)source;
  1262. source += sizeof(size32_t);
  1263. const byte *initialSource = source;
  1264. size32_t initialOffset = offset;
  1265. const RtlTypeInfo *destChildType = destType->queryChildType();
  1266. const RtlTypeInfo *sourceChildType = sourceType->queryChildType();
  1267. while ((size_t)(source - initialSource) < childSize)
  1268. {
  1269. offset = translateScalar(builder, offset, field, destChildType, sourceChildType, source);
  1270. source += sourceChildType->size(source, nullptr); // MORE - shame to repeat a calculation that the translate above almost certainly just did
  1271. }
  1272. dest = builder.getSelf() + sizeOffset; // Note - man have been moved by reallocs since last calculated
  1273. *(size32_t *)dest = offset - initialOffset;
  1274. }
  1275. break;
  1276. }
  1277. default:
  1278. throwUnexpected();
  1279. }
  1280. return offset;
  1281. }
  1282. size32_t estimateNewSize(const RtlRow &sourceRow) const
  1283. {
  1284. //DBGLOG("Source record size is %d", (int) sourceRow.getRecordSize());
  1285. size32_t expectedSize = sourceRow.getRecordSize() - fixedDelta;
  1286. //DBGLOG("Source record size without omitted fixed size fields is %d", expectedSize);
  1287. ForEachItemIn(i, unmatched)
  1288. {
  1289. unsigned fieldNo = unmatched.item(i);
  1290. expectedSize -= sourceRow.getSize(fieldNo);
  1291. //DBGLOG("Reducing estimated size by %d to %d for omitted field %d (%s)", (int) sourceRow.getSize(fieldNo), expectedSize, fieldNo, sourceRecInfo.queryName(fieldNo));
  1292. }
  1293. if (matchFlags & ~(match_perfect|match_link|match_none|match_extend|match_truncate))
  1294. {
  1295. for (unsigned idx = 0; idx < destRecInfo.getNumFields(); idx++)
  1296. {
  1297. const MatchInfo &match = matchInfo[idx];
  1298. const RtlTypeInfo *type = destRecInfo.queryType(idx);
  1299. unsigned matchField = match.matchIdx;
  1300. switch (match.matchType)
  1301. {
  1302. case match_perfect:
  1303. case match_link:
  1304. case match_none:
  1305. case match_extend:
  1306. case match_truncate:
  1307. // These ones were already included in fixedDelta
  1308. break;
  1309. default:
  1310. // This errs on the side of small - i.e. it assumes that all typecasts end up at minimum size
  1311. // We could do better in some cases e.g. variable string <-> variable unicode we can assume factor of 2,
  1312. // uft8 <-> string we could calculate here - but unlikely to be worth the effort.
  1313. // But it's fine for fixed size output fields, including truncate/extend
  1314. // We could also precalculate the expected delta if all omitted fields are fixed size - but not sure how likely/worthwhile that is.
  1315. expectedSize += type->getMinSize() - sourceRow.getSize(matchField);
  1316. //DBGLOG("Adjusting estimated size by (%d - %d) to %d for translated field %d (%s)", (int) sourceRow.getSize(matchField), type->getMinSize(), expectedSize, matchField, sourceRecInfo.queryName(matchField));
  1317. break;
  1318. }
  1319. }
  1320. }
  1321. return expectedSize;
  1322. }
  1323. void createMatchInfo()
  1324. {
  1325. for (unsigned idx = 0; idx < destRecInfo.getNumFields(); idx++)
  1326. {
  1327. const RtlFieldInfo *field = destRecInfo.queryField(idx);
  1328. const RtlTypeInfo *type = field->type;
  1329. MatchInfo &info = matchInfo[idx];
  1330. info.matchIdx = sourceRecInfo.getFieldNum(destRecInfo.queryName(idx));
  1331. if (info.matchIdx == -1)
  1332. {
  1333. info.matchType = match_none;
  1334. size32_t defaultSize = field->initializer ? type->size(field->initializer, nullptr) : type->getMinSize();
  1335. fixedDelta -= defaultSize;
  1336. //DBGLOG("Decreasing fixedDelta size by %d to %d for defaulted field %d (%s)", defaultSize, fixedDelta, idx, destRecInfo.queryName(idx));
  1337. }
  1338. else
  1339. {
  1340. const RtlTypeInfo *sourceType = sourceRecInfo.queryType(info.matchIdx);
  1341. if (!type->isScalar() || !sourceType->isScalar())
  1342. {
  1343. if (type->getType() != sourceType->getType())
  1344. info.matchType = match_fail; // No translation from one non-scalar type to another
  1345. else
  1346. {
  1347. switch (type->getType())
  1348. {
  1349. case type_set:
  1350. if (type->queryChildType()->fieldType==sourceType->queryChildType()->fieldType &&
  1351. type->queryChildType()->length==sourceType->queryChildType()->length)
  1352. info.matchType = match_perfect;
  1353. else
  1354. info.matchType = match_typecast;
  1355. break;
  1356. case type_row: // These are not expected I think...
  1357. throwUnexpected();
  1358. case type_ifblock:
  1359. case type_record:
  1360. case type_table:
  1361. {
  1362. const RtlRecord *subDest = destRecInfo.queryNested(idx);
  1363. const RtlRecord *subSrc = sourceRecInfo.queryNested(info.matchIdx);
  1364. info.subTrans = new GeneralRecordTranslator(*subDest, *subSrc);
  1365. if (!info.subTrans->needsTranslate())
  1366. {
  1367. // Child does not require translation, but check linkcount mode matches too!
  1368. if (type->isLinkCounted())
  1369. if (sourceType->isLinkCounted())
  1370. info.matchType = match_link;
  1371. else
  1372. info.matchType = match_recurse;
  1373. else
  1374. if (sourceType->isLinkCounted())
  1375. info.matchType = match_recurse;
  1376. else
  1377. info.matchType = match_perfect;
  1378. if (info.matchType != match_recurse)
  1379. {
  1380. delete info.subTrans;
  1381. info.subTrans = nullptr;
  1382. }
  1383. }
  1384. else if (info.subTrans->canTranslate())
  1385. {
  1386. info.matchType = match_recurse;
  1387. matchFlags |= info.subTrans->matchFlags;
  1388. }
  1389. else
  1390. info.matchType = match_fail;
  1391. break;
  1392. }
  1393. default:
  1394. info.matchType = match_fail;
  1395. break;
  1396. }
  1397. }
  1398. }
  1399. else if (type->fieldType==sourceType->fieldType && type->fieldType != type_filepos)
  1400. {
  1401. if (type->length==sourceType->length)
  1402. {
  1403. info.matchType = match_perfect;
  1404. }
  1405. else
  1406. {
  1407. assert(type->isFixedSize()); // Both variable size would have matched length above
  1408. info.matchType = match_typecast;
  1409. if (type->length < sourceType->length)
  1410. {
  1411. if (type->canTruncate())
  1412. {
  1413. info.matchType = match_truncate;
  1414. fixedDelta += sourceType->getMinSize()-type->getMinSize();
  1415. //DBGLOG("Increasing fixedDelta size by %d to %d for truncated field %d (%s)", sourceType->getMinSize()-type->getMinSize(), fixedDelta, idx, destRecInfo.queryName(idx));
  1416. }
  1417. }
  1418. else
  1419. {
  1420. if (type->canExtend(info.fillChar))
  1421. {
  1422. info.matchType = match_extend;
  1423. fixedDelta += sourceType->getMinSize()-type->getMinSize();
  1424. //DBGLOG("Decreasing fixedDelta size by %d to %d for truncated field %d (%s)", type->getMinSize()-sourceType->getMinSize(), fixedDelta, idx, destRecInfo.queryName(idx));
  1425. }
  1426. }
  1427. }
  1428. }
  1429. else
  1430. info.matchType = match_typecast;
  1431. if (sourceRecInfo.queryField(info.matchIdx)->flags & RFTMinifblock)
  1432. info.matchType |= match_inifblock; // Avoids incorrect commoning up of adjacent matches
  1433. // MORE - could note the highest interesting fieldnumber in the source and not bother filling in offsets after that
  1434. // Not sure it would help much though - usually need to know the total record size anyway in real life
  1435. if (idx != info.matchIdx)
  1436. matchFlags |= match_move;
  1437. }
  1438. matchFlags |= info.matchType;
  1439. }
  1440. if (sourceRecInfo.getNumFields() > destRecInfo.getNumFields())
  1441. matchFlags |= match_remove;
  1442. if (matchFlags && !destRecInfo.getFixedSize())
  1443. {
  1444. for (unsigned idx = 0; idx < sourceRecInfo.getNumFields(); idx++)
  1445. {
  1446. const RtlFieldInfo *field = sourceRecInfo.queryField(idx);
  1447. const RtlTypeInfo *type = field->type;
  1448. if (destRecInfo.getFieldNum(field->name) == (unsigned) -1)
  1449. {
  1450. // unmatched field
  1451. if (type->isFixedSize())
  1452. {
  1453. //DBGLOG("Reducing estimated size by %d for (fixed size) omitted field %s", (int) type->getMinSize(), field->name);
  1454. fixedDelta += type->getMinSize();
  1455. }
  1456. else
  1457. unmatched.append(idx);
  1458. }
  1459. }
  1460. //DBGLOG("Source record contains %d bytes of omitted fixed size fields", fixedDelta);
  1461. }
  1462. }
  1463. };
  1464. extern ECLRTL_API const IDynamicTransform *createRecordTranslator(const RtlRecord &_destRecInfo, const RtlRecord &_srcRecInfo)
  1465. {
  1466. return new GeneralRecordTranslator(_destRecInfo, _srcRecInfo);
  1467. }
  1468. class TranslatedRowStream : public CInterfaceOf<IRowStream>
  1469. {
  1470. public:
  1471. TranslatedRowStream(IRowStream *_inputStream, IEngineRowAllocator *_resultAllocator, const RtlRecord &outputRecord, const RtlRecord &inputRecord)
  1472. : inputStream(_inputStream), resultAllocator(_resultAllocator), translator(new GeneralRecordTranslator(outputRecord, inputRecord))
  1473. {
  1474. translator->describe();
  1475. }
  1476. virtual const void *nextRow()
  1477. {
  1478. if (eof)
  1479. return NULL;
  1480. const void *inRow = inputStream->nextRow();
  1481. if (!inRow)
  1482. {
  1483. if (eogSeen)
  1484. eof = true;
  1485. else
  1486. eogSeen = true;
  1487. return nullptr;
  1488. }
  1489. else
  1490. eogSeen = false;
  1491. RtlDynamicRowBuilder rowBuilder(resultAllocator);
  1492. size32_t len = translator->translate(rowBuilder, (const byte *) inRow);
  1493. rtlReleaseRow(inRow);
  1494. return rowBuilder.finalizeRowClear(len);
  1495. }
  1496. virtual void stop() override
  1497. {
  1498. resultAllocator.clear();
  1499. }
  1500. bool canTranslate() const
  1501. {
  1502. return translator->canTranslate();
  1503. }
  1504. bool needsTranslate() const
  1505. {
  1506. return translator->needsTranslate();
  1507. }
  1508. protected:
  1509. Linked<IRowStream> inputStream;
  1510. Linked<IEngineRowAllocator> resultAllocator;
  1511. Owned<const IDynamicTransform> translator;
  1512. unsigned numOffsets = 0;
  1513. size_t * variableOffsets = nullptr;
  1514. bool eof = false;
  1515. bool eogSeen = false;
  1516. };
  1517. extern ECLRTL_API IRowStream * transformRecord(IEngineRowAllocator * resultAllocator,IOutputMetaData & metaInput,IRowStream * input)
  1518. {
  1519. if (resultAllocator->queryOutputMeta()==&metaInput)
  1520. return LINK(input);
  1521. Owned<TranslatedRowStream> stream = new TranslatedRowStream(input, resultAllocator,
  1522. resultAllocator->queryOutputMeta()->queryRecordAccessor(true),
  1523. metaInput.queryRecordAccessor(true));
  1524. if (!stream->needsTranslate())
  1525. return LINK(input);
  1526. else if (!stream->canTranslate())
  1527. rtlFail(0, "Cannot translate record stream");
  1528. else
  1529. return stream.getClear();
  1530. }