rtldynfield.cpp 93 KB


  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "platform.h"
  14. #include <math.h>
  15. #include <stdio.h>
  16. #include "jmisc.hpp"
  17. #include "jlib.hpp"
  18. #include "eclhelper.hpp"
  19. #include "eclrtl_imp.hpp"
  20. #include "rtldynfield.hpp"
  21. #include "rtlrecord.hpp"
  22. #include "rtlembed.hpp"
  23. #include "rtlnewkey.hpp"
  // Disabled by default. NOTE(review): presumably enables extra tracing in the record
  // translation code later in this file - the code that tests it is not visible here.
  //#define TRACE_TRANSLATION
  // When defined, CRtlFieldTypeDeserializer::deserialize(MemoryBuffer &) recomputes the 64-bit hash
  // of the serialized payload and rejects the data if it does not match the stored hash.
  #define VALIDATE_TYPEINFO_HASHES
  // Format marker written as the first byte of binary serialized type information, and checked
  // when that information is read back.
  #define RTLTYPEINFO_FORMAT_1 81 // In case we ever want to support more than one format or change how it is stored
  27. //---------------------------------------------------------------------------------------------------------------------
  28. extern ECLRTL_API RecordTranslationMode getTranslationMode(const char *val, bool isLocal)
  29. {
  30. if (isEmptyString(val) || strToBool(val) || strieq(val, "payload"))
  31. return RecordTranslationMode::Payload;
  32. else if (strieq(val, "alwaysDisk") || strieq(val, "disk"))
  33. {
  34. if (!isLocal)
  35. WARNLOG("alwaysDisk translation mode should only ever be used via a HINT");
  36. return RecordTranslationMode::AlwaysDisk;
  37. }
  38. else if (strieq(val, "alwaysECL") || strieq(val, "ecl"))
  39. {
  40. if (isLocal)
  41. return RecordTranslationMode::AlwaysECL;
  42. WARNLOG("Unsupported alwaysECL translation mode used globally, translation disabled - use with HINT to set locally.");
  43. }
  44. return RecordTranslationMode::None;
  45. }
  46. extern ECLRTL_API const char *getTranslationModeText(RecordTranslationMode val)
  47. {
  48. switch (val)
  49. {
  50. case RecordTranslationMode::AlwaysDisk: return "alwaysDisk";
  51. case RecordTranslationMode::AlwaysECL: return "alwaysECL";
  52. case RecordTranslationMode::Payload: return "payload";
  53. case RecordTranslationMode::None: return "off";
  54. }
  55. throwUnexpected();
  56. }
  57. //---------------------------------------------------------------------------------------------------------------------
  58. const RtlTypeInfo *FieldTypeInfoStruct::createRtlTypeInfo() const
  59. {
  60. const RtlTypeInfo *ret = nullptr;
  61. switch (fieldType & RFTMkind)
  62. {
  63. case type_boolean:
  64. ret = new RtlBoolTypeInfo(fieldType, length);
  65. break;
  66. case type_keyedint:
  67. ret = new RtlKeyedIntTypeInfo(fieldType, length, childType);
  68. break;
  69. case type_int:
  70. ret = new RtlIntTypeInfo(fieldType, length);
  71. break;
  72. case type_blob:
  73. ret = new RtlBlobTypeInfo(fieldType, length, childType);
  74. break;
  75. case type_filepos:
  76. #if __BYTE_ORDER == __LITTLE_ENDIAN
  77. ret = new RtlSwapIntTypeInfo(fieldType, length);
  78. #else
  79. ret = new RtlIntTypeInfo(fieldType, length);
  80. #endif
  81. break;
  82. case type_real:
  83. ret = new RtlRealTypeInfo(fieldType, length);
  84. break;
  85. case type_decimal:
  86. ret = new RtlDecimalTypeInfo(fieldType, length);
  87. break;
  88. case type_string:
  89. ret = new RtlStringTypeInfo(fieldType, length);
  90. break;
  91. case type_bitfield:
  92. ret = new RtlBitfieldTypeInfo(fieldType, length);
  93. break;
  94. case type_varstring:
  95. ret = new RtlVarStringTypeInfo(fieldType, length);
  96. break;
  97. case type_data:
  98. ret = new RtlDataTypeInfo(fieldType, length);
  99. break;
  100. case type_table:
  101. assert(childType);
  102. ret = new RtlDatasetTypeInfo(fieldType, length, childType);
  103. break;
  104. case type_dictionary:
  105. assert(childType);
  106. ret = new RtlDictionaryTypeInfo(fieldType, length, childType);
  107. break;
  108. case type_set:
  109. assert(childType);
  110. ret = new RtlSetTypeInfo(fieldType, length, childType);
  111. break;
  112. case type_row:
  113. assert(childType);
  114. ret = new RtlRowTypeInfo(fieldType, length, childType);
  115. break;
  116. case type_swapint:
  117. ret = new RtlSwapIntTypeInfo(fieldType, length);
  118. break;
  119. case type_packedint:
  120. ret = new RtlPackedIntTypeInfo(fieldType, length);
  121. break;
  122. case type_qstring:
  123. ret = new RtlQStringTypeInfo(fieldType, length);
  124. break;
  125. case type_unicode:
  126. ret = new RtlUnicodeTypeInfo(fieldType, length, locale);
  127. break;
  128. case type_varunicode:
  129. ret = new RtlVarUnicodeTypeInfo(fieldType, length, locale);
  130. break;
  131. case type_utf8:
  132. ret = new RtlUtf8TypeInfo(fieldType, length, locale);
  133. break;
  134. case type_record:
  135. ret = new RtlRecordTypeInfo(fieldType, length, fieldsArray);
  136. break;
  137. case type_ifblock:
  138. ret = new RtlDynamicIfBlockTypeInfo(fieldType, length, fieldsArray, nullptr, filter);
  139. break;
  140. case type_alien:
  141. assert(childType);
  142. ret = new RtlAlienTypeInfo(fieldType, length, childType);
  143. break;
  144. default:
  145. throwUnexpected();
  146. }
  147. return ret;
  148. };
  149. typedef MapBetween<const RtlTypeInfo *, const RtlTypeInfo *, StringAttr, const char *> TypeNameMap;
  150. typedef MapBetween<const RtlTypeInfo *, const RtlTypeInfo *, unsigned, unsigned> TypeNumMap;
  151. /**
  152. * class CRtlFieldTypeSerializer
  153. *
  154. * Serializer class for creating json representation of a RtlTypeInfo structure.
  155. *
  156. */
  157. class CRtlFieldTypeSerializer
  158. {
  159. public:
  160. /**
  161. * Serialize a RtlTypeInfo structure to JSON
  162. *
  163. * @param out Buffer for resulting serialized string
  164. * @param type RtlTypeInfo structure to be serialized
  165. * @return Referenced to supplied buffer
  166. */
  167. static StringBuffer &serialize(StringBuffer &out, const RtlTypeInfo *type)
  168. {
  169. CRtlFieldTypeSerializer s(out, type);
  170. s.doSerialize();
  171. return out;
  172. }
  173. private:
  174. CRtlFieldTypeSerializer(StringBuffer &_out, const RtlTypeInfo *_base)
  175. : json(_out), base(_base)
  176. {
  177. }
  178. void doSerialize()
  179. {
  180. json.append("{");
  181. serializeType(base);
  182. json.append("\n}");
  183. }
  184. void serializeType(const RtlTypeInfo *type)
  185. {
  186. if (!serialized(type))
  187. {
  188. // Make sure all child types are serialized first
  189. const RtlTypeInfo *childType = type->queryChildType();
  190. if (childType)
  191. serializeType(childType);
  192. const RtlFieldInfo * const * fields = type->queryFields();
  193. if (fields)
  194. {
  195. for (;;)
  196. {
  197. const RtlFieldInfo * child = *fields;
  198. if (!child)
  199. break;
  200. serializeType(child->type);
  201. fields++;
  202. }
  203. }
  204. // Now serialize this one
  205. if (type != base)
  206. {
  207. VStringBuffer newName("ty%d", ++nextTypeName);
  208. types.setValue(type, newName.str());
  209. startField(newName.str());
  210. serializeMe(type);
  211. closeCurly();
  212. }
  213. else
  214. serializeMe(type);
  215. }
  216. }
  217. void serializeMe(const RtlTypeInfo *type)
  218. {
  219. if (!type->canSerialize())
  220. throw makeStringException(MSGAUD_user, 1000, "This type structure cannot be serialized");
  221. addPropHex("fieldType", type->fieldType);
  222. addProp("length", type->length);
  223. addPropNonEmpty("locale", type->queryLocale());
  224. const RtlTypeInfo *childType = type->queryChildType();
  225. if (childType)
  226. addPropType("child", childType);
  227. const IFieldFilter * filter = type->queryFilter();
  228. if (filter)
  229. {
  230. StringBuffer filterText;
  231. filter->serialize(filterText);
  232. addPropType("filterType", &filter->queryType());
  233. addProp("filter", filterText);
  234. }
  235. const RtlFieldInfo * const * fields = type->queryFields();
  236. if (fields)
  237. {
  238. startFields();
  239. for (;;)
  240. {
  241. const RtlFieldInfo * child = *fields;
  242. if (!child)
  243. break;
  244. newline();
  245. openCurly();
  246. addProp("name", child->name);
  247. addPropType("type", child->type);
  248. addProp("xpath", child->xpath);
  249. if (child->flags)
  250. addPropHex("flags", child->flags);
  251. // initializer is tricky - it's not (in general) a null-terminated string but the actual length is not easily available
  252. if (child->initializer)
  253. {
  254. if (isVirtualInitializer(child->initializer))
  255. addProp("vinit", getVirtualInitializer(child->initializer));
  256. else
  257. addProp("init", child->type->size((const byte *) child->initializer, nullptr), (const byte *) child->initializer);
  258. }
  259. closeCurly();
  260. fields++;
  261. }
  262. endFields();
  263. }
  264. }
  265. bool serialized(const RtlTypeInfo *type)
  266. {
  267. return types.find(type) != nullptr;
  268. }
  269. void startField(const char *name)
  270. {
  271. newline().appendf("\"%s\": ", name);
  272. openCurly();
  273. }
  274. void addProp(const char *propName, const char *propVal)
  275. {
  276. if (propVal)
  277. {
  278. newline();
  279. encodeJSON(json.append("\""), propName).append("\": ");
  280. encodeJSON(json.append("\""), propVal).append("\"");
  281. }
  282. }
  283. void addProp(const char *propName, size32_t propLen, const byte *propVal)
  284. {
  285. if (propVal)
  286. {
  287. newline();
  288. encodeJSON(json.append("\""), propName).append("\": \"");
  289. JBASE64_Encode(propVal, propLen, json, false);
  290. json.append("\"");
  291. }
  292. }
  293. void addPropNonEmpty(const char *propName, const char *propVal)
  294. {
  295. if (propVal && *propVal)
  296. addProp(propName, propVal);
  297. }
  298. void addProp(const char *propName, unsigned propVal)
  299. {
  300. newline().appendf("\"%s\": %u", propName, propVal);
  301. }
  302. void addPropHex(const char *propName, unsigned propVal)
  303. {
  304. newline().appendf("\"%s\": %u", propName, propVal); // Nice idea but json does not support hex constants :(
  305. }
  306. void addPropType(const char *propName, const RtlTypeInfo *type)
  307. {
  308. addProp(propName, queryTypeName(type));
  309. }
  310. const char *queryTypeName(const RtlTypeInfo *type)
  311. {
  312. StringAttr *typeName = types.getValue(type);
  313. assertex(typeName);
  314. return typeName->get();
  315. }
  316. void startFields()
  317. {
  318. newline().appendf("\"fields\": ");
  319. openCurly('[');
  320. }
  321. void endFields()
  322. {
  323. closeCurly(']');
  324. }
  325. StringBuffer &newline()
  326. {
  327. if (commaPending)
  328. json.append(',');
  329. json.appendf("\n%*s", indent, "");
  330. commaPending = true;
  331. return json;
  332. }
  333. void closeCurly(char brace = '}')
  334. {
  335. indent--;
  336. json.appendf("\n%*s%c", indent, "", brace);
  337. commaPending = true;
  338. }
  339. void openCurly(char brace = '{')
  340. {
  341. json.append(brace);
  342. indent++;
  343. commaPending = false;
  344. }
  345. TypeNameMap types;
  346. StringBuffer &json;
  347. const RtlTypeInfo *base = nullptr;
  348. unsigned indent = 1;
  349. unsigned nextTypeName = 0;
  350. bool commaPending = false;
  351. };
  352. class CRtlFieldTypeBinSerializer
  353. {
  354. public:
  355. /**
  356. * Serialize a RtlTypeInfo structure to binary
  357. *
  358. * @param out Buffer for resulting serialized string
  359. * @param type RtlTypeInfo structure to be serialized
  360. * @return Referenced to supplied buffer
  361. */
  362. static MemoryBuffer &serialize(MemoryBuffer &out, const RtlTypeInfo *type)
  363. {
  364. int oldEnd = out.setEndian(__LITTLE_ENDIAN);
  365. CRtlFieldTypeBinSerializer s(out);
  366. byte format = RTLTYPEINFO_FORMAT_1;
  367. out.append(format);
  368. DelayedMarker<hash64_t> hash(out);
  369. DelayedSizeMarker size(out);
  370. size32_t pos = out.length();
  371. s.serializeType(type);
  372. size.write();
  373. hash.write(rtlHash64Data(size.size(), out.toByteArray()+pos, 0));
  374. out.setEndian(oldEnd);
  375. return out;
  376. }
  377. private:
  378. CRtlFieldTypeBinSerializer(MemoryBuffer &_out)
  379. : out(_out)
  380. {
  381. }
  382. void serializeType(const RtlTypeInfo *type)
  383. {
  384. if (!serialized(type))
  385. {
  386. // Make sure all child types are serialized first
  387. const RtlTypeInfo *child = type->queryChildType();
  388. if (child)
  389. serializeType(child);
  390. const RtlFieldInfo * const * fields = type->queryFields();
  391. if (fields)
  392. {
  393. for (unsigned idx = 0;;idx++)
  394. {
  395. const RtlFieldInfo * child = fields[idx];
  396. if (!child)
  397. break;
  398. serializeType(child->type);
  399. }
  400. }
  401. // Now serialize this one
  402. types.setValue(type, nextTypeNum++);
  403. serializeMe(type);
  404. }
  405. }
  406. void serializeMe(const RtlTypeInfo *type)
  407. {
  408. if (!type->canSerialize())
  409. throw makeStringException(MSGAUD_user, 1000, "This type structure cannot be serialized");
  410. unsigned fieldType = type->fieldType;
  411. const char *locale = type->queryLocale();
  412. if (locale && *locale)
  413. fieldType |= RFTMhasLocale;
  414. const RtlTypeInfo *child = type->queryChildType();
  415. if (child)
  416. fieldType |= RFTMhasChildType;
  417. const RtlFieldInfo * const * fields = type->queryFields();
  418. if (fields)
  419. fieldType |= RFTMhasFields;
  420. out.append(fieldType);
  421. out.appendPacked(type->length);
  422. if (fieldType & RFTMhasLocale)
  423. out.append(locale);
  424. if (child)
  425. out.appendPacked(queryTypeIdx(child));
  426. const IFieldFilter * filter = type->queryFilter();
  427. if (filter)
  428. {
  429. out.appendPacked(queryTypeIdx(&filter->queryType()));
  430. filter->serialize(out);
  431. }
  432. if (fields)
  433. {
  434. unsigned count = countFields(fields);
  435. out.appendPacked(count);
  436. for (;;)
  437. {
  438. const RtlFieldInfo * child = *fields;
  439. if (!child)
  440. break;
  441. out.append(child->name);
  442. out.appendPacked(queryTypeIdx(child->type));
  443. unsigned flags = child->flags;
  444. if (child->xpath)
  445. flags |= RFTMhasXpath;
  446. if (child->initializer)
  447. {
  448. if (isVirtualInitializer(child->initializer))
  449. flags |= RFTMhasVirtualInitializer;
  450. else
  451. flags |= RFTMhasInitializer;
  452. }
  453. out.append(flags);
  454. if (child->xpath)
  455. out.append(child->xpath);
  456. // initializer is tricky - it's not (in general) a null-terminated string but the actual length is not easily available
  457. if (flags & RFTMhasInitializer)
  458. {
  459. unsigned initLength = child->type->size((const byte *) child->initializer, nullptr);
  460. out.appendPacked(initLength).append(initLength, (const byte *) child->initializer);
  461. }
  462. else if (flags &RFTMhasVirtualInitializer)
  463. out.append(getVirtualInitializer(child->initializer));
  464. fields++;
  465. }
  466. }
  467. }
  468. bool serialized(const RtlTypeInfo *type)
  469. {
  470. return types.find(type) != nullptr;
  471. }
  472. unsigned queryTypeIdx(const RtlTypeInfo *type)
  473. {
  474. unsigned *typeNum = types.getValue(type);
  475. assertex(typeNum);
  476. return *typeNum;
  477. }
  478. TypeNumMap types;
  479. MemoryBuffer &out;
  480. unsigned nextTypeNum = 0;
  481. };
  482. /**
  483. * class CRtlFieldTypeDeserializer
  484. *
  485. * Deserializer class for creating a RtlTypeInfo structure from json representation.
  486. *
  487. * Note that the resulting RtlTypeInfo structures are owned by this object and will be
  488. * destroyed when this object is destroyed.
  489. *
  490. */
  491. class CRtlFieldTypeDeserializer : public CInterfaceOf<IRtlFieldTypeDeserializer>
  492. {
  493. public:
  494. /**
  495. * CRtlFieldTypeDeserializer constructor
  496. *
  497. * @param _callback Supplies a callback to be used for blobs/filepositions.
  498. */
  499. CRtlFieldTypeDeserializer()
  500. {
  501. }
  502. /**
  503. * CRtlFieldTypeDeserializer destructor
  504. * <p>
  505. * Releases all RtlTypeInfo and related structures created by this deserializer
  506. */
  507. ~CRtlFieldTypeDeserializer()
  508. {
  509. // Need some care - all the RtlTypeInfo objects I created need to be destroyed, together with anything else I had to create
  510. // Strings (other than the init strings) are preserved in the AtomTable
  511. // First allow the types to clean up any critical cached information, then delete them in a second pass
  512. HashIterator allTypes(types);
  513. ForEach(allTypes)
  514. {
  515. const RtlTypeInfo **type = types.mapToValue(&allTypes.query());
  516. cleanupType(*type);
  517. }
  518. cleanupType(base);
  519. ForEach(allTypes)
  520. {
  521. const RtlTypeInfo **type = types.mapToValue(&allTypes.query());
  522. deleteType(*type);
  523. }
  524. deleteType(base);
  525. }
  526. /**
  527. * Obtain the deserialized type information
  528. * <p>
  529. * Note that the RtlTypeInfo objects are not link-counted, so the lifetime of these objects
  530. * is determined by the lifetime of the deserializer. They will be released once the deserializer
  531. * that created them is deleted.
  532. * <p>
  533. * Do not call more than once.
  534. *
  535. * @param _json JSON text to be deserialized, as created by CRtlFieldTypeSerializer
  536. * @return Deserialized type object
  537. */
  538. virtual const RtlTypeInfo *deserialize(const char *json) override
  539. {
  540. assertex(!base);
  541. Owned<IPropertyTree> jsonTree = createPTreeFromJSONString(json);
  542. base = deserializeType(jsonTree, jsonTree);
  543. return base;
  544. }
  545. /**
  546. * Obtain the deserialized type information
  547. * <p>
  548. * Note that the RtlTypeInfo objects are not link-counted, so the lifetime of these objects
  549. * is determined by the lifetime of the deserializer. They will be released once the deserializer
  550. * that created them is deleted.
  551. * <p>
  552. * Do not call more than once.
  553. *
  554. * @param _jsonTree JSON property tree to be deserialized, as created by CRtlFieldTypeSerializer
  555. * @return Deserialized type object
  556. */
  557. virtual const RtlTypeInfo *deserialize(IPropertyTree &jsonTree) override
  558. {
  559. assertex(!base);
  560. base = deserializeType(&jsonTree, &jsonTree);
  561. return base;
  562. }
  563. /**
  564. * Obtain the deserialized type information
  565. * <p>
  566. * Note that the RtlTypeInfo objects are not link-counted, so the lifetime of these objects
  567. * is determined by the lifetime of the deserializer. They will be released once the deserializer
  568. * that created them is deleted.
  569. * <p>
  570. * Do not call more than once.
  571. *
  572. * @param buf Binary serialized typeinfo to be deserialized, as created by CRtlFieldTypeSerializer
  573. * @return Deserialized type object
  574. */
  575. virtual const RtlTypeInfo *deserialize(MemoryBuffer &buf) override
  576. {
  577. assertex(!base);
  578. unsigned nextTypeNum = 0;
  579. int oldEndian = buf.setEndian(__LITTLE_ENDIAN);
  580. try
  581. {
  582. byte format;
  583. buf.read(format);
  584. if (format != RTLTYPEINFO_FORMAT_1)
  585. throw MakeStringException(0, "Invalid type info (%d) in CRtlFieldTypeDeserializer::deserialize", format);
  586. hash64_t hash;
  587. buf.read(hash);
  588. size32_t size;
  589. buf.read(size);
  590. #ifdef VALIDATE_TYPEINFO_HASHES
  591. hash64_t expected = rtlHash64Data(size, buf.readDirect(0), 0);
  592. if (expected != hash)
  593. throw MakeStringException(0, "Invalid type info hash in CRtlFieldTypeDeserializer::deserialize");
  594. #endif
  595. size32_t endpos = buf.getPos() + size;
  596. while (buf.getPos() < endpos)
  597. {
  598. if (base)
  599. {
  600. addType(base, nextTypeNum++);
  601. base = nullptr; // in case of exceptions...
  602. }
  603. base = deserializeType(buf);
  604. }
  605. if (buf.getPos()!=endpos)
  606. throw MakeStringException(0, "Invalid type info (incorrect size data) in CRtlFieldTypeDeserializer::deserialize");
  607. buf.setEndian(oldEndian);
  608. return base;
  609. }
  610. catch(...)
  611. {
  612. buf.setEndian(oldEndian);
  613. throw;
  614. }
  615. }
  616. virtual const RtlTypeInfo *addType(FieldTypeInfoStruct &info, const IInterface *typeOrIfblock) override
  617. {
  618. VStringBuffer name("%p", typeOrIfblock);
  619. const RtlTypeInfo ** found = types.getValue(name);
  620. if (found)
  621. return *found;
  622. savedTypes.append(LINK(typeOrIfblock));
  623. info.locale = keep(info.locale);
  624. const RtlTypeInfo * ret = info.createRtlTypeInfo();
  625. types.setValue(name, ret);
  626. unsigned baseType = (info.fieldType & RFTMkind);
  627. if (baseType == type_record)
  628. patchIfBlockParentRow(ret, static_cast<const RtlRecordTypeInfo *>(ret));
  629. return ret;
  630. }
  631. virtual const RtlTypeInfo *lookupType(const IInterface * typeOrIfBlock) const override
  632. {
  633. VStringBuffer name("%p", typeOrIfBlock);
  634. const RtlTypeInfo ** found = types.getValue(name);
  635. if (found)
  636. return *found;
  637. return nullptr;
  638. }
  639. virtual const RtlFieldInfo *addFieldInfo(const char *fieldName, const char *xpath, const RtlTypeInfo *type, unsigned flags, const char *init) override
  640. {
  641. // MORE - we could hang onto this for cleanup, rather than assuming that we keep it via a later addType() call?
  642. return new RtlFieldStrInfo(keep(fieldName), keep(xpath), type, flags, init);
  643. }
  644. private:
  645. KeptAtomTable atoms; // Used to ensure proper lifetime of strings used in type structures
  646. MapStringTo<const RtlTypeInfo *> types; // Ensures structures only generated once
  647. const RtlTypeInfo *base = nullptr; // Holds the resulting type
  648. IConstPointerArray savedTypes; // ensure types remain alive for subsequent lookups
  649. void deleteType(const RtlTypeInfo *type)
  650. {
  651. if (type)
  652. {
  653. // Releases all memory for a single RtlTypeInfo object
  654. const RtlFieldInfo * const * fields = type->queryFields();
  655. if (fields)
  656. {
  657. const RtlFieldInfo * const * cur = fields;
  658. for (;;)
  659. {
  660. const RtlFieldInfo * child = *cur;
  661. if (!child)
  662. break;
  663. // We don't need to delete other strings - they are owned by atom table.
  664. // But the initializer is decoded and thus owned by me
  665. if (!isVirtualInitializer(child->initializer))
  666. free((void *)child->initializer);
  667. delete child;
  668. cur++;
  669. }
  670. delete [] fields;
  671. }
  672. type->doDelete();
  673. }
  674. }
  675. void cleanupType(const RtlTypeInfo *type)
  676. {
  677. if (type)
  678. type->doCleanup();
  679. }
  680. const RtlTypeInfo *lookupType(const char *name, IPropertyTree *all)
  681. {
  682. const RtlTypeInfo ** found = types.getValue(name);
  683. if (found)
  684. return *found;
  685. const RtlTypeInfo *type = deserializeType(all->queryPropTree(name), all);
  686. types.setValue(name, type);
  687. return type;
  688. }
  689. const RtlTypeInfo *lookupType(unsigned idx)
  690. {
  691. // Could keep an expanding array of types instead - but the hash table is already there for json support...
  692. VStringBuffer key("%u", idx);
  693. const RtlTypeInfo ** found = types.getValue(key);
  694. if (found)
  695. return *found;
  696. throw makeStringException(-1, "Invalid serialized type information");
  697. }
  698. void addType(const RtlTypeInfo *type, unsigned idx)
  699. {
  700. VStringBuffer key("%u", idx);
  701. assert(types.getValue(key)==nullptr);
  702. types.setValue(key, type);
  703. }
  704. const char *keep(const char *string)
  705. {
  706. if (string)
  707. return str(atoms.addAtom(string));
  708. else
  709. return nullptr;
  710. }
  711. const RtlTypeInfo *deserializeType(IPropertyTree *type, IPropertyTree *all)
  712. {
  713. FieldTypeInfoStruct info;
  714. info.fieldType = type->getPropInt("fieldType");
  715. info.length = type->getPropInt("length");
  716. info.locale = keep(type->queryProp("locale"));
  717. const char *child = type->queryProp("child");
  718. if (child)
  719. info.childType = lookupType(child, all);
  720. unsigned baseType = (info.fieldType & RFTMkind);
  721. if ((baseType == type_record) || (baseType == type_ifblock))
  722. {
  723. unsigned numFields = type->getCount("fields");
  724. info.fieldsArray = new const RtlFieldInfo * [numFields+1];
  725. info.fieldsArray[numFields] = nullptr;
  726. Owned<IPropertyTreeIterator> fields = type->getElements("fields");
  727. unsigned n = 0;
  728. ForEach(*fields)
  729. {
  730. IPropertyTree &field = fields->query();
  731. const char *fieldTypeName = field.queryProp("type");
  732. const char *fieldName = keep(field.queryProp("name"));
  733. const char *fieldXpath = keep(field.queryProp("xpath"));
  734. unsigned flags = field.getPropInt("flags");
  735. const char *fieldInit = field.queryProp("init");
  736. const char *fieldVInit = field.queryProp("vinit");
  737. if (fieldInit)
  738. {
  739. StringBuffer decoded;
  740. JBASE64_Decode(fieldInit, decoded);
  741. fieldInit = decoded.detach(); // NOTE - this gets freed in cleanupType()
  742. }
  743. else if (fieldVInit)
  744. {
  745. fieldInit = (const char *)(memsize_t)atoi(fieldVInit);
  746. }
  747. info.fieldsArray[n] = new RtlFieldStrInfo(fieldName, fieldXpath, lookupType(fieldTypeName, all), flags, fieldInit);
  748. n++;
  749. }
  750. }
  751. if (baseType == type_ifblock)
  752. {
  753. //Filter field needs to be deserialized and the type resolved separately outside the deserialize call
  754. //because there isn't a RtlTypeInfo available to resolve the field (since we are currently deserializing it!)
  755. const char * filterText = type->queryProp("filter");
  756. StringBuffer fieldIdText;
  757. readFieldFromFieldFilter(fieldIdText, filterText);
  758. unsigned fieldId = atoi(fieldIdText);
  759. const RtlTypeInfo * fieldType = lookupType(type->queryProp("filterType"), all);
  760. info.filter = deserializeFieldFilter(fieldId, *fieldType, filterText);
  761. }
  762. const RtlTypeInfo * result = info.createRtlTypeInfo();
  763. if (baseType == type_record)
  764. patchIfBlockParentRow(result, static_cast<const RtlRecordTypeInfo *>(result));
  765. return result;
  766. }
  767. const RtlTypeInfo *deserializeType(MemoryBuffer &type)
  768. {
  769. FieldTypeInfoStruct info;
  770. type.read(info.fieldType);
  771. type.readPacked(info.length);
  772. if (info.fieldType & RFTMhasLocale)
  773. {
  774. const char *locale;
  775. type.read(locale);
  776. info.locale = keep(locale);
  777. }
  778. if (info.fieldType & RFTMhasChildType)
  779. {
  780. unsigned childIdx;
  781. type.readPacked(childIdx);
  782. info.childType = lookupType(childIdx);
  783. }
  784. unsigned baseType = (info.fieldType & RFTMkind);
  785. if (baseType == type_ifblock)
  786. {
  787. unsigned childIdx;
  788. type.readPacked(childIdx);
  789. const RtlTypeInfo * fieldType = lookupType(childIdx);
  790. unsigned fieldId;
  791. type.readPacked(fieldId);
  792. info.filter = deserializeFieldFilter(fieldId, *fieldType, type);
  793. }
  794. if (info.fieldType & RFTMhasFields)
  795. {
  796. unsigned numFields;
  797. type.readPacked(numFields);
  798. info.fieldsArray = new const RtlFieldInfo * [numFields+1];
  799. info.fieldsArray[numFields] = nullptr;
  800. for (unsigned n = 0; n < numFields; n++)
  801. {
  802. const char *fieldName;
  803. type.read(fieldName);
  804. if (fieldName[0] == '\0')
  805. fieldName = nullptr;
  806. unsigned fieldType;
  807. type.readPacked(fieldType);
  808. unsigned fieldFlags;
  809. type.read(fieldFlags);
  810. const char *xpath = nullptr;
  811. if (fieldFlags & RFTMhasXpath)
  812. type.read(xpath);
  813. void *init = nullptr;
  814. if (fieldFlags & RFTMhasInitializer)
  815. {
  816. unsigned initLength;
  817. type.readPacked(initLength);
  818. init = malloc(initLength);
  819. memcpy(init, type.readDirect(initLength), initLength);
  820. }
  821. else if (fieldFlags & RFTMhasVirtualInitializer)
  822. {
  823. byte virtualKind;
  824. type.read(virtualKind);
  825. init = (void *)(memsize_t)virtualKind;
  826. }
  827. fieldFlags &= ~RFTMserializerFlags;
  828. info.fieldsArray[n] = new RtlFieldStrInfo(keep(fieldName), keep(xpath), lookupType(fieldType), fieldFlags, (const char *) init);
  829. }
  830. }
  831. info.fieldType &= ~RFTMserializerFlags;
  832. const RtlTypeInfo * result = info.createRtlTypeInfo();
  833. if (baseType == type_record)
  834. patchIfBlockParentRow(result, static_cast<const RtlRecordTypeInfo *>(result));
  835. return result;
  836. }
  837. void patchIfBlockParentRow(const RtlTypeInfo * fieldType, const RtlRecordTypeInfo * parentRow)
  838. {
  839. const RtlFieldInfo * const * fields = fieldType->queryFields();
  840. for (;*fields;fields++)
  841. {
  842. const RtlFieldInfo * cur = *fields;
  843. if (!cur)
  844. break;
  845. const RtlTypeInfo * curType = cur->type;
  846. if ((curType->fieldType & RFTMkind) == type_ifblock)
  847. {
  848. const RtlDynamicIfBlockTypeInfo * constifblock = static_cast<const RtlDynamicIfBlockTypeInfo *>(curType);
  849. RtlDynamicIfBlockTypeInfo * ifblock = const_cast<RtlDynamicIfBlockTypeInfo *>(constifblock);
  850. ifblock->setParent(parentRow);
  851. patchIfBlockParentRow(curType, parentRow);
  852. }
  853. }
  854. }
  855. };
  856. extern ECLRTL_API IRtlFieldTypeDeserializer *createRtlFieldTypeDeserializer()
  857. {
  858. return new CRtlFieldTypeDeserializer();
  859. }
  860. extern ECLRTL_API StringBuffer &dumpTypeInfo(StringBuffer &ret, const RtlTypeInfo *t)
  861. {
  862. return CRtlFieldTypeSerializer::serialize(ret, t);
  863. }
  864. extern ECLRTL_API bool dumpTypeInfo(MemoryBuffer &ret, const RtlTypeInfo *t)
  865. {
  866. try
  867. {
  868. CRtlFieldTypeBinSerializer::serialize(ret, t);
  869. return true;
  870. }
  871. catch (IException *E)
  872. {
  873. EXCLOG(E);
  874. E->Release();
  875. return false;
  876. }
  877. }
  878. extern ECLRTL_API void serializeRecordType(size32_t & __lenResult, void * & __result, IOutputMetaData & metaVal)
  879. {
  880. MemoryBuffer ret;
  881. try
  882. {
  883. CRtlFieldTypeBinSerializer::serialize(ret, metaVal.queryTypeInfo());
  884. }
  885. catch (IException * e)
  886. {
  887. ret.clear();
  888. e->Release();
  889. }
  890. __lenResult = ret.length();
  891. __result = ret.detach();
  892. }
  893. extern ECLRTL_API void dumpRecordType(size32_t & __lenResult,char * & __result,IOutputMetaData &metaVal)
  894. {
  895. StringBuffer ret;
  896. try
  897. {
  898. CRtlFieldTypeSerializer::serialize(ret, metaVal.queryTypeInfo());
  899. #ifdef _DEBUG
  900. StringBuffer ret2;
  901. CRtlFieldTypeDeserializer deserializer;
  902. CRtlFieldTypeSerializer::serialize(ret2, deserializer.deserialize(ret));
  903. assert(streq(ret, ret2));
  904. MemoryBuffer out;
  905. CRtlFieldTypeBinSerializer::serialize(out, metaVal.queryTypeInfo());
  906. CRtlFieldTypeDeserializer bindeserializer;
  907. CRtlFieldTypeSerializer::serialize(ret2.clear(), bindeserializer.deserialize(out));
  908. assert(streq(ret, ret2));
  909. #endif
  910. }
  911. catch (IException * e)
  912. {
  913. e->errorMessage(ret.clear());
  914. e->Release();
  915. }
  916. __lenResult = ret.length();
  917. __result = ret.detach();
  918. }
  919. extern ECLRTL_API void getFieldVal(size32_t & __lenResult,char * & __result, int column, IOutputMetaData & metaVal, const byte *row)
  920. {
  921. __lenResult = 0;
  922. __result = nullptr;
  923. if (column >= 0)
  924. {
  925. const RtlRecord &r = metaVal.queryRecordAccessor(true);
  926. if ((unsigned) column < r.getNumFields())
  927. {
  928. unsigned numOffsets = r.getNumVarFields() + 1;
  929. size_t * variableOffsets = (size_t *)alloca(numOffsets * sizeof(size_t));
  930. RtlRow offsetCalculator(r, row, numOffsets, variableOffsets);
  931. offsetCalculator.getUtf8(__lenResult, __result, column);
  932. }
  933. }
  934. }
  935. extern ECLRTL_API int getFieldNum(const char *fieldName, IOutputMetaData & metaVal)
  936. {
  937. const RtlRecord r = metaVal.queryRecordAccessor(true);
  938. return r.getFieldNum(fieldName);
  939. }
  // Describes how a destination field is derived from the source record.
  // On a field, exactly one of the first group is set, but the translator returns a bitmap indicating
  // which were required (and we can restrict translation to allow some types but not others).
  // Values are written as bit positions; the equivalent hex value is noted alongside.
  enum FieldMatchType {
      match_perfect   = 0,         // 0x0000 exact type match - use memcpy
      match_link      = 1 << 0,    // 0x0001 copy a nested dataset by linking
      match_move      = 1 << 1,    // 0x0002 at least one field has moved (set on translator)
      match_remove    = 1 << 2,    // 0x0004 at least one field has been removed (set on translator)
      match_truncate  = 1 << 3,    // 0x0008 dest is truncated copy of source - use memcpy
      match_extend    = 1 << 4,    // 0x0010 dest is padded version of source - use memcpy and memset
      match_typecast  = 1 << 5,    // 0x0020 type has changed - cast required
      match_none      = 1 << 6,    // 0x0040 no matching field in source - use null value
      match_recurse   = 1 << 7,    // 0x0080 use recursive translator for child records/datasets
      match_fail      = 1 << 8,    // 0x0100 no translation possible
      match_keychange = 1 << 9,    // 0x0200 at least one affected field not marked as payload (set on translator)
      match_virtual   = 1 << 11,   // 0x0800 at least one affected field is a virtual field (set on translator)

      // This flag may be set in conjunction with the others
      match_inifblock = 1 << 10,   // 0x0400 matching to a field in an ifblock - may not be present
      match_deblob    = 1 << 12,   // 0x1000 source needs fetching from a blob prior to translation
      match_dynamic   = 1 << 13,   // 0x2000 source needs fetching from dynamic source (callback)
      match_filepos   = 1 << 14,   // 0x4000 type moving in or out of filepos field - cast required
  };
  961. StringBuffer &describeFlags(StringBuffer &out, FieldMatchType flags)
  962. {
  963. if (flags == match_perfect)
  964. return out.append("perfect");
  965. unsigned origlen = out.length();
  966. if (flags & match_link) out.append("|link");
  967. if (flags & match_move) out.append("|move");
  968. if (flags & match_remove) out.append("|remove");
  969. if (flags & match_truncate) out.append("|truncate");
  970. if (flags & match_extend) out.append("|extend");
  971. if (flags & match_typecast) out.append("|typecast");
  972. if (flags & match_none) out.append("|none");
  973. if (flags & match_recurse) out.append("|recurse");
  974. if (flags & match_inifblock) out.append("|ifblock");
  975. if (flags & match_keychange) out.append("|keychange");
  976. if (flags & match_fail) out.append("|fail");
  977. if (flags & match_virtual) out.append("|virtual");
  978. if (flags & match_deblob) out.append("|blob");
  979. if (flags & match_dynamic) out.append("|dynamic");
  980. if (flags & match_filepos) out.append("|filepos");
  981. assertex(out.length() > origlen);
  982. return out.remove(origlen, 1);
  983. }
  984. inline constexpr FieldMatchType operator|(FieldMatchType a, FieldMatchType b) { return (FieldMatchType)((int)a | (int)b); }
  985. inline FieldMatchType &operator|=(FieldMatchType &a, FieldMatchType b) { return (FieldMatchType &) ((int &)a |= (int)b); }
  986. class GeneralRecordTranslator : public CInterfaceOf<IDynamicTransform>
  987. {
  988. public:
  989. GeneralRecordTranslator(const RtlRecord &_destRecInfo, const RtlRecord &_srcRecInfo, bool _binarySource, type_vals _callbackRawType = type_any)
  990. : destRecInfo(_destRecInfo), sourceRecInfo(_srcRecInfo), binarySource(_binarySource), callbackRawType(_callbackRawType)
  991. {
  992. matchInfo = new MatchInfo[destRecInfo.getNumFields()];
  993. createMatchInfo();
  994. #ifdef _DEBUG
  995. //describe();
  996. #endif
  997. }
  998. ~GeneralRecordTranslator()
  999. {
  1000. delete [] matchInfo;
  1001. }
  1002. // IDynamicTransform impl.
  1003. virtual void describe() const override
  1004. {
  1005. doDescribe(0);
  1006. }
  1007. virtual size32_t translate(ARowBuilder &builder, IVirtualFieldCallback & callback, const byte *sourceRec) const override
  1008. {
  1009. assertex(binarySource);
  1010. return doTranslate(builder, callback, 0, sourceRec);
  1011. }
  1012. virtual size32_t translate(ARowBuilder &builder, IVirtualFieldCallback & callback, const RtlRow &sourceRow) const override
  1013. {
  1014. assertex(binarySource);
  1015. sourceRow.lazyCalcOffsets(-1); // MORE - could save the max one we actually need...
  1016. return doTranslateOpaqueType(builder, callback, 0, &sourceRow);
  1017. }
  1018. virtual size32_t translate(ARowBuilder &builder, IVirtualFieldCallback & callback, const IDynamicFieldValueFetcher & fetcher) const override
  1019. {
  1020. assertex(!binarySource);
  1021. return doTranslateOpaqueType(builder, callback, 0, &fetcher);
  1022. }
  1023. virtual bool canTranslate() const override
  1024. {
  1025. return (matchFlags & match_fail) == 0;
  1026. }
  1027. virtual bool needsTranslate() const override
  1028. {
  1029. return !binarySource || (matchFlags & ~(match_link|match_inifblock)) != 0;
  1030. }
  1031. virtual bool needsNonVirtualTranslate() const override
  1032. {
  1033. return (matchFlags & ~(match_link|match_virtual|match_keychange|match_inifblock)) != 0;
  1034. }
  1035. virtual bool keyedTranslated() const override
  1036. {
  1037. return (matchFlags & match_keychange) != 0;
  1038. }
  1039. private:
  1040. void doDescribe(unsigned indent) const
  1041. {
  1042. unsigned perfect=0;
  1043. unsigned reported=0;
  1044. for (unsigned idx = 0; idx < destRecInfo.getNumFields(); idx++)
  1045. {
  1046. const char *source = destRecInfo.queryName(idx);
  1047. const MatchInfo &match = matchInfo[idx];
  1048. if (match.matchType == match_none)
  1049. DBGLOG("%*sNo match for field %s - default value will be used", indent, "", source);
  1050. else if (match.matchType == match_virtual)
  1051. DBGLOG("%*sUse virtual value for field %s", indent, "", source);
  1052. else
  1053. {
  1054. if (match.matchType != match_perfect)
  1055. {
  1056. reported++;
  1057. StringBuffer matchStr;
  1058. DBGLOG("%*sMatch (%s) to field %d for field %s (typecode %x)", indent, "", describeFlags(matchStr, match.matchType).str(), match.matchIdx, source, destRecInfo.queryType(idx)->fieldType);
  1059. if (match.subTrans)
  1060. match.subTrans->doDescribe(indent+2);
  1061. }
  1062. else
  1063. perfect++;
  1064. }
  1065. }
  1066. if (allUnmatched.ordinality())
  1067. {
  1068. VStringBuffer msg("%*sDropped field", indent, "");
  1069. if (allUnmatched.ordinality()>1)
  1070. msg.append('s');
  1071. for (unsigned idx = 0; idx < allUnmatched.ordinality() && idx < 5; idx++)
  1072. {
  1073. if (idx)
  1074. msg.append(',');
  1075. msg.appendf(" %s", sourceRecInfo.queryName(allUnmatched.item(idx)));
  1076. }
  1077. if (allUnmatched.ordinality() > 5)
  1078. msg.appendf(" and %u other fields", allUnmatched.ordinality() - 5);
  1079. DBGLOG("%s", msg.str());
  1080. }
  1081. if (!canTranslate())
  1082. DBGLOG("%*sTranslation is NOT possible", indent, "");
  1083. else if (needsTranslate())
  1084. {
  1085. StringBuffer matchStr;
  1086. if (perfect)
  1087. DBGLOG("%u %sfield%s matched perfectly", perfect, reported ? "other " : "", perfect==1 ? "" : "s");
  1088. DBGLOG("%*sTranslation is possible (%s)", indent, "", describeFlags(matchStr, matchFlags).str());
  1089. }
  1090. else
  1091. DBGLOG("%*sTranslation is not necessary", indent, "");
  1092. }
  1093. size32_t doTranslate(ARowBuilder &builder, IVirtualFieldCallback & callback, size32_t offset, const byte *sourceRec) const
  1094. {
  1095. unsigned numOffsets = sourceRecInfo.getNumVarFields() + 1;
  1096. size_t * variableOffsets = (size_t *)alloca(numOffsets * sizeof(size_t));
  1097. RtlRow sourceRow(sourceRecInfo, sourceRec, numOffsets, variableOffsets); // MORE - could save the max source offset we actually need, and only set up that many...
  1098. return doTranslateOpaqueType(builder, callback, offset, &sourceRow);
  1099. }
  1100. size32_t doTranslateOpaqueType(ARowBuilder &builder, IVirtualFieldCallback & callback, size32_t offset, const void *sourceRow) const
  1101. {
  1102. dbgassertex(canTranslate());
  1103. byte * destConditions = (byte *)alloca(destRecInfo.getNumIfBlocks() * sizeof(byte));
  1104. memset(destConditions, 2, destRecInfo.getNumIfBlocks() * sizeof(byte));
  1105. size32_t estimate = destRecInfo.getFixedSize();
  1106. bool hasBlobs = false;
  1107. if (!estimate)
  1108. {
  1109. if (binarySource)
  1110. estimate = estimateNewSize(*(const RtlRow *)sourceRow);
  1111. else
  1112. estimate = destRecInfo.getMinRecordSize();
  1113. builder.ensureCapacity(offset+estimate, "record");
  1114. }
  1115. size32_t origOffset = offset;
  1116. for (unsigned idx = 0; idx < destRecInfo.getNumFields(); idx++)
  1117. {
  1118. const RtlFieldInfo *field = destRecInfo.queryField(idx);
  1119. if (field->omitable() && destRecInfo.excluded(field, builder.getSelf(), destConditions))
  1120. continue;
  1121. const RtlTypeInfo *type = field->type;
  1122. const MatchInfo &match = matchInfo[idx];
  1123. if (match.matchType == match_none || match.matchType==match_fail)
  1124. {
  1125. offset = type->buildNull(builder, offset, field);
  1126. }
  1127. else if (match.matchType == match_virtual)
  1128. {
  1129. switch (getVirtualInitializer(field->initializer))
  1130. {
  1131. case FVirtualFilePosition:
  1132. offset = type->buildInt(builder, offset, field, callback.getFilePosition(sourceRow));
  1133. break;
  1134. case FVirtualLocalFilePosition:
  1135. offset = type->buildInt(builder, offset, field, callback.getLocalFilePosition(sourceRow));
  1136. break;
  1137. case FVirtualFilename:
  1138. {
  1139. const char * filename = callback.queryLogicalFilename(sourceRow);
  1140. offset = type->buildString(builder, offset, field, strlen(filename), filename);
  1141. break;
  1142. }
  1143. default:
  1144. throwUnexpected();
  1145. }
  1146. }
  1147. else
  1148. {
  1149. unsigned matchField = match.matchIdx;
  1150. const RtlTypeInfo *sourceType = sourceRecInfo.queryType(matchField);
  1151. size_t sourceOffset = 0;
  1152. const byte *source = nullptr;
  1153. size_t copySize = 0;
  1154. if (binarySource)
  1155. {
  1156. const RtlRow &rtlRow = *(const RtlRow *)sourceRow;
  1157. sourceOffset = rtlRow.getOffset(matchField);
  1158. source = rtlRow.queryRow() + sourceOffset;
  1159. copySize = rtlRow.getSize(matchField);
  1160. }
  1161. if (match.matchType & match_deblob)
  1162. {
  1163. offset_t blobId = sourceType->getInt(source);
  1164. sourceType = sourceType->queryChildType();
  1165. sourceOffset = 0;
  1166. source = callback.lookupBlob(blobId);
  1167. copySize = sourceType->size(source, source);
  1168. hasBlobs = true;
  1169. }
  1170. if (copySize == 0 && (match.matchType & match_inifblock)) // Field is missing because of an ifblock - use default value
  1171. {
  1172. offset = type->buildNull(builder, offset, field);
  1173. }
  1174. else
  1175. {
  1176. switch (match.matchType & ~(match_inifblock|match_deblob))
  1177. {
  1178. case match_perfect:
  1179. {
  1180. // Look ahead for other perfect matches and combine the copies
  1181. if (!(match.matchType & match_deblob))
  1182. {
  1183. while (idx < destRecInfo.getNumFields()-1)
  1184. {
  1185. const MatchInfo &nextMatch = matchInfo[idx+1];
  1186. if (nextMatch.matchType == match_perfect && nextMatch.matchIdx == matchField+1)
  1187. {
  1188. idx++;
  1189. matchField++;
  1190. }
  1191. else
  1192. break;
  1193. }
  1194. copySize = ((const RtlRow *)sourceRow)->getOffset(matchField+1) - sourceOffset;
  1195. }
  1196. builder.ensureCapacity(offset+copySize, field->name);
  1197. memcpy(builder.getSelf()+offset, source, copySize);
  1198. offset += copySize;
  1199. break;
  1200. }
  1201. case match_truncate:
  1202. {
  1203. assert(type->isFixedSize());
  1204. copySize = type->getMinSize();
  1205. builder.ensureCapacity(offset+copySize, field->name);
  1206. memcpy(builder.getSelf()+offset, source, copySize);
  1207. offset += copySize;
  1208. break;
  1209. }
  1210. case match_extend:
  1211. {
  1212. assert(type->isFixedSize());
  1213. size32_t destSize = type->getMinSize();
  1214. builder.ensureCapacity(offset+destSize, field->name);
  1215. memcpy(builder.getSelf()+offset, source, copySize);
  1216. offset += copySize;
  1217. unsigned fillSize = destSize - copySize;
  1218. memset(builder.getSelf()+offset, match.fillChar, fillSize);
  1219. offset += fillSize;
  1220. break;
  1221. }
  1222. case match_filepos:
  1223. case match_typecast:
  1224. offset = translateScalar(builder, offset, field, *type, *sourceType, source);
  1225. break;
  1226. case match_typecast|match_dynamic:
  1227. {
  1228. const IDynamicFieldValueFetcher &callbackRowHandler = *(const IDynamicFieldValueFetcher *)sourceRow;
  1229. source = callbackRowHandler.queryValue(matchField, copySize);
  1230. if (callbackRawType == type_string)
  1231. offset = translateScalarFromString(builder, offset, field, *type, *sourceType, (const char *)source, (size_t)copySize);
  1232. else
  1233. offset = translateScalarFromUtf8(builder, offset, field, *type, *sourceType, (const char *)source, (size_t)copySize);
  1234. break;
  1235. }
  1236. case match_link:
  1237. {
  1238. // a 32-bit record count, and a (linked) pointer to an array of record pointers
  1239. byte *dest = builder.ensureCapacity(offset+sizeof(size32_t)+sizeof(const byte **), field->name)+offset;
  1240. *(size32_t *)dest = *(size32_t *)source;
  1241. *(const byte ***)(dest + sizeof(size32_t)) = rtlLinkRowset(*(const byte ***)(source + sizeof(size32_t)));
  1242. offset += sizeof(size32_t)+sizeof(const byte **);
  1243. break;
  1244. }
  1245. case match_recurse|match_dynamic:
  1246. {
  1247. const IDynamicFieldValueFetcher &callbackRowHandler = *(const IDynamicFieldValueFetcher *)sourceRow;
  1248. Owned<IDynamicRowIterator> iterator = callbackRowHandler.getNestedIterator(matchField);
  1249. if (type->getType()==type_record)
  1250. {
  1251. IDynamicFieldValueFetcher &fieldFetcher = iterator->query();
  1252. offset = match.subTrans->doTranslateOpaqueType(builder, callback, offset, &fieldFetcher);
  1253. }
  1254. else if (type->isLinkCounted())
  1255. {
  1256. // a 32-bit record count, and a pointer to an array of record pointers
  1257. IEngineRowAllocator *childAllocator = builder.queryAllocator()->createChildRowAllocator(type->queryChildType());
  1258. assertex(childAllocator); // May not be available when using serialized types (but unlikely to want to create linkcounted children remotely either)
  1259. size32_t sizeInBytes = sizeof(size32_t) + sizeof(void *);
  1260. builder.ensureCapacity(offset+sizeInBytes, field->name);
  1261. size32_t numRows = 0;
  1262. const byte **childRows = nullptr;
  1263. ForEach(*iterator)
  1264. {
  1265. IDynamicFieldValueFetcher &fieldFetcher = iterator->query();
  1266. RtlDynamicRowBuilder childBuilder(*childAllocator);
  1267. size32_t childLen = match.subTrans->doTranslateOpaqueType(childBuilder, callback, 0, &fieldFetcher);
  1268. childRows = childAllocator->appendRowOwn(childRows, ++numRows, (void *) childBuilder.finalizeRowClear(childLen));
  1269. }
  1270. if (type->getType() == type_dictionary)
  1271. {
  1272. const RtlTypeInfo * childType = type->queryChildType();
  1273. assertex(childType && childType->getType() == type_record);
  1274. CHThorHashLookupInfo lookupHelper(static_cast<const RtlRecordTypeInfo &>(*childType));
  1275. rtlCreateDictionaryFromDataset(numRows, childRows, childAllocator, lookupHelper);
  1276. }
  1277. // Go back in and patch the count, remembering it may have moved
  1278. rtlWriteInt4(builder.getSelf()+offset, numRows);
  1279. * ( const void * * ) (builder.getSelf()+offset+sizeof(size32_t)) = childRows;
  1280. offset += sizeInBytes;
  1281. }
  1282. else
  1283. {
  1284. size32_t countOffset = offset;
  1285. byte *dest = builder.ensureCapacity(offset+sizeof(size32_t), field->name)+offset;
  1286. offset += sizeof(size32_t);
  1287. size32_t initialOffset = offset;
  1288. *(size32_t *)dest = 0; // patched below when true figure known
  1289. ForEach(*iterator)
  1290. {
  1291. IDynamicFieldValueFetcher &fieldFetcher = iterator->query();
  1292. offset = match.subTrans->doTranslateOpaqueType(builder, callback, offset, &fieldFetcher);
  1293. }
  1294. dest = builder.getSelf() + countOffset; // Note - may have been moved by reallocs since last calculated
  1295. *(size32_t *)dest = offset - initialOffset;
  1296. }
  1297. break;
  1298. }
  1299. case match_recurse:
  1300. if (type->getType()==type_record)
  1301. offset = match.subTrans->doTranslate(builder, callback, offset, source);
  1302. else if (type->isLinkCounted())
  1303. {
  1304. // a 32-bit record count, and a pointer to an array of record pointers
  1305. Owned<IEngineRowAllocator> childAllocator = builder.queryAllocator()->createChildRowAllocator(type->queryChildType());
  1306. assertex(childAllocator); // May not be available when using serialized types (but unlikely to want to create linkcounted children remotely either)
  1307. size32_t sizeInBytes = sizeof(size32_t) + sizeof(void *);
  1308. builder.ensureCapacity(offset+sizeInBytes, field->name);
  1309. size32_t numRows = 0;
  1310. const byte **childRows = nullptr;
  1311. if (sourceType->isLinkCounted())
  1312. {
  1313. // a 32-bit count, then a pointer to the source rows
  1314. size32_t childCount = *(size32_t *) source;
  1315. source += sizeof(size32_t);
  1316. const byte ** sourceRows = *(const byte***) source;
  1317. for (size32_t childRow = 0; childRow < childCount; childRow++)
  1318. {
  1319. RtlDynamicRowBuilder childBuilder(*childAllocator);
  1320. size32_t childLen = match.subTrans->doTranslate(childBuilder, callback, 0, sourceRows[childRow]);
  1321. childRows = childAllocator->appendRowOwn(childRows, ++numRows, (void *) childBuilder.finalizeRowClear(childLen));
  1322. }
  1323. }
  1324. else
  1325. {
  1326. // a 32-bit size, then rows inline
  1327. size32_t childSize = *(size32_t *) source;
  1328. source += sizeof(size32_t);
  1329. const byte *initialSource = source;
  1330. while ((size_t)(source - initialSource) < childSize)
  1331. {
  1332. RtlDynamicRowBuilder childBuilder(*childAllocator);
  1333. size32_t childLen = match.subTrans->doTranslate(childBuilder, callback, 0, source);
  1334. childRows = childAllocator->appendRowOwn(childRows, ++numRows, (void *) childBuilder.finalizeRowClear(childLen));
  1335. source += sourceType->queryChildType()->size(source, nullptr); // MORE - shame to repeat a calculation that the translate above almost certainly just did
  1336. }
  1337. }
  1338. if (type->getType() == type_dictionary)
  1339. {
  1340. const RtlTypeInfo * childType = type->queryChildType();
  1341. assertex(childType && childType->getType() == type_record);
  1342. CHThorHashLookupInfo lookupHelper(static_cast<const RtlRecordTypeInfo &>(*childType));
  1343. rtlCreateDictionaryFromDataset(numRows, childRows, childAllocator, lookupHelper);
  1344. }
  1345. // Go back in and patch the count, remembering it may have moved
  1346. rtlWriteInt4(builder.getSelf()+offset, numRows);
  1347. * ( const void * * ) (builder.getSelf()+offset+sizeof(size32_t)) = childRows;
  1348. offset += sizeInBytes;
  1349. }
  1350. else
  1351. {
  1352. size32_t countOffset = offset;
  1353. byte *dest = builder.ensureCapacity(offset+sizeof(size32_t), field->name)+offset;
  1354. offset += sizeof(size32_t);
  1355. size32_t initialOffset = offset;
  1356. *(size32_t *)dest = 0; // patched below when true figure known
  1357. if (sourceType->isLinkCounted())
  1358. {
  1359. // a 32-bit count, then a pointer to the source rows
  1360. size32_t childCount = *(size32_t *) source;
  1361. source += sizeof(size32_t);
  1362. const byte ** sourceRows = *(const byte***) source;
  1363. for (size32_t childRow = 0; childRow < childCount; childRow++)
  1364. {
  1365. const byte * row = sourceRows[childRow];
  1366. //Dictionaries have blank rows - ignore them when serializing (to a dataset)
  1367. if (row)
  1368. offset = match.subTrans->doTranslate(builder, callback, offset, row);
  1369. }
  1370. }
  1371. else
  1372. {
  1373. // a 32-bit size, then rows inline
  1374. size32_t childSize = *(size32_t *) source;
  1375. source += sizeof(size32_t);
  1376. const byte *initialSource = source;
  1377. while ((size_t)(source - initialSource) < childSize)
  1378. {
  1379. offset = match.subTrans->doTranslate(builder, callback, offset, source);
  1380. source += sourceType->queryChildType()->size(source, nullptr); // MORE - shame to repeat a calculation that the translate above almost certainly just did
  1381. }
  1382. }
  1383. dest = builder.getSelf() + countOffset; // Note - may have been moved by reallocs since last calculated
  1384. *(size32_t *)dest = offset - initialOffset;
  1385. }
  1386. break;
  1387. default:
  1388. throwUnexpected();
  1389. }
  1390. }
  1391. }
  1392. }
  1393. if (estimate && offset-origOffset != estimate)
  1394. {
  1395. if (offset == origOffset)
  1396. {
  1397. //Zero size records are treated as single byte to avoid confusion with sizes returned from transforms etc.
  1398. offset++;
  1399. }
  1400. else
  1401. {
  1402. if (!hasBlobs)
  1403. assert(offset-origOffset > estimate); // Estimate is always supposed to be conservative
  1404. #ifdef TRACE_TRANSLATION
  1405. DBGLOG("Wrote %u bytes to record (estimate was %u)\n", offset-origOffset, estimate);
  1406. #endif
  1407. }
  1408. }
  1409. return offset;
  1410. }
  1411. inline FieldMatchType match() const
  1412. {
  1413. return matchFlags;
  1414. }
  1415. const RtlRecord &destRecInfo;
  1416. const RtlRecord &sourceRecInfo;
  1417. bool binarySource = true;
  1418. type_vals callbackRawType;
  1419. int fixedDelta = 0; // total size difference from all fixed size mappings
  1420. UnsignedArray allUnmatched; // List of all source fields that are unmatched (so that we can trace them)
  1421. UnsignedArray variableUnmatched; // List of all variable-size source fields that are unmatched
  1422. FieldMatchType matchFlags = match_perfect;
  1423. struct MatchInfo
  1424. {
  1425. unsigned matchIdx = 0;
  1426. FieldMatchType matchType = match_fail;
  1427. char fillChar = 0;
  1428. GeneralRecordTranslator *subTrans = nullptr;
  1429. ~MatchInfo()
  1430. {
  1431. delete subTrans;
  1432. }
  1433. } *matchInfo;
  1434. static size32_t translateScalarFromUtf8(ARowBuilder &builder, size32_t offset, const RtlFieldInfo *field, const RtlTypeInfo &destType, const RtlTypeInfo &sourceType, const char *source, size_t srcSize)
  1435. {
  1436. switch(destType.getType())
  1437. {
  1438. case type_boolean:
  1439. case type_int:
  1440. case type_swapint:
  1441. case type_packedint:
  1442. case type_filepos:
  1443. case type_keyedint:
  1444. {
  1445. __int64 res = rtlStrToInt8(srcSize, source);
  1446. offset = destType.buildInt(builder, offset, field, res);
  1447. break;
  1448. }
  1449. case type_real:
  1450. {
  1451. double res = rtlStrToReal(srcSize, source);
  1452. offset = destType.buildReal(builder, offset, field, res);
  1453. break;
  1454. }
  1455. case type_data:
  1456. case type_string:
  1457. case type_decimal: // Go via string - not common enough to special-case
  1458. case type_varstring:
  1459. case type_qstring:
  1460. case type_utf8:
  1461. //MORE: Could special case casting from utf8 to utf8 similar to strings above
  1462. case type_unicode:
  1463. case type_varunicode:
  1464. {
  1465. size32_t utf8chars = rtlUtf8Length(srcSize, source);
  1466. offset = destType.buildUtf8(builder, offset, field, utf8chars, source);
  1467. break;
  1468. }
  1469. case type_set:
  1470. {
  1471. UNIMPLEMENTED; // JCS->GH - but perhaps can/should translate using iterator too?
  1472. break;
  1473. }
  1474. default:
  1475. throwUnexpected();
  1476. }
  1477. return offset;
  1478. }
  1479. static size32_t translateScalarFromString(ARowBuilder &builder, size32_t offset, const RtlFieldInfo *field, const RtlTypeInfo &destType, const RtlTypeInfo &sourceType, const char *source, size_t srcSize)
  1480. {
  1481. switch(destType.getType())
  1482. {
  1483. case type_boolean:
  1484. case type_int:
  1485. case type_swapint:
  1486. case type_packedint:
  1487. case type_filepos:
  1488. case type_keyedint:
  1489. {
  1490. __int64 res = rtlStrToInt8(srcSize, source);
  1491. offset = destType.buildInt(builder, offset, field, res);
  1492. break;
  1493. }
  1494. case type_real:
  1495. {
  1496. double res = rtlStrToReal(srcSize, source);
  1497. offset = destType.buildReal(builder, offset, field, res);
  1498. break;
  1499. }
  1500. case type_data:
  1501. case type_string:
  1502. case type_decimal: // Go via string - not common enough to special-case
  1503. case type_varstring:
  1504. case type_qstring:
  1505. case type_utf8:
  1506. //MORE: Could special case casting from utf8 to utf8 similar to strings above
  1507. case type_unicode:
  1508. case type_varunicode:
  1509. {
  1510. offset = destType.buildString(builder, offset, field, srcSize, source);
  1511. break;
  1512. }
  1513. case type_set:
  1514. {
  1515. UNIMPLEMENTED; // JCS->GH - but perhaps can/should translate using iterator too?
  1516. break;
  1517. }
  1518. default:
  1519. throwUnexpected();
  1520. }
  1521. return offset;
  1522. }
  1523. static bool canTranslateNonScalar(const RtlTypeInfo * type, const RtlTypeInfo * sourceType)
  1524. {
  1525. auto target = type->getType();
  1526. auto source = sourceType->getType();
  1527. if (target == source)
  1528. return true;
  1529. if ((target == type_dictionary) && (source == type_table))
  1530. return true;
  1531. if ((target == type_table) && (source == type_dictionary))
  1532. return true;
  1533. return false;
  1534. }
  1535. void createMatchInfo()
  1536. {
  1537. unsigned defaulted = 0;
  1538. bool destHasNested = destRecInfo.hasNested();
  1539. bool sourceHasNested = sourceRecInfo.hasNested();
  1540. for (unsigned idx = 0; idx < destRecInfo.getNumFields(); idx++)
  1541. {
  1542. const RtlFieldInfo *field = destRecInfo.queryField(idx);
  1543. const RtlTypeInfo *type = field->type;
  1544. MatchInfo &info = matchInfo[idx];
  1545. const char *name = destRecInfo.queryName(idx);
  1546. info.matchIdx = sourceRecInfo.getFieldNum(name);
  1547. if (info.matchIdx == (unsigned) -1)
  1548. {
  1549. const byte * initializer = (const byte *) field->initializer;
  1550. info.matchType = isVirtualInitializer(initializer) ? match_virtual : match_none;
  1551. if ((field->flags & RFTMinifblock) == 0)
  1552. {
  1553. size32_t defaultSize = (initializer && !isVirtualInitializer(initializer)) ? type->size(initializer, nullptr) : type->getMinSize();
  1554. fixedDelta -= defaultSize;
  1555. #ifdef TRACE_TRANSLATION
  1556. DBGLOG("Decreasing fixedDelta size by %d to %d for defaulted field %d (%s)", defaultSize, fixedDelta, idx, destRecInfo.queryName(idx));
  1557. #endif
  1558. }
  1559. if ((field->flags & RFTMispayloadfield) == 0)
  1560. matchFlags |= match_keychange;
  1561. defaulted++;
  1562. // If dest field is in a nested record, we need to check that there's no "non-record" field in source matching current nested record name
  1563. if (name)
  1564. {
  1565. if (destHasNested)
  1566. {
  1567. const char *ldot = strrchr(name, '.');
  1568. if (ldot)
  1569. {
  1570. StringBuffer recname(ldot-name, name);
  1571. if (sourceRecInfo.getFieldNum(recname) != (unsigned) -1)
  1572. info.matchType = match_fail; // No translation from non-record to record
  1573. }
  1574. }
  1575. if (sourceHasNested && sourceRecInfo.queryOriginalField(name))
  1576. {
  1577. // Similarly if dest field IS not a nested record, but there is a field in source which is.
  1578. // Note that we already know there is no matching field called name in the exapanded version of source,
  1579. // so any match we find must be a record
  1580. info.matchType = match_fail; // No translation from record to non-record
  1581. }
  1582. }
  1583. }
  1584. else
  1585. {
  1586. bool deblob = false;
  1587. const RtlTypeInfo *sourceType = sourceRecInfo.queryType(info.matchIdx);
  1588. unsigned sourceFlags = sourceRecInfo.queryField(info.matchIdx)->flags;
  1589. unsigned destFlags = field->flags;
  1590. if (binarySource && sourceType->isBlob())
  1591. {
  1592. if (type->isBlob())
  1593. {
  1594. }
  1595. else
  1596. {
  1597. sourceType = sourceType->queryChildType();
  1598. deblob = true;
  1599. }
  1600. }
  1601. if (!type->isScalar() || !sourceType->isScalar())
  1602. {
  1603. if (!canTranslateNonScalar(type, sourceType))
  1604. info.matchType = match_fail; // No translation from one non-scalar type to another
  1605. else
  1606. {
  1607. switch (type->getType())
  1608. {
  1609. case type_set:
  1610. if (binarySource)
  1611. {
  1612. if (type->queryChildType()->fieldType==sourceType->queryChildType()->fieldType &&
  1613. type->queryChildType()->length==sourceType->queryChildType()->length)
  1614. info.matchType = match_perfect;
  1615. else
  1616. info.matchType = match_typecast;
  1617. }
  1618. else
  1619. info.matchType = match_typecast|match_dynamic;
  1620. break;
  1621. case type_row: // These are not expected I think...
  1622. throwUnexpected();
  1623. case type_ifblock:
  1624. case type_record:
  1625. case type_table:
  1626. case type_dictionary:
  1627. {
  1628. const RtlRecord *subDest = destRecInfo.queryNested(idx);
  1629. const RtlRecord *subSrc = sourceRecInfo.queryNested(info.matchIdx);
  1630. info.subTrans = new GeneralRecordTranslator(*subDest, *subSrc, binarySource);
  1631. if (!info.subTrans->needsTranslate())
  1632. {
  1633. if (!binarySource)
  1634. info.matchType = match_recurse|match_dynamic;
  1635. else
  1636. {
  1637. // Child does not require translation, but check linkcount mode matches too!
  1638. if (type->isLinkCounted())
  1639. {
  1640. if (sourceType->isLinkCounted())
  1641. info.matchType = match_link;
  1642. else
  1643. info.matchType = match_recurse;
  1644. }
  1645. else
  1646. {
  1647. if (sourceType->isLinkCounted())
  1648. info.matchType = match_recurse;
  1649. else
  1650. info.matchType = match_perfect;
  1651. }
  1652. if (info.matchType != match_recurse)
  1653. {
  1654. delete info.subTrans;
  1655. info.subTrans = nullptr;
  1656. }
  1657. }
  1658. }
  1659. else if (info.subTrans->canTranslate())
  1660. {
  1661. info.matchType = binarySource ? match_recurse : (match_recurse|match_dynamic);
  1662. unsigned childFlags = info.subTrans->matchFlags;
  1663. //Ignore differences in the keyed flag for child structures (it will be set later if this field is keyed)
  1664. matchFlags |= (FieldMatchType)(childFlags & ~match_keychange);
  1665. }
  1666. else
  1667. info.matchType = match_fail;
  1668. break;
  1669. }
  1670. case type_blob:
  1671. if (!binarySource)
  1672. info.matchType = match_fail;
  1673. else if (sourceType->isBlob())
  1674. info.matchType = match_perfect; // We don't check that the child type matches
  1675. else
  1676. info.matchType = match_fail;
  1677. break;
  1678. default:
  1679. info.matchType = match_fail;
  1680. break;
  1681. }
  1682. }
  1683. }
  1684. else if (!binarySource)
  1685. info.matchType = match_typecast|match_dynamic;
  1686. else if ((type->fieldType==sourceType->fieldType))
  1687. {
  1688. if (type->length==sourceType->length)
  1689. {
  1690. info.matchType = match_perfect;
  1691. }
  1692. else
  1693. {
  1694. assert(type->isFixedSize()); // Both variable size would have matched length above
  1695. info.matchType = match_typecast;
  1696. if (type->length < sourceType->length)
  1697. {
  1698. if (type->canTruncate())
  1699. {
  1700. info.matchType = match_truncate;
  1701. if (((sourceFlags|destFlags) & RFTMinifblock) == 0)
  1702. fixedDelta += sourceType->getMinSize()-type->getMinSize();
  1703. #ifdef TRACE_TRANSLATION
  1704. DBGLOG("Increasing fixedDelta size by %d to %d for truncated field %d (%s)", sourceType->getMinSize()-type->getMinSize(), fixedDelta, idx, destRecInfo.queryName(idx));
  1705. #endif
  1706. }
  1707. }
  1708. else
  1709. {
  1710. if (type->canExtend(info.fillChar))
  1711. {
  1712. info.matchType = match_extend;
  1713. if (((sourceFlags|destFlags) & RFTMinifblock) == 0)
  1714. fixedDelta += sourceType->getMinSize()-type->getMinSize();
  1715. #ifdef TRACE_TRANSLATION
  1716. DBGLOG("Decreasing fixedDelta size by %d to %d for truncated field %d (%s)", type->getMinSize()-sourceType->getMinSize(), fixedDelta, idx, destRecInfo.queryName(idx));
  1717. #endif
  1718. }
  1719. }
  1720. }
  1721. }
  1722. else if ((type->getType()==type_filepos || sourceType->getType()==type_filepos) &&
  1723. type->isUnsigned()==sourceType->isUnsigned())
  1724. info.matchType = match_filepos;
  1725. else
  1726. info.matchType = match_typecast;
  1727. if (deblob)
  1728. info.matchType |= match_deblob;
  1729. if (sourceFlags & RFTMinifblock || field->flags & RFTMinifblock)
  1730. info.matchType |= match_inifblock; // Avoids incorrect commoning up of adjacent matches
  1731. // MORE - could note the highest interesting fieldnumber in the source and not bother filling in offsets after that
  1732. // Not sure it would help much though - usually need to know the total record size anyway in real life
  1733. if (idx != info.matchIdx)
  1734. matchFlags |= match_move;
  1735. //Whether this field is in an ifblock, or needs to be copied by linking it do not count as changes
  1736. FieldMatchType maskedType = (FieldMatchType)(info.matchType & ~(match_link|match_inifblock));
  1737. if (((maskedType != match_perfect) || (idx != info.matchIdx)) && ((field->flags & RFTMispayloadfield) == 0 || (sourceFlags & RFTMispayloadfield) == 0))
  1738. matchFlags |= match_keychange;
  1739. else if ((field->flags & RFTMispayloadfield) != (sourceFlags & RFTMispayloadfield))
  1740. matchFlags |= match_keychange;
  1741. }
  1742. matchFlags |= info.matchType;
  1743. }
  1744. if (sourceRecInfo.getNumFields() > destRecInfo.getNumFields()-defaulted)
  1745. {
  1746. matchFlags |= match_remove;
  1747. for (unsigned idx = 0; idx < sourceRecInfo.getNumFields(); idx++)
  1748. {
  1749. const RtlFieldInfo *field = sourceRecInfo.queryField(idx);
  1750. const char *name = sourceRecInfo.queryName(idx);
  1751. if (destRecInfo.getFieldNum(name) == (unsigned) -1)
  1752. {
  1753. // unmatched field
  1754. if ((field->flags & RFTMispayloadfield) == 0)
  1755. matchFlags |= match_keychange;
  1756. if (!destRecInfo.getFixedSize())
  1757. {
  1758. const RtlTypeInfo *type = field->type;
  1759. if (type->isFixedSize() && (field->flags & RFTMinifblock)==0)
  1760. {
  1761. #ifdef TRACE_TRANSLATION
  1762. DBGLOG("Reducing estimated size by %d for (fixed size) omitted field %s", (int) type->getMinSize(), field->name);
  1763. #endif
  1764. fixedDelta += type->getMinSize();
  1765. }
  1766. else
  1767. variableUnmatched.append(idx);
  1768. }
  1769. allUnmatched.append(idx);
  1770. }
  1771. }
  1772. #ifdef TRACE_TRANSLATION
  1773. DBGLOG("Delta from fixed-size fields is %d bytes", fixedDelta);
  1774. #endif
  1775. }
  1776. }
  1777. size32_t estimateNewSize(const RtlRow &sourceRow) const
  1778. {
  1779. #ifdef TRACE_TRANSLATION
  1780. DBGLOG("Source record size is %d", (int) sourceRow.getRecordSize());
  1781. #endif
  1782. size32_t expectedSize = sourceRow.getRecordSize();
  1783. assertex((int) expectedSize >= fixedDelta);
  1784. expectedSize -= fixedDelta;
  1785. #ifdef TRACE_TRANSLATION
  1786. DBGLOG("Source record size without fixed delta is %d", expectedSize);
  1787. #endif
  1788. ForEachItemIn(i, variableUnmatched)
  1789. {
  1790. unsigned fieldNo = variableUnmatched.item(i);
  1791. expectedSize -= sourceRow.getSize(fieldNo);
  1792. #ifdef TRACE_TRANSLATION
  1793. DBGLOG("Reducing estimated size by %d to %d for omitted field %d (%s)", (int) sourceRow.getSize(fieldNo), expectedSize, fieldNo, sourceRecInfo.queryName(fieldNo));
  1794. #endif
  1795. }
  1796. if (matchFlags & ~(match_perfect|match_link|match_none|match_virtual|match_extend|match_truncate))
  1797. {
  1798. for (unsigned idx = 0; idx < destRecInfo.getNumFields(); idx++)
  1799. {
  1800. const MatchInfo &match = matchInfo[idx];
  1801. const RtlTypeInfo *type = destRecInfo.queryType(idx);
  1802. unsigned matchField = match.matchIdx;
  1803. if ((match.matchType & match_inifblock) == 0)
  1804. {
  1805. switch (match.matchType)
  1806. {
  1807. case match_perfect:
  1808. case match_link:
  1809. case match_none:
  1810. case match_virtual:
  1811. case match_extend:
  1812. case match_truncate:
  1813. // These ones were already included in fixedDelta
  1814. break;
  1815. default:
  1816. // This errs on the side of small - i.e. it assumes that all typecasts end up at minimum size
  1817. // We could do better in some cases e.g. variable string <-> variable unicode we can assume factor of 2,
  1818. // uft8 <-> string we could calculate here - but unlikely to be worth the effort.
  1819. // But it's fine for fixed size output fields, including truncate/extend
  1820. // We could also precalculate the expected delta if all omitted fields are fixed size - but not sure how likely/worthwhile that is.
  1821. auto minSize = type->getMinSize();
  1822. auto sourceSize = sourceRow.getSize(matchField);
  1823. expectedSize += minSize;
  1824. assertex(expectedSize >= sourceSize);
  1825. expectedSize -= sourceSize;
  1826. #ifdef TRACE_TRANSLATION
  1827. DBGLOG("Adjusting estimated size by (%d - %d) to %d for translated field %d (%s)", (int) sourceSize, minSize, expectedSize, matchField, sourceRecInfo.queryName(matchField));
  1828. #endif
  1829. break;
  1830. }
  1831. }
  1832. }
  1833. }
  1834. return expectedSize;
  1835. }
  1836. };
  1837. extern ECLRTL_API const IDynamicTransform *createRecordTranslator(const RtlRecord &destRecInfo, const RtlRecord &srcRecInfo)
  1838. {
  1839. return new GeneralRecordTranslator(destRecInfo, srcRecInfo, true);
  1840. }
  1841. extern ECLRTL_API const IDynamicTransform *createRecordTranslatorViaCallback(const RtlRecord &destRecInfo, const RtlRecord &srcRecInfo, type_vals rawType)
  1842. {
  1843. return new GeneralRecordTranslator(destRecInfo, srcRecInfo, false, rawType);
  1844. }
  1845. //---------------------------------------------------------------------------------------------------------------------
  1846. class CloneVirtualRecordTranslator : public CInterfaceOf<IDynamicTransform>
  1847. {
  1848. public:
  1849. CloneVirtualRecordTranslator(const RtlRecord &_destRecInfo, IOutputMetaData & _sourceMeta)
  1850. : destRecInfo(_destRecInfo), sourceMeta(_sourceMeta)
  1851. {
  1852. init();
  1853. }
  1854. // IDynamicTransform impl.
  1855. virtual void describe() const override
  1856. {
  1857. doDescribe(0);
  1858. }
  1859. virtual size32_t translate(ARowBuilder &builder, IVirtualFieldCallback & callback, const byte *sourceRec) const override
  1860. {
  1861. size32_t sourceSize = sourceMeta.getRecordSize(sourceRec);
  1862. return doAppendVirtuals(builder, callback, 0, sourceSize, sourceRec);
  1863. }
  1864. virtual size32_t translate(ARowBuilder &builder, IVirtualFieldCallback & callback, const RtlRow &sourceRow) const override
  1865. {
  1866. const byte * source = sourceRow.queryRow();
  1867. size32_t sourceSize = sourceMeta.getRecordSize(source);
  1868. return doAppendVirtuals(builder, callback, 0, sourceSize, source);
  1869. }
  1870. virtual size32_t translate(ARowBuilder &builder, IVirtualFieldCallback & callback, const IDynamicFieldValueFetcher & fetcher) const override
  1871. {
  1872. throwUnexpected();
  1873. }
  1874. virtual bool canTranslate() const override
  1875. {
  1876. return true;
  1877. }
  1878. virtual bool needsTranslate() const override
  1879. {
  1880. return true;
  1881. }
  1882. virtual bool needsNonVirtualTranslate() const override
  1883. {
  1884. return false;
  1885. }
  1886. virtual bool keyedTranslated() const override
  1887. {
  1888. return false;
  1889. }
  1890. private:
  1891. void doDescribe(unsigned indent) const
  1892. {
  1893. for (unsigned idx = firstVirtual; idx < destRecInfo.getNumFields(); idx++)
  1894. {
  1895. const RtlFieldInfo *field = destRecInfo.queryField(idx);
  1896. const char * dest = destRecInfo.queryName(idx);
  1897. const char * result = "";
  1898. switch (getVirtualInitializer(field->initializer))
  1899. {
  1900. case FVirtualFilePosition:
  1901. result = "FILEPOSITION";
  1902. break;
  1903. case FVirtualLocalFilePosition:
  1904. result = "LOCALFILEPOSITION";
  1905. break;
  1906. case FVirtualFilename:
  1907. result = "LOGICALFILENAME";
  1908. break;
  1909. }
  1910. DBGLOG("Use virtual(%s) for field %s", result, dest);
  1911. }
  1912. }
  1913. size32_t doAppendVirtuals(ARowBuilder &builder, IVirtualFieldCallback & callback, size32_t offset, size32_t sourceSize, const void *sourceRow) const
  1914. {
  1915. size32_t estimate = sourceSize + fixedVirtualSize;
  1916. builder.ensureCapacity(offset+estimate, "record");
  1917. memcpy(builder.getSelf() + offset, sourceRow, sourceSize);
  1918. unsigned destOffset = offset + sourceSize;
  1919. for (unsigned idx = firstVirtual; idx < destRecInfo.getNumFields(); idx++)
  1920. {
  1921. const RtlFieldInfo *field = destRecInfo.queryField(idx);
  1922. const RtlTypeInfo *type = field->type;
  1923. switch (getVirtualInitializer(field->initializer))
  1924. {
  1925. case FVirtualFilePosition:
  1926. destOffset = type->buildInt(builder, destOffset, field, callback.getFilePosition(sourceRow));
  1927. break;
  1928. case FVirtualLocalFilePosition:
  1929. destOffset = type->buildInt(builder, destOffset, field, callback.getLocalFilePosition(sourceRow));
  1930. break;
  1931. case FVirtualFilename:
  1932. {
  1933. const char * filename = callback.queryLogicalFilename(sourceRow);
  1934. destOffset = type->buildString(builder, destOffset, field, strlen(filename), filename);
  1935. break;
  1936. }
  1937. default:
  1938. throwUnexpected();
  1939. }
  1940. }
  1941. return destOffset;
  1942. }
  1943. void init()
  1944. {
  1945. unsigned idx = 0;
  1946. for (; idx < destRecInfo.getNumFields(); idx++)
  1947. {
  1948. const RtlFieldInfo *field = destRecInfo.queryField(idx);
  1949. const byte * initializer = (const byte *) field->initializer;
  1950. if (isVirtualInitializer(initializer))
  1951. break;
  1952. }
  1953. firstVirtual = idx;
  1954. size32_t size = 0;
  1955. for (; idx < destRecInfo.getNumFields(); idx++)
  1956. {
  1957. const RtlFieldInfo *field = destRecInfo.queryField(idx);
  1958. const RtlTypeInfo *type = field->type;
  1959. const byte * initializer = (const byte *) field->initializer;
  1960. assertex(isVirtualInitializer(initializer));
  1961. size += type->getMinSize();
  1962. }
  1963. fixedVirtualSize = size;
  1964. }
  1965. protected:
  1966. const RtlRecord &destRecInfo;
  1967. IOutputMetaData & sourceMeta;
  1968. unsigned firstVirtual = 0;
  1969. size32_t fixedVirtualSize = 0;
  1970. };
  1971. extern ECLRTL_API const IDynamicTransform *createCloneVirtualRecordTranslator(const RtlRecord &_destRecInfo, IOutputMetaData & _source)
  1972. {
  1973. return new CloneVirtualRecordTranslator(_destRecInfo, _source);
  1974. }
  1975. //---------------------------------------------------------------------------------------------------------------------
  1976. extern ECLRTL_API void throwTranslationError(const RtlRecord & destRecInfo, const RtlRecord & srcRecInfo, const char * filename)
  1977. {
  1978. Owned<const IDynamicTransform> translator = createRecordTranslator(destRecInfo, srcRecInfo);
  1979. #ifdef _DEBUG
  1980. translator->describe();
  1981. #endif
  1982. if (!translator->canTranslate())
  1983. throw MakeStringException(0, "Untranslatable record layout mismatch detected for: %s", filename);
  1984. throw MakeStringException(0, "Translatable key layout mismatch reading file %s but translation disabled", filename);
  1985. }
  1986. class TranslatedRowStream : public CInterfaceOf<IRowStream>
  1987. {
  1988. public:
  1989. TranslatedRowStream(IRowStream *_inputStream, IEngineRowAllocator *_resultAllocator, const RtlRecord &outputRecord, const RtlRecord &inputRecord)
  1990. : inputStream(_inputStream), resultAllocator(_resultAllocator)
  1991. {
  1992. translator.setown(createRecordTranslator(outputRecord, inputRecord));
  1993. translator->describe();
  1994. }
  1995. virtual const void *nextRow() override
  1996. {
  1997. if (eof)
  1998. return NULL;
  1999. const void *inRow = inputStream->nextRow();
  2000. if (!inRow)
  2001. {
  2002. if (eogSeen)
  2003. eof = true;
  2004. else
  2005. eogSeen = true;
  2006. return nullptr;
  2007. }
  2008. else
  2009. eogSeen = false;
  2010. RtlDynamicRowBuilder rowBuilder(resultAllocator);
  2011. size32_t len = translator->translate(rowBuilder, fieldCallback, (const byte *) inRow);
  2012. rtlReleaseRow(inRow);
  2013. return rowBuilder.finalizeRowClear(len);
  2014. }
  2015. virtual void stop() override
  2016. {
  2017. resultAllocator.clear();
  2018. }
  2019. bool canTranslate() const
  2020. {
  2021. return translator->canTranslate();
  2022. }
  2023. bool needsTranslate() const
  2024. {
  2025. return translator->needsTranslate();
  2026. }
  2027. UnexpectedVirtualFieldCallback fieldCallback; // I'm not sure if an non unexpected callback can be implemented
  2028. protected:
  2029. Linked<IRowStream> inputStream;
  2030. Linked<IEngineRowAllocator> resultAllocator;
  2031. Owned<const IDynamicTransform> translator;
  2032. unsigned numOffsets = 0;
  2033. size_t * variableOffsets = nullptr;
  2034. bool eof = false;
  2035. bool eogSeen = false;
  2036. };
  2037. extern ECLRTL_API IRowStream * transformRecord(IEngineRowAllocator * resultAllocator,IOutputMetaData & metaInput,IRowStream * input)
  2038. {
  2039. if (resultAllocator->queryOutputMeta()==&metaInput)
  2040. return LINK(input);
  2041. Owned<TranslatedRowStream> stream = new TranslatedRowStream(input, resultAllocator,
  2042. resultAllocator->queryOutputMeta()->queryRecordAccessor(true),
  2043. metaInput.queryRecordAccessor(true));
  2044. if (!stream->needsTranslate())
  2045. return LINK(input);
  2046. else if (!stream->canTranslate())
  2047. rtlFail(0, "Cannot translate record stream");
  2048. else
  2049. return stream.getClear();
  2050. }
  2051. // A key translator allows us to transform a RowFilter that refers to src to one that refers to dest.
  2052. // Basically just a map of those fields with matching types.
  2053. class CKeyTranslator : public CInterfaceOf<IKeyTranslator>
  2054. {
  2055. public:
  2056. CKeyTranslator(const RtlRecord &actual, const RtlRecord &expected)
  2057. {
  2058. translateNeeded = false;
  2059. for (unsigned expectedIdx = 0; expectedIdx < expected.getNumFields(); expectedIdx++)
  2060. {
  2061. unsigned actualIdx = actual.getFieldNum(expected.queryName(expectedIdx));
  2062. if (actualIdx != (unsigned) -1)
  2063. {
  2064. const RtlTypeInfo *expectedType = expected.queryType(expectedIdx);
  2065. const RtlTypeInfo *actualType = actual.queryType(actualIdx);
  2066. if (!actualType->equivalent(expectedType))
  2067. actualIdx = (unsigned) -2;
  2068. }
  2069. map.append(actualIdx);
  2070. if (actualIdx != expectedIdx)
  2071. translateNeeded = true;
  2072. }
  2073. }
  2074. virtual void describe() const override
  2075. {
  2076. ForEachItemIn(idx, map)
  2077. {
  2078. unsigned mapped = map.item(idx);
  2079. switch (mapped)
  2080. {
  2081. case (unsigned) -1: DBGLOG("No match for field %d", idx); break;
  2082. case (unsigned) -2: DBGLOG("Incompatible field match for field %d", idx); break;
  2083. default: DBGLOG("keyed field %d can map to field %d", idx, mapped); break;
  2084. }
  2085. }
  2086. }
  2087. virtual bool translate(RowFilter &filters) const override
  2088. {
  2089. bool mapNeeded = false;
  2090. if (translateNeeded)
  2091. {
  2092. unsigned numFields = filters.numFilterFields();
  2093. for (unsigned idx = 0; idx < numFields; idx++)
  2094. {
  2095. unsigned fieldNum = filters.queryFilter(idx).queryFieldIndex();
  2096. unsigned mappedFieldNum = map.isItem(fieldNum) ? map.item(fieldNum) : (unsigned) -1;
  2097. if (mappedFieldNum != fieldNum)
  2098. {
  2099. mapNeeded = true;
  2100. switch (mappedFieldNum)
  2101. {
  2102. case (unsigned) -1: throw makeStringExceptionV(0, "Cannot translate keyed filter on field %u - no matching field", idx);
  2103. case (unsigned) -2: throw makeStringExceptionV(0, "Cannot translate keyed filter on field %u - incompatible matching field type", idx);
  2104. default:
  2105. filters.remapField(idx, mappedFieldNum);
  2106. break;
  2107. }
  2108. }
  2109. }
  2110. if (mapNeeded)
  2111. filters.recalcFieldsRequired();
  2112. }
  2113. return mapNeeded;
  2114. }
  2115. virtual bool translate(RowFilter &filter, IConstArrayOf<IFieldFilter> &in) const override
  2116. {
  2117. bool mapNeeded = false;
  2118. if (translateNeeded)
  2119. {
  2120. unsigned numFields = in.length();
  2121. for (unsigned idx = 0; idx < numFields; idx++)
  2122. {
  2123. unsigned fieldNum = in.item(idx).queryFieldIndex();
  2124. unsigned mappedFieldNum = map.isItem(fieldNum) ? map.item(fieldNum) : (unsigned) -1;
  2125. if (mappedFieldNum != fieldNum)
  2126. {
  2127. mapNeeded = true;
  2128. switch (mappedFieldNum)
  2129. {
  2130. case (unsigned) -1: throw makeStringExceptionV(0, "Cannot translate keyed filter on field %u - no matching field", idx);
  2131. case (unsigned) -2: throw makeStringExceptionV(0, "Cannot translate keyed filter on field %u - incompatible matching field type", idx);
  2132. default:
  2133. filter.addFilter(*in.item(idx).remap(mappedFieldNum));
  2134. break;
  2135. }
  2136. }
  2137. else
  2138. filter.addFilter(OLINK(in.item(idx)));
  2139. }
  2140. }
  2141. return mapNeeded;
  2142. }
  2143. virtual bool needsTranslate() const
  2144. {
  2145. return translateNeeded;
  2146. }
  2147. protected:
  2148. UnsignedArray map;
  2149. bool translateNeeded = false;
  2150. };
  2151. extern ECLRTL_API const IKeyTranslator *createKeyTranslator(const RtlRecord &_destRecInfo, const RtlRecord &_srcRecInfo)
  2152. {
  2153. return new CKeyTranslator(_destRecInfo, _srcRecInfo);
  2154. }
  2155. //---------------------------------------------------------------------------------------------------------------------
  2156. const char * NullVirtualFieldCallback::queryLogicalFilename(const void * row)
  2157. {
  2158. return "";
  2159. }
  2160. unsigned __int64 NullVirtualFieldCallback::getFilePosition(const void * row)
  2161. {
  2162. return 0;
  2163. }
  2164. unsigned __int64 NullVirtualFieldCallback::getLocalFilePosition(const void * row)
  2165. {
  2166. return 0;
  2167. }
  2168. const byte * NullVirtualFieldCallback::lookupBlob(unsigned __int64 id)
  2169. {
  2170. return nullptr;
  2171. }
  2172. const char * UnexpectedVirtualFieldCallback::queryLogicalFilename(const void * row)
  2173. {
  2174. throwUnexpectedX("VIRTUAL(LOGICALFILENAME)");
  2175. }
  2176. unsigned __int64 UnexpectedVirtualFieldCallback::getFilePosition(const void * row)
  2177. {
  2178. throwUnexpectedX("VIRTUAL(FILEPOSITION)");
  2179. }
  2180. unsigned __int64 UnexpectedVirtualFieldCallback::getLocalFilePosition(const void * row)
  2181. {
  2182. throwUnexpectedX("VIRTUAL(LOCALFILEPOSITION)");
  2183. }
  2184. const byte * UnexpectedVirtualFieldCallback::lookupBlob(unsigned __int64 id)
  2185. {
  2186. throwUnexpectedX("BLOB");
  2187. }
  2188. unsigned __int64 FetchVirtualFieldCallback::getFilePosition(const void * row)
  2189. {
  2190. return filepos;
  2191. }
  2192. const char * LocalVirtualFieldCallback::queryLogicalFilename(const void * row)
  2193. {
  2194. return filename;
  2195. }
  2196. unsigned __int64 LocalVirtualFieldCallback::getFilePosition(const void * row)
  2197. {
  2198. return filepos;
  2199. }
  2200. unsigned __int64 LocalVirtualFieldCallback::getLocalFilePosition(const void * row)
  2201. {
  2202. return localfilepos;
  2203. }
  2204. const byte * LocalVirtualFieldCallback::lookupBlob(unsigned __int64 id)
  2205. {
  2206. throwUnexpectedX("BLOB");
  2207. }