rtlrecord.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2016 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "platform.h"
  14. #include <math.h>
  15. #include <stdio.h>
  16. #include "jmisc.hpp"
  17. #include "jlib.hpp"
  18. #include "eclhelper.hpp"
  19. #include "eclrtl_imp.hpp"
  20. #include "rtlds_imp.hpp"
  21. #include "rtlrecord.hpp"
  22. /*
  23. * Potential different implementations (for all fixed size has no penalty):
  24. *
  25. * a. when row changes, update offsets of all offset fields
  26. * + access is simple, single array lookup
  27. * - lots more fields are updated even if the fields aren't used.
  28. *
  29. * b. when a row changes update the next offset for all variable size fields.
  30. * + minimal updates
  31. * - offset access is more complex
  32. *
  33. * c. when row changes clear cache, and calculate on demand.
  34. * + trivial update on row change
  35. * + no cost of fields not accessed
  36. * + may be required to implement ifblocks - since fields in the test expression must be evaluated before all the
  37. * offsets are known. Depends on the implementation of ifblocks!
  38. * - accessing an offset will involve a loop and be more expensive
  39. *
  40. * Handling complications:
  41. * a. Nested rows
  42. * A dilemma. Should they be expanded?
  43. * + If they are expanded then it makes it much simpler to use externally.
  44. * - The nested fields have compound names; storing and matching them is a complication.
  45. * - The code generator doesn't expand.
  46. * + It is easier to calculate offsets since all fields are supported by the same class.
  47. * + Sizes for unexpanded rows would need their own size caching classes. That is more complex!
  48. * + The meta information currently processes child information.
  49. *
  50. * Not expanding is complicated if you also try and only calculate the offsets of fields in nested records once - since you end
  51. * up needing to call back and forth between instances and the static information.
  52. * However, if nested records are processed by using size(), and selections from them are processed by instantiating
  53. * an instance of the nested row it is all much simpler. The cost is potentially re-evaluating sizes of nested fields. Potentially
  54. * inefficient if most are fixed size, but some are variable.
  55. *
  56. * b. bitfields
  57. * the last bitfield in a bitfield container has the size of the container, the others have 0 size.
  58. *
  59. * c. ifblocks
  60. * Nasty. Allowing direct access means the flag would need checking, and a field would need a pointer
  61. * to its containing ifblock. Cleanest to have a different derived implementation which was used if the record
  62. * contained ifblocks which added calls to check ifblocks before size()/getValue() etc.
  63. * Will require an extra row parameter to every RtlTypeInfo::getX() function.
  64. * Evaluating the test expression without compiling will require expression interpreting.
  65. *
  66. * d. alien datatypes
  67. * As long as the rtlField class implements the functions then it shouldn't cause any problems. Evaluating
  68. * them from a record at runtime without compiling will be tricky - requires an interpreter.
  69. *
  70. * Other
  71. * Add a minSize to each field (unless already stored in the record information)
  72. *
  73. * Expression interpreting:
  74. * Replace the no_select with a CHqlBoundExpr(no_select, fieldid).
  75. * Evaluate (ctx [ logical->RtlFieldOffsetCalculator mapping ]).
  76. * Even better if mapped direct to something that represents the base cursor so no need to search ctx.
  77. * For nested selects the code would need to be consistent.
  78. */
  79. static unsigned countFields(const RtlFieldInfo * const * fields)
  80. {
  81. unsigned cnt = 0;
  82. for (;*fields;fields++)
  83. cnt++;
  84. return cnt;
  85. }
  86. static unsigned countFields(const RtlFieldInfo * const * fields, bool & containsNested)
  87. {
  88. unsigned cnt = 0;
  89. for (;*fields;fields++)
  90. {
  91. const RtlTypeInfo * type = (*fields)->type;
  92. if (type->getType() == type_record)
  93. {
  94. containsNested = true;
  95. const RtlFieldInfo * const * nested = type->queryFields();
  96. if (nested)
  97. cnt += countFields(nested, containsNested);
  98. }
  99. else
  100. cnt++;
  101. }
  102. return cnt;
  103. }
  104. static const RtlFieldInfo * * expandNestedRows(const RtlFieldInfo * * target, const RtlFieldInfo * const * fields)
  105. {
  106. for (;*fields;fields++)
  107. {
  108. const RtlFieldInfo * cur = *fields;
  109. const RtlTypeInfo * type = cur->type;
  110. if (type->getType() == type_record)
  111. {
  112. const RtlFieldInfo * const * nested = type->queryFields();
  113. if (nested)
  114. target = expandNestedRows(target, nested);
  115. }
  116. else
  117. *target++ = cur;
  118. }
  119. return target;
  120. }
  121. class FieldNameToFieldNumMap
  122. {
  123. public:
  124. FieldNameToFieldNumMap(const RtlFieldInfo * const * fields, bool expand)
  125. {
  126. unsigned idx = 0;
  127. if (expand)
  128. expandFields(nullptr, idx, fields);
  129. else
  130. {
  131. for (;*fields;fields++)
  132. {
  133. const RtlFieldInfo * cur = *fields;
  134. const RtlTypeInfo * type = cur->type;
  135. map.setValue(cur->name, idx);
  136. idx++;
  137. }
  138. }
  139. }
  140. unsigned lookup(const char *name) const
  141. {
  142. unsigned *result = map.getValue(name);
  143. if (result)
  144. return *result;
  145. else
  146. return (unsigned) -1;
  147. }
  148. protected:
  149. void expandFields(const char *prefix, unsigned &idx, const RtlFieldInfo * const * fields)
  150. {
  151. for (;*fields;fields++)
  152. {
  153. const RtlFieldInfo * cur = *fields;
  154. const RtlTypeInfo * type = cur->type;
  155. if (type->getType() == type_record)
  156. {
  157. const RtlFieldInfo * const * nested = type->queryFields();
  158. if (nested)
  159. {
  160. StringBuffer newPrefix(prefix);
  161. newPrefix.append(cur->name).append('.');
  162. expandFields(newPrefix, idx, nested);
  163. }
  164. }
  165. else if (prefix)
  166. {
  167. StringBuffer name(prefix);
  168. name.append(cur->name);
  169. map.setValue(name, idx);
  170. idx++;
  171. }
  172. else
  173. {
  174. map.setValue(cur->name, idx);
  175. idx++;
  176. }
  177. }
  178. }
  179. MapStringTo<unsigned> map;
  180. };
  181. RtlRecord::RtlRecord(const RtlRecordTypeInfo & record, bool expandFields)
  182. : RtlRecord(record.fields, expandFields) // delegated constructor
  183. {
  184. }
  185. RtlRecord::RtlRecord(const RtlFieldInfo * const *_fields, bool expandFields) : fields(_fields), originalFields(_fields), nameMap(nullptr)
  186. {
  187. //MORE: Does not cope with ifblocks.
  188. numVarFields = 0;
  189. //Optionally expand out nested rows.
  190. if (expandFields)
  191. {
  192. bool containsNested = false;
  193. numFields = countFields(fields, containsNested);
  194. if (containsNested)
  195. {
  196. const RtlFieldInfo * * allocated = new const RtlFieldInfo * [numFields+1];
  197. fields = allocated;
  198. const RtlFieldInfo * * target = expandNestedRows(allocated, originalFields);
  199. assertex(target == fields+numFields);
  200. *target = nullptr;
  201. }
  202. }
  203. else
  204. numFields = countFields(fields);
  205. for (unsigned i=0; i < numFields; i++)
  206. {
  207. if (!queryType(i)->isFixedSize())
  208. numVarFields++;
  209. }
  210. fixedOffsets = new size_t[numFields + 1];
  211. whichVariableOffset = new unsigned[numFields + 1];
  212. variableFieldIds = new unsigned[numVarFields];
  213. unsigned curVariable = 0;
  214. size_t fixedOffset = 0;
  215. for (unsigned i=0;; i++)
  216. {
  217. whichVariableOffset[i] = curVariable;
  218. fixedOffsets[i] = fixedOffset;
  219. if (i == numFields)
  220. break;
  221. const RtlTypeInfo * curType = queryType(i);
  222. if (curType->isFixedSize())
  223. {
  224. size_t thisSize = curType->size(nullptr, nullptr);
  225. fixedOffset += thisSize;
  226. }
  227. else
  228. {
  229. variableFieldIds[curVariable] = i;
  230. curVariable++;
  231. fixedOffset = 0;
  232. }
  233. }
  234. }
  235. RtlRecord::~RtlRecord()
  236. {
  237. if (fields != originalFields)
  238. delete [] fields;
  239. delete [] fixedOffsets;
  240. delete [] whichVariableOffset;
  241. delete [] variableFieldIds;
  242. delete nameMap;
  243. }
  244. void RtlRecord::calcRowOffsets(size_t * variableOffsets, const void * _row) const
  245. {
  246. const byte * row = static_cast<const byte *>(_row);
  247. for (unsigned i = 0; i < numVarFields; i++)
  248. {
  249. unsigned fieldIndex = variableFieldIds[i];
  250. size_t offset = getOffset(variableOffsets, fieldIndex);
  251. size_t fieldSize = queryType(fieldIndex)->size(row + offset, row);
  252. variableOffsets[i+1] = offset+fieldSize;
  253. }
  254. }
  255. size32_t RtlRecord::getMinRecordSize() const
  256. {
  257. if (numVarFields == 0)
  258. return fixedOffsets[numFields];
  259. size32_t minSize = 0;
  260. for (unsigned i=0; i < numFields; i++)
  261. minSize += queryType(i)->getMinSize();
  262. return minSize;
  263. }
  264. static const FieldNameToFieldNumMap *setupNameMap(const RtlFieldInfo * const * fields, bool expand, std::atomic<const FieldNameToFieldNumMap *> &aNameMap)
  265. {
  266. const FieldNameToFieldNumMap *lnameMap = new FieldNameToFieldNumMap(fields, expand);
  267. const FieldNameToFieldNumMap *expected = nullptr;
  268. if (aNameMap.compare_exchange_strong(expected, lnameMap))
  269. return lnameMap;
  270. else
  271. {
  272. // Other thread already set it while we were creating
  273. delete lnameMap;
  274. return expected; // has been updated to the value set by other thread
  275. }
  276. }
  277. unsigned RtlRecord::getFieldNum(const char *fieldName) const
  278. {
  279. // NOTE: the nameMap field cannot be declared as atomic, since the class definition is included in generated
  280. // code which is not (yet) compiled using C++11. If that changes then the reinterpret_cast can be removed.
  281. std::atomic<const FieldNameToFieldNumMap *> &aNameMap = reinterpret_cast<std::atomic<const FieldNameToFieldNumMap *> &>(nameMap);
  282. const FieldNameToFieldNumMap *useMap = aNameMap.load(std::memory_order_relaxed);
  283. if (!useMap)
  284. useMap = setupNameMap(originalFields, originalFields!=fields, aNameMap);
  285. return useMap->lookup(fieldName);
  286. }
  287. //---------------------------------------------------------------------------------------------------------------------
  288. RtlRow::RtlRow(const RtlRecord & _info, const void * optRow, unsigned numOffsets, size_t * _variableOffsets) : info(_info), variableOffsets(_variableOffsets)
  289. {
  290. assertex(numOffsets == info.getNumVarFields()+1);
  291. //variableOffset[0] is used for all fixed offset fields to avoid any special casing.
  292. variableOffsets[0] = 0;
  293. setRow(optRow);
  294. }
  295. __int64 RtlRow::getInt(unsigned field) const
  296. {
  297. const byte * self = reinterpret_cast<const byte *>(row);
  298. const RtlTypeInfo * type = info.queryType(field);
  299. return type->getInt(self + getOffset(field));
  300. }
  301. void RtlRow::getUtf8(size32_t & resultLen, char * & result, unsigned field) const
  302. {
  303. const byte * self = reinterpret_cast<const byte *>(row);
  304. const RtlTypeInfo * type = info.queryType(field);
  305. return type->getUtf8(resultLen, result, self + getOffset(field));
  306. }
  307. void RtlRow::setRow(const void * _row)
  308. {
  309. row = _row;
  310. if (_row)
  311. info.calcRowOffsets(variableOffsets, _row);
  312. }
  313. RtlDynRow::RtlDynRow(const RtlRecord & _info, const void * optRow) : RtlRow(_info, optRow, _info.getNumVarFields()+1, new size_t[_info.getNumVarFields()+1])
  314. {
  315. }
  316. RtlDynRow::~RtlDynRow()
  317. {
  318. delete [] variableOffsets;
  319. }
  320. //-----------------
  321. static const RtlRecord *setupRecordAccessor(const COutputMetaData &meta, bool expand, std::atomic<const RtlRecord *> &aRecordAccessor)
  322. {
  323. const RtlRecord *lRecordAccessor = new RtlRecord(meta.queryTypeInfo()->queryFields(), expand);
  324. const RtlRecord *expected = nullptr;
  325. if (aRecordAccessor.compare_exchange_strong(expected, lRecordAccessor))
  326. return lRecordAccessor;
  327. else
  328. {
  329. // Other thread already set it while we were creating
  330. delete lRecordAccessor;
  331. return expected; // has been updated to the value set by other thread
  332. }
  333. }
  334. COutputMetaData::COutputMetaData()
  335. {
  336. recordAccessor[0] = recordAccessor[1] = NULL;
  337. }
  338. COutputMetaData::~COutputMetaData()
  339. {
  340. delete recordAccessor[0]; delete recordAccessor[1];
  341. }
  342. const RtlRecord *COutputMetaData::queryRecordAccessor(bool expand) const
  343. {
  344. // NOTE: the recordAccessor field cannot be declared as atomic, since the class definition is included in generated
  345. // code which is not (yet) compiled using C++11. If that changes then the reinterpret_cast can be removed.
  346. std::atomic<const RtlRecord *> &aRecordAccessor = reinterpret_cast<std::atomic<const RtlRecord *> &>(recordAccessor[expand]);
  347. const RtlRecord *useAccessor = aRecordAccessor.load(std::memory_order_relaxed);
  348. if (!useAccessor)
  349. useAccessor = setupRecordAccessor(*this, expand, aRecordAccessor);
  350. return useAccessor;
  351. }
  352. class CVariableOutputRowSerializer : public COutputRowSerializer
  353. {
  354. public:
  355. inline CVariableOutputRowSerializer(unsigned _activityId, IOutputMetaData * _meta) : COutputRowSerializer(_activityId) { meta = _meta; }
  356. virtual void serialize(IRowSerializerTarget & out, const byte * self)
  357. {
  358. unsigned size = meta->getRecordSize(self);
  359. out.put(size, self);
  360. }
  361. protected:
  362. IOutputMetaData * meta;
  363. };
  364. class ECLRTL_API CSimpleSourceRowPrefetcher : public ISourceRowPrefetcher, public RtlCInterface
  365. {
  366. public:
  367. CSimpleSourceRowPrefetcher(IOutputMetaData & _meta, ICodeContext * _ctx, unsigned _activityId)
  368. {
  369. deserializer.setown(_meta.querySerializedDiskMeta()->createDiskDeserializer(_ctx, _activityId));
  370. rowAllocator.setown(_ctx->getRowAllocator(&_meta, _activityId));
  371. }
  372. RTLIMPLEMENT_IINTERFACE
  373. virtual void readAhead(IRowDeserializerSource & in)
  374. {
  375. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  376. size32_t len = deserializer->deserialize(rowBuilder, in);
  377. rtlReleaseRow(rowBuilder.finalizeRowClear(len));
  378. }
  379. protected:
  380. Owned<IOutputRowDeserializer> deserializer;
  381. Owned<IEngineRowAllocator> rowAllocator;
  382. };
  383. //---------------------------------------------------------------------------
  384. IOutputRowSerializer * COutputMetaData::createDiskSerializer(ICodeContext * ctx, unsigned activityId)
  385. {
  386. return new CVariableOutputRowSerializer(activityId, this);
  387. }
  388. ISourceRowPrefetcher * COutputMetaData::createDiskPrefetcher(ICodeContext * ctx, unsigned activityId)
  389. {
  390. ISourceRowPrefetcher * fetcher = defaultCreateDiskPrefetcher(ctx, activityId);
  391. if (fetcher)
  392. return fetcher;
  393. //Worse case implementation using a deserialize
  394. return new CSimpleSourceRowPrefetcher(*this, ctx, activityId);
  395. }
  396. ISourceRowPrefetcher *COutputMetaData::defaultCreateDiskPrefetcher(ICodeContext * ctx, unsigned activityId)
  397. {
  398. if (getMetaFlags() & MDFneedserializedisk)
  399. return querySerializedDiskMeta()->createDiskPrefetcher(ctx, activityId);
  400. CSourceRowPrefetcher * fetcher = doCreateDiskPrefetcher(activityId);
  401. if (fetcher)
  402. {
  403. fetcher->onCreate(ctx);
  404. return fetcher;
  405. }
  406. return NULL;
  407. }
  408. IOutputRowSerializer *CFixedOutputMetaData::createDiskSerializer(ICodeContext * ctx, unsigned activityId)
  409. {
  410. return new CFixedOutputRowSerializer(activityId, fixedSize);
  411. }
  412. IOutputRowDeserializer *CFixedOutputMetaData::createDiskDeserializer(ICodeContext * ctx, unsigned activityId)
  413. {
  414. return new CFixedOutputRowDeserializer(activityId, fixedSize);
  415. }
  416. ISourceRowPrefetcher *CFixedOutputMetaData::createDiskPrefetcher(ICodeContext * ctx, unsigned activityId)
  417. {
  418. ISourceRowPrefetcher * fetcher = defaultCreateDiskPrefetcher(ctx, activityId);
  419. if (fetcher)
  420. return fetcher;
  421. return new CFixedSourceRowPrefetcher(activityId, fixedSize);
  422. }
  423. IOutputRowSerializer * CActionOutputMetaData::createDiskSerializer(ICodeContext * ctx, unsigned activityId)
  424. {
  425. return new CFixedOutputRowSerializer(activityId, 0);
  426. }
  427. IOutputRowDeserializer * CActionOutputMetaData::createDiskDeserializer(ICodeContext * ctx, unsigned activityId)
  428. {
  429. return new CFixedOutputRowDeserializer(activityId, 0);
  430. }
  431. ISourceRowPrefetcher * CActionOutputMetaData::createDiskPrefetcher(ICodeContext * ctx, unsigned activityId)
  432. {
  433. return new CFixedSourceRowPrefetcher(activityId, 0);
  434. }