rtlrecord.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2016 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "platform.h"
  14. #include <math.h>
  15. #include <stdio.h>
  16. #include "jmisc.hpp"
  17. #include "jlib.hpp"
  18. #include "eclhelper.hpp"
  19. #include "eclrtl_imp.hpp"
  20. #include "rtlds_imp.hpp"
  21. #include "rtlrecord.hpp"
  22. #include "rtldynfield.hpp"
  23. /*
  24. * Potential different implementations (for all fixed size has no penalty):
  25. *
  26. * a. when row changes, update offsets of all offset fields
  27. * + access is simple, single array lookup
  28. * - lots more fields are updated even if the fields aren't used.
  29. *
  30. * b. when a row changes update the next offset for all variable size fields.
  31. * + minimal updates
  32. * - offset access is more complex
  33. *
  34. * c. when row changes clear cache, and calculate on demand.
  35. * + trivial update on row change
  36. * + no cost of fields not accessed
  37. * + may be required to implement ifblocks - since fields in the test expression must be evaluated before all the
  38. * offsets are known. Depends on the implementation of ifblocks!
  39. * - accessing an offset will involve a loop and be more expensive
  40. *
  41. * Handling complications:
  42. * a. Nested rows
  43. * A dilemma. Should they be expanded?
  44. * + If they are expanded then it makes it much simpler to use externally.
  45. * - The nested fields have compound names storing and matching them is a complication.
  46. * - The code generator doesn't expand.
  47. * + It is easier to calculate offsets since all fields are supported by the same class.
  48. * + Sizes for unexpanded rows would need their own size caching classes. That is more complex!
  49. * + The meta information currently processes child information.
  50. *
  51. * Not expanding is complicated if you also try and only calculate the offsets of fields in nested records once - since you end
  52. * up needing to call back and forth between instances and the static information.
  53. * However, if nested records are processed by using size(), and selections from them are processed by instantiating
  54. * an instance of the nested row it is all much simpler. The cost is potentially re-evaluating sizes of nested fields. Potentially
  55. * inefficient if most are fixed size, but some are variable.
  56. *
  57. * b. bitfields
  58. * the last bitfield in a bitfield container has the size of the container, the others have 0 size.
  59. *
  60. * c. ifblocks
  61. * Nasty. Allowing direct access means the flag would need checking, and a field would need a pointer
  62. * to its containing ifblock. Cleanest to have a different derived implementation which was used if the record
  63. * contained ifblocks which added calls to check ifblocks before size()/getValue() etc.
  64. * Will require an extra row parameter to every RtlTypeInfo::getX() function.
  65. * Evaluating the test expression without compiling will require expression interpreting.
  66. *
  67. * d. alien datatypes
  68. * As long as the rtlField class implements the functions then it shouldn't cause any problems. Evaluating an alien
  69. * datatype from a record at runtime without compiling will be tricky - requires an interpreter.
  70. *
  71. * Other
  72. * Add a minSize to each field (unless already stored in the record information)
  73. *
  74. * Expression interpreting:
  75. * Replace the no_select with a CHqlBoundExpr(no_select, fieldid).
  76. * Evaluate (ctx [ logical->RtlFieldOffsetCalculator mapping ]).
  77. * Even better if mapped direct to something that represents the base cursor so no need to search ctx.
  78. * For nested selects the code would need to be consistent.
  79. */
  80. static unsigned countFields(const RtlFieldInfo * const * fields, bool & containsNested)
  81. {
  82. unsigned cnt = 0;
  83. for (;*fields;fields++)
  84. {
  85. const RtlTypeInfo * type = (*fields)->type;
  86. if (type->getType() == type_record)
  87. {
  88. containsNested = true;
  89. const RtlFieldInfo * const * nested = type->queryFields();
  90. if (nested)
  91. cnt += countFields(nested, containsNested);
  92. }
  93. else
  94. cnt++;
  95. }
  96. return cnt;
  97. }
  98. static unsigned expandNestedRows(unsigned idx, const char *prefix, const RtlFieldInfo * const * fields, const RtlFieldInfo * * target, const char * *names)
  99. {
  100. for (;*fields;fields++)
  101. {
  102. const RtlFieldInfo * cur = *fields;
  103. const RtlTypeInfo * type = cur->type;
  104. if (type->getType() == type_record)
  105. {
  106. const RtlFieldInfo * const * nested = type->queryFields();
  107. if (nested)
  108. {
  109. StringBuffer newPrefix(prefix);
  110. newPrefix.append(cur->name).append('.');
  111. idx = expandNestedRows(idx, newPrefix.str(), nested, target, names);
  112. }
  113. }
  114. else
  115. {
  116. if (prefix)
  117. {
  118. StringBuffer name(prefix);
  119. name.append(cur->name);
  120. names[idx] = name.detach();
  121. }
  122. else
  123. names[idx] = nullptr;
  124. target[idx++] = cur;
  125. }
  126. }
  127. return idx;
  128. }
  129. class FieldNameToFieldNumMap
  130. {
  131. public:
  132. FieldNameToFieldNumMap(const RtlRecord &record)
  133. {
  134. unsigned numFields = record.getNumFields();
  135. for (unsigned idx = 0; idx < numFields;idx++)
  136. map.setValue(record.queryName(idx), idx);
  137. }
  138. unsigned lookup(const char *name) const
  139. {
  140. unsigned *result = map.getValue(name);
  141. if (result)
  142. return *result;
  143. else
  144. return (unsigned) -1;
  145. }
  146. MapConstStringTo<unsigned> map; // Note - does not copy strings - they should all have sufficient lifetime
  147. };
  148. RtlRecord::RtlRecord(const RtlRecordTypeInfo & record, bool expandFields)
  149. : RtlRecord(record.fields, expandFields) // delegated constructor
  150. {
  151. }
  152. RtlRecord::RtlRecord(const RtlFieldInfo * const *_fields, bool expandFields) : fields(_fields), originalFields(_fields), names(nullptr), nameMap(nullptr)
  153. {
  154. //MORE: Does not cope with ifblocks.
  155. numVarFields = 0;
  156. numTables = 0;
  157. //Optionally expand out nested rows.
  158. if (expandFields)
  159. {
  160. bool containsNested = false;
  161. numFields = countFields(fields, containsNested);
  162. if (containsNested)
  163. {
  164. const RtlFieldInfo * * allocated = new const RtlFieldInfo * [numFields+1];
  165. names = new const char *[numFields];
  166. fields = allocated;
  167. unsigned idx = expandNestedRows(0, nullptr, originalFields, allocated, names);
  168. assertex(idx == numFields);
  169. allocated[idx] = nullptr;
  170. }
  171. }
  172. else
  173. {
  174. numFields = countFields(fields);
  175. }
  176. for (unsigned i=0; i < numFields; i++)
  177. {
  178. const RtlTypeInfo *curType = queryType(i);
  179. if (!curType->isFixedSize())
  180. numVarFields++;
  181. if (curType->getType()==type_table || curType->getType()==type_record)
  182. numTables++;
  183. }
  184. fixedOffsets = new size_t[numFields + 1];
  185. whichVariableOffset = new unsigned[numFields + 1];
  186. variableFieldIds = new unsigned[numVarFields];
  187. if (numTables)
  188. {
  189. nestedTables = new const RtlRecord *[numTables];
  190. tableIds = new unsigned[numTables];
  191. }
  192. else
  193. {
  194. nestedTables = nullptr;
  195. tableIds = nullptr;
  196. }
  197. unsigned curVariable = 0;
  198. unsigned curTable = 0;
  199. size_t fixedOffset = 0;
  200. for (unsigned i=0;; i++)
  201. {
  202. whichVariableOffset[i] = curVariable;
  203. fixedOffsets[i] = fixedOffset;
  204. if (i == numFields)
  205. break;
  206. const RtlTypeInfo * curType = queryType(i);
  207. if (curType->isFixedSize())
  208. {
  209. size_t thisSize = curType->size(nullptr, nullptr);
  210. fixedOffset += thisSize;
  211. }
  212. else
  213. {
  214. variableFieldIds[curVariable] = i;
  215. curVariable++;
  216. fixedOffset = 0;
  217. }
  218. switch (curType->getType())
  219. {
  220. case type_table:
  221. tableIds[curTable] = i;
  222. nestedTables[curTable++] = new RtlRecord(curType->queryChildType()->queryFields(), expandFields);
  223. break;
  224. case type_record:
  225. tableIds[curTable] = i;
  226. nestedTables[curTable++] = new RtlRecord(curType->queryFields(), expandFields);
  227. break;
  228. }
  229. }
  230. }
  231. RtlRecord::~RtlRecord()
  232. {
  233. if (names)
  234. {
  235. for (unsigned i = 0; i < numFields; i++)
  236. {
  237. free((char *) names[i]);
  238. }
  239. delete [] names;
  240. }
  241. if (fields != originalFields)
  242. {
  243. delete [] fields;
  244. }
  245. delete [] fixedOffsets;
  246. delete [] whichVariableOffset;
  247. delete [] variableFieldIds;
  248. delete [] tableIds;
  249. if (nestedTables)
  250. {
  251. for (unsigned i = 0; i < numTables; i++)
  252. delete nestedTables[i];
  253. delete [] nestedTables;
  254. }
  255. delete nameMap;
  256. }
  257. void RtlRecord::calcRowOffsets(size_t * variableOffsets, const void * _row) const
  258. {
  259. const byte * row = static_cast<const byte *>(_row);
  260. for (unsigned i = 0; i < numVarFields; i++)
  261. {
  262. unsigned fieldIndex = variableFieldIds[i];
  263. size_t offset = getOffset(variableOffsets, fieldIndex);
  264. size_t fieldSize = queryType(fieldIndex)->size(row + offset, row);
  265. variableOffsets[i+1] = offset+fieldSize;
  266. }
  267. }
  268. size32_t RtlRecord::getMinRecordSize() const
  269. {
  270. if (numVarFields == 0)
  271. return fixedOffsets[numFields];
  272. size32_t minSize = 0;
  273. for (unsigned i=0; i < numFields; i++)
  274. minSize += queryType(i)->getMinSize();
  275. return minSize;
  276. }
  277. static const FieldNameToFieldNumMap *setupNameMap(const RtlRecord &record, std::atomic<const FieldNameToFieldNumMap *> &aNameMap)
  278. {
  279. const FieldNameToFieldNumMap *lnameMap = new FieldNameToFieldNumMap(record);
  280. const FieldNameToFieldNumMap *expected = nullptr;
  281. if (aNameMap.compare_exchange_strong(expected, lnameMap))
  282. return lnameMap;
  283. else
  284. {
  285. // Other thread already set it while we were creating
  286. delete lnameMap;
  287. return expected; // has been updated to the value set by other thread
  288. }
  289. }
  290. unsigned RtlRecord::getFieldNum(const char *fieldName) const
  291. {
  292. // NOTE: the nameMap field cannot be declared as atomic, since the class definition is included in generated
  293. // code which is not (yet) compiled using C++11. If that changes then the reinterpret_cast can be removed.
  294. std::atomic<const FieldNameToFieldNumMap *> &aNameMap = reinterpret_cast<std::atomic<const FieldNameToFieldNumMap *> &>(nameMap);
  295. const FieldNameToFieldNumMap *useMap = aNameMap.load(std::memory_order_relaxed);
  296. if (!useMap)
  297. useMap = setupNameMap(*this, aNameMap);
  298. return useMap->lookup(fieldName);
  299. }
  300. const char *RtlRecord::queryName(unsigned field) const
  301. {
  302. if (names && names[field])
  303. return names[field];
  304. return fields[field]->name;
  305. }
  306. const RtlRecord *RtlRecord::queryNested(unsigned fieldId) const
  307. {
  308. // Map goes in wrong direction (for size reasons). We could replace with a hashtable or binsearch but
  309. // should not be enough nested tables for it to be worth it;
  310. for (unsigned i = 0; i < numTables; i++)
  311. if (tableIds[i]==fieldId)
  312. return nestedTables[i];
  313. return nullptr;
  314. }
  315. //---------------------------------------------------------------------------------------------------------------------
  316. RtlRow::RtlRow(const RtlRecord & _info, const void * optRow, unsigned numOffsets, size_t * _variableOffsets) : info(_info), variableOffsets(_variableOffsets)
  317. {
  318. assertex(numOffsets == info.getNumVarFields()+1);
  319. //variableOffset[0] is used for all fixed offset fields to avoid any special casing.
  320. variableOffsets[0] = 0;
  321. setRow(optRow);
  322. }
  323. __int64 RtlRow::getInt(unsigned field) const
  324. {
  325. const byte * self = reinterpret_cast<const byte *>(row);
  326. const RtlTypeInfo * type = info.queryType(field);
  327. return type->getInt(self + getOffset(field));
  328. }
  329. double RtlRow::getReal(unsigned field) const
  330. {
  331. const byte * self = reinterpret_cast<const byte *>(row);
  332. const RtlTypeInfo * type = info.queryType(field);
  333. return type->getReal(self + getOffset(field));
  334. }
  335. void RtlRow::getString(size32_t & resultLen, char * & result, unsigned field) const
  336. {
  337. const byte * self = reinterpret_cast<const byte *>(row);
  338. const RtlTypeInfo * type = info.queryType(field);
  339. return type->getString(resultLen, result, self + getOffset(field));
  340. }
  341. void RtlRow::getUtf8(size32_t & resultLen, char * & result, unsigned field) const
  342. {
  343. const byte * self = reinterpret_cast<const byte *>(row);
  344. const RtlTypeInfo * type = info.queryType(field);
  345. return type->getUtf8(resultLen, result, self + getOffset(field));
  346. }
  347. void RtlRow::setRow(const void * _row)
  348. {
  349. row = _row;
  350. if (_row)
  351. info.calcRowOffsets(variableOffsets, _row);
  352. }
  353. RtlDynRow::RtlDynRow(const RtlRecord & _info, const void * optRow) : RtlRow(_info, optRow, _info.getNumVarFields()+1, new size_t[_info.getNumVarFields()+1])
  354. {
  355. }
  356. RtlDynRow::~RtlDynRow()
  357. {
  358. delete [] variableOffsets;
  359. }
  360. //-----------------
  361. static const RtlRecord *setupRecordAccessor(const COutputMetaData &meta, bool expand, std::atomic<const RtlRecord *> &aRecordAccessor)
  362. {
  363. const RtlRecord *lRecordAccessor = new RtlRecord(meta.queryTypeInfo()->queryFields(), expand);
  364. const RtlRecord *expected = nullptr;
  365. if (aRecordAccessor.compare_exchange_strong(expected, lRecordAccessor))
  366. return lRecordAccessor;
  367. else
  368. {
  369. // Other thread already set it while we were creating
  370. delete lRecordAccessor;
  371. return expected; // has been updated to the value set by other thread
  372. }
  373. }
  374. COutputMetaData::COutputMetaData()
  375. {
  376. recordAccessor[0] = recordAccessor[1] = NULL;
  377. }
  378. COutputMetaData::~COutputMetaData()
  379. {
  380. delete recordAccessor[0]; delete recordAccessor[1];
  381. }
  382. const RtlRecord *COutputMetaData::queryRecordAccessor(bool expand) const
  383. {
  384. // NOTE: the recordAccessor field cannot be declared as atomic, since the class definition is included in generated
  385. // code which is not (yet) compiled using C++11. If that changes then the reinterpret_cast can be removed.
  386. std::atomic<const RtlRecord *> &aRecordAccessor = reinterpret_cast<std::atomic<const RtlRecord *> &>(recordAccessor[expand]);
  387. const RtlRecord *useAccessor = aRecordAccessor.load(std::memory_order_relaxed);
  388. if (!useAccessor)
  389. useAccessor = setupRecordAccessor(*this, expand, aRecordAccessor);
  390. return useAccessor;
  391. }
  392. class CVariableOutputRowSerializer : public COutputRowSerializer
  393. {
  394. public:
  395. inline CVariableOutputRowSerializer(unsigned _activityId, IOutputMetaData * _meta) : COutputRowSerializer(_activityId) { meta = _meta; }
  396. virtual void serialize(IRowSerializerTarget & out, const byte * self)
  397. {
  398. unsigned size = meta->getRecordSize(self);
  399. out.put(size, self);
  400. }
  401. protected:
  402. IOutputMetaData * meta;
  403. };
  404. class ECLRTL_API CSimpleSourceRowPrefetcher : public ISourceRowPrefetcher, public RtlCInterface
  405. {
  406. public:
  407. CSimpleSourceRowPrefetcher(IOutputMetaData & _meta, ICodeContext * _ctx, unsigned _activityId)
  408. {
  409. deserializer.setown(_meta.querySerializedDiskMeta()->createDiskDeserializer(_ctx, _activityId));
  410. rowAllocator.setown(_ctx->getRowAllocator(&_meta, _activityId));
  411. }
  412. RTLIMPLEMENT_IINTERFACE
  413. virtual void readAhead(IRowDeserializerSource & in)
  414. {
  415. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  416. size32_t len = deserializer->deserialize(rowBuilder, in);
  417. rtlReleaseRow(rowBuilder.finalizeRowClear(len));
  418. }
  419. protected:
  420. Owned<IOutputRowDeserializer> deserializer;
  421. Owned<IEngineRowAllocator> rowAllocator;
  422. };
  423. //---------------------------------------------------------------------------
  424. IOutputRowSerializer * COutputMetaData::createDiskSerializer(ICodeContext * ctx, unsigned activityId)
  425. {
  426. return new CVariableOutputRowSerializer(activityId, this);
  427. }
  428. ISourceRowPrefetcher * COutputMetaData::createDiskPrefetcher(ICodeContext * ctx, unsigned activityId)
  429. {
  430. ISourceRowPrefetcher * fetcher = defaultCreateDiskPrefetcher(ctx, activityId);
  431. if (fetcher)
  432. return fetcher;
  433. //Worse case implementation using a deserialize
  434. return new CSimpleSourceRowPrefetcher(*this, ctx, activityId);
  435. }
  436. ISourceRowPrefetcher *COutputMetaData::defaultCreateDiskPrefetcher(ICodeContext * ctx, unsigned activityId)
  437. {
  438. if (getMetaFlags() & MDFneedserializedisk)
  439. return querySerializedDiskMeta()->createDiskPrefetcher(ctx, activityId);
  440. CSourceRowPrefetcher * fetcher = doCreateDiskPrefetcher(activityId);
  441. if (fetcher)
  442. {
  443. fetcher->onCreate(ctx);
  444. return fetcher;
  445. }
  446. return NULL;
  447. }
  448. IOutputRowSerializer *CFixedOutputMetaData::createDiskSerializer(ICodeContext * ctx, unsigned activityId)
  449. {
  450. return new CFixedOutputRowSerializer(activityId, fixedSize);
  451. }
  452. IOutputRowDeserializer *CFixedOutputMetaData::createDiskDeserializer(ICodeContext * ctx, unsigned activityId)
  453. {
  454. return new CFixedOutputRowDeserializer(activityId, fixedSize);
  455. }
  456. ISourceRowPrefetcher *CFixedOutputMetaData::createDiskPrefetcher(ICodeContext * ctx, unsigned activityId)
  457. {
  458. ISourceRowPrefetcher * fetcher = defaultCreateDiskPrefetcher(ctx, activityId);
  459. if (fetcher)
  460. return fetcher;
  461. return new CFixedSourceRowPrefetcher(activityId, fixedSize);
  462. }
  463. IOutputRowSerializer * CActionOutputMetaData::createDiskSerializer(ICodeContext * ctx, unsigned activityId)
  464. {
  465. return new CFixedOutputRowSerializer(activityId, 0);
  466. }
  467. IOutputRowDeserializer * CActionOutputMetaData::createDiskDeserializer(ICodeContext * ctx, unsigned activityId)
  468. {
  469. return new CFixedOutputRowDeserializer(activityId, 0);
  470. }
  471. ISourceRowPrefetcher * CActionOutputMetaData::createDiskPrefetcher(ICodeContext * ctx, unsigned activityId)
  472. {
  473. return new CFixedSourceRowPrefetcher(activityId, 0);
  474. }