rtlrecord.cpp 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2016 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "platform.h"
  14. #include <math.h>
  15. #include <stdio.h>
  16. #include "jmisc.hpp"
  17. #include "jlib.hpp"
  18. #include "eclhelper.hpp"
  19. #include "eclrtl_imp.hpp"
  20. #include "rtlrecord.hpp"
  21. /*
  22. * Potential different implementations (for all fixed size has no penalty):
  23. *
  24. * a. when row changes, update offsets of all offset fields
  25. * + access is simple, single array lookup
  26. * - lots more fields are updated even if the fields aren't used.
  27. *
  28. * b. when a row changes update the next offset for all variable size fields.
  29. * + minimal updates
  30. * - offset access is more complex
  31. *
  32. * c. when row changes clear cache, and calculate on demand.
  33. * + trivial update on row change
  34. * + no cost of fields not accessed
  35. * + may be required to implement ifblocks - since fields in the test expression must be evaluated before all the
  36. * offsets are known. Depends on the implementation of ifblocks!
  37. * - accessing an offset will involve a loop and be more expensive
  38. *
  39. * Handling complications:
  40. * a. Nested rows
  41. * A dilemma. Should they be expanded?
  42. * + If they are expanded then it makes it much simpler to use externally.
  43. * - The nested fields have compound names storing and matching them is a complication.
  44. * - The code generator doesn't expand.
  45. * + It is easier to calculate offsets since all fields are supported by the same class.
  46. * + Sizes for unexpanded rows would need their own size caching classes. That is more complex!
  47. * + The meta information currently processes child information.
  48. *
  49. * Not expanding is complicated if you also try and only calculate the offsets of fields in nested records once - since you end
  50. * up needing to call back and forth between instances and the static information.
  51. * However, if nested records are processed by using size(), and selections from them are processed by instantiating
  52. * an instance of the nested row it is all much simpler. The cost is potentially re-evaluating sizes of nested fields. Potentially
  53. * inefficient if most are fixed size, but some are variable.
  54. *
  55. * b. bitfields
  56. * the last bitfield in a bitfield container has the size of the container, the others have 0 size.
  57. *
  58. * c. ifblocks
  59. * Nasty. Allowing direct access means the flag would need checking, and a field would need a pointer
  60. * to its containing ifblock. Cleanest to have a different derived implementation which was used if the record
  61. * contained ifblocks which added calls to check ifblocks before size()/getValue() etc.
  62. * Will require an extra row parameter to every RtlTypeInfo::getX() function.
  63. * Evaluating the test expression without compiling will require expression interpreting.
  64. *
  65. * d. alien datatypes
  66. * As long as the rtlField class implements the functions then it shouldn't cause any problems. Evaluating them
  67. * from a record at runtime without compiling will be tricky - requires an interpreter.
  68. *
  69. * Other
  70. * Add a minSize to each field (unless already stored in the record information)
  71. *
  72. * Expression interpreting:
  73. * Replace the no_select with a CHqlBoundExpr(no_select, fieldid).
  74. * Evaluate (ctx [ logical->RtlFieldOffsetCalculator mapping ]).
  75. * Even better if mapped direct to something that represents the base cursor so no need to search ctx.
  76. * For nested selects the code would need to be consistent.
  77. */
  78. static unsigned countFields(const RtlFieldInfo * const * fields)
  79. {
  80. unsigned cnt = 0;
  81. for (;*fields;fields++)
  82. cnt++;
  83. return cnt;
  84. }
  85. static unsigned countFields(const RtlFieldInfo * const * fields, bool & containsNested)
  86. {
  87. unsigned cnt = 0;
  88. for (;*fields;fields++)
  89. {
  90. const RtlTypeInfo * type = (*fields)->type;
  91. if (type->getType() == type_record)
  92. {
  93. containsNested = true;
  94. const RtlFieldInfo * const * nested = type->queryFields();
  95. if (nested)
  96. cnt += countFields(nested, containsNested);
  97. }
  98. else
  99. cnt++;
  100. }
  101. return cnt;
  102. }
  103. static const RtlFieldInfo * * expandNestedRows(const RtlFieldInfo * * target, const RtlFieldInfo * const * fields)
  104. {
  105. for (;*fields;fields++)
  106. {
  107. const RtlFieldInfo * cur = *fields;
  108. const RtlTypeInfo * type = cur->type;
  109. if (type->getType() == type_record)
  110. {
  111. const RtlFieldInfo * const * nested = type->queryFields();
  112. if (nested)
  113. target = expandNestedRows(target, nested);
  114. }
  115. else
  116. *target++ = cur;
  117. }
  118. return target;
  119. }
  120. RtlRecord::RtlRecord(const RtlRecordTypeInfo & record, bool expandFields) : fields(record.fields), originalFields(record.fields)
  121. {
  122. //MORE: Does not cope with ifblocks.
  123. numVarFields = 0;
  124. //Optionally expand out nested rows.
  125. if (expandFields)
  126. {
  127. bool containsNested = false;
  128. numFields = countFields(fields, containsNested);
  129. if (containsNested)
  130. {
  131. const RtlFieldInfo * * allocated = new const RtlFieldInfo * [numFields+1];
  132. fields = allocated;
  133. const RtlFieldInfo * * target = expandNestedRows(allocated, originalFields);
  134. assertex(target == fields+numFields);
  135. *target = nullptr;
  136. }
  137. }
  138. else
  139. numFields = countFields(fields);
  140. for (unsigned i=0; i < numFields; i++)
  141. {
  142. if (!queryType(i)->isFixedSize())
  143. numVarFields++;
  144. }
  145. fixedOffsets = new size_t[numFields + 1];
  146. whichVariableOffset = new unsigned[numFields + 1];
  147. variableFieldIds = new unsigned[numVarFields];
  148. unsigned curVariable = 0;
  149. size_t fixedOffset = 0;
  150. for (unsigned i=0;; i++)
  151. {
  152. whichVariableOffset[i] = curVariable;
  153. fixedOffsets[i] = fixedOffset;
  154. if (i == numFields)
  155. break;
  156. const RtlTypeInfo * curType = queryType(i);
  157. if (curType->isFixedSize())
  158. {
  159. size_t thisSize = curType->size(nullptr, nullptr);
  160. fixedOffset += thisSize;
  161. }
  162. else
  163. {
  164. variableFieldIds[curVariable] = i;
  165. curVariable++;
  166. fixedOffset = 0;
  167. }
  168. }
  169. }
  170. RtlRecord::~RtlRecord()
  171. {
  172. if (fields != originalFields)
  173. delete [] fields;
  174. delete [] fixedOffsets;
  175. delete [] whichVariableOffset;
  176. delete [] variableFieldIds;
  177. }
  178. void RtlRecord::calcRowOffsets(size_t * variableOffsets, const void * _row) const
  179. {
  180. const byte * row = static_cast<const byte *>(_row);
  181. for (unsigned i = 0; i < numVarFields; i++)
  182. {
  183. unsigned fieldIndex = variableFieldIds[i];
  184. size_t offset = getOffset(variableOffsets, fieldIndex);
  185. size_t fieldSize = queryType(fieldIndex)->size(row + offset, row);
  186. variableOffsets[i+1] = offset+fieldSize;
  187. }
  188. }
  189. size32_t RtlRecord::getMinRecordSize() const
  190. {
  191. if (numVarFields == 0)
  192. return fixedOffsets[numFields];
  193. size32_t minSize = 0;
  194. for (unsigned i=0; i < numFields; i++)
  195. minSize += queryType(i)->getMinSize();
  196. return minSize;
  197. }
  198. //---------------------------------------------------------------------------------------------------------------------
  199. RtlRow::RtlRow(const RtlRecord & _info, const void * optRow, unsigned numOffsets, size_t * _variableOffsets) : info(_info), variableOffsets(_variableOffsets)
  200. {
  201. assertex(numOffsets == info.getNumVarFields()+1);
  202. //variableOffset[0] is used for all fixed offset fields to avoid any special casing.
  203. variableOffsets[0] = 0;
  204. setRow(optRow);
  205. }
  206. __int64 RtlRow::getInt(unsigned field) const
  207. {
  208. const byte * self = reinterpret_cast<const byte *>(row);
  209. const RtlTypeInfo * type = info.queryType(field);
  210. return type->getInt(self + getOffset(field));
  211. }
  212. void RtlRow::getUtf8(size32_t & resultLen, char * & result, unsigned field) const
  213. {
  214. const byte * self = reinterpret_cast<const byte *>(row);
  215. const RtlTypeInfo * type = info.queryType(field);
  216. return type->getUtf8(resultLen, result, self + getOffset(field));
  217. }
  218. void RtlRow::setRow(const void * _row)
  219. {
  220. row = _row;
  221. if (_row)
  222. info.calcRowOffsets(variableOffsets, _row);
  223. }
  224. RtlDynRow::RtlDynRow(const RtlRecord & _info, const void * optRow) : RtlRow(_info, optRow, _info.getNumVarFields()+1, new size_t[_info.getNumVarFields()+1])
  225. {
  226. }
  227. RtlDynRow::~RtlDynRow()
  228. {
  229. delete [] variableOffsets;
  230. }