123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263 |
- /*##############################################################################
- HPCC SYSTEMS software Copyright (C) 2016 HPCC Systems®.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ############################################################################## */
- #include "platform.h"
- #include <math.h>
- #include <stdio.h>
- #include "jmisc.hpp"
- #include "jlib.hpp"
- #include "eclhelper.hpp"
- #include "eclrtl_imp.hpp"
- #include "rtlrecord.hpp"
- /*
- * Potential different implementations (for all fixed size has no penalty):
- *
- * a. when row changes, update offsets of all offset fields
- * + access is simple, single array lookup
- * - lots more fields are updated even if the fields aren't used.
- *
- * b. when a row changes update the next offset for all variable size fields.
- * + minimal updates
- * - offset access is more complex
- *
- * c. when row changes clear cache, and calculate on demand.
- * + trivial update on row change
- * + no cost of fields not accessed
- * + may be required to implement ifblocks - since fields in the test expression must be evaluated before all the
- * offsets are known. Depends on the implementation of ifblocks!
- * - accessing an offset will involve a loop and be more expensive
- *
- * Handling complications:
- * a. Nested rows
- * A dilemma. Should they be expanded?
- * + If they are expanded then it makes it much simpler to use externally.
- * - The nested fields have compound names; storing and matching them is a complication.
- * - The code generator doesn't expand.
- * + It is easier to calculate offsets since all fields are supported by the same class.
- * + Sizes for unexpanded rows would need their own size caching classes. That is more complex!
- * + The meta information currently processes child information.
- *
- * Not expanding is complicated if you also try and only calculate the offsets of fields in nested records once - since you end
- * up needing to call back and forth between instances and the static information.
- * However, if nested records are processed by using size(), and selections from them are processed by instantiating
- * an instance of the nested row it is all much simpler. The cost is potentially re-evaluating sizes of nested fields. Potentially
- * inefficient if most are fixed size, but some are variable.
- *
- * b. bitfields
- * the last bitfield in a bitfield container has the size of the container, the others have 0 size.
- *
- * c. ifblocks
- * Nasty. Allowing direct access means the flag would need checking, and a field would need a pointer
- * to its containing ifblock. Cleanest to have a different derived implementation which was used if the record
- * contained ifblocks which added calls to check ifblocks before size()/getValue() etc.
- * Will require an extra row parameter to every RtlTypeInfo::getX() function.
- * Evaluating the test expression without compiling will require expression interpreting.
- *
- * d. alien datatypes
- * As long as the rtlField class implements the functions then it shouldn't cause any problems. Evaluating them
- * from a record at runtime without compiling will be tricky - requires an interpreter.
- *
- * Other
- * Add a minSize to each field (unless already stored in the record information)
- *
- * Expression interpreting:
- * Replace the no_select with a CHqlBoundExpr(no_select, fieldid).
- * Evaluate (ctx [ logical->RtlFieldOffsetCalculator mapping ]).
- * Even better if mapped direct to something that represents the base cursor so no need to search ctx.
- * For nested selects the code would need to be consistent.
- */
- static unsigned countFields(const RtlFieldInfo * const * fields)
- {
- unsigned cnt = 0;
- for (;*fields;fields++)
- cnt++;
- return cnt;
- }
- static unsigned countFields(const RtlFieldInfo * const * fields, bool & containsNested)
- {
- unsigned cnt = 0;
- for (;*fields;fields++)
- {
- const RtlTypeInfo * type = (*fields)->type;
- if (type->getType() == type_record)
- {
- containsNested = true;
- const RtlFieldInfo * const * nested = type->queryFields();
- if (nested)
- cnt += countFields(nested, containsNested);
- }
- else
- cnt++;
- }
- return cnt;
- }
- static const RtlFieldInfo * * expandNestedRows(const RtlFieldInfo * * target, const RtlFieldInfo * const * fields)
- {
- for (;*fields;fields++)
- {
- const RtlFieldInfo * cur = *fields;
- const RtlTypeInfo * type = cur->type;
- if (type->getType() == type_record)
- {
- const RtlFieldInfo * const * nested = type->queryFields();
- if (nested)
- target = expandNestedRows(target, nested);
- }
- else
- *target++ = cur;
- }
- return target;
- }
- RtlRecord::RtlRecord(const RtlRecordTypeInfo & record, bool expandFields) : fields(record.fields), originalFields(record.fields)
- {
- //MORE: Does not cope with ifblocks.
- numVarFields = 0;
- //Optionally expand out nested rows.
- if (expandFields)
- {
- bool containsNested = false;
- numFields = countFields(fields, containsNested);
- if (containsNested)
- {
- const RtlFieldInfo * * allocated = new const RtlFieldInfo * [numFields+1];
- fields = allocated;
- const RtlFieldInfo * * target = expandNestedRows(allocated, originalFields);
- assertex(target == fields+numFields);
- *target = nullptr;
- }
- }
- else
- numFields = countFields(fields);
- for (unsigned i=0; i < numFields; i++)
- {
- if (!queryType(i)->isFixedSize())
- numVarFields++;
- }
- fixedOffsets = new size_t[numFields + 1];
- whichVariableOffset = new unsigned[numFields + 1];
- variableFieldIds = new unsigned[numVarFields];
- unsigned curVariable = 0;
- size_t fixedOffset = 0;
- for (unsigned i=0;; i++)
- {
- whichVariableOffset[i] = curVariable;
- fixedOffsets[i] = fixedOffset;
- if (i == numFields)
- break;
- const RtlTypeInfo * curType = queryType(i);
- if (curType->isFixedSize())
- {
- size_t thisSize = curType->size(nullptr, nullptr);
- fixedOffset += thisSize;
- }
- else
- {
- variableFieldIds[curVariable] = i;
- curVariable++;
- fixedOffset = 0;
- }
- }
- }
- RtlRecord::~RtlRecord()
- {
- if (fields != originalFields)
- delete [] fields;
- delete [] fixedOffsets;
- delete [] whichVariableOffset;
- delete [] variableFieldIds;
- }
- void RtlRecord::calcRowOffsets(size_t * variableOffsets, const void * _row) const
- {
- const byte * row = static_cast<const byte *>(_row);
- for (unsigned i = 0; i < numVarFields; i++)
- {
- unsigned fieldIndex = variableFieldIds[i];
- size_t offset = getOffset(variableOffsets, fieldIndex);
- size_t fieldSize = queryType(fieldIndex)->size(row + offset, row);
- variableOffsets[i+1] = offset+fieldSize;
- }
- }
- size32_t RtlRecord::getMinRecordSize() const
- {
- if (numVarFields == 0)
- return fixedOffsets[numFields];
- size32_t minSize = 0;
- for (unsigned i=0; i < numFields; i++)
- minSize += queryType(i)->getMinSize();
- return minSize;
- }
- //---------------------------------------------------------------------------------------------------------------------
- RtlRow::RtlRow(const RtlRecord & _info, const void * optRow, unsigned numOffsets, size_t * _variableOffsets) : info(_info), variableOffsets(_variableOffsets)
- {
- assertex(numOffsets == info.getNumVarFields()+1);
- //variableOffset[0] is used for all fixed offset fields to avoid any special casing.
- variableOffsets[0] = 0;
- setRow(optRow);
- }
- __int64 RtlRow::getInt(unsigned field) const
- {
- const byte * self = reinterpret_cast<const byte *>(row);
- const RtlTypeInfo * type = info.queryType(field);
- return type->getInt(self + getOffset(field));
- }
- void RtlRow::getUtf8(size32_t & resultLen, char * & result, unsigned field) const
- {
- const byte * self = reinterpret_cast<const byte *>(row);
- const RtlTypeInfo * type = info.queryType(field);
- return type->getUtf8(resultLen, result, self + getOffset(field));
- }
- void RtlRow::setRow(const void * _row)
- {
- row = _row;
- if (_row)
- info.calcRowOffsets(variableOffsets, _row);
- }
- RtlDynRow::RtlDynRow(const RtlRecord & _info, const void * optRow) : RtlRow(_info, optRow, _info.getNumVarFields()+1, new size_t[_info.getNumVarFields()+1])
- {
- }
- RtlDynRow::~RtlDynRow()
- {
- delete [] variableOffsets;
- }
|