12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288 |
- /*##############################################################################
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR getValue OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ############################################################################## */
- #include <initializer_list>
- #include "jlib.hpp"
- #include "jdebug.hpp"
- #include "jsort.hpp"
- #include "jexcept.hpp"
- #include "rtlnewkey.hpp"
- #include "eclrtl_imp.hpp"
- #include "rtlrecord.hpp"
- #include "rtlkey.hpp"
- #include "rtlnewkey.hpp"
- #include "rtlfield.hpp"
- #include "rtldynfield.hpp"
- /*
- * Read a single quoted string from in until the terminating quote is found
- *
- * @param out The resulting string with embedded escaped quote characters resolved.
- * @param in A reference to the start of the string. Updated to point to the end of the string.
- *
- */
- static void readString(StringBuffer &out, const char * &in)
- {
- const char *start = in;
- // Find terminating quote, skipping any escaped ones
- for (;;)
- {
- char c = *in++;
- if (!c)
- throw MakeStringException(0, "Invalid filter - missing closing '");
- if (c=='\'')
- break;
- if (c=='\\')
- c = *in++;
- }
- StringBuffer errmsg;
- unsigned errpos;
- if (!checkUnicodeLiteral(start, in-start-1, errpos, errmsg))
- throw makeStringExceptionV(0, "Invalid filter - %s", errmsg.str());
- rtlDataAttr temp;
- size32_t newlen = 0; // NOTE - this will be in codepoints, not bytes
- rtlCodepageToUtf8XUnescape(newlen, temp.refstr(), in-start-1, start, "UTF-8");
- size32_t newsize = rtlUtf8Size(newlen, temp.getstr());
- out.append(newsize, temp.getstr());
- }
- /*
- * Read a single string from in until a matching terminator is found
- *
- * @param out The resulting string.
- * @param in A reference to the start of the string. Updated to point to the end of the string.
- * @param term Characters that can serve as terminators.
- */
- static void readUntilTerminator(StringBuffer & out, const char * & in, const char * terminators)
- {
- const char * start = in;
- const char * pbrk = strpbrk(start, terminators);
- if (!pbrk)
- throw makeStringExceptionV(0, "Invalid filter - expected terminator '%s'", terminators);
- out.append(pbrk - start, start);
- in = pbrk;
- }
- void readFieldFromFieldFilter(StringBuffer & fieldText, const char * & src)
- {
- readUntilTerminator(fieldText, src, "=*:");
- }
- void deserializeSet(ISetCreator & creator, const char * filter)
- {
- while (*filter)
- {
- char startRange = *filter++;
- if (startRange != '(' && startRange != '[')
- throw MakeStringException(0, "Invalid filter string: expected [ or ( at start of range");
- StringBuffer upperString, lowerString;
- if (*filter=='\'')
- {
- filter++;
- readString(lowerString, filter);
- }
- else
- readUntilTerminator(lowerString, filter, ",])");
- if (*filter == ',')
- {
- filter++;
- if (*filter=='\'')
- {
- filter++;
- readString(upperString, filter);
- }
- else
- readUntilTerminator(upperString, filter, "])");
- }
- else
- upperString.set(lowerString);
- char endRange = *filter++;
- if (endRange != ')' && endRange != ']')
- throw MakeStringException(0, "Invalid filter string: expected ] or ) at end of range");
- if (*filter==',')
- filter++;
- else if (*filter)
- throw MakeStringException(0, "Invalid filter string: expected , between ranges");
- TransitionMask lowerMask = (startRange == '(') ? CMPgt : CMPge;
- TransitionMask upperMask = (endRange == ')') ? CMPlt : CMPle;
- creator.addRange(lowerMask, lowerString, upperMask, upperString);
- }
- }
- // A class wrapped for adding ranges to IValueSets
- class ValueSetCreator : implements ISetCreator
- {
- public:
- ValueSetCreator(IValueSet & _set) : set(_set) {}
- virtual void addRange(TransitionMask lowerMask, const StringBuffer & lowerString, TransitionMask upperMask, const StringBuffer & upperString) override
- {
- Owned<IValueTransition> lower = lowerString ? set.createUtf8Transition(lowerMask, rtlUtf8Length(lowerString.length(), lowerString), lowerString) : nullptr;
- Owned<IValueTransition> upper = upperString ? set.createUtf8Transition(upperMask, rtlUtf8Length(upperString.length(), upperString), upperString) : nullptr;
- set.addRange(lower, upper);
- }
- protected:
- IValueSet & set;
- };
- void deserializeSet(IValueSet & set, const char * filter)
- {
- ValueSetCreator creator(set);
- deserializeSet(creator, filter);
- }
- void getStretchedValue(MemoryBuffer & target, const RtlTypeInfo & newType, const RtlTypeInfo & oldType, const byte * value)
- {
- MemoryBufferBuilder builder(target, 0);
- translateScalar(builder, 0, nullptr, newType, oldType, value);
- }
- static bool incrementBuffer(byte *buf, size32_t size)
- {
- int i = size;
- while (i--)
- {
- buf[i]++;
- if (buf[i]!=0)
- return true;
- }
- return false;
- }
- static bool decrementBuffer(byte *buf, size32_t size)
- {
- int i = size;
- while (i--)
- {
- buf[i]--;
- if (buf[i]!=0xff)
- return true;
- }
- return false;
- }
- //---------------------------------------------------------------------------------------------------------------------
- /*
- * This class represents a value and a comparison condition and is used for representing ranges of values
- *
- * A lowerbound will always have the CMPgt bit set in the mask, and upper bound will have CMPlt set in the mask.
- * The value is always represented in the same way as a field of that type would be in a record.
- */
- class ValueTransition : implements CInterfaceOf<IValueTransition>
- {
- public:
- ValueTransition(TransitionMask _mask, const RtlTypeInfo & type, const void *_value)
- {
- mask = _mask;
- dbgassertex(_value || isMinimum() || isMaximum());
- if (_value)
- {
- size32_t size = type.size((const byte *)_value, nullptr);
- value.allocateN(size, false);
- memcpy(value, _value, size);
- }
- }
- ValueTransition(const RtlTypeInfo & type, MemoryBuffer & in)
- {
- byte inmask;
- in.read(inmask);
- if (!(inmask & CMPnovalue))
- {
- mask = (TransitionMask)inmask;
- size32_t size = type.size(in.readDirect(0), nullptr);
- value.allocateN(size, false);
- in.read(size, value);
- }
- else
- {
- mask = (TransitionMask)(inmask & ~CMPnovalue);
- }
- }
- ValueTransition(TransitionMask _mask) : mask(_mask)
- {
- dbgassertex(isMaximum() || isMinimum());
- }
- bool equals(const RtlTypeInfo & type, const ValueTransition & other) const
- {
- if (mask != other.mask)
- return false;
- if (value && other.value)
- {
- return type.compare(value, other.value) == 0;
- }
- else
- return !value && !other.value;
- }
- const byte *queryValue() const
- {
- return value;
- }
- MemoryBuffer & serialize(const RtlTypeInfo & type, MemoryBuffer & out) const
- {
- byte outmask = mask;
- if (value)
- {
- size32_t size = type.size(value, nullptr);
- out.append(outmask);
- out.append(size, value);
- }
- else
- {
- outmask |= CMPnovalue;
- out.append(outmask);
- }
- return out;
- }
- int compareRaw(const RtlTypeInfo & type, const byte * field) const
- {
- if (!value)
- {
- if (mask & CMPmin)
- return +1;
- if (mask & CMPmax)
- return -1;
- }
- return type.compare(field, value.get());
- }
- int compare(const RtlTypeInfo & type, const byte * field) const
- {
- int c = compareRaw(type, field);
- if (c == 0)
- {
- if (mask & CMPeq)
- return 0;
- //Lower bound
- if (mask & CMPgt)
- return -1;
- //Check against upper bound
- if (mask & CMPlt)
- return +1;
- throwUnexpected();
- }
- return c;
- }
- int compareRaw(const RtlTypeInfo & type, const ValueTransition & other) const
- {
- if (!value || !other.value)
- {
- if (isMinimum())
- return other.isMinimum() ? 0 : -1;
- if (isMaximum())
- return other.isMaximum() ? 0 : +1;
- if (other.isMinimum())
- return +1;
- if (other.isMaximum())
- return -1;
- throwUnexpected();
- }
- return type.compare(value, other.value);
- }
- StringBuffer & describe(const RtlTypeInfo & type, StringBuffer & out) const
- {
- if (mask & CMPgt)
- {
- if (mask & CMPeq)
- out.append("[");
- else
- out.append("(");
- }
- if (value)
- {
- size32_t len;
- rtlDataAttr text;
- type.getUtf8(len, text.refstr(), value);
- size32_t size = rtlUtf8Size(len, text.getstr());
- if (type.isNumeric())
- {
- out.append(size, text.getstr());
- }
- else
- {
- out.append("'");
- appendUtf8AsECL(out, size, text.getstr());
- out.append("'");
- }
- }
- if (mask & CMPlt)
- {
- if (mask & CMPeq)
- out.append("]");
- else
- out.append(")");
- }
- return out;
- }
- bool isPreviousBound(const RtlTypeInfo & type, const ValueTransition & other) const
- {
- if (isInclusiveBound() || other.isInclusiveBound())
- {
- if (compareRaw(type, other) == 0)
- return true;
- }
- if (isInclusiveBound() && other.isInclusiveBound())
- {
- //MORE: if isPreviousValue(other)
- //if (oneless(hival, t.getValue()))
- }
- return false;
- }
- //If this transition isn't shared then modify it directly, otherwise clone. Always return a linked object.
- ValueTransition * modifyTransition(const RtlTypeInfo & type, TransitionMask newMask)
- {
- assertex(value);
- if (IsShared())
- {
- return new ValueTransition(newMask, type, value);
- }
- else
- {
- mask = newMask;
- return LINK(this);
- }
- }
- ValueTransition * modifyInverse(const RtlTypeInfo & type)
- {
- TransitionMask newMask = (TransitionMask)(mask ^ (CMPgt|CMPlt|CMPeq));
- return modifyTransition(type, newMask);
- }
- ValueTransition * cast(const RtlTypeInfo & newType, const RtlTypeInfo & oldType)
- {
- if (!value)
- return LINK(this);
- MemoryBuffer resized;
- getStretchedValue(resized, newType, oldType, value);
- MemoryBuffer recast;
- getStretchedValue(recast, oldType, newType, resized.bytes());
- TransitionMask newMask = mask;
- if (oldType.compare(value, recast.bytes()) != 0)
- {
- // x >= 'ab' becomes x > 'a' => clear the equal item
- // x < 'ab' becomes x <= 'a' => set the equal item
- if (newMask & CMPlt)
- newMask |= CMPeq;
- else if (newMask & CMPgt)
- newMask &= ~CMPeq;
- }
- return new ValueTransition(newMask, newType, resized.toByteArray());
- }
- bool isLowerBound() const { return (mask & CMPgt) != 0; }
- bool isUpperBound() const { return (mask & CMPlt) != 0; }
- bool isInclusiveBound() const { return (mask & CMPeq) != 0; }
- bool isMinimum() const { return (mask & CMPmin) != 0; }
- bool isMaximum() const { return (mask & CMPmax) != 0; }
- void setLow(void * buffer, size32_t offset, const RtlTypeInfo &type) const
- {
- byte *dst = ((byte *) buffer) + offset;
- if (value)
- {
- memcpy(dst, value, type.getMinSize());
- if (!isInclusiveBound())
- incrementBuffer(dst, type.getMinSize());
- }
- else
- memset(dst, 0, type.getMinSize());
- }
- void setHigh(void * buffer, size32_t offset, const RtlTypeInfo &type) const
- {
- byte *dst = ((byte *) buffer) + offset;
- if (value)
- {
- memcpy(dst, value, type.getMinSize());
- if (!isInclusiveBound())
- decrementBuffer(dst, type.getMinSize());
- }
- else
- memset(dst, 0xff, type.getMinSize());
- }
- private:
- TransitionMask mask;
- OwnedMalloc<byte> value;
- };
- typedef IArrayOf<ValueTransition> ValueTransitionArray;
- //---------------------------------------------------------------------------------------------------------------------
- /*
- * The ValueSet class represents a set of ranges of values.
- *
- * The transitions always come in pairs - an upper and lower bound. Each bound can be inclusive or exclusive.
- */
- class ValueSet : public CInterfaceOf<IValueSet>
- {
- public:
- ValueSet(const RtlTypeInfo & _type) : type(_type)
- {
- }
- ValueSet(const RtlTypeInfo & _type, MemoryBuffer & in) : type(_type)
- {
- unsigned cnt;
- in.readPacked(cnt);
- for (unsigned i = 0; i < cnt; i++)
- transitions.append(*new ValueTransition(type, in));
- }
- // Methods for creating a value set
- virtual IValueTransition * createTransition(TransitionMask mask, unsigned __int64 value) const override
- {
- MemoryBuffer buff;
- MemoryBufferBuilder builder(buff, 0);
- type.buildInt(builder, 0, nullptr, value);
- return new ValueTransition(mask, type, buff.toByteArray());
- }
- virtual IValueTransition * createStringTransition(TransitionMask mask, size32_t len, const char * value) const override
- {
- MemoryBuffer buff;
- MemoryBufferBuilder builder(buff, 0);
- type.buildString(builder, 0, nullptr, len, value);
- return new ValueTransition(mask, type, buff.toByteArray());
- }
- virtual IValueTransition * createUtf8Transition(TransitionMask mask, size32_t len, const char * value) const override
- {
- MemoryBuffer buff;
- MemoryBufferBuilder builder(buff, 0);
- type.buildUtf8(builder, 0, nullptr, len, value);
- return new ValueTransition(mask, type, buff.toByteArray());
- }
- virtual void addRange(IValueTransition * _lower, IValueTransition * _upper) override
- {
- ValueTransition * lower = static_cast<ValueTransition *>(_lower);
- ValueTransition * upper = static_cast<ValueTransition *>(_upper);
- Owned<ValueTransition> minBound;
- Owned<ValueTransition> maxBound;
- if (!lower)
- {
- minBound.setown(new ValueTransition(CMPminmask));
- lower = minBound;
- }
- if (!upper)
- {
- maxBound.setown(new ValueTransition(CMPmaxmask));
- upper = maxBound;
- }
- if (!lower->isLowerBound() || !upper->isUpperBound())
- throw MakeStringException(1, "Invalid range bounds");
- //If lower > upper then it is an empty range
- int rc = lower->compareRaw(type, *upper);
- if (rc > 0)
- return;
- //Check for exclusive ranges for a single value
- if (rc == 0)
- {
- if (!lower->isInclusiveBound() || !upper->isInclusiveBound())
- return;
- }
- // binchop to find last transition > val...
- unsigned int low = 0;
- unsigned high = transitions.ordinality();
- while (low < high)
- {
- unsigned mid = low + (high - low) / 2;
- int rc = lower->compareRaw(type, transitions.item(mid));
- if (rc <= 0)
- high = mid;
- else
- low = mid+1;
- }
- unsigned idx;
- if (transitions.isItem(low))
- {
- ValueTransition & next = transitions.item(low);
- if (lower->compareRaw(type, next) == 0)
- {
- if (next.isLowerBound())
- {
- if (!next.isInclusiveBound() && lower->isInclusiveBound())
- transitions.replace(*LINK(lower), low);
- idx = low+1;
- }
- else
- {
- if (next.isInclusiveBound() || lower->isInclusiveBound())
- {
- transitions.remove(low);
- idx = low;
- }
- else
- {
- transitions.add(*LINK(lower), low+1);
- idx = low + 2;
- }
- }
- }
- else
- {
- if (next.isLowerBound())
- {
- transitions.add(*LINK(lower), low);
- idx = low + 1;
- }
- else
- {
- //previous must exist and be lower => ignore this one
- idx = low;
- }
- }
- }
- else
- {
- transitions.append(*LINK(lower));
- transitions.append(*LINK(upper));
- return;
- }
- //Walk the remaining transitions until you find one that is higher than the current one (or run out)
- while (transitions.isItem(idx))
- {
- ValueTransition & cur = transitions.item(idx);
- int rc = cur.compareRaw(type, *upper);
- if (rc > 0)
- {
- if (cur.isUpperBound())
- return;
- break;
- }
- if (rc == 0)
- {
- if (cur.isUpperBound())
- {
- if (!cur.isInclusiveBound() && upper->isInclusiveBound())
- transitions.replace(*LINK(upper), idx);
- return;
- }
- //Upper value matches the next lower bound - could either remove the next item if either is inclusive, otherwise add
- if (cur.isInclusiveBound() || upper->isInclusiveBound())
- {
- transitions.remove(idx);
- return;
- }
- break;
- }
- transitions.remove(idx);
- }
- if (transitions.isItem(idx))
- {
- ValueTransition & cur = transitions.item(idx);
- assertex(cur.isLowerBound());
- if (upper->isPreviousBound(type, cur))
- {
- transitions.remove(idx);
- return;
- }
- }
- transitions.add(*LINK(upper), idx);
- }
- virtual void addAll() override
- {
- reset();
- addRange(nullptr, nullptr);
- }
- virtual void killRange(IValueTransition * _lower, IValueTransition * _upper) override
- {
- ValueTransition * lower = static_cast<ValueTransition *>(_lower);
- ValueTransition * upper = static_cast<ValueTransition *>(_upper);
- Owned<ValueTransition> minBound;
- Owned<ValueTransition> maxBound;
- if (!lower)
- {
- minBound.setown(new ValueTransition(CMPminmask));
- lower = minBound;
- }
- if (!upper)
- {
- maxBound.setown(new ValueTransition(CMPmaxmask));
- upper = maxBound;
- }
- if (!lower->isLowerBound() || !upper->isUpperBound())
- throw MakeStringException(1, "Invalid range bounds");
- //If lower > upper then it is an empty range
- int rc = lower->compareRaw(type, *upper);
- if (rc > 0)
- return;
- //Check for exclusive ranges for a single value
- if (rc == 0)
- {
- if (!lower->isInclusiveBound() || !upper->isInclusiveBound())
- return;
- }
- // binchop to find last transition > val...
- unsigned int low = 0;
- unsigned high = transitions.ordinality();
- while (low < high)
- {
- unsigned mid = low + (high - low) / 2;
- int rc = lower->compareRaw(type, transitions.item(mid));
- if (rc <= 0)
- high = mid;
- else
- low = mid+1;
- }
- unsigned idx = low;
- if (!transitions.isItem(low))
- return;
- //First terminate any set that overlaps the start of the range
- ValueTransition & next = transitions.item(low);
- if (lower->compareRaw(type, next) == 0)
- {
- if (next.isLowerBound())
- {
- // Range [x..y] remove [x..]
- if (!lower->isInclusiveBound() && next.isInclusiveBound())
- {
- //[x,y] - (x,z) = [x],{ (x,y)-(x,z)
- transitions.add(*lower->modifyTransition(type, CMPle), low+1);
- idx = low+2;
- }
- else
- transitions.remove(low);
- }
- else
- {
- //[x..y]-[y..z) -> [x..y)
- if (lower->isInclusiveBound() && next.isInclusiveBound())
- {
- //Convert to an exclusive bound. This must be different from the lower bound
- //(otherwise it would have matched that bound 1st)
- ValueTransition * newTransition = next.modifyTransition(type, CMPlt);
- transitions.replace(*newTransition, low);
- idx = low+1;
- }
- else
- idx = low+1;
- }
- }
- else
- {
- if (next.isUpperBound())
- {
- transitions.add(*lower->modifyTransition(type, lower->isInclusiveBound() ? CMPlt : CMPle), idx);
- idx++;
- }
- }
- //Walk the remaining transitions until you find one that is higher than the current one (or run out)
- while (transitions.isItem(idx))
- {
- ValueTransition & cur = transitions.item(idx);
- int rc = cur.compareRaw(type, *upper);
- if (rc > 0)
- {
- if (cur.isUpperBound())
- transitions.add(*upper->modifyTransition(type, upper->isInclusiveBound() ? CMPgt : CMPge), idx);
- return;
- }
- if (rc == 0)
- {
- if (cur.isUpperBound())
- {
- //[x..y] remove [y..y)
- if (cur.isInclusiveBound() && !upper->isInclusiveBound())
- transitions.add(*upper->modifyTransition(type, CMPge), idx);
- else
- transitions.remove(idx);
- return;
- }
- else
- {
- if (!upper->isInclusiveBound())
- return;
- }
- }
- transitions.remove(idx);
- }
- }
- virtual void reset() override
- {
- transitions.kill();
- }
- virtual void invertSet() override
- {
- if (transitions.empty())
- {
- addAll();
- return;
- }
- unsigned idx = 0;
- ValueTransitionArray newTransitions;
- if (!transitions.item(0).isMinimum())
- newTransitions.append(*new ValueTransition(CMPminmask));
- else
- idx++;
- bool unboundedUpper = transitions.tos().isMaximum();
- unsigned max = transitions.ordinality();
- if (unboundedUpper)
- max--;
- for (; idx < max; idx ++)
- newTransitions.append(*transitions.item(idx).modifyInverse(type));
- if (!unboundedUpper)
- newTransitions.append(*new ValueTransition(CMPmaxmask));
- transitions.swapWith(newTransitions);
- }
- virtual void unionSet(const IValueSet * _other) override
- {
- //Iterate through the ranges in other and add them to this set
- const ValueSet * other = static_cast<const ValueSet *>(_other);
- for (unsigned i=0; i < other->transitions.ordinality(); i+=2)
- addRange(&other->transitions.item(i), &other->transitions.item(i+1));
- }
- virtual void excludeSet(const IValueSet * _other) override
- {
- //Iterate through the ranges in other and add them to this set
- const ValueSet * other = static_cast<const ValueSet *>(_other);
- for (unsigned i=0; i < other->transitions.ordinality(); i+=2)
- killRange(&other->transitions.item(i), &other->transitions.item(i+1));
- }
- virtual void intersectSet(IValueSet const * _other) override
- {
- const ValueSet * other = static_cast<const ValueSet *>(_other);
- //Iterate through the ranges in other and remove them from this set
- unsigned curOther = 0;
- unsigned otherMax = other->numTransitions();
- ValueTransitionArray newTransitions;
- for (unsigned i = 0; i < numTransitions(); i+=2)
- {
- ValueTransition & lower = transitions.item(i);
- ValueTransition & upper = transitions.item(i+1);
- for (;;)
- {
- if (curOther == otherMax)
- goto done; // break out of both loops.
- ValueTransition & otherLower = other->transitions.item(curOther);
- ValueTransition & otherUpper = other->transitions.item(curOther+1);
- //If upper is lower than the other lower bound then no rows overlap, skip this range
- int rc1 = upper.compareRaw(type, otherLower);
- if (rc1 < 0 ||
- ((rc1 == 0) && (!upper.isInclusiveBound() || !otherLower.isInclusiveBound())))
- break;
- //If other upper is lower than the lower bound then no rows overlap, skip other range
- int rc2 = otherUpper.compareRaw(type, lower);
- if (rc2 < 0 ||
- ((rc2 == 0) && (!otherUpper.isInclusiveBound() || !lower.isInclusiveBound())))
- {
- curOther += 2;
- continue;
- }
- //Lower bound of the intersection is the higher of the two lower bounds
- int rc3 = lower.compareRaw(type, otherLower);
- if (rc3 < 0 || ((rc3 == 0) && lower.isInclusiveBound()))
- newTransitions.append(OLINK(otherLower));
- else
- newTransitions.append(OLINK(lower));
- //Upper bound of the intersection is the lower of the two bounds - and move onto next range that is consumed
- int rc4 = upper.compareRaw(type, otherUpper);
- if (rc4 < 0 || ((rc4 == 0) && !upper.isInclusiveBound()))
- {
- newTransitions.append(OLINK(upper));
- break;
- }
- else
- {
- newTransitions.append(OLINK(otherUpper));
- curOther += 2;
- }
- }
- }
- done:
- transitions.swapWith(newTransitions);
- }
- virtual ValueTransition * createRawTransition(TransitionMask mask, const void * value) const override
- {
- return new ValueTransition(mask, type, value);
- }
- virtual void addRawRange(const void * lower, const void * upper) override
- {
- Owned<ValueTransition> lowerBound = lower ? createRawTransition(CMPge, lower) : nullptr;
- Owned<ValueTransition> upperBound = upper ? createRawTransition(CMPle, upper) : nullptr;
- addRange(lowerBound, upperBound);
- }
- virtual void killRawRange(const void * lower, const void * upper) override
- {
- Owned<ValueTransition> lowerBound = lower ? createRawTransition(CMPge, lower) : nullptr;
- Owned<ValueTransition> upperBound = upper ? createRawTransition(CMPle, upper) : nullptr;
- killRange(lowerBound, upperBound);
- }
- virtual bool equals(const IValueSet & _other) const override
- {
- const ValueSet & other = static_cast<const ValueSet &>(_other);
- if (!type.equivalent(&other.type))
- return false;
- if (transitions.ordinality() != other.transitions.ordinality())
- return false;
- ForEachItemIn(i, transitions)
- {
- if (!transitions.item(i).equals(type, other.transitions.item(i)))
- return false;
- }
- return true;
- }
- virtual IValueSet * cast(const RtlTypeInfo & newType) const
- {
- Owned<IValueSet> newSet = new ValueSet(newType);
- unsigned max = transitions.ordinality();
- for (unsigned i=0; i < max; i += 2)
- {
- Owned<IValueTransition> newLower = transitions.item(i).cast(newType, type);
- Owned<IValueTransition> newUpper = transitions.item(i+1).cast(newType, type);
- newSet->addRange(newLower, newUpper);
- }
- return newSet.getClear();
- }
- // Methods for using a value set
- virtual bool isWild() const override;
- virtual const void *querySingleValue() const override;
- virtual unsigned numRanges() const override;
- virtual int compareLowest(const byte * field, unsigned range) const override;
- virtual int compareHighest(const byte * field, unsigned range) const override;
- virtual int findForwardMatchRange(const byte * field, unsigned & matchRange) const override; // why int - seems to return bool
- // Does this field match any range?
- virtual bool matches(const byte * field) const override;
- // Does this field match this particular range?
- virtual bool matches(const byte * field, unsigned range) const override;
- virtual StringBuffer & serialize(StringBuffer & out) const override
- {
- //Does this need to include the type information?
- return describe(out);
- }
- virtual MemoryBuffer & serialize(MemoryBuffer & out) const override
- {
- out.appendPacked((unsigned)transitions.ordinality());
- ForEachItemIn(i, transitions)
- transitions.item(i).serialize(type, out);
- return out;
- }
- virtual const RtlTypeInfo & queryType() const override
- {
- return type;
- }
- virtual void setLow(void *buffer, size32_t offset, const RtlTypeInfo &parentType) const override
- {
- dbgassertex(type.isFixedSize() && parentType.isFixedSize());
- queryTransition(0)->setLow(buffer, offset, type);
- if (&parentType != &type)
- {
- unsigned fullSize = parentType.getMinSize();
- unsigned subSize = type.getMinSize();
- assertex(subSize <= fullSize);
- memset(((byte *) buffer) + offset + subSize, 0, fullSize-subSize);
- }
- }
- virtual bool incrementKey(void *buffer, size32_t offset, const RtlTypeInfo &parentType) const override
- {
- dbgassertex(type.isFixedSize() && parentType.isFixedSize());
- byte *ptr = ((byte *) buffer) + offset;
- bool ok = incrementBuffer(ptr, type.getMinSize());
- if (ok)
- {
- unsigned nextRange;
- bool res = findForwardMatchRange(ptr, nextRange);
- if (!res)
- {
- if (nextRange == numRanges())
- return false;
- queryTransition(nextRange*2)->setLow(buffer, offset, type);
- if (&parentType != &type)
- {
- unsigned fullSize = parentType.getMinSize();
- unsigned subSize = type.getMinSize();
- dbgassertex(subSize <= fullSize);
- memset(((byte *) buffer) + offset + subSize, 0, fullSize-subSize);
- }
- }
- }
- return ok;
- }
- virtual void endRange(void *buffer, size32_t offset, const RtlTypeInfo &parentType) const override
- {
- dbgassertex(type.isFixedSize() && parentType.isFixedSize());
- byte *ptr = ((byte *) buffer) + offset;
- unsigned nextRange;
- bool res = findForwardMatchRange(ptr, nextRange);
- assertex(res);
- queryTransition(nextRange*2+1)->setHigh(buffer, offset, type);
- if (&parentType != &type)
- {
- unsigned fullSize = parentType.getMinSize();
- unsigned subSize = type.getMinSize();
- dbgassertex(subSize <= fullSize);
- memset(((byte *) buffer) + offset + subSize, 0xff, fullSize-subSize);
- }
- }
- virtual void setHigh(void *buffer, size32_t offset, const RtlTypeInfo &parentType) const override
- {
- dbgassertex(type.isFixedSize() && parentType.isFixedSize());
- queryTransition(numTransitions()-1)->setHigh(buffer, offset, type);
- if (&parentType != &type)
- {
- unsigned fullSize = parentType.getMinSize();
- unsigned subSize = type.getMinSize();
- dbgassertex(subSize <= fullSize);
- memset(((byte *) buffer) + offset + subSize, 0xff, fullSize-subSize);
- }
- }
- protected:
- inline int compareRaw(const byte * field, ValueTransition * transition) const __attribute__((always_inline))
- {
- return transition->compareRaw(type, field);
- }
- inline int compare(const byte * field, ValueTransition * transition) const __attribute__((always_inline))
- {
- return transition->compare(type, field);
- }
- ValueTransition * queryTransition(unsigned i) const { return &transitions.item(i); }
- unsigned numTransitions() const { return transitions.ordinality(); }
- StringBuffer & describe(StringBuffer & out) const
- {
- for (unsigned i = 0; i < transitions.ordinality(); i += 2)
- {
- if (i != 0)
- out.append(",");
- ValueTransition & lower = transitions.item(i);
- ValueTransition & upper = transitions.item(i+1);
- lower.describe(type, out);
- if (lower.compareRaw(type, upper) != 0)
- {
- out.append(",");
- upper.describe(type, out);
- }
- else
- out.append("]");
- }
- return out;
- }
- bool hasLowerBound() const
- {
- if (transitions.empty())
- return false;
- return !transitions.item(0).isMinimum();
- }
- bool hasUpperBound() const
- {
- if (transitions.empty())
- return false;
- return !transitions.tos().isMaximum();
- }
- protected:
- const RtlTypeInfo & type;
- ValueTransitionArray transitions;
- };
- unsigned ValueSet::numRanges() const
- {
- return transitions.ordinality() / 2;
- }
- bool ValueSet::isWild() const
- {
- if (transitions.ordinality() != 2)
- return false;
- return queryTransition(0)->isMinimum() && queryTransition(1)->isMaximum();
- }
- const void *ValueSet::querySingleValue() const
- {
- if (transitions.ordinality() == 2)
- {
- if (queryTransition(0)->isInclusiveBound() && queryTransition(1)->isInclusiveBound() &&
- type.compare(queryTransition(0)->queryValue(), queryTransition(1)->queryValue())==0)
- {
- return queryTransition(0)->queryValue();
- }
- }
- return nullptr;
- }
- bool ValueSet::matches(const byte * field) const
- {
- //NOTE: It is guaranteed that transitions with the same value are merged if possible - which allows the loop to terminate early on equality.
- unsigned high = numRanges();
- unsigned low = 0;
- while (low < high)
- {
- unsigned mid = low + (high-low)/2;
- unsigned lower = mid * 2;
- //Using compareRaw allows early termination for equalities that actually fail to match.
- int rc = compareRaw(field, queryTransition(lower));
- if (rc < 0)
- {
- high = mid;
- }
- else if (rc == 0)
- {
- if (queryTransition(lower)->isInclusiveBound())
- return true;
- else
- return false;
- }
- else
- {
- unsigned upper = lower + 1;
- rc = compareRaw(field, queryTransition(upper));
- if (rc < 0)
- return true;
- if (rc == 0)
- {
- if (queryTransition(upper)->isInclusiveBound())
- return true;
- else
- return false;
- }
- low = mid+1;
- }
- }
- return false;
- }
- bool ValueSet::matches(const byte * field, unsigned range) const
- {
- if (range >= numRanges())
- return false;
- unsigned lower = range * 2;
- int rc = compare(field, queryTransition(lower));
- if (rc < 0)
- return false;
- if (rc == 0)
- return true;
- unsigned upper = lower + 1;
- rc = compare(field, queryTransition(upper));
- return (rc <= 0);
- }
- int ValueSet::compareLowest(const byte * field, unsigned range) const
- {
- return compare(field, queryTransition(range*2));
- }
- int ValueSet::compareHighest(const byte * field, unsigned range) const
- {
- return compare(field, queryTransition(range*2+1));
- }
- /*
- //find the last range where the lower bound <= the field, returns 0 if the field matches the lower bound, > 0 otherwise.
- //matchRange is set to the range number, set to numRanges if there is no possible match. Uses a binary chop
- int ValueSet::findCandidateRange(const byte * field, unsigned & matchRange) const
- {
- //NOTE: It is guaranteed that transitions are merged if possible - which allows the loop to terminate early.
- unsigned high = numRanges();
- unsigned low = 0;
- int rc = 0;
- while (low < high)
- {
- unsigned mid = low + (high - low) / 2;
- unsigned lower = mid * 2;
- rc = compare(field, queryTransition(lower));
- if (rc <= 0)
- high = mid;
- else
- low = mid + 1;
- }
- matchRange = low;
- return rc;
- }
- //find the last range where the lower bound <= the field, returns 0 if the field matches the lower bound, > 0 otherwise.
- //starts searching from curRange (which is likely to match). Uses a sequential search.
- int ValueSet::checkNextCandidateRange(const byte * field, unsigned & curRange) const
- {
- unsigned cur = curRange;
- while (cur < numRanges())
- {
- unsigned lower = cur * 2;
- int rc = compare(field, queryTransition(lower));
- if (rc >= 0)
- {
- curRange = cur;
- return rc;
- }
- cur++;
- }
- curRange = numRanges();
- return 0;
- }
- */
- //If field lies within a range return true and set matchRange to the index of the range.
- //If field is outside a range return false and set matchRange to the index of the next range, i.e. min(range.lower) where range.lower > field
- //Find the largest transition value that is <= field, and then check if within upper limit
- //Could have a version that starts from the previous match to shorten the binary chop, or use a linear search instead
- int ValueSet::findForwardMatchRange(const byte * field, unsigned & matchRange) const
- {
- unsigned ranges = numRanges();
- unsigned high = ranges;
- unsigned low = 0;
- //Find the largest transition that is <= the search value
- while (high - low > 1)
- {
- unsigned mid = low + (high - low) / 2;
- unsigned lower = mid * 2;
- int rc = compare(field, queryTransition(lower));
- if (rc < 0)
- {
- // field is less than transition, so transition with value < search must be before.
- high = mid; // exclude mid and all transitions after it
- }
- else if (rc > 0)
- {
- // field is greater than transition, so transition with value > search must be this or after.
- low = mid; // exclude mid-1 and all transitions before it
- }
- else
- {
- matchRange = mid;
- return true;
- }
- }
- //rc is comparison from element mid.
- if (low != ranges)
- {
- if (compare(field, queryTransition(low*2)) >= 0)
- {
- int rc = compare(field, queryTransition(low*2+1));
- if (rc <= 0)
- {
- matchRange = low;
- return true;
- }
- low++;
- }
- }
- matchRange = low;
- return false;
- }
- IValueSet * createValueSet(const RtlTypeInfo & _type) { return new ValueSet(_type); }
- IValueSet * createValueSet(const RtlTypeInfo & _type, MemoryBuffer & in) { return new ValueSet(_type, in); }
- //---------------------------------------------------------------------------------------------------------------------
- /*
- * Base implementation class for field filters.
- */
- class FieldFilter : public CInterfaceOf<IFieldFilter>
- {
- public:
- FieldFilter(unsigned _field, const RtlTypeInfo & _type) : field(_field), type(_type) {}
- //More complex index matching
- virtual int compareRow(const RtlRow & left, const RtlRow & right) const override
- {
- return type.compare(left.queryField(field), right.queryField(field));
- }
- virtual unsigned queryFieldIndex() const override
- {
- return field;
- }
- virtual const RtlTypeInfo & queryType() const override
- {
- return type;
- }
- virtual bool getBloomHash(hash64_t &hash) const override
- {
- return false;
- }
- virtual unsigned queryScore() const override
- {
- // MORE - the score should probably depend on the number and nature of ranges too.
- unsigned score = type.getMinSize();
- if (!score)
- score = 5; // Arbitrary guess for average field length in a variable size field
- return score;
- }
- protected:
- unsigned field;
- const RtlTypeInfo & type;
- };
- /*
- * A field filter that can match a set of values.
- */
- class SetFieldFilter : public FieldFilter
- {
- public:
- SetFieldFilter(unsigned _field, IValueSet * _values) : FieldFilter(_field, _values->queryType()), values(_values) {}
- SetFieldFilter(unsigned _field, const RtlTypeInfo & fieldType, IValueSet * _values) : FieldFilter(_field, fieldType), values(_values) {}
- //Simple row matching
- virtual bool matches(const RtlRow & row) const override;
- virtual bool isEmpty() const override;
- virtual bool isWild() const override;
- //More complex index matching
- virtual unsigned numRanges() const override;
- virtual int compareLowest(const RtlRow & left, unsigned range) const override;
- virtual int compareHighest(const RtlRow & left, unsigned range) const override;
- virtual int findForwardMatchRange(const RtlRow & row, unsigned & matchRange) const override;
- virtual IFieldFilter *remap(unsigned newField) const override { return new SetFieldFilter(newField, values); }
- virtual StringBuffer & serialize(StringBuffer & out) const override;
- virtual MemoryBuffer & serialize(MemoryBuffer & out) const override;
- // jhtree
- virtual void setLow(void *buffer, size32_t offset) const override;
- virtual bool incrementKey(void *buffer, size32_t offset) const override;
- virtual void endRange(void *buffer, size32_t offset) const override;
- virtual void setHigh(void *buffer, size32_t offset) const override;
- // Human-readable description for tracing/debugging
- virtual StringBuffer &describe(StringBuffer &out) const override;
- protected:
- Linked<IValueSet> values;
- };
- bool SetFieldFilter::matches(const RtlRow & row) const
- {
- return values->matches(row.queryField(field));
- }
- bool SetFieldFilter::isEmpty() const
- {
- return values->numRanges() == 0;
- }
- bool SetFieldFilter::isWild() const
- {
- return values->isWild();
- }
- StringBuffer &SetFieldFilter::describe(StringBuffer &out) const
- {
- // MORE - could consider abbreviating very large sets...
- return values->serialize(out);
- }
- unsigned SetFieldFilter::numRanges() const
- {
- return values->numRanges();
- }
- int SetFieldFilter::compareLowest(const RtlRow & left, unsigned range) const
- {
- return values->compareLowest(left.queryField(field), range);
- }
- int SetFieldFilter::compareHighest(const RtlRow & left, unsigned range) const
- {
- return values->compareHighest(left.queryField(field), range);
- }
- int SetFieldFilter::findForwardMatchRange(const RtlRow & row, unsigned & matchRange) const
- {
- return values->findForwardMatchRange(row.queryField(field), matchRange);
- }
- StringBuffer & SetFieldFilter::serialize(StringBuffer & out) const
- {
- out.append(field).append('=');
- return values->serialize(out);
- }
- MemoryBuffer & SetFieldFilter::serialize(MemoryBuffer & out) const
- {
- out.appendPacked(field).append('=');
- return values->serialize(out);
- }
- void SetFieldFilter::setLow(void *buffer, size32_t offset) const
- {
- dbgassertex(!isEmpty());
- values->setLow(buffer, offset, type);
- }
- bool SetFieldFilter::incrementKey(void *buffer, size32_t offset) const
- {
- return values->incrementKey(buffer, offset, type);
- }
- void SetFieldFilter::endRange(void *buffer, size32_t offset) const
- {
- values->endRange(buffer, offset, type);
- }
- void SetFieldFilter::setHigh(void *buffer, size32_t offset) const
- {
- values->setHigh(buffer, offset, type);
- }
- //---------------------------------------------------------------------------------------------------------------------
- class SingleFieldFilter : public FieldFilter
- {
- public:
- SingleFieldFilter(unsigned _field, const RtlTypeInfo & fieldType, const byte * _value);
- ~SingleFieldFilter();
- //Simple row matching
- virtual bool matches(const RtlRow & row) const override;
- virtual bool isEmpty() const override { return false; }
- virtual bool isWild() const override { return false; }
- //More complex index matching
- virtual unsigned numRanges() const override { return 1; };
- virtual int compareLowest(const RtlRow & left, unsigned range) const override;
- virtual int compareHighest(const RtlRow & left, unsigned range) const override;
- virtual int findForwardMatchRange(const RtlRow & row, unsigned & matchRange) const override;
- virtual IFieldFilter *remap(unsigned newField) const override { return new SingleFieldFilter(newField, type, value); }
- virtual StringBuffer & serialize(StringBuffer & out) const override;
- virtual MemoryBuffer & serialize(MemoryBuffer & out) const override;
- // jhtree
- virtual bool getBloomHash(hash64_t &hash) const override;
- virtual void setLow(void *buffer, size32_t offset) const override;
- virtual bool incrementKey(void *buffer, size32_t offset) const override;
- virtual void endRange(void *buffer, size32_t offset) const override;
- virtual void setHigh(void *buffer, size32_t offset) const override;
- // Human-readable description for tracing/debugging
- virtual StringBuffer &describe(StringBuffer &out) const override;
- protected:
- const byte *value;
- };
- SingleFieldFilter::SingleFieldFilter(unsigned _field, const RtlTypeInfo & fieldType, const byte * _value)
- : FieldFilter(_field, fieldType)
- {
- size32_t size = type.size(_value, nullptr);
- byte *val = (byte *) malloc(size);
- memcpy(val, _value, size);
- value = val;
- }
- SingleFieldFilter::~SingleFieldFilter()
- {
- free((void *) value);
- }
- bool SingleFieldFilter::matches(const RtlRow & row) const
- {
- return type.compare(row.queryField(field), value) == 0;
- }
- int SingleFieldFilter::compareLowest(const RtlRow & row, unsigned range) const
- {
- dbgassertex(!range);
- return type.compare(row.queryField(field), value);
- }
- int SingleFieldFilter::compareHighest(const RtlRow & row, unsigned range) const
- {
- dbgassertex(!range);
- return type.compare(row.queryField(field), value);
- }
- bool SingleFieldFilter::getBloomHash(hash64_t &hash) const
- {
- if (!value)
- return false;
- hash = rtlHash64Data(type.size(value, nullptr), value, hash);
- return true;
- }
- int SingleFieldFilter::findForwardMatchRange(const RtlRow & row, unsigned & matchRange) const
- {
- int rc = type.compare(row.queryField(field), value);
- if (rc==0)
- {
- matchRange = 0;
- return true;
- }
- else if (rc < 0)
- {
- matchRange = 0;
- }
- else
- {
- matchRange = 1;
- }
- return false;
- }
- StringBuffer & SingleFieldFilter::serialize(StringBuffer & out) const
- {
- out.append(field).append("=[");
- size32_t len;
- rtlDataAttr text;
- type.getUtf8(len, text.refstr(), value);
- size32_t size = rtlUtf8Size(len, text.getstr());
- if (type.isNumeric())
- {
- out.append(size, text.getstr());
- }
- else
- {
- out.append("'");
- appendUtf8AsECL(out, size, text.getstr());
- out.append("'");
- }
- return out.append("]");
- }
- StringBuffer &SingleFieldFilter::describe(StringBuffer &out) const
- {
- size32_t size;
- rtlDataAttr text;
- type.getString(size, text.refstr(), value);
- if (type.isNumeric())
- return out.append(size, text.getstr());
- else
- return out.appendf("'%*s'", size, text.getstr());
- }
- MemoryBuffer & SingleFieldFilter::serialize(MemoryBuffer & out) const
- {
- out.appendPacked(field).append('=');
- out.appendPacked(1); // Signifying a single transition - which in turn means a SingleFieldFilter not a set.
- size32_t size = type.size(value, nullptr);
- out.append(size, value);
- return out;
- }
- void SingleFieldFilter::setLow(void *buffer, size32_t offset) const
- {
- memcpy(((byte *)buffer) + offset, value, type.size(value, nullptr));
- }
- bool SingleFieldFilter::incrementKey(void *buffer, size32_t offset) const
- {
- // Set to next permitted value above current
- byte *ptr = ((byte *)buffer) + offset;
- if (type.compare(ptr, value) < 0)
- {
- memcpy(ptr, value, type.size(value, nullptr));
- return true;
- }
- else
- return false;
- }
- void SingleFieldFilter::endRange(void *buffer, size32_t offset) const
- {
- // Set to last permitted value in the range that includes current (which is asserted to be valid)
- dbgassertex(type.compare(((byte *)buffer) + offset, value)==0);
- }
- void SingleFieldFilter::setHigh(void *buffer, size32_t offset) const
- {
- memcpy(((byte *)buffer) + offset, value, type.size(value, nullptr));
- }
- IFieldFilter * createFieldFilter(unsigned fieldId, IValueSet *values)
- {
- const void *single = values->querySingleValue();
- if (single)
- return new SingleFieldFilter(fieldId, values->queryType(), (const byte *) single);
- else if (values->isWild())
- return createWildFieldFilter(fieldId, values->queryType());
- else
- return new SetFieldFilter(fieldId, values);
- }
- IFieldFilter * createEmptyFieldFilter(unsigned fieldId, const RtlTypeInfo & type)
- {
- Owned<IValueSet> values = createValueSet(type);
- return new SetFieldFilter(fieldId, values);
- }
- IFieldFilter * createFieldFilter(unsigned fieldId, const RtlTypeInfo & type, const void * value)
- {
- return new SingleFieldFilter(fieldId, type, (const byte *) value);
- }
- //---------------------------------------------------------------------------------------------------------------------
- /*
- * A link counted version of an RtlTypeInfo
- */
- class SharedRtlTypeInfo : public CInterface
- {
- public:
- SharedRtlTypeInfo(const RtlTypeInfo * _type) : type(_type) {}
- ~SharedRtlTypeInfo() { type->doDelete(); }
- const RtlTypeInfo * const type;
- };
- /*
- * A field filter that can match a set of values.
- */
- class SubStringFieldFilter : public SetFieldFilter
- {
- public:
- SubStringFieldFilter(unsigned _field, const RtlTypeInfo & _fieldType, SharedRtlTypeInfo * _subType, IValueSet * _values)
- : SetFieldFilter(_field, _fieldType, _values), subType(_subType)
- {
- subLength = subType->type->length;
- }
- virtual StringBuffer & serialize(StringBuffer & out) const override;
- virtual MemoryBuffer & serialize(MemoryBuffer & out) const override;
- protected:
- Linked<SharedRtlTypeInfo> subType;
- size32_t subLength;
- };
- StringBuffer & SubStringFieldFilter::serialize(StringBuffer & out) const
- {
- out.append(field).append(':').append(subLength).append("=");
- return values->serialize(out);
- }
- MemoryBuffer & SubStringFieldFilter::serialize(MemoryBuffer & out) const
- {
- out.appendPacked(field).append(':').append(subLength);
- return values->serialize(out);
- }
- class FixedSubStringFieldFilter : public SubStringFieldFilter
- {
- public:
- FixedSubStringFieldFilter(unsigned _field, const RtlTypeInfo & _fieldType, SharedRtlTypeInfo * _subType, IValueSet * _values)
- : SubStringFieldFilter(_field, _fieldType, _subType, _values)
- {
- }
- virtual IFieldFilter *remap(unsigned newField) const override { return new SubStringFieldFilter(newField, type, subType, values); }
- };
- class VariableSubStringFieldFilter : public SubStringFieldFilter
- {
- public:
- VariableSubStringFieldFilter(unsigned _field, const RtlTypeInfo & _fieldType, SharedRtlTypeInfo * _subType, IValueSet * _values)
- : SubStringFieldFilter(_field, _fieldType, _subType, _values),
- maxTempLength(subLength * 4) //Maximum expansion from length to size is 4 bytes for utf8
- {
- }
- //Simple row matching
- virtual bool matches(const RtlRow & row) const override;
- //More complex index matching
- virtual int compareLowest(const RtlRow & left, unsigned range) const override;
- virtual int compareHighest(const RtlRow & left, unsigned range) const override;
- virtual int findForwardMatchRange(const RtlRow & row, unsigned & matchRange) const override;
- virtual IFieldFilter *remap(unsigned newField) const override { return new VariableSubStringFieldFilter(newField, type, subType, values); }
- protected:
- const unsigned maxTempLength;
- };
- bool VariableSubStringFieldFilter::matches(const RtlRow & row) const
- {
- const byte * ptr = row.queryField(field);
- size32_t len = *reinterpret_cast<const size32_t *>(ptr);
- if (likely(len >= subLength))
- return values->matches(ptr + sizeof(size32_t));
- //Clone and expand the string to the expected length
- byte * temp = (byte *)alloca(maxTempLength);
- RtlStaticRowBuilder builder(temp, maxTempLength);
- translateScalar(builder, 0, nullptr, *subType->type, type, ptr);
- return values->matches(temp);
- }
- int VariableSubStringFieldFilter::compareLowest(const RtlRow & left, unsigned range) const
- {
- const byte * ptr = left.queryField(field);
- size32_t len = *reinterpret_cast<const size32_t *>(ptr);
- if (likely(len >= subLength))
- return values->compareLowest(ptr + sizeof(size32_t), range);
- //Clone and expand the string to the expected length
- byte * temp = (byte *)alloca(maxTempLength);
- RtlStaticRowBuilder builder(temp, maxTempLength);
- translateScalar(builder, 0, nullptr, *subType->type, type, ptr);
- return values->compareLowest(temp, range);
- }
- int VariableSubStringFieldFilter::compareHighest(const RtlRow & left, unsigned range) const
- {
- const byte * ptr = left.queryField(field);
- size32_t len = *reinterpret_cast<const size32_t *>(ptr);
- if (likely(len >= subLength))
- return values->compareHighest(ptr + sizeof(size32_t), range);
- //Clone and expand the string to the expected length
- byte * temp = (byte *)alloca(maxTempLength);
- RtlStaticRowBuilder builder(temp, maxTempLength);
- translateScalar(builder, 0, nullptr, *subType->type, type, ptr);
- return values->compareHighest(temp, range);
- }
- int VariableSubStringFieldFilter::findForwardMatchRange(const RtlRow & row, unsigned & matchRange) const
- {
- const byte * ptr = row.queryField(field);
- size32_t len = *reinterpret_cast<const size32_t *>(ptr);
- if (likely(len >= subLength))
- return values->findForwardMatchRange(ptr + sizeof(size32_t), matchRange);
- //Clone and expand the string to the expected length
- byte * temp = (byte *)alloca(maxTempLength);
- RtlStaticRowBuilder builder(temp, maxTempLength);
- translateScalar(builder, 0, nullptr, *subType->type, type, ptr);
- return values->findForwardMatchRange(temp, matchRange);
- }
- IFieldFilter * createSubStringFieldFilter(unsigned fieldId, size32_t subLength, IValueSet * values)
- {
- const RtlTypeInfo & type = values->queryType();
- if ((int) subLength <= 0)
- {
- //Does the set include a blank value?
- MemoryBuffer blank;
- MemoryBufferBuilder builder(blank, 0);
- type.buildString(builder, 0, nullptr, 0, "");
- bool valuesMatchBlank = values->matches(blank.bytes());
- if (valuesMatchBlank)
- return createWildFieldFilter(fieldId, type);
- else
- return createEmptyFieldFilter(fieldId, type);
- }
- //Check for substring that doesn't truncate the field.
- if (type.isFixedSize())
- {
- size32_t fieldLength = type.length;
- if (subLength >= fieldLength)
- return createFieldFilter(fieldId, values);
- }
- switch (type.getType())
- {
- case type_string:
- case type_qstring:
- case type_unicode:
- case type_utf8:
- case type_data:
- break;
- default:
- throw MakeStringException(2, "Invalid type for substring filter");
- }
- //Created a truncated type
- FieldTypeInfoStruct info;
- info.fieldType = (type.fieldType & ~RFTMunknownsize);
- info.length = subLength;
- Owned<SharedRtlTypeInfo> subType(new SharedRtlTypeInfo(info.createRtlTypeInfo()));
- //Create a new set of values truncated to the appropriate length.
- Owned<IValueSet> newValues = values->cast(*subType->type);
- if (type.isFixedSize())
- {
- //The standard compare will only look at the first subLength characters.
- return new FixedSubStringFieldFilter(fieldId, type, subType, newValues);
- }
- //Check that the temporary buffer that *might* be required is a sensible size for storing on the stack
- if (subLength > 256)
- throw MakeStringException(3, "Substring [1..%u] range is too large for a variable size field", subLength);
- //Use a class which will expand the string to the sub length if it is shorter
- return new VariableSubStringFieldFilter(fieldId, type, subType, newValues);
- }
- //---------------------------------------------------------------------------------------------------------------------
- /*
- * A field filter that can match any value.
- */
- class WildFieldFilter : public FieldFilter
- {
- public:
- WildFieldFilter(unsigned _field, const RtlTypeInfo & _type) : FieldFilter(_field, _type) {}
- //Simple row matching
- virtual bool matches(const RtlRow & row) const override
- {
- return true;
- }
- virtual bool isEmpty() const override
- {
- return false;
- }
- virtual bool isWild() const override
- {
- return true;
- }
- //More complex index matching
- virtual unsigned numRanges() const override
- {
- return 1;
- }
- virtual int compareLowest(const RtlRow & left, unsigned range) const override
- {
- //Should pass through to compare using the type.
- //return type.compareLowest(left.queryField(fieldId));
- //MORE
- return +1;
- }
- virtual int compareHighest(const RtlRow & left, unsigned range) const override
- {
- //Should pass through to compare using the type.
- //return type.compareLowest(left.queryField(fieldId));
- //MORE
- return -1;
- }
- virtual int findForwardMatchRange(const RtlRow & row, unsigned & matchRange) const override
- {
- matchRange = 0;
- return true;
- }
- virtual unsigned queryScore() const override
- {
- return 0;
- }
- virtual IFieldFilter *remap(unsigned newField) const override { return new WildFieldFilter(newField, type); }
- virtual StringBuffer & serialize(StringBuffer & out) const override
- {
- return out.append(field).append('*');
- }
- virtual StringBuffer & describe(StringBuffer & out) const override
- {
- return out.append('*');
- }
- virtual MemoryBuffer & serialize(MemoryBuffer & out) const override
- {
- return out.appendPacked(field).append('*');
- }
- virtual void setLow(void *buffer, size32_t offset) const
- {
- byte *ptr = ((byte*) buffer) + offset;
- assert(type.isFixedSize());
- memset(ptr, 0x0, type.getMinSize());
- }
- virtual bool incrementKey(void *buffer, size32_t offset) const override
- {
- byte *ptr = ((byte*) buffer) + offset;
- assert(type.isFixedSize());
- return incrementBuffer(ptr, type.getMinSize());
- }
- virtual void endRange(void *buffer, size32_t offset) const
- {
- setHigh(buffer, offset);
- }
- virtual void setHigh(void *buffer, size32_t offset) const
- {
- byte *ptr = ((byte*) buffer) + offset;
- assert(type.isFixedSize());
- memset(ptr, 0xff, type.getMinSize());
- }
- };
- IFieldFilter * createWildFieldFilter(unsigned fieldId, const RtlTypeInfo & type)
- {
- //MORE: Is it worth special casing, or would a SetFieldFilter of null..null be just as good?
- return new WildFieldFilter(fieldId, type);
- }
- //---------------------------------------------------------------------------------------------------------------------
- //Note, the fieldId could be serialized within the string, but it is needed to determine the type, and
- //passing it in allows this code to be decoupled from the type serialization code.
- IFieldFilter * deserializeFieldFilter(unsigned fieldId, const RtlTypeInfo & type, const char * src)
- {
- switch (*src)
- {
- case '*':
- return createWildFieldFilter(fieldId, type);
- case '=':
- {
- Owned<IValueSet> values = createValueSet(type);
- deserializeSet(*values, src+1);
- return createFieldFilter(fieldId, values);
- }
- case ':':
- {
- char * end;
- size32_t subLength = strtoul(src+1, &end, 10);
- if (*end != '=')
- UNIMPLEMENTED_X("Expected =");
- //Created a truncated type
- FieldTypeInfoStruct info;
- info.fieldType = (type.fieldType & ~RFTMunknownsize);
- info.length = subLength;
- Owned<SharedRtlTypeInfo> subType(new SharedRtlTypeInfo(info.createRtlTypeInfo()));
- //The serialized set is already truncated to the appropriate length.
- Owned<IValueSet> values = createValueSet(*subType->type);
- deserializeSet(*values, end+1);
- if (type.isFixedSize())
- {
- return new FixedSubStringFieldFilter(fieldId, type, subType, values);
- }
- return new VariableSubStringFieldFilter(fieldId, type, subType, values);
- }
- }
- UNIMPLEMENTED_X("Unknown Field Filter");
- }
- IFieldFilter * deserializeFieldFilter(const RtlRecord & record, const char * src)
- {
- StringBuffer fieldText;
- readFieldFromFieldFilter(fieldText, src);
- unsigned fieldNum;
- if (isdigit(fieldText.str()[0]))
- fieldNum = atoi(fieldText.str());
- else
- fieldNum = record.getFieldNum(fieldText);
- return deserializeFieldFilter(fieldNum, *record.queryType(fieldNum), src);
- }
- IFieldFilter * deserializeFieldFilter(unsigned fieldId, const RtlTypeInfo & type, MemoryBuffer & in)
- {
- char kind;
- in.read(kind);
- switch (kind)
- {
- case '*':
- return createWildFieldFilter(fieldId, type);
- case '=':
- {
- unsigned numTransitions;
- size32_t pos = in.getPos();
- in.readPacked(numTransitions);
- if (numTransitions==1)
- {
- size32_t size = type.size(in.readDirect(0), nullptr);
- return new SingleFieldFilter(fieldId, type, (const byte *) in.readDirect(size));
- }
- else
- {
- in.reset(pos);
- Owned<IValueSet> values = createValueSet(type, in);
- return createFieldFilter(fieldId, values);
- }
- }
- case ':':
- {
- size32_t subLength;
- in.read(subLength);
- //Created a truncated type
- FieldTypeInfoStruct info;
- info.fieldType = (type.fieldType & ~RFTMunknownsize);
- info.length = subLength;
- Owned<SharedRtlTypeInfo> subType(new SharedRtlTypeInfo(info.createRtlTypeInfo()));
- //The serialized set is already truncated to the appropriate length.
- Owned<IValueSet> values = createValueSet(*subType->type, in);
- if (type.isFixedSize())
- {
- return new FixedSubStringFieldFilter(fieldId, type, subType, values);
- }
- return new VariableSubStringFieldFilter(fieldId, type, subType, values);
- }
- }
- throwUnexpectedX("Unknown Field Filter");
- }
- IFieldFilter * deserializeFieldFilter(const RtlRecord & searchRecord, MemoryBuffer & in)
- {
- unsigned fieldNum;
- in.readPacked(fieldNum);
- return deserializeFieldFilter(fieldNum, *searchRecord.queryType(fieldNum), in);
- }
- //---------------------------------------------------------------------------------------------------------------------
- static int compareFieldFilters(IInterface * const * left, IInterface * const * right)
- {
- IFieldFilter * leftFilter = static_cast<IFieldFilter *>(*left);
- IFieldFilter * rightFilter = static_cast<IFieldFilter *>(*right);
- return leftFilter->queryFieldIndex() - rightFilter->queryFieldIndex();
- }
- void RowFilter::addFilter(const IFieldFilter & filter)
- {
- filters.append(filter);
- unsigned fieldNum = filter.queryFieldIndex();
- if (fieldNum >= numFieldsRequired)
- numFieldsRequired = fieldNum+1;
- }
- const IFieldFilter & RowFilter::addFilter(const RtlRecord & record, const char * filterText)
- {
- IFieldFilter & filter = *deserializeFieldFilter(record, filterText);
- filters.append(filter);
- unsigned fieldNum = filter.queryFieldIndex();
- if (fieldNum >= numFieldsRequired)
- numFieldsRequired = fieldNum+1;
- return filter;
- }
- bool RowFilter::matches(const RtlRow & row) const
- {
- row.lazyCalcOffsets(numFieldsRequired);
- ForEachItemIn(i, filters)
- {
- if (!filters.item(i).matches(row))
- return false;
- }
- return true;
- }
- void RowFilter::appendFilters(IConstArrayOf<IFieldFilter> & _filters)
- {
- ForEachItemIn(i, _filters)
- {
- addFilter(OLINK(_filters.item(i)));
- }
- }
- void RowFilter::createSegmentMonitors(IIndexReadContext *irc)
- {
- ForEachItemIn(i, filters)
- irc->append(FFkeyed, LINK(&filters.item(i)));
- }
- void RowFilter::extractKeyFilter(const RtlRecord & record, IConstArrayOf<IFieldFilter> & keyFilters) const
- {
- if (!filters)
- return;
- // for an index must be in field order, and all values present
- IConstArrayOf<IFieldFilter> temp;
- ForEachItemIn(i, filters)
- temp.append(OLINK(filters.item(i)));
- temp.sort(compareFieldFilters);
- unsigned maxField = temp.tos().queryFieldIndex();
- unsigned curIdx=0;
- for (unsigned field = 0; field <= maxField; field++)
- {
- const IFieldFilter & cur = temp.item(curIdx);
- if (field == cur.queryFieldIndex())
- {
- keyFilters.append(OLINK(cur));
- curIdx++;
- }
- else
- keyFilters.append(*createWildFieldFilter(field, *record.queryType(field)));
- }
- }
- void RowFilter::extractMemKeyFilter(const RtlRecord & record, const UnsignedArray &sortOrder, IConstArrayOf<IFieldFilter> & keyFilters) const
- {
- if (!filters)
- return;
- // for in-memory index, we want filters in the same order as the sort fields, with wilds added
- ForEachItemIn(idx, sortOrder)
- {
- unsigned sortField = sortOrder.item(idx);
- bool needWild = true;
- ForEachItemIn(fidx, filters)
- {
- const IFieldFilter &filter = filters.item(fidx);
- if (filter.queryFieldIndex()==sortField)
- {
- keyFilters.append(OLINK(filter));
- needWild = false;
- break;
- }
- }
- if (needWild)
- keyFilters.append(*createWildFieldFilter(sortField, *record.queryType(sortField)));
- }
- }
- const IFieldFilter *RowFilter::findFilter(unsigned fieldNum) const
- {
- ForEachItemIn(i, filters)
- {
- const IFieldFilter &field = filters.item(i);
- if (field.queryFieldIndex() == fieldNum)
- return &field;
- }
- return nullptr;
- }
- const IFieldFilter *RowFilter::extractFilter(unsigned fieldNum)
- {
- ForEachItemIn(i, filters)
- {
- const IFieldFilter &field = filters.item(i);
- if (field.queryFieldIndex() == fieldNum)
- {
- filters.remove(i, true);
- return &field;
- }
- }
- return nullptr;
- }
- void RowFilter::remove(unsigned idx)
- {
- filters.remove(idx);
- }
- void RowFilter::clear()
- {
- filters.kill();
- numFieldsRequired = 0;
- }
- void RowFilter::recalcFieldsRequired()
- {
- numFieldsRequired = 0;
- ForEachItemIn(i, filters)
- {
- const IFieldFilter &field = filters.item(i);
- if (field.queryFieldIndex() >= numFieldsRequired)
- numFieldsRequired = field.queryFieldIndex()+1;
- }
- }
- void RowFilter::remapField(unsigned filterIdx, unsigned newFieldNum)
- {
- filters.replace(*filters.item(filterIdx).remap(newFieldNum), filterIdx);
- }
- //---------------------------------------------------------------------------------------------------------------------
- bool RowCursor::setRowForward(const byte * row)
- {
- currentRow.setRow(row, numFieldsRequired);
- unsigned field = 0;
- //Now check which of the fields matches, and update matchedRanges to indicate
- //MORE: Add an optimization:
- //First of all check all of the fields that were previously matched. If the previous matches are in the same range,
- //then the next field must either be in the same range, or a following range.
- /*
- for (; field < numMatched; field++)
- {
- const IFieldFilter & filter = queryFilter(field);
- if (!filter.withinUpperRange(currentRow, matchPos(field)))
- {
- //This field now either matches a later range, or doesn't match at all.
- //Find the new range for the current field that could include the row
- cur.checkNextCandidateRange(currentRow, matchPos(i));
- if (isValid(i))
- return 0;
- //Finished processing
- if (i == 0)
- return UINT_MAX;
- //caller needs to find a new row that is larger that the current values for fields 0..i
- //Now need to search for a value if
- //Search for a new candidate value from this filter
- return i+1;
- }
- //if (!filter.withinUpperRange(currentRow,
- }
- */
- for (; field < filters.ordinality(); field++)
- {
- const IFieldFilter & filter = queryFilter(field);
- //If the field is within a range return true and the range. If outside the range return the next range.
- unsigned matchRange;
- bool matched = filter.findForwardMatchRange(currentRow, matchRange);
- if (!matched)
- {
- if (matchRange >= filter.numRanges())
- return findNextRange(field);
- nextUnmatchedRange = matchRange;
- break;
- }
- matchedRanges.replace(matchRange, field);
- }
- numMatched = field;
- return numMatched == filters.ordinality();
- }
- //The filter for field "field" has been exhausted, work out which value should be compared next
- bool RowCursor::findNextRange(unsigned field)
- {
- unsigned matchRange;
- for (;;)
- {
- if (field == 0)
- {
- eos = true;
- return false;
- }
- field = field-1;
- matchRange = matchedRanges.item(field);
- const IFieldFilter & filter = queryFilter(field);
- //If the field value is less than the upper bound of the current range, search for the next value above
- //the current value
- if (filter.compareHighest(currentRow, matchRange) < 0)
- {
- numMatched = field;
- //MORE: Optimize the case where the field can be incremented - if so other fields can compare against lowest
- //if (filter.incrementValue(currentRow))
- // nextUnmatchedRange = 0;
- //else
- nextUnmatchedRange = -1U;
- return false;
- }
- matchRange++;
- if (matchRange < filter.numRanges())
- break;
- }
- nextUnmatchedRange = matchRange;
- numMatched = field-1;
- return false;
- }
- //---------------------------------------------------------------------------------------------
- /*
- Conclusions:
- * Need compareLowest to be able to implement first()/ next with wildcards. Could implement with a typed lowest transition....
- * Is it actually any different from start transition for most cases -
- * index read is very efficient using a single memcmp when finding the next.
- * Moving the compare into the transition makes the memcmp harder - although could be optimized + allow combining.
- * The string sets should work on a field. The segmonitor should map row->field
- * Could have multiple adjacent field fields once have variable length.
- * Moving the compare into the transition makes index formats without decompression very tricky. Could possibly work around it by having a byte provider interface which could be called to get the next value.
- *
- *
- */
|