Procházet zdrojové kódy

Merge pull request #11076 from richardkchapman/jhtree-overrides

HPCC-19544 Clean up CKeyCursor ahead of migration to new field filters

Reviewed-by: Gavin Halliday <ghalliday@hpccsystems.com>
Gavin Halliday před 7 roky
rodič
revize
7e08de5843

+ 1 - 1
common/remote/sockfile.cpp

@@ -4725,7 +4725,7 @@ class CRemoteFileServer : implements IRemoteFileServer, public CInterface
         unsigned pos = reply.length();
         while (keyManager->lookup(true))
         {
-            unsigned size = keyManager->queryRecordSize();
+            unsigned size = keyManager->queryRowSize();
             const byte *result = keyManager->queryKeyBuffer();
             reply.append(size);
             reply.append(size, result);

+ 0 - 138
rtl/eclrtl/rtlkey.cpp

@@ -510,137 +510,6 @@ public:
     virtual KeySegmentMonitorSerializeType serializeType() const override { return KSMST_CSINGLELITTLEKEYSEGMENTMONITOR; }
 };
 
-
-
-class COverrideableKeySegmentMonitor : public IOverrideableKeySegmentMonitor, public CInterface
-{
-    const void *overridden;
-    unsigned hash;
-
-public:
-    IMPLEMENT_IINTERFACE
-
-    COverrideableKeySegmentMonitor(IKeySegmentMonitor * _base) 
-    {
-        base.setown(_base); 
-        overridden = NULL;
-        hash = base->queryHashCode();
-        hash = FNV_32_HASHONE_VALUE(hash, (byte) 123); 
-    }
-
-    COverrideableKeySegmentMonitor(MemoryBuffer &mb) 
-    {
-        mb.read(hash);
-        base.setown(deserializeKeySegmentMonitor(mb)); 
-        overridden = NULL;
-    }
-
-    virtual void setOverrideBuffer(const void *ptr) override
-    {
-        overridden = ptr;
-    }
-
-    virtual unsigned queryHashCode() const override
-    {
-        return hash;
-    }
-
-    virtual bool getBloomHash(hash64_t &hash) const override
-    {
-        // MORE - I don't know what correct answer is but this is safest!
-        // Perhaps it should return hash of base/overridden as appropriate?
-        return false;
-    }
-
-    virtual bool matchesBuffer(const void *keyval) const override
-    {
-        if (overridden)
-        {
-            unsigned offset = base->getOffset();
-            return memcmp((char *) keyval+offset, (char *) overridden+offset, base->getSize()) == 0;
-        }
-        else
-            return base->matchesBuffer(keyval);
-    }
-    virtual bool matches(const RtlRow *keyval) const override
-    {
-        return matchesBuffer(keyval->queryRow());
-    }
-
-    virtual bool increment(void *keyval) const override
-    {
-        if (overridden)
-        {
-            // Set to next permitted value above current
-            unsigned offset = base->getOffset();
-            if (memcmp((char *) keyval+offset, (char *) overridden+offset, base->getSize()) < 0)
-            {
-                memcpy((char *) keyval+offset, (char *) overridden+offset, base->getSize());
-                return true;
-            }
-            return false;
-        }
-        else
-            return base->increment(keyval);
-    }
-    virtual void setLow(void *keyval) const override
-    {
-        if (overridden)
-        {
-            unsigned offset = base->getOffset();
-            memcpy((char *) keyval+offset, (char *) overridden+offset, base->getSize());
-        }
-        else
-            base->setLow(keyval);
-    }
-    virtual void setHigh(void *keyval) const override
-    {
-        if (overridden)
-        {
-            unsigned offset = base->getOffset();
-            memcpy((char *) keyval+offset, (char *) overridden+offset, base->getSize());
-        }
-        else
-            base->setHigh(keyval);
-    }
-    virtual void endRange(void *keyval) const override
-    {
-        if (overridden)
-        {
-            unsigned offset = base->getOffset();
-            memcpy((char *) keyval+offset, (char *) overridden+offset, base->getSize());
-        }
-        base->endRange(keyval);
-    }
-    virtual void copy(void * expandedRow, const void *rawRight) const override
-    {
-        base->copy(expandedRow, rawRight);
-    }
-
-    virtual bool isWild() const override                             { return overridden ? false : base->isWild(); }
-    virtual unsigned getFieldIdx() const override                    { return base->getFieldIdx(); }
-    virtual unsigned getOffset() const override                      { return base->getOffset(); }
-    virtual unsigned getSize() const override                        { return base->getSize(); }
-    virtual bool isEmpty() const override                            { return base->isEmpty(); }
-    virtual bool isSigned() const override                           { return base->isSigned(); }
-    virtual bool isLittleEndian() const override                     { return base->isLittleEndian(); }
-    virtual bool isWellKeyed() const override                        { return overridden ? true : base->isWellKeyed(); }
-    virtual bool isOptional() const override                         { return base->isOptional(); }
-    virtual unsigned numFieldsRequired() const override              { return base->numFieldsRequired(); }
-    virtual bool isSimple() const override                           { return base->isSimple();  }
-
-    virtual bool equivalentTo(const IKeySegmentMonitor &other) const override { throwUnexpected(); }
-    virtual int docompare(const void * expandedLeft, const void *rawRight) const override { throwUnexpected(); }
-    virtual bool setOffset(unsigned _offset) override { throwUnexpected(); }
-    virtual MemoryBuffer &serialize(MemoryBuffer &mb) const override { throwUnexpected(); }
-    virtual KeySegmentMonitorSerializeType serializeType() const override { throwUnexpected(); }
-    virtual IKeySegmentMonitor *clone() const override { throwUnexpected(); }
-
-protected:
-    Owned<IKeySegmentMonitor> base;
-};
-
-
 ECLRTL_API IStringSet *createRtlStringSet(size32_t size)
 {
     return createStringSet(size);
@@ -735,11 +604,6 @@ ECLRTL_API IKeySegmentMonitor *createSingleKeySegmentMonitor(bool optional, unsi
     return new CSingleKeySegmentMonitor(optional, value, _fieldIdx, offset, size);
 }
 
-ECLRTL_API IOverrideableKeySegmentMonitor *createOverrideableKeySegmentMonitor(IKeySegmentMonitor *base)
-{
-    return new COverrideableKeySegmentMonitor(base);
-}
-
 ECLRTL_API IKeySegmentMonitor *createSingleBigSignedKeySegmentMonitor(bool optional, unsigned fieldIdx, unsigned offset, unsigned size, const void * value)
 {
     return new CSingleBigSignedKeySegmentMonitor(optional, value, fieldIdx, offset, size);
@@ -788,8 +652,6 @@ ECLRTL_API IKeySegmentMonitor *deserializeKeySegmentMonitor(MemoryBuffer &mb)
             return new CSingleLittleSignedKeySegmentMonitor(mb);
         case KSMST_CSINGLELITTLEKEYSEGMENTMONITOR:
             return new CSingleLittleKeySegmentMonitor(mb);
-        case KSMST_OVERRIDEABLEKEYSEGMENTMONITOR:
-            return new COverrideableKeySegmentMonitor(mb);
     }
     return NULL; // up to caller to check
 }

+ 0 - 8
rtl/eclrtl/rtlkey.hpp

@@ -28,7 +28,6 @@ enum KeySegmentMonitorSerializeType
     KSMST_SINGLEBIGSIGNEDKEYSEGMENTMONITOR,
     KSMST_SINGLELITTLESIGNEDKEYSEGMENTMONITOR,
     KSMST_CSINGLELITTLEKEYSEGMENTMONITOR,
-    KSMST_OVERRIDEABLEKEYSEGMENTMONITOR,
     KSMST_max
 };
 
@@ -115,11 +114,6 @@ public:
     virtual bool setOffset(unsigned _offset) = 0;  // Used by old record layout translator - to be removed at some point
 };
 
-interface IOverrideableKeySegmentMonitor  : public IKeySegmentMonitor
-{
-    virtual void setOverrideBuffer(const void *ptr) = 0;
-};
-
 interface IBlobProvider
 {
     virtual byte * lookupBlob(unsigned __int64 id) = 0;         // return reference, not freed by code generator, can dispose once transform() has returned.
@@ -166,8 +160,6 @@ ECLRTL_API IKeySegmentMonitor *createSingleBigSignedKeySegmentMonitor(bool optio
 ECLRTL_API IKeySegmentMonitor *createSingleLittleSignedKeySegmentMonitor(bool optional, unsigned _fieldIdx, unsigned offset, unsigned size, const void * value);
 ECLRTL_API IKeySegmentMonitor *createSingleLittleKeySegmentMonitor(bool optional, unsigned _fieldIdx, unsigned offset, unsigned size, const void * value);
 
-ECLRTL_API IOverrideableKeySegmentMonitor *createOverrideableKeySegmentMonitor(IKeySegmentMonitor *base);
-
 ECLRTL_API IKeySegmentMonitor *deserializeKeySegmentMonitor(MemoryBuffer &mb);
 ECLRTL_API void deserializeSet(IStringSet & set, size32_t minRecordSize, const RtlTypeInfo * fieldType, const char * filter);
 

+ 1 - 1
system/jhtree/ctfile.cpp

@@ -794,7 +794,7 @@ bool CJHTreeNode::getValueAt(unsigned int index, char *dst) const
         {
             //It would make sense to have the fileposition at the start of the row from the perspective of the
             //internal representation, but that would complicate everything else which assumes the keyed
-            //fields start at the begining of the row.
+            //fields start at the beginning of the row.
             if (rowexp.get())
             {
                 rowexp->expandRow(dst,index,sizeof(offset_t),keyLen);

Rozdílová data souboru nebyla zobrazena, protože soubor je příliš velký
+ 464 - 631
system/jhtree/jhtree.cpp


+ 40 - 14
system/jhtree/jhtree.hpp

@@ -38,22 +38,49 @@ interface jhtree_decl IDelayedFile : public IInterface
     virtual IFileIO *getFileIO() = 0;
 };
 
+class KeyStatsCollector  // Plain holder for an IContextLogger pointer plus five unsigned counters; taken by reference by the IKeyCursor navigation/count methods
+{
+public:
+    IContextLogger *ctx;  // raw pointer copied from the constructor argument; keydiff.cpp constructs one with nullptr, so it may be null
+    unsigned seeks = 0;  // counter, starts at 0 (presumably fed by noteSeeks() - confirm in jhtree.cpp)
+    unsigned scans = 0;  // counter, starts at 0 (presumably fed by noteSeeks() - confirm)
+    unsigned wildseeks = 0;  // counter, starts at 0 (presumably fed by noteSeeks() - confirm)
+    unsigned skips = 0;  // counter, starts at 0 (presumably fed by noteSkips() - confirm)
+    unsigned nullskips = 0;  // counter, starts at 0 (presumably fed by noteSkips() - confirm)
+
+    KeyStatsCollector(IContextLogger *_ctx) : ctx(_ctx) {}  // stores _ctx as-is; NOTE(review): single-argument and not explicit, so IContextLogger* converts implicitly
+    void reset();  // declared only; body is not in this header - presumably zeroes the counters, confirm
+    void noteSeeks(unsigned lseeks, unsigned lscans, unsigned lwildseeks);  // declared only; parameter order mirrors seeks / scans / wildseeks
+    void noteSkips(unsigned lskips, unsigned lnullSkips);  // declared only; parameter order mirrors skips / nullskips
+
+};
+
 interface jhtree_decl IKeyCursor : public IInterface  // Abstract cursor interface; IKeyIndex::getCursor() returns a pointer to one
 {
-    virtual bool next(char *dst) = 0;
-    virtual bool prev(char *dst) = 0;
-    virtual bool first(char *dst) = 0;
-    virtual bool last(char *dst) = 0;
-    virtual bool gtEqual(const char *src, char *dst, bool seekForward = false) = 0; // returns first record >= src
-    virtual bool ltEqual(const char *src, char *dst, bool seekForward = false) = 0; // returns last record <= src
-    virtual size32_t getSize() = 0;
+    virtual bool next(char *dst, KeyStatsCollector &stats) = 0;  // stats is supplied by the caller on every call
+    virtual bool first(char *dst, KeyStatsCollector &stats) = 0;
+    virtual bool last(char *dst, KeyStatsCollector &stats) = 0;
+    virtual bool gtEqual(const char *src, char *dst, KeyStatsCollector &stats) = 0; // returns first record >= src
+    virtual bool ltEqual(const char *src, KeyStatsCollector &stats) = 0; // returns last record <= src. NOTE(review): this signature has no dst parameter, so where the record ends up is not visible here - confirm
+    virtual const char *queryName() const = 0;
+    virtual size32_t getSize() = 0;  // Size of current row
+    virtual size32_t getKeyedSize() const = 0;  // Size of keyed fields
     virtual void serializeCursorPos(MemoryBuffer &mb) = 0;
-    virtual void deserializeCursorPos(MemoryBuffer &mb, char *keyBuffer) = 0;
+    virtual void deserializeCursorPos(MemoryBuffer &mb, KeyStatsCollector &stats) = 0;  // takes a stats collector in place of the former keyBuffer argument
     virtual unsigned __int64 getSequence() = 0;
     virtual const byte *loadBlob(unsigned __int64 blobid, size32_t &blobsize) = 0;
-    virtual void releaseBlobs() = 0;
-    virtual void reset() = 0;
-    virtual bool bloomFilterReject(const SegMonitorList &segs) const = 0;  // returns true if record cannot possibly match
+    virtual void reset(unsigned sortFromSeg = 0) = 0;  // sortFromSeg defaults to 0. NOTE(review): default arguments on virtuals are bound by static type - keep any override's default identical
+    virtual bool lookup(bool exact, KeyStatsCollector &stats) = 0;
+
+    virtual bool lookupSkip(const void *seek, size32_t seekOffset, size32_t seeklen, KeyStatsCollector &stats) = 0;
+    virtual bool skipTo(const void *_seek, size32_t seekOffset, size32_t seeklen) = 0;  // same seek arguments as lookupSkip but takes no KeyStatsCollector
+    virtual IKeyCursor *fixSortSegs(unsigned sortFieldOffset) = 0;  // NOTE(review): ownership of the returned cursor is not stated in this header - confirm with the implementation
+
+    virtual unsigned __int64 getCount(KeyStatsCollector &stats) = 0;
+    virtual unsigned __int64 checkCount(unsigned __int64 max, KeyStatsCollector &stats) = 0;
+    virtual unsigned __int64 getCurrentRangeCount(unsigned groupSegCount, KeyStatsCollector &stats) = 0;
+    virtual bool nextRange(unsigned groupSegCount) = 0;  // takes no KeyStatsCollector, unlike getCurrentRangeCount
+    virtual const byte *queryKeyBuffer() const = 0;
 };
 
 interface IKeyIndex;
@@ -67,7 +94,7 @@ interface jhtree_decl IKeyIndexBase : public IInterface
 
 interface jhtree_decl IKeyIndex : public IKeyIndexBase
 {
-    virtual IKeyCursor *getCursor(IContextLogger *ctx) = 0;
+    virtual IKeyCursor *getCursor(const SegMonitorList *segs) = 0;
     virtual size32_t keySize() = 0;
     virtual bool isFullySorted() = 0;
     virtual bool isTopLevelKey() = 0;
@@ -177,6 +204,7 @@ class jhtree_decl SegMonitorList : implements IInterface, implements IIndexReadC
 public:
     IMPLEMENT_IINTERFACE_O;
     SegMonitorList(const RtlRecord &_recInfo, bool _needWild);
+    SegMonitorList(const SegMonitorList &_from, const char *fixedVals, unsigned sortFieldOffset);
     IArrayOf<IKeySegmentMonitor> segMonitors;
 
     void reset();
@@ -211,7 +239,6 @@ interface IKeyManager : public IInterface, extends IIndexReadContext
     virtual const byte *queryKeyBuffer() = 0; //if using RLT: fpos is the translated value, so correct in a normal row
     virtual unsigned __int64 querySequence() = 0;
     virtual size32_t queryRowSize() = 0;     // Size of current row as returned by queryKeyBuffer()
-    virtual unsigned queryRecordSize() = 0;  // Max size
 
     virtual bool lookup(bool exact) = 0;
     virtual unsigned __int64 getCount() = 0;
@@ -225,7 +252,6 @@ interface IKeyManager : public IInterface, extends IIndexReadContext
     virtual unsigned querySeeks() const = 0;
     virtual unsigned queryScans() const = 0;
     virtual unsigned querySkips() const = 0;
-    virtual unsigned queryNullSkips() const = 0;
     virtual const byte *loadBlob(unsigned __int64 blobid, size32_t &blobsize) = 0;
     virtual void releaseBlobs() = 0;
     virtual void resetCounts() = 0;

+ 57 - 18
system/jhtree/jhtree.ipp

@@ -104,7 +104,7 @@ public:
     virtual bool IsShared() const { return CInterface::IsShared(); }
 
 // IKeyIndex impl.
-    virtual IKeyCursor *getCursor(IContextLogger *ctx);
+    virtual IKeyCursor *getCursor(const SegMonitorList *segs) override;
 
     virtual size32_t keySize();
     virtual bool hasPayload();
@@ -167,37 +167,76 @@ public:
 
 class jhtree_decl CKeyCursor : public IKeyCursor, public CInterface  // Concrete IKeyCursor bound to one CKeyIndex; members are protected so CPartialKeyCursor can derive from it
 {
-private:
-    IContextLogger *ctx;
+protected:
     CKeyIndex &key;
+    const SegMonitorList *segs;  // raw pointer; NOTE(review): ownership and lifetime are not visible in this declaration - confirm in jhtree.cpp
+    char *keyBuffer = nullptr;  // defaults to nullptr; no allocation or free is visible in this declaration - confirm ownership in jhtree.cpp
     Owned<CJHTreeNode> node;
     unsigned int nodeKey;
-    ConstPointerArray activeBlobs;
 
-    CJHTreeNode *locateFirstNode();
-    CJHTreeNode *locateLastNode();
+    bool eof=false;  // default-initialised to false
+    bool matched=false;  // default-initialised to false
 
 public:
     IMPLEMENT_IINTERFACE;
-    CKeyCursor(CKeyIndex &_key, IContextLogger *ctx);
+    CKeyCursor(CKeyIndex &_key, const SegMonitorList *segs);  // parameter segs shares its name with the member; presumably stored into it - confirm
     ~CKeyCursor();
 
-    virtual bool next(char *dst);
-    virtual bool prev(char *dst);
-    virtual bool first(char *dst);
-    virtual bool last(char *dst);
-    virtual bool gtEqual(const char *src, char *dst, bool seekForward);
-    virtual bool ltEqual(const char *src, char *dst, bool seekForward);
+    virtual bool next(char *dst, KeyStatsCollector &stats) override;
+    virtual bool first(char *dst, KeyStatsCollector &stats) override;
+    virtual bool last(char *dst, KeyStatsCollector &stats) override;
+    virtual bool gtEqual(const char *src, char *dst, KeyStatsCollector &stats) override;
+    virtual bool ltEqual(const char *src, KeyStatsCollector &stats) override;
+    virtual const char *queryName() const override;
     virtual size32_t getSize();
+    virtual size32_t getKeyedSize() const;  // NOTE(review): matches IKeyCursor::getKeyedSize() but is not marked override, unlike the declarations above
     virtual offset_t getFPos(); 
     virtual void serializeCursorPos(MemoryBuffer &mb);
-    virtual void deserializeCursorPos(MemoryBuffer &mb, char *keyBuffer);
+    virtual void deserializeCursorPos(MemoryBuffer &mb, KeyStatsCollector &stats);  // NOTE(review): matches the IKeyCursor declaration but is not marked override
     virtual unsigned __int64 getSequence(); 
     virtual const byte *loadBlob(unsigned __int64 blobid, size32_t &blobsize);
-    virtual void releaseBlobs();
-    virtual void reset();
-    virtual bool bloomFilterReject(const SegMonitorList &segs) const override;  // returns true if record cannot possibly match
+    virtual void reset(unsigned sortFromSeg = 0);  // default 0 matches IKeyCursor::reset; NOTE(review): not marked override
+    virtual bool lookup(bool exact, KeyStatsCollector &stats) override;
+    virtual bool lookupSkip(const void *seek, size32_t seekOffset, size32_t seeklen, KeyStatsCollector &stats) override;
+    virtual bool skipTo(const void *_seek, size32_t seekOffset, size32_t seeklen) override;
+    virtual IKeyCursor *fixSortSegs(unsigned sortFieldOffset) override;
+
+    virtual unsigned __int64 getCount(KeyStatsCollector &stats) override;
+    virtual unsigned __int64 checkCount(unsigned __int64 max, KeyStatsCollector &stats) override;
+    virtual unsigned __int64 getCurrentRangeCount(unsigned groupSegCount, KeyStatsCollector &stats) override;
+    virtual bool nextRange(unsigned groupSegCount) override;
+    virtual const byte *queryKeyBuffer() const override;
+protected:
+    CKeyCursor(const CKeyCursor &from);  // copy constructor is protected, so only this class and subclasses can copy a cursor
+
+    bool _lookup(bool exact, unsigned lastSeg, KeyStatsCollector &stats);  // same leading arguments as lookup() plus lastSeg; presumably the shared implementation - confirm
+    void reportExcessiveSeeks(unsigned numSeeks, unsigned lastSeg, KeyStatsCollector &stats);
+    CJHTreeNode *locateFirstNode(KeyStatsCollector &stats);
+    CJHTreeNode *locateLastNode(KeyStatsCollector &stats);
+
+
+    inline void setLow(unsigned segNo)  // forwards to segs->setLow, supplying keyBuffer
+    {
+        segs->setLow(segNo, keyBuffer);  // dereferences segs without a null check
+    }
+    inline unsigned setLowAfter(size32_t offset)  // forwards to segs->setLowAfter and returns its result
+    {
+        return segs->setLowAfter(offset, keyBuffer);
+    }
+    inline bool incrementKey(unsigned segno) const  // const member, yet it hands the non-const keyBuffer pointer to segs->incrementKey
+    {
+        return segs->incrementKey(segno, keyBuffer);
+    }
+    inline void endRange(unsigned segno)  // forwards to segs->endRange, supplying keyBuffer
+    {
+        segs->endRange(segno, keyBuffer);
+    }
 };
 
-
+class CPartialKeyCursor : public CKeyCursor  // CKeyCursor subclass that declares only a constructor and destructor; everything else is inherited
+{
+public:
+    CPartialKeyCursor(const CKeyCursor &from, unsigned sortFieldOffset);  // built from an existing cursor plus a sort-field offset; presumably what CKeyCursor::fixSortSegs() creates - confirm in jhtree.cpp
+    ~CPartialKeyCursor();  // user-declared destructor; what it releases is defined outside this header
+};
 #endif

+ 6 - 5
system/jhtree/keydiff.cpp

@@ -55,9 +55,9 @@ public:
         *fpos = 0;
     }
 
-    bool getCursorNext(IKeyCursor * keyCursor)
+    bool getCursorNext(IKeyCursor * keyCursor, KeyStatsCollector &stats)
     {
-        if(keyCursor->next(row))
+        if(keyCursor->next(row, stats))
         {
             thisrowsize = keyCursor->getSize() - sizeof(offset_t);
             *fpos = rtlReadBigUInt8(row + thisrowsize);
@@ -215,7 +215,7 @@ private:
 class CKeyReader: public CInterface
 {
 public:
-    CKeyReader(char const * filename) : count(0)
+    CKeyReader(char const * filename) : count(0), stats(nullptr)
     {
         keyFile.setown(createIFile(filename));
         keyFileIO.setown(keyFile->open(IFOread));
@@ -259,7 +259,7 @@ public:
     {
         if(eof)
             return false;
-        if(buffer.getCursorNext(keyCursor))
+        if(buffer.getCursorNext(keyCursor, stats))
         {
             buffer.tally(crc);
             count++;
@@ -282,7 +282,7 @@ public:
         char * buff = reinterpret_cast<char *>(malloc(rowsize));
         while(!eof)
         {
-            if(keyCursor->next(buff))
+            if(keyCursor->next(buff, stats))
             {
                 size32_t offset = keyCursor->getSize() - sizeof(offset_t);
                 offset_t fpos = rtlReadBigUInt8(buff + offset);
@@ -330,6 +330,7 @@ private:
     Owned<IFileIO> keyFileIO;
     Owned<IKeyIndex> keyIndex;
     Owned<IKeyCursor> keyCursor;
+    KeyStatsCollector stats;
     CRC32 crc;
     size32_t keyedsize;
     size32_t rowsize;