// jhtree.hpp
  /*##############################################################################

      HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.

      Licensed under the Apache License, Version 2.0 (the "License");
      you may not use this file except in compliance with the License.
      You may obtain a copy of the License at

         http://www.apache.org/licenses/LICENSE-2.0

      Unless required by applicable law or agreed to in writing, software
      distributed under the License is distributed on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      See the License for the specific language governing permissions and
      limitations under the License.
  ############################################################################## */
  13. #ifndef _JHTREE_INCL
  14. #define _JHTREE_INCL
  15. #ifdef JHTREE_EXPORTS
  16. #define jhtree_decl DECL_EXPORT
  17. #else
  18. #define jhtree_decl DECL_IMPORT
  19. #endif
  20. #include "jiface.hpp"
  21. #include "jfile.hpp"
  22. #include "jlog.hpp"
  23. #include "errorlist.h"
  24. enum NodeType : char;
  25. class BloomFilter;
  26. interface IIndexFilterList;
  27. interface jhtree_decl IDelayedFile : public IInterface
  28. {
  29. virtual IMemoryMappedFile *getMappedFile() = 0;
  30. virtual IFileIO *getFileIO() = 0;
  31. };
  32. class KeyStatsCollector
  33. {
  34. public:
  35. IContextLogger *ctx;
  36. unsigned seeks = 0;
  37. unsigned scans = 0;
  38. unsigned wildseeks = 0;
  39. unsigned skips = 0;
  40. unsigned nullskips = 0;
  41. KeyStatsCollector(IContextLogger *_ctx) : ctx(_ctx) {}
  42. void reset();
  43. void noteSeeks(unsigned lseeks, unsigned lscans, unsigned lwildseeks);
  44. void noteSkips(unsigned lskips, unsigned lnullSkips);
  45. };
  46. interface jhtree_decl IKeyCursor : public IInterface
  47. {
  48. virtual bool next(char *dst, KeyStatsCollector &stats) = 0; // MORE - remove
  49. virtual const char *queryName() const = 0;
  50. virtual size32_t getSize() = 0; // Size of current row
  51. virtual size32_t getKeyedSize() const = 0; // Size of keyed fields
  52. virtual void serializeCursorPos(MemoryBuffer &mb) = 0;
  53. virtual void deserializeCursorPos(MemoryBuffer &mb, KeyStatsCollector &stats) = 0;
  54. virtual unsigned __int64 getSequence() = 0;
  55. virtual const byte *loadBlob(unsigned __int64 blobid, size32_t &blobsize) = 0;
  56. virtual void reset() = 0;
  57. virtual bool lookup(bool exact, KeyStatsCollector &stats) = 0;
  58. virtual bool lookupSkip(const void *seek, size32_t seekOffset, size32_t seeklen, KeyStatsCollector &stats) = 0;
  59. virtual bool skipTo(const void *_seek, size32_t seekOffset, size32_t seeklen) = 0;
  60. virtual IKeyCursor *fixSortSegs(unsigned sortFieldOffset) = 0;
  61. virtual unsigned __int64 getCount(KeyStatsCollector &stats) = 0;
  62. virtual unsigned __int64 checkCount(unsigned __int64 max, KeyStatsCollector &stats) = 0;
  63. virtual unsigned __int64 getCurrentRangeCount(unsigned groupSegCount, KeyStatsCollector &stats) = 0;
  64. virtual bool nextRange(unsigned groupSegCount) = 0;
  65. virtual const byte *queryKeyBuffer() const = 0;
  66. virtual void mergeStats(CRuntimeStatisticCollection & stats) const = 0;
  67. };
  68. interface IKeyIndex;
  69. interface jhtree_decl IKeyIndexBase : public IInterface
  70. {
  71. virtual unsigned numParts() = 0;
  72. virtual IKeyIndex *queryPart(unsigned idx) = 0;
  73. virtual bool IsShared() const = 0;
  74. };
  75. interface jhtree_decl IKeyIndex : public IKeyIndexBase
  76. {
  77. virtual IKeyCursor *getCursor(const IIndexFilterList *filter, bool logExcessiveSeeks) = 0;
  78. virtual size32_t keySize() = 0;
  79. virtual bool isFullySorted() = 0;
  80. virtual bool isTopLevelKey() = 0;
  81. virtual __uint64 getPartitionFieldMask() = 0;
  82. virtual unsigned numPartitions() = 0;
  83. virtual unsigned getFlags() = 0;
  84. virtual void dumpNode(FILE *out, offset_t pos, unsigned rowCount, bool isRaw) = 0;
  85. virtual unsigned queryScans() = 0;
  86. virtual unsigned querySeeks() = 0;
  87. virtual size32_t keyedSize() = 0;
  88. virtual bool hasPayload() = 0;
  89. virtual const char *queryFileName() = 0;
  90. virtual offset_t queryBlobHead() = 0;
  91. virtual void resetCounts() = 0;
  92. virtual offset_t queryLatestGetNodeOffset() const = 0;
  93. virtual offset_t queryMetadataHead() = 0;
  94. virtual IPropertyTree * getMetadata() = 0;
  95. virtual unsigned getNodeSize() = 0;
  96. virtual const IFileIO *queryFileIO() const = 0;
  97. virtual bool hasSpecialFileposition() const = 0;
  98. virtual bool needsRowBuffer() const = 0;
  99. virtual bool prewarmPage(offset_t offset, NodeType type) = 0;
  100. virtual void mergeStats(CRuntimeStatisticCollection & stats) const = 0;
  101. };
  102. interface IKeyArray : extends IInterface
  103. {
  104. virtual bool IsShared() const = 0;
  105. virtual IKeyIndexBase *queryKeyPart(unsigned partNo) = 0;
  106. virtual unsigned length() = 0;
  107. virtual void addKey(IKeyIndexBase *f) = 0;
  108. };
  109. interface jhtree_decl IKeyIndexSet : public IKeyIndexBase
  110. {
  111. virtual void addIndex(IKeyIndex *newPart) = 0;
  112. virtual void setRecordCount(offset_t count) = 0;
  113. virtual void setTotalSize(offset_t size) = 0;
  114. virtual offset_t getRecordCount() = 0;
  115. virtual offset_t getTotalSize() = 0;
  116. };
  117. interface ICacheInfoRecorder
  118. {
  119. virtual void noteWarm(unsigned fileIdx, offset_t page, size32_t len, NodeType type) = 0;
  120. };
  121. extern jhtree_decl void clearKeyStoreCache(bool killAll);
  122. extern jhtree_decl void clearKeyStoreCacheEntry(const char *name);
  123. extern jhtree_decl void clearKeyStoreCacheEntry(const IFileIO *io);
  124. extern jhtree_decl unsigned setKeyIndexCacheSize(unsigned limit);
  125. extern jhtree_decl void clearNodeCache();
  126. // these methods return previous values
  127. extern jhtree_decl size32_t setNodeCacheMem(size32_t cacheSize);
  128. extern jhtree_decl size32_t setLeafCacheMem(size32_t cacheSize);
  129. extern jhtree_decl size32_t setBlobCacheMem(size32_t cacheSize);
  130. extern jhtree_decl void setLegacyNodeCache(bool _value);
  131. extern jhtree_decl void getNodeCacheInfo(ICacheInfoRecorder &cacheInfo);
  132. extern jhtree_decl IKeyIndex *createKeyIndex(const char *filename, unsigned crc, bool isTLK);
  133. extern jhtree_decl IKeyIndex *createKeyIndex(const char *filename, unsigned crc, IFileIO &ifile, unsigned fileIdx, bool isTLK);
  134. extern jhtree_decl IKeyIndex *createKeyIndex(const char *filename, unsigned crc, IDelayedFile &ifile, unsigned fileIdx, bool isTLK);
  135. extern jhtree_decl bool isIndexFile(const char *fileName);
  136. extern jhtree_decl bool isIndexFile(IFile *file);
  137. extern jhtree_decl void validateKeyFile(const char *keyfile, offset_t nodepos = 0);
  138. extern jhtree_decl IKeyIndexSet *createKeyIndexSet();
  139. extern jhtree_decl IKeyArray *createKeyArray();
  140. extern jhtree_decl StringBuffer &getIndexMetrics(StringBuffer &);
  141. extern jhtree_decl void resetIndexMetrics();
  142. extern jhtree_decl RelaxedAtomic<unsigned> nodesLoaded;
  143. extern jhtree_decl RelaxedAtomic<unsigned> cacheHits;
  144. extern jhtree_decl RelaxedAtomic<unsigned> cacheAdds;
  145. extern jhtree_decl RelaxedAtomic<unsigned> blobCacheHits;
  146. extern jhtree_decl RelaxedAtomic<unsigned> blobCacheAdds;
  147. extern jhtree_decl RelaxedAtomic<unsigned> blobCacheDups;
  148. extern jhtree_decl RelaxedAtomic<unsigned> leafCacheHits;
  149. extern jhtree_decl RelaxedAtomic<unsigned> leafCacheAdds;
  150. extern jhtree_decl RelaxedAtomic<unsigned> leafCacheDups;
  151. extern jhtree_decl RelaxedAtomic<unsigned> nodeCacheHits;
  152. extern jhtree_decl RelaxedAtomic<unsigned> nodeCacheAdds;
  153. extern jhtree_decl RelaxedAtomic<unsigned> nodeCacheDups;
  154. extern jhtree_decl bool linuxYield;
  155. extern jhtree_decl bool traceSmartStepping;
  156. extern jhtree_decl bool flushJHtreeCacheOnOOM;
  157. extern jhtree_decl bool useMemoryMappedIndexes;
  158. extern jhtree_decl void clearNodeStats();
  159. #define CHEAP_UCHAR_DEF
  160. #ifdef _WIN32
  161. typedef char16_t UChar;
  162. #else //_WIN32
  163. typedef unsigned short UChar;
  164. #endif //_WIN32
  165. #include "rtlkey.hpp"
  166. #include "rtlnewkey.hpp"
  167. #include "jmisc.hpp"
  168. class RtlRecord;
  169. interface IDynamicTransform;
  170. class jhtree_decl SegMonitorList : public CInterfaceOf<IIndexFilterList>
  171. {
  172. unsigned cachedLRS = 0;
  173. bool modified = true;
  174. const RtlRecord &recInfo;
  175. unsigned keySegCount;
  176. IArrayOf<IKeySegmentMonitor> segMonitors;
  177. size32_t getSize() const;
  178. unsigned _lastRealSeg() const;
  179. SegMonitorList(const SegMonitorList &_from, const char *fixedVals, unsigned sortFieldOffset);
  180. public:
  181. SegMonitorList(const RtlRecord &_recInfo);
  182. // interface IIndexReadContext
  183. virtual void append(IKeySegmentMonitor *segment) override;
  184. virtual IIndexFilter *item(unsigned i) const override;
  185. virtual void append(FFoption option, const IFieldFilter * filter) override;
  186. // interface IIndexFilterList
  187. virtual void setLow(unsigned segno, void *keyBuffer) const override;
  188. virtual unsigned setLowAfter(size32_t offset, void *keyBuffer) const override;
  189. virtual bool incrementKey(unsigned segno, void *keyBuffer) const override;
  190. virtual void endRange(unsigned segno, void *keyBuffer) const override;
  191. virtual unsigned lastRealSeg() const override { assertex(!modified); return cachedLRS; }
  192. unsigned lastFullSeg() const override;
  193. virtual unsigned numFilterFields() const override { return segMonitors.length(); }
  194. virtual IIndexFilterList *fixSortSegs(const char *fixedVals, unsigned sortFieldOffset) const override
  195. {
  196. return new SegMonitorList(*this, fixedVals, sortFieldOffset);
  197. }
  198. virtual void reset() override;
  199. virtual void checkSize(size32_t keyedSize, char const * keyname) const override;
  200. virtual void recalculateCache() override;
  201. virtual void finish(size32_t keyedSize) override;
  202. virtual void describe(StringBuffer &out) const override;
  203. virtual bool matchesBuffer(const void *buffer, unsigned lastSeg, unsigned &matchSeg) const override;
  204. virtual unsigned getFieldOffset(unsigned idx) const override { return recInfo.getFixedOffset(idx); }
  205. virtual bool canMatch() const override;
  206. };
  207. interface IIndexLookup : extends IInterface // similar to a small subset of IKeyManager
  208. {
  209. virtual void ensureAvailable() = 0;
  210. virtual const void *nextKey() = 0;
  211. virtual unsigned __int64 getCount() = 0;
  212. virtual unsigned __int64 checkCount(unsigned __int64 limit) = 0;
  213. virtual unsigned querySeeks() const = 0;
  214. virtual unsigned queryScans() const = 0;
  215. virtual unsigned querySkips() const = 0;
  216. virtual unsigned queryWildSeeks() const = 0;
  217. };
  218. interface IKeyManager : public IInterface, extends IIndexReadContext
  219. {
  220. virtual void reset(bool crappyHack = false) = 0;
  221. virtual void releaseSegmentMonitors() = 0;
  222. virtual const byte *queryKeyBuffer() = 0; //if using RLT: fpos is the translated value, so correct in a normal row
  223. virtual unsigned __int64 querySequence() = 0;
  224. virtual size32_t queryRowSize() = 0; // Size of current row as returned by queryKeyBuffer()
  225. virtual bool lookup(bool exact) = 0;
  226. virtual unsigned __int64 getCount() = 0;
  227. virtual unsigned __int64 getCurrentRangeCount(unsigned groupSegCount) = 0;
  228. virtual bool nextRange(unsigned groupSegCount) = 0;
  229. virtual void setKey(IKeyIndexBase * _key) = 0;
  230. virtual void setChooseNLimit(unsigned __int64 _rowLimit) = 0; // for choosen type functionality
  231. virtual unsigned __int64 checkCount(unsigned __int64 limit) = 0;
  232. virtual void serializeCursorPos(MemoryBuffer &mb) = 0;
  233. virtual void deserializeCursorPos(MemoryBuffer &mb) = 0;
  234. virtual unsigned querySeeks() const = 0;
  235. virtual unsigned queryScans() const = 0;
  236. virtual unsigned querySkips() const = 0;
  237. virtual unsigned queryWildSeeks() const = 0;
  238. virtual const byte *loadBlob(unsigned __int64 blobid, size32_t &blobsize) = 0;
  239. virtual void releaseBlobs() = 0;
  240. virtual void resetCounts() = 0;
  241. virtual void setLayoutTranslator(const IDynamicTransform * trans) = 0;
  242. virtual void finishSegmentMonitors() = 0;
  243. virtual void describeFilter(StringBuffer &out) const = 0;
  244. virtual bool lookupSkip(const void *seek, size32_t seekGEOffset, size32_t seeklen) = 0;
  245. virtual unsigned getPartition() = 0; // Use PARTITION() to retrieve partno, if possible, or zero to mean read all
  246. virtual unsigned numActiveKeys() const = 0;
  247. virtual void mergeStats(CRuntimeStatisticCollection & stats) const = 0;
  248. };
  249. inline offset_t extractFpos(IKeyManager * manager)
  250. {
  251. byte const * keyRow = manager->queryKeyBuffer();
  252. size32_t rowSize = manager->queryRowSize();
  253. size32_t offset = rowSize - sizeof(offset_t);
  254. return rtlReadBigUInt8(keyRow + offset);
  255. }
  256. class RtlRecord;
  257. extern jhtree_decl IKeyManager *createLocalKeyManager(const RtlRecord &_recInfo, IKeyIndex * _key, IContextLogger *ctx, bool _newFilters, bool _logExcessiveSeeks);
  258. extern jhtree_decl IKeyManager *createKeyMerger(const RtlRecord &_recInfo, IKeyIndexSet * _key, unsigned sortFieldOffset, IContextLogger *ctx, bool _newFilters, bool _logExcessiveSeeks);
  259. extern jhtree_decl IKeyManager *createSingleKeyMerger(const RtlRecord &_recInfo, IKeyIndex * _onekey, unsigned sortFieldOffset, IContextLogger *ctx, bool _newFilters, bool _logExcessiveSeeks);
  260. class KLBlobProviderAdapter : implements IBlobProvider
  261. {
  262. IKeyManager *klManager;
  263. public:
  264. KLBlobProviderAdapter(IKeyManager *_klManager) : klManager(_klManager) {};
  265. ~KLBlobProviderAdapter()
  266. {
  267. if (klManager)
  268. klManager->releaseBlobs();
  269. }
  270. virtual const byte * lookupBlob(unsigned __int64 id) { size32_t dummy; return klManager->loadBlob(id, dummy); }
  271. };
  272. extern jhtree_decl bool isCompressedIndex(const char *filename);
  273. extern jhtree_decl bool isIndexFile(IFileIO *fileIO);
  274. extern jhtree_decl bool isIndexFile(IFile *filename);
  275. extern jhtree_decl IIndexLookup *createIndexLookup(IKeyManager *keyManager);
  276. #define JHTREE_KEY_NOT_SORTED JHTREE_ERROR_START
  277. #endif