// ctfile.hpp
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #ifndef CTFILE_HPP
  14. #define CTFILE_HPP
  15. #include "jiface.hpp"
  16. #include "jhutil.hpp"
  17. #include "hlzw.h"
  18. #include "jcrc.hpp"
  19. #include "jio.hpp"
  20. #include "jfile.hpp"
  #define NODESIZE 8192

  // Single-bit flags, listed in ascending bit order.
  #define TRAILING_HEADER_ONLY   0x01 // Leading header not updated - use trailing one
  #define HTREE_TOPLEVEL_KEY     0x02
  #define COL_PREFIX             0x04
  #define HTREE_QUICK_COMPRESSED 0x08 // See HTREE_QUICK_COMPRESSED_KEY below
  #define HTREE_VARSIZE          0x10
  #define HTREE_FULLSORT_KEY     0x20
  #define HTREE_COMPRESSED_KEY   0x40
  #define USE_TRAILING_HEADER    0x80 // Real index header node located at end of file

  // Composite flag (0x48): both the compressed and the quick-compressed bits set.
  #define HTREE_QUICK_COMPRESSED_KEY (HTREE_QUICK_COMPRESSED | HTREE_COMPRESSED_KEY)

  // unsigned short. NB: This should be upped if a change would make existing
  // keys incompatible with the current build.
  #define KEYBUILD_VERSION 1
  #define KEYBUILD_MAXLENGTH 0x7FFF
  // Raw index-file header: a structure to be read into directly - NO VIRTUALS.
  // This header layout corresponds to FairCom cTree layout for compatibility with old systems ...
  //
  // Field order and types define the layout, so they must not be rearranged.
  // The trailing "NNx" in each field comment appears to be that field's hex
  // byte offset within the struct (it matches the offsets implied by the
  // declared types when __int64 is 8 bytes).
  struct __declspec(novtable) jhtree_decl KeyHdr
  {
      __int64 phyrec; /* last byte offset of file 00x */
      __int64 delstk; /* top of delete stack: fixed len data 08x */
      __int64 numrec; /* last byte offset written 10x */
      __int64 reshdr; /* resource header 18x */
      __int64 lstmbr; /* last super file member/position 20x */
      __int64 sernum; /* serial number 28x */
      __int64 nument; /* active entries 30x */
      __int64 root; /* B-Tree root 38x */
      __int64 fileid; /* unique file id 40x */
      __int64 servid; /* unique server id 48x */
      short verson; /* configuration options at create 50x */
      unsigned short nodeSize; /* node record size 52x */
      unsigned short reclen; /* data record length 54x */
      unsigned short extsiz; /* extend file (chunk) size 56x */
      unsigned short flmode; /* file mode (virtual, etc) 58x */
      unsigned short logtyp; /* permanent components of file mode 5ax */
      unsigned short maxkbl; /* maximum key bytes leaf-var 5cx */
      unsigned short maxkbn; /* maximum key bytes non leaf-var 5ex */
      char updflg; /* update (corrupt) flag 60x */
      char ktype; /* file type flag 61x */
      char autodup;/* duplicate flag 62x */
      char deltyp; /* flag for type of idx delete 63x */
      unsigned char keypad; /* padding byte 64x */
      unsigned char flflvr; /* file flavor 65x */
      unsigned char flalgn; /* file alignment 66x */
      unsigned char flpntr; /* file pointer size 67x */
      unsigned short clstyp; /* flag for file type 68x */
      unsigned short length; /* key length 6ax */
      short nmem; /* number of members 6cx */
      short kmem; /* member number 6ex */
      __int64 lanchr; /* left most leaf anchor 70x */
      __int64 supid; /* super file member # 78x */
      __int64 hdrpos; /* header position 80x */
      __int64 sihdr; /* superfile header index hdr position 88x */
      __int64 timeid; /* time id# 90x */
      unsigned short suptyp; /* super file type 98x */
      unsigned short maxmrk; /* maximum exc mark entries per leaf 9ax */
      unsigned short namlen; /* MAX_NAME at creation 9cx */
      unsigned short xflmod; /* extended file mode info 9ex */
      __int64 defrel; /* file def release mask a0x */
      __int64 hghtrn; /* tran# high water mark for idx a8x */
      __int64 hdrseq; /* wrthdr sequence # b0x */
      __int64 tstamp; /* update time stamp b8x */
      __int64 rs3[3]; /* future use c0x */
      __int64 fposOffset; /* amount by which file positions are biased d8x */
      __int64 fileSize; /* fileSize - was once used in the bias calculation e0x */
      short nodeKeyLength; /* key length in intermediate level nodes e8x */
      unsigned short version; /* build version - to be updated if key format changes eax*/
      short unused[2]; /* unused ecx */
      __int64 blobHead; /* fpos of first blob node f0x */
      __int64 metadataHead; /* fpos of first metadata node f8x */
      __int64 bloomHead; /* fpos of bloom table data, if present 100x */
      __uint64 partitionFieldMask; /* Bitmap indicating partition keyed fields 108x */
  };
  91. //#pragma pack(1)
  92. #pragma pack(push,1)
  93. struct jhtree_decl NodeHdr
  94. {
  95. __int64 rightSib;
  96. __int64 leftSib;
  97. unsigned short numKeys;
  98. unsigned short keyBytes;
  99. unsigned crc32;
  100. char unusedMemNumber;
  101. char leafFlag;
  102. bool isValid(unsigned nodeSize)
  103. {
  104. return
  105. (rightSib % nodeSize == 0) &&
  106. (leftSib % nodeSize == 0) &&
  107. (unusedMemNumber==0) &&
  108. (keyBytes < nodeSize);
  109. }
  110. };
  111. //#pragma pack(4)
  112. #pragma pack(pop)
  113. class CWritableKeyNode : public CInterface
  114. {
  115. public:
  116. virtual void write(IFileIOStream *, CRC32 *crc) = 0;
  117. };
  118. class jhtree_decl CKeyHdr : public CWritableKeyNode
  119. {
  120. private:
  121. KeyHdr hdr;
  122. public:
  123. CKeyHdr();
  124. void load(KeyHdr &_hdr);
  125. virtual void write(IFileIOStream *, CRC32 *crc) override;
  126. unsigned int getMaxKeyLength();
  127. bool isVariable();
  128. inline unsigned int getNodeKeyLength()
  129. {
  130. return hdr.nodeKeyLength != -1 ? hdr.nodeKeyLength : getMaxKeyLength();
  131. }
  132. inline bool hasPayload()
  133. {
  134. return (hdr.nodeKeyLength != -1);
  135. }
  136. inline unsigned char getKeyPad() { return hdr.keypad; }
  137. inline char getKeyType() { return hdr.ktype; }
  138. inline offset_t getRootFPos() { return hdr.root; }
  139. inline unsigned short getMaxNodeBytes() { return hdr.maxkbl; }
  140. inline KeyHdr *getHdrStruct() { return &hdr; }
  141. inline static size32_t getSize() { return sizeof(KeyHdr); }
  142. inline unsigned getNodeSize() { return hdr.nodeSize; }
  143. inline bool hasSpecialFileposition() const { return true; }
  144. inline bool isRowCompressed() const { return (hdr.ktype & HTREE_QUICK_COMPRESSED_KEY) == HTREE_QUICK_COMPRESSED_KEY; }
  145. __uint64 getPartitionFieldMask()
  146. {
  147. if (hdr.partitionFieldMask == (__uint64) -1)
  148. return 0;
  149. else
  150. return hdr.partitionFieldMask;
  151. }
  152. unsigned numPartitions()
  153. {
  154. if (hdr.ktype & HTREE_TOPLEVEL_KEY)
  155. return (unsigned) hdr.nument-1;
  156. else
  157. return 0;
  158. }
  159. };
  160. class jhtree_decl CNodeBase : public CWritableKeyNode
  161. {
  162. protected:
  163. NodeHdr hdr;
  164. byte keyType;
  165. size32_t keyLen;
  166. size32_t keyCompareLen;
  167. offset_t fpos;
  168. CKeyHdr *keyHdr;
  169. bool isVariable;
  170. public:
  171. virtual void write(IFileIOStream *, CRC32 *crc) { throwUnexpected(); }
  172. inline offset_t getFpos() const { return fpos; }
  173. inline size32_t getKeyLen() const { return keyLen; }
  174. inline size32_t getNumKeys() const { return hdr.numKeys; }
  175. inline bool isBlob() const { return hdr.leafFlag == 2; }
  176. inline bool isMetadata() const { return hdr.leafFlag == 3; }
  177. inline bool isBloom() const { return hdr.leafFlag == 4; }
  178. inline bool isLeaf() const { return hdr.leafFlag != 0; }
  179. public:
  180. CNodeBase();
  181. void load(CKeyHdr *keyHdr, offset_t fpos);
  182. ~CNodeBase();
  183. };
  184. class jhtree_decl CJHTreeNode : public CNodeBase
  185. {
  186. protected:
  187. size32_t keyRecLen;
  188. char *keyBuf;
  189. void unpack(const void *node, bool needCopy);
  190. unsigned __int64 firstSequence;
  191. size32_t expandedSize;
  192. static char *expandKeys(void *src,unsigned keylength,size32_t &retsize);
  193. static void releaseMem(void *togo, size32_t size);
  194. static void *allocMem(size32_t size);
  195. public:
  196. CJHTreeNode();
  197. virtual void load(CKeyHdr *keyHdr, const void *rawData, offset_t pos, bool needCopy);
  198. ~CJHTreeNode();
  199. size32_t getMemSize() { return expandedSize; }
  200. // reading methods
  201. offset_t prevNodeFpos() const;
  202. offset_t nextNodeFpos() const ;
  203. virtual bool getValueAt(unsigned int num, char *key) const;
  204. virtual const char *queryValueAt(unsigned int index, char *scratchBuffer) const;
  205. virtual const char *queryKeyAt(unsigned int index, char *scratchBuffer) const;
  206. virtual size32_t getSizeAt(unsigned int num) const;
  207. virtual offset_t getFPosAt(unsigned int num) const;
  208. virtual int compareValueAt(const char *src, unsigned int index) const;
  209. bool contains(const char *src) const;
  210. inline offset_t getRightSib() const { return hdr.rightSib; }
  211. inline offset_t getLeftSib() const { return hdr.leftSib; }
  212. unsigned __int64 getSequence(unsigned int num) const;
  213. };
  214. class CJHVarTreeNode : public CJHTreeNode
  215. {
  216. const char **recArray;
  217. public:
  218. CJHVarTreeNode();
  219. ~CJHVarTreeNode();
  220. virtual void load(CKeyHdr *keyHdr, const void *rawData, offset_t pos, bool needCopy);
  221. virtual bool getValueAt(unsigned int num, char *key) const;
  222. virtual const char *queryValueAt(unsigned int index, char *scratchBuffer) const;
  223. virtual const char *queryKeyAt(unsigned int index, char *scratchBuffer) const;
  224. virtual size32_t getSizeAt(unsigned int num) const;
  225. virtual offset_t getFPosAt(unsigned int num) const;
  226. virtual int compareValueAt(const char *src, unsigned int index) const;
  227. };
  228. class CJHRowCompressedNode : public CJHTreeNode
  229. {
  230. Owned<IRandRowExpander> rowexp; // expander for rand rowdiff
  231. static IRandRowExpander *expandQuickKeys(void *src, bool needCopy);
  232. public:
  233. virtual void load(CKeyHdr *keyHdr, const void *rawData, offset_t pos, bool needCopy);
  234. virtual bool getValueAt(unsigned int num, char *key) const;
  235. virtual const char *queryValueAt(unsigned int index, char *scratchBuffer) const;
  236. virtual const char *queryKeyAt(unsigned int index, char *scratchBuffer) const;
  237. virtual offset_t getFPosAt(unsigned int num) const;
  238. virtual int compareValueAt(const char *src, unsigned int index) const;
  239. };
  240. class CJHTreeBlobNode : public CJHTreeNode
  241. {
  242. public:
  243. CJHTreeBlobNode ();
  244. ~CJHTreeBlobNode ();
  245. virtual bool getValueAt(unsigned int num, char *key) const {throwUnexpected();}
  246. virtual const char *queryValueAt(unsigned int index, char *scratchBuffer) const {throwUnexpected();}
  247. virtual const char *queryKeyAt(unsigned int index, char *scratchBuffer) const {throwUnexpected();}
  248. virtual offset_t getFPosAt(unsigned int num) const {throwUnexpected();}
  249. virtual size32_t getSizeAt(unsigned int num) const {throwUnexpected();}
  250. virtual int compareValueAt(const char *src, unsigned int index) const {throwUnexpected();}
  251. virtual void dump() {throwUnexpected();}
  252. size32_t getTotalBlobSize(unsigned offset);
  253. size32_t getBlobData(unsigned offset, void *dst);
  254. };
  255. class CJHTreeMetadataNode : public CJHTreeNode
  256. {
  257. public:
  258. virtual bool getValueAt(unsigned int num, char *key) const {throwUnexpected();}
  259. virtual const char *queryValueAt(unsigned int index, char *scratchBuffer) const {throwUnexpected();}
  260. virtual const char *queryKeyAt(unsigned int index, char *scratchBuffer) const {throwUnexpected();}
  261. virtual offset_t getFPosAt(unsigned int num) const {throwUnexpected();}
  262. virtual size32_t getSizeAt(unsigned int num) const {throwUnexpected();}
  263. virtual int compareValueAt(const char *src, unsigned int index) const {throwUnexpected();}
  264. virtual void dump() {throwUnexpected();}
  265. void get(StringBuffer & out);
  266. };
  267. class CJHTreeBloomTableNode : public CJHTreeNode
  268. {
  269. public:
  270. virtual bool getValueAt(unsigned int num, char *key) const {throwUnexpected();}
  271. virtual const char *queryValueAt(unsigned int index, char *scratchBuffer) const {throwUnexpected();}
  272. virtual const char *queryKeyAt(unsigned int index, char *scratchBuffer) const {throwUnexpected();}
  273. virtual offset_t getFPosAt(unsigned int num) const {throwUnexpected();}
  274. virtual size32_t getSizeAt(unsigned int num) const {throwUnexpected();}
  275. virtual int compareValueAt(const char *src, unsigned int index) const {throwUnexpected();}
  276. virtual void dump() {throwUnexpected();}
  277. void get(MemoryBuffer & out);
  278. __int64 get8();
  279. unsigned get4();
  280. private:
  281. unsigned read = 0;
  282. };
  283. class jhtree_decl CNodeHeader : public CNodeBase
  284. {
  285. public:
  286. CNodeHeader();
  287. void load(NodeHdr &hdr);
  288. inline offset_t getRightSib() const { return hdr.rightSib; }
  289. inline offset_t getLeftSib() const { return hdr.leftSib; }
  290. };
  291. class jhtree_decl CWriteNodeBase : public CNodeBase
  292. {
  293. protected:
  294. char *nodeBuf;
  295. char *keyPtr;
  296. int maxBytes;
  297. KeyCompressor lzwcomp;
  298. void writeHdr();
  299. public:
  300. CWriteNodeBase(offset_t fpos, CKeyHdr *keyHdr);
  301. ~CWriteNodeBase();
  302. virtual void write(IFileIOStream *, CRC32 *crc) override;
  303. void setLeftSib(offset_t leftSib) { hdr.leftSib = leftSib; }
  304. void setRightSib(offset_t rightSib) { hdr.rightSib = rightSib; }
  305. };
  306. class jhtree_decl CWriteNode : public CWriteNodeBase
  307. {
  308. private:
  309. char *lastKeyValue;
  310. unsigned __int64 lastSequence;
  311. public:
  312. CWriteNode(offset_t fpos, CKeyHdr *keyHdr, bool isLeaf);
  313. ~CWriteNode();
  314. size32_t compressValue(const char *keyData, size32_t size, char *result);
  315. bool add(offset_t pos, const void *data, size32_t size, unsigned __int64 sequence);
  316. const void *getLastKeyValue() const { return lastKeyValue; }
  317. unsigned __int64 getLastSequence() const { return lastSequence; }
  318. };
  319. class jhtree_decl CBlobWriteNode : public CWriteNodeBase
  320. {
  321. static unsigned __int64 makeBlobId(offset_t nodepos, unsigned offset);
  322. public:
  323. CBlobWriteNode(offset_t _fpos, CKeyHdr *keyHdr);
  324. ~CBlobWriteNode();
  325. unsigned __int64 add(const char * &data, size32_t &size);
  326. };
  327. class jhtree_decl CMetadataWriteNode : public CWriteNodeBase
  328. {
  329. public:
  330. CMetadataWriteNode(offset_t _fpos, CKeyHdr *keyHdr);
  331. size32_t set(const char * &data, size32_t &size);
  332. };
  333. class jhtree_decl CBloomFilterWriteNode : public CWriteNodeBase
  334. {
  335. public:
  336. CBloomFilterWriteNode(offset_t _fpos, CKeyHdr *keyHdr);
  337. size32_t set(const byte * &data, size32_t &size);
  338. void put4(unsigned val);
  339. void put8(__int64 val);
  340. };
  341. enum KeyExceptionCodes
  342. {
  343. KeyExcpt_IncompatVersion = 1,
  344. };
  345. interface jhtree_decl IKeyException : extends IException { };
  346. IKeyException *MakeKeyException(int code, const char *format, ...) __attribute__((format(printf, 2, 3)));
  347. #endif