fvsource.ipp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #ifndef FVSOURCE_IPP
  14. #define FVSOURCE_IPP
  15. #include "fvdatasource.hpp"
  16. #include "dllserver.hpp"
  17. #include "hqlexpr.hpp"
  18. #include "eclhelper.hpp"
  // The following constants configure the different block/page sizes.

  // Size of chunks read directly from file.
  #define DISK_BLOCK_SIZE     0x10000
  // Page load work unit results >= this size.
  #define PAGED_WU_LIMIT      0x20000
  // Size of chunks read from Work unit.
  #define WU_BLOCK_SIZE       0x4000
  // Number of rows to read in each chunk from file.
  #define DISKREAD_PAGE_SIZE  200
  24. interface IRecordSizeEx : public IRecordSize
  25. {
  26. using IRecordSize::getRecordSize;
  27. virtual size32_t getRecordSize(unsigned maxLength, const void *rec) = 0;
  28. };
  29. class RecordSizeToEx : public CInterface, implements IRecordSizeEx
  30. {
  31. public:
  32. RecordSizeToEx(IRecordSize * _recordSize) : recordSize(_recordSize) {}
  33. IMPLEMENT_IINTERFACE
  34. virtual size32_t getRecordSize(const void *rec)
  35. {
  36. return recordSize->getRecordSize(rec);
  37. }
  38. virtual size32_t getRecordSize(unsigned maxLength, const void *rec)
  39. {
  40. return recordSize->getRecordSize(rec);
  41. }
  42. virtual size32_t getFixedSize() const
  43. {
  44. return recordSize->getFixedSize();
  45. }
  46. virtual size32_t getMinRecordSize() const
  47. {
  48. return recordSize->getMinRecordSize();
  49. }
  50. private:
  51. Linked<IRecordSize> recordSize;
  52. };
  //NB: The following naming convention is used below:
  // storedX      - size/structure in the WU/on disk
  // returnedX    - size/structure of the data actually sent to the program
  // transformedX - size/structure of the data after applying the transformation.
  // For a workunit storedX == returnedX; for disk returnedX == transformedX.
  58. class DataSourceMetaData;
  59. class DataSourceMetaItem : public CInterface
  60. {
  61. public:
  62. DataSourceMetaItem(unsigned _flags, const char * _name, const char * _xpath, ITypeInfo * _type);
  63. DataSourceMetaItem(unsigned flags, MemoryBuffer & in);
  64. virtual void serialize(MemoryBuffer & out) const;
  65. virtual DataSourceMetaData * queryChildMeta() { return NULL; }
  66. bool isXmlAttribute() const { return (tagname.length() && *tagname.get()=='@'); }
  67. public:
  68. StringAttr name;
  69. StringAttr xpath;
  70. StringAttr tagname;
  71. IntArray nestedAttributes;
  72. OwnedITypeInfo type;
  73. byte flags;
  74. bool hasMixedContent;
  75. };
  76. class DataSourceMetaData : public CInterface, implements IFvDataSourceMetaData, public IRecordSizeEx
  77. {
  78. friend class DataSourceSetItem;
  79. friend class DataSourceDatasetItem;
  80. public:
  81. DataSourceMetaData(IHqlExpression * _record, byte _numFieldsToIgnore, bool _randomIsOk, bool _isGrouped, unsigned _keyedSize);
  82. DataSourceMetaData(); // for NULL implementation
  83. DataSourceMetaData(type_t type);
  84. DataSourceMetaData(MemoryBuffer & in);
  85. IMPLEMENT_IINTERFACE
  86. virtual unsigned numColumns() const;
  87. virtual ITypeInfo * queryType(unsigned column) const;
  88. virtual const char * queryName(unsigned column) const;
  89. virtual const char * queryXPath(unsigned column) const;
  90. virtual bool supportsRandomSeek() const;
  91. virtual void serialize(MemoryBuffer & out) const;
  92. virtual unsigned queryFieldFlags(unsigned column) const;
  93. virtual const char *queryXmlTag(unsigned column) const;
  94. virtual const char *queryXmlTag() const;
  95. virtual const IntArray &queryAttrList() const;
  96. virtual const IntArray &queryAttrList(unsigned column) const;
  97. virtual bool mixedContent(unsigned column) const;
  98. virtual bool mixedContent() const;
  99. virtual IFvDataSourceMetaData * queryChildMeta(unsigned column) const;
  100. virtual IFvDataSource * createChildDataSource(unsigned column, unsigned len, const void * data);
  101. virtual unsigned numKeyedColumns() const;
  102. void addFileposition();
  103. void addGrouping();
  104. void addVirtualField(const char * name, const char * xpath, ITypeInfo * type);
  105. void extractKeyedInfo(UnsignedArray & offsets, TypeInfoArray & types);
  106. unsigned fixedSize() { return minRecordSize; }
  107. bool isFixedSize() { return isStoredFixedWidth; }
  108. bool isSingleSet() { return ((fields.ordinality() == 1) && (fields.item(0).type->getTypeCode() == type_set)); }
  109. inline unsigned getMaxRecordSize() { return maxRecordSize; }
  110. inline bool isKey() { return keyedSize != 0; }
  111. //IRecordSizeEx....
  112. virtual size32_t getRecordSize(const void *rec);
  113. virtual size32_t getFixedSize() const;
  114. virtual size32_t getRecordSize(unsigned maxLength, const void *rec)
  115. {
  116. return getRecordSize(rec);
  117. }
  118. virtual size32_t getMinRecordSize() const;
  119. protected:
  120. void addSimpleField(const char * name, const char * xpath, ITypeInfo * type, unsigned flag=FVFFnone);
  121. void gatherFields(IHqlExpression * expr, bool isConditional, bool *pMixedContent);
  122. void gatherChildFields(IHqlExpression * expr, bool isConditional, bool *pMixedContent);
  123. void gatherAttributes();
  124. void gatherNestedAttributes(DataSourceMetaItem &rec, aindex_t &idx);
  125. void init();
  126. protected:
  127. CIArrayOf<DataSourceMetaItem> fields;
  128. IntArray attributes;
  129. unsigned keyedSize;
  130. unsigned minRecordSize;
  131. unsigned maxRecordSize;
  132. unsigned bitsRemaining;
  133. unsigned numVirtualFields;
  134. bool isStoredFixedWidth;
  135. bool randomIsOk;
  136. bool hasMixedContent;
  137. byte numFieldsToIgnore;
  138. StringAttr tagname;
  139. };
  140. class DataSourceDatasetItem : public DataSourceMetaItem
  141. {
  142. public:
  143. DataSourceDatasetItem(const char * _name, const char * _xpath, IHqlExpression * expr);
  144. DataSourceDatasetItem(unsigned flags, MemoryBuffer & in);
  145. virtual DataSourceMetaData * queryChildMeta() { return &record; }
  146. virtual void serialize(MemoryBuffer & out) const;
  147. protected:
  148. DataSourceMetaData record;
  149. };
  150. class DataSourceSetItem : public DataSourceMetaItem
  151. {
  152. public:
  153. DataSourceSetItem(const char * _name, const char * _xpath, ITypeInfo * _type);
  154. DataSourceSetItem(unsigned flags, MemoryBuffer & in);
  155. virtual DataSourceMetaData * queryChildMeta() { return &record; }
  156. virtual void serialize(MemoryBuffer & out) const;
  157. protected:
  158. void createChild();
  159. protected:
  160. DataSourceMetaData record;
  161. };
  162. //---------------------------------------------------------------------------
  163. class RowBlock : public CInterface
  164. {
  165. public:
  166. RowBlock(MemoryBuffer & _buffer, __int64 _start, __int64 _startOffset);
  167. RowBlock(__int64 _start, __int64 _startOffset);
  168. virtual const void * fetchRow(__int64 offset, size32_t & len) = 0;
  169. virtual const void * getRow(__int64 search, size32_t & len, unsigned __int64 & rowOffset) = 0;
  170. __int64 getStartRow() const { return start; }
  171. __int64 getNextRow() const { return start + numRows; }
  172. virtual void getNextStoredOffset(__int64 & row, offset_t & offset);
  173. protected:
  174. MemoryBuffer buffer;
  175. __int64 start;
  176. __int64 startOffset;
  177. unsigned numRows;
  178. };
  179. class FixedRowBlock : public RowBlock
  180. {
  181. public:
  182. FixedRowBlock(MemoryBuffer & _buffer, __int64 _start, __int64 _startOffset, size32_t _fixedRecordSize);
  183. virtual const void * fetchRow(__int64 offset, size32_t & len);
  184. virtual const void * getRow(__int64 search, size32_t & len, unsigned __int64 & rowOffset);
  185. protected:
  186. size32_t fixedRecordSize;
  187. };
  188. class VariableRowBlock : public RowBlock
  189. {
  190. public:
  191. VariableRowBlock(MemoryBuffer & _buffer, __int64 _start, __int64 _startOffset, IRecordSizeEx * recordSize, bool isLast);
  192. VariableRowBlock(MemoryBuffer & inBuffer, __int64 _start); // used by remote
  193. virtual const void * fetchRow(__int64 offset, size32_t & len);
  194. virtual const void * getRow(__int64 search, size32_t & len, unsigned __int64 & rowOffset);
  195. protected:
  196. UnsignedArray rowIndex;
  197. };
  198. //---------------------------------------------------------------------------
  199. class FilePosFixedRowBlock : public FixedRowBlock
  200. {
  201. public:
  202. FilePosFixedRowBlock(MemoryBuffer & _buffer, __int64 _start, __int64 _startOffset, size32_t _fixedRecordSize) : FixedRowBlock(_buffer, _start, _startOffset, _fixedRecordSize) {}
  203. virtual void getNextStoredOffset(__int64 & row, offset_t & offset);
  204. };
  205. class FilePosVariableRowBlock : public VariableRowBlock
  206. {
  207. public:
  208. FilePosVariableRowBlock(MemoryBuffer & _buffer, __int64 _start, __int64 _startOffset, IRecordSizeEx * recordSize, bool isLast) : VariableRowBlock(_buffer, _start, _startOffset, recordSize, isLast) {}
  209. virtual void getNextStoredOffset(__int64 & row, offset_t & offset);
  210. };
  211. //---------------------------------------------------------------------------
  212. struct RowLocation
  213. {
  214. RowLocation() { matchRow = 0; matchLength = 0; bestRow = 0; bestOffset = 0; }
  215. const void * matchRow;
  216. size32_t matchLength;
  217. __int64 bestRow;
  218. offset_t bestOffset;
  219. };
  220. class RowCache
  221. {
  222. enum { MaxBlocksCached = 20, MinBlocksCached = 10 };
  223. public:
  224. void addRowsOwn(RowBlock * rows);
  225. bool getCacheRow(__int64 row, RowLocation & location);
  226. protected:
  227. void makeRoom();
  228. unsigned getBestRow(__int64 row);
  229. unsigned getInsertPosition(__int64 row);
  230. protected:
  231. CIArrayOf<RowBlock> allRows;
  232. Int64Array ages;
  233. };
  234. //---------------------------------------------------------------------------
  235. class FVDataSource : public ADataSource
  236. {
  237. public:
  238. FVDataSource();
  239. ~FVDataSource();
  240. virtual IFvDataSourceMetaData * queryMetaData();
  241. virtual bool fetchRow(MemoryBuffer & out, __int64 offset);
  242. virtual bool fetchRawRow(MemoryBuffer & out, __int64 offset);
  243. virtual bool getRow(MemoryBuffer & out, __int64 row);
  244. virtual bool getRawRow(MemoryBuffer & out, __int64 row);
  245. virtual void onClose() { openCount--; }
  246. virtual void onOpen() { openCount++; }
  247. protected:
  248. virtual bool fetchRowData(MemoryBuffer & out, __int64 offset) = 0;
  249. virtual bool getRowData(__int64 row, size32_t & length, const void * & data, unsigned __int64 & offset) = 0;
  250. protected:
  251. void addFileposition();
  252. void copyRow(MemoryBuffer & out, const void * src, size32_t length);
  253. void loadDll(const char * wuid);
  254. bool setReturnedInfoFromResult();
  255. protected:
  256. StringAttr wuid;
  257. Owned<IConstWUResult> wuResult;
  258. HqlExprAttr returnedRecord;
  259. Owned<DataSourceMetaData> returnedMeta;
  260. Owned<IRecordSizeEx> returnedRecordSize;
  261. Owned<DataSourceMetaData> transformedMeta;
  262. HqlExprAttr transformedRecord;
  263. Owned<ILoadedDllEntry> loadedDll;
  264. IArray pluginDlls;
  265. rowTransformFunction transformer;
  266. unsigned extraFieldsSize;
  267. unsigned openCount;
  268. bool appendFileposition;
  269. };
  270. class PagedDataSource : public FVDataSource
  271. {
  272. public:
  273. PagedDataSource() { totalRows = UNKNOWN_NUM_ROWS; }
  274. virtual __int64 numRows(bool force = false);
  275. virtual bool getRowData(__int64 row, size32_t & length, const void * & data, unsigned __int64 & offset);
  276. protected:
  277. virtual bool loadBlock(__int64 startRow, offset_t startOffset) = 0;
  278. virtual void improveLocation(__int64 row, RowLocation & location);
  279. protected:
  280. unsigned __int64 totalRows;
  281. RowCache cache;
  282. };
  283. class NullDataSource : public ADataSource
  284. {
  285. public:
  286. NullDataSource() {}
  287. NullDataSource(IHqlExpression * _record, bool _isGrouped, unsigned _keyedSize);
  288. virtual bool init() { return true; }
  289. virtual IFvDataSourceMetaData * queryMetaData() { return &meta; }
  290. virtual __int64 numRows(bool force = false) { return 0; }
  291. virtual bool fetchRow(MemoryBuffer & out, __int64 offset) { return false; }
  292. virtual bool fetchRawRow(MemoryBuffer & out, __int64 offset) { return false; }
  293. virtual bool getRow(MemoryBuffer & out, __int64 row){ return false; }
  294. virtual bool getRawRow(MemoryBuffer & out, __int64 row){ return false; }
  295. virtual bool isIndex() { return false; }
  296. virtual bool optimizeFilter(unsigned offset, unsigned len, const void * data) { return true; } // empty anyway...
  297. virtual void onClose() { }
  298. virtual void onOpen() { }
  299. protected:
  300. DataSourceMetaData meta;
  301. };
  302. class NestedDataSource : public FVDataSource
  303. {
  304. public:
  305. NestedDataSource(DataSourceMetaData & _meta, unsigned len, const void * data);
  306. //interface IFvDataSource
  307. virtual bool fetchRowData(MemoryBuffer & out, __int64 offset) { return false; }
  308. virtual bool getRowData(__int64 row, size32_t & length, const void * & data, unsigned __int64 & offset);
  309. virtual bool init();
  310. virtual bool isIndex() { return false; }
  311. virtual __int64 numRows(bool force = false);
  312. virtual bool optimizeFilter(unsigned offset, unsigned len, const void * data) { return false; }
  313. protected:
  314. unsigned __int64 totalSize;
  315. Owned<RowBlock> rows;
  316. };
  317. class FailureDataSource : public NullDataSource
  318. {
  319. public:
  320. FailureDataSource(IHqlExpression * _record, IException * _error, bool _isGrouped, unsigned _keyedSize);
  321. virtual void onOpen() { throw LINK(error); }
  322. protected:
  323. Linked<IException> error;
  324. };
  325. #define FullStringMatch ((unsigned)-1)
  326. extern IHqlExpression * parseQuery(const char * text);
  327. #endif