fvsource.ipp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
  1. /*##############################################################################
  2. Copyright (C) 2011 HPCC Systems.
  3. All rights reserved. This program is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU Affero General Public License as
  5. published by the Free Software Foundation, either version 3 of the
  6. License, or (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Affero General Public License for more details.
  11. You should have received a copy of the GNU Affero General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>.
  13. ############################################################################## */
  14. #ifndef FVSOURCE_IPP
  15. #define FVSOURCE_IPP
  16. #include "fvdatasource.hpp"
  17. #include "dllserver.hpp"
  18. #include "hqlexpr.hpp"
  19. #include "eclhelper.hpp"
  // The constants below configure the various block/page sizes.

  // Size of chunks read directly from file.
  // NOTE(review): 8096 is not a power of two (8192 is) - confirm the value is intended.
  #define DISK_BLOCK_SIZE     8096

  // Page load work unit results >= this size.
  #define PAGED_WU_LIMIT      0x20000

  // Size of chunks read from Work unit.
  #define WU_BLOCK_SIZE       0x4000

  // Number of rows to read in each chunk from file.
  #define DISKREAD_PAGE_SIZE  200
  25. interface IRecordSizeEx : public IRecordSize
  26. {
  27. IRecordSize::getRecordSize;
  28. virtual size32_t getRecordSize(unsigned maxLength, const void *rec) = 0;
  29. };
  30. class RecordSizeToEx : public CInterface, implements IRecordSizeEx
  31. {
  32. public:
  33. RecordSizeToEx(IRecordSize * _recordSize) : recordSize(_recordSize) {}
  34. IMPLEMENT_IINTERFACE
  35. virtual size32_t getRecordSize(const void *rec)
  36. {
  37. return recordSize->getRecordSize(rec);
  38. }
  39. virtual size32_t getRecordSize(unsigned maxLength, const void *rec)
  40. {
  41. return recordSize->getRecordSize(rec);
  42. }
  43. virtual size32_t getFixedSize() const
  44. {
  45. return recordSize->getFixedSize();
  46. }
  47. private:
  48. Linked<IRecordSize> recordSize;
  49. };
  50. //NB: The following convention is used below:
  51. // storedX - size/structure in WU/on disk
  52. // returnedX - size/structure of the data actually sent to the program
  53. // transformedX - size/structure of data after applying transformation.
  54. // for workunit storedX == returnedX for disk returnedX==transformedX
  55. class DataSourceMetaData;
  56. class DataSourceMetaItem : public CInterface
  57. {
  58. public:
  59. DataSourceMetaItem(unsigned _flags, const char * _name, const char * _xpath, ITypeInfo * _type);
  60. DataSourceMetaItem(unsigned flags, MemoryBuffer & in);
  61. virtual void serialize(MemoryBuffer & out) const;
  62. virtual DataSourceMetaData * queryChildMeta() { return NULL; }
  63. public:
  64. StringAttr name;
  65. StringAttr xpath;
  66. OwnedITypeInfo type;
  67. byte flags;
  68. };
  69. class DataSourceMetaData : public CInterface, implements IFvDataSourceMetaData, public IRecordSizeEx
  70. {
  71. friend class DataSourceSetItem;
  72. public:
  73. DataSourceMetaData(IHqlExpression * _record, byte _numFieldsToIgnore, bool _randomIsOk, bool _isGrouped, unsigned _keyedSize);
  74. DataSourceMetaData(); // for NULL implementation
  75. DataSourceMetaData(type_t type);
  76. DataSourceMetaData(MemoryBuffer & in);
  77. IMPLEMENT_IINTERFACE
  78. virtual unsigned numColumns() const;
  79. virtual ITypeInfo * queryType(unsigned column) const;
  80. virtual const char * queryName(unsigned column) const;
  81. virtual const char * queryXPath(unsigned column) const;
  82. virtual bool supportsRandomSeek() const;
  83. virtual void serialize(MemoryBuffer & out) const;
  84. virtual unsigned queryFieldFlags(unsigned column) const;
  85. virtual IFvDataSourceMetaData * queryChildMeta(unsigned column) const;
  86. virtual IFvDataSource * createChildDataSource(unsigned column, unsigned len, const void * data);
  87. virtual unsigned numKeyedColumns() const;
  88. void addFileposition();
  89. void addGrouping();
  90. void addVirtualField(const char * name, const char * xpath, ITypeInfo * type);
  91. void extractKeyedInfo(UnsignedArray & offsets, TypeInfoArray & types);
  92. unsigned fixedSize() { return storedFixedSize; }
  93. bool isFixedSize() { return isStoredFixedWidth; }
  94. bool isSingleSet() { return ((fields.ordinality() == 1) && (fields.item(0).type->getTypeCode() == type_set)); }
  95. inline unsigned getMaxRecordSize() { return maxRecordSize; }
  96. inline bool isKey() { return keyedSize != 0; }
  97. //IRecordSizeEx....
  98. virtual size32_t getRecordSize(const void *rec);
  99. virtual size32_t getFixedSize() const;
  100. virtual size32_t getRecordSize(unsigned maxLength, const void *rec)
  101. {
  102. return getRecordSize(rec);
  103. }
  104. protected:
  105. void addSimpleField(const char * name, const char * xpath, ITypeInfo * type);
  106. void gatherFields(IHqlExpression * expr, bool isConditional);
  107. void gatherChildFields(IHqlExpression * expr, bool isConditional);
  108. void init();
  109. protected:
  110. CIArrayOf<DataSourceMetaItem> fields;
  111. unsigned keyedSize;
  112. unsigned storedFixedSize;
  113. unsigned maxRecordSize;
  114. unsigned bitsRemaining;
  115. unsigned numVirtualFields;
  116. bool isStoredFixedWidth;
  117. bool randomIsOk;
  118. byte numFieldsToIgnore;
  119. };
  120. class DataSourceDatasetItem : public DataSourceMetaItem
  121. {
  122. public:
  123. DataSourceDatasetItem(const char * _name, const char * _xpath, IHqlExpression * expr);
  124. DataSourceDatasetItem(unsigned flags, MemoryBuffer & in);
  125. virtual DataSourceMetaData * queryChildMeta() { return &record; }
  126. virtual void serialize(MemoryBuffer & out) const;
  127. protected:
  128. DataSourceMetaData record;
  129. };
  130. class DataSourceSetItem : public DataSourceMetaItem
  131. {
  132. public:
  133. DataSourceSetItem(const char * _name, const char * _xpath, ITypeInfo * _type);
  134. DataSourceSetItem(unsigned flags, MemoryBuffer & in);
  135. virtual DataSourceMetaData * queryChildMeta() { return &record; }
  136. virtual void serialize(MemoryBuffer & out) const;
  137. protected:
  138. void createChild();
  139. protected:
  140. DataSourceMetaData record;
  141. };
  142. //---------------------------------------------------------------------------
  143. class RowBlock : public CInterface
  144. {
  145. public:
  146. RowBlock(MemoryBuffer & _buffer, __int64 _start, __int64 _startOffset);
  147. RowBlock(__int64 _start, __int64 _startOffset);
  148. virtual const void * fetchRow(__int64 offset, size32_t & len) = 0;
  149. virtual const void * getRow(__int64 search, size32_t & len, unsigned __int64 & rowOffset) = 0;
  150. __int64 getStartRow() const { return start; }
  151. __int64 getNextRow() const { return start + numRows; }
  152. virtual void getNextStoredOffset(__int64 & row, offset_t & offset);
  153. protected:
  154. MemoryBuffer buffer;
  155. __int64 start;
  156. __int64 startOffset;
  157. unsigned numRows;
  158. };
  159. class FixedRowBlock : public RowBlock
  160. {
  161. public:
  162. FixedRowBlock(MemoryBuffer & _buffer, __int64 _start, __int64 _startOffset, size32_t _fixedRecordSize);
  163. virtual const void * fetchRow(__int64 offset, size32_t & len);
  164. virtual const void * getRow(__int64 search, size32_t & len, unsigned __int64 & rowOffset);
  165. protected:
  166. size32_t fixedRecordSize;
  167. };
  168. class VariableRowBlock : public RowBlock
  169. {
  170. public:
  171. VariableRowBlock(MemoryBuffer & _buffer, __int64 _start, __int64 _startOffset, IRecordSizeEx * recordSize, bool isLast);
  172. VariableRowBlock(MemoryBuffer & inBuffer, __int64 _start); // used by remote
  173. virtual const void * fetchRow(__int64 offset, size32_t & len);
  174. virtual const void * getRow(__int64 search, size32_t & len, unsigned __int64 & rowOffset);
  175. protected:
  176. UnsignedArray rowIndex;
  177. };
  178. //---------------------------------------------------------------------------
  179. class FilePosFixedRowBlock : public FixedRowBlock
  180. {
  181. public:
  182. FilePosFixedRowBlock(MemoryBuffer & _buffer, __int64 _start, __int64 _startOffset, size32_t _fixedRecordSize) : FixedRowBlock(_buffer, _start, _startOffset, _fixedRecordSize) {}
  183. virtual void getNextStoredOffset(__int64 & row, offset_t & offset);
  184. };
  185. class FilePosVariableRowBlock : public VariableRowBlock
  186. {
  187. public:
  188. FilePosVariableRowBlock(MemoryBuffer & _buffer, __int64 _start, __int64 _startOffset, IRecordSizeEx * recordSize, bool isLast) : VariableRowBlock(_buffer, _start, _startOffset, recordSize, isLast) {}
  189. virtual void getNextStoredOffset(__int64 & row, offset_t & offset);
  190. };
  191. //---------------------------------------------------------------------------
  192. struct RowLocation
  193. {
  194. RowLocation() { matchRow = 0; matchLength = 0; bestRow = 0; bestOffset = 0; }
  195. const void * matchRow;
  196. size32_t matchLength;
  197. __int64 bestRow;
  198. offset_t bestOffset;
  199. };
  200. class RowCache
  201. {
  202. enum { MaxBlocksCached = 20, MinBlocksCached = 10 };
  203. public:
  204. void addRowsOwn(RowBlock * rows);
  205. bool getCacheRow(__int64 row, RowLocation & location);
  206. protected:
  207. void makeRoom();
  208. unsigned getBestRow(__int64 row);
  209. unsigned getInsertPosition(__int64 row);
  210. protected:
  211. CIArrayOf<RowBlock> allRows;
  212. Int64Array ages;
  213. };
  214. //---------------------------------------------------------------------------
  215. class FVDataSource : public ADataSource
  216. {
  217. public:
  218. FVDataSource();
  219. ~FVDataSource();
  220. virtual IFvDataSourceMetaData * queryMetaData();
  221. virtual bool fetchRow(MemoryBuffer & out, __int64 offset);
  222. virtual bool fetchRawRow(MemoryBuffer & out, __int64 offset);
  223. virtual bool getRow(MemoryBuffer & out, __int64 row);
  224. virtual bool getRawRow(MemoryBuffer & out, __int64 row);
  225. virtual void onClose() { openCount--; }
  226. virtual void onOpen() { openCount++; }
  227. protected:
  228. virtual bool fetchRowData(MemoryBuffer & out, __int64 offset) = 0;
  229. virtual bool getRowData(__int64 row, size32_t & length, const void * & data, unsigned __int64 & offset) = 0;
  230. protected:
  231. void addFileposition();
  232. void copyRow(MemoryBuffer & out, const void * src, size32_t length);
  233. void loadDll(const char * wuid);
  234. bool setReturnedInfoFromResult();
  235. protected:
  236. StringAttr wuid;
  237. Owned<IConstWUResult> wuResult;
  238. HqlExprAttr returnedRecord;
  239. Owned<DataSourceMetaData> returnedMeta;
  240. Owned<IRecordSizeEx> returnedRecordSize;
  241. Owned<DataSourceMetaData> transformedMeta;
  242. HqlExprAttr transformedRecord;
  243. Owned<ILoadedDllEntry> loadedDll;
  244. Array pluginDlls;
  245. rowTransformFunction transformer;
  246. unsigned extraFieldsSize;
  247. unsigned openCount;
  248. bool appendFileposition;
  249. };
  250. class PagedDataSource : public FVDataSource
  251. {
  252. public:
  253. PagedDataSource() { totalRows = UNKNOWN_NUM_ROWS; }
  254. virtual __int64 numRows(bool force = false);
  255. virtual bool getRowData(__int64 row, size32_t & length, const void * & data, unsigned __int64 & offset);
  256. protected:
  257. virtual bool loadBlock(__int64 startRow, offset_t startOffset) = 0;
  258. virtual void improveLocation(__int64 row, RowLocation & location);
  259. protected:
  260. unsigned __int64 totalRows;
  261. RowCache cache;
  262. };
  263. class NullDataSource : public ADataSource
  264. {
  265. public:
  266. NullDataSource() {}
  267. NullDataSource(IHqlExpression * _record, bool _isGrouped, unsigned _keyedSize);
  268. virtual bool init() { return true; }
  269. virtual IFvDataSourceMetaData * queryMetaData() { return &meta; }
  270. virtual __int64 numRows(bool force = false) { return 0; }
  271. virtual bool fetchRow(MemoryBuffer & out, __int64 offset) { return false; }
  272. virtual bool fetchRawRow(MemoryBuffer & out, __int64 offset) { return false; }
  273. virtual bool getRow(MemoryBuffer & out, __int64 row){ return false; }
  274. virtual bool getRawRow(MemoryBuffer & out, __int64 row){ return false; }
  275. virtual bool isIndex() { return false; }
  276. virtual bool optimizeFilter(unsigned offset, unsigned len, const void * data) { return true; } // empty anyway...
  277. virtual void onClose() { }
  278. virtual void onOpen() { }
  279. protected:
  280. DataSourceMetaData meta;
  281. };
  282. class NestedDataSource : public FVDataSource
  283. {
  284. public:
  285. NestedDataSource(DataSourceMetaData & _meta, unsigned len, const void * data);
  286. //interface IFvDataSource
  287. virtual bool fetchRowData(MemoryBuffer & out, __int64 offset) { return false; }
  288. virtual bool getRowData(__int64 row, size32_t & length, const void * & data, unsigned __int64 & offset);
  289. virtual bool init();
  290. virtual bool isIndex() { return false; }
  291. virtual __int64 numRows(bool force = false);
  292. virtual bool optimizeFilter(unsigned offset, unsigned len, const void * data) { return false; }
  293. protected:
  294. unsigned __int64 totalSize;
  295. Owned<RowBlock> rows;
  296. };
  297. class FailureDataSource : public NullDataSource
  298. {
  299. public:
  300. FailureDataSource(IHqlExpression * _record, IException * _error, bool _isGrouped, unsigned _keyedSize);
  301. virtual void onOpen() { throw LINK(error); }
  302. protected:
  303. Linked<IException> error;
  304. };
  305. #define FullStringMatch ((unsigned)-1)
  306. extern IHqlExpression * parseQuery(const char * text);
  307. #endif