// thorcommon.hpp
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #ifndef THORCOMMON_HPP
  14. #define THORCOMMON_HPP
  15. #include "jiface.hpp"
  16. #include "jcrc.hpp"
  17. #include "jsort.hpp"
  18. #include "jdebug.hpp"
  19. #include "eclhelper.hpp"
  20. #include "thorhelper.hpp"
  21. #include "thorxmlwrite.hpp"
  // Default size limits; every value is expressed in megabytes.
  static const unsigned defaultDaliResultOutputMax = 2000; // MB
  static const unsigned defaultDaliResultLimit     = 10;   // MB
  static const unsigned defaultMaxCsvRowSize       = 10;   // MB

  // Names of the options that carry the corresponding limits.
  #define OPT_OUTPUTLIMIT_LEGACY "outputLimit"     // OUTPUT Mb limit (legacy property name, renamed to outputLimitMb in 5.2)
  #define OPT_OUTPUTLIMIT        "outputLimitMb"   // OUTPUT Mb limit (default = 10 [MB])
  #define OPT_MAXCSVROWSIZE      "maxCsvRowSizeMb" // Upper limit on csv read line size (default = 10 [MB])
  28. class THORHELPER_API CSizingSerializer : implements IRowSerializerTarget
  29. {
  30. size32_t totalsize;
  31. public:
  32. inline CSizingSerializer() { reset(); }
  33. inline void reset() { totalsize = 0; }
  34. inline size32_t size() { return totalsize; }
  35. virtual void put(size32_t len, const void * ptr);
  36. virtual size32_t beginNested(size32_t count);
  37. virtual void endNested(size32_t position);
  38. };
  39. class THORHELPER_API CMemoryRowSerializer: implements IRowSerializerTarget
  40. {
  41. MemoryBuffer & buffer;
  42. unsigned nesting;
  43. public:
  44. inline CMemoryRowSerializer(MemoryBuffer & _buffer)
  45. : buffer(_buffer)
  46. {
  47. nesting = 0;
  48. }
  49. virtual void put(size32_t len, const void * ptr);
  50. virtual size32_t beginNested(size32_t count);
  51. virtual void endNested(size32_t sizePos);
  52. };
  53. // useful package
  54. interface IRowInterfaces: extends IInterface
  55. {
  56. virtual IEngineRowAllocator * queryRowAllocator()=0;
  57. virtual IOutputRowSerializer * queryRowSerializer()=0;
  58. virtual IOutputRowDeserializer * queryRowDeserializer()=0;
  59. virtual IOutputMetaData *queryRowMetaData()=0;
  60. virtual unsigned queryActivityId() const=0;
  61. virtual ICodeContext *queryCodeContext()=0;
  62. };
  63. extern THORHELPER_API void useMemoryMappedRead(bool on);
  64. extern THORHELPER_API IRowInterfaces *createRowInterfaces(IOutputMetaData *meta, unsigned actid, ICodeContext *context);
  // Bit flags for the row stream/writer factories; values are distinct bits
  // and are combined with bitwise OR.
  enum RowReaderWriterFlags
  {
      rw_grouped        = 0x01,
      rw_crc            = 0x02,
      rw_extend         = 0x04,
      rw_compress       = 0x08,
      rw_compressblkcrc = 0x10, // block compression, this sets/checks crc's at block level
      rw_fastlz         = 0x20, // if rw_compress
      rw_autoflush      = 0x40,
      rw_buffered       = 0x80
  };

  // Flag set used as the default argument of the factories declared in this header.
  #define DEFAULT_RWFLAGS (rw_buffered|rw_autoflush|rw_compressblkcrc)

  // True when any bit of 'flag' is set in 'flags'.
  inline bool TestRwFlag(unsigned flags, RowReaderWriterFlags flag)
  {
      return (flags & flag) != 0;
  }
  78. interface IExtRowStream: extends IRowStream
  79. {
  80. virtual offset_t getOffset() = 0;
  81. virtual void stop(CRC32 *crcout=NULL) = 0;
  82. virtual const void *prefetchRow(size32_t *sz=NULL) = 0;
  83. virtual void prefetchDone() = 0;
  84. virtual void reinit(offset_t offset,offset_t len,unsigned __int64 maxrows) = 0;
  85. };
  86. interface IExtRowWriter: extends IRowWriter
  87. {
  88. virtual offset_t getPosition() = 0;
  89. virtual void flush(CRC32 *crcout=NULL) = 0;
  90. };
  91. interface IExpander;
  92. extern THORHELPER_API IExtRowStream *createRowStream(IFile *file, IRowInterfaces *rowif, unsigned flags=DEFAULT_RWFLAGS, IExpander *eexp=NULL);
  93. extern THORHELPER_API IExtRowStream *createRowStreamEx(IFile *file, IRowInterfaces *rowif, offset_t offset=0, offset_t len=(offset_t)-1, unsigned __int64 maxrows=(unsigned __int64)-1, unsigned flags=DEFAULT_RWFLAGS, IExpander *eexp=NULL);
  94. interface ICompressor;
  95. extern THORHELPER_API IExtRowWriter *createRowWriter(IFile *file, IRowInterfaces *rowIf, unsigned flags=DEFAULT_RWFLAGS, ICompressor *compressor=NULL);
  96. extern THORHELPER_API IExtRowWriter *createRowWriter(IFileIO *fileIO, IRowInterfaces *rowIf, unsigned flags=DEFAULT_RWFLAGS);
  97. extern THORHELPER_API IExtRowWriter *createRowWriter(IFileIOStream *strm, IRowInterfaces *rowIf, unsigned flags=DEFAULT_RWFLAGS); // strm should be unbuffered
  98. interface THORHELPER_API IDiskMerger : extends IInterface
  99. {
  100. virtual void put(const void **rows, unsigned numrows) = 0;
  101. virtual void putIndirect(const void ***rowptrs, unsigned numrows) = 0; // like put only with an additional dereference, i.e. row i is *(rowptrs[i])
  102. virtual void put(ISortedRowProvider * rows) = 0;
  103. virtual IRowStream *merge(ICompare *icompare,bool partdedup=false) = 0;
  104. virtual count_t mergeTo(IRowWriter *dest,ICompare *icompare,bool partdedup=false) = 0; // alternative to merge
  105. virtual IRowWriter *createWriteBlock() = 0;
  106. };
  107. extern THORHELPER_API IDiskMerger *createDiskMerger(IRowInterfaces *rowInterfaces, IRowLinkCounter *linker, const char *tempnamebase);
  108. extern THORHELPER_API void testDiskSort();
  109. #define TIME_ACTIVITIES
  110. class ActivityTimeAccumulator
  111. {
  112. friend class ActivityTimer;
  113. public:
  114. ActivityTimeAccumulator()
  115. {
  116. startCycles = 0;
  117. totalCycles = 0;
  118. endCycles = 0;
  119. firstRow = 0;
  120. firstExitCycles = 0;
  121. }
  122. public:
  123. cycle_t startCycles; // Wall clock time of first entry to this activity
  124. cycle_t totalCycles; // Time spent in this activity
  125. cycle_t endCycles; // Wall clock time of last entry to this activity
  126. unsigned __int64 firstRow; // Timestamp of first row (nanoseconds since epoch)
  127. cycle_t firstExitCycles; // Wall clock time of first exit from this activity
  128. // Return the total amount of time (in nanoseconds) spent in this activity (first entry to last exit)
  129. inline unsigned __int64 elapsed() const { return cycle_to_nanosec(endCycles-startCycles); }
  130. // Return the total amount of time (in nanoseconds) spent in the first call of this activity (first entry to first exit)
  131. inline unsigned __int64 latency() const { return cycle_to_nanosec(firstExitCycles-startCycles); }
  132. void addStatistics(IStatisticGatherer & builder) const
  133. {
  134. if (totalCycles)
  135. {
  136. builder.addStatistic(StWhenFirstRow, firstRow);
  137. builder.addStatistic(StTimeElapsed, elapsed());
  138. builder.addStatistic(StTimeTotalExecute, cycle_to_nanosec(totalCycles));
  139. builder.addStatistic(StTimeFirstExecute, latency());
  140. }
  141. }
  142. };
  143. #ifdef TIME_ACTIVITIES
  144. #include "jdebug.hpp"
  145. class ActivityTimer
  146. {
  147. unsigned __int64 startCycles;
  148. ActivityTimeAccumulator &accumulator;
  149. protected:
  150. const bool enabled;
  151. bool isFirstRow;
  152. public:
  153. ActivityTimer(ActivityTimeAccumulator &_accumulator, const bool _enabled)
  154. : accumulator(_accumulator), enabled(_enabled), isFirstRow(false)
  155. {
  156. if (enabled)
  157. {
  158. startCycles = get_cycles_now();
  159. if (!accumulator.firstRow)
  160. {
  161. isFirstRow = true;
  162. accumulator.startCycles = startCycles;
  163. accumulator.firstRow = getTimeStampNowValue();
  164. }
  165. }
  166. else
  167. startCycles = 0;
  168. }
  169. ~ActivityTimer()
  170. {
  171. if (enabled)
  172. {
  173. cycle_t nowCycles = get_cycles_now();
  174. accumulator.endCycles = nowCycles;
  175. cycle_t elapsedCycles = nowCycles - startCycles;
  176. accumulator.totalCycles += elapsedCycles;
  177. if (isFirstRow)
  178. accumulator.firstExitCycles = nowCycles;
  179. }
  180. }
  181. };
  182. class SimpleActivityTimer
  183. {
  184. cycle_t startCycles;
  185. cycle_t &accumulator;
  186. protected:
  187. const bool enabled;
  188. public:
  189. inline SimpleActivityTimer(cycle_t &_accumulator, const bool _enabled)
  190. : accumulator(_accumulator), enabled(_enabled)
  191. {
  192. if (enabled)
  193. startCycles = get_cycles_now();
  194. else
  195. startCycles = 0;
  196. }
  197. inline ~SimpleActivityTimer()
  198. {
  199. if (enabled)
  200. {
  201. cycle_t nowCycles = get_cycles_now();
  202. cycle_t elapsedCycles = nowCycles - startCycles;
  203. accumulator += elapsedCycles;
  204. }
  205. }
  206. };
  207. #else
  208. struct ActivityTimer
  209. {
  210. inline ActivityTimer(ActivityTimeAccumulator &_accumulator, const bool _enabled) { }
  211. };
  212. struct SimpleActivityTimer
  213. {
  214. inline SimpleActivityTimer(unsigned __int64 &_accumulator, const bool _enabled) { }
  215. };
  216. #endif
  217. class THORHELPER_API IndirectCodeContext : implements ICodeContext
  218. {
  219. public:
  220. IndirectCodeContext(ICodeContext * _ctx = NULL) : ctx(_ctx) {}
  221. void set(ICodeContext * _ctx) { ctx = _ctx; }
  222. virtual const char *loadResource(unsigned id)
  223. {
  224. return ctx->loadResource(id);
  225. }
  226. virtual void setResultBool(const char *name, unsigned sequence, bool value)
  227. {
  228. ctx->setResultBool(name, sequence, value);
  229. }
  230. virtual void setResultData(const char *name, unsigned sequence, int len, const void * data)
  231. {
  232. ctx->setResultData(name, sequence, len, data);
  233. }
  234. virtual void setResultDecimal(const char * stepname, unsigned sequence, int len, int precision, bool isSigned, const void *val)
  235. {
  236. ctx->setResultDecimal(stepname, sequence, len, precision, isSigned, val);
  237. }
  238. virtual void setResultInt(const char *name, unsigned sequence, __int64 value, unsigned size)
  239. {
  240. ctx->setResultInt(name, sequence, value, size);
  241. }
  242. virtual void setResultRaw(const char *name, unsigned sequence, int len, const void * data)
  243. {
  244. ctx->setResultRaw(name, sequence, len, data);
  245. }
  246. virtual void setResultReal(const char * stepname, unsigned sequence, double value)
  247. {
  248. ctx->setResultReal(stepname, sequence, value);
  249. }
  250. virtual void setResultSet(const char *name, unsigned sequence, bool isAll, size32_t len, const void * data, ISetToXmlTransformer * transformer)
  251. {
  252. ctx->setResultSet(name, sequence, isAll, len, data, transformer);
  253. }
  254. virtual void setResultString(const char *name, unsigned sequence, int len, const char * str)
  255. {
  256. ctx->setResultString(name, sequence, len, str);
  257. }
  258. virtual void setResultUInt(const char *name, unsigned sequence, unsigned __int64 value, unsigned size)
  259. {
  260. ctx->setResultUInt(name, sequence, value, size);
  261. }
  262. virtual void setResultUnicode(const char *name, unsigned sequence, int len, UChar const * str)
  263. {
  264. ctx->setResultUnicode(name, sequence, len, str);
  265. }
  266. virtual void setResultVarString(const char * name, unsigned sequence, const char * value)
  267. {
  268. ctx->setResultVarString(name, sequence, value);
  269. }
  270. virtual void setResultVarUnicode(const char * name, unsigned sequence, UChar const * value)
  271. {
  272. ctx->setResultVarUnicode(name, sequence, value);
  273. }
  274. virtual bool getResultBool(const char * name, unsigned sequence)
  275. {
  276. return ctx->getResultBool(name, sequence);
  277. }
  278. virtual void getResultData(unsigned & tlen, void * & tgt, const char * name, unsigned sequence)
  279. {
  280. ctx->getResultData(tlen, tgt, name, sequence);
  281. }
  282. virtual void getResultDecimal(unsigned tlen, int precision, bool isSigned, void * tgt, const char * stepname, unsigned sequence)
  283. {
  284. ctx->getResultDecimal(tlen, precision, isSigned, tgt, stepname, sequence);
  285. }
  286. virtual void getResultRaw(unsigned & tlen, void * & tgt, const char * name, unsigned sequence, IXmlToRowTransformer * xmlTransformer, ICsvToRowTransformer * csvTransformer)
  287. {
  288. ctx->getResultRaw(tlen, tgt, name, sequence, xmlTransformer, csvTransformer);
  289. }
  290. virtual void getResultSet(bool & isAll, size32_t & tlen, void * & tgt, const char * name, unsigned sequence, IXmlToRowTransformer * xmlTransformer, ICsvToRowTransformer * csvTransformer)
  291. {
  292. ctx->getResultSet(isAll, tlen, tgt, name, sequence, xmlTransformer, csvTransformer);
  293. }
  294. virtual __int64 getResultInt(const char * name, unsigned sequence)
  295. {
  296. return ctx->getResultInt(name, sequence);
  297. }
  298. virtual double getResultReal(const char * name, unsigned sequence)
  299. {
  300. return ctx->getResultReal(name, sequence);
  301. }
  302. virtual void getResultString(unsigned & tlen, char * & tgt, const char * name, unsigned sequence)
  303. {
  304. ctx->getResultString(tlen, tgt, name, sequence);
  305. }
  306. virtual void getResultStringF(unsigned tlen, char * tgt, const char * name, unsigned sequence)
  307. {
  308. ctx->getResultStringF(tlen, tgt, name, sequence);
  309. }
  310. virtual void getResultUnicode(unsigned & tlen, UChar * & tgt, const char * name, unsigned sequence)
  311. {
  312. ctx->getResultUnicode(tlen, tgt, name, sequence);
  313. }
  314. virtual char *getResultVarString(const char * name, unsigned sequence)
  315. {
  316. return ctx->getResultVarString(name, sequence);
  317. }
  318. virtual UChar *getResultVarUnicode(const char * name, unsigned sequence)
  319. {
  320. return ctx->getResultVarUnicode(name, sequence);
  321. }
  322. virtual unsigned getResultHash(const char * name, unsigned sequence)
  323. {
  324. return ctx->getResultHash(name, sequence);
  325. }
  326. virtual unsigned getExternalResultHash(const char * wuid, const char * name, unsigned sequence)
  327. {
  328. return ctx->getExternalResultHash(wuid, name, sequence);
  329. }
  330. virtual char *getWuid()
  331. {
  332. return ctx->getWuid();
  333. }
  334. virtual void getExternalResultRaw(unsigned & tlen, void * & tgt, const char * wuid, const char * stepname, unsigned sequence, IXmlToRowTransformer * xmlTransformer, ICsvToRowTransformer * csvTransformer)
  335. {
  336. ctx->getExternalResultRaw(tlen, tgt, wuid, stepname, sequence, xmlTransformer, csvTransformer);
  337. }
  338. virtual void executeGraph(const char * graphName, bool realThor, size32_t parentExtractSize, const void * parentExtract)
  339. {
  340. ctx->executeGraph(graphName, realThor, parentExtractSize, parentExtract);
  341. }
  342. virtual char * getExpandLogicalName(const char * logicalName)
  343. {
  344. return ctx->getExpandLogicalName(logicalName);
  345. }
  346. virtual void addWuException(const char * text, unsigned code, unsigned severity, const char *source)
  347. {
  348. ctx->addWuException(text, code, severity, source);
  349. }
  350. virtual void addWuAssertFailure(unsigned code, const char * text, const char * filename, unsigned lineno, unsigned column, bool isAbort)
  351. {
  352. ctx->addWuAssertFailure(code, text, filename, lineno, column, isAbort);
  353. }
  354. virtual IUserDescriptor *queryUserDescriptor()
  355. {
  356. return ctx->queryUserDescriptor();
  357. }
  358. virtual IThorChildGraph * resolveChildQuery(__int64 activityId, IHThorArg * colocal)
  359. {
  360. return ctx->resolveChildQuery(activityId, colocal);
  361. }
  362. virtual unsigned __int64 getDatasetHash(const char * name, unsigned __int64 hash)
  363. {
  364. return ctx->getDatasetHash(name, hash);
  365. }
  366. virtual unsigned getNodes()
  367. {
  368. return ctx->getNodes();
  369. }
  370. virtual unsigned getNodeNum()
  371. {
  372. return ctx->getNodeNum();
  373. }
  374. virtual char *getFilePart(const char *logicalPart, bool create)
  375. {
  376. return ctx->getFilePart(logicalPart, create);
  377. }
  378. virtual unsigned __int64 getFileOffset(const char *logicalPart)
  379. {
  380. return ctx->getFileOffset(logicalPart);
  381. }
  382. virtual IDistributedFileTransaction *querySuperFileTransaction()
  383. {
  384. return ctx->querySuperFileTransaction();
  385. }
  386. virtual char *getEnv(const char *name, const char *defaultValue) const
  387. {
  388. return ctx->getEnv(name, defaultValue);
  389. }
  390. virtual char *getJobName()
  391. {
  392. return ctx->getJobName();
  393. }
  394. virtual char *getJobOwner()
  395. {
  396. return ctx->getJobOwner();
  397. }
  398. virtual char *getClusterName()
  399. {
  400. return ctx->getClusterName();
  401. }
  402. virtual char *getGroupName()
  403. {
  404. return ctx->getGroupName();
  405. }
  406. virtual char * queryIndexMetaData(char const * lfn, char const * xpath)
  407. {
  408. return ctx->queryIndexMetaData(lfn, xpath);
  409. }
  410. virtual unsigned getPriority() const
  411. {
  412. return ctx->getPriority();
  413. }
  414. virtual char *getPlatform()
  415. {
  416. return ctx->getPlatform();
  417. }
  418. virtual char *getOS()
  419. {
  420. return ctx->getOS();
  421. }
  422. virtual IEclGraphResults * resolveLocalQuery(__int64 activityId)
  423. {
  424. return ctx->resolveLocalQuery(activityId);
  425. }
  426. virtual char *getEnv(const char *name, const char *defaultValue)
  427. {
  428. return ctx->getEnv(name, defaultValue);
  429. }
  430. virtual unsigned logString(const char *text) const
  431. {
  432. return ctx->logString(text);
  433. }
  434. virtual const IContextLogger &queryContextLogger() const
  435. {
  436. return ctx->queryContextLogger();
  437. }
  438. virtual IDebuggableContext *queryDebugContext() const
  439. {
  440. return ctx->queryDebugContext();
  441. }
  442. virtual IEngineRowAllocator * getRowAllocator(IOutputMetaData * meta, unsigned activityId) const
  443. {
  444. return ctx->getRowAllocator(meta, activityId);
  445. }
  446. virtual const char *cloneVString(const char *str) const
  447. {
  448. return ctx->cloneVString(str);
  449. }
  450. virtual const char *cloneVString(size32_t len, const char *str) const
  451. {
  452. return ctx->cloneVString(len, str);
  453. }
  454. virtual void getResultRowset(size32_t & tcount, byte * * & tgt, const char * name, unsigned sequence, IEngineRowAllocator * _rowAllocator, bool isGrouped, IXmlToRowTransformer * xmlTransformer, ICsvToRowTransformer * csvTransformer)
  455. {
  456. ctx->getResultRowset(tcount, tgt, name, sequence, _rowAllocator, isGrouped, xmlTransformer, csvTransformer);
  457. }
  458. virtual void getResultDictionary(size32_t & tcount, byte * * & tgt, IEngineRowAllocator * _rowAllocator, const char * name, unsigned sequence, IXmlToRowTransformer * xmlTransformer, ICsvToRowTransformer * csvTransformer, IHThorHashLookupInfo * hasher)
  459. {
  460. ctx->getResultDictionary(tcount, tgt, _rowAllocator, name, sequence, xmlTransformer, csvTransformer, hasher);
  461. }
  462. virtual void getRowXML(size32_t & lenResult, char * & result, IOutputMetaData & info, const void * row, unsigned flags)
  463. {
  464. convertRowToXML(lenResult, result, info, row, flags);
  465. }
  466. virtual void getRowJSON(size32_t & lenResult, char * & result, IOutputMetaData & info, const void * row, unsigned flags)
  467. {
  468. convertRowToJSON(lenResult, result, info, row, flags);
  469. }
  470. virtual const void * fromXml(IEngineRowAllocator * _rowAllocator, size32_t len, const char * utf8, IXmlToRowTransformer * xmlTransformer, bool stripWhitespace)
  471. {
  472. return ctx->fromXml(_rowAllocator, len, utf8, xmlTransformer, stripWhitespace);
  473. }
  474. virtual const void * fromJson(IEngineRowAllocator * _rowAllocator, size32_t len, const char * utf8, IXmlToRowTransformer * xmlTransformer, bool stripWhitespace)
  475. {
  476. return ctx->fromJson(_rowAllocator, len, utf8, xmlTransformer, stripWhitespace);
  477. }
  478. virtual IEngineContext *queryEngineContext()
  479. {
  480. return ctx->queryEngineContext();
  481. }
  482. virtual char *getDaliServers()
  483. {
  484. return ctx->getDaliServers();
  485. }
  486. virtual IWorkUnit *updateWorkUnit() const
  487. {
  488. return ctx->updateWorkUnit();
  489. }
  490. protected:
  491. ICodeContext * ctx;
  492. };
  493. extern THORHELPER_API bool isActivitySink(ThorActivityKind kind);
  494. extern THORHELPER_API bool isActivitySource(ThorActivityKind kind);
  495. extern THORHELPER_API const char * getActivityText(ThorActivityKind kind);
  #endif // THORCOMMON_HPP