hthor.cpp 381 KB


  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include <algorithm>
  14. #include "hthor.ipp"
  15. #include "jexcept.hpp"
  16. #include "jmisc.hpp"
  17. #include "jthread.hpp"
  18. #include "jsocket.hpp"
  19. #include "jprop.hpp"
  20. #include "jdebug.hpp"
  21. #include "jlzw.hpp"
  22. #include "jisem.hpp"
  23. #include "roxiedebug.hpp"
  24. #include "roxierow.hpp"
  25. #include "roxiemem.hpp"
  26. #include "eclhelper.hpp"
  27. #include "workunit.hpp"
  28. #include "jfile.hpp"
  29. #include "keybuild.hpp"
  30. #include "rmtclient.hpp"
  31. #include "hrpc.hpp"
  32. #include "hrpcsock.hpp"
  33. #include "dafdesc.hpp"
  34. #include "dautils.hpp"
  35. #include "dasess.hpp"
  36. #include "dadfs.hpp"
  37. #include "thorfile.hpp"
  38. #include "thorsort.hpp"
  39. #include "thorparse.ipp"
  40. #include "thorxmlwrite.hpp"
  41. #include "rtlformat.hpp"
  42. #include "thorcommon.hpp"
  43. #include "jsmartsock.hpp"
  44. #include "thorstep.hpp"
  45. #include "eclagent.ipp"
  46. #include "roxierowbuff.hpp"
  47. #include "ftbase.ipp"
  48. #include "rtldynfield.hpp"
  49. #include "rtlnewkey.hpp"
  50. #include "thormeta.hpp"
  51. #include "thorread.hpp"
  52. #include "ws_dfsclient.hpp"
// NOTE(review): not referenced in the visible part of this file - presumably an iteration cap used further down; confirm.
#define EMPTY_LOOP_LIMIT 1000
// 0x10000 = 64 KiB.
static unsigned const hthorReadBufferSize = 0x10000;
// Default for the "hthorDiskWriteSizeLimit" workunit debug value read in CHThorDiskWriteActivity::ready().
static offset_t const defaultHThorDiskWriteSizeLimit = I64C(10*1024*1024*1024); //10 GB, per Nigel
using roxiemem::IRowManager;
using roxiemem::OwnedRoxieRow;
using roxiemem::OwnedRoxieString;
using roxiemem::OwnedConstRoxieRow;
// Row manager pointer stored by setHThorRowManager() and returned by queryRowManager().
IRowManager * theRowManager;
  61. void setHThorRowManager(IRowManager * manager)
  62. {
  63. theRowManager = manager;
  64. }
  65. IRowManager * queryRowManager()
  66. {
  67. return theRowManager;
  68. }
  69. void throwOOMException(size_t size, char const * label)
  70. {
  71. throw MakeStringException(0, "Out of Memory in hthor: trying to allocate %" I64F "u bytes for %s", (unsigned __int64) size, label);
  72. }
  73. void * checked_malloc(size_t size, char const * label)
  74. {
  75. void * ret = malloc(size);
  76. if(!ret)
  77. throwOOMException(size, label);
  78. return ret;
  79. }
  80. void * checked_calloc(size_t size, size_t num, char const * label)
  81. {
  82. void * ret = calloc(size, num);
  83. if(!ret)
  84. throwOOMException(size*num, label);
  85. return ret;
  86. }
  87. inline bool checkWriteIsCompressed(unsigned int flags, size32_t fixedSize, bool grouped)
  88. {
  89. return ((flags & TDWnewcompress) || ((flags & TDXcompress) && ((0 == fixedSize) || (fixedSize+(grouped?1:0) >= MIN_ROWCOMPRESS_RECSIZE))));
  90. }
  91. inline bool checkReadIsCompressed(unsigned int flags, size32_t fixedSize, bool grouped)
  92. {
  93. return ((flags & TDXcompress) && ((0 == fixedSize) || (fixedSize+(grouped?1:0) >= MIN_ROWCOMPRESS_RECSIZE)));
  94. }
  95. //=====================================================================================================
  96. //=====================================================================================================
  97. CRowBuffer::CRowBuffer(IRecordSize * _recsize, bool _grouped) : recsize(_recsize), grouped(_grouped)
  98. {
  99. fixsize = recsize->getFixedSize();
  100. count = 0;
  101. index = 0;
  102. }
  103. void CRowBuffer::insert(const void * next)
  104. {
  105. buff.append(next);
  106. count++;
  107. }
  108. bool CRowBuffer::pull(IHThorInput * input, unsigned __int64 rowLimit)
  109. {
  110. while(true)
  111. {
  112. OwnedConstRoxieRow next(input->nextRow());
  113. if(!next)
  114. {
  115. next.setown(input->nextRow());
  116. if(!next)
  117. break;
  118. if(grouped)
  119. buff.append(NULL);
  120. }
  121. insert(next.getClear());
  122. if(count > rowLimit)
  123. return false;
  124. }
  125. return true;
  126. }
  127. void CRowBuffer::clear()
  128. {
  129. buff.clear();
  130. index = 0;
  131. count = 0;
  132. }
  133. const void * CRowBuffer::next()
  134. {
  135. if(buff.isItem(index))
  136. return buff.itemClear(index++);
  137. else
  138. return NULL;
  139. }
  140. ILocalOrDistributedFile *resolveLFNFlat(IAgentContext &agent, const char *logicalName, const char *errorTxt, bool optional, bool isPrivilegedUser)
  141. {
  142. Owned<ILocalOrDistributedFile> ldFile = agent.resolveLFN(logicalName, errorTxt, optional, true, false, nullptr, isPrivilegedUser);
  143. if (!ldFile)
  144. return nullptr;
  145. IDistributedFile *dFile = ldFile->queryDistributedFile();
  146. if (dFile && isFileKey(dFile))
  147. throw MakeStringException(0, "Attempting to read index as a flat file: %s", logicalName);
  148. return ldFile.getClear();
  149. }
  150. bool isRemoteReadCandidate(const IAgentContext &agent, const RemoteFilename &rfn)
  151. {
  152. #ifndef _CONTAINERIZED
  153. if (!agent.queryWorkUnit()->getDebugValueBool("forceRemoteDisabled", false))
  154. {
  155. if (!rfn.isLocal())
  156. return true;
  157. StringBuffer localPath;
  158. rfn.getLocalPath(localPath);
  159. if (agent.queryWorkUnit()->getDebugValueBool("forceRemoteRead", testForceRemote(localPath)))
  160. return true;
  161. }
  162. #endif
  163. return false;
  164. }
  165. //=====================================================================================================
  166. CHThorActivityBase::CHThorActivityBase(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorArg & _help, ThorActivityKind _kind, EclGraph & _graph)
  167. : help(_help), kind(_kind), graph(_graph), agent(_agent), outputMeta(help.queryOutputMeta()), activityId(_activityId), subgraphId(_subgraphId)
  168. {
  169. }
  170. void CHThorActivityBase::setInput(unsigned index, IHThorInput *_input)
  171. {
  172. assertex(index == 0);
  173. input = _input;
  174. }
  175. IHThorInput *CHThorActivityBase::queryOutput(unsigned index)
  176. {
  177. agent.fail(255, "internal logic error: CHThorActivityBase::queryOutput");
  178. // never returns....
  179. return NULL;
  180. }
  181. void CHThorActivityBase::ready()
  182. {
  183. if (input)
  184. input->ready();
  185. if (needsAllocator())
  186. createRowAllocator();
  187. initialProcessed = processed;
  188. }
  189. CHThorActivityBase::~CHThorActivityBase()
  190. {
  191. ::Release(rowAllocator);
  192. }
  193. void CHThorActivityBase::createRowAllocator()
  194. {
  195. if (!rowAllocator)
  196. rowAllocator = agent.queryCodeContext()->getRowAllocator(outputMeta.queryOriginal(), activityId);
  197. }
  198. __int64 CHThorActivityBase::getCount()
  199. {
  200. throw MakeStringException(2, "Internal error: CHThorActivityBase::getCount");
  201. return 0;
  202. }
  203. void CHThorActivityBase::execute()
  204. {
  205. agent.fail(255, "internal logic error: CHThorActivityBase::execute");
  206. }
  207. void CHThorActivityBase::extractResult(unsigned & len, void * & ret)
  208. {
  209. agent.fail(255, "internal logic error: CHThorActivityBase::extractResult");
  210. }
  211. void CHThorActivityBase::stop()
  212. {
  213. if (input)
  214. input->stop();
  215. }
  216. void CHThorActivityBase::resetEOF()
  217. {
  218. if (input)
  219. input->resetEOF();
  220. }
  221. void CHThorActivityBase::updateProgress(IStatisticGatherer &progress) const
  222. {
  223. if (queryOutputs()>0)
  224. updateProgressForOther(progress, activityId, subgraphId);
  225. if (input)
  226. input->updateProgress(progress);
  227. }
  228. void CHThorActivityBase::updateProgressForOther(IStatisticGatherer &progress, unsigned otherActivity, unsigned otherSubgraph) const
  229. {
  230. updateProgressForOther(progress, otherActivity, otherSubgraph, 0, processed);
  231. }
  232. void CHThorActivityBase::updateProgressForOther(IStatisticGatherer &progress, unsigned otherActivity, unsigned otherSubgraph, unsigned whichOutput, unsigned __int64 numProcessed) const
  233. {
  234. StatsEdgeScope scope(progress, otherActivity, whichOutput);
  235. progress.addStatistic(StNumRowsProcessed, numProcessed);
  236. progress.addStatistic(StNumStarts, 1); // wrong for an activity in a subquery
  237. progress.addStatistic(StNumStops, 1);
  238. progress.addStatistic(StNumSlaves, 1); // MORE: A bit pointless for an hthor graph
  239. }
  240. ILocalEclGraphResults * CHThorActivityBase::resolveLocalQuery(__int64 graphId)
  241. {
  242. return static_cast<ILocalEclGraphResults *>(agent.queryCodeContext()->resolveLocalQuery(graphId));
  243. }
  244. IException * CHThorActivityBase::makeWrappedException(IException * e) const
  245. {
  246. if(dynamic_cast<IHThorException *>(e) || dynamic_cast<IUserException *>(e))
  247. return e;
  248. else
  249. return makeHThorException(kind, activityId, subgraphId, e);
  250. }
  251. IException * CHThorActivityBase::makeWrappedException(IException * e, char const * extra) const
  252. {
  253. if(dynamic_cast<IHThorException *>(e) || dynamic_cast<IUserException *>(e))
  254. return e;
  255. else
  256. return makeHThorException(kind, activityId, subgraphId, e, extra);
  257. }
  258. bool CHThorActivityBase::isPassThrough()
  259. {
  260. return false;
  261. }
  262. //=====================================================================================================
  263. CHThorSimpleActivityBase::CHThorSimpleActivityBase(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorArg & _help, ThorActivityKind _kind, EclGraph & _graph) : CHThorActivityBase(_agent, _activityId, _subgraphId, _help, _kind, _graph)
  264. {
  265. }
  266. IHThorInput * CHThorSimpleActivityBase::queryOutput(unsigned index)
  267. {
  268. assertex(index == 0);
  269. return this;
  270. }
  271. bool CHThorSimpleActivityBase::isGrouped()
  272. {
  273. return input ? input->isGrouped() : outputMeta.isGrouped();
  274. }
  275. IOutputMetaData * CHThorSimpleActivityBase::queryOutputMeta() const
  276. {
  277. return outputMeta;
  278. }
  279. //=====================================================================================================
  280. class CHThorClusterWriteHandler : public ClusterWriteHandler
  281. {
  282. IAgentContext &agent;
  283. public:
  284. CHThorClusterWriteHandler(char const * _logicalName, char const * _activityType, IAgentContext &_agent)
  285. : ClusterWriteHandler(_logicalName, _activityType), agent(_agent)
  286. {
  287. }
  288. private:
  289. virtual void getTempFilename(StringAttr & out) const
  290. {
  291. StringBuffer buff;
  292. agent.getTempfileBase(buff).append(PATHSEPCHAR).appendf("cluster_write_%p.%" I64F "d_%u", this, (__int64)GetCurrentThreadId(), GetCurrentProcessId());
  293. out.set(buff.str());
  294. }
  295. };
  296. ClusterWriteHandler *createClusterWriteHandler(IAgentContext &agent, IHThorIndexWriteArg *iwHelper, IHThorDiskWriteArg *dwHelper, const char * lfn, StringAttr &fn, bool extend)
  297. {
  298. //In the containerized system, the default data plane for this component is in the configuration
  299. StringBuffer defaultCluster;
  300. getDefaultStoragePlane(defaultCluster);
  301. Owned<CHThorClusterWriteHandler> clusterHandler;
  302. unsigned clusterIdx = 0;
  303. while(true)
  304. {
  305. OwnedRoxieString helperCluster(iwHelper ? iwHelper->getCluster(clusterIdx++) : dwHelper->getCluster(clusterIdx++));
  306. const char *cluster = helperCluster;
  307. if (!helperCluster && (clusterIdx == 1))
  308. {
  309. if (defaultCluster.length())
  310. cluster = defaultCluster;
  311. }
  312. if (!cluster)
  313. break;
  314. if(!clusterHandler)
  315. {
  316. if(extend)
  317. throw MakeStringException(0, "Cannot combine EXTEND and CLUSTER flags on disk write of file %s", lfn);
  318. clusterHandler.setown(new CHThorClusterWriteHandler(lfn, "OUTPUT", agent));
  319. }
  320. clusterHandler->addCluster(cluster);
  321. }
  322. if(clusterHandler)
  323. {
  324. clusterHandler->getLocalPhysicalFilename(fn);
  325. }
  326. else if (!agent.queryResolveFilesLocally())
  327. {
  328. StringBuffer filenameText;
  329. bool wasDFS;
  330. makeSinglePhysicalPartName(lfn, filenameText, true, wasDFS);
  331. fn.set(filenameText.str());
  332. }
  333. else
  334. {
  335. fn.set(lfn);
  336. }
  337. StringBuffer dir;
  338. splitFilename(fn, &dir, &dir, NULL, NULL);
  339. recursiveCreateDirectory(dir.str());
  340. return clusterHandler.getClear();
  341. }
  342. //=====================================================================================================
  343. CHThorDiskWriteActivity::CHThorDiskWriteActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDiskWriteArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  344. {
  345. incomplete = false;
  346. }
  347. CHThorDiskWriteActivity::~CHThorDiskWriteActivity()
  348. {
  349. diskout.clear();
  350. if(incomplete)
  351. {
  352. PROGLOG("Disk write incomplete, deleting physical file: %s", filename.get());
  353. diskout.clear();
  354. outSeq.clear();
  355. file->remove();
  356. }
  357. }
  358. void CHThorDiskWriteActivity::ready()
  359. {
  360. CHThorActivityBase::ready();
  361. grouped = (helper.getFlags() & TDXgrouped) != 0;
  362. extend = ((helper.getFlags() & TDWextend) != 0);
  363. overwrite = ((helper.getFlags() & TDWoverwrite) != 0);
  364. resolve();
  365. uncompressedBytesWritten = 0;
  366. numRecords = 0;
  367. sizeLimit = agent.queryWorkUnit()->getDebugValueInt64("hthorDiskWriteSizeLimit", defaultHThorDiskWriteSizeLimit);
  368. rowIf.setown(createRowInterfaces(input->queryOutputMeta(), activityId, 0, agent.queryCodeContext()));
  369. open();
  370. }
  371. void CHThorDiskWriteActivity::execute()
  372. {
  373. // Loop thru the results
  374. numRecords = 0;
  375. while (next())
  376. numRecords++;
  377. finishOutput();
  378. }
  379. void CHThorDiskWriteActivity::stop()
  380. {
  381. outSeq->flush(NULL);
  382. if(blockcompressed)
  383. uncompressedBytesWritten = outSeq->getPosition();
  384. updateWorkUnitResult(numRecords);
  385. close();
  386. if((helper.getFlags() & (TDXtemporary | TDXjobtemp) ) == 0 && !agent.queryResolveFilesLocally())
  387. publish();
  388. io.clear();
  389. incomplete = false;
  390. if(clusterHandler)
  391. clusterHandler->finish(file);
  392. CHThorActivityBase::stop();
  393. if (helper.getFlags() & TDXvarfilename)
  394. filename.clear();
  395. }
  396. void CHThorDiskWriteActivity::resolve()
  397. {
  398. OwnedRoxieString rawname = helper.getFileName();
  399. mangleHelperFileName(mangledHelperFileName, rawname, agent.queryWuid(), helper.getFlags());
  400. assertex(mangledHelperFileName.str());
  401. if((helper.getFlags() & (TDXtemporary | TDXjobtemp)) == 0)
  402. {
  403. Owned<ILocalOrDistributedFile> f = agent.resolveLFN(mangledHelperFileName.str(),"Cannot write, invalid logical name",true,false,true,&lfn,defaultPrivilegedUser);
  404. if (f)
  405. {
  406. if (f->queryDistributedFile())
  407. {
  408. // An already existing dali file
  409. if(extend)
  410. agent.logFileAccess(f->queryDistributedFile(), "HThor", "EXTENDED", graph);
  411. else if(overwrite) {
  412. LOG(MCoperatorInfo, "Removing %s from DFS", lfn.str());
  413. agent.logFileAccess(f->queryDistributedFile(), "HThor", "DELETED", graph);
  414. if (!agent.queryResolveFilesLocally())
  415. f->queryDistributedFile()->detach();
  416. else
  417. {
  418. Owned<IFile> file = createIFile(lfn);
  419. if (file->exists())
  420. file->remove();
  421. }
  422. }
  423. else
  424. throw MakeStringException(99, "Cannot write %s, file already exists (missing OVERWRITE attribute?)", lfn.str());
  425. }
  426. else if (f->exists() || f->isExternal() || agent.queryResolveFilesLocally())
  427. {
  428. // special/local/external file
  429. if (f->numParts()!=1)
  430. throw MakeStringException(99, "Cannot write %s, external file has multiple parts)", lfn.str());
  431. RemoteFilename rfn;
  432. f->getPartFilename(rfn,0);
  433. StringBuffer full;
  434. if (rfn.isLocal())
  435. rfn.getLocalPath(full);
  436. else
  437. rfn.getRemotePath(full);
  438. filename.set(full);
  439. if (isSpecialPath(filename))
  440. {
  441. PROGLOG("Writing to query %s", filename.get());
  442. return;
  443. }
  444. if (stdIoHandle(filename)>=0) {
  445. PROGLOG("Writing to %s", filename.get());
  446. return;
  447. }
  448. Owned<IFile> file = createIFile(filename);
  449. if (file->exists())
  450. {
  451. if (!overwrite)
  452. throw MakeStringException(99, "Cannot write %s, file already exists (missing OVERWRITE attribute?)", full.str());
  453. file->remove();
  454. }
  455. //Ensure target folder exists
  456. if (!recursiveCreateDirectoryForFile(filename.get()))
  457. {
  458. throw MakeStringException(99, "Cannot create file folder for %s", filename.str());
  459. }
  460. PROGLOG("Writing to file %s", filename.get());
  461. }
  462. f.clear();
  463. }
  464. if (filename.isEmpty()) // wasn't local or special (i.e. DFS file)
  465. {
  466. CDfsLogicalFileName dfsLogicalName;
  467. dfsLogicalName.allowOsPath(agent.queryResolveFilesLocally());
  468. if (!dfsLogicalName.setValidate(lfn.str()))
  469. {
  470. throw MakeStringException(99, "Could not resolve DFS Logical file %s", lfn.str());
  471. }
  472. clusterHandler.setown(createClusterWriteHandler(agent, NULL, &helper, dfsLogicalName.get(), filename, extend));
  473. }
  474. }
  475. else
  476. {
  477. StringBuffer mangledName;
  478. mangleLocalTempFilename(mangledName, mangledHelperFileName.str(), nullptr);
  479. filename.set(agent.noteTemporaryFile(mangledName.str()));
  480. PROGLOG("DISKWRITE: using temporary filename %s", filename.get());
  481. }
  482. }
  483. void CHThorDiskWriteActivity::open()
  484. {
  485. // Open an output file...
  486. file.setown(createIFile(filename));
  487. serializedOutputMeta.set(input->queryOutputMeta()->querySerializedDiskMeta());//returns outputMeta if serialization not needed
  488. Linked<IRecordSize> groupedMeta = input->queryOutputMeta()->querySerializedDiskMeta();
  489. if (grouped)
  490. groupedMeta.setown(createDeltaRecordSize(groupedMeta, +1));
  491. blockcompressed = checkWriteIsCompressed(helper.getFlags(), serializedOutputMeta.getFixedSize(), grouped);//TDWnewcompress for new compression, else check for row compression
  492. void *ekey;
  493. size32_t ekeylen;
  494. helper.getEncryptKey(ekeylen,ekey);
  495. encrypted = false;
  496. Owned<ICompressor> ecomp;
  497. if (ekeylen!=0)
  498. {
  499. ecomp.setown(createAESCompressor256(ekeylen,ekey));
  500. memset(ekey,0,ekeylen);
  501. rtlFree(ekey);
  502. encrypted = true;
  503. blockcompressed = true;
  504. }
  505. if(blockcompressed)
  506. io.setown(createCompressedFileWriter(file, groupedMeta->getFixedSize(), extend, true, ecomp, COMPRESS_METHOD_LZW));
  507. else
  508. io.setown(file->open(extend ? IFOwrite : IFOcreate));
  509. if(!io)
  510. throw MakeStringException(errno, "Failed to create%s file %s for writing", (encrypted ? " encrypted" : (blockcompressed ? " compressed" : "")), filename.get());
  511. incomplete = true;
  512. diskout.setown(createBufferedIOStream(io));
  513. if(extend)
  514. diskout->seek(0, IFSend);
  515. unsigned rwFlags = rw_autoflush;
  516. if (grouped)
  517. rwFlags |= rw_grouped;
  518. if (true) // MORE: Should this be controlled by an activity hint/flag?
  519. rwFlags |= rw_crc;
  520. IExtRowWriter * writer = createRowWriter(diskout, rowIf, rwFlags);
  521. outSeq.setown(writer);
  522. }
  523. const void * CHThorDiskWriteActivity::getNext()
  524. { // through operation (writes and returns row)
  525. // needs a one row lookahead to preserve group
  526. if (!nextrow.get())
  527. {
  528. nextrow.setown(input->nextRow());
  529. if (!nextrow.get())
  530. {
  531. nextrow.setown(input->nextRow());
  532. if (nextrow.get()&&grouped) // only write eog if not at eof
  533. outSeq->putRow(NULL);
  534. return NULL;
  535. }
  536. }
  537. outSeq->putRow(nextrow.getLink());
  538. checkSizeLimit();
  539. return nextrow.getClear();
  540. }
  541. bool CHThorDiskWriteActivity::next()
  542. {
  543. if (!nextrow.get())
  544. {
  545. OwnedConstRoxieRow row(input->nextRow());
  546. if (!row.get())
  547. {
  548. row.setown(input->nextRow());
  549. if (!row.get())
  550. return false; // we are done
  551. if (grouped)
  552. outSeq->putRow(NULL);
  553. }
  554. outSeq->putRow(row.getClear());
  555. }
  556. else
  557. outSeq->putRow(nextrow.getClear());
  558. checkSizeLimit();
  559. return true;
  560. }
  561. void CHThorDiskWriteActivity::finishOutput()
  562. {
  563. }
  564. void CHThorDiskWriteActivity::close()
  565. {
  566. diskout.clear();
  567. outSeq.clear();
  568. if(clusterHandler)
  569. clusterHandler->copyPhysical(file, agent.queryWorkUnit()->getDebugValueBool("__output_cluster_no_copy_physical", false));
  570. }
  571. void CHThorDiskWriteActivity::publish()
  572. {
  573. StringBuffer dir,base;
  574. offset_t fileSize = file->size();
  575. if(clusterHandler)
  576. clusterHandler->getDirAndFilename(dir, base);
  577. else
  578. splitFilename(filename, &dir, &dir, &base, &base);
  579. Owned<IFileDescriptor> desc = createFileDescriptor();
  580. desc->setDefaultDir(dir.str());
  581. Owned<IPropertyTree> attrs;
  582. if(clusterHandler)
  583. attrs.setown(createPTree("Part")); // clusterHandler is going to set attributes
  584. else
  585. {
  586. // add cluster
  587. StringBuffer mygroupname;
  588. Owned<IGroup> mygrp;
  589. if (isContainerized())
  590. {
  591. queryNamedGroupStore().getNasGroupName(mygroupname, 1);
  592. mygrp.setown(queryNamedGroupStore().lookup(mygroupname));
  593. }
  594. else
  595. {
  596. if (!agent.queryResolveFilesLocally())
  597. mygrp.setown(agent.getHThorGroup(mygroupname));
  598. }
  599. ClusterPartDiskMapSpec partmap; // will get this from group at some point
  600. desc->setNumParts(1);
  601. desc->setPartMask(base.str());
  602. desc->addCluster(mygroupname.str(),mygrp, partmap);
  603. attrs.set(&desc->queryPart(0)->queryProperties());
  604. }
  605. //properties of the first file part.
  606. if(blockcompressed)
  607. {
  608. attrs->setPropInt64("@size", uncompressedBytesWritten);
  609. attrs->setPropInt64("@compressedSize", fileSize);
  610. }
  611. else
  612. attrs->setPropInt64("@size", fileSize);
  613. attrs->setPropInt64("@recordCount", numRecords);
  614. CDateTime createTime, modifiedTime, accessedTime;
  615. file->getTime(&createTime, &modifiedTime, &accessedTime);
  616. // round file time down to nearest sec. Nanosec accurancy is not preserved elsewhere and can lead to mismatch later.
  617. unsigned hour, min, sec, nanosec;
  618. modifiedTime.getTime(hour, min, sec, nanosec);
  619. modifiedTime.setTime(hour, min, sec, 0);
  620. StringBuffer timestr;
  621. modifiedTime.getString(timestr);
  622. if(timestr.length())
  623. attrs->setProp("@modified", timestr.str());
  624. if(clusterHandler)
  625. clusterHandler->setDescriptorParts(desc, base.str(), attrs);
  626. // properties of the logical file
  627. IPropertyTree & properties = desc->queryProperties();
  628. properties.setPropInt64("@size", (blockcompressed) ? uncompressedBytesWritten : fileSize);
  629. if (encrypted)
  630. properties.setPropBool("@encrypted", true);
  631. if (blockcompressed)
  632. properties.setPropBool("@blockCompressed", true);
  633. if (helper.getFlags() & TDWpersist)
  634. properties.setPropBool("@persistent", true);
  635. if (grouped)
  636. properties.setPropBool("@grouped", true);
  637. properties.setPropInt64("@recordCount", numRecords);
  638. properties.setProp("@owner", agent.queryWorkUnit()->queryUser());
  639. if (helper.getFlags() & (TDWowned|TDXjobtemp|TDXtemporary))
  640. properties.setPropBool("@owned", true);
  641. if (helper.getFlags() & TDWresult)
  642. properties.setPropBool("@result", true);
  643. properties.setProp("@workunit", agent.queryWorkUnit()->queryWuid());
  644. properties.setProp("@job", agent.queryWorkUnit()->queryJobName());
  645. setFormat(desc);
  646. if (helper.getFlags() & TDWexpires)
  647. setExpiryTime(properties, helper.getExpiryDays());
  648. if (helper.getFlags() & TDWupdate)
  649. {
  650. unsigned eclCRC;
  651. unsigned __int64 totalCRC;
  652. helper.getUpdateCRCs(eclCRC, totalCRC);
  653. properties.setPropInt("@eclCRC", eclCRC);
  654. properties.setPropInt64("@totalCRC", totalCRC);
  655. }
  656. properties.setPropInt("@formatCrc", helper.getFormatCrc());
  657. if (helper.getFlags() & TDWrestricted)
  658. properties.setPropBool("restricted", true);
  659. if (io)
  660. {
  661. numDiskWrites = io->getStatistic(StNumDiskWrites);
  662. properties.setPropInt64("@numDiskWrites", numDiskWrites);
  663. }
  664. StringBuffer lfn;
  665. expandLogicalFilename(lfn, mangledHelperFileName.str(), agent.queryWorkUnit(), agent.queryResolveFilesLocally(), false);
  666. CDfsLogicalFileName logicalName;
  667. if (agent.queryResolveFilesLocally())
  668. logicalName.allowOsPath(true);
  669. if (!logicalName.setValidate(lfn.str()))
  670. throw MakeStringException(99, "Cannot publish %s, invalid logical name", lfn.str());
  671. if (!logicalName.isExternal()) // no need to publish externals
  672. {
  673. Owned<IDistributedFile> file = queryDistributedFileDirectory().createNew(desc);
  674. if(file->getModificationTime(modifiedTime))
  675. file->setAccessedTime(modifiedTime);
  676. if ((helper.getFlags() & TDXtemporary) == 0)
  677. {
  678. StringBuffer clusterName;
  679. file->getClusterName(0, clusterName);
  680. diskAccessCost = money2cost_type(calcFileAccessCost(clusterName, numDiskWrites, 0));
  681. }
  682. file->attach(logicalName.get(), agent.queryCodeContext()->queryUserDescriptor());
  683. agent.logFileAccess(file, "HThor", "CREATED", graph);
  684. }
  685. }
  686. void CHThorDiskWriteActivity::updateProgress(IStatisticGatherer &progress) const
  687. {
  688. CHThorActivityBase::updateProgress(progress);
  689. StatsActivityScope scope(progress, activityId);
  690. progress.addStatistic(StNumDiskWrites, numDiskWrites);
  691. if ((helper.getFlags() & TDXtemporary) == 0)
  692. progress.addStatistic(StCostFileAccess, diskAccessCost);
  693. }
  694. void CHThorDiskWriteActivity::updateWorkUnitResult(unsigned __int64 reccount)
  695. {
  696. if(lfn.length()) //this is required as long as temp files don't get a name which can be stored in the WU and automatically deleted by the WU
  697. {
  698. WorkunitUpdate wu = agent.updateWorkUnit();
  699. StringArray clusters;
  700. if (clusterHandler)
  701. clusterHandler->getClusters(clusters);
  702. else
  703. clusters.append(wu->queryClusterName());
  704. unsigned flags = helper.getFlags();
  705. if (!agent.queryResolveFilesLocally())
  706. {
  707. WUFileKind fileKind;
  708. if (TDXtemporary & flags)
  709. fileKind = WUFileTemporary;
  710. else if(TDXjobtemp & flags)
  711. fileKind = WUFileJobOwned;
  712. else if(TDWowned & flags)
  713. fileKind = WUFileOwned;
  714. else
  715. fileKind = WUFileStandard;
  716. wu->addFile(lfn.str(), &clusters, helper.getTempUsageCount(), fileKind, NULL);
  717. }
  718. else if ((TDXtemporary | TDXjobtemp) & flags)
  719. agent.noteTemporaryFilespec(filename);//note for later deletion
  720. if (!(flags & TDXtemporary) && helper.getSequence() >= 0)
  721. {
  722. Owned<IWUResult> result = wu->updateResultBySequence(helper.getSequence());
  723. if (result)
  724. {
  725. result->setResultTotalRowCount(reccount);
  726. result->setResultStatus(ResultStatusCalculated);
  727. if (helper.getFlags() & TDWresult)
  728. result->setResultFilename(lfn.str());
  729. else
  730. result->setResultLogicalName(lfn.str());
  731. }
  732. }
  733. }
  734. }
  735. void CHThorDiskWriteActivity::setFormat(IFileDescriptor * desc)
  736. {
  737. if ((serializedOutputMeta.isFixedSize()) && !isOutputTransformed())
  738. desc->queryProperties().setPropInt("@recordSize", serializedOutputMeta.getFixedSize() + (grouped ? 1 : 0));
  739. const char *recordECL = helper.queryRecordECL();
  740. if (recordECL && *recordECL)
  741. desc->queryProperties().setProp("ECL", recordECL);
  742. setRtlFormat(desc->queryProperties(), helper.queryDiskRecordSize());
  743. desc->queryProperties().setProp("@kind", "flat");
  744. }
  745. void CHThorDiskWriteActivity::checkSizeLimit()
  746. {
  747. if(sizeLimit && outSeq && (outSeq->getPosition() > sizeLimit))
  748. {
  749. StringBuffer msg;
  750. msg.append("Exceeded disk write size limit of ").append(sizeLimit).append(" while writing file ").append(mangledHelperFileName.str());
  751. throw MakeStringExceptionDirect(0, msg.str());
  752. }
  753. }
  754. //=====================================================================================================
  755. CHThorSpillActivity::CHThorSpillActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorSpillArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorDiskWriteActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  756. {
  757. }
  758. void CHThorSpillActivity::setInput(unsigned index, IHThorInput *_input)
  759. {
  760. CHThorActivityBase::setInput(index, _input);
  761. }
  762. void CHThorSpillActivity::ready()
  763. {
  764. CHThorDiskWriteActivity::ready();
  765. }
  766. void CHThorSpillActivity::execute()
  767. {
  768. UNIMPLEMENTED;
  769. }
  770. const void *CHThorSpillActivity::nextRow()
  771. {
  772. const void *nextrec = getNext();
  773. if (nextrec)
  774. {
  775. numRecords++;
  776. processed++;
  777. }
  778. return nextrec;
  779. }
  780. void CHThorSpillActivity::stop()
  781. {
  782. for (;;)
  783. {
  784. OwnedConstRoxieRow nextrec(nextRow());
  785. if (!nextrec)
  786. {
  787. nextrec.setown(nextRow());
  788. if (!nextrec)
  789. break;
  790. }
  791. }
  792. finishOutput();
  793. CHThorDiskWriteActivity::stop();
  794. }
  795. //=====================================================================================================
  796. CHThorCsvWriteActivity::CHThorCsvWriteActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorCsvWriteArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorDiskWriteActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  797. {
  798. csvOutput.init(helper.queryCsvParameters(),agent.queryWorkUnit()->getDebugValueBool("oldCSVoutputFormat", false));
  799. }
  800. void CHThorCsvWriteActivity::execute()
  801. {
  802. OwnedRoxieString header(helper.queryCsvParameters()->getHeader());
  803. if (header) {
  804. csvOutput.beginLine();
  805. csvOutput.writeHeaderLn(strlen(header), header);
  806. diskout->write(csvOutput.length(), csvOutput.str());
  807. }
  808. // Loop thru the results
  809. numRecords = 0;
  810. for (;;)
  811. {
  812. OwnedConstRoxieRow nextrec(input->nextRow());
  813. if (!nextrec)
  814. {
  815. nextrec.setown(input->nextRow());
  816. if (!nextrec)
  817. break;
  818. }
  819. try
  820. {
  821. csvOutput.beginLine();
  822. helper.writeRow((const byte *)nextrec.get(), &csvOutput);
  823. csvOutput.endLine();
  824. }
  825. catch(IException * e)
  826. {
  827. throw makeWrappedException(e);
  828. }
  829. diskout->write(csvOutput.length(), csvOutput.str());
  830. numRecords++;
  831. }
  832. OwnedRoxieString footer(helper.queryCsvParameters()->getFooter());
  833. if (footer) {
  834. csvOutput.beginLine();
  835. csvOutput.writeHeaderLn(strlen(footer), footer);
  836. diskout->write(csvOutput.length(), csvOutput.str());
  837. }
  838. }
  839. void CHThorCsvWriteActivity::setFormat(IFileDescriptor * desc)
  840. {
  841. // MORE - should call parent's setFormat too?
  842. ICsvParameters * csvInfo = helper.queryCsvParameters();
  843. OwnedRoxieString rs(csvInfo->getSeparator(0));
  844. StringBuffer separator;
  845. const char *s = rs;
  846. while (s && *s)
  847. {
  848. if (',' == *s)
  849. separator.append("\\,");
  850. else
  851. separator.append(*s);
  852. ++s;
  853. }
  854. desc->queryProperties().setProp("@csvSeparate", separator.str());
  855. desc->queryProperties().setProp("@csvQuote", rs.setown(csvInfo->getQuote(0)));
  856. desc->queryProperties().setProp("@csvTerminate", rs.setown(csvInfo->getTerminator(0)));
  857. desc->queryProperties().setProp("@csvEscape", rs.setown(csvInfo->getEscape(0)));
  858. desc->queryProperties().setProp("@format","utf8n");
  859. desc->queryProperties().setProp("@kind", "csv");
  860. const char *recordECL = helper.queryRecordECL();
  861. if (recordECL && *recordECL)
  862. desc->queryProperties().setProp("ECL", recordECL);
  863. setRtlFormat(desc->queryProperties(), helper.queryDiskRecordSize());
  864. }
  865. //=====================================================================================================
  866. CHThorXmlWriteActivity::CHThorXmlWriteActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorXmlWriteArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorDiskWriteActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), headerLength(0), footerLength(0)
  867. {
  868. OwnedRoxieString xmlpath(helper.getXmlIteratorPath());
  869. if (!xmlpath)
  870. rowTag.append(DEFAULTXMLROWTAG);
  871. else
  872. {
  873. const char *path = xmlpath;
  874. if (*path == '/') path++;
  875. if (strchr(path, '/')) UNIMPLEMENTED; // more what do we do with /mydata/row
  876. rowTag.append(path);
  877. }
  878. }
  879. void CHThorXmlWriteActivity::execute()
  880. {
  881. // Loop thru the results
  882. numRecords = 0;
  883. StringBuffer header;
  884. OwnedRoxieString suppliedHeader(helper.getHeader());
  885. if (kind==TAKjsonwrite)
  886. buildJsonHeader(header, suppliedHeader, rowTag);
  887. else if (suppliedHeader)
  888. header.set(suppliedHeader);
  889. else
  890. header.append(DEFAULTXMLHEADER).newline();
  891. headerLength = header.length();
  892. diskout->write(headerLength, header.str());
  893. Owned<IXmlWriterExt> writer = createIXmlWriterExt(helper.getXmlFlags(), 0, NULL, (kind==TAKjsonwrite) ? WTJSONRootless : WTStandard);
  894. writer->outputBeginArray(rowTag); //need to set up the array
  895. writer->clear(); //but not output it
  896. for (;;)
  897. {
  898. OwnedConstRoxieRow nextrec(input->nextRow());
  899. if (!nextrec)
  900. {
  901. nextrec.setown(input->nextRow());
  902. if (!nextrec)
  903. break;
  904. }
  905. try
  906. {
  907. writer->clear().outputBeginNested(rowTag, false);
  908. helper.toXML((const byte *)nextrec.get(), *writer);
  909. writer->outputEndNested(rowTag);
  910. }
  911. catch(IException * e)
  912. {
  913. throw makeWrappedException(e);
  914. }
  915. diskout->write(writer->length(), writer->str());
  916. numRecords++;
  917. }
  918. OwnedRoxieString suppliedFooter(helper.getFooter());
  919. StringBuffer footer;
  920. if (kind==TAKjsonwrite)
  921. buildJsonFooter(footer.newline(), suppliedFooter, rowTag);
  922. else if (suppliedFooter)
  923. footer.append(suppliedFooter);
  924. else
  925. footer.append(DEFAULTXMLFOOTER).newline();
  926. footerLength=footer.length();
  927. diskout->write(footerLength, footer);
  928. }
  929. void CHThorXmlWriteActivity::setFormat(IFileDescriptor * desc)
  930. {
  931. desc->queryProperties().setProp("@format","utf8n");
  932. desc->queryProperties().setProp("@rowTag",rowTag.str());
  933. desc->queryProperties().setProp("@kind", (kind==TAKjsonwrite) ? "json" : "xml");
  934. desc->queryProperties().setPropInt(FPheaderLength, headerLength);
  935. desc->queryProperties().setPropInt(FPfooterLength, footerLength);
  936. const char *recordECL = helper.queryRecordECL();
  937. if (recordECL && *recordECL)
  938. desc->queryProperties().setProp("ECL", recordECL);
  939. setRtlFormat(desc->queryProperties(), helper.queryDiskRecordSize());
  940. }
  941. //=====================================================================================================
  942. void throwPipeProcessError(unsigned err, char const * preposition, char const * program, IPipeProcess * pipe)
  943. {
  944. StringBuffer msg;
  945. msg.append("Error piping ").append(preposition).append(" (").append(program).append("): ");
  946. if (START_FAILURE == err) // PIPE process didn't start at all, START_FAILURE is our own error code
  947. msg.append("process failed to start");
  948. else
  949. msg.append("process failed with code ").append(err);
  950. if(pipe->hasError())
  951. {
  952. try
  953. {
  954. char error[512];
  955. size32_t sz = pipe->readError(sizeof(error), error);
  956. if(sz && sz!=(size32_t)-1)
  957. msg.append(", stderr: '").append(sz, error).append("'");
  958. }
  959. catch (IException *e)
  960. {
  961. EXCLOG(e, "Error reading pipe stderr");
  962. e->Release();
  963. }
  964. }
  965. throw MakeStringExceptionDirect(2, msg.str());
  966. }
  967. //=====================================================================================================
  968. CHThorIndexWriteActivity::CHThorIndexWriteActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorIndexWriteArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  969. {
  970. incomplete = false;
  971. StringBuffer lfn;
  972. OwnedRoxieString fname(helper.getFileName());
  973. expandLogicalFilename(lfn, fname, agent.queryWorkUnit(), agent.queryResolveFilesLocally(), false);
  974. if (!agent.queryResolveFilesLocally())
  975. {
  976. Owned<IDistributedFile> f = wsdfs::lookup(lfn, agent.queryCodeContext()->queryUserDescriptor(), true, false, false, nullptr, defaultNonPrivilegedUser, INFINITE);
  977. if (f)
  978. {
  979. if (TIWoverwrite & helper.getFlags())
  980. {
  981. LOG(MCuserInfo, "Removing %s from DFS", lfn.str());
  982. agent.logFileAccess(f, "HThor", "DELETED", _graph);
  983. f->detach();
  984. }
  985. else // not quite sure about raising exceptions in constructors
  986. throw MakeStringException(99, "Cannot write %s, file already exists (missing OVERWRITE attribute?)", lfn.str());
  987. }
  988. }
  989. clusterHandler.setown(createClusterWriteHandler(agent, &helper, NULL, lfn, filename, false));
  990. sizeLimit = agent.queryWorkUnit()->getDebugValueInt64("hthorDiskWriteSizeLimit", defaultHThorDiskWriteSizeLimit);
  991. defaultNoSeek = agent.queryWorkUnit()->getDebugValueBool("noSeekBuildIndex", isContainerized());
  992. }
  993. CHThorIndexWriteActivity::~CHThorIndexWriteActivity()
  994. {
  995. if(incomplete)
  996. {
  997. PROGLOG("Index write incomplete, deleting physical file: %s", filename.get());
  998. file->remove();
  999. }
  1000. }
  1001. void CHThorIndexWriteActivity::execute()
  1002. {
  1003. size32_t maxDiskRecordSize;
  1004. if (helper.queryDiskRecordSize()->isVariableSize())
  1005. {
  1006. if (helper.getFlags() & TIWmaxlength)
  1007. maxDiskRecordSize = helper.getMaxKeySize();
  1008. else
  1009. maxDiskRecordSize = KEYBUILD_MAXLENGTH; // Current default behaviour, could be improved in the future
  1010. }
  1011. else
  1012. maxDiskRecordSize = helper.queryDiskRecordSize()->getFixedSize();
  1013. if (maxDiskRecordSize > KEYBUILD_MAXLENGTH)
  1014. throw MakeStringException(99, "Index maximum record length (%d) exceeds 32K internal limit", maxDiskRecordSize);
  1015. OwnedMalloc<char> rowBuffer(maxDiskRecordSize, true);
  1016. // Loop thru the results
  1017. unsigned __int64 reccount = 0;
  1018. unsigned int fileCrc = -1;
  1019. file.setown(createIFile(filename.get()));
  1020. {
  1021. OwnedIFileIO io;
  1022. try
  1023. {
  1024. io.setown(file->open(IFOcreate));
  1025. }
  1026. catch(IException * e)
  1027. {
  1028. e->Release();
  1029. clearKeyStoreCache(false);
  1030. io.setown(file->open(IFOcreate));
  1031. }
  1032. incomplete = true;
  1033. bool needsSeek = true;
  1034. bool isVariable = helper.queryDiskRecordSize()->isVariableSize();
  1035. unsigned flags = COL_PREFIX | HTREE_FULLSORT_KEY;
  1036. if (helper.getFlags() & TIWrowcompress)
  1037. flags |= HTREE_COMPRESSED_KEY|HTREE_QUICK_COMPRESSED_KEY;
  1038. else if (!(helper.getFlags() & TIWnolzwcompress))
  1039. flags |= HTREE_COMPRESSED_KEY;
  1040. if (isVariable)
  1041. flags |= HTREE_VARSIZE;
  1042. Owned<IPropertyTree> metadata;
  1043. buildUserMetadata(metadata);
  1044. buildLayoutMetadata(metadata);
  1045. unsigned nodeSize = metadata->getPropInt("_nodeSize", NODESIZE);
  1046. if (metadata->getPropBool("_noSeek", defaultNoSeek))
  1047. {
  1048. flags |= TRAILING_HEADER_ONLY;
  1049. needsSeek = false;
  1050. }
  1051. if (metadata->getPropBool("_useTrailingHeader", true))
  1052. flags |= USE_TRAILING_HEADER;
  1053. size32_t keyMaxSize = helper.queryDiskRecordSize()->getRecordSize(NULL);
  1054. if (hasTrailingFileposition(helper.queryDiskRecordSize()->queryTypeInfo()))
  1055. keyMaxSize -= sizeof(offset_t);
  1056. Owned<IFileIOStream> out = createBufferedIOStream(io, 0x100000);
  1057. if (!needsSeek)
  1058. out.setown(createNoSeekIOStream(out));
  1059. Owned<IKeyBuilder> builder = createKeyBuilder(out, flags, keyMaxSize, nodeSize, helper.getKeyedSize(), 0, &helper, true, false);
  1060. class BcWrapper : implements IBlobCreator
  1061. {
  1062. IKeyBuilder *builder;
  1063. public:
  1064. BcWrapper(IKeyBuilder *_builder) : builder(_builder) {}
  1065. virtual unsigned __int64 createBlob(size32_t size, const void * ptr)
  1066. {
  1067. return builder->createBlob(size, (const char *) ptr);
  1068. }
  1069. } bc(builder);
  1070. for (;;)
  1071. {
  1072. OwnedConstRoxieRow nextrec(input->nextRow());
  1073. if (!nextrec)
  1074. {
  1075. nextrec.setown(input->nextRow());
  1076. if (!nextrec)
  1077. break;
  1078. }
  1079. try
  1080. {
  1081. unsigned __int64 fpos;
  1082. RtlStaticRowBuilder rowBuilder(rowBuffer, maxDiskRecordSize);
  1083. size32_t thisSize = helper.transform(rowBuilder, nextrec, &bc, fpos);
  1084. builder->processKeyData(rowBuffer, fpos, thisSize);
  1085. }
  1086. catch(IException * e)
  1087. {
  1088. throw makeWrappedException(e);
  1089. }
  1090. if(sizeLimit && (out->tell() > sizeLimit))
  1091. {
  1092. StringBuffer msg;
  1093. OwnedRoxieString fname(helper.getFileName());
  1094. msg.append("Exceeded disk write size limit of ").append(sizeLimit).append(" while writing index ").append(fname);
  1095. throw MakeStringExceptionDirect(0, msg.str());
  1096. }
  1097. reccount++;
  1098. }
  1099. builder->finish(metadata, &fileCrc);
  1100. duplicateKeyCount = builder->getDuplicateCount();
  1101. cummulativeDuplicateKeyCount += duplicateKeyCount;
  1102. numDiskWrites = io->getStatistic(StNumDiskWrites);
  1103. out->flush();
  1104. out.clear();
  1105. }
  1106. if(clusterHandler)
  1107. clusterHandler->copyPhysical(file, agent.queryWorkUnit()->getDebugValueBool("__output_cluster_no_copy_physical", false));
  1108. clearKeyStoreCacheEntry(file->queryFilename());
  1109. // Now publish to name services
  1110. StringBuffer dir,base;
  1111. offset_t indexFileSize = file->size();
  1112. if(clusterHandler)
  1113. clusterHandler->getDirAndFilename(dir, base);
  1114. else
  1115. splitFilename(filename, &dir, &dir, &base, &base);
  1116. Owned<IFileDescriptor> desc = createFileDescriptor();
  1117. desc->setDefaultDir(dir.str());
  1118. //properties of the first file part.
  1119. Owned<IPropertyTree> attrs;
  1120. if(clusterHandler)
  1121. attrs.setown(createPTree("Part")); // clusterHandler is going to set attributes
  1122. else
  1123. {
  1124. // add cluster
  1125. StringBuffer mygroupname;
  1126. Owned<IGroup> mygrp = NULL;
  1127. if (isContainerized())
  1128. {
  1129. queryNamedGroupStore().getNasGroupName(mygroupname, 1);
  1130. mygrp.setown(queryNamedGroupStore().lookup(mygroupname));
  1131. }
  1132. else
  1133. {
  1134. if (!agent.queryResolveFilesLocally())
  1135. mygrp.setown(agent.getHThorGroup(mygroupname));
  1136. }
  1137. ClusterPartDiskMapSpec partmap; // will get this from group at some point
  1138. desc->setNumParts(1);
  1139. desc->setPartMask(base.str());
  1140. desc->addCluster(mygroupname.str(),mygrp, partmap);
  1141. attrs.set(&desc->queryPart(0)->queryProperties());
  1142. }
  1143. attrs->setPropInt64("@size", indexFileSize);
  1144. attrs->setPropInt64("@recordCount", reccount);
  1145. CDateTime createTime, modifiedTime, accessedTime;
  1146. file->getTime(&createTime, &modifiedTime, &accessedTime);
  1147. // round file time down to nearest sec. Nanosec accurancy is not preserved elsewhere and can lead to mismatch later.
  1148. unsigned hour, min, sec, nanosec;
  1149. modifiedTime.getTime(hour, min, sec, nanosec);
  1150. modifiedTime.setTime(hour, min, sec, 0);
  1151. StringBuffer timestr;
  1152. modifiedTime.getString(timestr);
  1153. if(timestr.length())
  1154. attrs->setProp("@modified", timestr.str());
  1155. if(clusterHandler)
  1156. clusterHandler->setDescriptorParts(desc, base.str(), attrs);
  1157. // properties of the logical file
  1158. IPropertyTree & properties = desc->queryProperties();
  1159. properties.setProp("@kind", "key");
  1160. properties.setPropInt64("@size", indexFileSize);
  1161. properties.setPropInt64("@recordCount", reccount);
  1162. properties.setProp("@owner", agent.queryWorkUnit()->queryUser());
  1163. properties.setProp("@workunit", agent.queryWorkUnit()->queryWuid());
  1164. properties.setProp("@job", agent.queryWorkUnit()->queryJobName());
  1165. properties.setPropInt64("@duplicateKeyCount",duplicateKeyCount);
  1166. properties.setPropInt64("@numDiskWrites", numDiskWrites);
  1167. char const * rececl = helper.queryRecordECL();
  1168. if(rececl && *rececl)
  1169. properties.setProp("ECL", rececl);
  1170. if (helper.getFlags() & TIWexpires)
  1171. setExpiryTime(properties, helper.getExpiryDays());
  1172. if (helper.getFlags() & TIWupdate)
  1173. {
  1174. unsigned eclCRC;
  1175. unsigned __int64 totalCRC;
  1176. helper.getUpdateCRCs(eclCRC, totalCRC);
  1177. properties.setPropInt("@eclCRC", eclCRC);
  1178. properties.setPropInt64("@totalCRC", totalCRC);
  1179. }
  1180. properties.setPropInt("@fileCrc", fileCrc);
  1181. properties.setPropInt("@formatCrc", helper.getFormatCrc());
  1182. // Legacy record layout info
  1183. void * layoutMetaBuff;
  1184. size32_t layoutMetaSize;
  1185. if(helper.getIndexLayout(layoutMetaSize, layoutMetaBuff))
  1186. {
  1187. properties.setPropBin("_record_layout", layoutMetaSize, layoutMetaBuff);
  1188. rtlFree(layoutMetaBuff);
  1189. }
  1190. if (helper.getFlags() & TIWrestricted)
  1191. properties.setPropBool("restricted", true);
  1192. // New record layout info
  1193. setRtlFormat(properties, helper.queryDiskRecordSize());
  1194. // Bloom info
  1195. const IBloomBuilderInfo * const *bloomFilters = helper.queryBloomInfo();
  1196. while (bloomFilters && *bloomFilters)
  1197. {
  1198. const IBloomBuilderInfo *info = *bloomFilters++;
  1199. IPropertyTree *bloom = properties.addPropTree("Bloom");
  1200. bloom->setPropInt64("@bloomFieldMask", info->getBloomFields());
  1201. bloom->setPropInt64("@bloomLimit", info->getBloomLimit()); // MORE - if we didn't actually build because of the limit that might be interesting. Though that's going to vary by part.
  1202. VStringBuffer pval("%f", info->getBloomProbability());
  1203. bloom->setProp("@bloomProbability", pval.str());
  1204. }
  1205. StringBuffer lfn;
  1206. Owned<IDistributedFile> dfile = NULL;
  1207. if (!agent.queryResolveFilesLocally())
  1208. {
  1209. dfile.setown(queryDistributedFileDirectory().createNew(desc));
  1210. OwnedRoxieString fname(helper.getFileName());
  1211. expandLogicalFilename(lfn, fname, agent.queryWorkUnit(), agent.queryResolveFilesLocally(), false);
  1212. dfile->attach(lfn.str(),agent.queryCodeContext()->queryUserDescriptor());
  1213. agent.logFileAccess(dfile, "HThor", "CREATED", graph);
  1214. StringBuffer clusterName;
  1215. dfile->getClusterName(0, clusterName);
  1216. diskAccessCost = money2cost_type(calcFileAccessCost(clusterName, numDiskWrites, 0));
  1217. }
  1218. else
  1219. lfn = filename;
  1220. incomplete = false;
  1221. if(clusterHandler)
  1222. clusterHandler->finish(file);
  1223. // and update wu info
  1224. if (helper.getSequence() >= 0)
  1225. {
  1226. WorkunitUpdate wu = agent.updateWorkUnit();
  1227. Owned<IWUResult> result = wu->updateResultBySequence(helper.getSequence());
  1228. if (result)
  1229. {
  1230. result->setResultTotalRowCount(reccount);
  1231. result->setResultStatus(ResultStatusCalculated);
  1232. result->setResultLogicalName(lfn.str());
  1233. }
  1234. }
  1235. }
  1236. void CHThorIndexWriteActivity::buildUserMetadata(Owned<IPropertyTree> & metadata)
  1237. {
  1238. size32_t nameLen;
  1239. char * nameBuff;
  1240. size32_t valueLen;
  1241. char * valueBuff;
  1242. unsigned idx = 0;
  1243. while(helper.getIndexMeta(nameLen, nameBuff, valueLen, valueBuff, idx++))
  1244. {
  1245. StringBuffer name(nameLen, nameBuff);
  1246. StringBuffer value(valueLen, valueBuff);
  1247. if(*nameBuff == '_' && !checkReservedMetadataName(name))
  1248. {
  1249. OwnedRoxieString fname(helper.getFileName());
  1250. throw MakeStringException(0, "Invalid name %s in user metadata for index %s (names beginning with underscore are reserved)", name.str(), fname.get());
  1251. }
  1252. if(!validateXMLTag(name.str()))
  1253. {
  1254. OwnedRoxieString fname(helper.getFileName());
  1255. throw MakeStringException(0, "Invalid name %s in user metadata for index %s (not legal XML element name)", name.str(), fname.get());
  1256. }
  1257. if(!metadata) metadata.setown(createPTree("metadata"));
  1258. metadata->setProp(name.str(), value.str());
  1259. }
  1260. }
  1261. void CHThorIndexWriteActivity::buildLayoutMetadata(Owned<IPropertyTree> & metadata)
  1262. {
  1263. if(!metadata) metadata.setown(createPTree("metadata"));
  1264. metadata->setProp("_record_ECL", helper.queryRecordECL());
  1265. setRtlFormat(*metadata, helper.queryDiskRecordSize());
  1266. }
  1267. //=====================================================================================================
  1268. class CHThorPipeReadActivity : public CHThorSimpleActivityBase
  1269. {
  1270. IHThorPipeReadArg &helper;
  1271. Owned<IPipeProcess> pipe;
  1272. StringAttr pipeCommand;
  1273. Owned<IOutputRowDeserializer> rowDeserializer;
  1274. Owned<IReadRowStream> readTransformer;
  1275. bool groupSignalled;
  1276. public:
  1277. CHThorPipeReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorPipeReadArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  1278. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  1279. {
  1280. groupSignalled = true;
  1281. }
  1282. virtual bool needsAllocator() const { return true; }
  1283. virtual void ready()
  1284. {
  1285. groupSignalled = true; // i.e. don't start with a NULL row
  1286. CHThorSimpleActivityBase::ready();
  1287. rowDeserializer.setown(rowAllocator->createDiskDeserializer(agent.queryCodeContext()));
  1288. OwnedRoxieString xmlIteratorPath(helper.getXmlIteratorPath());
  1289. readTransformer.setown(createReadRowStream(rowAllocator, rowDeserializer, helper.queryXmlTransformer(), helper.queryCsvTransformer(), xmlIteratorPath, helper.getPipeFlags()));
  1290. OwnedRoxieString pipeProgram(helper.getPipeProgram());
  1291. openPipe(pipeProgram);
  1292. }
  1293. virtual void stop()
  1294. {
  1295. //Need to close the output (or read it in its entirety), otherwise we might wait forever for the
  1296. //program to finish
  1297. if (pipe)
  1298. pipe->closeOutput();
  1299. pipe.clear();
  1300. readTransformer->setStream(NULL);
  1301. CHThorSimpleActivityBase::stop();
  1302. }
  1303. virtual const void *nextRow()
  1304. {
  1305. while (!waitForPipe())
  1306. {
  1307. if (!pipe)
  1308. return NULL;
  1309. if (helper.getPipeFlags() & TPFgroupeachrow)
  1310. {
  1311. if (!groupSignalled)
  1312. {
  1313. groupSignalled = true;
  1314. return NULL;
  1315. }
  1316. }
  1317. }
  1318. const void *ret = readTransformer->next();
  1319. assertex(ret != NULL); // if ret can ever be NULL then we need to recode this logic
  1320. processed++;
  1321. groupSignalled = false;
  1322. return ret;
  1323. }
  1324. protected:
  1325. bool waitForPipe()
  1326. {
  1327. if (!pipe)
  1328. return false; // done
  1329. if (!readTransformer->eos())
  1330. return true;
  1331. verifyPipe();
  1332. return false;
  1333. }
  1334. void openPipe(char const * cmd)
  1335. {
  1336. pipeCommand.setown(cmd);
  1337. pipe.setown(createPipeProcess(agent.queryAllowedPipePrograms()));
  1338. if(!pipe->run(NULL, cmd, ".", false, true, true, 0x10000))
  1339. {
  1340. // NB: pipe->run can't rely on the child process failing fast enough to return false here, failure picked up later with stderr context.
  1341. WARNLOG(2, "Could not run pipe process %s", cmd);
  1342. }
  1343. Owned<ISimpleReadStream> pipeReader = pipe->getOutputStream();
  1344. readTransformer->setStream(pipeReader.get());
  1345. }
  1346. void verifyPipe()
  1347. {
  1348. if (pipe)
  1349. {
  1350. unsigned err = pipe->wait();
  1351. if(err && !(helper.getPipeFlags() & TPFnofail))
  1352. throwPipeProcessError(err, "from", pipeCommand.get(), pipe);
  1353. pipe.clear();
  1354. }
  1355. }
  1356. };
  1357. //=====================================================================================================
  1358. // Through pipe code - taken from Roxie implementation
  1359. interface IPipeRecordPullerCallback : extends IExceptionHandler
  1360. {
  1361. virtual void processRow(const void *row) = 0;
  1362. virtual void processDone() = 0;
  1363. virtual const void *nextInput() = 0;
  1364. };
  1365. class CPipeRecordPullerThread : public Thread
  1366. {
  1367. protected:
  1368. IPipeRecordPullerCallback *helper;
  1369. bool eog;
  1370. public:
  1371. CPipeRecordPullerThread() : Thread("PipeRecordPullerThread")
  1372. {
  1373. helper = NULL;
  1374. eog = false;
  1375. }
  1376. void setInput(IPipeRecordPullerCallback *_helper)
  1377. {
  1378. helper = _helper;
  1379. }
  1380. virtual int run()
  1381. {
  1382. try
  1383. {
  1384. for (;;)
  1385. {
  1386. const void * row = helper->nextInput();
  1387. if (row)
  1388. {
  1389. eog = false;
  1390. helper->processRow(row);
  1391. }
  1392. else if (!eog)
  1393. {
  1394. eog = true;
  1395. }
  1396. else
  1397. {
  1398. break;
  1399. }
  1400. }
  1401. helper->processDone();
  1402. }
  1403. catch (IException *e)
  1404. {
  1405. helper->fireException(e);
  1406. }
  1407. catch (...)
  1408. {
  1409. helper->fireException(MakeStringException(2, "Unexpected exception caught in PipeRecordPullerThread::run"));
  1410. }
  1411. return 0;
  1412. }
  1413. };
  1414. class CHThorPipeThroughActivity : public CHThorSimpleActivityBase, implements IPipeRecordPullerCallback
  1415. {
  1416. IHThorPipeThroughArg &helper;
  1417. CPipeRecordPullerThread puller;
  1418. Owned<IPipeProcess> pipe;
  1419. StringAttr pipeCommand;
  1420. InterruptableSemaphore pipeVerified;
  1421. InterruptableSemaphore pipeOpened;
  1422. CachedOutputMetaData inputMeta;
  1423. Owned<IOutputRowSerializer> rowSerializer;
  1424. Owned<IOutputRowDeserializer> rowDeserializer;
  1425. Owned<IPipeWriteXformHelper> writeTransformer;
  1426. Owned<IReadRowStream> readTransformer;
  1427. bool firstRead;
  1428. bool recreate;
  1429. bool inputExhausted;
  1430. bool groupSignalled;
  1431. public:
  1432. CHThorPipeThroughActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorPipeThroughArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  1433. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  1434. {
  1435. recreate = helper.recreateEachRow();
  1436. groupSignalled = true;
  1437. firstRead = false;
  1438. inputExhausted = false;
  1439. puller.setInput(this);
  1440. }
  1441. virtual void ready()
  1442. {
  1443. CHThorSimpleActivityBase::ready();
  1444. // From the create() in roxie
  1445. inputMeta.set(input->queryOutputMeta());
  1446. rowSerializer.setown(inputMeta.createDiskSerializer(agent.queryCodeContext(), activityId));
  1447. rowDeserializer.setown(rowAllocator->createDiskDeserializer(agent.queryCodeContext()));
  1448. writeTransformer.setown(createPipeWriteXformHelper(helper.getPipeFlags(), helper.queryXmlOutput(), helper.queryCsvOutput(), rowSerializer));
  1449. // From the start() in roxie
  1450. firstRead = true;
  1451. inputExhausted = false;
  1452. groupSignalled = true; // i.e. don't start with a NULL row
  1453. pipeVerified.reinit();
  1454. pipeOpened.reinit();
  1455. writeTransformer->ready();
  1456. if (!readTransformer)
  1457. {
  1458. OwnedRoxieString xmlIterator(helper.getXmlIteratorPath());
  1459. readTransformer.setown(createReadRowStream(rowAllocator, rowDeserializer, helper.queryXmlTransformer(), helper.queryCsvTransformer(), xmlIterator, helper.getPipeFlags()));
  1460. }
  1461. if(!recreate)
  1462. {
  1463. OwnedRoxieString pipeProgram(helper.getPipeProgram());
  1464. openPipe(pipeProgram);
  1465. }
  1466. puller.start();
  1467. }
  1468. void stop()
  1469. {
  1470. //Need to close the output (or read it in its entirety), otherwise we might wait forever for the
  1471. //program to finish
  1472. if (pipe)
  1473. pipe->closeOutput();
  1474. pipeVerified.interrupt(NULL);
  1475. pipeOpened.interrupt(NULL);
  1476. puller.join();
  1477. CHThorSimpleActivityBase::stop();
  1478. pipe.clear();
  1479. readTransformer->setStream(NULL);
  1480. }
  1481. virtual bool needsAllocator() const { return true; }
  1482. virtual const void *nextRow()
  1483. {
  1484. while (!waitForPipe())
  1485. {
  1486. if (!pipe)
  1487. return NULL;
  1488. if (helper.getPipeFlags() & TPFgroupeachrow)
  1489. {
  1490. if (!groupSignalled)
  1491. {
  1492. groupSignalled = true;
  1493. return NULL;
  1494. }
  1495. }
  1496. }
  1497. const void *ret = readTransformer->next();
  1498. assertex(ret != NULL); // if ret can ever be NULL then we need to recode this logic
  1499. processed++;
  1500. groupSignalled = false;
  1501. return ret;
  1502. }
  1503. virtual bool isGrouped()
  1504. {
  1505. return outputMeta.isGrouped();
  1506. }
  1507. virtual void processRow(const void *row)
  1508. {
  1509. // called from puller thread
  1510. if(recreate)
  1511. openPipe(helper.getNameFromRow(row));
  1512. try
  1513. {
  1514. writeTransformer->writeTranslatedText(row, pipe);
  1515. }
  1516. catch (IException *)
  1517. {
  1518. ReleaseRoxieRow(row);
  1519. throw;
  1520. }
  1521. ReleaseRoxieRow(row);
  1522. if(recreate)
  1523. {
  1524. closePipe();
  1525. pipeVerified.wait();
  1526. }
  1527. }
  1528. virtual void processDone()
  1529. {
  1530. // called from puller thread
  1531. if(recreate)
  1532. {
  1533. inputExhausted = true;
  1534. pipeOpened.signal();
  1535. }
  1536. else
  1537. {
  1538. closePipe();
  1539. pipeVerified.wait();
  1540. }
  1541. }
  1542. virtual const void *nextInput()
  1543. {
  1544. return input->nextRow();
  1545. }
  1546. virtual bool fireException(IException *e)
  1547. {
  1548. inputExhausted = true;
  1549. pipeOpened.interrupt(LINK(e));
  1550. pipeVerified.interrupt(e);
  1551. return true;
  1552. }
  1553. private:
  1554. bool waitForPipe()
  1555. {
  1556. Owned<IException> pipeException;
  1557. try
  1558. {
  1559. if (firstRead)
  1560. {
  1561. pipeOpened.wait();
  1562. firstRead = false;
  1563. }
  1564. if (!pipe)
  1565. return false; // done
  1566. if (!readTransformer->eos())
  1567. return true;
  1568. }
  1569. catch (IException *e)
  1570. {
  1571. // NB: the original exception is probably a IPipeProcessException, but because InterruptableSemaphore rethrows it, we must catch it as an IException
  1572. pipeException.setown(e);
  1573. }
  1574. verifyPipe();
  1575. if (pipeException) // NB: verifyPipe may throw error based on pipe prog. output 1st.
  1576. throw pipeException.getClear();
  1577. if (recreate && !inputExhausted)
  1578. pipeOpened.wait();
  1579. return false;
  1580. }
  1581. void openPipe(char const * cmd)
  1582. {
  1583. pipeCommand.setown(cmd);
  1584. pipe.setown(createPipeProcess(agent.queryAllowedPipePrograms()));
  1585. if(!pipe->run(NULL, cmd, ".", true, true, true, 0x10000))
  1586. {
  1587. // NB: pipe->run can't rely on the child process failing fast enough to return false here, failure picked up later with stderr context.
  1588. WARNLOG(2, "Could not run pipe process %s", cmd);
  1589. }
  1590. else
  1591. writeTransformer->writeHeader(pipe);
  1592. Owned<ISimpleReadStream> pipeReader = pipe->getOutputStream();
  1593. readTransformer->setStream(pipeReader.get());
  1594. pipeOpened.signal();
  1595. }
  1596. void closePipe()
  1597. {
  1598. writeTransformer->writeFooter(pipe);
  1599. pipe->closeInput();
  1600. }
  1601. void verifyPipe()
  1602. {
  1603. if (pipe)
  1604. {
  1605. unsigned err = pipe->wait();
  1606. if(err && !(helper.getPipeFlags() & TPFnofail))
  1607. throwPipeProcessError(err, "through", pipeCommand.get(), pipe);
  1608. pipe.clear();
  1609. pipeVerified.signal();
  1610. }
  1611. }
  1612. };
  1613. class CHThorPipeWriteActivity : public CHThorActivityBase
  1614. {
  1615. IHThorPipeWriteArg &helper;
  1616. Owned<IPipeProcess> pipe;
  1617. StringAttr pipeCommand;
  1618. CachedOutputMetaData inputMeta;
  1619. Owned<IOutputRowSerializer> rowSerializer;
  1620. Owned<IPipeWriteXformHelper> writeTransformer;
  1621. bool firstRead;
  1622. bool recreate;
  1623. bool inputExhausted;
  1624. public:
  1625. IMPLEMENT_SINKACTIVITY;
  1626. CHThorPipeWriteActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorPipeWriteArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  1627. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  1628. {
  1629. recreate = helper.recreateEachRow();
  1630. firstRead = false;
  1631. inputExhausted = false;
  1632. }
  1633. virtual bool needsAllocator() const { return true; }
  1634. virtual void ready()
  1635. {
  1636. CHThorActivityBase::ready();
  1637. inputMeta.set(input->queryOutputMeta());
  1638. rowSerializer.setown(inputMeta.createDiskSerializer(agent.queryCodeContext(), activityId));
  1639. writeTransformer.setown(createPipeWriteXformHelper(helper.getPipeFlags(), helper.queryXmlOutput(), helper.queryCsvOutput(), rowSerializer));
  1640. firstRead = true;
  1641. inputExhausted = false;
  1642. writeTransformer->ready();
  1643. if(!recreate)
  1644. {
  1645. OwnedRoxieString pipeProgram(helper.getPipeProgram());
  1646. openPipe(pipeProgram);
  1647. }
  1648. }
  1649. virtual void execute()
  1650. {
  1651. Owned<IException> pipeException;
  1652. try
  1653. {
  1654. for (;;)
  1655. {
  1656. OwnedConstRoxieRow row(input->nextRow());
  1657. if (!row)
  1658. {
  1659. row.setown(input->nextRow());
  1660. if (!row)
  1661. break;
  1662. }
  1663. processed++;
  1664. if (recreate)
  1665. openPipe(helper.getNameFromRow(row));
  1666. writeTransformer->writeTranslatedText(row, pipe);
  1667. if (recreate)
  1668. {
  1669. closePipe();
  1670. verifyPipe();
  1671. }
  1672. }
  1673. if (!recreate)
  1674. closePipe();
  1675. }
  1676. catch (IException *e)
  1677. {
  1678. // NB: the original exception is probably a IPipeProcessException, but because InterruptableSemaphore rethrows it, we must catch it as an IException
  1679. pipeException.setown(e);
  1680. }
  1681. verifyPipe();
  1682. if (pipeException) // NB: verifyPipe may throw error based on pipe prog. output 1st.
  1683. throw pipeException.getClear();
  1684. if (helper.getSequence() >= 0)
  1685. {
  1686. WorkunitUpdate wu = agent.updateWorkUnit();
  1687. Owned<IWUResult> result = wu->updateResultBySequence(helper.getSequence());
  1688. if (result)
  1689. {
  1690. result->setResultTotalRowCount(processed);
  1691. result->setResultStatus(ResultStatusCalculated);
  1692. }
  1693. }
  1694. }
  1695. private:
  1696. void openPipe(char const * cmd)
  1697. {
  1698. pipeCommand.setown(cmd);
  1699. pipe.setown(createPipeProcess(agent.queryAllowedPipePrograms()));
  1700. if (!pipe->run(NULL, cmd, ".", true, false, true, 0x10000))
  1701. {
  1702. // NB: pipe->run can't rely on the child process failing fast enough to return false here, failure picked up later with stderr context.
  1703. WARNLOG(2, "Could not run pipe process %s", cmd);
  1704. }
  1705. else
  1706. writeTransformer->writeHeader(pipe);
  1707. }
  1708. void closePipe()
  1709. {
  1710. writeTransformer->writeFooter(pipe);
  1711. pipe->closeInput();
  1712. }
  1713. void verifyPipe()
  1714. {
  1715. if (pipe)
  1716. {
  1717. unsigned err = pipe->wait();
  1718. if(err && !(helper.getPipeFlags() & TPFnofail))
  1719. throwPipeProcessError(err, "to", pipeCommand.get(), pipe);
  1720. pipe.clear();
  1721. }
  1722. }
  1723. };
  1724. //=====================================================================================================
  1725. CHThorIterateActivity::CHThorIterateActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorIterateArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  1726. {
  1727. }
  1728. void CHThorIterateActivity::stop()
  1729. {
  1730. CHThorSimpleActivityBase::stop();
  1731. right.clear();
  1732. left.clear();
  1733. }
  1734. void CHThorIterateActivity::ready()
  1735. {
  1736. CHThorSimpleActivityBase::ready();
  1737. if (!defaultRecord)
  1738. {
  1739. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  1740. size32_t thisSize = helper.createDefault(rowBuilder);
  1741. defaultRecord.setown(rowBuilder.finalizeRowClear(thisSize));
  1742. }
  1743. counter = 0;
  1744. }
  1745. const void *CHThorIterateActivity::nextRow()
  1746. {
  1747. for (;;)
  1748. {
  1749. right.setown(input->nextRow());
  1750. if(!right)
  1751. {
  1752. bool skippedGroup = (!left) && (counter > 0); //we have just skipped entire group, but shouldn't output a double null
  1753. left.clear();
  1754. counter = 0;
  1755. if(skippedGroup) continue;
  1756. return NULL;
  1757. }
  1758. try
  1759. {
  1760. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  1761. unsigned outSize = helper.transform(rowBuilder, left ? left : defaultRecord, right, ++counter);
  1762. if (outSize)
  1763. {
  1764. left.setown(rowBuilder.finalizeRowClear(outSize));
  1765. processed++;
  1766. return left.getLink();
  1767. }
  1768. }
  1769. catch(IException * e)
  1770. {
  1771. throw makeWrappedException(e);
  1772. }
  1773. }
  1774. }
  1775. //=====================================================================================================
  1776. CHThorProcessActivity::CHThorProcessActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorProcessArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  1777. {
  1778. }
  1779. CHThorProcessActivity::~CHThorProcessActivity()
  1780. {
  1781. }
  1782. void CHThorProcessActivity::ready()
  1783. {
  1784. CHThorSimpleActivityBase::ready();
  1785. rightRowAllocator.setown(agent.queryCodeContext()->getRowAllocator( helper.queryRightRecordSize(), activityId));
  1786. RtlDynamicRowBuilder rowBuilder(rightRowAllocator);
  1787. size32_t thisSize = helper.createInitialRight(rowBuilder);
  1788. initialRight.setown(rowBuilder.finalizeRowClear(thisSize));
  1789. curRight.set(initialRight);
  1790. counter = 0;
  1791. }
  1792. const void *CHThorProcessActivity::nextRow()
  1793. {
  1794. try
  1795. {
  1796. for (;;)
  1797. {
  1798. OwnedConstRoxieRow next(input->nextRow());
  1799. if (!next)
  1800. {
  1801. bool eog = (curRight != initialRight); // processed any records?
  1802. counter = 0;
  1803. curRight.set(initialRight);
  1804. if (eog)
  1805. return NULL;
  1806. next.setown(input->nextRow());
  1807. if (!next)
  1808. return NULL;
  1809. }
  1810. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  1811. RtlDynamicRowBuilder rightRowBuilder(rightRowAllocator);
  1812. size32_t outSize = helper.transform(rowBuilder, rightRowBuilder, next, curRight, ++counter);
  1813. if (outSize)
  1814. {
  1815. size32_t rightSize = rightRowAllocator->queryOutputMeta()->getRecordSize(rightRowBuilder.getSelf()); // yuk
  1816. curRight.setown(rightRowBuilder.finalizeRowClear(rightSize));
  1817. processed++;
  1818. return rowBuilder.finalizeRowClear(outSize);
  1819. }
  1820. }
  1821. }
  1822. catch(IException * e)
  1823. {
  1824. throw makeWrappedException(e);
  1825. }
  1826. }
  1827. //=====================================================================================================
  1828. CHThorNormalizeActivity::CHThorNormalizeActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNormalizeArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  1829. {
  1830. IRecordSize* recSize = outputMeta;
  1831. if (recSize == NULL)
  1832. throw MakeStringException(2, "Unexpected null pointer from helper.queryOutputMeta()");
  1833. }
  1834. CHThorNormalizeActivity::~CHThorNormalizeActivity()
  1835. {
  1836. }
  1837. void CHThorNormalizeActivity::ready()
  1838. {
  1839. CHThorSimpleActivityBase::ready();
  1840. numThisRow = 0;
  1841. curRow = 0;
  1842. numProcessedLastGroup = processed;
  1843. }
  1844. const void *CHThorNormalizeActivity::nextRow()
  1845. {
  1846. for (;;)
  1847. {
  1848. while (curRow == numThisRow)
  1849. {
  1850. inbuff.setown(input->nextRow());
  1851. if (!inbuff && (processed == numProcessedLastGroup))
  1852. inbuff.setown(input->nextRow());
  1853. if (!inbuff)
  1854. {
  1855. numProcessedLastGroup = processed;
  1856. return NULL;
  1857. }
  1858. curRow = 0;
  1859. numThisRow = helper.numExpandedRows(inbuff);
  1860. }
  1861. try
  1862. {
  1863. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  1864. memsize_t thisSize = helper.transform(rowBuilder, inbuff, ++curRow);
  1865. if(thisSize != 0)
  1866. {
  1867. processed++;
  1868. return rowBuilder.finalizeRowClear(thisSize);
  1869. }
  1870. }
  1871. catch(IException * e)
  1872. {
  1873. throw makeWrappedException(e);
  1874. }
  1875. }
  1876. }
  1877. //=====================================================================================================
  1878. CHThorNormalizeChildActivity::CHThorNormalizeChildActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNormalizeChildArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  1879. {
  1880. }
  1881. CHThorNormalizeChildActivity::~CHThorNormalizeChildActivity()
  1882. {
  1883. }
  1884. bool CHThorNormalizeChildActivity::advanceInput()
  1885. {
  1886. for (;;)
  1887. {
  1888. inbuff.setown(input->nextRow());
  1889. if (!inbuff && (processed == numProcessedLastGroup))
  1890. inbuff.setown(input->nextRow());
  1891. if (!inbuff)
  1892. {
  1893. numProcessedLastGroup = processed;
  1894. return false;
  1895. }
  1896. curChildRow = cursor->first(inbuff);
  1897. if (curChildRow)
  1898. {
  1899. curRow = 0;
  1900. return true;
  1901. }
  1902. }
  1903. }
  1904. void CHThorNormalizeChildActivity::stop()
  1905. {
  1906. inbuff.clear();
  1907. CHThorSimpleActivityBase::stop();
  1908. }
  1909. void CHThorNormalizeChildActivity::ready()
  1910. {
  1911. CHThorSimpleActivityBase::ready();
  1912. curRow = 0;
  1913. numProcessedLastGroup = processed;
  1914. cursor = helper.queryIterator();
  1915. curChildRow = NULL;
  1916. }
  1917. const void *CHThorNormalizeChildActivity::nextRow()
  1918. {
  1919. for (;;)
  1920. {
  1921. if (!inbuff)
  1922. {
  1923. if (!advanceInput())
  1924. return NULL;
  1925. }
  1926. try
  1927. {
  1928. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  1929. size32_t outSize = helper.transform(rowBuilder, inbuff, curChildRow, ++curRow);
  1930. curChildRow = cursor->next();
  1931. if (!curChildRow)
  1932. inbuff.clear();
  1933. if (outSize != 0)
  1934. {
  1935. processed++;
  1936. return rowBuilder.finalizeRowClear(outSize);
  1937. }
  1938. }
  1939. catch(IException * e)
  1940. {
  1941. throw makeWrappedException(e);
  1942. }
  1943. }
  1944. }
  1945. //=================================================================================
  1946. bool CHThorNormalizeLinkedChildActivity::advanceInput()
  1947. {
  1948. for (;;)
  1949. {
  1950. curParent.setown(input->nextRow());
  1951. if (!curParent && (processed == numProcessedLastGroup))
  1952. curParent.setown(input->nextRow());
  1953. if (!curParent)
  1954. {
  1955. numProcessedLastGroup = processed;
  1956. return false;
  1957. }
  1958. curChild.set(helper.first(curParent));
  1959. if (curChild)
  1960. return true;
  1961. }
  1962. }
  1963. CHThorNormalizeLinkedChildActivity::CHThorNormalizeLinkedChildActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNormalizeLinkedChildArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  1964. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  1965. {
  1966. }
  1967. CHThorNormalizeLinkedChildActivity::~CHThorNormalizeLinkedChildActivity()
  1968. {
  1969. }
  1970. void CHThorNormalizeLinkedChildActivity::ready()
  1971. {
  1972. numProcessedLastGroup = 0;
  1973. CHThorSimpleActivityBase::ready();
  1974. }
  1975. void CHThorNormalizeLinkedChildActivity::stop()
  1976. {
  1977. curParent.clear();
  1978. curChild.clear();
  1979. CHThorSimpleActivityBase::stop();
  1980. }
  1981. const void * CHThorNormalizeLinkedChildActivity::nextRow()
  1982. {
  1983. for (;;)
  1984. {
  1985. if (!curParent)
  1986. {
  1987. if (!advanceInput())
  1988. return NULL;
  1989. }
  1990. try
  1991. {
  1992. const void *ret = curChild.getClear();
  1993. curChild.set(helper.next());
  1994. if (!curChild)
  1995. curParent.clear();
  1996. if (ret)
  1997. {
  1998. processed++;
  1999. return ret;
  2000. }
  2001. }
  2002. catch (IException *E)
  2003. {
  2004. throw makeWrappedException(E);
  2005. }
  2006. }
  2007. }
  2008. //=====================================================================================================
  2009. CHThorProjectActivity::CHThorProjectActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorProjectArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2010. {
  2011. }
  2012. CHThorProjectActivity::~CHThorProjectActivity()
  2013. {
  2014. }
  2015. void CHThorProjectActivity::ready()
  2016. {
  2017. CHThorSimpleActivityBase::ready();
  2018. numProcessedLastGroup = processed;
  2019. }
  2020. const void * CHThorProjectActivity::nextRow()
  2021. {
  2022. for (;;)
  2023. {
  2024. OwnedConstRoxieRow in(input->nextRow());
  2025. if (!in)
  2026. {
  2027. if (numProcessedLastGroup == processed)
  2028. in.setown(input->nextRow());
  2029. if (!in)
  2030. {
  2031. numProcessedLastGroup = processed;
  2032. return NULL;
  2033. }
  2034. }
  2035. try
  2036. {
  2037. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  2038. size32_t outSize = helper.transform(rowBuilder, in);
  2039. if (outSize)
  2040. {
  2041. processed++;
  2042. return rowBuilder.finalizeRowClear(outSize);
  2043. }
  2044. }
  2045. catch(IException * e)
  2046. {
  2047. throw makeWrappedException(e);
  2048. }
  2049. }
  2050. }
  2051. //=====================================================================================================
  2052. CHThorPrefetchProjectActivity::CHThorPrefetchProjectActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorPrefetchProjectArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2053. {
  2054. }
  2055. void CHThorPrefetchProjectActivity::ready()
  2056. {
  2057. CHThorSimpleActivityBase::ready();
  2058. recordCount = 0;
  2059. numProcessedLastGroup = processed;
  2060. eof = !helper.canMatchAny();
  2061. child = helper.queryChild();
  2062. }
  2063. const void * CHThorPrefetchProjectActivity::nextRow()
  2064. {
  2065. if (eof)
  2066. return NULL;
  2067. for (;;)
  2068. {
  2069. try
  2070. {
  2071. OwnedConstRoxieRow row(input->nextRow());
  2072. if (!row)
  2073. {
  2074. if (numProcessedLastGroup == processed)
  2075. row.setown(input->nextRow());
  2076. if (!row)
  2077. {
  2078. numProcessedLastGroup = processed;
  2079. return NULL;
  2080. }
  2081. }
  2082. ++recordCount;
  2083. rtlRowBuilder extract;
  2084. if (helper.preTransform(extract,row,recordCount))
  2085. {
  2086. Owned<IEclGraphResults> results;
  2087. if (child)
  2088. {
  2089. results.setown(child->evaluate(extract.size(), extract.getbytes()));
  2090. }
  2091. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  2092. size32_t outSize = helper.transform(rowBuilder, row, results, recordCount);
  2093. if (outSize)
  2094. {
  2095. processed++;
  2096. return rowBuilder.finalizeRowClear(outSize);
  2097. }
  2098. }
  2099. }
  2100. catch(IException * e)
  2101. {
  2102. throw makeWrappedException(e);
  2103. }
  2104. }
  2105. }
  2106. //=====================================================================================================
  2107. CHThorFilterProjectActivity::CHThorFilterProjectActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorFilterProjectArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2108. {
  2109. }
  2110. CHThorFilterProjectActivity::~CHThorFilterProjectActivity()
  2111. {
  2112. }
  2113. void CHThorFilterProjectActivity::ready()
  2114. {
  2115. CHThorSimpleActivityBase::ready();
  2116. recordCount = 0;
  2117. numProcessedLastGroup = processed;
  2118. eof = !helper.canMatchAny();
  2119. }
  2120. const void * CHThorFilterProjectActivity::nextRow()
  2121. {
  2122. if (eof)
  2123. return NULL;
  2124. for (;;)
  2125. {
  2126. OwnedConstRoxieRow in = input->nextRow();
  2127. if (!in)
  2128. {
  2129. recordCount = 0;
  2130. if (numProcessedLastGroup == processed)
  2131. in.setown(input->nextRow());
  2132. if (!in)
  2133. {
  2134. numProcessedLastGroup = processed;
  2135. return NULL;
  2136. }
  2137. }
  2138. try
  2139. {
  2140. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  2141. size32_t outSize = helper.transform(rowBuilder, in, ++recordCount);
  2142. if (outSize)
  2143. {
  2144. processed++;
  2145. return rowBuilder.finalizeRowClear(outSize);
  2146. }
  2147. }
  2148. catch(IException * e)
  2149. {
  2150. throw makeWrappedException(e);
  2151. }
  2152. }
  2153. }
  2154. //=====================================================================================================
  2155. CHThorCountProjectActivity::CHThorCountProjectActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorCountProjectArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2156. {
  2157. }
  2158. CHThorCountProjectActivity::~CHThorCountProjectActivity()
  2159. {
  2160. }
  2161. void CHThorCountProjectActivity::ready()
  2162. {
  2163. CHThorSimpleActivityBase::ready();
  2164. recordCount = 0;
  2165. numProcessedLastGroup = processed;
  2166. }
  2167. const void * CHThorCountProjectActivity::nextRow()
  2168. {
  2169. for (;;)
  2170. {
  2171. OwnedConstRoxieRow in = input->nextRow();
  2172. if (!in)
  2173. {
  2174. recordCount = 0;
  2175. if (numProcessedLastGroup == processed)
  2176. in.setown(input->nextRow());
  2177. if (!in)
  2178. {
  2179. numProcessedLastGroup = processed;
  2180. return NULL;
  2181. }
  2182. }
  2183. try
  2184. {
  2185. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  2186. size32_t outSize = helper.transform(rowBuilder, in, ++recordCount);
  2187. if (outSize)
  2188. {
  2189. processed++;
  2190. return rowBuilder.finalizeRowClear(outSize);
  2191. }
  2192. }
  2193. catch(IException * e)
  2194. {
  2195. throw makeWrappedException(e);
  2196. }
  2197. }
  2198. }
  2199. //=====================================================================================================
  2200. CHThorRollupActivity::CHThorRollupActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorRollupArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2201. {
  2202. }
  2203. CHThorRollupActivity::~CHThorRollupActivity()
  2204. {
  2205. }
  2206. void CHThorRollupActivity::ready()
  2207. {
  2208. CHThorSimpleActivityBase::ready();
  2209. left.setown(input->nextRow());
  2210. prev.set(left);
  2211. }
  2212. void CHThorRollupActivity::stop()
  2213. {
  2214. left.clear();
  2215. prev.clear();
  2216. right.clear();
  2217. CHThorSimpleActivityBase::stop();
  2218. }
  2219. const void *CHThorRollupActivity::nextRow()
  2220. {
  2221. for (;;)
  2222. {
  2223. right.setown(input->nextRow());
  2224. if(!prev || !right || !helper.matches(prev,right))
  2225. {
  2226. const void * ret = left.getClear();
  2227. if(ret)
  2228. {
  2229. processed++;
  2230. }
  2231. left.setown(right.getClear());
  2232. prev.set(left);
  2233. return ret;
  2234. }
  2235. try
  2236. {
  2237. //MORE: could optimise by reusing buffer, but would have to make sure to call destructor on previous contents before overwriting
  2238. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  2239. if(unsigned outSize = helper.transform(rowBuilder, left, right))
  2240. {
  2241. left.setown(rowBuilder.finalizeRowClear(outSize));
  2242. }
  2243. if (helper.getFlags() & RFrolledismatchleft)
  2244. prev.set(left);
  2245. else
  2246. prev.set(right);
  2247. }
  2248. catch(IException * e)
  2249. {
  2250. throw makeWrappedException(e);
  2251. }
  2252. }
  2253. }
  2254. //=====================================================================================================
  2255. CHThorGroupDedupActivity::CHThorGroupDedupActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDedupArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2256. {
  2257. }
  2258. void CHThorGroupDedupActivity::ready()
  2259. {
  2260. CHThorSimpleActivityBase::ready();
  2261. numToKeep = helper.numToKeep();
  2262. numKept = 0;
  2263. }
  2264. //=====================================================================================================
  2265. CHThorGroupDedupKeepLeftActivity::CHThorGroupDedupKeepLeftActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDedupArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorGroupDedupActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  2266. {
  2267. }
  2268. void CHThorGroupDedupKeepLeftActivity::ready()
  2269. {
  2270. CHThorGroupDedupActivity::ready();
  2271. prev.clear();
  2272. }
  2273. void CHThorGroupDedupKeepLeftActivity::stop()
  2274. {
  2275. prev.clear();
  2276. CHThorSimpleActivityBase::stop();
  2277. }
  2278. const void *CHThorGroupDedupKeepLeftActivity::nextRow()
  2279. {
  2280. OwnedConstRoxieRow next;
  2281. for (;;)
  2282. {
  2283. next.setown(input->nextRow());
  2284. if (!prev || !next || !helper.matches(prev,next))
  2285. {
  2286. numKept = 0;
  2287. break;
  2288. }
  2289. if (numKept < numToKeep-1)
  2290. {
  2291. numKept++;
  2292. break;
  2293. }
  2294. }
  2295. const void * ret = next.getClear();
  2296. prev.set(ret);
  2297. if(ret)
  2298. processed++;
  2299. return ret;
  2300. }
  2301. const void * CHThorGroupDedupKeepLeftActivity::nextRowGE(const void * seek, unsigned numFields, bool &wasCompleteMatch, const SmartStepExtra &stepExtra)
  2302. {
  2303. OwnedConstRoxieRow next;
  2304. for (;;)
  2305. {
  2306. next.setown(input->nextRowGE(seek, numFields, wasCompleteMatch, stepExtra));
  2307. if (!prev || !next || !helper.matches(prev,next))
  2308. {
  2309. numKept = 0;
  2310. break;
  2311. }
  2312. if (numKept < numToKeep-1)
  2313. {
  2314. numKept++;
  2315. break;
  2316. }
  2317. }
  2318. const void * ret = next.getClear();
  2319. prev.set(ret);
  2320. if(ret)
  2321. processed++;
  2322. return ret;
  2323. }
  2324. void CHThorGroupDedupKeepLeftActivity::setInput(unsigned index, IHThorInput *_input)
  2325. {
  2326. CHThorGroupDedupActivity::setInput(index, _input);
  2327. if (input)
  2328. inputStepping = input->querySteppingMeta();
  2329. }
  2330. IInputSteppingMeta * CHThorGroupDedupKeepLeftActivity::querySteppingMeta()
  2331. {
  2332. return inputStepping;
  2333. }
  2334. bool CHThorGroupDedupKeepLeftActivity::gatherConjunctions(ISteppedConjunctionCollector & collector)
  2335. {
  2336. return input->gatherConjunctions(collector);
  2337. }
  2338. void CHThorGroupDedupKeepLeftActivity::resetEOF()
  2339. {
  2340. input->resetEOF();
  2341. }
  2342. //=====================================================================================================
  2343. CHThorGroupDedupKeepRightActivity::CHThorGroupDedupKeepRightActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDedupArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorGroupDedupActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph), compareBest(nullptr)
  2344. {
  2345. }
  2346. void CHThorGroupDedupKeepRightActivity::ready()
  2347. {
  2348. CHThorGroupDedupActivity::ready();
  2349. assertex(numToKeep==1);
  2350. firstDone = false;
  2351. if (helper.keepBest())
  2352. compareBest = helper.queryCompareBest();
  2353. }
  2354. void CHThorGroupDedupKeepRightActivity::stop()
  2355. {
  2356. kept.clear();
  2357. CHThorGroupDedupActivity::stop();
  2358. }
  2359. const void *CHThorGroupDedupKeepRightActivity::nextRow()
  2360. {
  2361. if (!firstDone)
  2362. {
  2363. firstDone = true;
  2364. kept.setown(input->nextRow());
  2365. }
  2366. OwnedConstRoxieRow next;
  2367. for (;;)
  2368. {
  2369. next.setown(input->nextRow());
  2370. if (!kept || !next || !helper.matches(kept,next))
  2371. {
  2372. numKept = 0;
  2373. break;
  2374. }
  2375. if (compareBest)
  2376. {
  2377. if (compareBest->docompare(kept,next) > 0)
  2378. kept.setown(next.getClear());
  2379. }
  2380. else
  2381. {
  2382. if (numKept < numToKeep-1)
  2383. {
  2384. numKept++;
  2385. break;
  2386. }
  2387. kept.setown(next.getClear());
  2388. }
  2389. }
  2390. const void * ret = kept.getClear();
  2391. kept.setown(next.getClear());
  2392. if(ret)
  2393. processed++;
  2394. return ret;
  2395. }
  2396. //=====================================================================================================
  2397. CHThorGroupDedupAllActivity::CHThorGroupDedupAllActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDedupArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2398. {
  2399. }
  2400. void CHThorGroupDedupAllActivity::ready()
  2401. {
  2402. CHThorSimpleActivityBase::ready();
  2403. keepLeft = helper.keepLeft();
  2404. primaryCompare = helper.queryComparePrimary();
  2405. assertex(helper.numToKeep() == 1);
  2406. firstDone = false;
  2407. survivorIndex = 0;
  2408. }
  2409. void CHThorGroupDedupAllActivity::stop()
  2410. {
  2411. survivors.clear();
  2412. CHThorSimpleActivityBase::stop();
  2413. }
  2414. bool CHThorGroupDedupAllActivity::calcNextDedupAll()
  2415. {
  2416. survivors.clear();
  2417. survivorIndex = 0;
  2418. OwnedRowArray group;
  2419. const void * next;
  2420. while((next = input->nextRow()) != NULL)
  2421. group.append(next);
  2422. if(group.ordinality() == 0)
  2423. return false;
  2424. unsigned max = group.ordinality();
  2425. if (primaryCompare)
  2426. {
  2427. //hard, if not impossible, to hit this code once optimisations in place
  2428. MemoryAttr indexbuff(max*sizeof(void *));
  2429. void ** temp = (void **)indexbuff.bufferBase();
  2430. void ** rows = (void * *)group.getArray();
  2431. msortvecstableinplace(rows, max, *primaryCompare, temp);
  2432. unsigned first = 0;
  2433. for (unsigned idx = 1; idx < max; idx++)
  2434. {
  2435. if (primaryCompare->docompare(rows[first], rows[idx]) != 0)
  2436. {
  2437. dedupRange(first, idx, group);
  2438. first = idx;
  2439. }
  2440. }
  2441. dedupRange(first, max, group);
  2442. for(unsigned idx2=0; idx2<max; ++idx2)
  2443. {
  2444. void * cur = rows[idx2];
  2445. if(cur)
  2446. {
  2447. LinkRoxieRow(cur);
  2448. survivors.append(cur);
  2449. }
  2450. }
  2451. }
  2452. else
  2453. {
  2454. dedupRange(0, max, group);
  2455. for(unsigned idx=0; idx<max; ++idx)
  2456. {
  2457. const void * cur = group.itemClear(idx);
  2458. if(cur)
  2459. survivors.append(cur);
  2460. }
  2461. }
  2462. return true;
  2463. }
  2464. void CHThorGroupDedupAllActivity::dedupRange(unsigned first, unsigned last, OwnedRowArray & group)
  2465. {
  2466. for (unsigned idxL = first; idxL < last; idxL++)
  2467. {
  2468. const void * left = group.item(idxL);
  2469. if (left)
  2470. {
  2471. for (unsigned idxR = first; idxR < last; idxR++)
  2472. {
  2473. const void * right = group.item(idxR);
  2474. if ((idxL != idxR) && right)
  2475. {
  2476. if (helper.matches(left, right))
  2477. {
  2478. if (keepLeft)
  2479. {
  2480. group.replace(NULL, idxR);
  2481. }
  2482. else
  2483. {
  2484. group.replace(NULL, idxL);
  2485. break;
  2486. }
  2487. }
  2488. }
  2489. }
  2490. }
  2491. }
  2492. }
  2493. const void *CHThorGroupDedupAllActivity::nextRow()
  2494. {
  2495. if (!firstDone)
  2496. {
  2497. firstDone = true;
  2498. calcNextDedupAll();
  2499. }
  2500. if(survivors.isItem(survivorIndex))
  2501. {
  2502. processed++;
  2503. return survivors.itemClear(survivorIndex++);
  2504. }
  2505. calcNextDedupAll();
  2506. return NULL;
  2507. }
  2508. //=====================================================================================================
  2509. bool HashDedupTable::insert(const void * row)
  2510. {
  2511. unsigned hash = helper.queryHash()->hash(row);
  2512. RtlDynamicRowBuilder keyRowBuilder(keyRowAllocator, true);
  2513. size32_t thisKeySize = helper.recordToKey(keyRowBuilder, row);
  2514. OwnedConstRoxieRow keyRow = keyRowBuilder.finalizeRowClear(thisKeySize);
  2515. if (find(hash, keyRow.get()))
  2516. return false;
  2517. addNew(new HashDedupElement(hash, keyRow.getClear()), hash);
  2518. return true;
  2519. }
  2520. bool HashDedupTable::insertBest(const void * nextrow)
  2521. {
  2522. unsigned hash = helper.queryHash()->hash(nextrow);
  2523. const void *et = find(hash, nextrow);
  2524. if (et)
  2525. {
  2526. const HashDedupElement *element = reinterpret_cast<const HashDedupElement *>(et);
  2527. const void * row = element->queryRow();
  2528. if (queryBestCompare->docompare(row,nextrow) <= 0)
  2529. return false;
  2530. removeExact( const_cast<void *>(et));
  2531. // drop-through to add new row
  2532. }
  2533. LinkRoxieRow(nextrow);
  2534. addNew(new HashDedupElement(hash, nextrow), hash);
  2535. return true;
  2536. }
  2537. CHThorHashDedupActivity::CHThorHashDedupActivity(IAgentContext & _agent, unsigned _activityId, unsigned _subgraphId, IHThorHashDedupArg & _arg, ThorActivityKind _kind, EclGraph & _graph)
  2538. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), table(_arg), hashTableFilled(false), hashDedupTableIter(table)
  2539. {
  2540. keepBest = helper.keepBest();
  2541. }
  2542. void CHThorHashDedupActivity::ready()
  2543. {
  2544. CHThorSimpleActivityBase::ready();
  2545. table.setRowAllocator(agent.queryCodeContext()->getRowAllocator(helper.queryKeySize(), activityId));
  2546. }
  2547. void CHThorHashDedupActivity::stop()
  2548. {
  2549. table.kill();
  2550. CHThorSimpleActivityBase::stop();
  2551. }
  2552. const void * CHThorHashDedupActivity::nextRow()
  2553. {
  2554. if (keepBest)
  2555. {
  2556. // Populate hash table with best rows
  2557. if (!hashTableFilled)
  2558. {
  2559. OwnedConstRoxieRow next(input->nextRow());
  2560. while(next)
  2561. {
  2562. table.insertBest(next);
  2563. next.setown(input->nextRow());
  2564. }
  2565. hashTableFilled = true;
  2566. hashDedupTableIter.first();
  2567. }
  2568. // Iterate through hash table returning rows
  2569. if (hashDedupTableIter.isValid())
  2570. {
  2571. HashDedupElement &el = hashDedupTableIter.query();
  2572. OwnedConstRoxieRow row(el.getRow());
  2573. hashDedupTableIter.next();
  2574. return row.getClear();
  2575. }
  2576. table.kill();
  2577. hashTableFilled = false;
  2578. return NULL;
  2579. }
  2580. else
  2581. {
  2582. while(true)
  2583. {
  2584. OwnedConstRoxieRow next(input->nextRow());
  2585. if(!next)
  2586. {
  2587. table.kill();
  2588. return NULL;
  2589. }
  2590. if(table.insert(next))
  2591. return next.getClear();
  2592. }
  2593. }
  2594. }
  2595. //=====================================================================================================
  2596. CHThorSteppableActivityBase::CHThorSteppableActivityBase(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorArg & _help, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _help, _kind, _graph)
  2597. {
  2598. inputStepping = NULL;
  2599. stepCompare = NULL;
  2600. }
  2601. void CHThorSteppableActivityBase::setInput(unsigned index, IHThorInput *_input)
  2602. {
  2603. CHThorSimpleActivityBase::setInput(index, _input);
  2604. if (input && index == 0)
  2605. {
  2606. inputStepping = input->querySteppingMeta();
  2607. if (inputStepping)
  2608. stepCompare = inputStepping->queryCompare();
  2609. }
  2610. }
  2611. IInputSteppingMeta * CHThorSteppableActivityBase::querySteppingMeta()
  2612. {
  2613. return inputStepping;
  2614. }
  2615. //=====================================================================================================
  2616. CHThorFilterActivity::CHThorFilterActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorFilterArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSteppableActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2617. {
  2618. }
  2619. void CHThorFilterActivity::ready()
  2620. {
  2621. CHThorSimpleActivityBase::ready();
  2622. anyThisGroup = false;
  2623. eof = !helper.canMatchAny();
  2624. }
  2625. const void * CHThorFilterActivity::nextRow()
  2626. {
  2627. if (eof)
  2628. return NULL;
  2629. for (;;)
  2630. {
  2631. OwnedConstRoxieRow ret(input->nextRow());
  2632. if (!ret)
  2633. {
  2634. //stop returning two NULLs in a row.
  2635. if (anyThisGroup)
  2636. {
  2637. anyThisGroup = false;
  2638. return NULL;
  2639. }
  2640. ret.setown(input->nextRow());
  2641. if (!ret)
  2642. return NULL; // eof...
  2643. }
  2644. if (helper.isValid(ret))
  2645. {
  2646. anyThisGroup = true;
  2647. processed++;
  2648. return ret.getClear();
  2649. }
  2650. }
  2651. }
  2652. const void * CHThorFilterActivity::nextRowGE(const void * seek, unsigned numFields, bool &wasCompleteMatch, const SmartStepExtra &stepExtra)
  2653. {
  2654. if (eof)
  2655. return NULL;
  2656. OwnedConstRoxieRow ret(input->nextRowGE(seek, numFields, wasCompleteMatch, stepExtra));
  2657. if (!ret)
  2658. return NULL;
  2659. if (helper.isValid(ret))
  2660. {
  2661. anyThisGroup = true;
  2662. processed++;
  2663. return ret.getClear();
  2664. }
  2665. return ungroupedNextRow();
  2666. }
  2667. bool CHThorFilterActivity::gatherConjunctions(ISteppedConjunctionCollector & collector)
  2668. {
  2669. return input->gatherConjunctions(collector);
  2670. }
  2671. void CHThorFilterActivity::resetEOF()
  2672. {
  2673. //Sometimes the smart stepping code returns a premature eof indicator (two nulls) and will
  2674. //therefore call resetEOF so the activity can reset its eof without resetting the activity itself.
  2675. //Note that resetEOF only needs to be implemented by activities that implement gatherConjunctions()
  2676. //and that cache eof.
  2677. eof = false;
  2678. anyThisGroup = false;
  2679. input->resetEOF();
  2680. }
  2681. //=====================================================================================================
  2682. CHThorFilterGroupActivity::CHThorFilterGroupActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorFilterGroupArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSteppableActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2683. {
  2684. }
  2685. void CHThorFilterGroupActivity::ready()
  2686. {
  2687. CHThorSimpleActivityBase::ready();
  2688. eof = !helper.canMatchAny();
  2689. nextIndex = 0;
  2690. }
  2691. void CHThorFilterGroupActivity::stop()
  2692. {
  2693. CHThorSimpleActivityBase::stop();
  2694. pending.clear();
  2695. }
  2696. const void * CHThorFilterGroupActivity::nextRow()
  2697. {
  2698. for (;;)
  2699. {
  2700. if (eof)
  2701. return NULL;
  2702. if (pending.ordinality())
  2703. {
  2704. if (pending.isItem(nextIndex))
  2705. {
  2706. processed++;
  2707. return pending.itemClear(nextIndex++);
  2708. }
  2709. nextIndex = 0;
  2710. pending.clear();
  2711. return NULL;
  2712. }
  2713. const void * ret = input->nextRow();
  2714. while (ret)
  2715. {
  2716. pending.append(ret);
  2717. ret = input->nextRow();
  2718. }
  2719. unsigned num = pending.ordinality();
  2720. if (num != 0)
  2721. {
  2722. if (!helper.isValid(num, (const void * *)pending.getArray()))
  2723. pending.clear(); // read next group
  2724. }
  2725. else
  2726. eof = true;
  2727. }
  2728. }
  2729. const void * CHThorFilterGroupActivity::nextRowGE(const void * seek, unsigned numFields, bool &wasCompleteMatch, const SmartStepExtra &stepExtra)
  2730. {
  2731. if (eof)
  2732. return NULL;
  2733. if (pending.ordinality())
  2734. {
  2735. while (pending.isItem(nextIndex))
  2736. {
  2737. OwnedConstRoxieRow ret(pending.itemClear(nextIndex++));
  2738. if (stepCompare->docompare(ret, seek, numFields) >= 0)
  2739. {
  2740. processed++;
  2741. return ret.getClear();
  2742. }
  2743. }
  2744. nextIndex = 0;
  2745. pending.clear();
  2746. }
  2747. const void * ret = input->nextRowGE(seek, numFields, wasCompleteMatch, stepExtra);
  2748. while (ret)
  2749. {
  2750. pending.append(ret);
  2751. ret = input->nextRow();
  2752. }
  2753. unsigned num = pending.ordinality();
  2754. if (num != 0)
  2755. {
  2756. if (!helper.isValid(num, (const void * *)pending.getArray()))
  2757. pending.clear(); // read next group
  2758. }
  2759. else
  2760. eof = true;
  2761. return ungroupedNextRow();
  2762. }
  2763. //=====================================================================================================
  2764. CHThorLimitActivity::CHThorLimitActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorLimitArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSteppableActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2765. {
  2766. }
  2767. void CHThorLimitActivity::ready()
  2768. {
  2769. CHThorSimpleActivityBase::ready();
  2770. rowLimit = helper.getRowLimit();
  2771. numGot = 0;
  2772. }
  2773. const void * CHThorLimitActivity::nextRow()
  2774. {
  2775. OwnedConstRoxieRow ret(input->nextRow());
  2776. if (ret)
  2777. {
  2778. if (++numGot > rowLimit)
  2779. {
  2780. if ( agent.queryCodeContext()->queryDebugContext())
  2781. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  2782. helper.onLimitExceeded();
  2783. return NULL;
  2784. }
  2785. processed++;
  2786. }
  2787. return ret.getClear();
  2788. }
  2789. const void * CHThorLimitActivity::nextRowGE(const void * seek, unsigned numFields, bool &wasCompleteMatch, const SmartStepExtra &stepExtra)
  2790. {
  2791. OwnedConstRoxieRow ret(input->nextRowGE(seek, numFields, wasCompleteMatch, stepExtra));
  2792. if (ret)
  2793. {
  2794. if (++numGot > rowLimit)
  2795. {
  2796. if ( agent.queryCodeContext()->queryDebugContext())
  2797. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  2798. helper.onLimitExceeded();
  2799. return NULL;
  2800. }
  2801. processed++;
  2802. }
  2803. return ret.getClear();
  2804. }
  2805. //=====================================================================================================
  2806. CHThorSkipLimitActivity::CHThorSkipLimitActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorLimitArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2807. {
  2808. }
  2809. void CHThorSkipLimitActivity::ready()
  2810. {
  2811. CHThorSimpleActivityBase::ready();
  2812. rowLimit = helper.getRowLimit();
  2813. }
  2814. void CHThorSkipLimitActivity::stop()
  2815. {
  2816. CHThorSimpleActivityBase::stop();
  2817. buffer.clear();
  2818. }
  2819. const void * CHThorSkipLimitActivity::nextRow()
  2820. {
  2821. if(!buffer)
  2822. {
  2823. buffer.setown(new CRowBuffer(input->queryOutputMeta(), true));
  2824. if(!buffer->pull(input, rowLimit))
  2825. {
  2826. if ( agent.queryCodeContext()->queryDebugContext())
  2827. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  2828. onLimitExceeded();
  2829. }
  2830. }
  2831. const void * next = buffer->next();
  2832. if(next)
  2833. processed++;
  2834. return next;
  2835. }
  2836. //=====================================================================================================
  2837. CHThorCatchActivity::CHThorCatchActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorCatchArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSteppableActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2838. {
  2839. }
  2840. const void * CHThorCatchActivity::nextRow()
  2841. {
  2842. try
  2843. {
  2844. OwnedConstRoxieRow ret(input->nextRow());
  2845. if (ret)
  2846. processed++;
  2847. return ret.getClear();
  2848. }
  2849. catch (IException *E)
  2850. {
  2851. E->Release();
  2852. helper.onExceptionCaught();
  2853. }
  2854. catch (...)
  2855. {
  2856. helper.onExceptionCaught();
  2857. }
  2858. throwUnexpected(); // onExceptionCaught should have thrown something
  2859. }
  2860. const void * CHThorCatchActivity::nextRowGE(const void * seek, unsigned numFields, bool &wasCompleteMatch, const SmartStepExtra &stepExtra)
  2861. {
  2862. try
  2863. {
  2864. OwnedConstRoxieRow ret(input->nextRowGE(seek, numFields, wasCompleteMatch, stepExtra));
  2865. if (ret)
  2866. processed++;
  2867. return ret.getClear();
  2868. }
  2869. catch (IException *E)
  2870. {
  2871. E->Release();
  2872. helper.onExceptionCaught();
  2873. }
  2874. catch (...)
  2875. {
  2876. helper.onExceptionCaught();
  2877. }
  2878. throwUnexpected(); // onExceptionCaught should have thrown something
  2879. }
  2880. //=====================================================================================================
  2881. CHThorSkipCatchActivity::CHThorSkipCatchActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorCatchArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2882. {
  2883. }
  2884. void CHThorSkipCatchActivity::stop()
  2885. {
  2886. CHThorSimpleActivityBase::stop();
  2887. buffer.clear();
  2888. }
  2889. void CHThorSkipCatchActivity::onException(IException *E)
  2890. {
  2891. buffer->clear();
  2892. if (kind == TAKcreaterowcatch)
  2893. {
  2894. createRowAllocator();
  2895. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  2896. size32_t newSize = helper.transformOnExceptionCaught(rowBuilder, E);
  2897. if (newSize)
  2898. buffer->insert(rowBuilder.finalizeRowClear(newSize));
  2899. }
  2900. E->Release();
  2901. }
  2902. const void * CHThorSkipCatchActivity::nextRow()
  2903. {
  2904. if(!buffer)
  2905. {
  2906. buffer.setown(new CRowBuffer(input->queryOutputMeta(), true));
  2907. try
  2908. {
  2909. buffer->pull(input, (unsigned __int64) -1);
  2910. }
  2911. catch (IException *E)
  2912. {
  2913. onException(E);
  2914. }
  2915. catch (...)
  2916. {
  2917. onException(MakeStringException(2, "Unknown exception caught"));
  2918. }
  2919. }
  2920. const void * next = buffer->next();
  2921. if(next)
  2922. processed++;
  2923. return next;
  2924. }
  2925. //=====================================================================================================
  2926. CHThorOnFailLimitActivity::CHThorOnFailLimitActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorLimitArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSkipLimitActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  2927. {
  2928. }
  2929. void CHThorOnFailLimitActivity::onLimitExceeded()
  2930. {
  2931. buffer->clear();
  2932. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  2933. size32_t newSize = helper.transformOnLimitExceeded(rowBuilder);
  2934. if (newSize)
  2935. buffer->insert(rowBuilder.finalizeRowClear(newSize));
  2936. }
  2937. //=====================================================================================================
  2938. CHThorIfActivity::CHThorIfActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorIfArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2939. {
  2940. inputTrue = NULL;
  2941. inputFalse = NULL;
  2942. selectedInput = NULL;
  2943. }
  2944. void CHThorIfActivity::stop()
  2945. {
  2946. if (selectedInput)
  2947. selectedInput->stop();
  2948. CHThorSimpleActivityBase::stop();
  2949. }
  2950. void CHThorIfActivity::ready()
  2951. {
  2952. CHThorSimpleActivityBase::ready();
  2953. selectedInput = helper.getCondition() ? inputTrue : inputFalse;
  2954. if (selectedInput)
  2955. selectedInput->ready();
  2956. }
  2957. void CHThorIfActivity::setInput(unsigned index, IHThorInput *_input)
  2958. {
  2959. if (index==0)
  2960. inputTrue = _input;
  2961. else if (index == 1)
  2962. inputFalse = _input;
  2963. else
  2964. CHThorActivityBase::setInput(index, _input);
  2965. }
  2966. const void * CHThorIfActivity::nextRow()
  2967. {
  2968. if (!selectedInput)
  2969. return NULL;
  2970. const void *ret = selectedInput->nextRow();
  2971. if (ret)
  2972. processed++;
  2973. return ret;
  2974. }
  2975. //=====================================================================================================
  2976. CHThorCaseActivity::CHThorCaseActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorCaseArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorMultiInputActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2977. {
  2978. }
  2979. void CHThorCaseActivity::ready()
  2980. {
  2981. //Evaluate the condition here to avoid calling ready() on the unused branch?
  2982. initialProcessed = processed;
  2983. selectedInput = NULL;
  2984. unsigned whichBranch = helper.getBranch();
  2985. if (whichBranch >= inputs.ordinality())
  2986. whichBranch = inputs.ordinality()-1;
  2987. selectedInput = inputs.item(whichBranch);
  2988. selectedInput->ready();
  2989. }
  2990. void CHThorCaseActivity::stop()
  2991. {
  2992. if (selectedInput)
  2993. selectedInput->stop();
  2994. }
  2995. const void *CHThorCaseActivity::nextRow()
  2996. {
  2997. if (!selectedInput)
  2998. return NULL;
  2999. const void *ret = selectedInput->nextRow();
  3000. if (ret)
  3001. processed++;
  3002. return ret;
  3003. }
  3004. //=====================================================================================================
  3005. CHThorSampleActivity::CHThorSampleActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorSampleArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  3006. {
  3007. }
  3008. void CHThorSampleActivity::ready()
  3009. {
  3010. CHThorSimpleActivityBase::ready();
  3011. numSamples = helper.getProportion();
  3012. whichSample = helper.getSampleNumber();
  3013. numToSkip = (whichSample ? whichSample-1 : 0);
  3014. anyThisGroup = false;
  3015. }
  3016. const void * CHThorSampleActivity::nextRow()
  3017. {
  3018. for (;;)
  3019. {
  3020. OwnedConstRoxieRow ret(input->nextRow());
  3021. if (!ret)
  3022. {
  3023. //this does work with groups - may or may not be useful...
  3024. //reset the sample for each group.... probably best.
  3025. numToSkip = (whichSample ? whichSample-1 : 0);
  3026. if (anyThisGroup)
  3027. {
  3028. anyThisGroup = false;
  3029. return NULL;
  3030. }
  3031. ret.setown(input->nextRow());
  3032. if (!ret)
  3033. return NULL; // eof...
  3034. }
  3035. if (numToSkip == 0)
  3036. {
  3037. anyThisGroup = true;
  3038. numToSkip = numSamples-1;
  3039. processed++;
  3040. return ret.getClear();
  3041. }
  3042. numToSkip--;
  3043. }
  3044. }
  3045. //=====================================================================================================
  3046. CHThorAggregateActivity::CHThorAggregateActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorAggregateArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  3047. {
  3048. }
  3049. void CHThorAggregateActivity::ready()
  3050. {
  3051. CHThorSimpleActivityBase::ready();
  3052. eof = false;
  3053. }
  3054. const void * CHThorAggregateActivity::nextRow()
  3055. {
  3056. if (eof)
  3057. return NULL;
  3058. const void * next = input->nextRow();
  3059. if (!next && input->isGrouped())
  3060. {
  3061. eof = true;
  3062. return NULL;
  3063. }
  3064. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  3065. helper.clearAggregate(rowBuilder);
  3066. if (next)
  3067. {
  3068. helper.processFirst(rowBuilder, next);
  3069. ReleaseRoxieRow(next);
  3070. bool abortEarly = (kind == TAKexistsaggregate) && !input->isGrouped();
  3071. if (!abortEarly)
  3072. {
  3073. for (;;)
  3074. {
  3075. next = input->nextRow();
  3076. if (!next)
  3077. break;
  3078. helper.processNext(rowBuilder, next);
  3079. ReleaseRoxieRow(next);
  3080. }
  3081. }
  3082. }
  3083. if (!input->isGrouped()) // either read all, or aborted early
  3084. eof = true;
  3085. processed++;
  3086. size32_t finalSize = outputMeta.getRecordSize(rowBuilder.getSelf());
  3087. return rowBuilder.finalizeRowClear(finalSize);
  3088. }
  3089. //=====================================================================================================
  3090. CHThorHashAggregateActivity::CHThorHashAggregateActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorHashAggregateArg &_arg, ThorActivityKind _kind, EclGraph & _graph, bool _isGroupedAggregate)
  3091. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph),
  3092. aggregated(_arg, _arg),
  3093. isGroupedAggregate(_isGroupedAggregate)
  3094. {
  3095. }
  3096. void CHThorHashAggregateActivity::ready()
  3097. {
  3098. CHThorSimpleActivityBase::ready();
  3099. eof = false;
  3100. gathered = false;
  3101. }
  3102. void CHThorHashAggregateActivity::stop()
  3103. {
  3104. aggregated.reset();
  3105. CHThorSimpleActivityBase::stop();
  3106. }
  3107. const void * CHThorHashAggregateActivity::nextRow()
  3108. {
  3109. if (eof)
  3110. return NULL;
  3111. if (!gathered)
  3112. {
  3113. bool eog = true;
  3114. aggregated.start(rowAllocator, agent.queryCodeContext(), activityId);
  3115. for (;;)
  3116. {
  3117. OwnedConstRoxieRow next(input->nextRow());
  3118. if (!next)
  3119. {
  3120. if (isGroupedAggregate)
  3121. {
  3122. if (eog)
  3123. eof = true;
  3124. break;
  3125. }
  3126. next.setown(input->nextRow());
  3127. if (!next)
  3128. break;
  3129. }
  3130. eog = false;
  3131. try
  3132. {
  3133. aggregated.addRow(next);
  3134. }
  3135. catch(IException * e)
  3136. {
  3137. throw makeWrappedException(e);
  3138. }
  3139. }
  3140. gathered = true;
  3141. }
  3142. Owned<AggregateRowBuilder> next = aggregated.nextResult();
  3143. if (next)
  3144. {
  3145. processed++;
  3146. return next->finalizeRowClear();
  3147. }
  3148. if (!isGroupedAggregate)
  3149. eof = true;
  3150. aggregated.reset();
  3151. gathered = false;
  3152. return NULL;
  3153. }
  3154. //=====================================================================================================
  3155. CHThorSelectNActivity::CHThorSelectNActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorSelectNArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  3156. {
  3157. }
  3158. const void * CHThorSelectNActivity::defaultRow()
  3159. {
  3160. if (!rowAllocator)
  3161. createRowAllocator(); //We delay as often not needed...
  3162. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  3163. size32_t thisSize = helper.createDefault(rowBuilder);
  3164. return rowBuilder.finalizeRowClear(thisSize);
  3165. }
  3166. void CHThorSelectNActivity::ready()
  3167. {
  3168. CHThorSimpleActivityBase::ready();
  3169. finished = false;
  3170. }
  3171. const void * CHThorSelectNActivity::nextRow()
  3172. {
  3173. if (finished)
  3174. return NULL;
  3175. finished = true;
  3176. unsigned __int64 index = helper.getRowToSelect();
  3177. while (--index)
  3178. {
  3179. OwnedConstRoxieRow next(input->nextRow());
  3180. if (!next)
  3181. next.setown(input->nextRow());
  3182. if (!next)
  3183. {
  3184. processed++;
  3185. return defaultRow();
  3186. }
  3187. }
  3188. OwnedConstRoxieRow next(input->nextRow());
  3189. if (!next)
  3190. next.setown(input->nextRow());
  3191. if (!next)
  3192. next.setown(defaultRow());
  3193. processed++;
  3194. return next.getClear();
  3195. }
  3196. //=====================================================================================================
  3197. CHThorFirstNActivity::CHThorFirstNActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorFirstNArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  3198. {
  3199. grouped = outputMeta.isGrouped();
  3200. }
  3201. void CHThorFirstNActivity::ready()
  3202. {
  3203. CHThorSimpleActivityBase::ready();
  3204. skip = helper.numToSkip();
  3205. limit = helper.getLimit();
  3206. doneThisGroup = 0;
  3207. finished = (limit == 0);
  3208. if (limit + skip >= limit)
  3209. limit += skip;
  3210. }
  3211. const void * CHThorFirstNActivity::nextRow()
  3212. {
  3213. if (finished)
  3214. return NULL;
  3215. OwnedConstRoxieRow ret;
  3216. for (;;)
  3217. {
  3218. ret.setown(input->nextRow());
  3219. if (!ret)
  3220. {
  3221. if (grouped)
  3222. {
  3223. if (doneThisGroup > skip)
  3224. {
  3225. doneThisGroup = 0;
  3226. return NULL;
  3227. }
  3228. doneThisGroup = 0;
  3229. }
  3230. ret.setown(input->nextRow());
  3231. if (!ret)
  3232. {
  3233. finished = true;
  3234. return NULL;
  3235. }
  3236. }
  3237. doneThisGroup++;
  3238. if (doneThisGroup > skip)
  3239. break;
  3240. }
  3241. if (doneThisGroup <= limit)
  3242. {
  3243. processed++;
  3244. return ret.getClear();
  3245. }
  3246. if (grouped)
  3247. {
  3248. ret.setown(input->nextRow());
  3249. while (ret)
  3250. ret.setown(input->nextRow());
  3251. doneThisGroup = 0;
  3252. }
  3253. else
  3254. finished = true;
  3255. return NULL;
  3256. }
  3257. //=====================================================================================================
  3258. CHThorChooseSetsActivity::CHThorChooseSetsActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorChooseSetsArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  3259. {
  3260. numSets = helper.getNumSets();
  3261. setCounts = new unsigned[numSets];
  3262. }
  3263. CHThorChooseSetsActivity::~CHThorChooseSetsActivity()
  3264. {
  3265. delete [] setCounts;
  3266. }
  3267. void CHThorChooseSetsActivity::ready()
  3268. {
  3269. CHThorSimpleActivityBase::ready();
  3270. finished = false;
  3271. memset(setCounts, 0, sizeof(unsigned)*numSets);
  3272. helper.setCounts(setCounts);
  3273. }
  3274. const void * CHThorChooseSetsActivity::nextRow()
  3275. {
  3276. if (finished)
  3277. return NULL;
  3278. for (;;)
  3279. {
  3280. OwnedConstRoxieRow ret(input->nextRow());
  3281. if (!ret)
  3282. {
  3283. ret.setown(input->nextRow());
  3284. if (!ret)
  3285. return NULL;
  3286. }
  3287. processed++;
  3288. switch (helper.getRecordAction(ret))
  3289. {
  3290. case 2:
  3291. finished = true;
  3292. return ret.getClear();
  3293. case 1:
  3294. return ret.getClear();
  3295. }
  3296. }
  3297. }
  3298. //=====================================================================================================
  3299. CHThorChooseSetsExActivity::CHThorChooseSetsExActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorChooseSetsExArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  3300. {
  3301. numSets = helper.getNumSets();
  3302. setCounts = new unsigned[numSets];
  3303. memset(setCounts, 0, sizeof(unsigned)*numSets);
  3304. limits = (count_t *)checked_calloc(sizeof(count_t), numSets, "choose sets ex");
  3305. helper.getLimits(limits);
  3306. }
  3307. CHThorChooseSetsExActivity::~CHThorChooseSetsExActivity()
  3308. {
  3309. delete [] setCounts;
  3310. free(limits);
  3311. }
  3312. void CHThorChooseSetsExActivity::ready()
  3313. {
  3314. CHThorSimpleActivityBase::ready();
  3315. finished = false;
  3316. curIndex = 0;
  3317. memset(setCounts, 0, sizeof(unsigned)*numSets);
  3318. }
  3319. void CHThorChooseSetsExActivity::stop()
  3320. {
  3321. gathered.clear();
  3322. CHThorSimpleActivityBase::stop();
  3323. }
  3324. const void * CHThorChooseSetsExActivity::nextRow()
  3325. {
  3326. if (gathered.ordinality() == 0)
  3327. {
  3328. curIndex = 0;
  3329. const void * next = input->nextRow();
  3330. while(next)
  3331. {
  3332. gathered.append(next);
  3333. next = input->nextRow();
  3334. }
  3335. if(gathered.ordinality() == 0)
  3336. {
  3337. finished = true;
  3338. return NULL;
  3339. }
  3340. ForEachItemIn(idx1, gathered)
  3341. {
  3342. unsigned category = helper.getCategory(gathered.item(idx1));
  3343. if (category)
  3344. setCounts[category-1]++;
  3345. }
  3346. calculateSelection();
  3347. }
  3348. while (gathered.isItem(curIndex))
  3349. {
  3350. OwnedConstRoxieRow row(gathered.itemClear(curIndex++));
  3351. if (includeRow(row))
  3352. {
  3353. processed++;
  3354. return row.getClear();
  3355. }
  3356. }
  3357. gathered.clear();
  3358. return NULL;
  3359. }
  3360. //=====================================================================================================
  3361. CHThorChooseSetsLastActivity::CHThorChooseSetsLastActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorChooseSetsExArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorChooseSetsExActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  3362. {
  3363. numToSkip = (unsigned *)checked_calloc(sizeof(unsigned), numSets, "choose sets last");
  3364. }
  3365. CHThorChooseSetsLastActivity::~CHThorChooseSetsLastActivity()
  3366. {
  3367. free(numToSkip);
  3368. }
  3369. void CHThorChooseSetsLastActivity::ready()
  3370. {
  3371. CHThorChooseSetsExActivity::ready();
  3372. memset(numToSkip, 0, sizeof(unsigned) * numSets);
  3373. }
  3374. void CHThorChooseSetsLastActivity::calculateSelection()
  3375. {
  3376. for (unsigned idx=0; idx < numSets; idx++)
  3377. {
  3378. if (setCounts[idx] < limits[idx])
  3379. numToSkip[idx] = 0;
  3380. else
  3381. numToSkip[idx] = (unsigned)(setCounts[idx] - limits[idx]);
  3382. }
  3383. }
  3384. bool CHThorChooseSetsLastActivity::includeRow(const void * row)
  3385. {
  3386. unsigned category = helper.getCategory(row);
  3387. if (category)
  3388. {
  3389. if (numToSkip[category-1] == 0)
  3390. return true;
  3391. numToSkip[category-1]--;
  3392. }
  3393. return false;
  3394. }
  3395. //=====================================================================================================
  3396. CHThorChooseSetsEnthActivity::CHThorChooseSetsEnthActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorChooseSetsExArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorChooseSetsExActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  3397. {
  3398. counter = (unsigned __int64 *)checked_calloc(sizeof(unsigned __int64), numSets, "choose sets enth");
  3399. }
  3400. CHThorChooseSetsEnthActivity::~CHThorChooseSetsEnthActivity()
  3401. {
  3402. free(counter);
  3403. }
  3404. void CHThorChooseSetsEnthActivity::ready()
  3405. {
  3406. CHThorChooseSetsExActivity::ready();
  3407. memset(counter, 0, sizeof(unsigned __int64) * numSets);
  3408. }
  3409. void CHThorChooseSetsEnthActivity::calculateSelection()
  3410. {
  3411. }
  3412. bool CHThorChooseSetsEnthActivity::includeRow(const void * row)
  3413. {
  3414. unsigned category = helper.getCategory(row);
  3415. if (category)
  3416. {
  3417. counter[category-1] += limits[category-1];
  3418. if(counter[category-1] >= setCounts[category-1])
  3419. {
  3420. counter[category-1] -= setCounts[category-1];
  3421. return true;
  3422. }
  3423. }
  3424. return false;
  3425. }
  3426. //=====================================================================================================
  3427. CHThorDegroupActivity::CHThorDegroupActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDegroupArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSteppableActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  3428. {
  3429. }
  3430. const void * CHThorDegroupActivity::nextRow()
  3431. {
  3432. const void * ret = input->ungroupedNextRow();
  3433. if (ret)
  3434. processed++;
  3435. return ret;
  3436. }
  3437. const void * CHThorDegroupActivity::nextRowGE(const void * seek, unsigned numFields, bool &wasCompleteMatch, const SmartStepExtra &stepExtra)
  3438. {
  3439. const void * ret = input->nextRowGE(seek, numFields, wasCompleteMatch, stepExtra);
  3440. if (ret)
  3441. processed++;
  3442. return ret;
  3443. }
  3444. bool CHThorDegroupActivity::isGrouped()
  3445. {
  3446. return false;
  3447. }
  3448. //=====================================================================================================
  3449. CHThorGroupActivity::CHThorGroupActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorGroupArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSteppableActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  3450. {
  3451. }
  3452. bool CHThorGroupActivity::isGrouped()
  3453. {
  3454. return true;
  3455. }
  3456. void CHThorGroupActivity::ready()
  3457. {
  3458. CHThorSimpleActivityBase::ready();
  3459. next.clear();
  3460. endPending = false;
  3461. firstDone = false;
  3462. }
  3463. void CHThorGroupActivity::stop()
  3464. {
  3465. CHThorSimpleActivityBase::stop();
  3466. next.clear();
  3467. }
  3468. const void *CHThorGroupActivity::nextRow()
  3469. {
  3470. if (!firstDone)
  3471. {
  3472. firstDone = true;
  3473. next.setown(input->nextRow());
  3474. }
  3475. if (endPending)
  3476. {
  3477. endPending = false;
  3478. return NULL;
  3479. }
  3480. OwnedConstRoxieRow prev(next.getClear());
  3481. next.setown(input->nextRow());
  3482. if (!next) // skip incoming groups. (should it sub-group??)
  3483. next.setown(input->nextRow());
  3484. if (next)
  3485. {
  3486. assertex(prev); // If this fails, you have an initial empty group. That is not legal.
  3487. if (!helper.isSameGroup(prev, next))
  3488. endPending = true;
  3489. }
  3490. if (prev)
  3491. processed++;
  3492. return prev.getClear();
  3493. }
  3494. const void * CHThorGroupActivity::nextRowGE(const void * seek, unsigned numFields, bool &wasCompleteMatch, const SmartStepExtra &stepExtra)
  3495. {
  3496. if (firstDone)
  3497. {
  3498. if (next)
  3499. {
  3500. if (stepCompare->docompare(next, seek, numFields) >= 0)
  3501. return nextRow();
  3502. }
  3503. }
  3504. next.setown(input->nextRowGE(seek, numFields, wasCompleteMatch, stepExtra));
  3505. firstDone = true;
  3506. return nextRow();
  3507. }
  3508. //=====================================================================================================
  3509. CHThorGroupSortActivity::CHThorGroupSortActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorSortArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  3510. {
  3511. gotSorted = false;
  3512. }
  3513. void CHThorGroupSortActivity::ready()
  3514. {
  3515. CHThorSimpleActivityBase::ready();
  3516. if(!sorter)
  3517. createSorter();
  3518. }
  3519. void CHThorGroupSortActivity::stop()
  3520. {
  3521. if(sorter)
  3522. {
  3523. if(sorterIsConst)
  3524. sorter->killSorted();
  3525. else
  3526. sorter.clear();
  3527. }
  3528. gotSorted = false;
  3529. diskReader.clear();
  3530. CHThorSimpleActivityBase::stop();
  3531. }
  3532. const void *CHThorGroupSortActivity::nextRow()
  3533. {
  3534. if(!gotSorted)
  3535. getSorted();
  3536. if(diskReader)
  3537. {
  3538. const void *row = diskReader->nextRow();
  3539. if (row)
  3540. return row;
  3541. diskReader.clear();
  3542. }
  3543. else
  3544. {
  3545. const void * ret = sorter->getNextSorted();
  3546. if(ret)
  3547. {
  3548. processed++;
  3549. return ret;
  3550. }
  3551. }
  3552. sorter->killSorted();
  3553. gotSorted = false;
  3554. return NULL;
  3555. }
  3556. void CHThorGroupSortActivity::createSorter()
  3557. {
  3558. unsigned flags = helper.getAlgorithmFlags();
  3559. sorterIsConst = ((flags & TAFconstant) != 0);
  3560. OwnedRoxieString algoname(helper.getAlgorithm());
  3561. if(!algoname)
  3562. {
  3563. if((flags & TAFunstable) != 0)
  3564. sorter.setown(new CQuickSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep));
  3565. else
  3566. sorter.setown(new CHeapSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep));
  3567. return;
  3568. }
  3569. if(stricmp(algoname, "quicksort") == 0)
  3570. {
  3571. if((flags & TAFstable) != 0)
  3572. sorter.setown(new CStableQuickSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep, this));
  3573. else
  3574. sorter.setown(new CQuickSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep));
  3575. }
  3576. else if(stricmp(algoname, "parquicksort") == 0)
  3577. {
  3578. if((flags & TAFstable) != 0)
  3579. sorter.setown(new CParallelStableQuickSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep, this));
  3580. else
  3581. sorter.setown(new CParallelQuickSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep));
  3582. }
  3583. else if(stricmp(algoname, "mergesort") == 0)
  3584. {
  3585. if((flags & TAFparallel) != 0)
  3586. sorter.setown(new CParallelStableMergeSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep, this));
  3587. else
  3588. sorter.setown(new CStableMergeSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep, this));
  3589. }
  3590. else if(stricmp(algoname, "parmergesort") == 0)
  3591. sorter.setown(new CParallelStableMergeSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep, this));
  3592. else if(stricmp(algoname, "heapsort") == 0)
  3593. sorter.setown(new CHeapSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep));
  3594. else if(stricmp(algoname, "insertionsort") == 0)
  3595. {
  3596. if((flags & TAFstable) != 0)
  3597. sorter.setown(new CStableInsertionSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep));
  3598. else
  3599. sorter.setown(new CInsertionSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep));
  3600. }
  3601. else
  3602. {
  3603. StringBuffer sb;
  3604. sb.appendf("Ignoring unsupported sort order algorithm '%s', using default", algoname.get());
  3605. agent.addWuExceptionEx(sb.str(),WRN_UnsupportedAlgorithm,SeverityWarning,MSGAUD_user,"hthor");
  3606. if((flags & TAFunstable) != 0)
  3607. sorter.setown(new CQuickSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep));
  3608. else
  3609. sorter.setown(new CHeapSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep));
  3610. }
  3611. sorter->setActivityId(activityId);
  3612. }
  3613. void CHThorGroupSortActivity::getSorted()
  3614. {
  3615. diskMerger.clear();
  3616. diskReader.clear();
  3617. queryRowManager()->addRowBuffer(this);//register for OOM callbacks
  3618. const void * next;
  3619. while((next = input->nextRow()) != NULL)
  3620. {
  3621. if (!sorter->addRow(next))
  3622. {
  3623. {
  3624. //Unlikely that this code will ever be executed but added for comfort
  3625. roxiemem::RoxieOutputRowArrayLock block(sorter->getRowArray());
  3626. sorter->flushRows();
  3627. sortAndSpillRows();
  3628. //Ensure new rows are written to the head of the array. It needs to be a separate call because
  3629. //performSort() cannot shift active row pointer since it can be called from any thread
  3630. sorter->flushRows();
  3631. }
  3632. if (!sorter->addRow(next))
  3633. {
  3634. ReleaseRoxieRow(next);
  3635. throw MakeStringException(0, "Insufficient memory to append sort row");
  3636. }
  3637. }
  3638. }
  3639. queryRowManager()->removeRowBuffer(this);//unregister for OOM callbacks
  3640. sorter->flushRows();
  3641. if(diskMerger)
  3642. {
  3643. sortAndSpillRows();
  3644. sorter->killSorted();
  3645. ICompare *compare = helper.queryCompare();
  3646. diskReader.setown(diskMerger->merge(compare));
  3647. }
  3648. else
  3649. {
  3650. sorter->performSort();
  3651. }
  3652. gotSorted = true;
  3653. }
  3654. //interface roxiemem::IBufferedRowCallback
  3655. unsigned CHThorGroupSortActivity::getSpillCost() const
  3656. {
  3657. return 10;
  3658. }
  3659. unsigned CHThorGroupSortActivity::getActivityId() const
  3660. {
  3661. return activityId;
  3662. }
  3663. bool CHThorGroupSortActivity::freeBufferedRows(bool critical)
  3664. {
  3665. roxiemem::RoxieOutputRowArrayLock block(sorter->getRowArray());
  3666. return sortAndSpillRows();
  3667. }
  3668. bool CHThorGroupSortActivity::sortAndSpillRows()
  3669. {
  3670. if (0 == sorter->numCommitted())
  3671. return false;
  3672. if(!diskMerger)
  3673. {
  3674. StringBuffer fbase;
  3675. agent.getTempfileBase(fbase).append(PATHSEPCHAR).appendf("spill_sort_%p", this);
  3676. PROGLOG("SORT: spilling to disk, filename base %s", fbase.str());
  3677. class CHThorRowLinkCounter : implements IRowLinkCounter, public CSimpleInterface
  3678. {
  3679. public:
  3680. IMPLEMENT_IINTERFACE_USING(CSimpleInterface);
  3681. virtual void releaseRow(const void *row)
  3682. {
  3683. ReleaseRoxieRow(row);
  3684. }
  3685. virtual void linkRow(const void *row)
  3686. {
  3687. LinkRoxieRow(row);
  3688. }
  3689. };
  3690. Owned<IRowLinkCounter> linker = new CHThorRowLinkCounter();
  3691. Owned<IRowInterfaces> rowInterfaces = createRowInterfaces(input->queryOutputMeta(), activityId, 0, agent.queryCodeContext());
  3692. diskMerger.setown(createDiskMerger(rowInterfaces, linker, fbase.str()));
  3693. }
  3694. sorter->performSort();
  3695. sorter->spillSortedToDisk(diskMerger);
  3696. return true;
  3697. }
  3698. // Base for Quick sort and both Insertion sorts
  3699. void CSimpleSorterBase::spillSortedToDisk(IDiskMerger * merger)
  3700. {
  3701. Owned<IRowWriter> out = merger->createWriteBlock();
  3702. for (;;)
  3703. {
  3704. const void *row = getNextSorted();
  3705. if (!row)
  3706. break;
  3707. out->putRow(row);
  3708. }
  3709. finger = 0;
  3710. out->flush();
  3711. rowsToSort.noteSpilled(rowsToSort.numCommitted());
  3712. }
  3713. // Quick sort
  3714. void CQuickSorter::performSort()
  3715. {
  3716. size32_t numRows = rowsToSort.numCommitted();
  3717. if (numRows)
  3718. {
  3719. const void * * rows = rowsToSort.getBlock(numRows);
  3720. qsortvec((void * *)rows, numRows, *compare);
  3721. finger = 0;
  3722. }
  3723. }
  3724. // Quick sort
  3725. void CParallelQuickSorter::performSort()
  3726. {
  3727. size32_t numRows = rowsToSort.numCommitted();
  3728. if (numRows)
  3729. {
  3730. const void * * rows = rowsToSort.getBlock(numRows);
  3731. parqsortvec((void * *)rows, numRows, *compare);
  3732. finger = 0;
  3733. }
  3734. }
  3735. // StableQuick sort
  3736. bool CStableSorter::addRow(const void * next)
  3737. {
  3738. roxiemem::rowidx_t nextRowCapacity = rowsToSort.rowCapacity() + 1;//increment capacity for the row we are about to add
  3739. if (nextRowCapacity > indexCapacity)
  3740. {
  3741. void *** newIndex = (void ***)rowManager->allocate(nextRowCapacity * sizeof(void*), activityId);//could force an OOM callback
  3742. if (newIndex)
  3743. {
  3744. roxiemem::RoxieOutputRowArrayLock block(getRowArray());//could force an OOM callback after index is freed but before index,indexCapacity is updated
  3745. ReleaseRoxieRow(index);
  3746. index = newIndex;
  3747. indexCapacity = RoxieRowCapacity(index) / sizeof(void*);
  3748. }
  3749. else
  3750. {
  3751. killSorted();
  3752. ReleaseRoxieRow(next);
  3753. throw MakeStringException(0, "Insufficient memory to allocate StableQuickSorter index");
  3754. }
  3755. }
  3756. return CSimpleSorterBase::addRow(next);
  3757. }
  3758. void CStableSorter::spillSortedToDisk(IDiskMerger * merger)
  3759. {
  3760. CSimpleSorterBase::spillSortedToDisk(merger);
  3761. ReleaseRoxieRow(index);
  3762. index = NULL;
  3763. indexCapacity = 0;
  3764. }
  3765. void CStableSorter::killSorted()
  3766. {
  3767. CSimpleSorterBase::killSorted();
  3768. ReleaseRoxieRow(index);
  3769. index = NULL;
  3770. indexCapacity = 0;
  3771. }
  3772. // StableQuick sort
  3773. void CStableQuickSorter::performSort()
  3774. {
  3775. size32_t numRows = rowsToSort.numCommitted();
  3776. if (numRows)
  3777. {
  3778. const void * * rows = rowsToSort.getBlock(numRows);
  3779. qsortvecstableinplace((void * *)rows, numRows, *compare, (void * *)index);
  3780. finger = 0;
  3781. }
  3782. }
  3783. void CParallelStableQuickSorter::performSort()
  3784. {
  3785. size32_t numRows = rowsToSort.numCommitted();
  3786. if (numRows)
  3787. {
  3788. const void * * rows = rowsToSort.getBlock(numRows);
  3789. parqsortvecstableinplace((void * *)rows, numRows, *compare, (void * *)index);
  3790. finger = 0;
  3791. }
  3792. }
  3793. // StableMerge sort
  3794. void CStableMergeSorter::performSort()
  3795. {
  3796. size32_t numRows = rowsToSort.numCommitted();
  3797. if (numRows)
  3798. {
  3799. const void * * rows = rowsToSort.getBlock(numRows);
  3800. msortvecstableinplace((void * *)rows, numRows, *compare, (void * *)index);
  3801. finger = 0;
  3802. }
  3803. }
  3804. void CParallelStableMergeSorter::performSort()
  3805. {
  3806. size32_t numRows = rowsToSort.numCommitted();
  3807. if (numRows)
  3808. {
  3809. const void * * rows = rowsToSort.getBlock(numRows);
  3810. parmsortvecstableinplace((void * *)rows, numRows, *compare, (void * *)index);
  3811. finger = 0;
  3812. }
  3813. }
  3814. // Heap sort
  3815. void CHeapSorter::performSort()
  3816. {
  3817. size32_t numRows = rowsToSort.numCommitted();
  3818. if (numRows)
  3819. {
  3820. const void * * rows = rowsToSort.getBlock(numRows);
  3821. heapsize = numRows;
  3822. for (unsigned i = 0; i < numRows; i++)
  3823. {
  3824. heap.append(i);
  3825. heap_push_up(i, heap.getArray(), rows, compare);
  3826. }
  3827. }
  3828. }
  3829. void CHeapSorter::spillSortedToDisk(IDiskMerger * merger)
  3830. {
  3831. CSimpleSorterBase::spillSortedToDisk(merger);
  3832. heap.kill();
  3833. heapsize = 0;
  3834. }
  3835. const void * CHeapSorter::getNextSorted()
  3836. {
  3837. if(heapsize)
  3838. {
  3839. size32_t numRows = rowsToSort.numCommitted();
  3840. if (numRows)
  3841. {
  3842. const void * * rows = rowsToSort.getBlock(numRows);
  3843. unsigned top = heap.item(0);
  3844. --heapsize;
  3845. heap.replace(heap.item(heapsize), 0);
  3846. heap_push_down(0, heapsize, heap.getArray(), rows, compare);
  3847. const void * row = rows[top];
  3848. rows[top] = NULL;
  3849. return row;
  3850. }
  3851. }
  3852. return NULL;
  3853. }
  3854. void CHeapSorter::killSorted()
  3855. {
  3856. CSimpleSorterBase::killSorted();
  3857. heap.kill();
  3858. heapsize = 0;
  3859. }
  3860. // Insertion sorts
  3861. void CInsertionSorter::performSort()
  3862. {
  3863. size32_t numRows = rowsToSort.numCommitted();
  3864. if (numRows)
  3865. {
  3866. const void * * rows = rowsToSort.getBlock(numRows);
  3867. for (unsigned i = 0; i < numRows; i++)
  3868. {
  3869. binary_vec_insert(rowsToSort.query(i), rows, i, *compare);
  3870. }
  3871. finger = 0;
  3872. }
  3873. }
  3874. void CStableInsertionSorter::performSort()
  3875. {
  3876. size32_t numRows = rowsToSort.numCommitted();
  3877. if (numRows)
  3878. {
  3879. const void * * rows = rowsToSort.getBlock(numRows);
  3880. for (unsigned i = 0; i < numRows; i++)
  3881. {
  3882. binary_vec_insert_stable(rowsToSort.query(i), rows, i, *compare);
  3883. }
  3884. finger = 0;
  3885. }
  3886. }
  3887. //=====================================================================================================
  3888. CHThorGroupedActivity::CHThorGroupedActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorGroupedArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  3889. {
  3890. }
  3891. void CHThorGroupedActivity::ready()
  3892. {
  3893. CHThorSimpleActivityBase::ready();
  3894. firstDone = false;
  3895. nextRowIndex = 0;
  3896. }
  3897. void CHThorGroupedActivity::stop()
  3898. {
  3899. CHThorSimpleActivityBase::stop();
  3900. next[0].clear();
  3901. next[1].clear();
  3902. next[2].clear();
  3903. }
  3904. const void *CHThorGroupedActivity::nextRow()
  3905. {
  3906. if (!firstDone)
  3907. {
  3908. next[0].setown(input->nextRow());
  3909. next[1].setown(input->nextRow());
  3910. nextRowIndex = 0;
  3911. }
  3912. unsigned nextToCompare = (nextRowIndex + 1) % 3;
  3913. unsigned nextToFill = (nextRowIndex + 2) % 3;
  3914. next[nextToFill].setown(input->nextRow());
  3915. OwnedConstRoxieRow ret(next[nextRowIndex].getClear());
  3916. if (ret)
  3917. {
  3918. if (next[nextToCompare])
  3919. {
  3920. if (!helper.isSameGroup(ret, next[nextToCompare]))
  3921. throw MakeStringException(100, "GROUPED(%u), expected a group break between adjacent rows (rows %" I64F "d, %" I64F "d) ", activityId, processed+1, processed+2);
  3922. }
  3923. else if (next[nextToFill])
  3924. {
  3925. if (helper.isSameGroup(ret, next[nextToFill]))
  3926. throw MakeStringException(100, "GROUPED(%u), unexpected group break found between rows %" I64F "d and %" I64F "d)", activityId, processed+1, processed+2);
  3927. }
  3928. processed++;
  3929. }
  3930. nextRowIndex = nextToCompare;
  3931. return ret.getClear();
  3932. }
  3933. //=====================================================================================================
  3934. CHThorSortedActivity::CHThorSortedActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorSortedArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSteppableActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  3935. {
  3936. //MORE: Should probably have a inter group and intra group sort functions
  3937. compare = helper.queryCompare();
  3938. }
  3939. void CHThorSortedActivity::ready()
  3940. {
  3941. CHThorSimpleActivityBase::ready();
  3942. firstDone = false;
  3943. }
  3944. void CHThorSortedActivity::stop()
  3945. {
  3946. CHThorSimpleActivityBase::stop();
  3947. next.clear();
  3948. }
  3949. const void *CHThorSortedActivity::nextRow()
  3950. {
  3951. if (!firstDone)
  3952. {
  3953. firstDone = true;
  3954. next.setown(input->nextRow());
  3955. }
  3956. OwnedConstRoxieRow prev(next.getClear());
  3957. next.setown(input->nextRow());
  3958. if (prev && next)
  3959. if (compare->docompare(prev, next) > 0)
  3960. throw MakeStringException(100, "SORTED(%u) detected incorrectly sorted rows (row %" I64F "d, %" I64F "d))", activityId, processed+1, processed+2);
  3961. if (prev)
  3962. processed++;
  3963. return prev.getClear();
  3964. }
  3965. const void * CHThorSortedActivity::nextRowGE(const void * seek, unsigned numFields, bool &wasCompleteMatch, const SmartStepExtra &stepExtra)
  3966. {
  3967. if (next)
  3968. {
  3969. if (stepCompare->docompare(next, seek, numFields) >= 0)
  3970. return nextRow();
  3971. }
  3972. firstDone = true;
  3973. next.setown(input->nextRowGE(seek, numFields, wasCompleteMatch, stepExtra));
  3974. return nextRow();
  3975. }
  3976. //=====================================================================================================
  3977. CHThorTraceActivity::CHThorTraceActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorTraceArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  3978. : CHThorSteppableActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph),
  3979. helper(_arg), keepLimit(0), skip(0), sample(0), traceEnabled(false)
  3980. {
  3981. }
  3982. void CHThorTraceActivity::ready()
  3983. {
  3984. CHThorSimpleActivityBase::ready();
  3985. traceEnabled = agent.queryWorkUnit()->getDebugValueBool("traceEnabled", false);
  3986. if (traceEnabled && helper.canMatchAny())
  3987. {
  3988. keepLimit = helper.getKeepLimit();
  3989. if (keepLimit==(unsigned) -1)
  3990. keepLimit = agent.queryWorkUnit()->getDebugValueInt("traceLimit", 10);
  3991. skip = helper.getSkip();
  3992. sample = helper.getSample();
  3993. if (sample)
  3994. sample--;
  3995. name.setown(helper.getName());
  3996. if (!name)
  3997. name.set("Row");
  3998. }
  3999. else
  4000. keepLimit = 0;
  4001. }
  4002. void CHThorTraceActivity::stop()
  4003. {
  4004. CHThorSimpleActivityBase::stop();
  4005. name.clear();
  4006. }
  4007. const void *CHThorTraceActivity::nextRow()
  4008. {
  4009. OwnedConstRoxieRow ret(input->nextRow());
  4010. if (!ret)
  4011. return NULL;
  4012. onTrace(ret);
  4013. processed++;
  4014. return ret.getClear();
  4015. }
  4016. const void * CHThorTraceActivity::nextRowGE(const void * seek, unsigned numFields, bool &wasCompleteMatch, const SmartStepExtra &stepExtra)
  4017. {
  4018. OwnedConstRoxieRow ret(input->nextRowGE(seek, numFields, wasCompleteMatch, stepExtra));
  4019. if (ret)
  4020. {
  4021. onTrace(ret);
  4022. processed++;
  4023. }
  4024. return ret.getClear();
  4025. }
  4026. void CHThorTraceActivity::onTrace(const void *row)
  4027. {
  4028. if (keepLimit && helper.isValid(row))
  4029. {
  4030. if (skip)
  4031. skip--;
  4032. else if (sample)
  4033. sample--;
  4034. else
  4035. {
  4036. CommonXmlWriter xmlwrite(XWFnoindent);
  4037. outputMeta.toXML((const byte *) row, xmlwrite);
  4038. DBGLOG("TRACE: <%s>%s<%s>", name.get(), xmlwrite.str(), name.get());
  4039. keepLimit--;
  4040. sample = helper.getSample();
  4041. if (sample)
  4042. sample--;
  4043. }
  4044. }
  4045. }
  4046. //=====================================================================================================
  4047. void getLimitType(unsigned flags, bool & limitFail, bool & limitOnFail)
  4048. {
  4049. if((flags & JFmatchAbortLimitSkips) != 0)
  4050. {
  4051. limitFail = false;
  4052. limitOnFail = false;
  4053. }
  4054. else
  4055. {
  4056. limitOnFail = ((flags & JFonfail) != 0);
  4057. limitFail = !limitOnFail;
  4058. }
  4059. }
  4060. CHThorJoinActivity::CHThorJoinActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorJoinArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  4061. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), outBuilder(NULL)
  4062. {
  4063. }
  4064. void CHThorJoinActivity::ready()
  4065. {
  4066. CHThorActivityBase::ready();
  4067. input1->ready();
  4068. bool isStable = (helper.getJoinFlags() & JFunstable) == 0;
  4069. RoxieSortAlgorithm sortAlgorithm = isStable ? stableSpillingQuickSortAlgorithm : spillingQuickSortAlgorithm;
  4070. StringBuffer tempBase;
  4071. agent.getTempfileBase(tempBase);
  4072. if (helper.isLeftAlreadySorted())
  4073. sortedLeftInput.setown(createDegroupedInputReader(&input->queryStream()));
  4074. else
  4075. sortedLeftInput.setown(createSortedInputReader(&input->queryStream(), createSortAlgorithm(sortAlgorithm, helper.queryCompareLeft(), *queryRowManager(), input->queryOutputMeta(), agent.queryCodeContext(), tempBase, activityId)));
  4076. ICompare *compareRight = helper.queryCompareRight();
  4077. if (helper.isRightAlreadySorted())
  4078. groupedSortedRightInput.setown(createGroupedInputReader(&input1->queryStream(), compareRight));
  4079. else
  4080. groupedSortedRightInput.setown(createSortedGroupedInputReader(&input1->queryStream(), compareRight, createSortAlgorithm(sortAlgorithm, compareRight, *queryRowManager(), input1->queryOutputMeta(), agent.queryCodeContext(), tempBase, activityId)));
  4081. outBuilder.setAllocator(rowAllocator);
  4082. leftOuterJoin = (helper.getJoinFlags() & JFleftouter) != 0;
  4083. rightOuterJoin = (helper.getJoinFlags() & JFrightouter) != 0;
  4084. exclude = (helper.getJoinFlags() & JFexclude) != 0;
  4085. getLimitType(helper.getJoinFlags(), limitFail, limitOnFail);
  4086. if (rightOuterJoin && !defaultLeft)
  4087. createDefaultLeft();
  4088. if ((leftOuterJoin || limitOnFail) && !defaultRight)
  4089. createDefaultRight();
  4090. betweenjoin = ((helper.getJoinFlags() & JFslidingmatch) != 0);
  4091. assertex(!(betweenjoin && rightOuterJoin));
  4092. keepLimit = helper.getKeepLimit();
  4093. if (keepLimit == 0)
  4094. keepLimit = (unsigned)-1;
  4095. atmostLimit = helper.getJoinLimit();
  4096. if(atmostLimit == 0)
  4097. atmostLimit = (unsigned)-1;
  4098. else
  4099. assertex(!rightOuterJoin && !betweenjoin);
  4100. abortLimit = helper.getMatchAbortLimit();
  4101. if (abortLimit == 0)
  4102. abortLimit = (unsigned)-1;
  4103. assertex((helper.getJoinFlags() & (JFfirst | JFfirstleft | JFfirstright)) == 0); // no longer supported
  4104. if(betweenjoin)
  4105. {
  4106. collate = helper.queryCompareLeftRightLower();
  4107. collateupper = helper.queryCompareLeftRightUpper();
  4108. }
  4109. else
  4110. {
  4111. collate = collateupper = helper.queryCompareLeftRight();
  4112. }
  4113. rightIndex = 0;
  4114. joinCounter = 0;
  4115. failingLimit.clear();
  4116. state = JSfill;
  4117. if ((helper.getJoinFlags() & JFlimitedprefixjoin) && helper.getJoinLimit())
  4118. { //Limited Match Join (s[1..n])
  4119. limitedhelper.setown(createRHLimitedCompareHelper());
  4120. limitedhelper->init( helper.getJoinLimit(), groupedSortedRightInput, collate, helper.queryPrefixCompare() );
  4121. }
  4122. }
  4123. void CHThorJoinActivity::stop()
  4124. {
  4125. outBuilder.clear();
  4126. right.clear();
  4127. left.clear();
  4128. pendingRight.clear();
  4129. sortedLeftInput.clear();
  4130. groupedSortedRightInput.clear();
  4131. CHThorActivityBase::stop();
  4132. input1->stop();
  4133. }
  4134. void CHThorJoinActivity::setInput(unsigned index, IHThorInput *_input)
  4135. {
  4136. if (index==1)
  4137. input1 = _input;
  4138. else
  4139. CHThorActivityBase::setInput(index, _input);
  4140. }
  4141. void CHThorJoinActivity::createDefaultLeft()
  4142. {
  4143. if (!defaultLeft)
  4144. {
  4145. if (!defaultLeftAllocator)
  4146. defaultLeftAllocator.setown(agent.queryCodeContext()->getRowAllocator(input->queryOutputMeta(), activityId));
  4147. RtlDynamicRowBuilder rowBuilder(defaultLeftAllocator);
  4148. size32_t thisSize = helper.createDefaultLeft(rowBuilder);
  4149. defaultLeft.setown(rowBuilder.finalizeRowClear(thisSize));
  4150. }
  4151. }
  4152. void CHThorJoinActivity::createDefaultRight()
  4153. {
  4154. if (!defaultRight)
  4155. {
  4156. if (!defaultRightAllocator)
  4157. defaultRightAllocator.setown(agent.queryCodeContext()->getRowAllocator(input1->queryOutputMeta(), activityId));
  4158. RtlDynamicRowBuilder rowBuilder(defaultRightAllocator);
  4159. size32_t thisSize = helper.createDefaultRight(rowBuilder);
  4160. defaultRight.setown(rowBuilder.finalizeRowClear(thisSize));
  4161. }
  4162. }
  4163. void CHThorJoinActivity::fillLeft()
  4164. {
  4165. matchedLeft = false;
  4166. left.setown(sortedLeftInput->nextRow()); // NOTE: already degrouped
  4167. if(betweenjoin && left && pendingRight && (collate->docompare(left, pendingRight) >= 0))
  4168. fillRight();
  4169. if (limitedhelper && 0==rightIndex)
  4170. {
  4171. rightIndex = 0;
  4172. joinCounter = 0;
  4173. right.clear();
  4174. matchedRight.kill();
  4175. if (left)
  4176. {
  4177. limitedhelper->getGroup(right,left);
  4178. ForEachItemIn(idx, right)
  4179. matchedRight.append(false);
  4180. }
  4181. }
  4182. }
  4183. void CHThorJoinActivity::fillRight()
  4184. {
  4185. if (limitedhelper)
  4186. return;
  4187. failingLimit.clear();
  4188. if(betweenjoin && left)
  4189. {
  4190. aindex_t start = 0;
  4191. while(right.isItem(start) && (collateupper->docompare(left, right.item(start)) > 0))
  4192. start++;
  4193. if(start>0)
  4194. right.clearPart(0, start);
  4195. }
  4196. else
  4197. right.clear();
  4198. rightIndex = 0;
  4199. joinCounter = 0;
  4200. unsigned groupCount = 0;
  4201. while(true)
  4202. {
  4203. OwnedConstRoxieRow next;
  4204. if(pendingRight)
  4205. {
  4206. next.setown(pendingRight.getClear());
  4207. }
  4208. else
  4209. {
  4210. next.setown(groupedSortedRightInput->nextRow());
  4211. }
  4212. if(!rightOuterJoin && next && (!left || (collateupper->docompare(left, next) > 0))) // if right is less than left, and not right outer, can skip group
  4213. {
  4214. while(next)
  4215. next.setown(groupedSortedRightInput->nextRow());
  4216. continue;
  4217. }
  4218. while(next)
  4219. {
  4220. if(groupCount==abortLimit)
  4221. {
  4222. if(limitFail)
  4223. failLimit();
  4224. if ( agent.queryCodeContext()->queryDebugContext())
  4225. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  4226. if(limitOnFail)
  4227. {
  4228. assertex(!failingLimit);
  4229. try
  4230. {
  4231. failLimit();
  4232. }
  4233. catch(IException * except)
  4234. {
  4235. failingLimit.setown(except);
  4236. }
  4237. assertex(failingLimit);
  4238. }
  4239. right.append(next.getClear());
  4240. do
  4241. {
  4242. next.setown(groupedSortedRightInput->nextRow());
  4243. } while(next);
  4244. break;
  4245. }
  4246. else if(groupCount==atmostLimit)
  4247. {
  4248. right.clear();
  4249. groupCount = 0;
  4250. while(next)
  4251. {
  4252. next.setown(groupedSortedRightInput->nextRow());
  4253. }
  4254. }
  4255. else
  4256. {
  4257. right.append(next.getClear());
  4258. groupCount++;
  4259. }
  4260. next.setown(groupedSortedRightInput->nextRow());
  4261. }
  4262. // normally only want to read one right group, but if is between join and next right group is in window for left, need to continue
  4263. if(betweenjoin && left)
  4264. {
  4265. pendingRight.setown(groupedSortedRightInput->nextRow());
  4266. if(!pendingRight || (collate->docompare(left, pendingRight) < 0))
  4267. break;
  4268. }
  4269. else
  4270. break;
  4271. }
  4272. matchedRight.kill();
  4273. ForEachItemIn(idx, right)
  4274. matchedRight.append(false);
  4275. }
  4276. const void * CHThorJoinActivity::joinRecords(const void * curLeft, const void * curRight, unsigned counter, unsigned flags)
  4277. {
  4278. try
  4279. {
  4280. outBuilder.ensureRow();
  4281. size32_t thisSize = helper.transform(outBuilder, curLeft, curRight, counter, flags);
  4282. if(thisSize)
  4283. return outBuilder.finalizeRowClear(thisSize);
  4284. else
  4285. return NULL;
  4286. }
  4287. catch(IException * e)
  4288. {
  4289. throw makeWrappedException(e);
  4290. }
  4291. }
  4292. const void * CHThorJoinActivity::groupDenormalizeRecords(const void * curLeft, ConstPointerArray & rows, unsigned flags)
  4293. {
  4294. try
  4295. {
  4296. outBuilder.ensureRow();
  4297. unsigned numRows = rows.ordinality();
  4298. const void * rhs = numRows ? rows.item(0) : defaultRight.get();
  4299. if (numRows>0)
  4300. flags |= JTFmatchedright;
  4301. memsize_t thisSize = helper.transform(outBuilder, curLeft, rhs, numRows, (const void * *)rows.getArray(), flags);
  4302. if(thisSize)
  4303. return outBuilder.finalizeRowClear(thisSize);
  4304. else
  4305. return NULL;
  4306. }
  4307. catch(IException * e)
  4308. {
  4309. throw makeWrappedException(e);
  4310. }
  4311. }
  4312. const void * CHThorJoinActivity::joinException(const void * curLeft, IException * except)
  4313. {
  4314. try
  4315. {
  4316. outBuilder.ensureRow();
  4317. size32_t thisSize = helper.onFailTransform(outBuilder, curLeft, defaultRight, except, JTFmatchedleft);
  4318. if(thisSize)
  4319. return outBuilder.finalizeRowClear(thisSize);
  4320. else
  4321. return NULL;
  4322. }
  4323. catch(IException * e)
  4324. {
  4325. throw makeWrappedException(e);
  4326. }
  4327. }
  4328. void CHThorJoinActivity::failLimit()
  4329. {
  4330. helper.onMatchAbortLimitExceeded();
  4331. CommonXmlWriter xmlwrite(0);
  4332. if (input->queryOutputMeta() && input->queryOutputMeta()->hasXML())
  4333. {
  4334. input->queryOutputMeta()->toXML((byte *)left.get(), xmlwrite);
  4335. }
  4336. throw MakeStringException(0, "More than %d match candidates in join for row %s", abortLimit, xmlwrite.str());
  4337. }
  4338. const void *CHThorJoinActivity::nextRow()
  4339. {
  4340. for (;;)
  4341. {
  4342. switch (state)
  4343. {
  4344. case JSfill:
  4345. fillLeft();
  4346. state = JSfillright;
  4347. break;
  4348. case JSfillright:
  4349. fillRight();
  4350. state = JScollate;
  4351. break;
  4352. case JSfillleft:
  4353. fillLeft();
  4354. state = JScollate;
  4355. break;
  4356. case JScollate:
  4357. if (right.ordinality() == 0)
  4358. {
  4359. if (!left)
  4360. return NULL;
  4361. state = JSleftonly;
  4362. }
  4363. else
  4364. {
  4365. if (!left)
  4366. state = JSrightonly;
  4367. else
  4368. {
  4369. int diff;
  4370. if(betweenjoin)
  4371. diff = ((collate->docompare(left, right.item(0)) < 0) ? -1 : ((collateupper->docompare(left, right.item(right.ordinality()-1)) > 0) ? +1 : 0));
  4372. else
  4373. diff = collate->docompare(left, right.item(0));
  4374. bool limitExceeded = right.ordinality()>abortLimit;
  4375. if (diff == 0)
  4376. {
  4377. if (limitExceeded)
  4378. {
  4379. const void * ret = NULL;
  4380. if(failingLimit)
  4381. {
  4382. if ( agent.queryCodeContext()->queryDebugContext())
  4383. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  4384. ret = joinException(left, failingLimit);
  4385. }
  4386. left.clear();
  4387. state = JSfillleft;
  4388. ForEachItemIn(idx, right)
  4389. matchedRight.replace(true, idx);
  4390. if(ret)
  4391. {
  4392. processed++;
  4393. return ret;
  4394. }
  4395. }
  4396. else
  4397. {
  4398. state = JScompare;
  4399. joinLimit = keepLimit;
  4400. }
  4401. }
  4402. else if (diff < 0)
  4403. state = JSleftonly;
  4404. else if (limitExceeded)
  4405. {
  4406. // MORE - Roxie code seems to think there should be a destroyRowset(right) here....
  4407. state = JSfillright;
  4408. }
  4409. else
  4410. state = JSrightonly;
  4411. }
  4412. }
  4413. break;
  4414. case JSrightonly:
  4415. if (rightOuterJoin)
  4416. {
  4417. switch (kind)
  4418. {
  4419. case TAKjoin:
  4420. {
  4421. while (right.isItem(rightIndex))
  4422. {
  4423. if (!matchedRight.item(rightIndex))
  4424. {
  4425. const void * rhs = right.item(rightIndex++);
  4426. const void * ret = joinRecords(defaultLeft, rhs, 0, JTFmatchedright);
  4427. if (ret)
  4428. {
  4429. processed++;
  4430. return ret;
  4431. }
  4432. }
  4433. else
  4434. rightIndex++;
  4435. }
  4436. break;
  4437. }
  4438. //Probably excessive to implement the following, but possibly useful
  4439. case TAKdenormalize:
  4440. {
  4441. OwnedConstRoxieRow newLeft(defaultLeft.getLink());
  4442. unsigned rowSize = 0;
  4443. unsigned leftCount = 0;
  4444. while (right.isItem(rightIndex))
  4445. {
  4446. if (!matchedRight.item(rightIndex))
  4447. {
  4448. const void * rhs = right.item(rightIndex);
  4449. try
  4450. {
  4451. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  4452. size32_t thisSize = helper.transform(rowBuilder, newLeft, rhs, ++leftCount, JTFmatchedright);
  4453. if (thisSize)
  4454. {
  4455. rowSize = thisSize;
  4456. newLeft.setown(rowBuilder.finalizeRowClear(rowSize));
  4457. }
  4458. }
  4459. catch(IException * e)
  4460. {
  4461. throw makeWrappedException(e);
  4462. }
  4463. }
  4464. rightIndex++;
  4465. }
  4466. state = JSfillright;
  4467. if (rowSize)
  4468. {
  4469. processed++;
  4470. return newLeft.getClear();
  4471. }
  4472. break;
  4473. }
  4474. case TAKdenormalizegroup:
  4475. {
  4476. filteredRight.kill();
  4477. while (right.isItem(rightIndex))
  4478. {
  4479. if (!matchedRight.item(rightIndex))
  4480. filteredRight.append(right.item(rightIndex));
  4481. rightIndex++;
  4482. }
  4483. state = JSfillright;
  4484. if (filteredRight.ordinality())
  4485. {
  4486. const void * ret = groupDenormalizeRecords(defaultLeft, filteredRight, 0);
  4487. filteredRight.kill();
  4488. if (ret)
  4489. {
  4490. processed++;
  4491. return ret;
  4492. }
  4493. }
  4494. break;
  4495. }
  4496. default:
  4497. throwUnexpected();
  4498. }
  4499. }
  4500. state = JSfillright;
  4501. break;
  4502. case JSleftonly:
  4503. {
  4504. const void * ret = NULL;
  4505. if (!matchedLeft && leftOuterJoin)
  4506. {
  4507. switch (kind)
  4508. {
  4509. case TAKjoin:
  4510. ret = joinRecords(left, defaultRight, 0, JTFmatchedleft);
  4511. break;
  4512. case TAKdenormalize:
  4513. ret = left.getClear();
  4514. break;
  4515. case TAKdenormalizegroup:
  4516. filteredRight.kill();
  4517. ret = groupDenormalizeRecords(left, filteredRight, JTFmatchedleft);
  4518. break;
  4519. default:
  4520. throwUnexpected();
  4521. }
  4522. }
  4523. left.clear();
  4524. state = JSfillleft;
  4525. if (ret)
  4526. {
  4527. processed++;
  4528. return ret;
  4529. }
  4530. break;
  4531. }
  4532. case JScompare:
  4533. if (joinLimit != 0)
  4534. {
  4535. switch (kind)
  4536. {
  4537. case TAKjoin:
  4538. {
  4539. while (right.isItem(rightIndex))
  4540. {
  4541. const void * rhs = right.item(rightIndex++);
  4542. if (helper.match(left, rhs))
  4543. {
  4544. matchedRight.replace(true, rightIndex-1);
  4545. matchedLeft = true;
  4546. if (!exclude)
  4547. {
  4548. const void *ret = joinRecords(left, rhs, ++joinCounter, JTFmatchedleft|JTFmatchedright);
  4549. if (ret)
  4550. {
  4551. processed++;
  4552. joinLimit--;
  4553. return ret;
  4554. }
  4555. }
  4556. }
  4557. }
  4558. break;
  4559. }
  4560. case TAKdenormalize:
  4561. {
  4562. OwnedConstRoxieRow newLeft;
  4563. newLeft.set(left);
  4564. unsigned rowSize = 0;
  4565. unsigned leftCount = 0;
  4566. while (right.isItem(rightIndex) && joinLimit)
  4567. {
  4568. const void * rhs = right.item(rightIndex++);
  4569. if (helper.match(left, rhs))
  4570. {
  4571. matchedRight.replace(true, rightIndex-1);
  4572. matchedLeft = true;
  4573. if (!exclude)
  4574. {
  4575. try
  4576. {
  4577. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  4578. unsigned thisSize = helper.transform(rowBuilder, newLeft, rhs, ++leftCount, JTFmatchedleft|JTFmatchedright);
  4579. if (thisSize)
  4580. {
  4581. rowSize = thisSize;
  4582. newLeft.setown(rowBuilder.finalizeRowClear(rowSize));
  4583. joinLimit--;
  4584. }
  4585. }
  4586. catch(IException * e)
  4587. {
  4588. throw makeWrappedException(e);
  4589. }
  4590. }
  4591. }
  4592. }
  4593. state = JSleftonly;
  4594. rightIndex = 0;
  4595. if (rowSize)
  4596. {
  4597. processed++;
  4598. return newLeft.getClear();
  4599. }
  4600. break;
  4601. }
  4602. case TAKdenormalizegroup:
  4603. {
  4604. filteredRight.kill();
  4605. while (right.isItem(rightIndex))
  4606. {
  4607. const void * rhs = right.item(rightIndex++);
  4608. if (helper.match(left, rhs))
  4609. {
  4610. matchedRight.replace(true, rightIndex-1);
  4611. filteredRight.append(rhs);
  4612. matchedLeft = true;
  4613. if (filteredRight.ordinality()==joinLimit)
  4614. break;
  4615. }
  4616. }
  4617. state = JSleftonly;
  4618. rightIndex = 0;
  4619. if (!exclude && filteredRight.ordinality())
  4620. {
  4621. const void * ret = groupDenormalizeRecords(left, filteredRight, JTFmatchedleft);
  4622. filteredRight.kill();
  4623. if (ret)
  4624. {
  4625. processed++;
  4626. return ret;
  4627. }
  4628. }
  4629. break;
  4630. }
  4631. default:
  4632. throwUnexpected();
  4633. }
  4634. }
  4635. state = JSleftonly;
  4636. rightIndex = 0;
  4637. joinCounter = 0;
  4638. break;
  4639. }
  4640. }
  4641. }
  4642. bool CHThorJoinActivity::isGrouped()
  4643. {
  4644. return false;
  4645. }
  4646. //=====================================================================================================
  4647. CHThorSelfJoinActivity::CHThorSelfJoinActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorJoinArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  4648. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), outBuilder(NULL)
  4649. {
  4650. dualCacheInput = NULL;
  4651. }
  4652. void CHThorSelfJoinActivity::ready()
  4653. {
  4654. CHThorActivityBase::ready();
  4655. outBuilder.setAllocator(rowAllocator);
  4656. ICompare *compareLeft = helper.queryCompareLeft();
  4657. if (helper.isLeftAlreadySorted())
  4658. groupedInput.setown(createGroupedInputReader(&input->queryStream(), compareLeft));
  4659. else
  4660. {
  4661. bool isStable = (helper.getJoinFlags() & JFunstable) == 0;
  4662. RoxieSortAlgorithm sortAlgorithm = isStable ? stableSpillingQuickSortAlgorithm : spillingQuickSortAlgorithm;
  4663. StringBuffer tempBase;
  4664. agent.getTempfileBase(tempBase);
  4665. groupedInput.setown(createSortedGroupedInputReader(&input->queryStream(), compareLeft, createSortAlgorithm(sortAlgorithm, compareLeft, *queryRowManager(), input->queryOutputMeta(), agent.queryCodeContext(), tempBase, activityId)));
  4666. }
  4667. leftOuterJoin = (helper.getJoinFlags() & JFleftouter) != 0;
  4668. rightOuterJoin = (helper.getJoinFlags() & JFrightouter) != 0;
  4669. exclude = (helper.getJoinFlags() & JFexclude) != 0;
  4670. getLimitType(helper.getJoinFlags(), limitFail, limitOnFail);
  4671. if (rightOuterJoin && !defaultLeft)
  4672. {
  4673. if (!defaultAllocator)
  4674. defaultAllocator.setown(agent.queryCodeContext()->getRowAllocator(input->queryOutputMeta(), activityId));
  4675. RtlDynamicRowBuilder rowBuilder(defaultAllocator);
  4676. size32_t thisSize = helper.createDefaultLeft(rowBuilder);
  4677. defaultLeft.setown(rowBuilder.finalizeRowClear(thisSize));
  4678. }
  4679. if ((leftOuterJoin || limitOnFail) && !defaultRight)
  4680. {
  4681. if (!defaultAllocator)
  4682. defaultAllocator.setown(agent.queryCodeContext()->getRowAllocator(input->queryOutputMeta(), activityId));
  4683. RtlDynamicRowBuilder rowBuilder(defaultAllocator);
  4684. size32_t thisSize = helper.createDefaultRight(rowBuilder);
  4685. defaultRight.setown(rowBuilder.finalizeRowClear(thisSize));
  4686. }
  4687. if((helper.getJoinFlags() & JFslidingmatch) != 0)
  4688. throw MakeStringException(99, "Sliding self join not supported");
  4689. keepLimit = helper.getKeepLimit();
  4690. if(keepLimit == 0)
  4691. keepLimit = (unsigned)-1;
  4692. atmostLimit = helper.getJoinLimit();
  4693. if(atmostLimit == 0)
  4694. atmostLimit = (unsigned)-1;
  4695. else
  4696. assertex(!rightOuterJoin);
  4697. abortLimit = helper.getMatchAbortLimit();
  4698. if (abortLimit == 0)
  4699. abortLimit = (unsigned)-1;
  4700. assertex((helper.getJoinFlags() & (JFfirst | JFfirstleft | JFfirstright)) == 0); // no longer supported
  4701. collate = helper.queryCompareLeftRight();
  4702. eof = false;
  4703. doneFirstFill = false;
  4704. failingLimit.clear();
  4705. if ((helper.getJoinFlags() & JFlimitedprefixjoin) && helper.getJoinLimit())
  4706. { //Limited Match Join (s[1..n])
  4707. dualcache.setown(new CRHDualCache());
  4708. dualcache->init(groupedInput);
  4709. dualCacheInput = dualcache->queryOut1();
  4710. failingOuterAtmost = false;
  4711. matchedLeft = false;
  4712. leftIndex = 0;
  4713. rightOuterIndex = 0;
  4714. limitedhelper.setown(createRHLimitedCompareHelper());
  4715. limitedhelper->init( helper.getJoinLimit(), dualcache->queryOut2(), collate, helper.queryPrefixCompare() );
  4716. }
  4717. joinCounter = 0;
  4718. }
  4719. void CHThorSelfJoinActivity::stop()
  4720. {
  4721. outBuilder.clear();
  4722. group.clear();
  4723. groupedInput.clear();
  4724. CHThorActivityBase::stop();
  4725. }
  4726. bool CHThorSelfJoinActivity::fillGroup()
  4727. {
  4728. group.clear();
  4729. matchedLeft = false;
  4730. matchedRight.kill();
  4731. failingOuterAtmost = false;
  4732. OwnedConstRoxieRow next;
  4733. unsigned groupCount = 0;
  4734. next.setown(groupedInput->nextRow());
  4735. while(next)
  4736. {
  4737. if(groupCount==abortLimit)
  4738. {
  4739. if(limitFail)
  4740. failLimit(next);
  4741. if ( agent.queryCodeContext()->queryDebugContext())
  4742. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  4743. if(limitOnFail)
  4744. {
  4745. assertex(!failingLimit);
  4746. try
  4747. {
  4748. failLimit(next);
  4749. }
  4750. catch(IException * except)
  4751. {
  4752. failingLimit.setown(except);
  4753. }
  4754. assertex(failingLimit);
  4755. group.append(next.getClear());
  4756. groupCount++;
  4757. break;
  4758. }
  4759. group.clear();
  4760. groupCount = 0;
  4761. while(next)
  4762. next.setown(groupedInput->nextRow());
  4763. }
  4764. else if(groupCount==atmostLimit)
  4765. {
  4766. if(leftOuterJoin)
  4767. {
  4768. group.append(next.getClear());
  4769. groupCount++;
  4770. failingOuterAtmost = true;
  4771. break;
  4772. }
  4773. else
  4774. {
  4775. group.clear();
  4776. groupCount = 0;
  4777. while(next)
  4778. next.setown(groupedInput->nextRow());
  4779. }
  4780. }
  4781. else
  4782. {
  4783. group.append(next.getClear());
  4784. groupCount++;
  4785. }
  4786. next.setown(groupedInput->nextRow());
  4787. }
  4788. if(group.ordinality()==0)
  4789. {
  4790. eof = true;
  4791. return false;
  4792. }
  4793. leftIndex = 0;
  4794. rightIndex = 0;
  4795. joinCounter = 0;
  4796. rightOuterIndex = 0;
  4797. joinLimit = keepLimit;
  4798. ForEachItemIn(idx, group)
  4799. matchedRight.append(false);
  4800. return true;
  4801. }
  4802. const void * CHThorSelfJoinActivity::nextRow()
  4803. {
  4804. if (limitedhelper) {
  4805. while(!eof) //limited match join
  4806. {
  4807. if (!group.isItem(rightIndex))
  4808. {
  4809. lhs.setown(dualCacheInput->nextRow());
  4810. if (lhs)
  4811. {
  4812. rightIndex = 0;
  4813. joinCounter = 0;
  4814. group.clear();
  4815. limitedhelper->getGroup(group,lhs);
  4816. }
  4817. else
  4818. eof = true;
  4819. }
  4820. if (group.isItem(rightIndex))
  4821. {
  4822. const void * rhs = group.item(rightIndex++);
  4823. if(helper.match(lhs, rhs))
  4824. {
  4825. const void * ret = joinRecords(lhs, rhs, ++joinCounter, JTFmatchedleft|JTFmatchedright, NULL);
  4826. if(ret)
  4827. {
  4828. processed++;
  4829. return ret;
  4830. }
  4831. }
  4832. }
  4833. }
  4834. return NULL;
  4835. }
  4836. if(!doneFirstFill)
  4837. {
  4838. fillGroup();
  4839. doneFirstFill = true;
  4840. }
  4841. while(!eof)
  4842. {
  4843. if(failingOuterAtmost)
  4844. while(group.isItem(leftIndex))
  4845. {
  4846. const void * ret = joinRecords(group.item(leftIndex++), defaultRight, 0, JTFmatchedleft, NULL);
  4847. if(ret)
  4848. {
  4849. processed++;
  4850. return ret;
  4851. }
  4852. }
  4853. if((joinLimit == 0) || !group.isItem(rightIndex))
  4854. {
  4855. if(leftOuterJoin && !matchedLeft && !failingLimit)
  4856. {
  4857. const void * ret = joinRecords(group.item(leftIndex), defaultRight, 0, JTFmatchedleft, NULL);
  4858. if(ret)
  4859. {
  4860. matchedLeft = true;
  4861. processed++;
  4862. return ret;
  4863. }
  4864. }
  4865. leftIndex++;
  4866. matchedLeft = false;
  4867. rightIndex = 0;
  4868. joinCounter = 0;
  4869. joinLimit = keepLimit;
  4870. }
  4871. if(!group.isItem(leftIndex))
  4872. {
  4873. if(failingLimit || failingOuterAtmost)
  4874. {
  4875. OwnedConstRoxieRow lhs(groupedInput->nextRow()); // dualCache never active here
  4876. while(lhs)
  4877. {
  4878. const void * ret = joinRecords(lhs, defaultRight, 0, JTFmatchedleft, failingLimit);
  4879. if(ret)
  4880. {
  4881. processed++;
  4882. return ret;
  4883. }
  4884. lhs.setown(groupedInput->nextRow());
  4885. }
  4886. failingLimit.clear();
  4887. }
  4888. if(rightOuterJoin && !failingLimit)
  4889. while(group.isItem(rightOuterIndex))
  4890. if(!matchedRight.item(rightOuterIndex++))
  4891. {
  4892. const void * ret = joinRecords(defaultLeft, group.item(rightOuterIndex-1), 0, JTFmatchedright, NULL);
  4893. if(ret)
  4894. {
  4895. processed++;
  4896. return ret;
  4897. }
  4898. }
  4899. if(!fillGroup())
  4900. return NULL;
  4901. continue;
  4902. }
  4903. const void * lhs = group.item(leftIndex);
  4904. if(failingLimit)
  4905. {
  4906. leftIndex++;
  4907. const void * ret = joinRecords(lhs, defaultRight, 0, JTFmatchedleft, failingLimit);
  4908. if(ret)
  4909. {
  4910. processed++;
  4911. return ret;
  4912. }
  4913. }
  4914. else
  4915. {
  4916. const void * rhs = group.item(rightIndex++);
  4917. if(helper.match(lhs, rhs))
  4918. {
  4919. matchedLeft = true;
  4920. matchedRight.replace(true, rightIndex-1);
  4921. if(!exclude)
  4922. {
  4923. const void * ret = joinRecords(lhs, rhs, ++joinCounter, JTFmatchedleft|JTFmatchedright, NULL);
  4924. if(ret)
  4925. {
  4926. processed++;
  4927. joinLimit--;
  4928. return ret;
  4929. }
  4930. }
  4931. }
  4932. }
  4933. }
  4934. return NULL;
  4935. }
  4936. const void * CHThorSelfJoinActivity::joinRecords(const void * curLeft, const void * curRight, unsigned counter, unsigned flags, IException * except)
  4937. {
  4938. outBuilder.ensureRow();
  4939. try
  4940. {
  4941. size32_t thisSize = (except ? helper.onFailTransform(outBuilder, curLeft, curRight, except, flags) : helper.transform(outBuilder, curLeft, curRight, counter, flags));
  4942. if(thisSize){
  4943. return outBuilder.finalizeRowClear(thisSize);
  4944. }
  4945. else
  4946. return NULL;
  4947. }
  4948. catch(IException * e)
  4949. {
  4950. throw makeWrappedException(e);
  4951. }
  4952. }
  4953. void CHThorSelfJoinActivity::failLimit(const void * next)
  4954. {
  4955. helper.onMatchAbortLimitExceeded();
  4956. CommonXmlWriter xmlwrite(0);
  4957. if (input->queryOutputMeta() && input->queryOutputMeta()->hasXML())
  4958. {
  4959. input->queryOutputMeta()->toXML((byte *) next, xmlwrite);
  4960. }
  4961. throw MakeStringException(0, "More than %d match candidates in self-join for row %s", abortLimit, xmlwrite.str());
  4962. }
  4963. bool CHThorSelfJoinActivity::isGrouped()
  4964. {
  4965. return false;
  4966. }
  4967. //=====================================================================================================
  4968. CHThorLookupJoinActivity::LookupTable::LookupTable(unsigned _size, ICompare * _leftRightCompare, ICompare * _rightCompare, IHash * _leftHash, IHash * _rightHash, bool _dedupOnAdd)
  4969. : leftRightCompare(_leftRightCompare), rightCompare(_rightCompare), leftHash(_leftHash), rightHash(_rightHash), dedupOnAdd(_dedupOnAdd)
  4970. {
  4971. unsigned minsize = (4*_size)/3;
  4972. size = 2;
  4973. while((minsize >>= 1) > 0)
  4974. size <<= 1;
  4975. mask = size - 1;
  4976. table = new OwnedConstRoxieRow[size];
  4977. findex = BadIndex;
  4978. }
  4979. CHThorLookupJoinActivity::LookupTable::~LookupTable()
  4980. {
  4981. delete [] table;
  4982. }
  4983. bool CHThorLookupJoinActivity::LookupTable::add(const void * _right)
  4984. {
  4985. OwnedConstRoxieRow right(_right);
  4986. findex = BadIndex;
  4987. unsigned start = rightHash->hash(right) & mask;
  4988. unsigned index = start;
  4989. while(table[index])
  4990. {
  4991. if(dedupOnAdd && (rightCompare->docompare(table[index], right) == 0))
  4992. return false;
  4993. index++;
  4994. if(index==size)
  4995. index = 0;
  4996. if(index==start)
  4997. return false; //table is full, should never happen
  4998. }
  4999. table[index].setown(right.getClear());
  5000. return true;
  5001. }
  5002. const void * CHThorLookupJoinActivity::LookupTable::find(const void * left) const
  5003. {
  5004. fstart = leftHash->hash(left) & mask;
  5005. findex = fstart;
  5006. return doFind(left);
  5007. }
  5008. const void * CHThorLookupJoinActivity::LookupTable::findNext(const void * left) const
  5009. {
  5010. if(findex == BadIndex)
  5011. return NULL;
  5012. advance();
  5013. return doFind(left);
  5014. }
  5015. void CHThorLookupJoinActivity::LookupTable::advance() const
  5016. {
  5017. findex++;
  5018. if(findex==size)
  5019. findex = 0;
  5020. if(findex==fstart)
  5021. throw MakeStringException(0, "Internal error hthor lookup join activity (hash table full on lookup)");
  5022. }
  5023. const void * CHThorLookupJoinActivity::LookupTable::doFind(const void * left) const
  5024. {
  5025. while(table[findex])
  5026. {
  5027. if(leftRightCompare->docompare(left, table[findex]) == 0)
  5028. return table[findex];
  5029. advance();
  5030. }
  5031. findex = BadIndex;
  5032. return NULL;
  5033. }
  5034. CHThorLookupJoinActivity::CHThorLookupJoinActivity(IAgentContext & _agent, unsigned _activityId, unsigned _subgraphId, IHThorHashJoinArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  5035. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), outBuilder(NULL), table(0)
  5036. {
  5037. }
  5038. void CHThorLookupJoinActivity::ready()
  5039. {
  5040. CHThorActivityBase::ready();
  5041. input1->ready();
  5042. outBuilder.setAllocator(rowAllocator);
  5043. leftOuterJoin = (helper.getJoinFlags() & JFleftouter) != 0;
  5044. assertex((helper.getJoinFlags() & JFrightouter) == 0);
  5045. exclude = (helper.getJoinFlags() & JFexclude) != 0;
  5046. many = (helper.getJoinFlags() & JFmanylookup) != 0;
  5047. dedupRHS = (helper.getJoinFlags() & (JFmanylookup | JFmatchrequired | JFtransformMaySkip)) == 0; // optimisation: can implicitly dedup RHS unless is many lookup, or match required, or transform may skip
  5048. if((helper.getJoinFlags() & (JFfirst | JFfirstleft | JFfirstright | JFslidingmatch)) != 0)
  5049. throwUnexpected(); // compiler should have rejected
  5050. keepLimit = helper.getKeepLimit();
  5051. if(keepLimit==0)
  5052. keepLimit = static_cast<unsigned>(-1);
  5053. atmostLimit = helper.getJoinLimit();
  5054. limitLimit = helper.getMatchAbortLimit();
  5055. hasGroupLimit = ((atmostLimit > 0) || (limitLimit > 0));
  5056. if(atmostLimit==0)
  5057. atmostLimit = static_cast<unsigned>(-1);
  5058. if(limitLimit==0)
  5059. limitLimit = static_cast<unsigned>(-1);
  5060. isSmartJoin = (helper.getJoinFlags() & JFsmart) != 0;
  5061. getLimitType(helper.getJoinFlags(), limitFail, limitOnFail);
  5062. if((leftOuterJoin || limitOnFail) && !defaultRight)
  5063. createDefaultRight();
  5064. eog = false;
  5065. matchedGroup = false;
  5066. joinCounter = 0;
  5067. }
  5068. void CHThorLookupJoinActivity::stop()
  5069. {
  5070. outBuilder.clear();
  5071. left.clear();
  5072. table.clear();
  5073. CHThorActivityBase::stop();
  5074. input1->stop();
  5075. }
  5076. void CHThorLookupJoinActivity::createDefaultRight()
  5077. {
  5078. if (!defaultRight)
  5079. {
  5080. if (!defaultRightAllocator)
  5081. defaultRightAllocator.setown(agent.queryCodeContext()->getRowAllocator(input1->queryOutputMeta(), activityId));
  5082. RtlDynamicRowBuilder rowBuilder(defaultRightAllocator);
  5083. size32_t thisSize = helper.createDefaultRight(rowBuilder);
  5084. defaultRight.setown(rowBuilder.finalizeRowClear(thisSize));
  5085. }
  5086. }
  5087. void CHThorLookupJoinActivity::loadRight()
  5088. {
  5089. OwnedRowArray rightset;
  5090. const void * next;
  5091. while(true)
  5092. {
  5093. next = input1->nextRow();
  5094. if(!next)
  5095. next = input1->nextRow();
  5096. if(!next)
  5097. break;
  5098. rightset.append(next);
  5099. }
  5100. unsigned rightord = rightset.ordinality();
  5101. table.setown(new LookupTable(rightord, helper.queryCompareLeftRight(), helper.queryCompareRight(), helper.queryHashLeft(), helper.queryHashRight(), dedupRHS));
  5102. unsigned i;
  5103. for(i=0; i<rightord; i++)
  5104. table->add(rightset.itemClear(i));
  5105. };
  5106. void CHThorLookupJoinActivity::setInput(unsigned index, IHThorInput * _input)
  5107. {
  5108. if (index==1)
  5109. input1 = _input;
  5110. else
  5111. CHThorActivityBase::setInput(index, _input);
  5112. }
  5113. //following are all copied from CHThorJoinActivity - should common up.
  5114. const void * CHThorLookupJoinActivity::joinRecords(const void * left, const void * right, unsigned counter, unsigned flags)
  5115. {
  5116. try
  5117. {
  5118. outBuilder.ensureRow();
  5119. size32_t thisSize = helper.transform(outBuilder, left, right, counter, flags);
  5120. if(thisSize)
  5121. return outBuilder.finalizeRowClear(thisSize);
  5122. else
  5123. return NULL;
  5124. }
  5125. catch(IException * e)
  5126. {
  5127. throw makeWrappedException(e);
  5128. }
  5129. }
  5130. const void * CHThorLookupJoinActivity::joinException(const void * left, IException * except)
  5131. {
  5132. try
  5133. {
  5134. outBuilder.ensureRow();
  5135. memsize_t thisSize = helper.onFailTransform(outBuilder, left, defaultRight, except, JTFmatchedleft);
  5136. if(thisSize)
  5137. return outBuilder.finalizeRowClear(thisSize);
  5138. else
  5139. return NULL;
  5140. }
  5141. catch(IException * e)
  5142. {
  5143. throw makeWrappedException(e);
  5144. }
  5145. }
  5146. const void * CHThorLookupJoinActivity::groupDenormalizeRecords(const void * left, ConstPointerArray & rows, unsigned flags)
  5147. {
  5148. try
  5149. {
  5150. outBuilder.ensureRow();
  5151. unsigned numRows = rows.ordinality();
  5152. const void * right = numRows ? rows.item(0) : defaultRight.get();
  5153. if (numRows>0)
  5154. flags |= JTFmatchedright;
  5155. memsize_t thisSize = helper.transform(outBuilder, left, right, numRows, (const void * *)rows.getArray(), flags);
  5156. if(thisSize)
  5157. return outBuilder.finalizeRowClear(thisSize);
  5158. else
  5159. return NULL;
  5160. }
  5161. catch(IException * e)
  5162. {
  5163. throw makeWrappedException(e);
  5164. }
  5165. }
  5166. const void * CHThorLookupJoinActivity::nextRow()
  5167. {
  5168. if(!table)
  5169. loadRight();
  5170. switch (kind)
  5171. {
  5172. case TAKlookupjoin:
  5173. case TAKsmartjoin:
  5174. return nextRowJoin();
  5175. case TAKlookupdenormalize:
  5176. case TAKlookupdenormalizegroup:
  5177. case TAKsmartdenormalize:
  5178. case TAKsmartdenormalizegroup:
  5179. return nextRowDenormalize();
  5180. }
  5181. throwUnexpected();
  5182. }
  5183. const void * CHThorLookupJoinActivity::nextRowJoin()
  5184. {
  5185. while(true)
  5186. {
  5187. const void * right = NULL;
  5188. if(!left)
  5189. {
  5190. left.setown(input->nextRow());
  5191. keepCount = keepLimit;
  5192. if(!left)
  5193. {
  5194. if (isSmartJoin)
  5195. left.setown(input->nextRow());
  5196. if(!left)
  5197. {
  5198. if(matchedGroup || eog)
  5199. {
  5200. matchedGroup = false;
  5201. eog = true;
  5202. return NULL;
  5203. }
  5204. eog = true;
  5205. continue;
  5206. }
  5207. }
  5208. eog = false;
  5209. gotMatch = false;
  5210. right = getRightFirst();
  5211. }
  5212. else
  5213. right = getRightNext();
  5214. const void * ret = NULL;
  5215. if(failingLimit)
  5216. {
  5217. ret = joinException(left, failingLimit);
  5218. }
  5219. else
  5220. {
  5221. while(right)
  5222. {
  5223. if(helper.match(left, right))
  5224. {
  5225. gotMatch = true;
  5226. if(exclude)
  5227. break;
  5228. ret = joinRecords(left, right, ++joinCounter, JTFmatchedleft|JTFmatchedright);
  5229. if(ret)
  5230. {
  5231. processed++;
  5232. break;
  5233. }
  5234. }
  5235. right = getRightNext();
  5236. ret = NULL;
  5237. }
  5238. if(leftOuterJoin && !gotMatch)
  5239. {
  5240. ret = joinRecords(left, defaultRight, 0, JTFmatchedleft);
  5241. gotMatch = true;
  5242. }
  5243. }
  5244. if(ret)
  5245. {
  5246. matchedGroup = true;
  5247. processed++;
  5248. if(!many || (--keepCount == 0) || failingLimit)
  5249. {
  5250. left.clear();
  5251. joinCounter = 0;
  5252. failingLimit.clear();
  5253. }
  5254. return ret;
  5255. }
  5256. left.clear();
  5257. joinCounter = 0;
  5258. }
  5259. }
  5260. const void * CHThorLookupJoinActivity::nextRowDenormalize()
  5261. {
  5262. while(true)
  5263. {
  5264. left.setown(input->nextRow());
  5265. if(!left)
  5266. {
  5267. if (!matchedGroup || isSmartJoin)
  5268. left.setown(input->nextRow());
  5269. if (!left)
  5270. {
  5271. matchedGroup = false;
  5272. return NULL;
  5273. }
  5274. }
  5275. gotMatch = false;
  5276. const void * right = getRightFirst();
  5277. const void * ret = NULL;
  5278. if (failingLimit)
  5279. ret = joinException(left, failingLimit);
  5280. else if (kind == TAKlookupdenormalize || kind == TAKsmartdenormalize)
  5281. {
  5282. OwnedConstRoxieRow newLeft(left.getLink());
  5283. unsigned rowSize = 0;
  5284. unsigned leftCount = 0;
  5285. keepCount = keepLimit;
  5286. while (right)
  5287. {
  5288. if (helper.match(left, right))
  5289. {
  5290. gotMatch = true;
  5291. if (exclude)
  5292. break;
  5293. try
  5294. {
  5295. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  5296. unsigned thisSize = helper.transform(rowBuilder, newLeft, right, ++leftCount, JTFmatchedleft|JTFmatchedright);
  5297. if (thisSize)
  5298. {
  5299. rowSize = thisSize;
  5300. newLeft.setown(rowBuilder.finalizeRowClear(rowSize));
  5301. }
  5302. }
  5303. catch(IException * e)
  5304. {
  5305. throw makeWrappedException(e);
  5306. }
  5307. if(!many || (--keepCount == 0))
  5308. break;
  5309. }
  5310. right = getRightNext();
  5311. }
  5312. //Is this rowSize test correct?? Is there any situation where it shouldn't just return newLeft?
  5313. if (rowSize)
  5314. ret = newLeft.getClear();
  5315. else if (leftOuterJoin && !gotMatch)
  5316. ret = left.getClear();
  5317. }
  5318. else
  5319. {
  5320. filteredRight.kill();
  5321. keepCount = keepLimit;
  5322. while (right)
  5323. {
  5324. if (helper.match(left, right))
  5325. {
  5326. gotMatch = true;
  5327. if(exclude)
  5328. break;
  5329. filteredRight.append(right);
  5330. if(!many || (--keepCount == 0))
  5331. break;
  5332. }
  5333. right = getRightNext();
  5334. }
  5335. if((filteredRight.ordinality() > 0) || (leftOuterJoin && !gotMatch))
  5336. ret = groupDenormalizeRecords(left, filteredRight, JTFmatchedleft);
  5337. filteredRight.kill();
  5338. }
  5339. left.clear();
  5340. failingLimit.clear();
  5341. if(ret)
  5342. {
  5343. matchedGroup = true;
  5344. processed++;
  5345. return ret;
  5346. }
  5347. }
  5348. }
  5349. bool CHThorLookupJoinActivity::isGrouped()
  5350. {
  5351. return input ? input->isGrouped() : false;
  5352. }
  5353. const void * CHThorLookupJoinActivity::fillRightGroup()
  5354. {
  5355. rightGroup.kill();
  5356. for(const void * right = table->find(left); right; right = table->findNext(left))
  5357. {
  5358. rightGroup.append(right);
  5359. if(rightGroup.ordinality() > limitLimit)
  5360. {
  5361. if(limitFail)
  5362. failLimit();
  5363. if ( agent.queryCodeContext()->queryDebugContext())
  5364. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  5365. gotMatch = true;
  5366. if(limitOnFail)
  5367. {
  5368. assertex(!failingLimit);
  5369. try
  5370. {
  5371. failLimit();
  5372. }
  5373. catch(IException * e)
  5374. {
  5375. failingLimit.setown(e);
  5376. }
  5377. assertex(failingLimit);
  5378. }
  5379. else
  5380. {
  5381. rightGroup.kill();
  5382. }
  5383. break;
  5384. }
  5385. if(rightGroup.ordinality() > atmostLimit)
  5386. {
  5387. rightGroup.kill();
  5388. break;
  5389. }
  5390. }
  5391. rightGroupIndex = 0;
  5392. return readRightGroup();
  5393. }
  5394. void CHThorLookupJoinActivity::failLimit()
  5395. {
  5396. helper.onMatchAbortLimitExceeded();
  5397. CommonXmlWriter xmlwrite(0);
  5398. if(input->queryOutputMeta() && input->queryOutputMeta()->hasXML())
  5399. {
  5400. input->queryOutputMeta()->toXML(static_cast<const unsigned char *>(left.get()), xmlwrite);
  5401. }
  5402. throw MakeStringException(0, "More than %u match candidates in join for row %s", limitLimit, xmlwrite.str());
  5403. }
  5404. unsigned const CHThorLookupJoinActivity::LookupTable::BadIndex(static_cast<unsigned>(-1));
  5405. //=====================================================================================================
  5406. CHThorAllJoinActivity::CHThorAllJoinActivity(IAgentContext & _agent, unsigned _activityId, unsigned _subgraphId, IHThorAllJoinArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), outBuilder(NULL)
  5407. {
  5408. }
  5409. void CHThorAllJoinActivity::ready()
  5410. {
  5411. CHThorActivityBase::ready();
  5412. input1->ready();
  5413. outBuilder.setAllocator(rowAllocator);
  5414. leftOuterJoin = (helper.getJoinFlags() & JFleftouter) != 0;
  5415. exclude = (helper.getJoinFlags() & JFexclude) != 0;
  5416. if(leftOuterJoin && !defaultRight)
  5417. createDefaultRight();
  5418. if((helper.getJoinFlags() & (JFrightouter | JFfirst | JFfirstleft | JFfirstright)) != 0)
  5419. throwUnexpected();
  5420. keepLimit = helper.getKeepLimit();
  5421. if(keepLimit==0)
  5422. keepLimit = (unsigned)-1;
  5423. started = false;
  5424. countForLeft = keepLimit;
  5425. matchedLeft = false;
  5426. matchedGroup = false;
  5427. eog = false;
  5428. eos = false;
  5429. }
  5430. void CHThorAllJoinActivity::stop()
  5431. {
  5432. outBuilder.clear();
  5433. left.clear();
  5434. rightset.clear();
  5435. matchedRight.kill();
  5436. CHThorActivityBase::stop();
  5437. input1->stop();
  5438. }
  5439. void CHThorAllJoinActivity::createDefaultRight()
  5440. {
  5441. if (!defaultRight)
  5442. {
  5443. if (!defaultRightAllocator)
  5444. defaultRightAllocator.setown(agent.queryCodeContext()->getRowAllocator(input1->queryOutputMeta(), activityId));
  5445. RtlDynamicRowBuilder rowBuilder(defaultRightAllocator);
  5446. size32_t thisSize = helper.createDefaultRight(rowBuilder);
  5447. defaultRight.setown(rowBuilder.finalizeRowClear(thisSize));
  5448. }
  5449. }
  5450. void CHThorAllJoinActivity::loadRight()
  5451. {
  5452. const void * next;
  5453. while(true)
  5454. {
  5455. next = input1->nextRow();
  5456. if(!next)
  5457. next = input1->nextRow();
  5458. if(!next)
  5459. break;
  5460. rightset.append(next);
  5461. matchedRight.append(false);
  5462. }
  5463. rightIndex = 0;
  5464. joinCounter = 0;
  5465. rightOrdinality = rightset.ordinality();
  5466. }
  5467. const void * CHThorAllJoinActivity::joinRecords(const void * left, const void * right, unsigned counter, unsigned flags)
  5468. {
  5469. try
  5470. {
  5471. outBuilder.ensureRow();
  5472. memsize_t thisSize = helper.transform(outBuilder, left, right, counter, flags);
  5473. if(thisSize)
  5474. return outBuilder.finalizeRowClear(thisSize);
  5475. else
  5476. return NULL;
  5477. }
  5478. catch(IException * e)
  5479. {
  5480. throw makeWrappedException(e);
  5481. }
  5482. }
  5483. const void * CHThorAllJoinActivity::groupDenormalizeRecords(const void * curLeft, ConstPointerArray & rows, unsigned flags)
  5484. {
  5485. try
  5486. {
  5487. outBuilder.ensureRow();
  5488. unsigned numRows = rows.ordinality();
  5489. const void * right = numRows ? rows.item(0) : defaultRight.get();
  5490. if (numRows>0)
  5491. flags |= JTFmatchedright;
  5492. memsize_t thisSize = helper.transform(outBuilder, curLeft, right, numRows, (const void * *)rows.getArray(), flags);
  5493. if(thisSize)
  5494. return outBuilder.finalizeRowClear(thisSize);
  5495. else
  5496. return NULL;
  5497. }
  5498. catch(IException * e)
  5499. {
  5500. throw makeWrappedException(e);
  5501. }
  5502. }
  5503. void CHThorAllJoinActivity::setInput(unsigned index, IHThorInput * _input)
  5504. {
  5505. if (index==1)
  5506. input1 = _input;
  5507. else
  5508. {
  5509. CHThorActivityBase::setInput(index, _input);
  5510. leftIsGrouped = true; // input->isGrouped() is unreliable and it is just as good to always behave as if input is grouped
  5511. }
  5512. }
  5513. const void * CHThorAllJoinActivity::nextRow()
  5514. {
  5515. if(!started)
  5516. {
  5517. started = true;
  5518. left.setown(input->nextRow());
  5519. matchedLeft = false;
  5520. countForLeft = keepLimit;
  5521. if(!left)
  5522. {
  5523. eos = true;
  5524. return NULL;
  5525. }
  5526. loadRight();
  5527. }
  5528. const void * ret;
  5529. const void * right;
  5530. if(eos)
  5531. return NULL;
  5532. while(true)
  5533. {
  5534. ret = NULL;
  5535. if((rightIndex == rightOrdinality) || (countForLeft==0))
  5536. {
  5537. if(leftOuterJoin && left && !matchedLeft)
  5538. {
  5539. switch(kind)
  5540. {
  5541. case TAKalljoin:
  5542. ret = joinRecords(left, defaultRight, 0, JTFmatchedleft);
  5543. break;
  5544. case TAKalldenormalize:
  5545. ret = left.getClear();
  5546. break;
  5547. case TAKalldenormalizegroup:
  5548. filteredRight.kill();
  5549. ret = groupDenormalizeRecords(left, filteredRight, JTFmatchedleft);
  5550. break;
  5551. default:
  5552. throwUnexpected();
  5553. }
  5554. }
  5555. rightIndex = 0;
  5556. joinCounter = 0;
  5557. left.clear();
  5558. if(ret)
  5559. {
  5560. matchedGroup = true;
  5561. processed++;
  5562. return ret;
  5563. }
  5564. }
  5565. if(!left)
  5566. {
  5567. left.setown(input->nextRow());
  5568. matchedLeft = false;
  5569. countForLeft = keepLimit;
  5570. }
  5571. if(!left)
  5572. {
  5573. if(eog)
  5574. {
  5575. eos = true;
  5576. matchedGroup = false;
  5577. return NULL;
  5578. }
  5579. eog = true;
  5580. if(matchedGroup && leftIsGrouped)
  5581. {
  5582. matchedGroup = false;
  5583. return NULL;
  5584. }
  5585. matchedGroup = false;
  5586. continue;
  5587. }
  5588. eog = false;
  5589. switch(kind)
  5590. {
  5591. case TAKalljoin:
  5592. while(rightIndex < rightOrdinality)
  5593. {
  5594. right = rightset.item(rightIndex);
  5595. if(helper.match(left, right))
  5596. {
  5597. matchedLeft = true;
  5598. matchedRight.replace(true, rightIndex);
  5599. if(!exclude)
  5600. ret = joinRecords(left, right, ++joinCounter, JTFmatchedleft|JTFmatchedright);
  5601. }
  5602. rightIndex++;
  5603. if(ret)
  5604. {
  5605. countForLeft--;
  5606. matchedGroup = true;
  5607. processed++;
  5608. return ret;
  5609. }
  5610. }
  5611. case TAKalldenormalize:
  5612. {
  5613. OwnedConstRoxieRow newLeft;
  5614. newLeft.set(left);
  5615. unsigned rowSize = 0;
  5616. unsigned leftCount = 0;
  5617. while((rightIndex < rightOrdinality) && countForLeft)
  5618. {
  5619. right = rightset.item(rightIndex);
  5620. if(helper.match(left, right))
  5621. {
  5622. matchedLeft = true;
  5623. matchedRight.replace(true, rightIndex);
  5624. if(!exclude)
  5625. {
  5626. try
  5627. {
  5628. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  5629. unsigned thisSize = helper.transform(rowBuilder, newLeft, right, ++leftCount, JTFmatchedleft|JTFmatchedright);
  5630. if(thisSize)
  5631. {
  5632. rowSize = thisSize;
  5633. newLeft.setown(rowBuilder.finalizeRowClear(rowSize));
  5634. --countForLeft;
  5635. }
  5636. }
  5637. catch(IException * e)
  5638. {
  5639. throw makeWrappedException(e);
  5640. }
  5641. }
  5642. }
  5643. rightIndex++;
  5644. }
  5645. if(rowSize)
  5646. {
  5647. processed++;
  5648. return newLeft.getClear();
  5649. }
  5650. }
  5651. break;
  5652. case TAKalldenormalizegroup:
  5653. filteredRight.kill();
  5654. while((rightIndex < rightOrdinality) && countForLeft)
  5655. {
  5656. right = rightset.item(rightIndex);
  5657. if(helper.match(left, right))
  5658. {
  5659. matchedLeft = true;
  5660. matchedRight.replace(true, rightIndex);
  5661. filteredRight.append(right);
  5662. --countForLeft;
  5663. }
  5664. ++rightIndex;
  5665. }
  5666. if(!exclude && filteredRight.ordinality())
  5667. {
  5668. const void * ret = groupDenormalizeRecords(left, filteredRight, JTFmatchedleft);
  5669. filteredRight.kill();
  5670. if(ret)
  5671. {
  5672. processed++;
  5673. return ret;
  5674. }
  5675. }
  5676. break;
  5677. default:
  5678. throwUnexpected();
  5679. }
  5680. }
  5681. }
  5682. bool CHThorAllJoinActivity::isGrouped()
  5683. {
  5684. return input ? input->isGrouped() : false;
  5685. }
  5686. //=====================================================================================================
  5687. //=====================================================================================================
  5688. CHThorWorkUnitWriteActivity::CHThorWorkUnitWriteActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorWorkUnitWriteArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  5689. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  5690. {
  5691. }
  5692. static void throwWuResultTooLarge(size32_t outputLimit, IHThorWorkUnitWriteArg &helper)
  5693. {
  5694. StringBuffer errMsg("Dataset too large to output to workunit (limit ");
  5695. errMsg.append(outputLimit/0x100000).append(" megabytes), in result (");
  5696. const char *name = helper.queryName();
  5697. if (name)
  5698. errMsg.append("name=").append(name);
  5699. else
  5700. errMsg.append("sequence=").append(helper.getSequence());
  5701. errMsg.append(")");
  5702. throw MakeStringExceptionDirect(0, errMsg.str());
  5703. }
  5704. void CHThorWorkUnitWriteActivity::execute()
  5705. {
  5706. unsigned flags = helper.getFlags();
  5707. grouped = (POFgrouped & flags) != 0;
  5708. // In absense of OPT_OUTPUTLIMIT check pre 5.2 legacy name OPT_OUTPUTLIMIT_LEGACY
  5709. size32_t outputLimit = agent.queryWorkUnit()->getDebugValueInt(OPT_OUTPUTLIMIT, agent.queryWorkUnit()->getDebugValueInt(OPT_OUTPUTLIMIT_LEGACY, defaultDaliResultLimit));
  5710. if (flags & POFmaxsize)
  5711. outputLimit = helper.getMaxSize();
  5712. if (outputLimit>daliResultOutputMax)
  5713. throw MakeStringException(0, "Dali result outputs are restricted to a maximum of %d MB, the current limit is %d MB. A huge dali result usually indicates the ECL needs altering.", daliResultOutputMax, defaultDaliResultLimit);
  5714. assertex(outputLimit<=0x1000); // 32bit limit because MemoryBuffer/CMessageBuffers involved etc.
  5715. outputLimit *= 0x100000;
  5716. MemoryBuffer rowdata;
  5717. __int64 rows = 0;
  5718. IRecordSize * inputMeta = input->queryOutputMeta();
  5719. if (0 != (POFextend & helper.getFlags()))
  5720. {
  5721. WorkunitUpdate w = agent.updateWorkUnit();
  5722. Owned<IWUResult> result = updateWorkUnitResult(w, helper.queryName(), helper.getSequence());
  5723. rows = result->getResultRowCount();
  5724. }
  5725. __int64 initialRows = rows;
  5726. Owned<IOutputRowSerializer> rowSerializer;
  5727. if (input->queryOutputMeta()->getMetaFlags() & MDFneedserializedisk)
  5728. rowSerializer.setown( input->queryOutputMeta()->createDiskSerializer(agent.queryCodeContext(), activityId) );
  5729. int seq = helper.getSequence();
  5730. bool toStdout = (seq >= 0) && agent.queryWriteResultsToStdout();
  5731. Owned<SimpleOutputWriter> writer;
  5732. if (toStdout)
  5733. writer.setown(new SimpleOutputWriter);
  5734. if (agent.queryOutputFmt() == ofXML && seq >= 0)
  5735. {
  5736. StringBuffer sb;
  5737. const char *name = helper.queryName();
  5738. if (name && *name)
  5739. sb.appendf("<Dataset name='%s'>\n", name);
  5740. else
  5741. sb.appendf("<Dataset name='Result %d'>\n", seq+1);
  5742. agent.queryOutputSerializer()->fwrite(seq, (const void*)sb.str(), 1, sb.length());
  5743. }
  5744. for (;;)
  5745. {
  5746. if ((unsigned __int64)rows >= agent.queryStopAfter())
  5747. break;
  5748. OwnedConstRoxieRow nextrec(input->nextRow());
  5749. if (grouped && (rows != initialRows))
  5750. rowdata.append(nextrec == NULL);
  5751. if (!nextrec)
  5752. {
  5753. nextrec.setown(input->nextRow());
  5754. if (!nextrec)
  5755. break;
  5756. }
  5757. size32_t thisSize = inputMeta->getRecordSize(nextrec);
  5758. if (outputLimit && ((rowdata.length() + thisSize) > outputLimit))
  5759. throwWuResultTooLarge(outputLimit, helper);
  5760. if (rowSerializer)
  5761. {
  5762. CThorDemoRowSerializer serializerTarget(rowdata);
  5763. rowSerializer->serialize(serializerTarget, (const byte *) nextrec.get() );
  5764. }
  5765. else
  5766. rowdata.append(thisSize, nextrec);
  5767. if (toStdout && seq >= 0)
  5768. {
  5769. if (agent.queryOutputFmt() == ofSTD)
  5770. {
  5771. helper.serializeXml((byte *) nextrec.get(), *writer);
  5772. writer->newline();
  5773. agent.queryOutputSerializer()->fwrite(seq, (const void*)writer->str(), 1, writer->length());
  5774. writer->clear();
  5775. }
  5776. else if (agent.queryOutputFmt() == ofXML)
  5777. {
  5778. CommonXmlWriter xmlwrite(0,1);
  5779. xmlwrite.outputBeginNested(DEFAULTXMLROWTAG, false);
  5780. helper.serializeXml((byte *) nextrec.get(), xmlwrite);
  5781. xmlwrite.outputEndNested(DEFAULTXMLROWTAG);
  5782. agent.queryOutputSerializer()->fwrite(seq, (const void*)xmlwrite.str(), 1, xmlwrite.length());
  5783. }
  5784. }
  5785. rows++;
  5786. }
  5787. WorkunitUpdate w = agent.updateWorkUnit();
  5788. Owned<IWUResult> result = updateWorkUnitResult(w, helper.queryName(), helper.getSequence());
  5789. if (0 != (POFextend & helper.getFlags()))
  5790. {
  5791. __int64 existingSz = result->getResultRawSize(nullptr, nullptr);
  5792. if (outputLimit && ((rowdata.length() + existingSz) > outputLimit))
  5793. throwWuResultTooLarge(outputLimit, helper);
  5794. result->addResultRaw(rowdata.length(), rowdata.toByteArray(), ResultFormatRaw);
  5795. }
  5796. else
  5797. result->setResultRaw(rowdata.length(), rowdata.toByteArray(), ResultFormatRaw);
  5798. result->setResultStatus(ResultStatusCalculated);
  5799. result->setResultRowCount(rows);
  5800. result->setResultTotalRowCount(rows); // Is this right??
  5801. if (toStdout && seq >= 0)
  5802. {
  5803. if (agent.queryOutputFmt() == ofXML)
  5804. {
  5805. StringBuffer sb;
  5806. sb.appendf(DEFAULTXMLFOOTER).newline();
  5807. agent.queryOutputSerializer()->fwrite(seq, (const void*)sb.str(), 1, sb.length());
  5808. }
  5809. else if (agent.queryOutputFmt() != ofSTD)
  5810. agent.outputFormattedResult(helper.queryName(), seq, false);
  5811. if (!(POFextend & helper.getFlags()))//POextend results will never get closed, so wont flush until serializer dtor
  5812. agent.queryOutputSerializer()->close(seq, false);
  5813. }
  5814. }
  5815. //=====================================================================================================
  5816. CHThorDictionaryWorkUnitWriteActivity::CHThorDictionaryWorkUnitWriteActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDictionaryWorkUnitWriteArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  5817. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  5818. {
  5819. }
  5820. void CHThorDictionaryWorkUnitWriteActivity::execute()
  5821. {
  5822. int sequence = helper.getSequence();
  5823. const char *storedName = helper.queryName();
  5824. assertex(storedName && *storedName);
  5825. assertex(sequence < 0);
  5826. RtlLinkedDictionaryBuilder builder(rowAllocator, helper.queryHashLookupInfo());
  5827. for (;;)
  5828. {
  5829. const void *row = input->nextRow();
  5830. if (!row)
  5831. {
  5832. row = input->nextRow();
  5833. if (!row)
  5834. break;
  5835. }
  5836. builder.appendOwn(row);
  5837. processed++;
  5838. }
  5839. unsigned __int64 usedCount = rtlDictionaryCount(builder.getcount(), builder.queryrows());
  5840. // In absense of OPT_OUTPUTLIMIT check pre 5.2 legacy name OPT_OUTPUTLIMIT_LEGACY
  5841. size32_t outputLimit = agent.queryWorkUnit()->getDebugValueInt(OPT_OUTPUTLIMIT, agent.queryWorkUnit()->getDebugValueInt(OPT_OUTPUTLIMIT_LEGACY, defaultDaliResultLimit)) * 0x100000;
  5842. MemoryBuffer rowdata;
  5843. CThorDemoRowSerializer out(rowdata);
  5844. Owned<IOutputRowSerializer> serializer = input->queryOutputMeta()->createDiskSerializer(agent.queryCodeContext(), activityId);
  5845. rtlSerializeDictionary(out, serializer, builder.getcount(), builder.queryrows());
  5846. if(outputLimit && (rowdata.length() > outputLimit))
  5847. {
  5848. StringBuffer errMsg("Dictionary too large to output to workunit (limit ");
  5849. errMsg.append(outputLimit/0x100000).append(" megabytes), in result (");
  5850. const char *name = helper.queryName();
  5851. if (name)
  5852. errMsg.append("name=").append(name);
  5853. else
  5854. errMsg.append("sequence=").append(helper.getSequence());
  5855. errMsg.append(")");
  5856. throw MakeStringExceptionDirect(0, errMsg.str());
  5857. }
  5858. WorkunitUpdate w = agent.updateWorkUnit();
  5859. Owned<IWUResult> result = updateWorkUnitResult(w, helper.queryName(), helper.getSequence());
  5860. result->setResultRaw(rowdata.length(), rowdata.toByteArray(), ResultFormatRaw);
  5861. result->setResultStatus(ResultStatusCalculated);
  5862. result->setResultRowCount(usedCount);
  5863. result->setResultTotalRowCount(usedCount); // Is this right??
  5864. }
  5865. //=====================================================================================================
  5866. CHThorRemoteResultActivity::CHThorRemoteResultActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorRemoteResultArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  5867. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  5868. {
  5869. }
  5870. void CHThorRemoteResultActivity::execute()
  5871. {
  5872. OwnedConstRoxieRow result(input->nextRow());
  5873. helper.sendResult(result);
  5874. }
  5875. //=====================================================================================================
  5876. CHThorInlineTableActivity::CHThorInlineTableActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorInlineTableArg &_arg, ThorActivityKind _kind, EclGraph & _graph) :
  5877. CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  5878. {
  5879. }
  5880. void CHThorInlineTableActivity::ready()
  5881. {
  5882. CHThorSimpleActivityBase::ready();
  5883. curRow = 0;
  5884. numRows = helper.numRows();
  5885. }
  5886. const void *CHThorInlineTableActivity::nextRow()
  5887. {
  5888. // Filtering empty rows, returns the next valid row
  5889. while (curRow < numRows)
  5890. {
  5891. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  5892. size32_t size = helper.getRow(rowBuilder, curRow++);
  5893. if (size)
  5894. {
  5895. processed++;
  5896. return rowBuilder.finalizeRowClear(size);
  5897. }
  5898. }
  5899. return NULL;
  5900. }
  5901. //=====================================================================================================
  5902. CHThorNullActivity::CHThorNullActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  5903. {
  5904. }
  5905. const void *CHThorNullActivity::nextRow()
  5906. {
  5907. return NULL;
  5908. }
  5909. //=====================================================================================================
  5910. CHThorActionActivity::CHThorActionActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorActionArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  5911. {
  5912. }
  5913. void CHThorActionActivity::execute()
  5914. {
  5915. helper.action();
  5916. }
  5917. const void *CHThorActionActivity::nextRow()
  5918. {
  5919. return NULL;
  5920. }
  5921. //=====================================================================================================
  5922. CHThorSideEffectActivity::CHThorSideEffectActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorSideEffectArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  5923. {
  5924. }
  5925. const void *CHThorSideEffectActivity::nextRow()
  5926. {
  5927. try
  5928. {
  5929. helper.action();
  5930. }
  5931. catch(IException * e)
  5932. {
  5933. throw makeWrappedException(e);
  5934. }
  5935. return NULL;
  5936. }
  5937. //=====================================================================================================
  5938. CHThorDummyActivity::CHThorDummyActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  5939. {
  5940. }
  5941. void CHThorDummyActivity::execute()
  5942. {
  5943. }
  5944. const void *CHThorDummyActivity::nextRow()
  5945. {
  5946. return input ? input->nextRow() : NULL;
  5947. }
  5948. //=====================================================================================================
  5949. CHThorWhenActionActivity::CHThorWhenActionActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorArg &_arg, ThorActivityKind _kind, EclGraph & _graph, EclGraphElement * _graphElement)
  5950. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), graphElement(_graphElement)
  5951. {
  5952. }
  5953. void CHThorWhenActionActivity::ready()
  5954. {
  5955. CHThorSimpleActivityBase::ready();
  5956. graphElement->executeDependentActions(agent, NULL, WhenBeforeId);
  5957. graphElement->executeDependentActions(agent, NULL, WhenParallelId);
  5958. }
  5959. void CHThorWhenActionActivity::execute()
  5960. {
  5961. graphElement->executeDependentActions(agent, NULL, 1);
  5962. }
  5963. const void * CHThorWhenActionActivity::nextRow()
  5964. {
  5965. return input->nextRow();
  5966. }
  5967. void CHThorWhenActionActivity::stop()
  5968. {
  5969. graphElement->executeDependentActions(agent, NULL, WhenSuccessId);
  5970. CHThorSimpleActivityBase::stop();
  5971. }
  5972. //=====================================================================================================
  5973. CHThorMultiInputActivity::CHThorMultiInputActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  5974. {
  5975. }
  5976. void CHThorMultiInputActivity::ready()
  5977. {
  5978. CHThorSimpleActivityBase::ready();
  5979. ForEachItemIn(idx, inputs)
  5980. inputs.item(idx)->ready();
  5981. }
  5982. void CHThorMultiInputActivity::stop()
  5983. {
  5984. CHThorSimpleActivityBase::stop();
  5985. ForEachItemIn(idx, inputs)
  5986. inputs.item(idx)->stop();
  5987. }
  5988. void CHThorMultiInputActivity::resetEOF()
  5989. {
  5990. CHThorSimpleActivityBase::resetEOF();
  5991. ForEachItemIn(idx, inputs)
  5992. inputs.item(idx)->resetEOF();
  5993. }
  5994. void CHThorMultiInputActivity::setInput(unsigned index, IHThorInput *_input)
  5995. {
  5996. if (index==inputs.length())
  5997. {
  5998. inputs.append(_input);
  5999. }
  6000. else
  6001. {
  6002. while (!inputs.isItem(index))
  6003. inputs.append(NULL);
  6004. inputs.replace(_input, index);
  6005. }
  6006. }
  6007. void CHThorMultiInputActivity::updateProgress(IStatisticGatherer &progress) const
  6008. {
  6009. CHThorSimpleActivityBase::updateProgress(progress);
  6010. ForEachItemIn(idx, inputs)
  6011. {
  6012. IHThorInput *i = inputs.item(idx);
  6013. if (i)
  6014. i->updateProgress(progress);
  6015. }
  6016. }
  6017. //=====================================================================================================
  6018. CHThorConcatActivity::CHThorConcatActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorFunnelArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorMultiInputActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6019. {
  6020. }
  6021. void CHThorConcatActivity::ready()
  6022. {
  6023. grouped = helper.queryOutputMeta()->isGrouped();
  6024. inputIdx = 0;
  6025. curInput = inputs.item(inputIdx);
  6026. eogSeen = false;
  6027. anyThisGroup = false;
  6028. CHThorMultiInputActivity::ready();
  6029. }
  6030. const void *CHThorConcatActivity::nextRow()
  6031. {
  6032. if (!curInput)
  6033. return NULL; // eof
  6034. const void * next = curInput->nextRow();
  6035. if (next)
  6036. {
  6037. anyThisGroup = true;
  6038. eogSeen = false;
  6039. processed++;
  6040. return next;
  6041. }
  6042. else if (!eogSeen)
  6043. {
  6044. eogSeen = true;
  6045. if (grouped)
  6046. {
  6047. if (anyThisGroup)
  6048. {
  6049. anyThisGroup = false;
  6050. return NULL;
  6051. }
  6052. else
  6053. return nextRow();
  6054. }
  6055. else
  6056. return nextRow();
  6057. }
  6058. else if (inputIdx < inputs.length()-1)
  6059. {
  6060. inputIdx++;
  6061. curInput = inputs.item(inputIdx);
  6062. eogSeen = false;
  6063. anyThisGroup = false;
  6064. return nextRow();
  6065. }
  6066. else
  6067. {
  6068. curInput = NULL;
  6069. return NULL;
  6070. }
  6071. }
  6072. //=====================================================================================================
  6073. CHThorNonEmptyActivity::CHThorNonEmptyActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNonEmptyArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorMultiInputActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6074. {
  6075. }
  6076. void CHThorNonEmptyActivity::ready()
  6077. {
  6078. grouped = helper.queryOutputMeta()->isGrouped();
  6079. selectedInput = NULL;
  6080. CHThorMultiInputActivity::ready();
  6081. }
  6082. const void *CHThorNonEmptyActivity::nextRow()
  6083. {
  6084. if (!selectedInput)
  6085. {
  6086. ForEachItemIn(i, inputs)
  6087. {
  6088. IHThorInput * cur = inputs.item(i);
  6089. const void * next = cur->nextRow();
  6090. if (next)
  6091. {
  6092. selectedInput = cur;
  6093. processed++;
  6094. return next;
  6095. }
  6096. }
  6097. return NULL;
  6098. }
  6099. const void * next = selectedInput->nextRow();
  6100. if (next)
  6101. processed++;
  6102. return next;
  6103. }
  6104. //=====================================================================================================
  6105. CHThorRegroupActivity::CHThorRegroupActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorRegroupArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorMultiInputActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  6106. {
  6107. }
  6108. void CHThorRegroupActivity::ready()
  6109. {
  6110. inputIndex = 0;
  6111. eof = false;
  6112. numProcessedLastGroup = processed;
  6113. CHThorMultiInputActivity::ready();
  6114. }
  6115. const void * CHThorRegroupActivity::nextFromInputs()
  6116. {
  6117. unsigned initialInput = inputIndex;
  6118. while (inputs.isItem(inputIndex))
  6119. {
  6120. OwnedConstRoxieRow next(inputs.item(inputIndex)->nextRow());
  6121. if (next)
  6122. {
  6123. if ((inputIndex != initialInput) && (inputIndex != initialInput+1))
  6124. {
  6125. throw MakeStringException(100, "Mismatched groups supplied to regroup %u", activityId);
  6126. }
  6127. return next.getClear();
  6128. }
  6129. inputIndex++;
  6130. }
  6131. if ((initialInput != 0) && (initialInput+1 != inputs.ordinality()))
  6132. throw MakeStringException(100, "Mismatched groups supplied to Regroup Activity(%u)", activityId);
  6133. inputIndex = 0;
  6134. return NULL;
  6135. }
  6136. const void * CHThorRegroupActivity::nextRow()
  6137. {
  6138. if (eof)
  6139. return NULL;
  6140. const void * ret = nextFromInputs();
  6141. if (ret)
  6142. {
  6143. processed++;
  6144. return ret;
  6145. }
  6146. if (numProcessedLastGroup != processed)
  6147. {
  6148. numProcessedLastGroup = processed;
  6149. return NULL;
  6150. }
  6151. eof = true;
  6152. return NULL;
  6153. }
  6154. //=====================================================================================================
  6155. CHThorRollupGroupActivity::CHThorRollupGroupActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorRollupGroupArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6156. {
  6157. }
  6158. void CHThorRollupGroupActivity::ready()
  6159. {
  6160. CHThorSimpleActivityBase::ready();
  6161. eof = false;
  6162. }
  6163. const void * CHThorRollupGroupActivity::nextRow()
  6164. {
  6165. if (eof)
  6166. return NULL;
  6167. for (;;)
  6168. {
  6169. OwnedRowArray group;
  6170. for (;;)
  6171. {
  6172. const void * in = input->nextRow();
  6173. if (!in)
  6174. break;
  6175. group.append(in);
  6176. }
  6177. if (group.ordinality() == 0)
  6178. {
  6179. eof = true;
  6180. return NULL;
  6181. }
  6182. try
  6183. {
  6184. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  6185. size32_t outSize = helper.transform(rowBuilder, group.ordinality(), (const void * *)group.getArray());
  6186. if (outSize)
  6187. {
  6188. processed++;
  6189. return rowBuilder.finalizeRowClear(outSize);
  6190. }
  6191. }
  6192. catch(IException * e)
  6193. {
  6194. throw makeWrappedException(e);
  6195. }
  6196. }
  6197. }
  6198. //=====================================================================================================
  6199. CHThorCombineActivity::CHThorCombineActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorCombineArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorMultiInputActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6200. {
  6201. }
  6202. void CHThorCombineActivity::ready()
  6203. {
  6204. numProcessedLastGroup = processed;
  6205. CHThorMultiInputActivity::ready();
  6206. }
  6207. void CHThorCombineActivity::nextInputs(OwnedRowArray & out)
  6208. {
  6209. ForEachItemIn(i, inputs)
  6210. {
  6211. const void * next = inputs.item(i)->nextRow();
  6212. if (next)
  6213. out.append(next);
  6214. }
  6215. }
  6216. const void *CHThorCombineActivity::nextRow()
  6217. {
  6218. for (;;)
  6219. {
  6220. OwnedRowArray group;
  6221. nextInputs(group);
  6222. if ((group.ordinality() == 0) && (numProcessedLastGroup == processed))
  6223. nextInputs(group);
  6224. if (group.ordinality() == 0)
  6225. {
  6226. numProcessedLastGroup = processed;
  6227. return NULL;
  6228. }
  6229. else if (group.ordinality() != inputs.ordinality())
  6230. {
  6231. throw MakeStringException(101, "Mismatched group input for Combine Activity(%u)", activityId);
  6232. }
  6233. try
  6234. {
  6235. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  6236. size32_t outSize = helper.transform(rowBuilder, group.ordinality(), (const void * *)group.getArray());
  6237. if (outSize)
  6238. {
  6239. processed++;
  6240. return rowBuilder.finalizeRowClear(outSize);
  6241. }
  6242. }
  6243. catch(IException * e)
  6244. {
  6245. throw makeWrappedException(e);
  6246. }
  6247. }
  6248. }
  6249. //=====================================================================================================
  6250. CHThorCombineGroupActivity::CHThorCombineGroupActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorCombineGroupArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6251. {
  6252. }
  6253. void CHThorCombineGroupActivity::ready()
  6254. {
  6255. numProcessedLastGroup = processed;
  6256. CHThorSimpleActivityBase::ready();
  6257. input1->ready();
  6258. }
  6259. void CHThorCombineGroupActivity::stop()
  6260. {
  6261. CHThorSimpleActivityBase::stop();
  6262. input1->stop();
  6263. }
  6264. void CHThorCombineGroupActivity::setInput(unsigned index, IHThorInput *_input)
  6265. {
  6266. if (index==1)
  6267. input1 = _input;
  6268. else
  6269. CHThorSimpleActivityBase::setInput(index, _input);
  6270. }
  6271. const void *CHThorCombineGroupActivity::nextRow()
  6272. {
  6273. for (;;)
  6274. {
  6275. OwnedConstRoxieRow left(input->nextRow());
  6276. if (!left && (numProcessedLastGroup == processed))
  6277. left.setown(input->nextRow());
  6278. if (!left)
  6279. {
  6280. if (numProcessedLastGroup == processed)
  6281. {
  6282. OwnedConstRoxieRow nextRight(input1->nextRow());
  6283. if (nextRight)
  6284. throw MakeStringException(101, "Missing LEFT record for Combine group Activity(%u)", activityId);
  6285. }
  6286. else
  6287. numProcessedLastGroup = processed;
  6288. return NULL;
  6289. }
  6290. OwnedRowArray group;
  6291. for (;;)
  6292. {
  6293. const void * in = input1->nextRow();
  6294. if (!in)
  6295. break;
  6296. group.append(in);
  6297. }
  6298. if (group.ordinality() == 0)
  6299. {
  6300. throw MakeStringException(101, "Missing RIGHT group for Combine Group Activity(%u)", activityId);
  6301. }
  6302. try
  6303. {
  6304. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  6305. size32_t outSize = helper.transform(rowBuilder, left, group.ordinality(), (const void * *)group.getArray());
  6306. if (outSize)
  6307. {
  6308. processed++;
  6309. return rowBuilder.finalizeRowClear(outSize);
  6310. }
  6311. }
  6312. catch(IException * e)
  6313. {
  6314. throw makeWrappedException(e);
  6315. }
  6316. }
  6317. }
  6318. //=====================================================================================================
  6319. CHThorApplyActivity::CHThorApplyActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorApplyArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6320. {
  6321. }
  6322. void CHThorApplyActivity::execute()
  6323. {
  6324. try
  6325. {
  6326. helper.start();
  6327. for (;;)
  6328. {
  6329. OwnedConstRoxieRow next(input->nextRow());
  6330. if (!next)
  6331. {
  6332. next.setown(input->nextRow());
  6333. if (!next)
  6334. break;
  6335. }
  6336. helper.apply(next);
  6337. }
  6338. helper.end();
  6339. }
  6340. catch (IException *e)
  6341. {
  6342. throw makeWrappedException(e);
  6343. }
  6344. }
  6345. //=====================================================================================================
  6346. CHThorDistributionActivity::CHThorDistributionActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDistributionArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  6347. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6348. {
  6349. }
  6350. void CHThorDistributionActivity::execute()
  6351. {
  6352. MemoryAttr ma;
  6353. IDistributionTable * * accumulator = (IDistributionTable * *)ma.allocate(helper.queryInternalRecordSize()->getMinRecordSize());
  6354. helper.clearAggregate(accumulator);
  6355. OwnedConstRoxieRow nextrec(input->nextRow());
  6356. for (;;)
  6357. {
  6358. if (!nextrec)
  6359. {
  6360. nextrec.setown(input->nextRow());
  6361. if (!nextrec)
  6362. break;
  6363. }
  6364. helper.process(accumulator, nextrec);
  6365. nextrec.setown(input->nextRow());
  6366. }
  6367. StringBuffer result;
  6368. result.append("<XML>");
  6369. helper.gatherResult(accumulator, result);
  6370. result.append("</XML>");
  6371. helper.sendResult(result.length(), result.str());
  6372. helper.destruct(accumulator);
  6373. }
  6374. //---------------------------------------------------------------------------
  6375. CHThorWorkunitReadActivity::CHThorWorkunitReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorWorkunitReadArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6376. {
  6377. first = true;
  6378. bufferStream.setown(createMemoryBufferSerialStream(resultBuffer));
  6379. deserializer.setStream(bufferStream);
  6380. }
  6381. CHThorWorkunitReadActivity::~CHThorWorkunitReadActivity()
  6382. {
  6383. }
  6384. void CHThorWorkunitReadActivity::ready()
  6385. {
  6386. CHThorSimpleActivityBase::ready();
  6387. rowDeserializer.setown(rowAllocator->createDiskDeserializer(agent.queryCodeContext()));
  6388. if(first)
  6389. {
  6390. checkForDiskRead();
  6391. first = false;
  6392. }
  6393. if(diskread)
  6394. {
  6395. diskread->ready();
  6396. return;
  6397. }
  6398. grouped = outputMeta.isGrouped();
  6399. unsigned lenData;
  6400. void * tempData;
  6401. OwnedRoxieString fromWuid(helper.getWUID());
  6402. ICsvToRowTransformer * csvTransformer = helper.queryCsvTransformer();
  6403. IXmlToRowTransformer * xmlTransformer = helper.queryXmlTransformer();
  6404. if (fromWuid)
  6405. agent.queryCodeContext()->getExternalResultRaw(lenData, tempData, fromWuid, helper.queryName(), helper.querySequence(), xmlTransformer, csvTransformer);
  6406. else
  6407. agent.queryCodeContext()->getResultRaw(lenData, tempData, helper.queryName(), helper.querySequence(), xmlTransformer, csvTransformer);
  6408. resultBuffer.setBuffer(lenData, tempData, true);
  6409. eogPending = false;
  6410. }
  6411. void CHThorWorkunitReadActivity::checkForDiskRead()
  6412. {
  6413. StringBuffer diskFilename;
  6414. OwnedRoxieString fromWuid(helper.getWUID());
  6415. if (agent.getWorkunitResultFilename(diskFilename, fromWuid, helper.queryName(), helper.querySequence()))
  6416. {
  6417. diskreadHelper.setown(createWorkUnitReadArg(diskFilename.str(), &helper));
  6418. try
  6419. {
  6420. diskreadHelper->onCreate(agent.queryCodeContext(), NULL, NULL);
  6421. }
  6422. catch(IException * e)
  6423. {
  6424. throw makeWrappedException(e);
  6425. }
  6426. diskread.setown(new CHThorDiskReadActivity(agent, activityId, subgraphId, *diskreadHelper, TAKdiskread, graph, nullptr));
  6427. }
  6428. }
  6429. void CHThorWorkunitReadActivity::stop()
  6430. {
  6431. if(diskread)
  6432. diskread->stop();
  6433. resultBuffer.resetBuffer();
  6434. CHThorSimpleActivityBase::stop();
  6435. }
  6436. const void *CHThorWorkunitReadActivity::nextRow()
  6437. {
  6438. if(diskread)
  6439. {
  6440. const void * ret = diskread->nextRow();
  6441. processed = diskread->queryProcessed();
  6442. return ret;
  6443. }
  6444. if (deserializer.eos())
  6445. return NULL;
  6446. if (eogPending)
  6447. {
  6448. eogPending = false;
  6449. return NULL;
  6450. }
  6451. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  6452. size32_t newSize = rowDeserializer->deserialize(rowBuilder, deserializer);
  6453. if (grouped)
  6454. deserializer.read(sizeof(bool), &eogPending);
  6455. processed++;
  6456. return rowBuilder.finalizeRowClear(newSize);
  6457. }
  6458. //=====================================================================================================
  6459. CHThorParseActivity::CHThorParseActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorParseArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6460. {
  6461. //DebugBreak();
  6462. anyThisGroup = false;
  6463. curSearchTextLen = 0;
  6464. curSearchText = NULL;
  6465. algorithm = createThorParser(agent.queryCodeContext(), helper);
  6466. parser = algorithm->createParser(agent.queryCodeContext(), activityId, helper.queryHelper(), &helper);
  6467. rowIter = parser->queryResultIter();
  6468. }
  6469. CHThorParseActivity::~CHThorParseActivity()
  6470. {
  6471. if (curSearchText && helper.searchTextNeedsFree())
  6472. rtlFree(curSearchText);
  6473. parser->Release();
  6474. algorithm->Release();
  6475. }
  6476. void CHThorParseActivity::ready()
  6477. {
  6478. CHThorSimpleActivityBase::ready();
  6479. anyThisGroup = false;
  6480. parser->reset();
  6481. }
  6482. void CHThorParseActivity::stop()
  6483. {
  6484. CHThorSimpleActivityBase::stop();
  6485. if (curSearchText && helper.searchTextNeedsFree())
  6486. rtlFree(curSearchText);
  6487. curSearchText = NULL;
  6488. in.clear();
  6489. }
  6490. bool CHThorParseActivity::processRecord(const void * in)
  6491. {
  6492. if (curSearchText && helper.searchTextNeedsFree())
  6493. rtlFree(curSearchText);
  6494. curSearchTextLen = 0;
  6495. curSearchText = NULL;
  6496. helper.getSearchText(curSearchTextLen, curSearchText, in);
  6497. return parser->performMatch(*this, in, curSearchTextLen, curSearchText);
  6498. }
  6499. unsigned CHThorParseActivity::onMatch(ARowBuilder & self, const void * curRecord, IMatchedResults * results, IMatchWalker * walker)
  6500. {
  6501. try
  6502. {
  6503. return helper.transform(self, curRecord, results, walker);
  6504. }
  6505. catch(IException * e)
  6506. {
  6507. throw makeWrappedException(e);
  6508. }
  6509. }
  6510. const void * CHThorParseActivity::nextRow()
  6511. {
  6512. for (;;)
  6513. {
  6514. if (rowIter->isValid())
  6515. {
  6516. anyThisGroup = true;
  6517. OwnedConstRoxieRow out = rowIter->getRow();
  6518. rowIter->next();
  6519. processed++;
  6520. return out.getClear();
  6521. }
  6522. in.setown(input->nextRow());
  6523. if (!in)
  6524. {
  6525. if (anyThisGroup)
  6526. {
  6527. anyThisGroup = false;
  6528. return NULL;
  6529. }
  6530. in.setown(input->nextRow());
  6531. if (!in)
  6532. return NULL;
  6533. }
  6534. processRecord(in);
  6535. rowIter->first();
  6536. }
  6537. }
  6538. //=====================================================================================================
  6539. CHThorEnthActivity::CHThorEnthActivity(IAgentContext & _agent, unsigned _activityId, unsigned _subgraphId, IHThorEnthArg & _arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), outBuilder(NULL)
  6540. {
  6541. }
  6542. void CHThorEnthActivity::ready()
  6543. {
  6544. CHThorSimpleActivityBase::ready();
  6545. outBuilder.setAllocator(rowAllocator);
  6546. numerator = helper.getProportionNumerator();
  6547. denominator = helper.getProportionDenominator();
  6548. started = false;
  6549. }
  6550. void CHThorEnthActivity::stop()
  6551. {
  6552. outBuilder.clear();
  6553. }
  6554. void CHThorEnthActivity::start()
  6555. {
  6556. if(denominator == 0) denominator = 1;
  6557. counter = (helper.getSampleNumber()-1) * greatestCommonDivisor(numerator, denominator);
  6558. if (counter >= denominator)
  6559. counter %= denominator;
  6560. started = true;
  6561. }
  6562. const void * CHThorEnthActivity::nextRow()
  6563. {
  6564. if(!started)
  6565. start();
  6566. OwnedConstRoxieRow ret;
  6567. for (;;)
  6568. {
  6569. ret.setown(input->nextRow());
  6570. if(!ret) //end of group
  6571. ret.setown(input->nextRow());
  6572. if(!ret) //eof
  6573. return NULL;
  6574. if (wanted())
  6575. {
  6576. processed++;
  6577. return ret.getClear();
  6578. }
  6579. }
  6580. }
  6581. //=====================================================================================================
  6582. CHThorTopNActivity::CHThorTopNActivity(IAgentContext & _agent, unsigned _activityId, unsigned _subgraphId, IHThorTopNArg & _arg, ThorActivityKind _kind, EclGraph & _graph)
  6583. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), compare(*helper.queryCompare())
  6584. {
  6585. hasBest = helper.hasBest();
  6586. grouped = outputMeta.isGrouped();
  6587. curIndex = 0;
  6588. sortedCount = 0;
  6589. limit = 0;
  6590. sorted = NULL;
  6591. }
  6592. CHThorTopNActivity::~CHThorTopNActivity()
  6593. {
  6594. roxiemem::ReleaseRoxieRowRange(sorted, curIndex, sortedCount);
  6595. free(sorted);
  6596. }
  6597. void CHThorTopNActivity::ready()
  6598. {
  6599. CHThorSimpleActivityBase::ready();
  6600. limit = helper.getLimit();
  6601. assertex(limit == (__int64)(size_t)limit);
  6602. sorted = (const void * *)checked_calloc((size_t)(limit+1), sizeof(void *), "topn");
  6603. sortedCount = 0;
  6604. curIndex = 0;
  6605. eof = false;
  6606. eoi = false;
  6607. }
  6608. void CHThorTopNActivity::stop()
  6609. {
  6610. CHThorSimpleActivityBase::stop();
  6611. roxiemem::ReleaseRoxieRowRange(sorted, curIndex, sortedCount);
  6612. free(sorted);
  6613. sorted = NULL;
  6614. curIndex = 0;
  6615. sortedCount = 0;
  6616. }
  6617. const void * CHThorTopNActivity::nextRow()
  6618. {
  6619. if(eof)
  6620. return NULL;
  6621. if(curIndex >= sortedCount)
  6622. {
  6623. bool eog = sortedCount != 0;
  6624. getSorted();
  6625. if(sortedCount == 0)
  6626. {
  6627. eof = true;
  6628. return NULL;
  6629. }
  6630. if (eog)
  6631. return NULL;
  6632. }
  6633. processed++;
  6634. return sorted[curIndex++];
  6635. }
  6636. bool CHThorTopNActivity::abortEarly()
  6637. {
  6638. if (hasBest && (sortedCount == limit))
  6639. {
  6640. int compare = helper.compareBest(sorted[sortedCount-1]);
  6641. if (compare == 0)
  6642. {
  6643. if (grouped)
  6644. {
  6645. //MORE: This would be more efficient if we had a away of skipping to the end of the incomming group.
  6646. OwnedConstRoxieRow next;
  6647. do
  6648. {
  6649. next.setown(input->nextRow());
  6650. } while(next);
  6651. }
  6652. else
  6653. eoi = true;
  6654. return true;
  6655. }
  6656. //This only checks the lowest element - we could check all elements inserted, but it would increase the number of compares
  6657. if (compare < 0)
  6658. throw MakeStringException(0, "TOPN: row found that exceeds the best value");
  6659. }
  6660. return false;
  6661. }
  6662. void CHThorTopNActivity::getSorted()
  6663. {
  6664. curIndex = 0;
  6665. sortedCount = 0;
  6666. if (eoi)
  6667. return;
  6668. OwnedConstRoxieRow next(input->nextRow());
  6669. while(next)
  6670. {
  6671. if(sortedCount < limit)
  6672. {
  6673. binary_vec_insert_stable(next.getClear(), sorted, sortedCount, compare);
  6674. sortedCount++;
  6675. if (abortEarly())
  6676. return;
  6677. }
  6678. else
  6679. {
  6680. // do not bother with insertion sort if we know next will fall off the end
  6681. if(limit && compare.docompare(sorted[sortedCount-1], next) > 0)
  6682. {
  6683. binary_vec_insert_stable(next.getClear(), sorted, sortedCount, compare);
  6684. ReleaseRoxieRow(sorted[sortedCount]);
  6685. if (abortEarly())
  6686. return;
  6687. }
  6688. }
  6689. next.setown(input->nextRow());
  6690. }
  6691. }
  6692. //=====================================================================================================
  6693. CHThorXmlParseActivity::CHThorXmlParseActivity(IAgentContext & _agent, unsigned _activityId, unsigned _subgraphId, IHThorXmlParseArg & _arg, ThorActivityKind _kind, EclGraph & _graph)
  6694. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6695. {
  6696. srchStrNeedsFree = helper.searchTextNeedsFree();
  6697. srchStr = NULL;
  6698. }
  6699. CHThorXmlParseActivity::~CHThorXmlParseActivity()
  6700. {
  6701. if(srchStrNeedsFree) rtlFree(srchStr);
  6702. }
  6703. void CHThorXmlParseActivity::ready()
  6704. {
  6705. CHThorSimpleActivityBase::ready();
  6706. numProcessedLastGroup = processed;
  6707. }
  6708. void CHThorXmlParseActivity::stop()
  6709. {
  6710. CHThorSimpleActivityBase::stop();
  6711. if(srchStrNeedsFree) rtlFree(srchStr);
  6712. srchStr = NULL;
  6713. in.clear();
  6714. }
  6715. const void * CHThorXmlParseActivity::nextRow()
  6716. {
  6717. for (;;)
  6718. {
  6719. if(xmlParser)
  6720. {
  6721. for (;;)
  6722. {
  6723. bool gotNext = false;
  6724. try
  6725. {
  6726. gotNext = xmlParser->next();
  6727. }
  6728. catch(IException * e)
  6729. {
  6730. throw makeWrappedException(e);
  6731. }
  6732. if(!gotNext)
  6733. {
  6734. if(srchStrNeedsFree)
  6735. {
  6736. rtlFree(srchStr);
  6737. srchStr = NULL;
  6738. }
  6739. xmlParser.clear();
  6740. break;
  6741. }
  6742. if(lastMatch)
  6743. {
  6744. try
  6745. {
  6746. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  6747. unsigned sizeGot = helper.transform(rowBuilder, in, lastMatch);
  6748. lastMatch.clear();
  6749. if (sizeGot)
  6750. {
  6751. processed++;
  6752. return rowBuilder.finalizeRowClear(sizeGot);
  6753. }
  6754. }
  6755. catch(IException * e)
  6756. {
  6757. throw makeWrappedException(e);
  6758. }
  6759. }
  6760. }
  6761. }
  6762. in.setown(input->nextRow());
  6763. if(!in)
  6764. {
  6765. if(numProcessedLastGroup == processed)
  6766. in.setown(input->nextRow());
  6767. if(!in)
  6768. {
  6769. numProcessedLastGroup = processed;
  6770. return NULL;
  6771. }
  6772. }
  6773. size32_t srchLen;
  6774. helper.getSearchText(srchLen, srchStr, in);
  6775. OwnedRoxieString xmlIteratorPath(helper.getXmlIteratorPath());
  6776. xmlParser.setown(createXMLParse(srchStr, srchLen, xmlIteratorPath, *this, ptr_noRoot, helper.requiresContents()));
  6777. }
  6778. }
  6779. //=====================================================================================================
  6780. class CHThorMergeActivity : public CHThorMultiInputActivity
  6781. {
  6782. protected:
  6783. IHThorMergeArg &helper;
  6784. CHThorStreamMerger merger;
  6785. public:
  6786. CHThorMergeActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorMergeArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorMultiInputActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6787. {
  6788. merger.init(helper.queryCompare(), helper.dedup(), NULL); // can mass null for range because merger.nextGE() never called
  6789. }
  6790. ~CHThorMergeActivity()
  6791. {
  6792. merger.cleanup();
  6793. }
  6794. virtual void ready()
  6795. {
  6796. CHThorMultiInputActivity::ready();
  6797. merger.initInputs(inputs.length(), inputs.getArray());
  6798. }
  6799. virtual void stop()
  6800. {
  6801. merger.done();
  6802. CHThorMultiInputActivity::stop();
  6803. }
  6804. virtual const void * nextRow()
  6805. {
  6806. const void * ret = merger.nextRow();
  6807. if (ret)
  6808. processed++;
  6809. return ret;
  6810. }
  6811. };
  6812. //=====================================================================================================
  6813. //Web Service Call base
  6814. CHThorWSCBaseActivity::CHThorWSCBaseActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorWebServiceCallArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6815. {
  6816. callHelper = &_arg;
  6817. init();
  6818. }
  6819. CHThorWSCBaseActivity::CHThorWSCBaseActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorWebServiceCallActionArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6820. {
  6821. callHelper = NULL;
  6822. init();
  6823. }
  6824. void CHThorWSCBaseActivity::stop()
  6825. {
  6826. WSChelper.clear();//doesn't return until helper threads terminate
  6827. CHThorSimpleActivityBase::stop();
  6828. }
  6829. void CHThorWSCBaseActivity::init()
  6830. {
  6831. // Build authentication token
  6832. StringBuffer uidpair;
  6833. IUserDescriptor *userDesc = agent.queryCodeContext()->queryUserDescriptor();
  6834. if (userDesc)//NULL if standalone
  6835. {
  6836. userDesc->getUserName(uidpair);
  6837. uidpair.append(":");
  6838. userDesc->getPassword(uidpair);
  6839. JBASE64_Encode(uidpair.str(), uidpair.length(), authToken, false);
  6840. }
  6841. soapTraceLevel = agent.queryWorkUnit()->getDebugValueInt("soapTraceLevel", 1);
  6842. }
  6843. //---------------------------------------------------------------------------
  6844. CHThorWSCRowCallActivity::CHThorWSCRowCallActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorWebServiceCallArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorWSCBaseActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  6845. {
  6846. }
  6847. const void *CHThorWSCRowCallActivity::nextRow()
  6848. {
  6849. try
  6850. {
  6851. assertex(WSChelper);
  6852. OwnedConstRoxieRow ret = WSChelper->getRow();
  6853. if (!ret)
  6854. return NULL;
  6855. ++processed;
  6856. return ret.getClear();
  6857. }
  6858. catch(IException * e)
  6859. {
  6860. throw makeWrappedException(e);
  6861. }
  6862. }
  6863. //---------------------------------------------------------------------------
  6864. const void *CHThorHttpRowCallActivity::nextRow()
  6865. {
  6866. try
  6867. {
  6868. if (WSChelper == NULL)
  6869. {
  6870. WSChelper.setown(createHttpCallHelper(this, rowAllocator, authToken.str(), SCrow, NULL, queryDummyContextLogger(),NULL));
  6871. WSChelper->start();
  6872. }
  6873. return CHThorWSCRowCallActivity::nextRow();
  6874. }
  6875. catch(IException * e)
  6876. {
  6877. throw makeWrappedException(e);
  6878. }
  6879. }
  6880. //---------------------------------------------------------------------------
  6881. const void *CHThorSoapRowCallActivity::nextRow()
  6882. {
  6883. try
  6884. {
  6885. if (WSChelper == NULL)
  6886. {
  6887. WSChelper.setown(createSoapCallHelper(this, rowAllocator, authToken.str(), SCrow, NULL, queryDummyContextLogger(),NULL));
  6888. WSChelper->start();
  6889. }
  6890. return CHThorWSCRowCallActivity::nextRow();
  6891. }
  6892. catch(IException * e)
  6893. {
  6894. throw makeWrappedException(e);
  6895. }
  6896. }
  6897. //---------------------------------------------------------------------------
  6898. //---------------------------------------------------------------------------
  6899. //---------------------------------------------------------------------------
  6900. CHThorSoapRowActionActivity::CHThorSoapRowActionActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorSoapActionArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorWSCBaseActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  6901. {
  6902. }
  6903. void CHThorSoapRowActionActivity::execute()
  6904. {
  6905. try
  6906. {
  6907. WSChelper.setown(createSoapCallHelper(this, NULL, authToken.str(), SCrow, NULL, queryDummyContextLogger(),NULL));
  6908. WSChelper->start();
  6909. WSChelper->waitUntilDone();
  6910. }
  6911. catch(IException * e)
  6912. {
  6913. throw makeWrappedException(e);
  6914. }
  6915. IException *e = WSChelper->getError();
  6916. if(e)
  6917. throw makeWrappedException(e);
  6918. }
  6919. //---------------------------------------------------------------------------
  6920. CHThorSoapDatasetCallActivity::CHThorSoapDatasetCallActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorSoapCallArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorWSCBaseActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  6921. {
  6922. }
  6923. const void * CHThorSoapDatasetCallActivity::nextRow()
  6924. {
  6925. try
  6926. {
  6927. if (WSChelper == NULL)
  6928. {
  6929. WSChelper.setown(createSoapCallHelper(this, rowAllocator, authToken.str(), SCdataset, NULL, queryDummyContextLogger(),NULL));
  6930. WSChelper->start();
  6931. }
  6932. OwnedConstRoxieRow ret = WSChelper->getRow();
  6933. if (!ret)
  6934. return NULL;
  6935. ++processed;
  6936. return ret.getClear();
  6937. }
  6938. catch(IException * e)
  6939. {
  6940. throw makeWrappedException(e);
  6941. }
  6942. }
  6943. const void * CHThorSoapDatasetCallActivity::getNextRow()
  6944. {
  6945. CriticalBlock b(crit);
  6946. const void *nextrec = input->nextRow();
  6947. if (!nextrec)
  6948. {
  6949. nextrec = input->nextRow();
  6950. }
  6951. return nextrec;
  6952. };
  6953. //---------------------------------------------------------------------------
  6954. CHThorSoapDatasetActionActivity::CHThorSoapDatasetActionActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorSoapActionArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorWSCBaseActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  6955. {
  6956. }
  6957. void CHThorSoapDatasetActionActivity::execute()
  6958. {
  6959. try
  6960. {
  6961. WSChelper.setown(createSoapCallHelper(this, NULL, authToken.str(), SCdataset, NULL, queryDummyContextLogger(),NULL));
  6962. WSChelper->start();
  6963. WSChelper->waitUntilDone();
  6964. }
  6965. catch(IException * e)
  6966. {
  6967. throw makeWrappedException(e);
  6968. }
  6969. IException *e = WSChelper->getError();
  6970. if(e)
  6971. throw makeWrappedException(e);
  6972. }
  6973. const void * CHThorSoapDatasetActionActivity::getNextRow()
  6974. {
  6975. CriticalBlock b(crit);
  6976. const void *nextrec = input->nextRow();
  6977. if (!nextrec)
  6978. {
  6979. nextrec = input->nextRow();
  6980. }
  6981. if (nextrec)
  6982. {
  6983. processed++;
  6984. }
  6985. return nextrec;
  6986. };
  6987. //=====================================================================================================
  6988. CHThorResultActivity::CHThorResultActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  6989. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  6990. {
  6991. }
  6992. void CHThorResultActivity::extractResult(unsigned & retSize, void * & ret)
  6993. {
  6994. unsigned len = rowdata.length();
  6995. retSize = len;
  6996. if (len)
  6997. {
  6998. void * temp = rtlMalloc(len);
  6999. memcpy(temp, rowdata.toByteArray(), len);
  7000. ret = temp;
  7001. }
  7002. else
  7003. ret = NULL;
  7004. }
  7005. //=====================================================================================================
  7006. CHThorDatasetResultActivity::CHThorDatasetResultActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDatasetResultArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  7007. : CHThorResultActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  7008. {
  7009. }
  7010. void CHThorDatasetResultActivity::execute()
  7011. {
  7012. rowdata.clear();
  7013. IRecordSize * inputMeta = input->queryOutputMeta();
  7014. for (;;)
  7015. {
  7016. OwnedConstRoxieRow nextrec(input->nextRow());
  7017. if (!nextrec)
  7018. {
  7019. nextrec.setown(input->nextRow());
  7020. if (!nextrec)
  7021. break;
  7022. }
  7023. rowdata.append(inputMeta->getRecordSize(nextrec), nextrec);
  7024. }
  7025. }
  7026. //=====================================================================================================
  7027. CHThorRowResultActivity::CHThorRowResultActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorRowResultArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  7028. : CHThorResultActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  7029. {
  7030. }
  7031. void CHThorRowResultActivity::execute()
  7032. {
  7033. OwnedConstRoxieRow nextrec(input->nextRow());
  7034. assertex(nextrec);
  7035. IRecordSize * inputMeta = input->queryOutputMeta();
  7036. unsigned length = inputMeta->getRecordSize(nextrec);
  7037. rowdata.clear().append(length, nextrec);
  7038. }
  7039. //=====================================================================================================
  7040. CHThorChildIteratorActivity::CHThorChildIteratorActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorChildIteratorArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  7041. {
  7042. }
  7043. const void *CHThorChildIteratorActivity::nextRow()
  7044. {
  7045. if (eof)
  7046. return NULL;
  7047. bool ok;
  7048. if (!started)
  7049. {
  7050. ok = helper.first();
  7051. started = true;
  7052. }
  7053. else
  7054. ok = helper.next();
  7055. try
  7056. {
  7057. while(ok)
  7058. {
  7059. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  7060. size32_t outSize = helper.transform(rowBuilder);
  7061. if(outSize)
  7062. {
  7063. processed++;
  7064. return rowBuilder.finalizeRowClear(outSize);
  7065. }
  7066. ok = helper.next();
  7067. }
  7068. }
  7069. catch(IException * e)
  7070. {
  7071. throw makeWrappedException(e);
  7072. }
  7073. eof = true;
  7074. return NULL;
  7075. }
  7076. void CHThorChildIteratorActivity::ready()
  7077. {
  7078. CHThorSimpleActivityBase::ready();
  7079. started = false;
  7080. eof = false;
  7081. }
  7082. //=====================================================================================================
  7083. CHThorLinkedRawIteratorActivity::CHThorLinkedRawIteratorActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorLinkedRawIteratorArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  7084. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  7085. {
  7086. }
  7087. const void *CHThorLinkedRawIteratorActivity::nextRow()
  7088. {
  7089. const void *ret =helper.next();
  7090. if (ret)
  7091. {
  7092. LinkRoxieRow(ret);
  7093. processed++;
  7094. }
  7095. return ret;
  7096. }
  7097. //=====================================================================================================
  7098. //=====================================================================================================
  7099. //== New implementations - none are currently used, created or tested =================================
  7100. //=====================================================================================================
  7101. CHThorChildNormalizeActivity::CHThorChildNormalizeActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorChildNormalizeArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  7102. {
  7103. }
  7104. const void *CHThorChildNormalizeActivity::nextRow()
  7105. {
  7106. if (eof)
  7107. return NULL;
  7108. bool ok;
  7109. if (!started)
  7110. {
  7111. ok = helper.first();
  7112. started = true;
  7113. }
  7114. else
  7115. ok = helper.next();
  7116. try
  7117. {
  7118. if (ok)
  7119. {
  7120. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  7121. do {
  7122. unsigned thisSize = helper.transform(rowBuilder);
  7123. if (thisSize)
  7124. {
  7125. processed++;
  7126. return rowBuilder.finalizeRowClear(thisSize);
  7127. }
  7128. ok = helper.next();
  7129. }
  7130. while (ok);
  7131. }
  7132. }
  7133. catch(IException * e)
  7134. {
  7135. throw makeWrappedException(e);
  7136. }
  7137. eof = true;
  7138. return NULL;
  7139. }
  7140. void CHThorChildNormalizeActivity::ready()
  7141. {
  7142. CHThorSimpleActivityBase::ready();
  7143. started = false;
  7144. eof = false;
  7145. }
  7146. //=====================================================================================================
  7147. CHThorChildAggregateActivity::CHThorChildAggregateActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorChildAggregateArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  7148. {
  7149. }
  7150. const void *CHThorChildAggregateActivity::nextRow()
  7151. {
  7152. if (eof)
  7153. return NULL;
  7154. eof = true;
  7155. processed++;
  7156. try
  7157. {
  7158. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  7159. helper.clearAggregate(rowBuilder);
  7160. helper.processRows(rowBuilder);
  7161. size32_t finalSize = outputMeta.getRecordSize(rowBuilder.getSelf());
  7162. return rowBuilder.finalizeRowClear(finalSize);
  7163. }
  7164. catch(IException * e)
  7165. {
  7166. throw makeWrappedException(e);
  7167. }
  7168. }
  7169. void CHThorChildAggregateActivity::ready()
  7170. {
  7171. CHThorSimpleActivityBase::ready();
  7172. eof = false;
  7173. }
  7174. //=====================================================================================================
  7175. CHThorChildGroupAggregateActivity::CHThorChildGroupAggregateActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorChildGroupAggregateArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  7176. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph),
  7177. helper(_arg),
  7178. aggregated(_arg, _arg)
  7179. {
  7180. }
  7181. void CHThorChildGroupAggregateActivity::ready()
  7182. {
  7183. CHThorSimpleActivityBase::ready();
  7184. eof = false;
  7185. gathered = false;
  7186. aggregated.start(rowAllocator, agent.queryCodeContext(), activityId);
  7187. }
  7188. void CHThorChildGroupAggregateActivity::stop()
  7189. {
  7190. aggregated.reset();
  7191. CHThorSimpleActivityBase::stop();
  7192. }
  7193. void CHThorChildGroupAggregateActivity::processRow(const void * next)
  7194. {
  7195. aggregated.addRow(next);
  7196. }
  7197. const void * CHThorChildGroupAggregateActivity::nextRow()
  7198. {
  7199. if (eof)
  7200. return NULL;
  7201. if (!gathered)
  7202. {
  7203. helper.processRows(this);
  7204. gathered = true;
  7205. }
  7206. Owned<AggregateRowBuilder> next = aggregated.nextResult();
  7207. if (next)
  7208. {
  7209. processed++;
  7210. return next->finalizeRowClear();
  7211. }
  7212. eof = true;
  7213. return NULL;
  7214. }
  7215. //=====================================================================================================
  7216. CHThorChildThroughNormalizeActivity::CHThorChildThroughNormalizeActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorChildThroughNormalizeArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), outBuilder(NULL)
  7217. {
  7218. }
  7219. void CHThorChildThroughNormalizeActivity::stop()
  7220. {
  7221. outBuilder.clear();
  7222. lastInput.clear();
  7223. CHThorSimpleActivityBase::stop();
  7224. }
  7225. void CHThorChildThroughNormalizeActivity::ready()
  7226. {
  7227. CHThorSimpleActivityBase::ready();
  7228. outBuilder.setAllocator(rowAllocator);
  7229. numProcessedLastGroup = processed;
  7230. ok = false;
  7231. }
  7232. const void *CHThorChildThroughNormalizeActivity::nextRow()
  7233. {
  7234. try
  7235. {
  7236. for (;;)
  7237. {
  7238. if (ok)
  7239. ok = helper.next();
  7240. while (!ok)
  7241. {
  7242. lastInput.setown(input->nextRow());
  7243. if (!lastInput)
  7244. {
  7245. if (numProcessedLastGroup != processed)
  7246. {
  7247. numProcessedLastGroup = processed;
  7248. return NULL;
  7249. }
  7250. lastInput.setown(input->nextRow());
  7251. if (!lastInput)
  7252. return NULL;
  7253. }
  7254. ok = helper.first(lastInput);
  7255. }
  7256. outBuilder.ensureRow();
  7257. do
  7258. {
  7259. size32_t thisSize = helper.transform(outBuilder);
  7260. if (thisSize)
  7261. {
  7262. processed++;
  7263. return outBuilder.finalizeRowClear(thisSize);
  7264. }
  7265. ok = helper.next();
  7266. } while (ok);
  7267. }
  7268. }
  7269. catch(IException * e)
  7270. {
  7271. throw makeWrappedException(e);
  7272. }
  7273. }
  7274. //=====================================================================================================
  7275. CHThorDiskReadBaseActivity::CHThorDiskReadBaseActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDiskReadBaseArg &_arg, ThorActivityKind _kind, IPropertyTree *_node, EclGraph & _graph) : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  7276. {
  7277. helper.setCallback(this);
  7278. expectedDiskMeta = helper.queryDiskRecordSize();
  7279. projectedDiskMeta = helper.queryProjectedDiskRecordSize();
  7280. actualDiskMeta.set(helper.queryDiskRecordSize()->querySerializedDiskMeta());
  7281. isCodeSigned = false;
  7282. if (_node)
  7283. {
  7284. const char *recordTranslationModeHintText = _node->queryProp("hint[@name='layouttranslation']/@value");
  7285. if (recordTranslationModeHintText)
  7286. recordTranslationModeHint = getTranslationMode(recordTranslationModeHintText, true);
  7287. isCodeSigned = isActivityCodeSigned(*_node);
  7288. }
  7289. }
  7290. CHThorDiskReadBaseActivity::~CHThorDiskReadBaseActivity()
  7291. {
  7292. close();
  7293. }
  7294. void CHThorDiskReadBaseActivity::ready()
  7295. {
  7296. CHThorActivityBase::ready();
  7297. grouped = false;
  7298. fixedDiskRecordSize = 0;
  7299. eofseen = false;
  7300. opened = false;
  7301. compressed = false;
  7302. rowcompressed = false;
  7303. blockcompressed = false;
  7304. persistent = false;
  7305. localOffset = 0;
  7306. offsetOfPart = 0;
  7307. partNum = (unsigned)-1;
  7308. resolve();
  7309. unsigned expectedCrc = helper.getDiskFormatCrc();
  7310. unsigned projectedCrc = helper.getProjectedFormatCrc();
  7311. IDistributedFile *dFile = nullptr;
  7312. if (ldFile)
  7313. dFile = ldFile->queryDistributedFile(); // Null for local file usage
  7314. Owned<IOutputMetaData> publishedMeta;
  7315. unsigned publishedCrc = 0;
  7316. RecordTranslationMode translationMode = getLayoutTranslationMode();
  7317. StringBuffer traceName;
  7318. if (dFile)
  7319. {
  7320. const char *kind = queryFileKind(dFile);
  7321. if (strisame(kind, "flat") || (RecordTranslationMode::AlwaysDisk == translationMode))
  7322. {
  7323. IPropertyTree &props = dFile->queryAttributes();
  7324. publishedMeta.setown(getDaliLayoutInfo(props));
  7325. if (publishedMeta)
  7326. publishedCrc = props.getPropInt("@formatCrc");
  7327. }
  7328. dFile->getLogicalName(traceName);
  7329. }
  7330. else
  7331. traceName.set("hthor-diskread");
  7332. translators.setown(::getTranslators(traceName.str(), expectedCrc, expectedDiskMeta, publishedCrc, publishedMeta, projectedCrc, projectedDiskMeta, translationMode));
  7333. if (translators)
  7334. {
  7335. if (publishedCrc && expectedCrc && publishedCrc != expectedCrc)
  7336. {
  7337. VStringBuffer msg("Record layout translation required for %s", traceName.str());
  7338. agent.addWuExceptionEx(msg.str(), WRN_UseLayoutTranslation, SeverityInformation, MSGAUD_user, "hthor");
  7339. }
  7340. translator = &translators->queryTranslator();
  7341. keyedTranslator = translators->queryKeyedTranslator();
  7342. actualDiskMeta.set(&translators->queryActualFormat());
  7343. }
  7344. else
  7345. {
  7346. translator = nullptr;
  7347. keyedTranslator = nullptr;
  7348. actualDiskMeta.set(helper.queryDiskRecordSize()->querySerializedDiskMeta());
  7349. }
  7350. }
  7351. void CHThorDiskReadBaseActivity::stop()
  7352. {
  7353. close();
  7354. CHThorActivityBase::stop();
  7355. }
  7356. #define TE_FileTypeMismatch 10138 // NB: duplicated from thorlcr/shared/thexception.hpp, but be moved to common header
  7357. void CHThorDiskReadBaseActivity::checkFileType(IDistributedFile *file)
  7358. {
  7359. if (rt_csv == readType)
  7360. return; // CSV read is permitted to read any type
  7361. if (!agent.queryWorkUnit()->getDebugValueInt(OPT_VALIDATE_FILE_TYPE, true))
  7362. return;
  7363. bool warningOnly = false;
  7364. const char *expectedType = nullptr;
  7365. switch (readType)
  7366. {
  7367. case rt_binary:
  7368. if (fixedDiskRecordSize) // we allow fixed width reads of other formats
  7369. return;
  7370. expectedType = "flat";
  7371. break;
  7372. case rt_xml:
  7373. expectedType = "xml";
  7374. warningOnly = true;
  7375. break;
  7376. case rt_json:
  7377. expectedType = "json";
  7378. warningOnly = true;
  7379. break;
  7380. default:
  7381. throwUnexpected();
  7382. }
  7383. const char *kind = queryFileKind(file);
  7384. if (isEmptyString(kind)) // file has no published kind, can't validate
  7385. return;
  7386. if (!strieq(kind, expectedType))
  7387. {
  7388. Owned<IException> e = makeStringExceptionV(TE_FileTypeMismatch, "File format mismatch reading file: '%s'. Expected type '%s', but file is type '%s'", file->queryLogicalName(), expectedType, kind);
  7389. if (!warningOnly)
  7390. throw e.getClear();
  7391. StringBuffer tmp;
  7392. agent.addWuException(e->errorMessage(tmp), e->errorCode(), SeverityWarning, "eclagent");
  7393. }
  7394. }
  7395. void CHThorDiskReadBaseActivity::resolve()
  7396. {
  7397. OwnedRoxieString fileName(helper.getFileName());
  7398. mangleHelperFileName(mangledHelperFileName, fileName, agent.queryWuid(), helper.getFlags());
  7399. if (helper.getFlags() & (TDXtemporary | TDXjobtemp))
  7400. {
  7401. StringBuffer mangledFilename;
  7402. mangleLocalTempFilename(mangledFilename, mangledHelperFileName.str(), nullptr);
  7403. tempFileName.set(agent.queryTemporaryFile(mangledFilename.str()));
  7404. logicalFileName.set(tempFileName);
  7405. gatherInfo(NULL);
  7406. }
  7407. else
  7408. {
  7409. ldFile.setown(resolveLFNFlat(agent, mangledHelperFileName.str(), "Read", 0 != (helper.getFlags() & TDRoptional), isCodeSigned));
  7410. if ( mangledHelperFileName.charAt(0) == '~')
  7411. logicalFileName.set(mangledHelperFileName.str()+1);
  7412. else
  7413. logicalFileName.set(mangledHelperFileName.str());
  7414. if (ldFile)
  7415. {
  7416. Owned<IFileDescriptor> fdesc;
  7417. fdesc.setown(ldFile->getFileDescriptor());
  7418. gatherInfo(fdesc);
  7419. if (ldFile->isExternal())
  7420. compressed = checkWriteIsCompressed(helper.getFlags(), fixedDiskRecordSize, false);//grouped=FALSE because fixedDiskRecordSize already includes grouped
  7421. IDistributedFile *dFile = ldFile->queryDistributedFile();
  7422. if (dFile) //only makes sense for distributed (non local) files
  7423. {
  7424. checkFileType(dFile); // throws an exception if file types mismatch
  7425. persistent = dFile->queryAttributes().getPropBool("@persistent");
  7426. dfsParts.setown(dFile->getIterator());
  7427. IDistributedSuperFile *super = dFile->querySuperFile();
  7428. if (super)
  7429. {
  7430. assertex(fdesc);
  7431. superfile.set(fdesc->querySuperFileDescriptor());
  7432. if (helper.getFlags() & TDRfilenamecallback)
  7433. {
  7434. unsigned numsubs = super->numSubFiles(true);
  7435. unsigned s=0;
  7436. for (; s<numsubs; s++)
  7437. {
  7438. IDistributedFile &subfile = super->querySubFile(s, true);
  7439. subfileLogicalFilenames.append(subfile.queryLogicalName());
  7440. }
  7441. if (!superfile && numsubs>0)
  7442. logicalFileName.set(subfileLogicalFilenames.item(0));
  7443. }
  7444. }
  7445. if((helper.getFlags() & (TDXtemporary | TDXjobtemp)) == 0)
  7446. agent.logFileAccess(dFile, "HThor", "READ", graph);
  7447. if(getLayoutTranslationMode()==RecordTranslationMode::None)
  7448. verifyRecordFormatCrc();
  7449. }
  7450. }
  7451. if (!ldFile)
  7452. {
  7453. StringBuffer buff;
  7454. buff.appendf("Input file '%s' was missing but declared optional", mangledHelperFileName.str());
  7455. agent.addWuExceptionEx(buff.str(), WRN_SkipMissingOptFile, SeverityInformation, MSGAUD_user, "hthor");
  7456. }
  7457. }
  7458. }
  7459. void CHThorDiskReadBaseActivity::gatherInfo(IFileDescriptor * fileDesc)
  7460. {
  7461. if (fileDesc)
  7462. {
  7463. if (!agent.queryResolveFilesLocally())
  7464. {
  7465. grouped = fileDesc->isGrouped();
  7466. if (grouped != ((helper.getFlags() & TDXgrouped) != 0))
  7467. {
  7468. StringBuffer msg;
  7469. msg.append("DFS and code generated group info. differs: DFS(").append(grouped ? "grouped" : "ungrouped").append("), CodeGen(").append(grouped ? "ungrouped" : "grouped").append("), using DFS info");
  7470. agent.addWuExceptionEx(msg.str(), WRN_MismatchGroupInfo, SeverityError, MSGAUD_user, "hthor");
  7471. }
  7472. }
  7473. else
  7474. grouped = ((helper.getFlags() & TDXgrouped) != 0);
  7475. }
  7476. else
  7477. {
  7478. grouped = ((helper.getFlags() & TDXgrouped) != 0);
  7479. }
  7480. calcFixedDiskRecordSize();
  7481. if (fileDesc)
  7482. {
  7483. compressed = fileDesc->isCompressed(&blockcompressed); //try new decompression, fall back to old unless marked as block
  7484. if (fixedDiskRecordSize)
  7485. {
  7486. if (!compressed && (((helper.getFlags() & TDXcompress) != 0) && (fixedDiskRecordSize >= MIN_ROWCOMPRESS_RECSIZE)))
  7487. {
  7488. StringBuffer msg;
  7489. msg.append("Ignoring compression attribute on file ").append(mangledHelperFileName.str()).append(", which is not published as compressed");
  7490. agent.addWuExceptionEx(msg.str(), WRN_MismatchCompressInfo, SeverityWarning, MSGAUD_user, "hthor");
  7491. compressed = true;
  7492. }
  7493. }
  7494. }
  7495. else
  7496. {
  7497. compressed = checkReadIsCompressed(helper.getFlags(), fixedDiskRecordSize, false); //grouped=FALSE because fixedDiskRecordSize already includes grouped
  7498. }
  7499. void *k;
  7500. size32_t kl;
  7501. helper.getEncryptKey(kl,k);
  7502. encryptionkey.setOwn(kl,k);
  7503. if (encryptionkey.length()!=0)
  7504. {
  7505. blockcompressed = true;
  7506. compressed = true;
  7507. }
  7508. }
  7509. void CHThorDiskReadBaseActivity::close()
  7510. {
  7511. closepart();
  7512. tempFileName.clear();
  7513. dfsParts.clear();
  7514. if(ldFile)
  7515. {
  7516. IDistributedFile * dFile = ldFile->queryDistributedFile();
  7517. if(dFile)
  7518. dFile->setAccessed();
  7519. ldFile.clear();
  7520. }
  7521. }
  7522. unsigned __int64 CHThorDiskReadBaseActivity::getFilePosition(const void * row)
  7523. {
  7524. return localOffset + offsetOfPart;
  7525. }
  7526. unsigned __int64 CHThorDiskReadBaseActivity::getLocalFilePosition(const void * row)
  7527. {
  7528. return makeLocalFposOffset(partNum-1, localOffset);
  7529. }
  7530. void CHThorDiskReadBaseActivity::closepart()
  7531. {
  7532. if (opened && inputfileio && ldFile && partNum > 0)
  7533. {
  7534. unsigned previousPartNum = partNum-1;
  7535. if (previousPartNum < ldFile->numParts())
  7536. {
  7537. stat_type curDiskReads = inputfileio->getStatistic(StNumDiskReads);
  7538. IDistributedFile * dFile = ldFile->queryDistributedFile();
  7539. if (dFile)
  7540. {
  7541. if (superfile)
  7542. {
  7543. unsigned subfile, lnum;
  7544. if (superfile->mapSubPart(previousPartNum, subfile, lnum))
  7545. {
  7546. IDistributedSuperFile *super = dFile->querySuperFile();
  7547. dFile = &(super->querySubFile(subfile, true));
  7548. }
  7549. }
  7550. dFile->addAttrValue("@numDiskReads", curDiskReads);
  7551. StringBuffer clusterName;
  7552. dFile->getClusterName(0, clusterName);
  7553. diskAccessCost = money2cost_type(calcFileAccessCost(clusterName, 0, curDiskReads));
  7554. }
  7555. numDiskReads += curDiskReads;
  7556. }
  7557. }
  7558. inputstream.clear();
  7559. inputfileio.clear();
  7560. inputfile.clear();
  7561. }
  7562. bool CHThorDiskReadBaseActivity::openNext()
  7563. {
  7564. offsetOfPart += localOffset;
  7565. localOffset = 0;
  7566. saveOpenExc.clear();
  7567. actualFilter.clear();
  7568. if (translators)
  7569. {
  7570. /* If previous part was remotely accessed, the format used (actualDiskMeta), became the projected meta.
  7571. * Reset for local/direct access.
  7572. */
  7573. translator = &translators->queryTranslator();
  7574. keyedTranslator = translators->queryKeyedTranslator();
  7575. actualDiskMeta.set(&translators->queryActualFormat());
  7576. }
  7577. if (dfsParts||ldFile)
  7578. {
  7579. // open next part of a multipart, if there is one
  7580. while ((dfsParts&&dfsParts->isValid())||
  7581. (!dfsParts&&(partNum<ldFile->numParts())))
  7582. {
  7583. IDistributedFilePart * curPart = dfsParts?&dfsParts->query():NULL;
  7584. unsigned numCopies = curPart?curPart->numCopies():ldFile->numPartCopies(partNum);
  7585. //MORE: Order of copies should be optimized at this point....
  7586. StringBuffer file, filelist;
  7587. closepart();
  7588. if (dfsParts && superfile && curPart && !subfileLogicalFilenames.empty())
  7589. {
  7590. unsigned subfile;
  7591. unsigned lnum;
  7592. if (superfile->mapSubPart(partNum, subfile, lnum))
  7593. {
  7594. logicalFileName.set(subfileLogicalFilenames.item(subfile));
  7595. // MORE - need to set dFile = superfile->getSubFilePart(subfile) to support different formats on different file parts
  7596. }
  7597. }
  7598. if (keyedTranslator && keyedTranslator->needsTranslate())
  7599. keyedTranslator->translate(actualFilter, fieldFilters);
  7600. else
  7601. actualFilter.appendFilters(fieldFilters);
  7602. bool tryRemoteStream = actualDiskMeta->queryTypeInfo()->canInterpret() && actualDiskMeta->queryTypeInfo()->canSerialize() &&
  7603. projectedDiskMeta->queryTypeInfo()->canInterpret() && projectedDiskMeta->queryTypeInfo()->canSerialize();
  7604. /* If part can potentially be remotely streamed, 1st check if any part is local,
  7605. * then try to remote stream, and otherwise failover to legacy remote access
  7606. */
  7607. unsigned startCopy = 0;
  7608. if (tryRemoteStream && (rt_binary == readType))
  7609. {
  7610. std::vector<unsigned> remoteCandidates;
  7611. // scan for local part 1st
  7612. for (unsigned copy=0; copy<numCopies; copy++)
  7613. {
  7614. RemoteFilename rfn;
  7615. if (curPart)
  7616. curPart->getFilename(rfn,copy);
  7617. else
  7618. ldFile->getPartFilename(rfn, partNum, copy);
  7619. if (!isRemoteReadCandidate(agent, rfn))
  7620. {
  7621. StringBuffer path;
  7622. rfn.getPath(path);
  7623. Owned<IFile> iFile = createIFile(path);
  7624. try
  7625. {
  7626. if (iFile->exists())
  7627. {
  7628. startCopy = copy;
  7629. remoteCandidates.clear();
  7630. break;
  7631. }
  7632. }
  7633. catch (IException *e)
  7634. {
  7635. EXCLOG(e, "CHThorDiskReadBaseActivity::openNext()");
  7636. e->Release();
  7637. }
  7638. }
  7639. else
  7640. remoteCandidates.push_back(copy);
  7641. }
  7642. for (unsigned &copy: remoteCandidates)
  7643. {
  7644. RemoteFilename rfilename;
  7645. if (curPart)
  7646. curPart->getFilename(rfilename,copy);
  7647. else
  7648. ldFile->getPartFilename(rfilename,partNum,copy);
  7649. rfilename.getPath(file.clear());
  7650. filelist.append('\n').append(file);
  7651. try
  7652. {
  7653. // NB: only binary handles can be remotely processed by dafilesrv at the moment
  7654. // Open a stream from remote file, having passed actual, expected, projected, and filters to it
  7655. SocketEndpoint ep(rfilename.queryEndpoint());
  7656. setDafsEndpointPort(ep);
  7657. StringBuffer localPath;
  7658. rfilename.getLocalPath(localPath);
  7659. Owned<IRemoteFileIO> remoteFileIO = createRemoteFilteredFile(ep, localPath, actualDiskMeta, projectedDiskMeta, actualFilter, compressed, grouped, remoteLimit);
  7660. if (remoteFileIO)
  7661. {
  7662. StringBuffer tmp;
  7663. remoteFileIO->addVirtualFieldMapping("logicalFilename", logicalFileName.str());
  7664. remoteFileIO->addVirtualFieldMapping("baseFpos", tmp.clear().append(offsetOfPart).str());
  7665. remoteFileIO->addVirtualFieldMapping("partNum", tmp.clear().append(curPart->getPartIndex()).str());
  7666. try
  7667. {
  7668. remoteFileIO->ensureAvailable(); // force open now, because want to failover to other copies or legacy if fails
  7669. }
  7670. catch (IException *e)
  7671. {
  7672. #ifdef _DEBUG
  7673. EXCLOG(e, nullptr);
  7674. #endif
  7675. VStringBuffer msg("Remote streaming failure, failing over to direct read for: '%s'. ", file.str());
  7676. e->errorMessage(msg);
  7677. agent.addWuExceptionEx(msg.str(), WRN_RemoteReadFailure, SeverityWarning, MSGAUD_user, "hthor");
  7678. e->Release();
  7679. continue; // try next copy and ultimately failover to local when no more copies
  7680. }
  7681. Owned<IFile> iFile = createIFile(rfilename);
  7682. // remote side does projection/translation/filtering
  7683. actualDiskMeta.set(projectedDiskMeta);
  7684. translator = nullptr;
  7685. keyedTranslator = nullptr;
  7686. actualFilter.clear();
  7687. inputfileio.setown(remoteFileIO.getClear());
  7688. if (inputfileio)
  7689. {
  7690. inputfile.setown(iFile.getClear());
  7691. break;
  7692. }
  7693. }
  7694. }
  7695. catch (IException *E)
  7696. {
  7697. if (saveOpenExc.get())
  7698. E->Release();
  7699. else
  7700. saveOpenExc.setown(E);
  7701. }
  7702. closepart();
  7703. }
  7704. }
  7705. if (!inputfile)
  7706. {
  7707. unsigned copy = startCopy;
  7708. while (true)
  7709. {
  7710. RemoteFilename rfilename;
  7711. if (curPart)
  7712. curPart->getFilename(rfilename,copy);
  7713. else
  7714. ldFile->getPartFilename(rfilename,partNum,copy);
  7715. rfilename.getPath(file.clear());
  7716. filelist.append('\n').append(file);
  7717. try
  7718. {
  7719. inputfile.setown(createIFile(rfilename));
  7720. if (compressed)
  7721. {
  7722. Owned<IExpander> eexp;
  7723. if (encryptionkey.length()!=0)
  7724. eexp.setown(createAESExpander256((size32_t)encryptionkey.length(),encryptionkey.bufferBase()));
  7725. inputfileio.setown(createCompressedFileReader(inputfile,eexp));
  7726. if(!inputfileio && !blockcompressed) //fall back to old decompression, unless dfs marked as new
  7727. {
  7728. inputfileio.setown(inputfile->open(IFOread));
  7729. if(inputfileio)
  7730. rowcompressed = true;
  7731. }
  7732. }
  7733. else
  7734. inputfileio.setown(inputfile->open(IFOread));
  7735. if (inputfileio)
  7736. break;
  7737. }
  7738. catch (IException *E)
  7739. {
  7740. if (saveOpenExc.get())
  7741. E->Release();
  7742. else
  7743. saveOpenExc.setown(E);
  7744. }
  7745. if (++copy == numCopies) // wrap
  7746. copy = 0;
  7747. if (copy == startCopy) // reached starting copy, so scanned all and failed to open any.
  7748. break;
  7749. }
  7750. }
  7751. calcFixedDiskRecordSize();
  7752. if (dfsParts)
  7753. dfsParts->next();
  7754. partNum++;
  7755. if (checkOpenedFile(file.str(), filelist.str()))
  7756. {
  7757. opened = true;
  7758. return true;
  7759. }
  7760. }
  7761. return false;
  7762. }
  7763. else if (!tempFileName.isEmpty())
  7764. {
  7765. StringBuffer file(tempFileName.get());
  7766. tempFileName.clear();
  7767. closepart();
  7768. try
  7769. {
  7770. inputfile.setown(createIFile(file.str()));
  7771. if(compressed)
  7772. {
  7773. Owned<IExpander> eexp;
  7774. if (encryptionkey.length())
  7775. eexp.setown(createAESExpander256((size32_t) encryptionkey.length(),encryptionkey.bufferBase()));
  7776. inputfileio.setown(createCompressedFileReader(inputfile,eexp));
  7777. if(!inputfileio && !blockcompressed) //fall back to old decompression, unless dfs marked as new
  7778. {
  7779. inputfileio.setown(inputfile->open(IFOread));
  7780. if(inputfileio)
  7781. rowcompressed = true;
  7782. }
  7783. }
  7784. else
  7785. inputfileio.setown(inputfile->open(IFOread));
  7786. }
  7787. catch (IException *E)
  7788. {
  7789. closepart();
  7790. StringBuffer msg;
  7791. IWARNLOG("%s", E->errorMessage(msg).str());
  7792. if (saveOpenExc.get())
  7793. E->Release();
  7794. else
  7795. saveOpenExc.setown(E);
  7796. }
  7797. partNum++;
  7798. if (checkOpenedFile(file.str(), NULL))
  7799. {
  7800. actualFilter.appendFilters(fieldFilters);
  7801. opened = true;
  7802. return true;
  7803. }
  7804. }
  7805. return false;
  7806. }
  7807. bool CHThorDiskReadBaseActivity::checkOpenedFile(char const * filename, char const * filenamelist)
  7808. {
  7809. unsigned __int64 filesize = 0;
  7810. if (!inputfileio)
  7811. {
  7812. if (!(helper.getFlags() & TDRoptional))
  7813. {
  7814. StringBuffer s;
  7815. if(filenamelist) {
  7816. if (saveOpenExc.get())
  7817. {
  7818. if (strstr(mangledHelperFileName.str(),"::>")!=NULL) // if a 'special' filename just use saved exception
  7819. saveOpenExc->errorMessage(s);
  7820. else
  7821. {
  7822. s.append("Could not open logical file ").append(mangledHelperFileName.str()).append(" in any of these locations:").append(filenamelist).append(" (");
  7823. saveOpenExc->errorMessage(s).append(")");
  7824. }
  7825. }
  7826. else
  7827. s.append("Could not open logical file ").append(mangledHelperFileName.str()).append(" in any of these locations:").append(filenamelist).append(" (").append((unsigned)GetLastError()).append(")");
  7828. }
  7829. else
  7830. s.append("Could not open local physical file ").append(filename).append(" (").append((unsigned)GetLastError()).append(")");
  7831. agent.fail(1, s.str());
  7832. }
  7833. }
  7834. else
  7835. filesize = inputfileio->size();
  7836. saveOpenExc.clear();
  7837. if (filesize)
  7838. {
  7839. if (!compressed && fixedDiskRecordSize && ((offset_t)-1 != filesize) && (filesize % fixedDiskRecordSize) != 0)
  7840. {
  7841. StringBuffer s;
  7842. s.append("File ").append(filename).append(" size is ").append(filesize).append(" which is not a multiple of ").append(fixedDiskRecordSize);
  7843. agent.fail(1, s.str());
  7844. }
  7845. unsigned readBufferSize = queryReadBufferSize();
  7846. inputstream.setown(createFileSerialStream(inputfileio, 0, filesize, readBufferSize));
  7847. StringBuffer report("Reading file ");
  7848. report.append(inputfile->queryFilename());
  7849. agent.reportProgress(report.str());
  7850. }
  7851. return (filesize != 0);
  7852. }
  7853. void CHThorDiskReadBaseActivity::open()
  7854. {
  7855. assertex(!opened);
  7856. partNum = 0;
  7857. if (dfsParts)
  7858. eofseen = !dfsParts->first() || !openNext();
  7859. else if (ldFile||tempFileName.length())
  7860. eofseen = !openNext();
  7861. else
  7862. eofseen = true;
  7863. opened = true;
  7864. }
  7865. void CHThorDiskReadBaseActivity::updateProgress(IStatisticGatherer &progress) const
  7866. {
  7867. CHThorActivityBase::updateProgress(progress);
  7868. StatsActivityScope scope(progress, activityId);
  7869. progress.addStatistic(StNumDiskReads, numDiskReads);
  7870. progress.addStatistic(StCostFileAccess, diskAccessCost);
  7871. }
  7872. //=====================================================================================================
  7873. CHThorBinaryDiskReadBase::CHThorBinaryDiskReadBase(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDiskReadBaseArg &_arg, IHThorCompoundBaseArg & _segHelper, ThorActivityKind _kind, IPropertyTree *_node, EclGraph & _graph)
  7874. : CHThorDiskReadBaseActivity(_agent, _activityId, _subgraphId, _arg, _kind, _node, _graph),
  7875. segHelper(_segHelper), prefetchBuffer(NULL)
  7876. {
  7877. readType = rt_binary;
  7878. }
  7879. void CHThorBinaryDiskReadBase::calcFixedDiskRecordSize()
  7880. {
  7881. fixedDiskRecordSize = actualDiskMeta->getFixedSize();
  7882. if (fixedDiskRecordSize && grouped)
  7883. fixedDiskRecordSize += 1;
  7884. }
  7885. void CHThorBinaryDiskReadBase::append(FFoption option, const IFieldFilter * filter)
  7886. {
  7887. if (filter->isWild())
  7888. filter->Release();
  7889. else
  7890. fieldFilters.append(*filter);
  7891. }
  7892. void CHThorBinaryDiskReadBase::ready()
  7893. {
  7894. CHThorDiskReadBaseActivity::ready();
  7895. fieldFilters.kill();
  7896. segHelper.createSegmentMonitors(this);
  7897. }
  7898. bool CHThorBinaryDiskReadBase::openNext()
  7899. {
  7900. if (CHThorDiskReadBaseActivity::openNext())
  7901. {
  7902. if(rowcompressed && fixedDiskRecordSize)
  7903. {
  7904. throwUnexpected();
  7905. //MORE: What happens here
  7906. PROGLOG("Disk read falling back to legacy decompression routine");
  7907. //in.setown(createRowCompReadSeq(*inputfileiostream, 0, fixedDiskRecordSize));
  7908. }
  7909. //Only one of these will actually be used.
  7910. prefetcher.setown(actualDiskMeta->createDiskPrefetcher());
  7911. deserializer.setown(actualDiskMeta->createDiskDeserializer(agent.queryCodeContext(), activityId));
  7912. prefetchBuffer.setStream(inputstream);
  7913. deserializeSource.setStream(inputstream);
  7914. return true;
  7915. }
  7916. return false;
  7917. }
  7918. void CHThorBinaryDiskReadBase::closepart()
  7919. {
  7920. prefetchBuffer.clearStream();
  7921. deserializeSource.clearStream();
  7922. CHThorDiskReadBaseActivity::closepart();
  7923. }
  7924. unsigned CHThorBinaryDiskReadBase::queryReadBufferSize()
  7925. {
  7926. return hthorReadBufferSize;
  7927. }
  7928. void CHThorBinaryDiskReadBase::open()
  7929. {
  7930. if (!segHelper.canMatchAny())
  7931. {
  7932. eofseen = true;
  7933. opened = true;
  7934. }
  7935. else
  7936. CHThorDiskReadBaseActivity::open();
  7937. }
  7938. //=====================================================================================================
  7939. CHThorDiskReadActivity::CHThorDiskReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDiskReadArg &_arg, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree *_node) : CHThorBinaryDiskReadBase(_agent, _activityId, _subgraphId, _arg, _arg, _kind, _node, _graph), helper(_arg), outBuilder(NULL)
  7940. {
  7941. needTransform = false;
  7942. eogPending = 0;
  7943. lastGroupProcessed = 0;
  7944. }
  7945. void CHThorDiskReadActivity::ready()
  7946. {
  7947. PARENT::ready();
  7948. outBuilder.setAllocator(rowAllocator);
  7949. eogPending = false;
  7950. lastGroupProcessed = processed;
  7951. needTransform = helper.needTransform() || fieldFilters.length();
  7952. limit = helper.getRowLimit();
  7953. if (helper.getFlags() & TDRlimitskips)
  7954. limit = (unsigned __int64) -1;
  7955. stopAfter = helper.getChooseNLimit();
  7956. if (!helper.transformMayFilter() && !helper.hasMatchFilter())
  7957. remoteLimit = stopAfter;
  7958. }
  7959. void CHThorDiskReadActivity::stop()
  7960. {
  7961. outBuilder.clear();
  7962. PARENT::stop();
  7963. }
  7964. const void *CHThorDiskReadActivity::nextRow()
  7965. {
  7966. if (!opened) open();
  7967. if (eogPending && (lastGroupProcessed != processed))
  7968. {
  7969. eogPending = false;
  7970. lastGroupProcessed = processed;
  7971. return NULL;
  7972. }
  7973. try
  7974. {
  7975. if (needTransform || grouped || translator || keyedTranslator)
  7976. {
  7977. while (!eofseen && ((stopAfter == 0) || ((processed - initialProcessed) < stopAfter)))
  7978. {
  7979. queryUpdateProgress();
  7980. while (!prefetchBuffer.eos())
  7981. {
  7982. queryUpdateProgress();
  7983. prefetcher->readAhead(prefetchBuffer);
  7984. const byte * next = prefetchBuffer.queryRow();
  7985. size32_t sizeRead = prefetchBuffer.queryRowSize();
  7986. size32_t thisSize = 0;
  7987. if (likely(segMonitorsMatch(next))) // NOTE - keyed fields are checked pre-translation
  7988. {
  7989. MemoryBuffer translated;
  7990. if (translator)
  7991. {
  7992. MemoryBufferBuilder aBuilder(translated, 0);
  7993. translator->translate(aBuilder, *this, next);
  7994. next = aBuilder.getSelf();
  7995. }
  7996. if (likely(helper.canMatch(next)))
  7997. thisSize = helper.transform(outBuilder.ensureRow(), next);
  7998. }
  7999. bool eog = false;
  8000. if (grouped)
  8001. prefetchBuffer.read(sizeof(eog), &eog);
  8002. prefetchBuffer.finishedRow();
  8003. localOffset += sizeRead;
  8004. if (thisSize)
  8005. {
  8006. if (grouped)
  8007. eogPending = eog;
  8008. if ((processed - initialProcessed) >= limit)
  8009. {
  8010. outBuilder.clear();
  8011. if ( agent.queryCodeContext()->queryDebugContext())
  8012. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  8013. helper.onLimitExceeded();
  8014. return NULL;
  8015. }
  8016. processed++;
  8017. return outBuilder.finalizeRowClear(thisSize);
  8018. }
  8019. if (eog && (lastGroupProcessed != processed))
  8020. {
  8021. lastGroupProcessed = processed;
  8022. return NULL;
  8023. }
  8024. }
  8025. eofseen = !openNext();
  8026. }
  8027. }
  8028. else
  8029. {
  8030. while(!eofseen && ((stopAfter == 0) || (processed - initialProcessed) < stopAfter))
  8031. {
  8032. queryUpdateProgress();
  8033. while (!inputstream->eos())
  8034. {
  8035. size32_t sizeRead = deserializer->deserialize(outBuilder.ensureRow(), deserializeSource);
  8036. //In this case size read from disk == size created in memory
  8037. localOffset += sizeRead;
  8038. OwnedConstRoxieRow ret = outBuilder.finalizeRowClear(sizeRead);
  8039. if ((processed - initialProcessed)>=limit)
  8040. {
  8041. if ( agent.queryCodeContext()->queryDebugContext())
  8042. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  8043. helper.onLimitExceeded();
  8044. return NULL;
  8045. }
  8046. if (likely(helper.canMatch(ret)))
  8047. {
  8048. processed++;
  8049. return ret.getClear();
  8050. }
  8051. }
  8052. eofseen = !openNext();
  8053. }
  8054. }
  8055. close();
  8056. }
  8057. catch(IException * e)
  8058. {
  8059. throw makeWrappedException(e);
  8060. }
  8061. return NULL;
  8062. }
  8063. //=====================================================================================================
  8064. CHThorDiskNormalizeActivity::CHThorDiskNormalizeActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDiskNormalizeArg &_arg, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree *_node) : CHThorBinaryDiskReadBase(_agent, _activityId, _subgraphId, _arg, _arg, _kind, _node, _graph), helper(_arg), outBuilder(NULL)
  8065. {
  8066. }
  8067. void CHThorDiskNormalizeActivity::stop()
  8068. {
  8069. outBuilder.clear();
  8070. PARENT::stop();
  8071. }
  8072. void CHThorDiskNormalizeActivity::ready()
  8073. {
  8074. PARENT::ready();
  8075. outBuilder.setAllocator(rowAllocator);
  8076. limit = helper.getRowLimit();
  8077. if (helper.getFlags() & TDRlimitskips)
  8078. limit = (unsigned __int64) -1;
  8079. stopAfter = helper.getChooseNLimit();
  8080. lastSizeRead = 0;
  8081. expanding = false;
  8082. }
  8083. void CHThorDiskNormalizeActivity::gatherInfo(IFileDescriptor * fd)
  8084. {
  8085. PARENT::gatherInfo(fd);
  8086. assertex(!grouped);
  8087. }
  8088. const void *CHThorDiskNormalizeActivity::nextRow()
  8089. {
  8090. if (!opened) open();
  8091. for (;;)
  8092. {
  8093. if (eofseen || (stopAfter && (processed - initialProcessed) >= stopAfter))
  8094. break;
  8095. for (;;)
  8096. {
  8097. if (expanding)
  8098. {
  8099. for (;;)
  8100. {
  8101. expanding = helper.next();
  8102. if (!expanding)
  8103. break;
  8104. const void * ret = createNextRow();
  8105. if (ret)
  8106. return ret;
  8107. }
  8108. }
  8109. localOffset += lastSizeRead;
  8110. prefetchBuffer.finishedRow();
  8111. if (prefetchBuffer.eos())
  8112. {
  8113. lastSizeRead = 0;
  8114. break;
  8115. }
  8116. prefetcher->readAhead(prefetchBuffer);
  8117. const byte * next = prefetchBuffer.queryRow();
  8118. lastSizeRead = prefetchBuffer.queryRowSize();
  8119. queryUpdateProgress();
  8120. if (segMonitorsMatch(next))
  8121. {
  8122. try
  8123. {
  8124. if (unlikely(translator))
  8125. {
  8126. MemoryBufferBuilder aBuilder(translatedRow.clear(), 0);
  8127. translator->translate(aBuilder, *this, next);
  8128. next = aBuilder.getSelf();
  8129. }
  8130. expanding = helper.first(next);
  8131. }
  8132. catch(IException * e)
  8133. {
  8134. throw makeWrappedException(e);
  8135. }
  8136. if (expanding)
  8137. {
  8138. const void * ret = createNextRow();
  8139. if (ret)
  8140. return ret;
  8141. }
  8142. }
  8143. }
  8144. eofseen = !openNext();
  8145. }
  8146. close();
  8147. return NULL;
  8148. }
  8149. const void * CHThorDiskNormalizeActivity::createNextRow()
  8150. {
  8151. try
  8152. {
  8153. size32_t thisSize = helper.transform(outBuilder.ensureRow());
  8154. if (thisSize == 0)
  8155. return NULL;
  8156. if ((processed - initialProcessed) >=limit)
  8157. {
  8158. outBuilder.clear();
  8159. if ( agent.queryCodeContext()->queryDebugContext())
  8160. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  8161. helper.onLimitExceeded();
  8162. return NULL;
  8163. }
  8164. processed++;
  8165. return outBuilder.finalizeRowClear(thisSize);
  8166. }
  8167. catch(IException * e)
  8168. {
  8169. throw makeWrappedException(e);
  8170. }
  8171. }
  8172. //=====================================================================================================
  8173. CHThorDiskAggregateActivity::CHThorDiskAggregateActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDiskAggregateArg &_arg, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree *_node) : CHThorBinaryDiskReadBase(_agent, _activityId, _subgraphId, _arg, _arg, _kind, _node, _graph), helper(_arg), outBuilder(NULL)
  8174. {
  8175. }
  8176. void CHThorDiskAggregateActivity::stop()
  8177. {
  8178. outBuilder.clear();
  8179. PARENT::stop();
  8180. }
  8181. void CHThorDiskAggregateActivity::ready()
  8182. {
  8183. PARENT::ready();
  8184. outBuilder.setAllocator(rowAllocator);
  8185. finished = false;
  8186. }
  8187. void CHThorDiskAggregateActivity::gatherInfo(IFileDescriptor * fd)
  8188. {
  8189. PARENT::gatherInfo(fd);
  8190. assertex(!grouped);
  8191. }
  8192. const void *CHThorDiskAggregateActivity::nextRow()
  8193. {
  8194. if (finished) return NULL;
  8195. try
  8196. {
  8197. if (!opened) open();
  8198. outBuilder.ensureRow();
  8199. helper.clearAggregate(outBuilder);
  8200. while (!eofseen)
  8201. {
  8202. while (!prefetchBuffer.eos())
  8203. {
  8204. queryUpdateProgress();
  8205. prefetcher->readAhead(prefetchBuffer);
  8206. const byte * next = prefetchBuffer.queryRow();
  8207. size32_t sizeRead = prefetchBuffer.queryRowSize();
  8208. if (segMonitorsMatch(next))
  8209. {
  8210. if (unlikely(translator))
  8211. {
  8212. MemoryBufferBuilder aBuilder(translatedRow.clear(), 0);
  8213. translator->translate(aBuilder, *this, next);
  8214. helper.processRow(outBuilder, aBuilder.getSelf());
  8215. }
  8216. else
  8217. helper.processRow(outBuilder, next);
  8218. }
  8219. prefetchBuffer.finishedRow();
  8220. localOffset += sizeRead;
  8221. }
  8222. eofseen = !openNext();
  8223. }
  8224. close();
  8225. processed++;
  8226. finished = true;
  8227. unsigned retSize = outputMeta.getRecordSize(outBuilder.getSelf());
  8228. return outBuilder.finalizeRowClear(retSize);
  8229. }
  8230. catch(IException * e)
  8231. {
  8232. throw makeWrappedException(e);
  8233. }
  8234. }
  8235. //=====================================================================================================
  8236. CHThorDiskCountActivity::CHThorDiskCountActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDiskCountArg &_arg, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree *_node) : CHThorBinaryDiskReadBase(_agent, _activityId, _subgraphId, _arg, _arg, _kind, _node, _graph), helper(_arg)
  8237. {
  8238. finished = true;
  8239. }
  8240. CHThorDiskCountActivity::~CHThorDiskCountActivity()
  8241. {
  8242. }
  8243. void CHThorDiskCountActivity::ready()
  8244. {
  8245. PARENT::ready();
  8246. finished = false;
  8247. stopAfter = helper.getChooseNLimit();
  8248. if (!helper.hasFilter())
  8249. remoteLimit = stopAfter;
  8250. }
  8251. void CHThorDiskCountActivity::gatherInfo(IFileDescriptor * fd)
  8252. {
  8253. PARENT::gatherInfo(fd);
  8254. assertex(!grouped);
  8255. }
  8256. const void *CHThorDiskCountActivity::nextRow()
  8257. {
  8258. if (finished) return NULL;
  8259. unsigned __int64 totalCount = 0;
  8260. if (fieldFilters.ordinality() == 0 && !helper.hasFilter() &&
  8261. (fixedDiskRecordSize != 0) && !(helper.getFlags() & (TDXtemporary | TDXjobtemp)) &&
  8262. !((helper.getFlags() & TDXcompress) && agent.queryResolveFilesLocally()) )
  8263. {
  8264. resolve();
  8265. if (segHelper.canMatchAny() && ldFile)
  8266. {
  8267. try
  8268. {
  8269. unsigned __int64 size = ldFile->getFileSize();
  8270. if (size % fixedDiskRecordSize)
  8271. throw MakeStringException(0, "Physical file %s has size %" I64F "d which is not a multiple of record size %d", ldFile->queryLogicalName(), size, fixedDiskRecordSize);
  8272. totalCount = size / fixedDiskRecordSize;
  8273. }
  8274. catch (IException * e)
  8275. {
  8276. if (!(helper.getFlags() & TDRoptional) || (e->errorCode() != DFSERR_CannotFindPartFileSize))
  8277. throw;
  8278. e->Release();
  8279. }
  8280. }
  8281. }
  8282. else
  8283. {
  8284. if (!opened) open();
  8285. for (;;)
  8286. {
  8287. if (eofseen)
  8288. break;
  8289. while (!prefetchBuffer.eos())
  8290. {
  8291. queryUpdateProgress();
  8292. prefetcher->readAhead(prefetchBuffer);
  8293. const byte * next = prefetchBuffer.queryRow();
  8294. size32_t sizeRead = prefetchBuffer.queryRowSize();
  8295. if (segMonitorsMatch(next))
  8296. totalCount += helper.numValid(next);
  8297. prefetchBuffer.finishedRow();
  8298. localOffset += sizeRead;
  8299. if (totalCount > stopAfter)
  8300. break;
  8301. }
  8302. if (totalCount > stopAfter)
  8303. break;
  8304. eofseen = !openNext();
  8305. }
  8306. close();
  8307. }
  8308. if (totalCount > stopAfter)
  8309. totalCount = stopAfter;
  8310. finished = true;
  8311. processed++;
  8312. size32_t outSize = outputMeta.getFixedSize();
  8313. void * ret = rowAllocator->createRow();
  8314. if (outSize == 1)
  8315. {
  8316. assertex(stopAfter == 1);
  8317. *(byte *)ret = (byte)totalCount;
  8318. }
  8319. else
  8320. {
  8321. assertex(outSize == sizeof(unsigned __int64));
  8322. *(unsigned __int64 *)ret = totalCount;
  8323. }
  8324. return rowAllocator->finalizeRow(outSize, ret, outSize);
  8325. }
  8326. //=====================================================================================================
  8327. CHThorDiskGroupAggregateActivity::CHThorDiskGroupAggregateActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDiskGroupAggregateArg &_arg, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree *_node)
  8328. : CHThorBinaryDiskReadBase(_agent, _activityId, _subgraphId, _arg, _arg, _kind, _node, _graph),
  8329. helper(_arg),
  8330. aggregated(_arg, _arg)
  8331. {
  8332. }
  8333. void CHThorDiskGroupAggregateActivity::ready()
  8334. {
  8335. PARENT::ready();
  8336. eof = false;
  8337. gathered = false;
  8338. }
  8339. void CHThorDiskGroupAggregateActivity::gatherInfo(IFileDescriptor * fd)
  8340. {
  8341. PARENT::gatherInfo(fd);
  8342. assertex(!grouped);
  8343. aggregated.start(rowAllocator, agent.queryCodeContext(), activityId);
  8344. }
  8345. void CHThorDiskGroupAggregateActivity::processRow(const void * next)
  8346. {
  8347. aggregated.addRow(next);
  8348. }
  8349. const void *CHThorDiskGroupAggregateActivity::nextRow()
  8350. {
  8351. if (eof)
  8352. return NULL;
  8353. try
  8354. {
  8355. if (!gathered)
  8356. {
  8357. if (!opened) open();
  8358. while (!eofseen)
  8359. {
  8360. while (!prefetchBuffer.eos())
  8361. {
  8362. queryUpdateProgress();
  8363. prefetcher->readAhead(prefetchBuffer);
  8364. const byte * next = prefetchBuffer.queryRow();
  8365. size32_t sizeRead = prefetchBuffer.queryRowSize();
  8366. if (segMonitorsMatch(next))
  8367. {
  8368. if (unlikely(translator))
  8369. {
  8370. MemoryBufferBuilder aBuilder(translatedRow.clear(), 0);
  8371. translator->translate(aBuilder, *this, next);
  8372. helper.processRow(aBuilder.getSelf(), this);
  8373. }
  8374. else
  8375. helper.processRow(next, this);
  8376. }
  8377. prefetchBuffer.finishedRow();
  8378. localOffset += sizeRead;
  8379. }
  8380. eofseen = !openNext();
  8381. }
  8382. close();
  8383. gathered = true;
  8384. }
  8385. }
  8386. catch(IException * e)
  8387. {
  8388. throw makeWrappedException(e);
  8389. }
  8390. Owned<AggregateRowBuilder> next = aggregated.nextResult();
  8391. if (next)
  8392. {
  8393. processed++;
  8394. return next->finalizeRowClear();
  8395. }
  8396. eof = true;
  8397. return NULL;
  8398. }
  8399. //=====================================================================================================
  8400. CHThorCsvReadActivity::CHThorCsvReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorCsvReadArg &_arg, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree *_node) : CHThorDiskReadBaseActivity(_agent, _activityId, _subgraphId, _arg, _kind, _node, _graph), helper(_arg)
  8401. {
  8402. maxRowSize = agent.queryWorkUnit()->getDebugValueInt(OPT_MAXCSVROWSIZE, defaultMaxCsvRowSize) * 1024 * 1024;
  8403. readType = rt_csv;
  8404. }
  8405. CHThorCsvReadActivity::~CHThorCsvReadActivity()
  8406. {
  8407. }
  8408. void CHThorCsvReadActivity::ready()
  8409. {
  8410. PARENT::ready();
  8411. }
  8412. void CHThorCsvReadActivity::stop()
  8413. {
  8414. csvSplitter.reset();
  8415. PARENT::stop();
  8416. }
  8417. void CHThorCsvReadActivity::gatherInfo(IFileDescriptor * fd)
  8418. {
  8419. PARENT::gatherInfo(fd);
  8420. ICsvParameters * csvInfo = helper.queryCsvParameters();
  8421. headerLines = csvInfo->queryHeaderLen();
  8422. maxDiskSize = csvInfo->queryMaxSize();
  8423. limit = helper.getRowLimit();
  8424. if (helper.getFlags() & TDRlimitskips)
  8425. limit = (unsigned __int64) -1;
  8426. stopAfter = helper.getChooseNLimit();
  8427. const char * quotes = NULL;
  8428. const char * separators = NULL;
  8429. const char * terminators = NULL;
  8430. const char * escapes = NULL;
  8431. IDistributedFile * dFile = ldFile?ldFile->queryDistributedFile():NULL;
  8432. if (dFile) //only makes sense for distributed (non local) files
  8433. {
  8434. IPropertyTree & options = dFile->queryAttributes();
  8435. quotes = options.queryProp("@csvQuote");
  8436. separators = options.queryProp("@csvSeparate");
  8437. terminators = options.queryProp("@csvTerminate");
  8438. escapes = options.queryProp("@csvEscape");
  8439. }
  8440. csvSplitter.init(helper.getMaxColumns(), csvInfo, quotes, separators, terminators, escapes);
  8441. }
  8442. void CHThorCsvReadActivity::calcFixedDiskRecordSize()
  8443. {
  8444. fixedDiskRecordSize = 0;
  8445. }
  8446. const void *CHThorCsvReadActivity::nextRow()
  8447. {
  8448. while (!stopAfter || (processed - initialProcessed) < stopAfter)
  8449. {
  8450. checkOpenNext();
  8451. if (eofseen)
  8452. break;
  8453. size32_t thisLineLength = csvSplitter.splitLine(inputstream, maxRowSize);
  8454. if (thisLineLength)
  8455. {
  8456. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  8457. unsigned thisSize;
  8458. try
  8459. {
  8460. thisSize = helper.transform(rowBuilder, csvSplitter.queryLengths(), (const char * *)csvSplitter.queryData());
  8461. }
  8462. catch(IException * e)
  8463. {
  8464. throw makeWrappedException(e);
  8465. }
  8466. inputstream->skip(thisLineLength);
  8467. localOffset += thisLineLength;
  8468. if (thisSize)
  8469. {
  8470. OwnedConstRoxieRow ret = rowBuilder.finalizeRowClear(thisSize);
  8471. if ((processed - initialProcessed) >= limit)
  8472. {
  8473. if ( agent.queryCodeContext()->queryDebugContext())
  8474. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  8475. helper.onLimitExceeded();
  8476. return NULL;
  8477. }
  8478. processed++;
  8479. return ret.getClear();
  8480. }
  8481. }
  8482. }
  8483. close();
  8484. return NULL;
  8485. }
  8486. bool CHThorCsvReadActivity::openNext()
  8487. {
  8488. if (CHThorDiskReadBaseActivity::openNext())
  8489. {
  8490. unsigned lines = headerLines;
  8491. while (lines-- && !inputstream->eos())
  8492. {
  8493. size32_t numAvailable;
  8494. const void * next = inputstream->peek(maxDiskSize, numAvailable);
  8495. inputstream->skip(csvSplitter.splitLine(numAvailable, (const byte *)next));
  8496. }
  8497. // only skip header in the first file - since spray doesn't duplicate the header.
  8498. headerLines = 0;
  8499. return true;
  8500. }
  8501. return false;
  8502. }
  8503. void CHThorCsvReadActivity::checkOpenNext()
  8504. {
  8505. agent.reportProgress(NULL);
  8506. if (!opened)
  8507. {
  8508. agent.reportProgress(NULL);
  8509. if (!helper.canMatchAny())
  8510. {
  8511. eofseen = true;
  8512. opened = true;
  8513. }
  8514. else
  8515. open();
  8516. }
  8517. for (;;)
  8518. {
  8519. if (eofseen || !inputstream->eos())
  8520. return;
  8521. eofseen = !openNext();
  8522. }
  8523. }
  8524. //=====================================================================================================
  8525. CHThorXmlReadActivity::CHThorXmlReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorXmlReadArg &_arg, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree *_node) : CHThorDiskReadBaseActivity(_agent, _activityId, _subgraphId, _arg, _kind, _node, _graph), helper(_arg)
  8526. {
  8527. readType = (kind==TAKjsonread) ? rt_json : rt_xml;
  8528. }
  8529. void CHThorXmlReadActivity::ready()
  8530. {
  8531. CHThorDiskReadBaseActivity::ready();
  8532. rowTransformer.set(helper.queryTransformer());
  8533. localOffset = 0;
  8534. limit = helper.getRowLimit();
  8535. if (helper.getFlags() & TDRlimitskips)
  8536. limit = (unsigned __int64) -1;
  8537. stopAfter = helper.getChooseNLimit();
  8538. }
  8539. void CHThorXmlReadActivity::stop()
  8540. {
  8541. xmlParser.clear();
  8542. CHThorDiskReadBaseActivity::stop();
  8543. }
  8544. void CHThorXmlReadActivity::gatherInfo(IFileDescriptor * fd)
  8545. {
  8546. PARENT::gatherInfo(fd);
  8547. }
  8548. void CHThorXmlReadActivity::calcFixedDiskRecordSize()
  8549. {
  8550. fixedDiskRecordSize = 0;
  8551. }
  8552. const void *CHThorXmlReadActivity::nextRow()
  8553. {
  8554. if(!opened) open();
  8555. while (!eofseen && (!stopAfter || (processed - initialProcessed) < stopAfter))
  8556. {
  8557. agent.reportProgress(NULL);
  8558. //call to next() will callback on the IXmlSelect interface
  8559. bool gotNext = false;
  8560. try
  8561. {
  8562. gotNext = xmlParser->next();
  8563. }
  8564. catch(IException * e)
  8565. {
  8566. throw makeWrappedException(e, inputfile->queryFilename());
  8567. }
  8568. if(!gotNext)
  8569. eofseen = !openNext();
  8570. else if (lastMatch)
  8571. {
  8572. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  8573. unsigned sizeGot;
  8574. try
  8575. {
  8576. sizeGot = rowTransformer->transform(rowBuilder, lastMatch, this);
  8577. }
  8578. catch(IException * e)
  8579. {
  8580. throw makeWrappedException(e);
  8581. }
  8582. lastMatch.clear();
  8583. localOffset = 0;
  8584. if (sizeGot)
  8585. {
  8586. OwnedConstRoxieRow ret = rowBuilder.finalizeRowClear(sizeGot);
  8587. if ((processed - initialProcessed) >= limit)
  8588. {
  8589. if ( agent.queryCodeContext()->queryDebugContext())
  8590. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  8591. helper.onLimitExceeded();
  8592. return NULL;
  8593. }
  8594. processed++;
  8595. return ret.getClear();
  8596. }
  8597. }
  8598. }
  8599. return NULL;
  8600. }
  8601. bool CHThorXmlReadActivity::openNext()
  8602. {
  8603. if (inputfileio)
  8604. offsetOfPart += inputfileio->size();
  8605. localOffset = 0;
  8606. if (CHThorDiskReadBaseActivity::openNext())
  8607. {
  8608. unsigned readBufferSize = queryReadBufferSize();
  8609. OwnedIFileIOStream inputfileiostream;
  8610. if(readBufferSize)
  8611. inputfileiostream.setown(createBufferedIOStream(inputfileio, readBufferSize));
  8612. else
  8613. inputfileiostream.setown(createIOStream(inputfileio));
  8614. OwnedRoxieString xmlIterator(helper.getXmlIteratorPath());
  8615. if (kind==TAKjsonread)
  8616. xmlParser.setown(createJSONParse(*inputfileiostream, xmlIterator, *this, (0 != (TDRxmlnoroot & helper.getFlags()))?ptr_noRoot:ptr_none, (helper.getFlags() & TDRusexmlcontents) != 0));
  8617. else
  8618. xmlParser.setown(createXMLParse(*inputfileiostream, xmlIterator, *this, (0 != (TDRxmlnoroot & helper.getFlags()))?ptr_noRoot:ptr_none, (helper.getFlags() & TDRusexmlcontents) != 0));
  8619. return true;
  8620. }
  8621. return false;
  8622. }
  8623. void CHThorXmlReadActivity::closepart()
  8624. {
  8625. xmlParser.clear();
  8626. CHThorDiskReadBaseActivity::closepart();
  8627. }
  8628. //---------------------------------------------------------------------------
  8629. CHThorLocalResultReadActivity::CHThorLocalResultReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorLocalResultReadArg &_arg, ThorActivityKind _kind, EclGraph & _graph, __int64 graphId) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  8630. {
  8631. physicalRecordSize = outputMeta;
  8632. grouped = outputMeta.isGrouped();
  8633. graph = resolveLocalQuery(graphId);
  8634. result = NULL;
  8635. }
  8636. void CHThorLocalResultReadActivity::ready()
  8637. {
  8638. CHThorSimpleActivityBase::ready();
  8639. result = graph->queryResult(helper.querySequence());
  8640. curRow = 0;
  8641. }
  8642. const void *CHThorLocalResultReadActivity::nextRow()
  8643. {
  8644. const void * next = result->queryRow(curRow++);
  8645. if (next)
  8646. {
  8647. processed++;
  8648. LinkRoxieRow(next);
  8649. return next;
  8650. }
  8651. return NULL;
  8652. }
  8653. //=====================================================================================================
  8654. CHThorLocalResultWriteActivity::CHThorLocalResultWriteActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorLocalResultWriteArg &_arg, ThorActivityKind _kind, EclGraph & _graph, __int64 graphId)
  8655. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  8656. {
  8657. graph = resolveLocalQuery(graphId);
  8658. }
  8659. void CHThorLocalResultWriteActivity::execute()
  8660. {
  8661. IHThorGraphResult * result = graph->createResult(helper.querySequence(), LINK(rowAllocator));
  8662. for (;;)
  8663. {
  8664. const void *nextrec = input->nextRow();
  8665. if (!nextrec)
  8666. {
  8667. nextrec = input->nextRow();
  8668. if (!nextrec)
  8669. break;
  8670. result->addRowOwn(NULL);
  8671. }
  8672. result->addRowOwn(nextrec);
  8673. }
  8674. }
  8675. //=====================================================================================================
  8676. CHThorDictionaryResultWriteActivity::CHThorDictionaryResultWriteActivity (IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDictionaryResultWriteArg &_arg, ThorActivityKind _kind, EclGraph & _graph, __int64 graphId)
  8677. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  8678. {
  8679. graph = resolveLocalQuery(graphId);
  8680. }
  8681. void CHThorDictionaryResultWriteActivity::execute()
  8682. {
  8683. RtlLinkedDictionaryBuilder builder(rowAllocator, helper.queryHashLookupInfo());
  8684. for (;;)
  8685. {
  8686. const void *row = input->nextRow();
  8687. if (!row)
  8688. {
  8689. row = input->nextRow();
  8690. if (!row)
  8691. break;
  8692. }
  8693. builder.appendOwn(row);
  8694. }
  8695. IHThorGraphResult * result = graph->createResult(helper.querySequence(), LINK(rowAllocator));
  8696. size32_t dictSize = builder.getcount();
  8697. const byte ** dictRows = builder.queryrows();
  8698. for (size32_t row = 0; row < dictSize; row++)
  8699. {
  8700. const byte *thisRow = dictRows[row];
  8701. if (thisRow)
  8702. LinkRoxieRow(thisRow);
  8703. result->addRowOwn(thisRow);
  8704. }
  8705. }
  8706. //=====================================================================================================
  8707. CHThorLocalResultSpillActivity::CHThorLocalResultSpillActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorLocalResultSpillArg &_arg, ThorActivityKind _kind, EclGraph & _graph, __int64 graphId)
  8708. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  8709. {
  8710. result = NULL;
  8711. nullPending = false;
  8712. graph = resolveLocalQuery(graphId);
  8713. assertex(graph);
  8714. }
  8715. void CHThorLocalResultSpillActivity::ready()
  8716. {
  8717. CHThorSimpleActivityBase::ready();
  8718. result = graph->createResult(helper.querySequence(), LINK(rowAllocator));
  8719. nullPending = false;
  8720. }
  8721. const void * CHThorLocalResultSpillActivity::nextRow()
  8722. {
  8723. const void * ret = input->nextRow();
  8724. if (ret)
  8725. {
  8726. if (nullPending)
  8727. {
  8728. result->addRowOwn(NULL);
  8729. nullPending = false;
  8730. }
  8731. LinkRoxieRow(ret);
  8732. result->addRowOwn(ret);
  8733. processed++;
  8734. }
  8735. else
  8736. nullPending = true;
  8737. return ret;
  8738. }
  8739. void CHThorLocalResultSpillActivity::stop()
  8740. {
  8741. for (;;)
  8742. {
  8743. const void * ret = input->nextRow();
  8744. if (!ret)
  8745. {
  8746. if (nullPending)
  8747. break;
  8748. nullPending = true;
  8749. }
  8750. else
  8751. {
  8752. if (nullPending)
  8753. {
  8754. result->addRowOwn(NULL);
  8755. nullPending = false;
  8756. }
  8757. result->addRowOwn(ret);
  8758. }
  8759. }
  8760. CHThorSimpleActivityBase::stop();
  8761. }
  8762. //=====================================================================================================
  8763. CHThorLoopActivity::CHThorLoopActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorLoopArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  8764. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  8765. {
  8766. flags = helper.getFlags();
  8767. maxIterations = 0;
  8768. }
  8769. CHThorLoopActivity::~CHThorLoopActivity()
  8770. {
  8771. ForEachItemIn(idx, loopPending)
  8772. ReleaseRoxieRow(loopPending.item(idx));
  8773. }
  8774. void CHThorLoopActivity::ready()
  8775. {
  8776. curInput = &input->queryStream();
  8777. eof = false;
  8778. loopCounter = 1;
  8779. CHThorSimpleActivityBase::ready();
  8780. maxIterations = helper.numIterations();
  8781. if ((int)maxIterations < 0) maxIterations = 0;
  8782. finishedLooping = ((kind == TAKloopcount) && (maxIterations == 0));
  8783. if ((flags & IHThorLoopArg::LFnewloopagain) && !helper.loopFirstTime())
  8784. finishedLooping = true;
  8785. extractBuilder.clear();
  8786. helper.createParentExtract(extractBuilder);
  8787. }
  8788. const void * CHThorLoopActivity::nextRow()
  8789. {
  8790. if (eof)
  8791. return NULL;
  8792. unsigned emptyIterations = 0;
  8793. for (;;)
  8794. {
  8795. for (;;)
  8796. {
  8797. const void * ret = curInput->nextRow();
  8798. if (!ret)
  8799. {
  8800. ret = curInput->nextRow(); // more cope with groups somehow....
  8801. if (!ret)
  8802. {
  8803. if (finishedLooping)
  8804. {
  8805. eof = true;
  8806. return NULL;
  8807. }
  8808. break;
  8809. }
  8810. }
  8811. if (finishedLooping ||
  8812. ((flags & IHThorLoopArg::LFfiltered) && !helper.sendToLoop(loopCounter, ret)))
  8813. {
  8814. processed++;
  8815. return ret;
  8816. }
  8817. loopPending.append(ret);
  8818. }
  8819. switch (kind)
  8820. {
  8821. case TAKloopdataset:
  8822. {
  8823. if (!(flags & IHThorLoopArg::LFnewloopagain))
  8824. {
  8825. if (!helper.loopAgain(loopCounter, loopPending.ordinality(), (const void * *)loopPending.getArray()))
  8826. {
  8827. if (loopPending.ordinality() == 0)
  8828. {
  8829. eof = true;
  8830. return NULL;
  8831. }
  8832. arrayInput.init(&loopPending);
  8833. curInput = &arrayInput;
  8834. finishedLooping = true;
  8835. continue; // back to the input loop again
  8836. }
  8837. }
  8838. break;
  8839. }
  8840. case TAKlooprow:
  8841. if (loopPending.empty())
  8842. {
  8843. finishedLooping = true;
  8844. eof = true;
  8845. return NULL;
  8846. }
  8847. break;
  8848. }
  8849. if (loopPending.ordinality())
  8850. emptyIterations = 0;
  8851. else
  8852. {
  8853. //note: any outputs which didn't go around the loop again, would return the record, reinitializing emptyIterations
  8854. emptyIterations++;
  8855. if (emptyIterations > EMPTY_LOOP_LIMIT)
  8856. throw MakeStringException(0, "Executed LOOP with empty input and output %u times", emptyIterations);
  8857. if (emptyIterations % 32 == 0)
  8858. DBGLOG("Executing LOOP with empty input and output %u times", emptyIterations);
  8859. }
  8860. void * counterRow = NULL;
  8861. if (flags & IHThorLoopArg::LFcounter)
  8862. {
  8863. counterRow = queryRowManager()->allocate(sizeof(thor_loop_counter_t), activityId);
  8864. *((thor_loop_counter_t *)counterRow) = loopCounter;
  8865. }
  8866. Owned<IHThorGraphResults> curResults = loopGraph->execute(counterRow, loopPending, extractBuilder.getbytes());
  8867. if (flags & IHThorLoopArg::LFnewloopagain)
  8868. {
  8869. IHThorGraphResult * result = curResults->queryResult(helper.loopAgainResult());
  8870. assertex(result);
  8871. const void * row = result->queryRow(0);
  8872. assertex(row);
  8873. //Result is a row which contains a single boolean field.
  8874. if (!((const bool *)row)[0])
  8875. finishedLooping = true;
  8876. }
  8877. resultInput.init(curResults->queryResult(0));
  8878. curInput = &resultInput;
  8879. loopCounter++;
  8880. if ((kind == TAKloopcount) && (loopCounter > maxIterations))
  8881. finishedLooping = true;
  8882. }
  8883. }
  8884. void CHThorLoopActivity::stop()
  8885. {
  8886. ForEachItemIn(idx, loopPending)
  8887. ReleaseRoxieRow(loopPending.item(idx));
  8888. loopPending.kill();
  8889. CHThorSimpleActivityBase::stop();
  8890. }
  8891. //---------------------------------------------------------------------------
  8892. CHThorGraphLoopResultReadActivity::CHThorGraphLoopResultReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorGraphLoopResultReadArg &_arg, ThorActivityKind _kind, EclGraph & _graph, __int64 graphId) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(&_arg)
  8893. {
  8894. physicalRecordSize = outputMeta;
  8895. grouped = outputMeta.isGrouped();
  8896. result = NULL;
  8897. graph = resolveLocalQuery(graphId);
  8898. }
  8899. CHThorGraphLoopResultReadActivity::CHThorGraphLoopResultReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorArg & _arg, ThorActivityKind _kind, EclGraph & _graph, __int64 graphId, unsigned _sequence, bool _grouped) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(NULL)
  8900. {
  8901. physicalRecordSize = outputMeta;
  8902. sequence = _sequence;
  8903. grouped = _grouped;
  8904. result = NULL;
  8905. graph = resolveLocalQuery(graphId);
  8906. }
  8907. void CHThorGraphLoopResultReadActivity::ready()
  8908. {
  8909. CHThorSimpleActivityBase::ready();
  8910. if (helper)
  8911. sequence = helper->querySequence();
  8912. if ((int)sequence >= 0)
  8913. result = graph->queryGraphLoopResult(sequence);
  8914. else
  8915. result = NULL;
  8916. curRow = 0;
  8917. }
  8918. const void *CHThorGraphLoopResultReadActivity::nextRow()
  8919. {
  8920. if (result)
  8921. {
  8922. const void * next = result->queryRow(curRow++);
  8923. if (next)
  8924. {
  8925. processed++;
  8926. LinkRoxieRow(next);
  8927. return (void *)next;
  8928. }
  8929. }
  8930. return NULL;
  8931. }
  8932. //=====================================================================================================
  8933. CHThorGraphLoopResultWriteActivity::CHThorGraphLoopResultWriteActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorGraphLoopResultWriteArg &_arg, ThorActivityKind _kind, EclGraph & _graph, __int64 graphId)
  8934. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  8935. {
  8936. graph = resolveLocalQuery(graphId);
  8937. }
  8938. void CHThorGraphLoopResultWriteActivity::execute()
  8939. {
  8940. IHThorGraphResult * result = graph->createGraphLoopResult(LINK(rowAllocator));
  8941. for (;;)
  8942. {
  8943. const void *nextrec = input->nextRow();
  8944. if (!nextrec)
  8945. {
  8946. nextrec = input->nextRow();
  8947. if (!nextrec)
  8948. break;
  8949. result->addRowOwn(NULL);
  8950. }
  8951. result->addRowOwn(nextrec);
  8952. }
  8953. }
  8954. //=====================================================================================================
  8955. class CCounterMeta : implements IOutputMetaData, public CInterface
  8956. {
  8957. public:
  8958. IMPLEMENT_IINTERFACE
  8959. virtual size32_t getRecordSize(const void *rec) { return sizeof(thor_loop_counter_t); }
  8960. virtual size32_t getMinRecordSize() const { return sizeof(thor_loop_counter_t); }
  8961. virtual size32_t getFixedSize() const { return sizeof(thor_loop_counter_t); }
  8962. virtual void toXML(const byte * self, IXmlWriter & out) { }
  8963. virtual unsigned getVersion() const { return OUTPUTMETADATA_VERSION; }
  8964. virtual unsigned getMetaFlags() { return 0; }
  8965. virtual const RtlTypeInfo * queryTypeInfo() const { return nullptr; }
  8966. virtual void destruct(byte * self) {}
  8967. virtual IOutputRowSerializer * createDiskSerializer(ICodeContext * ctx, unsigned activityId) { return NULL; }
  8968. virtual IOutputRowDeserializer * createDiskDeserializer(ICodeContext * ctx, unsigned activityId) { return NULL; }
  8969. virtual ISourceRowPrefetcher * createDiskPrefetcher() { return NULL; }
  8970. virtual IOutputMetaData * querySerializedDiskMeta() { return this; }
  8971. virtual IOutputRowSerializer * createInternalSerializer(ICodeContext * ctx, unsigned activityId) { return NULL; }
  8972. virtual IOutputRowDeserializer * createInternalDeserializer(ICodeContext * ctx, unsigned activityId) { return NULL; }
  8973. virtual void process(const byte * self, IFieldProcessor & target, unsigned from, unsigned to) {}
  8974. virtual void walkIndirectMembers(const byte * self, IIndirectMemberVisitor & visitor) {}
  8975. virtual IOutputMetaData * queryChildMeta(unsigned i) { return NULL; }
  8976. virtual const RtlRecord &queryRecordAccessor(bool expand) const { throwUnexpected(); } // could provide a static implementation if needed
  8977. };
  8978. //=====================================================================================================
  8979. CHThorGraphLoopActivity::CHThorGraphLoopActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorGraphLoopArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  8980. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  8981. {
  8982. flags = helper.getFlags();
  8983. maxIterations = 0;
  8984. counterMeta.setown(new CCounterMeta);
  8985. }
  8986. void CHThorGraphLoopActivity::ready()
  8987. {
  8988. executed = false;
  8989. resultIndex = 0;
  8990. CHThorSimpleActivityBase::ready();
  8991. maxIterations = helper.numIterations();
  8992. if ((int)maxIterations < 0) maxIterations = 0;
  8993. loopResults.setown(agent.createGraphLoopResults());
  8994. extractBuilder.clear();
  8995. helper.createParentExtract(extractBuilder);
  8996. rowAllocator.setown(agent.queryCodeContext()->getRowAllocator(queryOutputMeta(), activityId));
  8997. rowAllocatorCounter.setown(agent.queryCodeContext()->getRowAllocator(counterMeta, activityId));
  8998. }
  8999. const void * CHThorGraphLoopActivity::nextRow()
  9000. {
  9001. if (!executed)
  9002. {
  9003. executed = true;
  9004. IHThorGraphResult * inputResult = loopResults->createResult(0, LINK(rowAllocator));
  9005. for (;;)
  9006. {
  9007. const void * ret = input->nextRow();
  9008. if (!ret)
  9009. {
  9010. ret = input->nextRow();
  9011. if (!ret)
  9012. break;
  9013. inputResult->addRowOwn(NULL);
  9014. }
  9015. inputResult->addRowOwn(ret);
  9016. }
  9017. for (unsigned loopCounter = 1; loopCounter <= maxIterations; loopCounter++)
  9018. {
  9019. void * counterRow = NULL;
  9020. if (flags & IHThorGraphLoopArg::GLFcounter)
  9021. {
  9022. counterRow = rowAllocatorCounter->createRow();
  9023. *((thor_loop_counter_t *)counterRow) = loopCounter;
  9024. counterRow = rowAllocatorCounter->finalizeRow(sizeof(thor_loop_counter_t), counterRow, sizeof(thor_loop_counter_t));
  9025. }
  9026. loopGraph->execute(counterRow, loopResults, extractBuilder.getbytes());
  9027. }
  9028. int iNumResults = loopResults->ordinality();
  9029. finalResult = loopResults->queryResult(iNumResults-1); //Get the last result, which isnt necessarily 'maxIterations'
  9030. }
  9031. const void * next = finalResult->getOwnRow(resultIndex++);
  9032. if (next)
  9033. processed++;
  9034. return next;
  9035. }
  9036. void CHThorGraphLoopActivity::stop()
  9037. {
  9038. rowAllocator.clear();
  9039. finalResult = NULL;
  9040. loopResults.clear();
  9041. CHThorSimpleActivityBase::stop();
  9042. }
  9043. //=====================================================================================================
  9044. CHThorParallelGraphLoopActivity::CHThorParallelGraphLoopActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorGraphLoopArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  9045. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  9046. {
  9047. flags = helper.getFlags();
  9048. maxIterations = 0;
  9049. }
  9050. void CHThorParallelGraphLoopActivity::ready()
  9051. {
  9052. executed = false;
  9053. resultIndex = 0;
  9054. CHThorSimpleActivityBase::ready();
  9055. maxIterations = helper.numIterations();
  9056. if ((int)maxIterations < 0) maxIterations = 0;
  9057. loopResults.setown(agent.createGraphLoopResults());
  9058. extractBuilder.clear();
  9059. helper.createParentExtract(extractBuilder);
  9060. rowAllocator.setown(agent.queryCodeContext()->getRowAllocator(queryOutputMeta(), activityId));
  9061. }
  9062. const void * CHThorParallelGraphLoopActivity::nextRow()
  9063. {
  9064. if (!executed)
  9065. {
  9066. executed = true;
  9067. IHThorGraphResult * inputResult = loopResults->createResult(0, LINK(rowAllocator));
  9068. for (;;)
  9069. {
  9070. const void * ret = input->nextRow();
  9071. if (!ret)
  9072. {
  9073. ret = input->nextRow();
  9074. if (!ret)
  9075. break;
  9076. inputResult->addRowOwn(NULL);
  9077. }
  9078. inputResult->addRowOwn(ret);
  9079. }
  9080. // The lack of separation between pre-creation and creation means this would require cloning lots of structures.
  9081. // not implemented for the moment.
  9082. // loopGraph->executeParallel(loopResults, extractBuilder.getbytes(), maxIterations);
  9083. finalResult = loopResults->queryResult(maxIterations);
  9084. }
  9085. const void * next = finalResult->getOwnRow(resultIndex++);
  9086. if (next)
  9087. processed++;
  9088. return next;
  9089. }
  9090. void CHThorParallelGraphLoopActivity::stop()
  9091. {
  9092. rowAllocator.clear();
  9093. finalResult = NULL;
  9094. loopResults.clear();
  9095. CHThorSimpleActivityBase::stop();
  9096. }
  9097. //=====================================================================================================
  9098. LibraryCallOutput::LibraryCallOutput(CHThorLibraryCallActivity * _owner, unsigned _output, IOutputMetaData * _meta) : owner(_owner), output(_output), meta(_meta)
  9099. {
  9100. processed = 0;
  9101. }
  9102. const void * LibraryCallOutput::nextRow()
  9103. {
  9104. if (!gotRows)
  9105. {
  9106. result.set(owner->getResultRows(output));
  9107. gotRows = true;
  9108. }
  9109. const void * ret = result->getOwnRow(curRow++);
  9110. if (ret)
  9111. processed++;
  9112. return ret;
  9113. }
  9114. bool LibraryCallOutput::isGrouped()
  9115. {
  9116. return meta->isGrouped();
  9117. }
  9118. IOutputMetaData * LibraryCallOutput::queryOutputMeta() const
  9119. {
  9120. return meta;
  9121. }
  9122. void LibraryCallOutput::ready()
  9123. {
  9124. owner->ready();
  9125. gotRows = false;
  9126. result.clear();
  9127. curRow = 0;
  9128. }
  9129. void LibraryCallOutput::stop()
  9130. {
  9131. owner->stop();
  9132. result.clear();
  9133. }
  9134. void LibraryCallOutput::resetEOF()
  9135. {
  9136. throwUnexpected();
  9137. }
  9138. void LibraryCallOutput::updateProgress(IStatisticGatherer &progress) const
  9139. {
  9140. owner->updateOutputProgress(progress, *this, processed);
  9141. }
  9142. CHThorLibraryCallActivity::CHThorLibraryCallActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorLibraryCallArg &_arg, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree * node)
  9143. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  9144. {
  9145. libraryName.set(node->queryProp("att[@name=\"libname\"]/@value"));
  9146. interfaceHash = node->getPropInt("att[@name=\"_interfaceHash\"]/@value", 0);
  9147. bool embedded = node->getPropBool("att[@name=\"embedded\"]/@value", false) ;
  9148. if (embedded)
  9149. {
  9150. embeddedGraphName.set(node->queryProp("att[@name=\"graph\"]/@value"));
  9151. if (!embeddedGraphName)
  9152. embeddedGraphName.set(libraryName);
  9153. }
  9154. Owned<IPropertyTreeIterator> iter = node->getElements("att[@name=\"_outputUsed\"]");
  9155. ForEach(*iter)
  9156. {
  9157. unsigned whichOutput = iter->query().getPropInt("@value");
  9158. IOutputMetaData * meta = helper.queryOutputMeta(whichOutput);
  9159. outputs.append(*new LibraryCallOutput(this, whichOutput, meta));
  9160. }
  9161. state = StateCreated;
  9162. }
  9163. IHThorGraphResult * CHThorLibraryCallActivity::getResultRows(unsigned whichOutput)
  9164. {
  9165. CriticalBlock procedure(cs);
  9166. if (!results)
  9167. {
  9168. if (libraryName.length() == 0)
  9169. libraryName.setown(helper.getLibraryName());
  9170. helper.createParentExtract(extractBuilder);
  9171. results.setown(agent.executeLibraryGraph(libraryName, interfaceHash, activityId, embeddedGraphName, extractBuilder.getbytes()));
  9172. }
  9173. return results->queryResult(whichOutput);
  9174. }
  9175. IHThorInput * CHThorLibraryCallActivity::queryOutput(unsigned idx)
  9176. {
  9177. assert(outputs.isItem(idx));
  9178. return &outputs.item(idx);
  9179. }
  9180. void CHThorLibraryCallActivity::updateOutputProgress(IStatisticGatherer &progress, const LibraryCallOutput & _output, unsigned __int64 numProcessed) const
  9181. {
  9182. LibraryCallOutput & output = const_cast<LibraryCallOutput &>(_output);
  9183. updateProgressForOther(progress, activityId, subgraphId, outputs.find(output), numProcessed);
  9184. }
  9185. void CHThorLibraryCallActivity::ready()
  9186. {
  9187. CriticalBlock procedure(cs);
  9188. if (state != StateReady)
  9189. {
  9190. results.clear();
  9191. CHThorSimpleActivityBase::ready();
  9192. state = StateReady;
  9193. }
  9194. }
  9195. const void * CHThorLibraryCallActivity::nextRow()
  9196. {
  9197. throwUnexpected();
  9198. }
  9199. void CHThorLibraryCallActivity::stop()
  9200. {
  9201. CriticalBlock procedure(cs);
  9202. if (state != StateDone)
  9203. {
  9204. results.clear();
  9205. CHThorSimpleActivityBase::stop();
  9206. }
  9207. }
  9208. //=====================================================================================================
  9209. class CHThorNWayInputActivity : public CHThorSimpleActivityBase, implements IHThorNWayInput
  9210. {
  9211. IHThorNWayInputArg & helper;
  9212. InputArrayType inputs;
  9213. InputArrayType selectedInputs;
  9214. public:
  9215. CHThorNWayInputActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNWayInputArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  9216. {
  9217. }
  9218. virtual void ready()
  9219. {
  9220. bool selectionIsAll;
  9221. size32_t selectionLen;
  9222. rtlDataAttr selection;
  9223. helper.getInputSelection(selectionIsAll, selectionLen, selection.refdata());
  9224. selectedInputs.kill();
  9225. if (selectionIsAll)
  9226. {
  9227. ForEachItemIn(i, inputs)
  9228. selectedInputs.append(inputs.item(i));
  9229. }
  9230. else
  9231. {
  9232. const size32_t * selections = (const size32_t *)selection.getdata();
  9233. unsigned max = selectionLen/sizeof(size32_t);
  9234. for (unsigned i = 0; i < max; i++)
  9235. {
  9236. unsigned nextIndex = selections[i];
  9237. //Check there are no duplicates..... Assumes there are a fairly small number of inputs, so n^2 search is ok.
  9238. for (unsigned j=i+1; j < max; j++)
  9239. {
  9240. if (nextIndex == selections[j])
  9241. throw MakeStringException(100, "Selection list for nway input can not contain duplicates");
  9242. }
  9243. if (!inputs.isItem(nextIndex-1))
  9244. throw MakeStringException(100, "Index %d in RANGE selection list is out of range", nextIndex);
  9245. selectedInputs.append(inputs.item(nextIndex-1));
  9246. }
  9247. }
  9248. ForEachItemIn(i2, selectedInputs)
  9249. selectedInputs.item(i2)->ready();
  9250. }
  9251. virtual void setInput(unsigned idx, IHThorInput *_in)
  9252. {
  9253. assertex(idx == inputs.ordinality());
  9254. inputs.append(_in);
  9255. }
  9256. virtual const void * nextRow()
  9257. {
  9258. throwUnexpected();
  9259. }
  9260. virtual void updateProgress(IStatisticGatherer &progress) const
  9261. {
  9262. // CHThorSimpleActivityBase::updateProgress(progress);
  9263. ForEachItemIn(i, inputs)
  9264. inputs.item(i)->updateProgress(progress);
  9265. }
  9266. virtual unsigned numConcreteOutputs() const
  9267. {
  9268. return selectedInputs.ordinality();
  9269. }
  9270. virtual IHThorInput * queryConcreteInput(unsigned idx) const
  9271. {
  9272. if (selectedInputs.isItem(idx))
  9273. return selectedInputs.item(idx);
  9274. return NULL;
  9275. }
  9276. };
  9277. //=====================================================================================================
  9278. class CHThorNWayGraphLoopResultReadActivity : public CHThorSimpleActivityBase, implements IHThorNWayInput
  9279. {
  9280. IHThorNWayGraphLoopResultReadArg & helper;
  9281. CIArrayOf<CHThorActivityBase> inputs;
  9282. __int64 graphId;
  9283. bool grouped;
  9284. public:
  9285. CHThorNWayGraphLoopResultReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNWayGraphLoopResultReadArg &_arg, ThorActivityKind _kind, EclGraph & _graph, __int64 _graphId) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  9286. {
  9287. grouped = helper.isGrouped();
  9288. graphId = _graphId;
  9289. }
  9290. virtual bool isGrouped()
  9291. {
  9292. return grouped;
  9293. }
  9294. virtual void ready()
  9295. {
  9296. bool selectionIsAll;
  9297. size32_t selectionLen;
  9298. rtlDataAttr selection;
  9299. helper.getInputSelection(selectionIsAll, selectionLen, selection.refdata());
  9300. if (selectionIsAll)
  9301. throw MakeStringException(100, "ALL not yet supported for NWay graph inputs");
  9302. unsigned max = selectionLen / sizeof(size32_t);
  9303. const size32_t * selections = (const size32_t *)selection.getdata();
  9304. for (unsigned i = 0; i < max; i++)
  9305. {
  9306. CHThorActivityBase * resultInput = new CHThorGraphLoopResultReadActivity(agent, activityId, subgraphId, helper, kind, graph, graphId, selections[i], grouped);
  9307. inputs.append(*resultInput);
  9308. resultInput->ready();
  9309. }
  9310. }
  9311. virtual void stop()
  9312. {
  9313. inputs.kill();
  9314. }
  9315. virtual void setInput(unsigned idx, IHThorInput *_in)
  9316. {
  9317. throwUnexpected();
  9318. }
  9319. virtual const void * nextRow()
  9320. {
  9321. throwUnexpected();
  9322. }
  9323. virtual unsigned numConcreteOutputs() const
  9324. {
  9325. return inputs.ordinality();
  9326. }
  9327. virtual IHThorInput * queryConcreteInput(unsigned idx) const
  9328. {
  9329. if (inputs.isItem(idx))
  9330. return &inputs.item(idx);
  9331. return NULL;
  9332. }
  9333. };
  9334. //=====================================================================================================
  9335. CHThorNWaySelectActivity::CHThorNWaySelectActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNWaySelectArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorMultiInputActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  9336. {
  9337. selectedInput = NULL;
  9338. }
  9339. void CHThorNWaySelectActivity::stop()
  9340. {
  9341. selectedInput = NULL;
  9342. CHThorMultiInputActivity::stop();
  9343. }
  9344. void CHThorNWaySelectActivity::ready()
  9345. {
  9346. CHThorMultiInputActivity::ready();
  9347. unsigned whichInput = helper.getInputIndex();
  9348. selectedInput = NULL;
  9349. if (whichInput--)
  9350. {
  9351. ForEachItemIn(i, inputs)
  9352. {
  9353. IHThorInput * cur = inputs.item(i);
  9354. IHThorNWayInput * nWayInput = dynamic_cast<IHThorNWayInput *>(cur);
  9355. if (nWayInput)
  9356. {
  9357. unsigned numRealInputs = nWayInput->numConcreteOutputs();
  9358. if (whichInput < numRealInputs)
  9359. selectedInput = nWayInput->queryConcreteInput(whichInput);
  9360. whichInput -= numRealInputs;
  9361. }
  9362. else
  9363. {
  9364. if (whichInput == 0)
  9365. selectedInput = cur;
  9366. whichInput -= 1;
  9367. }
  9368. if (selectedInput)
  9369. break;
  9370. }
  9371. }
  9372. }
  9373. const void * CHThorNWaySelectActivity::nextRow()
  9374. {
  9375. if (!selectedInput)
  9376. return NULL;
  9377. return selectedInput->nextRow();
  9378. }
  9379. const void * CHThorNWaySelectActivity::nextRowGE(const void * seek, unsigned numFields, bool &wasCompleteMatch, const SmartStepExtra &stepExtra)
  9380. {
  9381. if (!selectedInput)
  9382. return NULL;
  9383. return selectedInput->nextRowGE(seek, numFields, wasCompleteMatch, stepExtra);
  9384. }
  9385. IInputSteppingMeta * CHThorNWaySelectActivity::querySteppingMeta()
  9386. {
  9387. if (selectedInput)
  9388. return selectedInput->querySteppingMeta();
  9389. return NULL;
  9390. }
  9391. //=====================================================================================================
  9392. CHThorStreamedIteratorActivity::CHThorStreamedIteratorActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorStreamedIteratorArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  9393. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  9394. {
  9395. }
  9396. void CHThorStreamedIteratorActivity::ready()
  9397. {
  9398. CHThorSimpleActivityBase::ready();
  9399. rows.setown(helper.createInput());
  9400. }
  9401. const void *CHThorStreamedIteratorActivity::nextRow()
  9402. {
  9403. assertex(rows);
  9404. const void * next = rows->nextRow();
  9405. if (next)
  9406. processed++;
  9407. return next;
  9408. }
  9409. void CHThorStreamedIteratorActivity::stop()
  9410. {
  9411. if (rows)
  9412. {
  9413. rows->stop();
  9414. rows.clear();
  9415. }
  9416. }
  9417. //=====================================================================================================
  9418. CHThorExternalActivity::CHThorExternalActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorExternalArg &_arg, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree * _graphNode)
  9419. : CHThorMultiInputActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), graphNode(_graphNode), activityContext(1, 0)
  9420. {
  9421. }
  9422. void CHThorExternalActivity::setInput(unsigned index, IHThorInput *_input)
  9423. {
  9424. CHThorMultiInputActivity::setInput(index, _input);
  9425. CHThorInputAdaptor * adaptedInput = new CHThorInputAdaptor(_input);
  9426. inputAdaptors.append(*adaptedInput);
  9427. helper.setInput(index, adaptedInput);
  9428. }
  9429. void CHThorExternalActivity::ready()
  9430. {
  9431. CHThorMultiInputActivity::ready();
  9432. if (kind != TAKexternalsink)
  9433. rows.setown(helper.createOutput(&activityContext));
  9434. }
  9435. const void *CHThorExternalActivity::nextRow()
  9436. {
  9437. assertex(rows);
  9438. const void * next = rows->nextRow();
  9439. if (next)
  9440. processed++;
  9441. return next;
  9442. }
  9443. void CHThorExternalActivity::execute()
  9444. {
  9445. assertex(!rows);
  9446. helper.execute(&activityContext);
  9447. }
  9448. void CHThorExternalActivity::stop()
  9449. {
  9450. if (rows)
  9451. {
  9452. rows->stop();
  9453. rows.clear();
  9454. }
  9455. CHThorMultiInputActivity::stop();
  9456. }
  9457. //=====================================================================================================
  9458. CHThorNewDiskReadBaseActivity::CHThorNewDiskReadBaseActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNewDiskReadBaseArg &_arg, IHThorCompoundBaseArg & _segHelper, ThorActivityKind _kind, IPropertyTree *_node, EclGraph & _graph)
  9459. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), segHelper(_segHelper)
  9460. {
  9461. helper.setCallback(this);
  9462. expectedDiskMeta = helper.queryDiskRecordSize();
  9463. projectedDiskMeta = helper.queryProjectedDiskRecordSize();
  9464. formatOptions.setown(createPTree());
  9465. isCodeSigned = false;
  9466. if (_node)
  9467. {
  9468. const char *recordTranslationModeHintText = _node->queryProp("hint[@name='layouttranslation']/@value");
  9469. if (recordTranslationModeHintText)
  9470. recordTranslationModeHint = getTranslationMode(recordTranslationModeHintText, true);
  9471. isCodeSigned = isActivityCodeSigned(*_node);
  9472. }
  9473. CPropertyTreeWriter writer(formatOptions);
  9474. helper.getFormatOptions(writer);
  9475. }
  9476. CHThorNewDiskReadBaseActivity::~CHThorNewDiskReadBaseActivity()
  9477. {
  9478. close();
  9479. }
  9480. void CHThorNewDiskReadBaseActivity::ready()
  9481. {
  9482. CHThorActivityBase::ready();
  9483. opened = false;
  9484. offsetOfPart = 0;
  9485. partNum = (unsigned)-1;
  9486. resolveFile();
  9487. fieldFilters.kill();
  9488. segHelper.createSegmentMonitors(this);
  9489. }
  9490. void CHThorNewDiskReadBaseActivity::stop()
  9491. {
  9492. close();
  9493. CHThorActivityBase::stop();
  9494. }
  9495. unsigned __int64 CHThorNewDiskReadBaseActivity::getFilePosition(const void * row)
  9496. {
  9497. //Ideally these functions would not need to be implemented - they should always be implemented by the translation layer
  9498. throwUnexpected();
  9499. }
  9500. unsigned __int64 CHThorNewDiskReadBaseActivity::getLocalFilePosition(const void * row)
  9501. {
  9502. throwUnexpected();
  9503. }
  9504. const char * CHThorNewDiskReadBaseActivity::queryLogicalFilename(const void * row)
  9505. {
  9506. throwUnexpected();
  9507. }
  9508. void CHThorNewDiskReadBaseActivity::resolveFile()
  9509. {
  9510. //If in a child query, and the filenames haven't changed, the information about the resolved filenames will also not have changed
  9511. //MORE: Is this ever untrue?
  9512. if (subfiles && !(helper.getFlags() & (TDXvarfilename|TDRdynformatoptions)))
  9513. return;
  9514. //Only clear these members if we are re-resolving the file - otherwise the previous entries are still valid
  9515. ldFile.clear();
  9516. tempFileName.clear();
  9517. dfsParts.clear();
  9518. subfiles.kill();
  9519. Owned<IPropertyTree> curFormatOptions;
  9520. if (helper.getFlags() & TDRdynformatoptions)
  9521. {
  9522. curFormatOptions.setown(createPTreeFromIPT(formatOptions));
  9523. CPropertyTreeWriter writer(curFormatOptions);
  9524. helper.getFormatDynOptions(writer);
  9525. }
  9526. else
  9527. curFormatOptions.set(formatOptions);
  9528. OwnedRoxieString fileName(helper.getFileName());
  9529. mangleHelperFileName(mangledHelperFileName, fileName, agent.queryWuid(), helper.getFlags());
  9530. if (helper.getFlags() & (TDXtemporary | TDXjobtemp))
  9531. {
  9532. StringBuffer mangledFilename;
  9533. mangleLocalTempFilename(mangledFilename, mangledHelperFileName.str(), nullptr);
  9534. tempFileName.set(agent.queryTemporaryFile(mangledFilename.str()));
  9535. logicalFileName = tempFileName.str();
  9536. gatherInfo(NULL);
  9537. subfiles.append(*extractFileInformation(nullptr, curFormatOptions));
  9538. }
  9539. else
  9540. {
  9541. ldFile.setown(resolveLFNFlat(agent, mangledHelperFileName.str(), "Read", 0 != (helper.getFlags() & TDRoptional), isCodeSigned));
  9542. if ( mangledHelperFileName.charAt(0) == '~')
  9543. logicalFileName = mangledHelperFileName.str()+1;
  9544. else
  9545. logicalFileName = mangledHelperFileName.str();
  9546. if (ldFile)
  9547. {
  9548. Owned<IFileDescriptor> fdesc;
  9549. fdesc.setown(ldFile->getFileDescriptor());
  9550. gatherInfo(fdesc);
  9551. IDistributedFile *dFile = ldFile->queryDistributedFile();
  9552. if (dFile) //only makes sense for distributed (non local) files
  9553. {
  9554. dfsParts.setown(dFile->getIterator());
  9555. IDistributedSuperFile *super = dFile->querySuperFile();
  9556. if (super)
  9557. {
  9558. unsigned numsubs = super->numSubFiles(true);
  9559. unsigned s=0;
  9560. for (; s<numsubs; s++)
  9561. {
  9562. IDistributedFile &subfile = super->querySubFile(s, true);
  9563. subfiles.append(*extractFileInformation(&subfile, curFormatOptions));
  9564. }
  9565. assertex(fdesc);
  9566. superfile.set(fdesc->querySuperFileDescriptor());
  9567. }
  9568. else
  9569. subfiles.append(*extractFileInformation(dFile, curFormatOptions));
  9570. if((helper.getFlags() & (TDXtemporary | TDXjobtemp)) == 0)
  9571. agent.logFileAccess(dFile, "HThor", "READ", graph);
  9572. }
  9573. else
  9574. subfiles.append(*extractFileInformation(nullptr, curFormatOptions));
  9575. }
  9576. else
  9577. subfiles.append(*extractFileInformation(nullptr, curFormatOptions));
  9578. if (!ldFile)
  9579. {
  9580. StringBuffer buff;
  9581. buff.appendf("Input file '%s' was missing but declared optional", mangledHelperFileName.str());
  9582. agent.addWuExceptionEx(buff.str(), WRN_SkipMissingOptFile, SeverityInformation, MSGAUD_user, "hthor");
  9583. }
  9584. }
  9585. assertex(subfiles.ordinality() != 0);
  9586. }
  9587. void CHThorNewDiskReadBaseActivity::gatherInfo(IFileDescriptor * fileDesc)
  9588. {
  9589. if (fileDesc)
  9590. {
  9591. if (!agent.queryResolveFilesLocally())
  9592. {
  9593. grouped = fileDesc->isGrouped();
  9594. if (grouped != ((helper.getFlags() & TDXgrouped) != 0))
  9595. {
  9596. StringBuffer msg;
  9597. msg.append("DFS and code generated group info. differs: DFS(").append(grouped ? "grouped" : "ungrouped").append("), CodeGen(").append(grouped ? "ungrouped" : "grouped").append("), using DFS info");
  9598. agent.addWuExceptionEx(msg.str(), WRN_MismatchGroupInfo, SeverityError, MSGAUD_user, "hthor");
  9599. }
  9600. }
  9601. else
  9602. grouped = ((helper.getFlags() & TDXgrouped) != 0);
  9603. }
  9604. else
  9605. {
  9606. grouped = ((helper.getFlags() & TDXgrouped) != 0);
  9607. }
  9608. }
  9609. static void queryInheritProp(IPropertyTree & target, const char * targetName, IPropertyTree & source, const char * sourceName)
  9610. {
  9611. if (source.hasProp(sourceName) && !target.hasProp(targetName))
  9612. target.setProp(targetName, source.queryProp(sourceName));
  9613. }
  9614. static void queryInheritSeparatorProp(IPropertyTree & target, const char * targetName, IPropertyTree & source, const char * sourceName)
  9615. {
  9616. //Legacy - commas are quoted if they occur in a separator list, so need to remove the leading backslashes
  9617. if (source.hasProp(sourceName) && !target.hasProp(targetName))
  9618. {
  9619. StringBuffer unquoted;
  9620. const char * text = source.queryProp(sourceName);
  9621. while (*text)
  9622. {
  9623. if ((text[0] == '\\') && (text[1] == ','))
  9624. text++;
  9625. unquoted.append(*text++);
  9626. }
  9627. target.setProp(targetName, unquoted);
  9628. }
  9629. }
  9630. CHThorNewDiskReadBaseActivity::InputFileInfo * CHThorNewDiskReadBaseActivity::extractFileInformation(IDistributedFile * distributedFile, const IPropertyTree * curFormatOptions)
  9631. {
  9632. Owned<IPropertyTree> meta = createPTree();
  9633. unsigned actualCrc = helper.getDiskFormatCrc();
  9634. Linked<IOutputMetaData> actualDiskMeta = expectedDiskMeta;
  9635. Linked<IPropertyTree> fileFormatOptions = createPTreeFromIPT(curFormatOptions);
  9636. bool compressed = false;
  9637. bool blockcompressed = false;
  9638. const char * readFormat = helper.queryFormat();
  9639. if (distributedFile)
  9640. {
  9641. const char *kind = queryFileKind(distributedFile);
  9642. //Do not use the field translation if the file was originally csv/xml - unless explicitly set
  9643. if ((strisame(kind, "flat") || (RecordTranslationMode::AlwaysDisk == getLayoutTranslationMode())) &&
  9644. // (strisame(readFormat, "flat") || strisame(kind, readFormat)))
  9645. (strisame(readFormat, "flat"))) // Not sure about this - only allow fixed source format if reading as flat
  9646. {
  9647. //Yuk this will be horrible - it needs to cache it for each distributed file
  9648. //and also common them up if they are the same.
  9649. IPropertyTree &props = distributedFile->queryAttributes();
  9650. Owned<IOutputMetaData> publishedMeta = getDaliLayoutInfo(props);
  9651. if (publishedMeta)
  9652. {
  9653. actualDiskMeta.setown(publishedMeta.getClear());
  9654. actualCrc = props.getPropInt("@formatCrc");
  9655. }
  9656. size32_t dfsSize = props.getPropInt("@recordSize");
  9657. if (dfsSize != 0)
  9658. meta->setPropInt("@recordSize", dfsSize);
  9659. }
  9660. compressed = distributedFile->isCompressed(&blockcompressed); //try new decompression, fall back to old unless marked as block
  9661. //Check for encryption key
  9662. void *k;
  9663. size32_t kl;
  9664. helper.getEncryptKey(kl,k);
  9665. if (kl)
  9666. {
  9667. meta->setPropBin("encryptionKey", kl, k);
  9668. blockcompressed = true;
  9669. compressed = true;
  9670. }
  9671. //MORE: There should probably be a generic way of storing and extracting format options for a file
  9672. IPropertyTree & options = distributedFile->queryAttributes();
  9673. queryInheritProp(*fileFormatOptions, "quote", options, "@csvQuote");
  9674. queryInheritSeparatorProp(*fileFormatOptions, "separator", options, "@csvSeparate");
  9675. queryInheritProp(*fileFormatOptions, "terminator", options, "@csvTerminate");
  9676. queryInheritProp(*fileFormatOptions, "escape", options, "@csvEscape");
  9677. dbglogXML(fileFormatOptions);
  9678. dbglogXML(fileFormatOptions);
  9679. }
  9680. meta->setPropBool("@grouped", grouped);
  9681. meta->setPropBool("@compressed", compressed);
  9682. meta->setPropBool("@blockCompressed", blockcompressed);
  9683. meta->setPropBool("@forceCompressed", (helper.getFlags() & TDXcompress) != 0);
  9684. meta->setPropTree("formatOptions", fileFormatOptions.getClear());
  9685. InputFileInfo & target = * new InputFileInfo;
  9686. target.file = distributedFile;
  9687. target.meta.setown(meta.getClear());
  9688. target.actualCrc = actualCrc;
  9689. target.actualMeta.swap(actualDiskMeta);
  9690. return &target;
  9691. }
  9692. void CHThorNewDiskReadBaseActivity::close()
  9693. {
  9694. closepart();
  9695. if(ldFile)
  9696. {
  9697. IDistributedFile * dFile = ldFile->queryDistributedFile();
  9698. if(dFile)
  9699. dFile->setAccessed();
  9700. }
  9701. }
  9702. void CHThorNewDiskReadBaseActivity::closepart()
  9703. {
  9704. if (activeReader)
  9705. {
  9706. activeReader->clearInput();
  9707. activeReader = nullptr;
  9708. }
  9709. logicalFileName = "";
  9710. }
  9711. static void saveOrRelease(Owned<IException> & target, IException * e)
  9712. {
  9713. if (target.get())
  9714. ::Release(e);
  9715. else
  9716. target.setown(e);
  9717. }
  9718. static void getFilename(RemoteFilename & rfilename, IDistributedFilePart * curPart, ILocalOrDistributedFile * localFile, unsigned partNum, unsigned copy)
  9719. {
  9720. if (curPart)
  9721. curPart->getFilename(rfilename,copy);
  9722. else
  9723. localFile->getPartFilename(rfilename,partNum,copy);
  9724. }
  9725. bool CHThorNewDiskReadBaseActivity::openFirstPart()
  9726. {
  9727. partNum = 0;
  9728. if (dfsParts) // more should really be fileDesc or something
  9729. {
  9730. if (dfsParts->first())
  9731. {
  9732. if (openFilePart(ldFile, &dfsParts->query(), 0))
  9733. return true;
  9734. return openNextPart(true);
  9735. }
  9736. }
  9737. else if (ldFile)
  9738. {
  9739. if (ldFile->numParts() != 0)
  9740. {
  9741. if (openFilePart(ldFile, nullptr, 0))
  9742. return true;
  9743. return openNextPart(true);
  9744. }
  9745. }
  9746. else if (!tempFileName.isEmpty())
  9747. {
  9748. if (openFilePart(tempFileName))
  9749. return true;
  9750. }
  9751. setEmptyStream();
  9752. return false;
  9753. }
  9754. bool CHThorNewDiskReadBaseActivity::openNextPart(bool prevWasMissing)
  9755. {
  9756. if (finishedParts)
  9757. return false;
  9758. if (!prevWasMissing)
  9759. {
  9760. offset_t sizeFilePart = 0;
  9761. if (dfsParts)
  9762. sizeFilePart = dfsParts->query().getFileSize(true, false);
  9763. else if (ldFile)
  9764. sizeFilePart = ldFile->getPartFileSize(partNum);
  9765. offsetOfPart += sizeFilePart;
  9766. closepart();
  9767. }
  9768. for (;;)
  9769. {
  9770. partNum++;
  9771. if (dfsParts)
  9772. {
  9773. if (dfsParts->next())
  9774. {
  9775. if (openFilePart(ldFile, &dfsParts->query(), partNum))
  9776. return true;
  9777. continue; // try the next file part
  9778. }
  9779. }
  9780. else if (ldFile)
  9781. {
  9782. if (partNum < ldFile->numParts())
  9783. {
  9784. if (openFilePart(ldFile, nullptr, partNum))
  9785. return true;
  9786. continue; // try the next file part
  9787. }
  9788. }
  9789. setEmptyStream();
  9790. return false;
  9791. }
  9792. }
  9793. void CHThorNewDiskReadBaseActivity::initStream(IDiskRowReader * reader, const char * filename)
  9794. {
  9795. activeReader = reader;
  9796. inputRowStream = reader->queryAllocatedRowStream(rowAllocator);
  9797. StringBuffer report("Reading file ");
  9798. report.append(filename);
  9799. agent.reportProgress(report.str());
  9800. }
  9801. void CHThorNewDiskReadBaseActivity::setEmptyStream()
  9802. {
  9803. inputRowStream = queryNullDiskRowStream();
  9804. finishedParts = true;
  9805. }
  9806. IDiskRowReader * CHThorNewDiskReadBaseActivity::ensureRowReader(const char * format, bool streamRemote, unsigned expectedCrc, IOutputMetaData & expected, unsigned projectedCrc, IOutputMetaData & projected, unsigned actualCrc, IOutputMetaData & actual, const IPropertyTree * options)
  9807. {
  9808. Owned<IDiskReadMapping> mapping = createDiskReadMapping(getLayoutTranslationMode(), format, actualCrc, actual, expectedCrc, expected, projectedCrc, projected, options);
  9809. ForEachItemIn(i, readers)
  9810. {
  9811. IDiskRowReader & cur = readers.item(i);
  9812. if (cur.matches(format, streamRemote, mapping))
  9813. return &cur;
  9814. }
  9815. IDiskRowReader * reader = createDiskReader(format, streamRemote, mapping);
  9816. readers.append(*reader);
  9817. return reader;
  9818. }
  9819. bool CHThorNewDiskReadBaseActivity::openFilePart(const char * filename)
  9820. {
  9821. const char * format = helper.queryFormat(); // more - should extract from the current file (could even mix flat and csv...)
  9822. InputFileInfo * fileInfo = &subfiles.item(0);
  9823. unsigned expectedCrc = helper.getDiskFormatCrc();
  9824. unsigned projectedCrc = helper.getProjectedFormatCrc();
  9825. IDiskRowReader * reader = ensureRowReader(format, false, expectedCrc, *expectedDiskMeta, projectedCrc, *projectedDiskMeta, expectedCrc, *expectedDiskMeta, fileInfo->meta);
  9826. if (reader->setInputFile(filename, logicalFileName, 0, offsetOfPart, fileInfo->meta, fieldFilters))
  9827. {
  9828. initStream(reader, filename);
  9829. return true;
  9830. }
  9831. return false;
  9832. }
  9833. bool CHThorNewDiskReadBaseActivity::openFilePart(ILocalOrDistributedFile * localFile, IDistributedFilePart * filePart, unsigned whichPart)
  9834. {
  9835. IDistributedFile * distributedFile = localFile->queryDistributedFile();
  9836. InputFileInfo * fileInfo = &subfiles.item(0);
  9837. if (superfile && filePart)
  9838. {
  9839. unsigned subfile;
  9840. unsigned lnum;
  9841. if (superfile->mapSubPart(partNum, subfile, lnum))
  9842. {
  9843. fileInfo = &subfiles.item(subfile);
  9844. distributedFile = fileInfo->file;
  9845. logicalFileName = distributedFile->queryLogicalName();
  9846. }
  9847. }
  9848. unsigned expectedCrc = helper.getDiskFormatCrc();
  9849. unsigned projectedCrc = helper.getProjectedFormatCrc();
  9850. unsigned actualCrc = fileInfo->actualCrc;
  9851. IOutputMetaData * actualDiskMeta = fileInfo->actualMeta;
  9852. bool tryRemoteStream = actualDiskMeta->queryTypeInfo()->canInterpret() && actualDiskMeta->queryTypeInfo()->canSerialize() &&
  9853. projectedDiskMeta->queryTypeInfo()->canInterpret() && projectedDiskMeta->queryTypeInfo()->canSerialize();
  9854. /*
  9855. * If a file part can be accessed local, then read it locally
  9856. * If a file part supports a remote stream, then use that
  9857. * Otherwise failover to the legacy remote access.
  9858. */
  9859. const char * format = helper.queryFormat(); // more - should extract from the current file (could even mix flat and csv...)
  9860. Owned<IException> saveOpenExc;
  9861. StringBuffer filename, filenamelist;
  9862. std::vector<unsigned> remoteCandidates;
  9863. // scan for local part 1st
  9864. //MORE: Order of copies should be optimized at this point....
  9865. unsigned numCopies = filePart?filePart->numCopies():ldFile->numPartCopies(partNum);
  9866. for (unsigned copy=0; copy<numCopies; copy++)
  9867. {
  9868. RemoteFilename rfn;
  9869. getFilename(rfn, filePart, localFile, partNum, copy);
  9870. if (!isRemoteReadCandidate(agent, rfn))
  9871. {
  9872. StringBuffer path;
  9873. rfn.getPath(path);
  9874. IDiskRowReader * reader = ensureRowReader(format, false, expectedCrc, *expectedDiskMeta, projectedCrc, *projectedDiskMeta, actualCrc, *actualDiskMeta, fileInfo->meta);
  9875. if (reader->setInputFile(path.str(), logicalFileName, whichPart, offsetOfPart, fileInfo->meta, fieldFilters))
  9876. {
  9877. initStream(reader, path.str());
  9878. return true;
  9879. }
  9880. }
  9881. else
  9882. remoteCandidates.push_back(copy);
  9883. }
  9884. //First try remote streaming, and if that does not succeed, fall back to remote reading.
  9885. bool allowFallbackToNonStreaming = false;
  9886. for (;;)
  9887. {
  9888. for (unsigned copy: remoteCandidates)
  9889. {
  9890. RemoteFilename rfilename;
  9891. getFilename(rfilename, filePart, localFile, partNum, copy);
  9892. rfilename.getPath(filename.clear());
  9893. filenamelist.append('\n').append(filename);
  9894. try
  9895. {
  9896. IDiskRowReader * reader = ensureRowReader(format, tryRemoteStream, expectedCrc, *expectedDiskMeta, projectedCrc, *projectedDiskMeta, actualCrc, *actualDiskMeta, fileInfo->meta);
  9897. if (reader->setInputFile(rfilename, logicalFileName, whichPart, offsetOfPart, fileInfo->meta, fieldFilters))
  9898. {
  9899. initStream(reader, filename);
  9900. return true;
  9901. }
  9902. }
  9903. catch (IException *E)
  9904. {
  9905. saveOrRelease(saveOpenExc, E);
  9906. }
  9907. }
  9908. if (!tryRemoteStream || !allowFallbackToNonStreaming)
  9909. break;
  9910. tryRemoteStream = false;
  9911. }
  9912. if (!(helper.getFlags() & TDRoptional))
  9913. {
  9914. StringBuffer s;
  9915. if (filenamelist)
  9916. {
  9917. if (saveOpenExc.get())
  9918. {
  9919. if (strstr(mangledHelperFileName.str(),"::>")!=NULL) // if a 'special' filename just use saved exception
  9920. saveOpenExc->errorMessage(s);
  9921. else
  9922. {
  9923. s.append("Could not open logical file ").append(mangledHelperFileName.str()).append(" in any of these locations:").append(filenamelist).append(" (");
  9924. saveOpenExc->errorMessage(s).append(")");
  9925. }
  9926. }
  9927. else
  9928. s.append("Could not open logical file ").append(mangledHelperFileName.str()).append(" in any of these locations:").append(filenamelist).append(" (").append((unsigned)GetLastError()).append(")");
  9929. }
  9930. else
  9931. s.append("Could not open local physical file ").append(filename).append(" (").append((unsigned)GetLastError()).append(")");
  9932. agent.fail(1, s.str());
  9933. }
  9934. return false;
  9935. }
  9936. bool CHThorNewDiskReadBaseActivity::openNext()
  9937. {
  9938. return openNextPart(false);
  9939. }
  9940. void CHThorNewDiskReadBaseActivity::open()
  9941. {
  9942. assertex(!opened);
  9943. opened = true;
  9944. if (!segHelper.canMatchAny())
  9945. {
  9946. setEmptyStream();
  9947. }
  9948. else
  9949. {
  9950. if (!openFirstPart())
  9951. setEmptyStream();
  9952. }
  9953. }
  9954. void CHThorNewDiskReadBaseActivity::verifyRecordFormatCrc()
  9955. {
  9956. //MORE: Need to configure based on csv/xml
  9957. ::verifyFormatCrcSuper(helper.getDiskFormatCrc(), ldFile?ldFile->queryDistributedFile():NULL, false, true);
  9958. }
  9959. void CHThorNewDiskReadBaseActivity::append(FFoption option, const IFieldFilter * filter)
  9960. {
  9961. if (filter->isWild())
  9962. filter->Release();
  9963. else
  9964. fieldFilters.append(*filter);
  9965. }
  9966. //=====================================================================================================
  9967. CHThorNewDiskReadActivity::CHThorNewDiskReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNewDiskReadArg &_arg, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree *_node)
  9968. : CHThorNewDiskReadBaseActivity(_agent, _activityId, _subgraphId, _arg, _arg, _kind, _node, _graph), helper(_arg), outBuilder(NULL)
  9969. {
  9970. needTransform = false;
  9971. lastGroupProcessed = 0;
  9972. hasMatchFilter = helper.hasMatchFilter();
  9973. useRawStream = hasMatchFilter || helper.needTransform();
  9974. }
  9975. void CHThorNewDiskReadActivity::ready()
  9976. {
  9977. PARENT::ready();
  9978. outBuilder.setAllocator(rowAllocator);
  9979. lastGroupProcessed = processed;
  9980. needTransform = helper.needTransform() || fieldFilters.length();
  9981. limit = helper.getRowLimit();
  9982. if (helper.getFlags() & TDRlimitskips)
  9983. limit = (unsigned __int64) -1;
  9984. stopAfter = helper.getChooseNLimit();
  9985. if (!helper.transformMayFilter() && !helper.hasMatchFilter())
  9986. remoteLimit = stopAfter;
  9987. finishedParts = false;
  9988. }
  9989. void CHThorNewDiskReadActivity::stop()
  9990. {
  9991. outBuilder.clear();
  9992. PARENT::stop();
  9993. }
  9994. void CHThorNewDiskReadActivity::onLimitExceeded()
  9995. {
  9996. if ( agent.queryCodeContext()->queryDebugContext())
  9997. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  9998. helper.onLimitExceeded();
  9999. }
  10000. const void *CHThorNewDiskReadActivity::nextRow()
  10001. {
  10002. //Avoid this check on each row- e.g., initialising streams with a null stream, which returns eof, and falls through to eof processing
  10003. if (!opened) open();
  10004. // Only check once per row returned. Potentially means that heavily filtered datasets may wait a long time to check for abort
  10005. queryUpdateProgress();
  10006. //Avoid this test... Combine the limit checking with choosen, and have choosen/limit triggering set the
  10007. //stream to a special no more rows stream so that subsequent calls do not read records.
  10008. if ((processed - initialProcessed) >= stopAfter)
  10009. return nullptr;
  10010. try
  10011. {
  10012. if (useRawStream)
  10013. {
  10014. for (;;)
  10015. {
  10016. //Returns a row in the serialized form of the projected format
  10017. size32_t nextSize;
  10018. const byte * next = (const byte *)inputRowStream->nextRow(nextSize);
  10019. if (!isSpecialRow(next))
  10020. {
  10021. if (likely(!hasMatchFilter || helper.canMatch(next)))
  10022. {
  10023. size32_t thisSize = helper.transform(outBuilder.ensureRow(), next);
  10024. if (thisSize != 0)
  10025. {
  10026. if (unlikely((processed - initialProcessed) >= limit))
  10027. {
  10028. outBuilder.clear();
  10029. onLimitExceeded();
  10030. return nullptr;
  10031. }
  10032. processed++;
  10033. return outBuilder.finalizeRowClear(thisSize);
  10034. }
  10035. }
  10036. }
  10037. else
  10038. {
  10039. switch (getSpecialRowType(next))
  10040. {
  10041. case SpecialRow::eof:
  10042. if (!openNext())
  10043. return next; // i.e. eof
  10044. //rawStream will have changed, but it cannot change into a rowStream
  10045. break;
  10046. case SpecialRow::eos:
  10047. return next;
  10048. case SpecialRow::eog:
  10049. if (processed != lastGroupProcessed)
  10050. {
  10051. lastGroupProcessed = processed;
  10052. //MORE: Change to return next - i.e. an eog marker
  10053. return nullptr;
  10054. }
  10055. break;
  10056. default:
  10057. throwUnexpected();
  10058. }
  10059. }
  10060. }
  10061. }
  10062. else
  10063. {
  10064. //This branch avoids a memcpy from actual to projected followed by a deserialize - since it can map directly
  10065. //May be more efficient to use this branch if serialized==deserialized and there is a filter, but no transform.
  10066. //It would be possibel to have two (or more) different implementations, which were created based on
  10067. //whether there was a limit, a transform etc., but unlikely to save more than a couple of boolean tests.
  10068. for (;;)
  10069. {
  10070. const byte * next = (const byte *)inputRowStream->nextRow();
  10071. if (!isSpecialRow(next))
  10072. {
  10073. if (unlikely((processed - initialProcessed) >= limit))
  10074. {
  10075. ReleaseRoxieRow(next);
  10076. onLimitExceeded();
  10077. return nullptr;
  10078. }
  10079. processed++;
  10080. return next;
  10081. }
  10082. else
  10083. {
  10084. switch (getSpecialRowType(next))
  10085. {
  10086. case SpecialRow::eof:
  10087. if (!openNext())
  10088. return next;
  10089. //rowStream will have changed
  10090. break;
  10091. case SpecialRow::eos:
  10092. return next;
  10093. case SpecialRow::eog:
  10094. if (processed != lastGroupProcessed)
  10095. {
  10096. lastGroupProcessed = processed;
  10097. return nullptr;
  10098. }
  10099. break;
  10100. default:
  10101. throwUnexpected();
  10102. }
  10103. }
  10104. }
  10105. }
  10106. }
  10107. catch(IException * e)
  10108. {
  10109. throw makeWrappedException(e);
  10110. }
  10111. return NULL;
  10112. }
  10113. //=====================================================================================================
  10114. bool RemoteReadChecker::onlyReadLocally(const CLogicalFileSlice & slice, unsigned copy)
  10115. {
  10116. //Allow all operations to be forced to be executed locally.
  10117. if (forceRemoteDisabled.getValue(false))
  10118. return true;
  10119. //If not locally attached then there is no benefit in reading remotely
  10120. if (!slice.onAttachedStorage(copy))
  10121. return true;
  10122. //If the file is not local then execute it remotely
  10123. if (!slice.isLocal(copy))
  10124. return false;
  10125. StringBuffer localPath;
  10126. slice.getURL(localPath, copy);
  10127. if (forceRemoteRead.getValue(testForceRemote(localPath)))
  10128. return false;
  10129. return true;
  10130. }
  10131. CHThorGenericDiskReadBaseActivity::CHThorGenericDiskReadBaseActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNewDiskReadBaseArg &_arg, IHThorCompoundBaseArg & _segHelper, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree *_node)
  10132. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), segHelper(_segHelper), remoteReadChecker(_agent.queryWorkUnit())
  10133. {
  10134. helper.setCallback(this);
  10135. expectedDiskMeta = helper.queryDiskRecordSize();
  10136. projectedDiskMeta = helper.queryProjectedDiskRecordSize();
  10137. isCodeSigned = false;
  10138. if (_node)
  10139. {
  10140. const char *recordTranslationModeHintText = _node->queryProp("hint[@name='layouttranslation']/@value");
  10141. if (recordTranslationModeHintText)
  10142. recordTranslationModeHint = getTranslationMode(recordTranslationModeHintText, true);
  10143. isCodeSigned = isActivityCodeSigned(*_node);
  10144. }
  10145. grouped = ((helper.getFlags() & TDXgrouped) != 0);
  10146. inputOptions.setown(createPTree());
  10147. inputOptions->setPropBool("@grouped", grouped);
  10148. inputOptions->setPropBool("@forceCompressed", (helper.getFlags() & TDXcompress) != 0);
  10149. if (helper.getFlags() & TDRoptional)
  10150. inputOptions->setPropBool("@optional", true);
  10151. if ((helper.getFlags() & TDRcloneappendvirtual) != 0)
  10152. inputOptions->setPropBool("@cloneAppendVirtuals", true);
  10153. CPropertyTreeWriter writer(ensurePTree(inputOptions, "formatOptions"));
  10154. helper.getFormatOptions(writer);
  10155. outputGrouped = helper.queryOutputMeta()->isGrouped(); // It is possible for input to be incorrectly marked as grouped, and input not or vice-versa
  10156. bool isTemporary = (helper.getFlags() & (TDXtemporary | TDXjobtemp)) != 0;
  10157. files.init(this, agent.queryWuid(), isTemporary, agent.queryResolveFilesLocally(), isCodeSigned, agent.queryCodeContext()->queryUserDescriptor(), expectedDiskMeta);
  10158. if (isTemporary)
  10159. {
  10160. StringBuffer spillPath;
  10161. agent.getTempfileBase(spillPath);
  10162. //Should probably be in eclagent
  10163. spillPlane.setown(createPTree("planes"));
  10164. spillPlane->setProp("@name", "localspill");
  10165. spillPlane->setProp("@prefix", spillPath);
  10166. }
  10167. }
  10168. CHThorGenericDiskReadBaseActivity::~CHThorGenericDiskReadBaseActivity()
  10169. {
  10170. close();
  10171. }
  10172. void CHThorGenericDiskReadBaseActivity::ready()
  10173. {
  10174. CHThorActivityBase::ready();
  10175. opened = false;
  10176. curSlice = NotFound;
  10177. resolveFile();
  10178. fieldFilters.kill();
  10179. segHelper.createSegmentMonitors(this);
  10180. }
  10181. void CHThorGenericDiskReadBaseActivity::stop()
  10182. {
  10183. close();
  10184. CHThorActivityBase::stop();
  10185. }
  10186. unsigned __int64 CHThorGenericDiskReadBaseActivity::getFilePosition(const void * row)
  10187. {
  10188. //These functions do not need to be implemented - they will be implemented by the translation layer
  10189. throwUnexpected();
  10190. }
  10191. unsigned __int64 CHThorGenericDiskReadBaseActivity::getLocalFilePosition(const void * row)
  10192. {
  10193. throwUnexpected();
  10194. }
  10195. void CHThorGenericDiskReadBaseActivity::noteException(unsigned severity, unsigned code, const char * text)
  10196. {
  10197. //MORE: This should really supply the activity and the scope - a general issue for hthor errors...
  10198. agent.addWuExceptionEx(text, code, severity, MSGAUD_user, "hthor");
  10199. }
  10200. const char * CHThorGenericDiskReadBaseActivity::queryLogicalFilename(const void * row)
  10201. {
  10202. throwUnexpected();
  10203. }
  10204. void CHThorGenericDiskReadBaseActivity::resolveFile()
  10205. {
  10206. //If in a child query, and the filenames haven't changed, the information about the resolved filenames will also not have changed
  10207. //Assume that is also true for format properties - require dynamic if they are to be recalculated.
  10208. if (resolved && !(helper.getFlags() & (TDXvarfilename|TDRdynformatoptions)))
  10209. return;
  10210. resolved = true;
  10211. //Update the inputOptions and formatOptions if they depend on the current context
  10212. curInputOptions.set(inputOptions);
  10213. //Check for encryption key
  10214. void *k;
  10215. size32_t kl;
  10216. helper.getEncryptKey(kl,k);
  10217. if (kl || (helper.getFlags() & TDRdynformatoptions))
  10218. {
  10219. curInputOptions.setown(createPTreeFromIPT(inputOptions));
  10220. if (kl)
  10221. {
  10222. curInputOptions->setPropBin("encryptionKey", kl, k);
  10223. curInputOptions->setPropBool("blockcompressed", true);
  10224. curInputOptions->setPropBool("compressed", true);
  10225. }
  10226. if (helper.getFlags() & TDRdynformatoptions)
  10227. {
  10228. Owned<IPropertyTree> helperFormatOptions = createPTree("formatOptions");
  10229. CPropertyTreeWriter writer(helperFormatOptions);
  10230. helper.getFormatDynOptions(writer);
  10231. IPropertyTree * curFormatOptions = ensurePTree(curInputOptions, "formatOptions");
  10232. mergeConfiguration(*curFormatOptions, *helperFormatOptions, nullptr, true);
  10233. }
  10234. }
  10235. //Extract meta information from the helper. Another (possibly more efficient) alternative to an IPropertyTree would be a class.
  10236. bool isTemporary = (helper.getFlags() & (TDXtemporary | TDXjobtemp)) != 0;
  10237. OwnedRoxieString fileName(helper.getFileName());
  10238. if (isTemporary)
  10239. {
  10240. StringBuffer mangledFilename;
  10241. mangleLocalTempFilename(mangledFilename, fileName, agent.queryWuid()); // should this occur inside setEclFilename?
  10242. curInputOptions->setPropBool("@singlePartNoSuffix", true);
  10243. files.setTempFilename(mangledFilename, curInputOptions, spillPlane);
  10244. }
  10245. else
  10246. {
  10247. StringBuffer lfn;
  10248. expandLogicalFilename(lfn, fileName, agent.queryWorkUnit(), false, false);
  10249. files.setEclFilename(lfn, curInputOptions);
  10250. }
  10251. slices.clear();
  10252. files.calcPartition(slices, 1, 0, false, true);
  10253. curSlice = 0;
  10254. }
  10255. void CHThorGenericDiskReadBaseActivity::close()
  10256. {
  10257. closepart();
  10258. if (activeSlice)
  10259. activeSlice->setAccessed();
  10260. }
  10261. void CHThorGenericDiskReadBaseActivity::closepart()
  10262. {
  10263. if (activeReader)
  10264. {
  10265. activeReader->clearInput();
  10266. activeReader = nullptr;
  10267. activeSlice = nullptr;
  10268. }
  10269. }
  10270. bool CHThorGenericDiskReadBaseActivity::openFirstPart()
  10271. {
  10272. if (openFilePart(0U))
  10273. return true;
  10274. setEmptyStream();
  10275. return false;
  10276. }
  10277. bool CHThorGenericDiskReadBaseActivity::openNextPart()
  10278. {
  10279. if (curSlice == NotFound)
  10280. return false;
  10281. if (activeSlice)
  10282. closepart();
  10283. if (openFilePart(curSlice+1))
  10284. return true;
  10285. setEmptyStream();
  10286. return false;
  10287. }
  10288. void CHThorGenericDiskReadBaseActivity::initStream(CLogicalFileSlice * slice, IDiskRowReader * reader)
  10289. {
  10290. activeSlice = slice;
  10291. activeReader = reader;
  10292. inputRowStream = reader->queryAllocatedRowStream(rowAllocator);
  10293. StringBuffer report("Reading file ");
  10294. activeSlice->getTracingFilename(report);
  10295. agent.reportProgress(report.str());
  10296. }
  10297. void CHThorGenericDiskReadBaseActivity::setEmptyStream()
  10298. {
  10299. inputRowStream = queryNullDiskRowStream();
  10300. finishedParts = true;
  10301. }
  10302. IDiskRowReader * CHThorGenericDiskReadBaseActivity::ensureRowReader(const char * format, bool streamRemote, unsigned expectedCrc, IOutputMetaData & expected, unsigned projectedCrc, IOutputMetaData & projected, unsigned actualCrc, IOutputMetaData & actual, CLogicalFileSlice * slice)
  10303. {
  10304. bool translateFromActual = strsame(format, slice->queryFormat());
  10305. //Backwards compatibility - there should be an option to override this
  10306. if (strsame(format, "csv") || strsame(format, "xml"))
  10307. translateFromActual = false;
  10308. //If the actual and expected file formats do not translate from the actual file format - use the expected format instead
  10309. Owned<IDiskReadMapping> mapping;
  10310. if (translateFromActual)
  10311. mapping.setown(createDiskReadMapping(getLayoutTranslationMode(), format, actualCrc, actual, expectedCrc, expected, projectedCrc, projected, slice->queryFileMeta()));
  10312. else
  10313. mapping.setown(createDiskReadMapping(getLayoutTranslationMode(), format, expectedCrc, expected, expectedCrc, expected, projectedCrc, projected, slice->queryFileMeta()));
  10314. ForEachItemIn(i, readers)
  10315. {
  10316. IDiskRowReader & cur = readers.item(i);
  10317. if (cur.matches(format, streamRemote, mapping))
  10318. return &cur;
  10319. }
  10320. IDiskRowReader * reader = createDiskReader(format, streamRemote, mapping);
  10321. readers.append(*reader);
  10322. return reader;
  10323. }
  10324. bool CHThorGenericDiskReadBaseActivity::openFilePart(unsigned whichSlice)
  10325. {
  10326. for (;;)
  10327. {
  10328. if (whichSlice >= slices.size())
  10329. {
  10330. curSlice = NotFound;
  10331. return false;
  10332. }
  10333. if (openFilePart(&slices[whichSlice]))
  10334. {
  10335. curSlice = whichSlice;
  10336. activeSlice = &slices[whichSlice];
  10337. return true;
  10338. }
  10339. whichSlice++;
  10340. }
  10341. }
  10342. bool CHThorGenericDiskReadBaseActivity::openFilePart(CLogicalFileSlice * nextSlice)
  10343. {
  10344. unsigned expectedCrc = helper.getDiskFormatCrc();
  10345. unsigned projectedCrc = helper.getProjectedFormatCrc();
  10346. unsigned actualCrc = nextSlice->queryFile()->queryActualCrc();
  10347. IOutputMetaData * actualDiskMeta = nextSlice->queryFile()->queryActualMeta();
  10348. bool tryRemoteStream = actualDiskMeta->queryTypeInfo()->canInterpret() && actualDiskMeta->queryTypeInfo()->canSerialize() &&
  10349. projectedDiskMeta->queryTypeInfo()->canInterpret() && projectedDiskMeta->queryTypeInfo()->canSerialize();
  10350. /*
  10351. * If a file part can be accessed local, then read it locally
  10352. * If a file part supports a remote stream, then use that
  10353. * Otherwise failover to the legacy remote access.
  10354. */
  10355. const char * format = helper.queryFormat();
  10356. // If format is not specified in the ECL then it is deduced from the file. It must be the same for all copies of a file part
  10357. if (!format)
  10358. format = nextSlice->queryFormat();
  10359. Owned<IException> saveOpenExc;
  10360. StringBuffer filenamelist;
  10361. std::vector<unsigned> remoteCandidates;
  10362. // scan for local part 1st
  10363. //MORE: Order of copies should be optimized at this point....
  10364. unsigned numCopies = nextSlice->getNumCopies();
  10365. for (unsigned copy=0; copy<numCopies; copy++)
  10366. {
  10367. if (remoteReadChecker.onlyReadLocally(*nextSlice, copy))
  10368. {
  10369. IDiskRowReader * reader = ensureRowReader(format, false, expectedCrc, *expectedDiskMeta, projectedCrc, *projectedDiskMeta, actualCrc, *actualDiskMeta, nextSlice);
  10370. if (reader->setInputFile(*nextSlice, fieldFilters, copy))
  10371. {
  10372. initStream(nextSlice, reader);
  10373. return true;
  10374. }
  10375. }
  10376. else
  10377. remoteCandidates.push_back(copy);
  10378. }
  10379. //First try remote streaming, and if that does not succeed, fall back to remote reading.
  10380. bool allowFallbackToNonStreaming = true;
  10381. for (;;)
  10382. {
  10383. for (unsigned copy: remoteCandidates)
  10384. {
  10385. StringBuffer filename;
  10386. nextSlice->getURL(filename, copy);
  10387. filenamelist.append('\n').append(filename);
  10388. try
  10389. {
  10390. IDiskRowReader * reader = ensureRowReader(format, tryRemoteStream, expectedCrc, *expectedDiskMeta, projectedCrc, *projectedDiskMeta, actualCrc, *actualDiskMeta, nextSlice);
  10391. if (reader->setInputFile(*nextSlice, fieldFilters, copy))
  10392. {
  10393. initStream(nextSlice, reader);
  10394. return true;
  10395. }
  10396. }
  10397. catch (IException *E)
  10398. {
  10399. saveOrRelease(saveOpenExc, E);
  10400. }
  10401. }
  10402. if (!tryRemoteStream || !allowFallbackToNonStreaming)
  10403. break;
  10404. tryRemoteStream = false;
  10405. }
  10406. if (!(helper.getFlags() & TDRoptional))
  10407. {
  10408. //Should this be unconditional? If the logical file exists, but the file can't be opened, it isn't really what OPT means.
  10409. StringBuffer s;
  10410. StringBuffer tracingName;
  10411. nextSlice->getTracingFilename(tracingName);
  10412. if (filenamelist)
  10413. {
  10414. if (saveOpenExc.get())
  10415. {
  10416. if (!nextSlice->isLogicalFile())
  10417. saveOpenExc->errorMessage(s);
  10418. else
  10419. {
  10420. s.append("Could not open logical file ").append(tracingName).append(" in any of these locations:").append(filenamelist).append(" (");
  10421. saveOpenExc->errorMessage(s).append(")");
  10422. }
  10423. }
  10424. else
  10425. s.append("Could not open logical file ").append(tracingName).append(" in any of these locations:").append(filenamelist).append(" (").append((unsigned)GetLastError()).append(")");
  10426. }
  10427. else
  10428. {
  10429. const char * filename = nextSlice->queryFile()->queryLogicalFilename();
  10430. s.append("Could not open local physical file ").append(filename).append(" (").append((unsigned)GetLastError()).append(")");
  10431. }
  10432. agent.fail(1, s.str());
  10433. }
  10434. return false;
  10435. }
  10436. bool CHThorGenericDiskReadBaseActivity::openNext()
  10437. {
  10438. return openNextPart();
  10439. }
  10440. void CHThorGenericDiskReadBaseActivity::open()
  10441. {
  10442. assertex(!opened);
  10443. opened = true;
  10444. if (!segHelper.canMatchAny())
  10445. {
  10446. setEmptyStream();
  10447. }
  10448. else
  10449. {
  10450. if (!openFirstPart())
  10451. setEmptyStream();
  10452. }
  10453. }
  10454. void CHThorGenericDiskReadBaseActivity::append(FFoption option, const IFieldFilter * filter)
  10455. {
  10456. if (filter->isWild())
  10457. filter->Release();
  10458. else
  10459. fieldFilters.append(*filter);
  10460. }
  10461. //=====================================================================================================
  10462. CHThorGenericDiskReadActivity::CHThorGenericDiskReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNewDiskReadArg &_arg, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree *_node)
  10463. : CHThorGenericDiskReadBaseActivity(_agent, _activityId, _subgraphId, _arg, _arg, _kind, _graph, _node), helper(_arg), outBuilder(NULL)
  10464. {
  10465. hasMatchFilter = helper.hasMatchFilter();
  10466. useRawStream = hasMatchFilter || helper.needTransform();
  10467. }
  10468. void CHThorGenericDiskReadActivity::ready()
  10469. {
  10470. PARENT::ready();
  10471. outBuilder.setAllocator(rowAllocator);
  10472. lastGroupProcessed = processed;
  10473. needTransform = helper.needTransform() || fieldFilters.length();
  10474. limit = helper.getRowLimit();
  10475. if (helper.getFlags() & TDRlimitskips)
  10476. limit = (unsigned __int64) -1;
  10477. stopAfter = helper.getChooseNLimit();
  10478. if (!helper.transformMayFilter() && !helper.hasMatchFilter())
  10479. remoteLimit = stopAfter;
  10480. finishedParts = false;
  10481. }
  10482. void CHThorGenericDiskReadActivity::stop()
  10483. {
  10484. outBuilder.clear();
  10485. PARENT::stop();
  10486. }
  10487. void CHThorGenericDiskReadActivity::onLimitExceeded()
  10488. {
  10489. if ( agent.queryCodeContext()->queryDebugContext())
  10490. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  10491. helper.onLimitExceeded();
  10492. }
  10493. const void *CHThorGenericDiskReadActivity::nextRow()
  10494. {
  10495. //Avoid this check on each row- e.g., initialising streams with a null stream, which returns eof, and falls through to eof processing
  10496. if (!opened) open();
  10497. // Only check once per row returned. Potentially means that heavily filtered datasets may wait a long time to check for abort
  10498. queryUpdateProgress();
  10499. //Avoid this test... Combine the limit checking with choosen, and have choosen/limit triggering set the
  10500. //stream to a special no more rows stream so that subsequent calls do not read records.
  10501. if ((processed - initialProcessed) >= stopAfter)
  10502. return nullptr;
  10503. try
  10504. {
  10505. if (useRawStream)
  10506. {
  10507. for (;;)
  10508. {
  10509. //Returns a row in the serialized form of the projected format
  10510. size32_t nextSize;
  10511. const byte * next = (const byte *)inputRowStream->nextRow(nextSize);
  10512. if (!isSpecialRow(next))
  10513. {
  10514. if (likely(!hasMatchFilter || helper.canMatch(next)))
  10515. {
  10516. size32_t thisSize = helper.transform(outBuilder.ensureRow(), next);
  10517. if (thisSize != 0)
  10518. {
  10519. if (unlikely((processed - initialProcessed) >= limit))
  10520. {
  10521. outBuilder.clear();
  10522. onLimitExceeded();
  10523. return nullptr;
  10524. }
  10525. processed++;
  10526. return outBuilder.finalizeRowClear(thisSize);
  10527. }
  10528. }
  10529. }
  10530. else
  10531. {
  10532. switch (getSpecialRowType(next))
  10533. {
  10534. case SpecialRow::eof:
  10535. if (!openNext())
  10536. return next; // i.e. eof
  10537. //rawStream will have changed, but it cannot change into a rowStream
  10538. break;
  10539. case SpecialRow::eos:
  10540. return next;
  10541. case SpecialRow::eog:
  10542. if (outputGrouped && (processed != lastGroupProcessed))
  10543. {
  10544. lastGroupProcessed = processed;
  10545. //MORE: Change to return next - i.e. an eog marker
  10546. return nullptr;
  10547. }
  10548. break;
  10549. default:
  10550. throwUnexpected();
  10551. }
  10552. }
  10553. }
  10554. }
  10555. else
  10556. {
  10557. //This branch avoids a memcpy from actual to projected followed by a deserialize - since it can map directly
  10558. //May be more efficient to use this branch if serialized==deserialized and there is a filter, but no transform.
  10559. //It would be possible to have two (or more) different implementations, which were created based on
  10560. //whether there was a limit, a transform etc., but unlikely to save more than a couple of boolean tests.
  10561. for (;;)
  10562. {
  10563. const byte * next = (const byte *)inputRowStream->nextRow();
  10564. if (!isSpecialRow(next))
  10565. {
  10566. if (unlikely((processed - initialProcessed) >= limit))
  10567. {
  10568. ReleaseRoxieRow(next);
  10569. onLimitExceeded();
  10570. return nullptr;
  10571. }
  10572. processed++;
  10573. return next;
  10574. }
  10575. else
  10576. {
  10577. switch (getSpecialRowType(next))
  10578. {
  10579. case SpecialRow::eof:
  10580. if (!openNext())
  10581. return next;
  10582. //rowStream will have changed
  10583. break;
  10584. case SpecialRow::eos:
  10585. return next;
  10586. case SpecialRow::eog:
  10587. if (processed != lastGroupProcessed)
  10588. {
  10589. lastGroupProcessed = processed;
  10590. return nullptr;
  10591. }
  10592. break;
  10593. default:
  10594. throwUnexpected();
  10595. }
  10596. }
  10597. }
  10598. }
  10599. }
  10600. catch(IException * e)
  10601. {
  10602. throw makeWrappedException(e);
  10603. }
  10604. return NULL;
  10605. }
  10606. //=====================================================================================================
  //Activity factory definitions produced by the MAKEFACTORY macro (defined outside this section).
  //NOTE(review): presumably each MAKEFACTORY(X) expands to an exported createXActivity() function that
  //news a CHThorXActivity, mirroring the hand-written factories in this part of the file - confirm
  //against the macro definition.
  10607. MAKEFACTORY(DiskWrite);
  10608. MAKEFACTORY(Iterate);
  10609. MAKEFACTORY(Filter);
  10610. MAKEFACTORY(Aggregate);
  10611. MAKEFACTORY(Rollup);
  10612. MAKEFACTORY(Project);
  10613. MAKEFACTORY(PrefetchProject);
  10614. MAKEFACTORY(FilterProject);
  10615. extern HTHOR_API IHThorActivity * createGroupDedupActivity(IAgentContext & _agent, unsigned _activityId, unsigned _subgraphId, IHThorDedupArg & arg, ThorActivityKind kind, EclGraph & _graph)
  10616. {
  10617. if(arg.compareAll())
  10618. return new CHThorGroupDedupAllActivity(_agent, _activityId, _subgraphId, arg, kind, _graph);
  10619. else if (arg.keepLeft() && !arg.keepBest())
  10620. return new CHThorGroupDedupKeepLeftActivity(_agent, _activityId, _subgraphId, arg, kind, _graph);
  10621. else
  10622. return new CHThorGroupDedupKeepRightActivity(_agent, _activityId, _subgraphId, arg, kind, _graph);
  10623. }
  //Further macro-generated activity factories (macro definitions are outside this section).
  //NOTE(review): MAKEFACTORY_ARG takes two names; presumably the first names the activity/factory and the
  //second names the helper interface passed to it (e.g. SelfJoin reuses the Join helper) - confirm against
  //the macro definition.
  10624. MAKEFACTORY(HashDedup);
  10625. MAKEFACTORY(Group);
  10626. MAKEFACTORY(Degroup);
  10627. MAKEFACTORY_ARG(GroupSort, Sort);
  10628. MAKEFACTORY(Join);
  10629. MAKEFACTORY_ARG(SelfJoin, Join);
  10630. MAKEFACTORY_ARG(LookupJoin, HashJoin);
  10631. MAKEFACTORY(AllJoin);
  10632. MAKEFACTORY(WorkUnitWrite);
  10633. MAKEFACTORY(DictionaryWorkUnitWrite);
  10634. MAKEFACTORY(FirstN);
  10635. MAKEFACTORY(InlineTable);
  10636. MAKEFACTORY_ARG(Concat, Funnel);
  10637. MAKEFACTORY(Apply);
  10638. MAKEFACTORY(Sample);
  10639. MAKEFACTORY(Normalize);
  10640. MAKEFACTORY(NormalizeChild);
  10641. MAKEFACTORY(NormalizeLinkedChild);
  10642. MAKEFACTORY(Distribution);
  10643. MAKEFACTORY(RemoteResult);
  10644. MAKEFACTORY(ChooseSets);
  10645. MAKEFACTORY_ARG(ChooseSetsLast, ChooseSetsEx);
  10646. MAKEFACTORY_ARG(ChooseSetsEnth, ChooseSetsEx);
  10647. MAKEFACTORY(WorkunitRead);
  10648. MAKEFACTORY(PipeRead);
  10649. MAKEFACTORY(PipeWrite);
  10650. MAKEFACTORY(CsvWrite);
  10651. MAKEFACTORY(XmlWrite);
  10652. MAKEFACTORY(PipeThrough);
  10653. MAKEFACTORY(If);
  10654. extern HTHOR_API IHThorActivity *createChildIfActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorIfArg &arg, ThorActivityKind kind, EclGraph & _graph)
  10655. {
  10656. return new CHThorIfActivity(_agent, _activityId, _subgraphId, arg, kind, _graph);
  10657. }
  10658. extern HTHOR_API IHThorActivity *createHashAggregateActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorHashAggregateArg &arg, ThorActivityKind kind, EclGraph & _graph, bool _isGroupedAggregate)
  10659. {
  10660. return new CHThorHashAggregateActivity(_agent, _activityId, _subgraphId, arg, kind, _graph, _isGroupedAggregate);
  10661. }
  10662. extern HTHOR_API IHThorActivity *createGenericDiskReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNewDiskReadArg &arg, ThorActivityKind kind, EclGraph & _graph, IPropertyTree * node)
  10663. {
  10664. return new CHThorGenericDiskReadActivity(_agent, _activityId, _subgraphId, arg, kind, _graph, node);
  10665. }
  //Further macro-generated activity factories (macro definitions are outside this section).
  //NOTE(review): where MAKEFACTORY_ARG is used, several activities name the same second argument
  //(e.g. SkipLimit and OnFailLimit both give Limit) - presumably they share a helper interface; confirm
  //against the macro definition.
  10666. MAKEFACTORY(Null);
  10667. MAKEFACTORY(SideEffect);
  10668. MAKEFACTORY(Action);
  10669. MAKEFACTORY(SelectN);
  10670. MAKEFACTORY(Spill);
  10671. MAKEFACTORY(Limit);
  10672. MAKEFACTORY_ARG(SkipLimit, Limit);
  10673. MAKEFACTORY_ARG(OnFailLimit, Limit);
  10674. MAKEFACTORY(Catch);
  10675. MAKEFACTORY_ARG(SkipCatch, Catch);
  10676. MAKEFACTORY(CountProject);
  10677. MAKEFACTORY(IndexWrite);
  10678. MAKEFACTORY(Parse);
  10679. MAKEFACTORY(Enth);
  10680. MAKEFACTORY(TopN);
  10681. MAKEFACTORY(XmlParse);
  10682. MAKEFACTORY(Merge);
  10683. MAKEFACTORY_ARG(HttpRowCall, HttpCall);
  10684. MAKEFACTORY_ARG(SoapRowCall, SoapCall);
  10685. MAKEFACTORY_ARG(SoapRowAction, SoapAction);
  10686. MAKEFACTORY_ARG(SoapDatasetCall, SoapCall);
  10687. MAKEFACTORY_ARG(SoapDatasetAction, SoapAction);
  10688. MAKEFACTORY(DatasetResult);
  10689. MAKEFACTORY(RowResult);
  10690. MAKEFACTORY(ChildIterator);
  10691. extern HTHOR_API IHThorActivity *createDummyActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorArg &arg, ThorActivityKind kind, EclGraph & _graph)
  10692. {
  10693. return new CHThorDummyActivity(_agent, _activityId, _subgraphId, arg, kind, _graph);
  10694. }
  //Remaining macro-generated activity factories (macro definitions are outside this section).
  //NOTE(review): MAKEFACTORY_EXTRA takes an activity name and a type; presumably the type is that of one
  //additional argument forwarded to the activity constructor - confirm against the macro definition.
  //NOTE(review): most invocations below have no trailing ';' while the last three (and earlier lists) do.
  //Both forms are used with the same macros, so the ';' appears to be redundant rather than required.
  10695. MAKEFACTORY_EXTRA(WhenAction,EclGraphElement *)
  10696. MAKEFACTORY_EXTRA(LibraryCall, IPropertyTree *)
  10697. MAKEFACTORY(ChildNormalize)
  10698. MAKEFACTORY(ChildAggregate)
  10699. MAKEFACTORY(ChildGroupAggregate)
  10700. MAKEFACTORY(ChildThroughNormalize)
  10701. MAKEFACTORY_EXTRA(DiskRead, IPropertyTree *)
  10702. MAKEFACTORY_EXTRA(DiskNormalize, IPropertyTree *)
  10703. MAKEFACTORY_EXTRA(DiskAggregate, IPropertyTree *)
  10704. MAKEFACTORY_EXTRA(DiskCount, IPropertyTree *)
  10705. MAKEFACTORY_EXTRA(DiskGroupAggregate, IPropertyTree *)
  10706. MAKEFACTORY_EXTRA(CsvRead, IPropertyTree *)
  10707. MAKEFACTORY_EXTRA(XmlRead, IPropertyTree *)
  10708. MAKEFACTORY_EXTRA(NewDiskRead, IPropertyTree *)
  10709. MAKEFACTORY_EXTRA(LocalResultRead, __int64)
  10710. MAKEFACTORY_EXTRA(LocalResultWrite, __int64)
  10711. MAKEFACTORY_EXTRA(DictionaryResultWrite, __int64)
  10712. MAKEFACTORY_EXTRA(LocalResultSpill, __int64)
  10713. MAKEFACTORY_EXTRA(GraphLoopResultRead, __int64)
  10714. MAKEFACTORY_EXTRA(GraphLoopResultWrite, __int64)
  10715. MAKEFACTORY_EXTRA(NWayGraphLoopResultRead, __int64)
  10716. MAKEFACTORY(Combine)
  10717. MAKEFACTORY(RollupGroup)
  10718. MAKEFACTORY(Regroup)
  10719. MAKEFACTORY(CombineGroup)
  10720. MAKEFACTORY(Case)
  10721. MAKEFACTORY(LinkedRawIterator)
  10722. MAKEFACTORY(GraphLoop)
  10723. MAKEFACTORY(Loop)
  10724. MAKEFACTORY(Process)
  10725. MAKEFACTORY(Grouped)
  10726. MAKEFACTORY(Sorted)
  10727. MAKEFACTORY(Trace)
  10728. MAKEFACTORY(NWayInput)
  10729. MAKEFACTORY(NWaySelect)
  10730. MAKEFACTORY(NonEmpty)
  10731. MAKEFACTORY(FilterGroup);
  10732. MAKEFACTORY(StreamedIterator);
  10733. MAKEFACTORY_EXTRA(External, IPropertyTree *);
  10734. IHThorException * makeHThorException(ThorActivityKind kind, unsigned activityId, unsigned subgraphId, int code, char const * format, ...)
  10735. {
  10736. va_list args;
  10737. va_start(args, format);
  10738. IHThorException * ret = new CHThorException(code, format, args, MSGAUD_user, kind, activityId, subgraphId);
  10739. va_end(args);
  10740. return ret;
  10741. }
  10742. IHThorException * makeHThorException(ThorActivityKind kind, unsigned activityId, unsigned subgraphId, IException * exc)
  10743. {
  10744. return new CHThorException(exc, kind, activityId, subgraphId);
  10745. }
  10746. IHThorException * makeHThorException(ThorActivityKind kind, unsigned activityId, unsigned subgraphId, IException * exc, char const * extra)
  10747. {
  10748. return new CHThorException(exc, extra, kind, activityId, subgraphId);
  10749. }