hthor.cpp 382 KB


  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include <algorithm>
  14. #include "hthor.ipp"
  15. #include "jexcept.hpp"
  16. #include "jmisc.hpp"
  17. #include "jthread.hpp"
  18. #include "jsocket.hpp"
  19. #include "jprop.hpp"
  20. #include "jdebug.hpp"
  21. #include "jlzw.hpp"
  22. #include "jisem.hpp"
  23. #include "roxiedebug.hpp"
  24. #include "roxierow.hpp"
  25. #include "roxiemem.hpp"
  26. #include "eclhelper.hpp"
  27. #include "workunit.hpp"
  28. #include "jfile.hpp"
  29. #include "keybuild.hpp"
  30. #include "rmtclient.hpp"
  31. #include "hrpc.hpp"
  32. #include "hrpcsock.hpp"
  33. #include "dafdesc.hpp"
  34. #include "dautils.hpp"
  35. #include "dasess.hpp"
  36. #include "dadfs.hpp"
  37. #include "thorfile.hpp"
  38. #include "thorsort.hpp"
  39. #include "thorparse.ipp"
  40. #include "thorxmlwrite.hpp"
  41. #include "rtlformat.hpp"
  42. #include "thorcommon.hpp"
  43. #include "jsmartsock.hpp"
  44. #include "thorstep.hpp"
  45. #include "eclagent.ipp"
  46. #include "roxierowbuff.hpp"
  47. #include "ftbase.ipp"
  48. #include "rtldynfield.hpp"
  49. #include "rtlnewkey.hpp"
  50. #include "thormeta.hpp"
  51. #include "thorread.hpp"
  52. #include "ws_dfsclient.hpp"
  53. #define EMPTY_LOOP_LIMIT 1000
  54. static unsigned const hthorReadBufferSize = 0x10000;
  55. static offset_t const defaultHThorDiskWriteSizeLimit = I64C(10*1024*1024*1024); //10 GB, per Nigel
  56. using roxiemem::IRowManager;
  57. using roxiemem::OwnedRoxieRow;
  58. using roxiemem::OwnedRoxieString;
  59. using roxiemem::OwnedConstRoxieRow;
  60. IRowManager * theRowManager;
  61. void setHThorRowManager(IRowManager * manager)
  62. {
  63. theRowManager = manager;
  64. }
  65. IRowManager * queryRowManager()
  66. {
  67. return theRowManager;
  68. }
  69. void throwOOMException(size_t size, char const * label)
  70. {
  71. throw MakeStringException(0, "Out of Memory in hthor: trying to allocate %" I64F "u bytes for %s", (unsigned __int64) size, label);
  72. }
  73. void * checked_malloc(size_t size, char const * label)
  74. {
  75. void * ret = malloc(size);
  76. if(!ret)
  77. throwOOMException(size, label);
  78. return ret;
  79. }
  80. void * checked_calloc(size_t size, size_t num, char const * label)
  81. {
  82. void * ret = calloc(size, num);
  83. if(!ret)
  84. throwOOMException(size*num, label);
  85. return ret;
  86. }
  87. inline bool checkWriteIsCompressed(unsigned int flags, size32_t fixedSize, bool grouped)
  88. {
  89. return ((flags & TDWnewcompress) || ((flags & TDXcompress) && ((0 == fixedSize) || (fixedSize+(grouped?1:0) >= MIN_ROWCOMPRESS_RECSIZE))));
  90. }
  91. inline bool checkReadIsCompressed(unsigned int flags, size32_t fixedSize, bool grouped)
  92. {
  93. return ((flags & TDXcompress) && ((0 == fixedSize) || (fixedSize+(grouped?1:0) >= MIN_ROWCOMPRESS_RECSIZE)));
  94. }
  95. //=====================================================================================================
  96. //=====================================================================================================
  97. CRowBuffer::CRowBuffer(IRecordSize * _recsize, bool _grouped) : recsize(_recsize), grouped(_grouped)
  98. {
  99. fixsize = recsize->getFixedSize();
  100. count = 0;
  101. index = 0;
  102. }
  103. void CRowBuffer::insert(const void * next)
  104. {
  105. buff.append(next);
  106. count++;
  107. }
  108. bool CRowBuffer::pull(IHThorInput * input, unsigned __int64 rowLimit)
  109. {
  110. while(true)
  111. {
  112. OwnedConstRoxieRow next(input->nextRow());
  113. if(!next)
  114. {
  115. next.setown(input->nextRow());
  116. if(!next)
  117. break;
  118. if(grouped)
  119. buff.append(NULL);
  120. }
  121. insert(next.getClear());
  122. if(count > rowLimit)
  123. return false;
  124. }
  125. return true;
  126. }
  127. void CRowBuffer::clear()
  128. {
  129. buff.clear();
  130. index = 0;
  131. count = 0;
  132. }
  133. const void * CRowBuffer::next()
  134. {
  135. if(buff.isItem(index))
  136. return buff.itemClear(index++);
  137. else
  138. return NULL;
  139. }
  140. ILocalOrDistributedFile *resolveLFNFlat(IAgentContext &agent, const char *logicalName, const char *errorTxt, bool optional, bool isPrivilegedUser)
  141. {
  142. Owned<ILocalOrDistributedFile> ldFile = agent.resolveLFN(logicalName, errorTxt, optional, true, AccessMode::tbdRead, nullptr, isPrivilegedUser);
  143. if (!ldFile)
  144. return nullptr;
  145. IDistributedFile *dFile = ldFile->queryDistributedFile();
  146. if (dFile && isFileKey(dFile))
  147. throw MakeStringException(0, "Attempting to read index as a flat file: %s", logicalName);
  148. return ldFile.getClear();
  149. }
  150. bool isRemoteReadCandidate(const IAgentContext &agent, const RemoteFilename &rfn)
  151. {
  152. #ifndef _CONTAINERIZED
  153. if (!agent.queryWorkUnit()->getDebugValueBool("forceRemoteDisabled", false))
  154. {
  155. if (!rfn.isLocal())
  156. return true;
  157. StringBuffer localPath;
  158. rfn.getLocalPath(localPath);
  159. if (agent.queryWorkUnit()->getDebugValueBool("forceRemoteRead", testForceRemote(localPath)))
  160. return true;
  161. }
  162. #endif
  163. return false;
  164. }
  165. //=====================================================================================================
  166. CHThorActivityBase::CHThorActivityBase(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorArg & _help, ThorActivityKind _kind, EclGraph & _graph)
  167. : help(_help), kind(_kind), graph(_graph), agent(_agent), outputMeta(help.queryOutputMeta()), activityId(_activityId), subgraphId(_subgraphId)
  168. {
  169. }
  170. void CHThorActivityBase::setInput(unsigned index, IHThorInput *_input)
  171. {
  172. assertex(index == 0);
  173. input = _input;
  174. }
  175. IHThorInput *CHThorActivityBase::queryOutput(unsigned index)
  176. {
  177. agent.fail(255, "internal logic error: CHThorActivityBase::queryOutput");
  178. // never returns....
  179. return NULL;
  180. }
  181. void CHThorActivityBase::ready()
  182. {
  183. if (input)
  184. input->ready();
  185. if (needsAllocator())
  186. createRowAllocator();
  187. initialProcessed = processed;
  188. }
  189. CHThorActivityBase::~CHThorActivityBase()
  190. {
  191. ::Release(rowAllocator);
  192. }
  193. void CHThorActivityBase::createRowAllocator()
  194. {
  195. if (!rowAllocator)
  196. rowAllocator = agent.queryCodeContext()->getRowAllocator(outputMeta.queryOriginal(), activityId);
  197. }
  198. __int64 CHThorActivityBase::getCount()
  199. {
  200. throw MakeStringException(2, "Internal error: CHThorActivityBase::getCount");
  201. return 0;
  202. }
  203. void CHThorActivityBase::execute()
  204. {
  205. agent.fail(255, "internal logic error: CHThorActivityBase::execute");
  206. }
  207. void CHThorActivityBase::extractResult(unsigned & len, void * & ret)
  208. {
  209. agent.fail(255, "internal logic error: CHThorActivityBase::extractResult");
  210. }
  211. void CHThorActivityBase::stop()
  212. {
  213. if (input)
  214. input->stop();
  215. }
  216. void CHThorActivityBase::resetEOF()
  217. {
  218. if (input)
  219. input->resetEOF();
  220. }
  221. void CHThorActivityBase::updateProgress(IStatisticGatherer &progress) const
  222. {
  223. if (queryOutputs()>0)
  224. updateProgressForOther(progress, activityId, subgraphId);
  225. if (input)
  226. input->updateProgress(progress);
  227. }
  228. void CHThorActivityBase::updateProgressForOther(IStatisticGatherer &progress, unsigned otherActivity, unsigned otherSubgraph) const
  229. {
  230. updateProgressForOther(progress, otherActivity, otherSubgraph, 0, processed);
  231. }
  232. void CHThorActivityBase::updateProgressForOther(IStatisticGatherer &progress, unsigned otherActivity, unsigned otherSubgraph, unsigned whichOutput, unsigned __int64 numProcessed) const
  233. {
  234. StatsEdgeScope scope(progress, otherActivity, whichOutput);
  235. progress.addStatistic(StNumRowsProcessed, numProcessed);
  236. progress.addStatistic(StNumStarts, 1); // wrong for an activity in a subquery
  237. progress.addStatistic(StNumStops, 1);
  238. progress.addStatistic(StNumSlaves, 1); // MORE: A bit pointless for an hthor graph
  239. }
  240. ILocalEclGraphResults * CHThorActivityBase::resolveLocalQuery(__int64 graphId)
  241. {
  242. return static_cast<ILocalEclGraphResults *>(agent.queryCodeContext()->resolveLocalQuery(graphId));
  243. }
  244. IException * CHThorActivityBase::makeWrappedException(IException * e) const
  245. {
  246. if(dynamic_cast<IHThorException *>(e) || dynamic_cast<IUserException *>(e))
  247. return e;
  248. else
  249. return makeHThorException(kind, activityId, subgraphId, e);
  250. }
  251. IException * CHThorActivityBase::makeWrappedException(IException * e, char const * extra) const
  252. {
  253. if(dynamic_cast<IHThorException *>(e) || dynamic_cast<IUserException *>(e))
  254. return e;
  255. else
  256. return makeHThorException(kind, activityId, subgraphId, e, extra);
  257. }
  258. bool CHThorActivityBase::isPassThrough()
  259. {
  260. return false;
  261. }
  262. //=====================================================================================================
  263. CHThorSimpleActivityBase::CHThorSimpleActivityBase(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorArg & _help, ThorActivityKind _kind, EclGraph & _graph) : CHThorActivityBase(_agent, _activityId, _subgraphId, _help, _kind, _graph)
  264. {
  265. }
  266. IHThorInput * CHThorSimpleActivityBase::queryOutput(unsigned index)
  267. {
  268. assertex(index == 0);
  269. return this;
  270. }
  271. bool CHThorSimpleActivityBase::isGrouped()
  272. {
  273. return input ? input->isGrouped() : outputMeta.isGrouped();
  274. }
  275. IOutputMetaData * CHThorSimpleActivityBase::queryOutputMeta() const
  276. {
  277. return outputMeta;
  278. }
  279. //=====================================================================================================
  280. class CHThorClusterWriteHandler : public ClusterWriteHandler
  281. {
  282. IAgentContext &agent;
  283. public:
  284. CHThorClusterWriteHandler(char const * _logicalName, char const * _activityType, IAgentContext &_agent)
  285. : ClusterWriteHandler(_logicalName, _activityType), agent(_agent)
  286. {
  287. }
  288. private:
  289. virtual void getTempFilename(StringAttr & out) const
  290. {
  291. StringBuffer buff;
  292. agent.getTempfileBase(buff).append(PATHSEPCHAR).appendf("cluster_write_%p.%" I64F "d_%u", this, (__int64)GetCurrentThreadId(), GetCurrentProcessId());
  293. out.set(buff.str());
  294. }
  295. };
  296. ClusterWriteHandler *createClusterWriteHandler(IAgentContext &agent, IHThorIndexWriteArg *iwHelper, IHThorDiskWriteArg *dwHelper, const char * lfn, StringAttr &fn, bool extend)
  297. {
  298. //In the containerized system, the default data plane for this component is in the configuration
  299. StringBuffer defaultCluster;
  300. getDefaultStoragePlane(defaultCluster);
  301. Owned<CHThorClusterWriteHandler> clusterHandler;
  302. unsigned clusterIdx = 0;
  303. while(true)
  304. {
  305. OwnedRoxieString helperCluster(iwHelper ? iwHelper->getCluster(clusterIdx++) : dwHelper->getCluster(clusterIdx++));
  306. const char *cluster = helperCluster;
  307. if (!helperCluster && (clusterIdx == 1))
  308. {
  309. if (defaultCluster.length())
  310. cluster = defaultCluster;
  311. }
  312. if (!cluster)
  313. break;
  314. if(!clusterHandler)
  315. {
  316. if(extend)
  317. throw MakeStringException(0, "Cannot combine EXTEND and CLUSTER flags on disk write of file %s", lfn);
  318. clusterHandler.setown(new CHThorClusterWriteHandler(lfn, "OUTPUT", agent));
  319. }
  320. clusterHandler->addCluster(cluster);
  321. }
  322. if(clusterHandler)
  323. {
  324. clusterHandler->getLocalPhysicalFilename(fn);
  325. }
  326. else if (!agent.queryResolveFilesLocally())
  327. {
  328. StringBuffer filenameText;
  329. bool wasDFS;
  330. makeSinglePhysicalPartName(lfn, filenameText, true, wasDFS);
  331. fn.set(filenameText.str());
  332. }
  333. else
  334. {
  335. fn.set(lfn);
  336. }
  337. StringBuffer dir;
  338. splitFilename(fn, &dir, &dir, NULL, NULL);
  339. recursiveCreateDirectory(dir.str());
  340. return clusterHandler.getClear();
  341. }
  342. //=====================================================================================================
  343. CHThorDiskWriteActivity::CHThorDiskWriteActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDiskWriteArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  344. {
  345. incomplete = false;
  346. }
  347. CHThorDiskWriteActivity::~CHThorDiskWriteActivity()
  348. {
  349. diskout.clear();
  350. if(incomplete)
  351. {
  352. PROGLOG("Disk write incomplete, deleting physical file: %s", filename.get());
  353. diskout.clear();
  354. outSeq.clear();
  355. file->remove();
  356. }
  357. }
  358. void CHThorDiskWriteActivity::ready()
  359. {
  360. CHThorActivityBase::ready();
  361. grouped = (helper.getFlags() & TDXgrouped) != 0;
  362. extend = ((helper.getFlags() & TDWextend) != 0);
  363. overwrite = ((helper.getFlags() & TDWoverwrite) != 0);
  364. resolve();
  365. uncompressedBytesWritten = 0;
  366. numRecords = 0;
  367. sizeLimit = agent.queryWorkUnit()->getDebugValueInt64("hthorDiskWriteSizeLimit", defaultHThorDiskWriteSizeLimit);
  368. rowIf.setown(createRowInterfaces(input->queryOutputMeta(), activityId, 0, agent.queryCodeContext()));
  369. open();
  370. }
  371. void CHThorDiskWriteActivity::execute()
  372. {
  373. // Loop thru the results
  374. numRecords = 0;
  375. while (next())
  376. numRecords++;
  377. finishOutput();
  378. }
  379. void CHThorDiskWriteActivity::stop()
  380. {
  381. outSeq->flush(NULL);
  382. if(blockcompressed)
  383. uncompressedBytesWritten = outSeq->getPosition();
  384. close();
  385. updateWorkUnitResult(numRecords);
  386. if((helper.getFlags() & (TDXtemporary | TDXjobtemp) ) == 0 && !agent.queryResolveFilesLocally())
  387. publish();
  388. incomplete = false;
  389. if(clusterHandler)
  390. clusterHandler->finish(file);
  391. CHThorActivityBase::stop();
  392. if (helper.getFlags() & TDXvarfilename)
  393. filename.clear();
  394. }
  395. void CHThorDiskWriteActivity::resolve()
  396. {
  397. OwnedRoxieString rawname = helper.getFileName();
  398. mangleHelperFileName(mangledHelperFileName, rawname, agent.queryWuid(), helper.getFlags());
  399. assertex(mangledHelperFileName.str());
  400. if((helper.getFlags() & (TDXtemporary | TDXjobtemp)) == 0)
  401. {
  402. Owned<ILocalOrDistributedFile> f = agent.resolveLFN(mangledHelperFileName.str(),"Cannot write, invalid logical name",true,false,AccessMode::tbdWrite,&lfn,defaultPrivilegedUser);
  403. if (f)
  404. {
  405. if (f->queryDistributedFile())
  406. {
  407. // An already existing dali file
  408. if(extend)
  409. agent.logFileAccess(f->queryDistributedFile(), "HThor", "EXTENDED", graph);
  410. else if(overwrite) {
  411. LOG(MCoperatorInfo, "Removing %s from DFS", lfn.str());
  412. agent.logFileAccess(f->queryDistributedFile(), "HThor", "DELETED", graph);
  413. if (!agent.queryResolveFilesLocally())
  414. f->queryDistributedFile()->detach();
  415. else
  416. {
  417. Owned<IFile> file = createIFile(lfn);
  418. if (file->exists())
  419. file->remove();
  420. }
  421. }
  422. else
  423. throw MakeStringException(99, "Cannot write %s, file already exists (missing OVERWRITE attribute?)", lfn.str());
  424. }
  425. else if (f->exists() || f->isExternal() || agent.queryResolveFilesLocally())
  426. {
  427. // special/local/external file
  428. if (f->numParts()!=1)
  429. throw MakeStringException(99, "Cannot write %s, external file has multiple parts)", lfn.str());
  430. RemoteFilename rfn;
  431. f->getPartFilename(rfn,0);
  432. StringBuffer full;
  433. if (rfn.isLocal())
  434. rfn.getLocalPath(full);
  435. else
  436. rfn.getRemotePath(full);
  437. filename.set(full);
  438. if (isSpecialPath(filename))
  439. {
  440. PROGLOG("Writing to query %s", filename.get());
  441. return;
  442. }
  443. if (stdIoHandle(filename)>=0) {
  444. PROGLOG("Writing to %s", filename.get());
  445. return;
  446. }
  447. Owned<IFile> file = createIFile(filename);
  448. if (file->exists())
  449. {
  450. if (!overwrite)
  451. throw MakeStringException(99, "Cannot write %s, file already exists (missing OVERWRITE attribute?)", full.str());
  452. file->remove();
  453. }
  454. //Ensure target folder exists
  455. if (!recursiveCreateDirectoryForFile(filename.get()))
  456. {
  457. throw MakeStringException(99, "Cannot create file folder for %s", filename.str());
  458. }
  459. PROGLOG("Writing to file %s", filename.get());
  460. }
  461. f.clear();
  462. }
  463. if (filename.isEmpty()) // wasn't local or special (i.e. DFS file)
  464. {
  465. CDfsLogicalFileName dfsLogicalName;
  466. dfsLogicalName.allowOsPath(agent.queryResolveFilesLocally());
  467. if (!dfsLogicalName.setValidate(lfn.str()))
  468. {
  469. throw MakeStringException(99, "Could not resolve DFS Logical file %s", lfn.str());
  470. }
  471. clusterHandler.setown(createClusterWriteHandler(agent, NULL, &helper, dfsLogicalName.get(), filename, extend));
  472. }
  473. }
  474. else
  475. {
  476. StringBuffer mangledName;
  477. mangleLocalTempFilename(mangledName, mangledHelperFileName.str(), nullptr);
  478. filename.set(agent.noteTemporaryFile(mangledName.str()));
  479. PROGLOG("DISKWRITE: using temporary filename %s", filename.get());
  480. }
  481. }
  482. void CHThorDiskWriteActivity::open()
  483. {
  484. // Open an output file...
  485. file.setown(createIFile(filename));
  486. serializedOutputMeta.set(input->queryOutputMeta()->querySerializedDiskMeta());//returns outputMeta if serialization not needed
  487. Linked<IRecordSize> groupedMeta = input->queryOutputMeta()->querySerializedDiskMeta();
  488. if (grouped)
  489. groupedMeta.setown(createDeltaRecordSize(groupedMeta, +1));
  490. blockcompressed = checkWriteIsCompressed(helper.getFlags(), serializedOutputMeta.getFixedSize(), grouped);//TDWnewcompress for new compression, else check for row compression
  491. void *ekey;
  492. size32_t ekeylen;
  493. helper.getEncryptKey(ekeylen,ekey);
  494. encrypted = false;
  495. Owned<ICompressor> ecomp;
  496. if (ekeylen!=0)
  497. {
  498. ecomp.setown(createAESCompressor256(ekeylen,ekey));
  499. memset(ekey,0,ekeylen);
  500. rtlFree(ekey);
  501. encrypted = true;
  502. blockcompressed = true;
  503. }
  504. if(blockcompressed)
  505. io.setown(createCompressedFileWriter(file, groupedMeta->getFixedSize(), extend, true, ecomp, COMPRESS_METHOD_LZW));
  506. else
  507. io.setown(file->open(extend ? IFOwrite : IFOcreate));
  508. if(!io)
  509. throw MakeStringException(errno, "Failed to create%s file %s for writing", (encrypted ? " encrypted" : (blockcompressed ? " compressed" : "")), filename.get());
  510. incomplete = true;
  511. diskout.setown(createBufferedIOStream(io));
  512. if(extend)
  513. diskout->seek(0, IFSend);
  514. unsigned rwFlags = rw_autoflush;
  515. if (grouped)
  516. rwFlags |= rw_grouped;
  517. if (true) // MORE: Should this be controlled by an activity hint/flag?
  518. rwFlags |= rw_crc;
  519. IExtRowWriter * writer = createRowWriter(diskout, rowIf, rwFlags);
  520. outSeq.setown(writer);
  521. }
  522. const void * CHThorDiskWriteActivity::getNext()
  523. { // through operation (writes and returns row)
  524. // needs a one row lookahead to preserve group
  525. if (!nextrow.get())
  526. {
  527. nextrow.setown(input->nextRow());
  528. if (!nextrow.get())
  529. {
  530. nextrow.setown(input->nextRow());
  531. if (nextrow.get()&&grouped) // only write eog if not at eof
  532. outSeq->putRow(NULL);
  533. return NULL;
  534. }
  535. }
  536. outSeq->putRow(nextrow.getLink());
  537. checkSizeLimit();
  538. return nextrow.getClear();
  539. }
  540. bool CHThorDiskWriteActivity::next()
  541. {
  542. if (!nextrow.get())
  543. {
  544. OwnedConstRoxieRow row(input->nextRow());
  545. if (!row.get())
  546. {
  547. row.setown(input->nextRow());
  548. if (!row.get())
  549. return false; // we are done
  550. if (grouped)
  551. outSeq->putRow(NULL);
  552. }
  553. outSeq->putRow(row.getClear());
  554. }
  555. else
  556. outSeq->putRow(nextrow.getClear());
  557. checkSizeLimit();
  558. return true;
  559. }
  560. void CHThorDiskWriteActivity::finishOutput()
  561. {
  562. }
  563. void CHThorDiskWriteActivity::close()
  564. {
  565. diskout.clear();
  566. outSeq.clear();
  567. if (io)
  568. {
  569. io->flush();
  570. numDiskWrites = io->getStatistic(StNumDiskWrites);
  571. io.clear();
  572. }
  573. if(clusterHandler)
  574. clusterHandler->copyPhysical(file, agent.queryWorkUnit()->getDebugValueBool("__output_cluster_no_copy_physical", false));
  575. }
  576. void CHThorDiskWriteActivity::publish()
  577. {
  578. StringBuffer dir,base;
  579. offset_t fileSize = file->size();
  580. if(clusterHandler)
  581. clusterHandler->getDirAndFilename(dir, base);
  582. else
  583. splitFilename(filename, &dir, &dir, &base, &base);
  584. Owned<IFileDescriptor> desc = createFileDescriptor();
  585. desc->setDefaultDir(dir.str());
  586. Owned<IPropertyTree> attrs;
  587. if(clusterHandler)
  588. attrs.setown(createPTree("Part")); // clusterHandler is going to set attributes
  589. else
  590. {
  591. // add cluster
  592. StringBuffer mygroupname;
  593. Owned<IGroup> mygrp;
  594. if (isContainerized())
  595. {
  596. queryNamedGroupStore().getNasGroupName(mygroupname, 1);
  597. mygrp.setown(queryNamedGroupStore().lookup(mygroupname));
  598. }
  599. else
  600. {
  601. if (!agent.queryResolveFilesLocally())
  602. mygrp.setown(agent.getHThorGroup(mygroupname));
  603. }
  604. ClusterPartDiskMapSpec partmap; // will get this from group at some point
  605. desc->setNumParts(1);
  606. desc->setPartMask(base.str());
  607. desc->addCluster(mygroupname.str(),mygrp, partmap);
  608. attrs.set(&desc->queryPart(0)->queryProperties());
  609. }
  610. //properties of the first file part.
  611. if(blockcompressed)
  612. {
  613. attrs->setPropInt64("@size", uncompressedBytesWritten);
  614. attrs->setPropInt64("@compressedSize", fileSize);
  615. }
  616. else
  617. attrs->setPropInt64("@size", fileSize);
  618. attrs->setPropInt64("@recordCount", numRecords);
  619. CDateTime createTime, modifiedTime, accessedTime;
  620. file->getTime(&createTime, &modifiedTime, &accessedTime);
  621. // round file time down to nearest sec. Nanosec accurancy is not preserved elsewhere and can lead to mismatch later.
  622. unsigned hour, min, sec, nanosec;
  623. modifiedTime.getTime(hour, min, sec, nanosec);
  624. modifiedTime.setTime(hour, min, sec, 0);
  625. StringBuffer timestr;
  626. modifiedTime.getString(timestr);
  627. if(timestr.length())
  628. attrs->setProp("@modified", timestr.str());
  629. if(clusterHandler)
  630. clusterHandler->setDescriptorParts(desc, base.str(), attrs);
  631. // properties of the logical file
  632. IPropertyTree & properties = desc->queryProperties();
  633. properties.setPropInt64("@size", (blockcompressed) ? uncompressedBytesWritten : fileSize);
  634. if (encrypted)
  635. properties.setPropBool("@encrypted", true);
  636. if (blockcompressed)
  637. properties.setPropBool("@blockCompressed", true);
  638. if (helper.getFlags() & TDWpersist)
  639. properties.setPropBool("@persistent", true);
  640. if (grouped)
  641. properties.setPropBool("@grouped", true);
  642. properties.setPropInt64("@recordCount", numRecords);
  643. properties.setProp("@owner", agent.queryWorkUnit()->queryUser());
  644. if (helper.getFlags() & (TDWowned|TDXjobtemp|TDXtemporary))
  645. properties.setPropBool("@owned", true);
  646. if (helper.getFlags() & TDWresult)
  647. properties.setPropBool("@result", true);
  648. properties.setProp("@workunit", agent.queryWorkUnit()->queryWuid());
  649. properties.setProp("@job", agent.queryWorkUnit()->queryJobName());
  650. setFormat(desc);
  651. if (helper.getFlags() & TDWexpires)
  652. setExpiryTime(properties, helper.getExpiryDays());
  653. if (helper.getFlags() & TDWupdate)
  654. {
  655. unsigned eclCRC;
  656. unsigned __int64 totalCRC;
  657. helper.getUpdateCRCs(eclCRC, totalCRC);
  658. properties.setPropInt("@eclCRC", eclCRC);
  659. properties.setPropInt64("@totalCRC", totalCRC);
  660. }
  661. properties.setPropInt("@formatCrc", helper.getFormatCrc());
  662. if (helper.getFlags() & TDWrestricted)
  663. properties.setPropBool("restricted", true);
  664. properties.setPropInt64("@numDiskWrites", numDiskWrites);
  665. StringBuffer lfn;
  666. expandLogicalFilename(lfn, mangledHelperFileName.str(), agent.queryWorkUnit(), agent.queryResolveFilesLocally(), false);
  667. CDfsLogicalFileName logicalName;
  668. if (agent.queryResolveFilesLocally())
  669. logicalName.allowOsPath(true);
  670. if (!logicalName.setValidate(lfn.str()))
  671. throw MakeStringException(99, "Cannot publish %s, invalid logical name", lfn.str());
  672. if (!logicalName.isExternal()) // no need to publish externals
  673. {
  674. Owned<IDistributedFile> file = queryDistributedFileDirectory().createNew(desc);
  675. if(file->getModificationTime(modifiedTime))
  676. file->setAccessedTime(modifiedTime);
  677. if ((helper.getFlags() & TDXtemporary) == 0)
  678. {
  679. StringBuffer clusterName;
  680. file->getClusterName(0, clusterName);
  681. diskAccessCost = money2cost_type(calcFileAccessCost(clusterName, numDiskWrites, 0));
  682. }
  683. file->attach(logicalName.get(), agent.queryCodeContext()->queryUserDescriptor());
  684. agent.logFileAccess(file, "HThor", "CREATED", graph);
  685. }
  686. }
  687. void CHThorDiskWriteActivity::updateProgress(IStatisticGatherer &progress) const
  688. {
  689. CHThorActivityBase::updateProgress(progress);
  690. StatsActivityScope scope(progress, activityId);
  691. progress.addStatistic(StNumDiskWrites, numDiskWrites);
  692. if ((helper.getFlags() & TDXtemporary) == 0)
  693. progress.addStatistic(StCostFileAccess, diskAccessCost);
  694. }
  695. void CHThorDiskWriteActivity::updateWorkUnitResult(unsigned __int64 reccount)
  696. {
  697. if(lfn.length()) //this is required as long as temp files don't get a name which can be stored in the WU and automatically deleted by the WU
  698. {
  699. WorkunitUpdate wu = agent.updateWorkUnit();
  700. StringArray clusters;
  701. if (clusterHandler)
  702. clusterHandler->getClusters(clusters);
  703. else
  704. clusters.append(wu->queryClusterName());
  705. unsigned flags = helper.getFlags();
  706. if (!agent.queryResolveFilesLocally())
  707. {
  708. WUFileKind fileKind;
  709. if (TDXtemporary & flags)
  710. fileKind = WUFileTemporary;
  711. else if(TDXjobtemp & flags)
  712. fileKind = WUFileJobOwned;
  713. else if(TDWowned & flags)
  714. fileKind = WUFileOwned;
  715. else
  716. fileKind = WUFileStandard;
  717. wu->addFile(lfn.str(), &clusters, helper.getTempUsageCount(), fileKind, NULL);
  718. }
  719. else if ((TDXtemporary | TDXjobtemp) & flags)
  720. agent.noteTemporaryFilespec(filename);//note for later deletion
  721. if (!(flags & TDXtemporary) && helper.getSequence() >= 0)
  722. {
  723. Owned<IWUResult> result = wu->updateResultBySequence(helper.getSequence());
  724. if (result)
  725. {
  726. result->setResultTotalRowCount(reccount);
  727. result->setResultStatus(ResultStatusCalculated);
  728. if (helper.getFlags() & TDWresult)
  729. result->setResultFilename(lfn.str());
  730. else
  731. result->setResultLogicalName(lfn.str());
  732. }
  733. }
  734. }
  735. }
  736. void CHThorDiskWriteActivity::setFormat(IFileDescriptor * desc)
  737. {
  738. if ((serializedOutputMeta.isFixedSize()) && !isOutputTransformed())
  739. desc->queryProperties().setPropInt("@recordSize", serializedOutputMeta.getFixedSize() + (grouped ? 1 : 0));
  740. const char *recordECL = helper.queryRecordECL();
  741. if (recordECL && *recordECL)
  742. desc->queryProperties().setProp("ECL", recordECL);
  743. setRtlFormat(desc->queryProperties(), helper.queryDiskRecordSize());
  744. desc->queryProperties().setProp("@kind", "flat");
  745. }
  746. void CHThorDiskWriteActivity::checkSizeLimit()
  747. {
  748. if(sizeLimit && outSeq && (outSeq->getPosition() > sizeLimit))
  749. {
  750. StringBuffer msg;
  751. msg.append("Exceeded disk write size limit of ").append(sizeLimit).append(" while writing file ").append(mangledHelperFileName.str());
  752. throw MakeStringExceptionDirect(0, msg.str());
  753. }
  754. }
  755. //=====================================================================================================
  756. CHThorSpillActivity::CHThorSpillActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorSpillArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorDiskWriteActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  757. {
  758. }
  759. void CHThorSpillActivity::setInput(unsigned index, IHThorInput *_input)
  760. {
  761. CHThorActivityBase::setInput(index, _input);
  762. }
  763. void CHThorSpillActivity::ready()
  764. {
  765. CHThorDiskWriteActivity::ready();
  766. }
  767. void CHThorSpillActivity::execute()
  768. {
  769. UNIMPLEMENTED;
  770. }
  771. const void *CHThorSpillActivity::nextRow()
  772. {
  773. const void *nextrec = getNext();
  774. if (nextrec)
  775. {
  776. numRecords++;
  777. processed++;
  778. }
  779. return nextrec;
  780. }
  781. void CHThorSpillActivity::stop()
  782. {
  783. for (;;)
  784. {
  785. OwnedConstRoxieRow nextrec(nextRow());
  786. if (!nextrec)
  787. {
  788. nextrec.setown(nextRow());
  789. if (!nextrec)
  790. break;
  791. }
  792. }
  793. finishOutput();
  794. CHThorDiskWriteActivity::stop();
  795. }
  796. //=====================================================================================================
  797. CHThorCsvWriteActivity::CHThorCsvWriteActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorCsvWriteArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorDiskWriteActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  798. {
  799. csvOutput.init(helper.queryCsvParameters(),agent.queryWorkUnit()->getDebugValueBool("oldCSVoutputFormat", false));
  800. }
  801. void CHThorCsvWriteActivity::execute()
  802. {
  803. OwnedRoxieString header(helper.queryCsvParameters()->getHeader());
  804. if (header) {
  805. csvOutput.beginLine();
  806. csvOutput.writeHeaderLn(strlen(header), header);
  807. diskout->write(csvOutput.length(), csvOutput.str());
  808. }
  809. // Loop thru the results
  810. numRecords = 0;
  811. for (;;)
  812. {
  813. OwnedConstRoxieRow nextrec(input->nextRow());
  814. if (!nextrec)
  815. {
  816. nextrec.setown(input->nextRow());
  817. if (!nextrec)
  818. break;
  819. }
  820. try
  821. {
  822. csvOutput.beginLine();
  823. helper.writeRow((const byte *)nextrec.get(), &csvOutput);
  824. csvOutput.endLine();
  825. }
  826. catch(IException * e)
  827. {
  828. throw makeWrappedException(e);
  829. }
  830. diskout->write(csvOutput.length(), csvOutput.str());
  831. numRecords++;
  832. }
  833. OwnedRoxieString footer(helper.queryCsvParameters()->getFooter());
  834. if (footer) {
  835. csvOutput.beginLine();
  836. csvOutput.writeHeaderLn(strlen(footer), footer);
  837. diskout->write(csvOutput.length(), csvOutput.str());
  838. }
  839. }
  840. void CHThorCsvWriteActivity::setFormat(IFileDescriptor * desc)
  841. {
  842. // MORE - should call parent's setFormat too?
  843. ICsvParameters * csvInfo = helper.queryCsvParameters();
  844. OwnedRoxieString rs(csvInfo->getSeparator(0));
  845. StringBuffer separator;
  846. const char *s = rs;
  847. while (s && *s)
  848. {
  849. if (',' == *s)
  850. separator.append("\\,");
  851. else
  852. separator.append(*s);
  853. ++s;
  854. }
  855. desc->queryProperties().setProp("@csvSeparate", separator.str());
  856. desc->queryProperties().setProp("@csvQuote", rs.setown(csvInfo->getQuote(0)));
  857. desc->queryProperties().setProp("@csvTerminate", rs.setown(csvInfo->getTerminator(0)));
  858. desc->queryProperties().setProp("@csvEscape", rs.setown(csvInfo->getEscape(0)));
  859. desc->queryProperties().setProp("@format","utf8n");
  860. desc->queryProperties().setProp("@kind", "csv");
  861. const char *recordECL = helper.queryRecordECL();
  862. if (recordECL && *recordECL)
  863. desc->queryProperties().setProp("ECL", recordECL);
  864. setRtlFormat(desc->queryProperties(), helper.queryDiskRecordSize());
  865. }
  866. //=====================================================================================================
  867. CHThorXmlWriteActivity::CHThorXmlWriteActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorXmlWriteArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorDiskWriteActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), headerLength(0), footerLength(0)
  868. {
  869. OwnedRoxieString xmlpath(helper.getXmlIteratorPath());
  870. if (!xmlpath)
  871. rowTag.append(DEFAULTXMLROWTAG);
  872. else
  873. {
  874. const char *path = xmlpath;
  875. if (*path == '/') path++;
  876. if (strchr(path, '/')) UNIMPLEMENTED; // more what do we do with /mydata/row
  877. rowTag.append(path);
  878. }
  879. }
  880. void CHThorXmlWriteActivity::execute()
  881. {
  882. // Loop thru the results
  883. numRecords = 0;
  884. StringBuffer header;
  885. OwnedRoxieString suppliedHeader(helper.getHeader());
  886. if (kind==TAKjsonwrite)
  887. buildJsonHeader(header, suppliedHeader, rowTag);
  888. else if (suppliedHeader)
  889. header.set(suppliedHeader);
  890. else
  891. header.append(DEFAULTXMLHEADER).newline();
  892. headerLength = header.length();
  893. diskout->write(headerLength, header.str());
  894. Owned<IXmlWriterExt> writer = createIXmlWriterExt(helper.getXmlFlags(), 0, NULL, (kind==TAKjsonwrite) ? WTJSONRootless : WTStandard);
  895. writer->outputBeginArray(rowTag); //need to set up the array
  896. writer->clear(); //but not output it
  897. for (;;)
  898. {
  899. OwnedConstRoxieRow nextrec(input->nextRow());
  900. if (!nextrec)
  901. {
  902. nextrec.setown(input->nextRow());
  903. if (!nextrec)
  904. break;
  905. }
  906. try
  907. {
  908. writer->clear().outputBeginNested(rowTag, false);
  909. helper.toXML((const byte *)nextrec.get(), *writer);
  910. writer->outputEndNested(rowTag);
  911. }
  912. catch(IException * e)
  913. {
  914. throw makeWrappedException(e);
  915. }
  916. diskout->write(writer->length(), writer->str());
  917. numRecords++;
  918. }
  919. OwnedRoxieString suppliedFooter(helper.getFooter());
  920. StringBuffer footer;
  921. if (kind==TAKjsonwrite)
  922. buildJsonFooter(footer.newline(), suppliedFooter, rowTag);
  923. else if (suppliedFooter)
  924. footer.append(suppliedFooter);
  925. else
  926. footer.append(DEFAULTXMLFOOTER).newline();
  927. footerLength=footer.length();
  928. diskout->write(footerLength, footer);
  929. }
  930. void CHThorXmlWriteActivity::setFormat(IFileDescriptor * desc)
  931. {
  932. desc->queryProperties().setProp("@format","utf8n");
  933. desc->queryProperties().setProp("@rowTag",rowTag.str());
  934. desc->queryProperties().setProp("@kind", (kind==TAKjsonwrite) ? "json" : "xml");
  935. desc->queryProperties().setPropInt(FPheaderLength, headerLength);
  936. desc->queryProperties().setPropInt(FPfooterLength, footerLength);
  937. const char *recordECL = helper.queryRecordECL();
  938. if (recordECL && *recordECL)
  939. desc->queryProperties().setProp("ECL", recordECL);
  940. setRtlFormat(desc->queryProperties(), helper.queryDiskRecordSize());
  941. }
  942. //=====================================================================================================
  943. void throwPipeProcessError(unsigned err, char const * preposition, char const * program, IPipeProcess * pipe)
  944. {
  945. StringBuffer msg;
  946. msg.append("Error piping ").append(preposition).append(" (").append(program).append("): ");
  947. if (START_FAILURE == err) // PIPE process didn't start at all, START_FAILURE is our own error code
  948. msg.append("process failed to start");
  949. else
  950. msg.append("process failed with code ").append(err);
  951. if(pipe->hasError())
  952. {
  953. try
  954. {
  955. char error[512];
  956. size32_t sz = pipe->readError(sizeof(error), error);
  957. if(sz && sz!=(size32_t)-1)
  958. msg.append(", stderr: '").append(sz, error).append("'");
  959. }
  960. catch (IException *e)
  961. {
  962. EXCLOG(e, "Error reading pipe stderr");
  963. e->Release();
  964. }
  965. }
  966. throw MakeStringExceptionDirect(2, msg.str());
  967. }
  968. //=====================================================================================================
  969. CHThorIndexWriteActivity::CHThorIndexWriteActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorIndexWriteArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  970. {
  971. incomplete = false;
  972. StringBuffer lfn;
  973. OwnedRoxieString fname(helper.getFileName());
  974. expandLogicalFilename(lfn, fname, agent.queryWorkUnit(), agent.queryResolveFilesLocally(), false);
  975. if (!agent.queryResolveFilesLocally())
  976. {
  977. Owned<IDistributedFile> f = wsdfs::lookup(lfn, agent.queryCodeContext()->queryUserDescriptor(), AccessMode::tbdWrite, false, false, nullptr, defaultNonPrivilegedUser, INFINITE);
  978. if (f)
  979. {
  980. if (TIWoverwrite & helper.getFlags())
  981. {
  982. LOG(MCuserInfo, "Removing %s from DFS", lfn.str());
  983. agent.logFileAccess(f, "HThor", "DELETED", _graph);
  984. f->detach();
  985. }
  986. else // not quite sure about raising exceptions in constructors
  987. throw MakeStringException(99, "Cannot write %s, file already exists (missing OVERWRITE attribute?)", lfn.str());
  988. }
  989. }
  990. clusterHandler.setown(createClusterWriteHandler(agent, &helper, NULL, lfn, filename, false));
  991. sizeLimit = agent.queryWorkUnit()->getDebugValueInt64("hthorDiskWriteSizeLimit", defaultHThorDiskWriteSizeLimit);
  992. defaultNoSeek = agent.queryWorkUnit()->getDebugValueBool("noSeekBuildIndex", isContainerized());
  993. }
  994. CHThorIndexWriteActivity::~CHThorIndexWriteActivity()
  995. {
  996. if(incomplete)
  997. {
  998. PROGLOG("Index write incomplete, deleting physical file: %s", filename.get());
  999. file->remove();
  1000. }
  1001. }
  1002. void CHThorIndexWriteActivity::execute()
  1003. {
  1004. size32_t maxDiskRecordSize;
  1005. if (helper.queryDiskRecordSize()->isVariableSize())
  1006. {
  1007. if (helper.getFlags() & TIWmaxlength)
  1008. maxDiskRecordSize = helper.getMaxKeySize();
  1009. else
  1010. maxDiskRecordSize = KEYBUILD_MAXLENGTH; // Current default behaviour, could be improved in the future
  1011. }
  1012. else
  1013. maxDiskRecordSize = helper.queryDiskRecordSize()->getFixedSize();
  1014. if (maxDiskRecordSize > KEYBUILD_MAXLENGTH)
  1015. throw MakeStringException(99, "Index maximum record length (%d) exceeds 32K internal limit", maxDiskRecordSize);
  1016. OwnedMalloc<char> rowBuffer(maxDiskRecordSize, true);
  1017. // Loop thru the results
  1018. unsigned __int64 reccount = 0;
  1019. unsigned int fileCrc = -1;
  1020. file.setown(createIFile(filename.get()));
  1021. {
  1022. OwnedIFileIO io;
  1023. try
  1024. {
  1025. io.setown(file->open(IFOcreate));
  1026. }
  1027. catch(IException * e)
  1028. {
  1029. e->Release();
  1030. clearKeyStoreCache(false);
  1031. io.setown(file->open(IFOcreate));
  1032. }
  1033. incomplete = true;
  1034. bool needsSeek = true;
  1035. bool isVariable = helper.queryDiskRecordSize()->isVariableSize();
  1036. unsigned flags = COL_PREFIX | HTREE_FULLSORT_KEY;
  1037. if (helper.getFlags() & TIWrowcompress)
  1038. flags |= HTREE_COMPRESSED_KEY|HTREE_QUICK_COMPRESSED_KEY;
  1039. else if (!(helper.getFlags() & TIWnolzwcompress))
  1040. flags |= HTREE_COMPRESSED_KEY;
  1041. if (isVariable)
  1042. flags |= HTREE_VARSIZE;
  1043. Owned<IPropertyTree> metadata;
  1044. buildUserMetadata(metadata);
  1045. buildLayoutMetadata(metadata);
  1046. unsigned nodeSize = metadata->getPropInt("_nodeSize", NODESIZE);
  1047. if (metadata->getPropBool("_noSeek", defaultNoSeek))
  1048. {
  1049. flags |= TRAILING_HEADER_ONLY;
  1050. needsSeek = false;
  1051. }
  1052. if (metadata->getPropBool("_useTrailingHeader", true))
  1053. flags |= USE_TRAILING_HEADER;
  1054. size32_t keyMaxSize = helper.queryDiskRecordSize()->getRecordSize(NULL);
  1055. if (hasTrailingFileposition(helper.queryDiskRecordSize()->queryTypeInfo()))
  1056. keyMaxSize -= sizeof(offset_t);
  1057. Owned<IFileIOStream> out = createBufferedIOStream(io, 0x100000);
  1058. if (!needsSeek)
  1059. out.setown(createNoSeekIOStream(out));
  1060. Owned<IKeyBuilder> builder = createKeyBuilder(out, flags, keyMaxSize, nodeSize, helper.getKeyedSize(), 0, &helper, true, false);
  1061. class BcWrapper : implements IBlobCreator
  1062. {
  1063. IKeyBuilder *builder;
  1064. public:
  1065. BcWrapper(IKeyBuilder *_builder) : builder(_builder) {}
  1066. virtual unsigned __int64 createBlob(size32_t size, const void * ptr)
  1067. {
  1068. return builder->createBlob(size, (const char *) ptr);
  1069. }
  1070. } bc(builder);
  1071. for (;;)
  1072. {
  1073. OwnedConstRoxieRow nextrec(input->nextRow());
  1074. if (!nextrec)
  1075. {
  1076. nextrec.setown(input->nextRow());
  1077. if (!nextrec)
  1078. break;
  1079. }
  1080. try
  1081. {
  1082. unsigned __int64 fpos;
  1083. RtlStaticRowBuilder rowBuilder(rowBuffer, maxDiskRecordSize);
  1084. size32_t thisSize = helper.transform(rowBuilder, nextrec, &bc, fpos);
  1085. builder->processKeyData(rowBuffer, fpos, thisSize);
  1086. }
  1087. catch(IException * e)
  1088. {
  1089. throw makeWrappedException(e);
  1090. }
  1091. if(sizeLimit && (out->tell() > sizeLimit))
  1092. {
  1093. StringBuffer msg;
  1094. OwnedRoxieString fname(helper.getFileName());
  1095. msg.append("Exceeded disk write size limit of ").append(sizeLimit).append(" while writing index ").append(fname);
  1096. throw MakeStringExceptionDirect(0, msg.str());
  1097. }
  1098. reccount++;
  1099. }
  1100. builder->finish(metadata, &fileCrc);
  1101. duplicateKeyCount = builder->getDuplicateCount();
  1102. cummulativeDuplicateKeyCount += duplicateKeyCount;
  1103. numDiskWrites = io->getStatistic(StNumDiskWrites);
  1104. out->flush();
  1105. out.clear();
  1106. }
  1107. if(clusterHandler)
  1108. clusterHandler->copyPhysical(file, agent.queryWorkUnit()->getDebugValueBool("__output_cluster_no_copy_physical", false));
  1109. clearKeyStoreCacheEntry(file->queryFilename());
  1110. // Now publish to name services
  1111. StringBuffer dir,base;
  1112. offset_t indexFileSize = file->size();
  1113. if(clusterHandler)
  1114. clusterHandler->getDirAndFilename(dir, base);
  1115. else
  1116. splitFilename(filename, &dir, &dir, &base, &base);
  1117. Owned<IFileDescriptor> desc = createFileDescriptor();
  1118. desc->setDefaultDir(dir.str());
  1119. //properties of the first file part.
  1120. Owned<IPropertyTree> attrs;
  1121. if(clusterHandler)
  1122. attrs.setown(createPTree("Part")); // clusterHandler is going to set attributes
  1123. else
  1124. {
  1125. // add cluster
  1126. StringBuffer mygroupname;
  1127. Owned<IGroup> mygrp = NULL;
  1128. if (isContainerized())
  1129. {
  1130. queryNamedGroupStore().getNasGroupName(mygroupname, 1);
  1131. mygrp.setown(queryNamedGroupStore().lookup(mygroupname));
  1132. }
  1133. else
  1134. {
  1135. if (!agent.queryResolveFilesLocally())
  1136. mygrp.setown(agent.getHThorGroup(mygroupname));
  1137. }
  1138. ClusterPartDiskMapSpec partmap; // will get this from group at some point
  1139. desc->setNumParts(1);
  1140. desc->setPartMask(base.str());
  1141. desc->addCluster(mygroupname.str(),mygrp, partmap);
  1142. attrs.set(&desc->queryPart(0)->queryProperties());
  1143. }
  1144. attrs->setPropInt64("@size", indexFileSize);
  1145. attrs->setPropInt64("@recordCount", reccount);
  1146. CDateTime createTime, modifiedTime, accessedTime;
  1147. file->getTime(&createTime, &modifiedTime, &accessedTime);
  1148. // round file time down to nearest sec. Nanosec accurancy is not preserved elsewhere and can lead to mismatch later.
  1149. unsigned hour, min, sec, nanosec;
  1150. modifiedTime.getTime(hour, min, sec, nanosec);
  1151. modifiedTime.setTime(hour, min, sec, 0);
  1152. StringBuffer timestr;
  1153. modifiedTime.getString(timestr);
  1154. if(timestr.length())
  1155. attrs->setProp("@modified", timestr.str());
  1156. if(clusterHandler)
  1157. clusterHandler->setDescriptorParts(desc, base.str(), attrs);
  1158. // properties of the logical file
  1159. IPropertyTree & properties = desc->queryProperties();
  1160. properties.setProp("@kind", "key");
  1161. properties.setPropInt64("@size", indexFileSize);
  1162. properties.setPropInt64("@recordCount", reccount);
  1163. properties.setProp("@owner", agent.queryWorkUnit()->queryUser());
  1164. properties.setProp("@workunit", agent.queryWorkUnit()->queryWuid());
  1165. properties.setProp("@job", agent.queryWorkUnit()->queryJobName());
  1166. properties.setPropInt64("@duplicateKeyCount",duplicateKeyCount);
  1167. properties.setPropInt64("@numDiskWrites", numDiskWrites);
  1168. char const * rececl = helper.queryRecordECL();
  1169. if(rececl && *rececl)
  1170. properties.setProp("ECL", rececl);
  1171. if (helper.getFlags() & TIWexpires)
  1172. setExpiryTime(properties, helper.getExpiryDays());
  1173. if (helper.getFlags() & TIWupdate)
  1174. {
  1175. unsigned eclCRC;
  1176. unsigned __int64 totalCRC;
  1177. helper.getUpdateCRCs(eclCRC, totalCRC);
  1178. properties.setPropInt("@eclCRC", eclCRC);
  1179. properties.setPropInt64("@totalCRC", totalCRC);
  1180. }
  1181. properties.setPropInt("@fileCrc", fileCrc);
  1182. properties.setPropInt("@formatCrc", helper.getFormatCrc());
  1183. // Legacy record layout info
  1184. void * layoutMetaBuff;
  1185. size32_t layoutMetaSize;
  1186. if(helper.getIndexLayout(layoutMetaSize, layoutMetaBuff))
  1187. {
  1188. properties.setPropBin("_record_layout", layoutMetaSize, layoutMetaBuff);
  1189. rtlFree(layoutMetaBuff);
  1190. }
  1191. if (helper.getFlags() & TIWrestricted)
  1192. properties.setPropBool("restricted", true);
  1193. // New record layout info
  1194. setRtlFormat(properties, helper.queryDiskRecordSize());
  1195. // Bloom info
  1196. const IBloomBuilderInfo * const *bloomFilters = helper.queryBloomInfo();
  1197. while (bloomFilters && *bloomFilters)
  1198. {
  1199. const IBloomBuilderInfo *info = *bloomFilters++;
  1200. IPropertyTree *bloom = properties.addPropTree("Bloom");
  1201. bloom->setPropInt64("@bloomFieldMask", info->getBloomFields());
  1202. bloom->setPropInt64("@bloomLimit", info->getBloomLimit()); // MORE - if we didn't actually build because of the limit that might be interesting. Though that's going to vary by part.
  1203. VStringBuffer pval("%f", info->getBloomProbability());
  1204. bloom->setProp("@bloomProbability", pval.str());
  1205. }
  1206. StringBuffer lfn;
  1207. Owned<IDistributedFile> dfile = NULL;
  1208. if (!agent.queryResolveFilesLocally())
  1209. {
  1210. dfile.setown(queryDistributedFileDirectory().createNew(desc));
  1211. OwnedRoxieString fname(helper.getFileName());
  1212. expandLogicalFilename(lfn, fname, agent.queryWorkUnit(), agent.queryResolveFilesLocally(), false);
  1213. dfile->attach(lfn.str(),agent.queryCodeContext()->queryUserDescriptor());
  1214. agent.logFileAccess(dfile, "HThor", "CREATED", graph);
  1215. StringBuffer clusterName;
  1216. dfile->getClusterName(0, clusterName);
  1217. diskAccessCost = money2cost_type(calcFileAccessCost(clusterName, numDiskWrites, 0));
  1218. }
  1219. else
  1220. lfn = filename;
  1221. incomplete = false;
  1222. if(clusterHandler)
  1223. clusterHandler->finish(file);
  1224. // and update wu info
  1225. if (helper.getSequence() >= 0)
  1226. {
  1227. WorkunitUpdate wu = agent.updateWorkUnit();
  1228. Owned<IWUResult> result = wu->updateResultBySequence(helper.getSequence());
  1229. if (result)
  1230. {
  1231. result->setResultTotalRowCount(reccount);
  1232. result->setResultStatus(ResultStatusCalculated);
  1233. result->setResultLogicalName(lfn.str());
  1234. }
  1235. }
  1236. }
  1237. void CHThorIndexWriteActivity::buildUserMetadata(Owned<IPropertyTree> & metadata)
  1238. {
  1239. size32_t nameLen;
  1240. char * nameBuff;
  1241. size32_t valueLen;
  1242. char * valueBuff;
  1243. unsigned idx = 0;
  1244. while(helper.getIndexMeta(nameLen, nameBuff, valueLen, valueBuff, idx++))
  1245. {
  1246. StringBuffer name(nameLen, nameBuff);
  1247. StringBuffer value(valueLen, valueBuff);
  1248. if(*nameBuff == '_' && !checkReservedMetadataName(name))
  1249. {
  1250. OwnedRoxieString fname(helper.getFileName());
  1251. throw MakeStringException(0, "Invalid name %s in user metadata for index %s (names beginning with underscore are reserved)", name.str(), fname.get());
  1252. }
  1253. if(!validateXMLTag(name.str()))
  1254. {
  1255. OwnedRoxieString fname(helper.getFileName());
  1256. throw MakeStringException(0, "Invalid name %s in user metadata for index %s (not legal XML element name)", name.str(), fname.get());
  1257. }
  1258. if(!metadata) metadata.setown(createPTree("metadata"));
  1259. metadata->setProp(name.str(), value.str());
  1260. }
  1261. }
  1262. void CHThorIndexWriteActivity::buildLayoutMetadata(Owned<IPropertyTree> & metadata)
  1263. {
  1264. if(!metadata) metadata.setown(createPTree("metadata"));
  1265. metadata->setProp("_record_ECL", helper.queryRecordECL());
  1266. setRtlFormat(*metadata, helper.queryDiskRecordSize());
  1267. }
  1268. //=====================================================================================================
  1269. class CHThorPipeReadActivity : public CHThorSimpleActivityBase
  1270. {
  1271. IHThorPipeReadArg &helper;
  1272. Owned<IPipeProcess> pipe;
  1273. StringAttr pipeCommand;
  1274. Owned<IOutputRowDeserializer> rowDeserializer;
  1275. Owned<IReadRowStream> readTransformer;
  1276. bool groupSignalled;
  1277. public:
  1278. CHThorPipeReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorPipeReadArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  1279. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  1280. {
  1281. groupSignalled = true;
  1282. }
  1283. virtual bool needsAllocator() const { return true; }
  1284. virtual void ready()
  1285. {
  1286. groupSignalled = true; // i.e. don't start with a NULL row
  1287. CHThorSimpleActivityBase::ready();
  1288. rowDeserializer.setown(rowAllocator->createDiskDeserializer(agent.queryCodeContext()));
  1289. OwnedRoxieString xmlIteratorPath(helper.getXmlIteratorPath());
  1290. readTransformer.setown(createReadRowStream(rowAllocator, rowDeserializer, helper.queryXmlTransformer(), helper.queryCsvTransformer(), xmlIteratorPath, helper.getPipeFlags()));
  1291. OwnedRoxieString pipeProgram(helper.getPipeProgram());
  1292. openPipe(pipeProgram);
  1293. }
  1294. virtual void stop()
  1295. {
  1296. //Need to close the output (or read it in its entirety), otherwise we might wait forever for the
  1297. //program to finish
  1298. if (pipe)
  1299. pipe->closeOutput();
  1300. pipe.clear();
  1301. readTransformer->setStream(NULL);
  1302. CHThorSimpleActivityBase::stop();
  1303. }
  1304. virtual const void *nextRow()
  1305. {
  1306. while (!waitForPipe())
  1307. {
  1308. if (!pipe)
  1309. return NULL;
  1310. if (helper.getPipeFlags() & TPFgroupeachrow)
  1311. {
  1312. if (!groupSignalled)
  1313. {
  1314. groupSignalled = true;
  1315. return NULL;
  1316. }
  1317. }
  1318. }
  1319. const void *ret = readTransformer->next();
  1320. assertex(ret != NULL); // if ret can ever be NULL then we need to recode this logic
  1321. processed++;
  1322. groupSignalled = false;
  1323. return ret;
  1324. }
  1325. protected:
  1326. bool waitForPipe()
  1327. {
  1328. if (!pipe)
  1329. return false; // done
  1330. if (!readTransformer->eos())
  1331. return true;
  1332. verifyPipe();
  1333. return false;
  1334. }
  1335. void openPipe(char const * cmd)
  1336. {
  1337. pipeCommand.setown(cmd);
  1338. pipe.setown(createPipeProcess(agent.queryAllowedPipePrograms()));
  1339. if(!pipe->run(NULL, cmd, ".", false, true, true, 0x10000))
  1340. {
  1341. // NB: pipe->run can't rely on the child process failing fast enough to return false here, failure picked up later with stderr context.
  1342. WARNLOG(2, "Could not run pipe process %s", cmd);
  1343. }
  1344. Owned<ISimpleReadStream> pipeReader = pipe->getOutputStream();
  1345. readTransformer->setStream(pipeReader.get());
  1346. }
  1347. void verifyPipe()
  1348. {
  1349. if (pipe)
  1350. {
  1351. unsigned err = pipe->wait();
  1352. if(err && !(helper.getPipeFlags() & TPFnofail))
  1353. throwPipeProcessError(err, "from", pipeCommand.get(), pipe);
  1354. pipe.clear();
  1355. }
  1356. }
  1357. };
  1358. //=====================================================================================================
  1359. // Through pipe code - taken from Roxie implementation
  1360. interface IPipeRecordPullerCallback : extends IExceptionHandler
  1361. {
  1362. virtual void processRow(const void *row) = 0;
  1363. virtual void processDone() = 0;
  1364. virtual const void *nextInput() = 0;
  1365. };
  1366. class CPipeRecordPullerThread : public Thread
  1367. {
  1368. protected:
  1369. IPipeRecordPullerCallback *helper;
  1370. bool eog;
  1371. public:
  1372. CPipeRecordPullerThread() : Thread("PipeRecordPullerThread")
  1373. {
  1374. helper = NULL;
  1375. eog = false;
  1376. }
  1377. void setInput(IPipeRecordPullerCallback *_helper)
  1378. {
  1379. helper = _helper;
  1380. }
  1381. virtual int run()
  1382. {
  1383. try
  1384. {
  1385. for (;;)
  1386. {
  1387. const void * row = helper->nextInput();
  1388. if (row)
  1389. {
  1390. eog = false;
  1391. helper->processRow(row);
  1392. }
  1393. else if (!eog)
  1394. {
  1395. eog = true;
  1396. }
  1397. else
  1398. {
  1399. break;
  1400. }
  1401. }
  1402. helper->processDone();
  1403. }
  1404. catch (IException *e)
  1405. {
  1406. helper->fireException(e);
  1407. }
  1408. catch (...)
  1409. {
  1410. helper->fireException(MakeStringException(2, "Unexpected exception caught in PipeRecordPullerThread::run"));
  1411. }
  1412. return 0;
  1413. }
  1414. };
  1415. class CHThorPipeThroughActivity : public CHThorSimpleActivityBase, implements IPipeRecordPullerCallback
  1416. {
  1417. IHThorPipeThroughArg &helper;
  1418. CPipeRecordPullerThread puller;
  1419. Owned<IPipeProcess> pipe;
  1420. StringAttr pipeCommand;
  1421. InterruptableSemaphore pipeVerified;
  1422. InterruptableSemaphore pipeOpened;
  1423. CachedOutputMetaData inputMeta;
  1424. Owned<IOutputRowSerializer> rowSerializer;
  1425. Owned<IOutputRowDeserializer> rowDeserializer;
  1426. Owned<IPipeWriteXformHelper> writeTransformer;
  1427. Owned<IReadRowStream> readTransformer;
  1428. bool firstRead;
  1429. bool recreate;
  1430. bool inputExhausted;
  1431. bool groupSignalled;
  1432. public:
  1433. CHThorPipeThroughActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorPipeThroughArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  1434. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  1435. {
  1436. recreate = helper.recreateEachRow();
  1437. groupSignalled = true;
  1438. firstRead = false;
  1439. inputExhausted = false;
  1440. puller.setInput(this);
  1441. }
  1442. virtual void ready()
  1443. {
  1444. CHThorSimpleActivityBase::ready();
  1445. // From the create() in roxie
  1446. inputMeta.set(input->queryOutputMeta());
  1447. rowSerializer.setown(inputMeta.createDiskSerializer(agent.queryCodeContext(), activityId));
  1448. rowDeserializer.setown(rowAllocator->createDiskDeserializer(agent.queryCodeContext()));
  1449. writeTransformer.setown(createPipeWriteXformHelper(helper.getPipeFlags(), helper.queryXmlOutput(), helper.queryCsvOutput(), rowSerializer));
  1450. // From the start() in roxie
  1451. firstRead = true;
  1452. inputExhausted = false;
  1453. groupSignalled = true; // i.e. don't start with a NULL row
  1454. pipeVerified.reinit();
  1455. pipeOpened.reinit();
  1456. writeTransformer->ready();
  1457. if (!readTransformer)
  1458. {
  1459. OwnedRoxieString xmlIterator(helper.getXmlIteratorPath());
  1460. readTransformer.setown(createReadRowStream(rowAllocator, rowDeserializer, helper.queryXmlTransformer(), helper.queryCsvTransformer(), xmlIterator, helper.getPipeFlags()));
  1461. }
  1462. if(!recreate)
  1463. {
  1464. OwnedRoxieString pipeProgram(helper.getPipeProgram());
  1465. openPipe(pipeProgram);
  1466. }
  1467. puller.start();
  1468. }
  1469. void stop()
  1470. {
  1471. //Need to close the output (or read it in its entirety), otherwise we might wait forever for the
  1472. //program to finish
  1473. if (pipe)
  1474. pipe->closeOutput();
  1475. pipeVerified.interrupt(NULL);
  1476. pipeOpened.interrupt(NULL);
  1477. puller.join();
  1478. CHThorSimpleActivityBase::stop();
  1479. pipe.clear();
  1480. readTransformer->setStream(NULL);
  1481. }
  1482. virtual bool needsAllocator() const { return true; }
  1483. virtual const void *nextRow()
  1484. {
  1485. while (!waitForPipe())
  1486. {
  1487. if (!pipe)
  1488. return NULL;
  1489. if (helper.getPipeFlags() & TPFgroupeachrow)
  1490. {
  1491. if (!groupSignalled)
  1492. {
  1493. groupSignalled = true;
  1494. return NULL;
  1495. }
  1496. }
  1497. }
  1498. const void *ret = readTransformer->next();
  1499. assertex(ret != NULL); // if ret can ever be NULL then we need to recode this logic
  1500. processed++;
  1501. groupSignalled = false;
  1502. return ret;
  1503. }
  1504. virtual bool isGrouped()
  1505. {
  1506. return outputMeta.isGrouped();
  1507. }
  1508. virtual void processRow(const void *row)
  1509. {
  1510. // called from puller thread
  1511. if(recreate)
  1512. openPipe(helper.getNameFromRow(row));
  1513. try
  1514. {
  1515. writeTransformer->writeTranslatedText(row, pipe);
  1516. }
  1517. catch (IException *)
  1518. {
  1519. ReleaseRoxieRow(row);
  1520. throw;
  1521. }
  1522. ReleaseRoxieRow(row);
  1523. if(recreate)
  1524. {
  1525. closePipe();
  1526. pipeVerified.wait();
  1527. }
  1528. }
  1529. virtual void processDone()
  1530. {
  1531. // called from puller thread
  1532. if(recreate)
  1533. {
  1534. inputExhausted = true;
  1535. pipeOpened.signal();
  1536. }
  1537. else
  1538. {
  1539. closePipe();
  1540. pipeVerified.wait();
  1541. }
  1542. }
  1543. virtual const void *nextInput()
  1544. {
  1545. return input->nextRow();
  1546. }
  1547. virtual bool fireException(IException *e)
  1548. {
  1549. inputExhausted = true;
  1550. pipeOpened.interrupt(LINK(e));
  1551. pipeVerified.interrupt(e);
  1552. return true;
  1553. }
  1554. private:
  1555. bool waitForPipe()
  1556. {
  1557. Owned<IException> pipeException;
  1558. try
  1559. {
  1560. if (firstRead)
  1561. {
  1562. pipeOpened.wait();
  1563. firstRead = false;
  1564. }
  1565. if (!pipe)
  1566. return false; // done
  1567. if (!readTransformer->eos())
  1568. return true;
  1569. }
  1570. catch (IException *e)
  1571. {
  1572. // NB: the original exception is probably a IPipeProcessException, but because InterruptableSemaphore rethrows it, we must catch it as an IException
  1573. pipeException.setown(e);
  1574. }
  1575. verifyPipe();
  1576. if (pipeException) // NB: verifyPipe may throw error based on pipe prog. output 1st.
  1577. throw pipeException.getClear();
  1578. if (recreate && !inputExhausted)
  1579. pipeOpened.wait();
  1580. return false;
  1581. }
  1582. void openPipe(char const * cmd)
  1583. {
  1584. pipeCommand.setown(cmd);
  1585. pipe.setown(createPipeProcess(agent.queryAllowedPipePrograms()));
  1586. if(!pipe->run(NULL, cmd, ".", true, true, true, 0x10000))
  1587. {
  1588. // NB: pipe->run can't rely on the child process failing fast enough to return false here, failure picked up later with stderr context.
  1589. WARNLOG(2, "Could not run pipe process %s", cmd);
  1590. }
  1591. else
  1592. writeTransformer->writeHeader(pipe);
  1593. Owned<ISimpleReadStream> pipeReader = pipe->getOutputStream();
  1594. readTransformer->setStream(pipeReader.get());
  1595. pipeOpened.signal();
  1596. }
  1597. void closePipe()
  1598. {
  1599. writeTransformer->writeFooter(pipe);
  1600. pipe->closeInput();
  1601. }
  1602. void verifyPipe()
  1603. {
  1604. if (pipe)
  1605. {
  1606. unsigned err = pipe->wait();
  1607. if(err && !(helper.getPipeFlags() & TPFnofail))
  1608. throwPipeProcessError(err, "through", pipeCommand.get(), pipe);
  1609. pipe.clear();
  1610. pipeVerified.signal();
  1611. }
  1612. }
  1613. };
  1614. class CHThorPipeWriteActivity : public CHThorActivityBase
  1615. {
  1616. IHThorPipeWriteArg &helper;
  1617. Owned<IPipeProcess> pipe;
  1618. StringAttr pipeCommand;
  1619. CachedOutputMetaData inputMeta;
  1620. Owned<IOutputRowSerializer> rowSerializer;
  1621. Owned<IPipeWriteXformHelper> writeTransformer;
  1622. bool firstRead;
  1623. bool recreate;
  1624. bool inputExhausted;
  1625. public:
  1626. IMPLEMENT_SINKACTIVITY;
  1627. CHThorPipeWriteActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorPipeWriteArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  1628. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  1629. {
  1630. recreate = helper.recreateEachRow();
  1631. firstRead = false;
  1632. inputExhausted = false;
  1633. }
  1634. virtual bool needsAllocator() const { return true; }
  1635. virtual void ready()
  1636. {
  1637. CHThorActivityBase::ready();
  1638. inputMeta.set(input->queryOutputMeta());
  1639. rowSerializer.setown(inputMeta.createDiskSerializer(agent.queryCodeContext(), activityId));
  1640. writeTransformer.setown(createPipeWriteXformHelper(helper.getPipeFlags(), helper.queryXmlOutput(), helper.queryCsvOutput(), rowSerializer));
  1641. firstRead = true;
  1642. inputExhausted = false;
  1643. writeTransformer->ready();
  1644. if(!recreate)
  1645. {
  1646. OwnedRoxieString pipeProgram(helper.getPipeProgram());
  1647. openPipe(pipeProgram);
  1648. }
  1649. }
  1650. virtual void execute()
  1651. {
  1652. Owned<IException> pipeException;
  1653. try
  1654. {
  1655. for (;;)
  1656. {
  1657. OwnedConstRoxieRow row(input->nextRow());
  1658. if (!row)
  1659. {
  1660. row.setown(input->nextRow());
  1661. if (!row)
  1662. break;
  1663. }
  1664. processed++;
  1665. if (recreate)
  1666. openPipe(helper.getNameFromRow(row));
  1667. writeTransformer->writeTranslatedText(row, pipe);
  1668. if (recreate)
  1669. {
  1670. closePipe();
  1671. verifyPipe();
  1672. }
  1673. }
  1674. if (!recreate)
  1675. closePipe();
  1676. }
  1677. catch (IException *e)
  1678. {
  1679. // NB: the original exception is probably a IPipeProcessException, but because InterruptableSemaphore rethrows it, we must catch it as an IException
  1680. pipeException.setown(e);
  1681. }
  1682. verifyPipe();
  1683. if (pipeException) // NB: verifyPipe may throw error based on pipe prog. output 1st.
  1684. throw pipeException.getClear();
  1685. if (helper.getSequence() >= 0)
  1686. {
  1687. WorkunitUpdate wu = agent.updateWorkUnit();
  1688. Owned<IWUResult> result = wu->updateResultBySequence(helper.getSequence());
  1689. if (result)
  1690. {
  1691. result->setResultTotalRowCount(processed);
  1692. result->setResultStatus(ResultStatusCalculated);
  1693. }
  1694. }
  1695. }
  1696. private:
  1697. void openPipe(char const * cmd)
  1698. {
  1699. pipeCommand.setown(cmd);
  1700. pipe.setown(createPipeProcess(agent.queryAllowedPipePrograms()));
  1701. if (!pipe->run(NULL, cmd, ".", true, false, true, 0x10000))
  1702. {
  1703. // NB: pipe->run can't rely on the child process failing fast enough to return false here, failure picked up later with stderr context.
  1704. WARNLOG(2, "Could not run pipe process %s", cmd);
  1705. }
  1706. else
  1707. writeTransformer->writeHeader(pipe);
  1708. }
  1709. void closePipe()
  1710. {
  1711. writeTransformer->writeFooter(pipe);
  1712. pipe->closeInput();
  1713. }
  1714. void verifyPipe()
  1715. {
  1716. if (pipe)
  1717. {
  1718. unsigned err = pipe->wait();
  1719. if(err && !(helper.getPipeFlags() & TPFnofail))
  1720. throwPipeProcessError(err, "to", pipeCommand.get(), pipe);
  1721. pipe.clear();
  1722. }
  1723. }
  1724. };
  1725. //=====================================================================================================
  1726. CHThorIterateActivity::CHThorIterateActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorIterateArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  1727. {
  1728. }
  1729. void CHThorIterateActivity::stop()
  1730. {
  1731. CHThorSimpleActivityBase::stop();
  1732. right.clear();
  1733. left.clear();
  1734. }
  1735. void CHThorIterateActivity::ready()
  1736. {
  1737. CHThorSimpleActivityBase::ready();
  1738. if (!defaultRecord)
  1739. {
  1740. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  1741. size32_t thisSize = helper.createDefault(rowBuilder);
  1742. defaultRecord.setown(rowBuilder.finalizeRowClear(thisSize));
  1743. }
  1744. counter = 0;
  1745. }
  1746. const void *CHThorIterateActivity::nextRow()
  1747. {
  1748. for (;;)
  1749. {
  1750. right.setown(input->nextRow());
  1751. if(!right)
  1752. {
  1753. bool skippedGroup = (!left) && (counter > 0); //we have just skipped entire group, but shouldn't output a double null
  1754. left.clear();
  1755. counter = 0;
  1756. if(skippedGroup) continue;
  1757. return NULL;
  1758. }
  1759. try
  1760. {
  1761. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  1762. unsigned outSize = helper.transform(rowBuilder, left ? left : defaultRecord, right, ++counter);
  1763. if (outSize)
  1764. {
  1765. left.setown(rowBuilder.finalizeRowClear(outSize));
  1766. processed++;
  1767. return left.getLink();
  1768. }
  1769. }
  1770. catch(IException * e)
  1771. {
  1772. throw makeWrappedException(e);
  1773. }
  1774. }
  1775. }
  1776. //=====================================================================================================
  1777. CHThorProcessActivity::CHThorProcessActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorProcessArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  1778. {
  1779. }
  1780. CHThorProcessActivity::~CHThorProcessActivity()
  1781. {
  1782. }
  1783. void CHThorProcessActivity::ready()
  1784. {
  1785. CHThorSimpleActivityBase::ready();
  1786. rightRowAllocator.setown(agent.queryCodeContext()->getRowAllocator( helper.queryRightRecordSize(), activityId));
  1787. RtlDynamicRowBuilder rowBuilder(rightRowAllocator);
  1788. size32_t thisSize = helper.createInitialRight(rowBuilder);
  1789. initialRight.setown(rowBuilder.finalizeRowClear(thisSize));
  1790. curRight.set(initialRight);
  1791. counter = 0;
  1792. }
  1793. const void *CHThorProcessActivity::nextRow()
  1794. {
  1795. try
  1796. {
  1797. for (;;)
  1798. {
  1799. OwnedConstRoxieRow next(input->nextRow());
  1800. if (!next)
  1801. {
  1802. bool eog = (curRight != initialRight); // processed any records?
  1803. counter = 0;
  1804. curRight.set(initialRight);
  1805. if (eog)
  1806. return NULL;
  1807. next.setown(input->nextRow());
  1808. if (!next)
  1809. return NULL;
  1810. }
  1811. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  1812. RtlDynamicRowBuilder rightRowBuilder(rightRowAllocator);
  1813. size32_t outSize = helper.transform(rowBuilder, rightRowBuilder, next, curRight, ++counter);
  1814. if (outSize)
  1815. {
  1816. size32_t rightSize = rightRowAllocator->queryOutputMeta()->getRecordSize(rightRowBuilder.getSelf()); // yuk
  1817. curRight.setown(rightRowBuilder.finalizeRowClear(rightSize));
  1818. processed++;
  1819. return rowBuilder.finalizeRowClear(outSize);
  1820. }
  1821. }
  1822. }
  1823. catch(IException * e)
  1824. {
  1825. throw makeWrappedException(e);
  1826. }
  1827. }
  1828. //=====================================================================================================
  1829. CHThorNormalizeActivity::CHThorNormalizeActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNormalizeArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  1830. {
  1831. IRecordSize* recSize = outputMeta;
  1832. if (recSize == NULL)
  1833. throw MakeStringException(2, "Unexpected null pointer from helper.queryOutputMeta()");
  1834. }
  1835. CHThorNormalizeActivity::~CHThorNormalizeActivity()
  1836. {
  1837. }
  1838. void CHThorNormalizeActivity::ready()
  1839. {
  1840. CHThorSimpleActivityBase::ready();
  1841. numThisRow = 0;
  1842. curRow = 0;
  1843. numProcessedLastGroup = processed;
  1844. }
  1845. const void *CHThorNormalizeActivity::nextRow()
  1846. {
  1847. for (;;)
  1848. {
  1849. while (curRow == numThisRow)
  1850. {
  1851. inbuff.setown(input->nextRow());
  1852. if (!inbuff && (processed == numProcessedLastGroup))
  1853. inbuff.setown(input->nextRow());
  1854. if (!inbuff)
  1855. {
  1856. numProcessedLastGroup = processed;
  1857. return NULL;
  1858. }
  1859. curRow = 0;
  1860. numThisRow = helper.numExpandedRows(inbuff);
  1861. }
  1862. try
  1863. {
  1864. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  1865. memsize_t thisSize = helper.transform(rowBuilder, inbuff, ++curRow);
  1866. if(thisSize != 0)
  1867. {
  1868. processed++;
  1869. return rowBuilder.finalizeRowClear(thisSize);
  1870. }
  1871. }
  1872. catch(IException * e)
  1873. {
  1874. throw makeWrappedException(e);
  1875. }
  1876. }
  1877. }
  1878. //=====================================================================================================
  1879. CHThorNormalizeChildActivity::CHThorNormalizeChildActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNormalizeChildArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  1880. {
  1881. }
  1882. CHThorNormalizeChildActivity::~CHThorNormalizeChildActivity()
  1883. {
  1884. }
  1885. bool CHThorNormalizeChildActivity::advanceInput()
  1886. {
  1887. for (;;)
  1888. {
  1889. inbuff.setown(input->nextRow());
  1890. if (!inbuff && (processed == numProcessedLastGroup))
  1891. inbuff.setown(input->nextRow());
  1892. if (!inbuff)
  1893. {
  1894. numProcessedLastGroup = processed;
  1895. return false;
  1896. }
  1897. curChildRow = cursor->first(inbuff);
  1898. if (curChildRow)
  1899. {
  1900. curRow = 0;
  1901. return true;
  1902. }
  1903. }
  1904. }
  1905. void CHThorNormalizeChildActivity::stop()
  1906. {
  1907. inbuff.clear();
  1908. CHThorSimpleActivityBase::stop();
  1909. }
  1910. void CHThorNormalizeChildActivity::ready()
  1911. {
  1912. CHThorSimpleActivityBase::ready();
  1913. curRow = 0;
  1914. numProcessedLastGroup = processed;
  1915. cursor = helper.queryIterator();
  1916. curChildRow = NULL;
  1917. }
  1918. const void *CHThorNormalizeChildActivity::nextRow()
  1919. {
  1920. for (;;)
  1921. {
  1922. if (!inbuff)
  1923. {
  1924. if (!advanceInput())
  1925. return NULL;
  1926. }
  1927. try
  1928. {
  1929. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  1930. size32_t outSize = helper.transform(rowBuilder, inbuff, curChildRow, ++curRow);
  1931. curChildRow = cursor->next();
  1932. if (!curChildRow)
  1933. inbuff.clear();
  1934. if (outSize != 0)
  1935. {
  1936. processed++;
  1937. return rowBuilder.finalizeRowClear(outSize);
  1938. }
  1939. }
  1940. catch(IException * e)
  1941. {
  1942. throw makeWrappedException(e);
  1943. }
  1944. }
  1945. }
  1946. //=================================================================================
  1947. bool CHThorNormalizeLinkedChildActivity::advanceInput()
  1948. {
  1949. for (;;)
  1950. {
  1951. curParent.setown(input->nextRow());
  1952. if (!curParent && (processed == numProcessedLastGroup))
  1953. curParent.setown(input->nextRow());
  1954. if (!curParent)
  1955. {
  1956. numProcessedLastGroup = processed;
  1957. return false;
  1958. }
  1959. curChild.set(helper.first(curParent));
  1960. if (curChild)
  1961. return true;
  1962. }
  1963. }
  1964. CHThorNormalizeLinkedChildActivity::CHThorNormalizeLinkedChildActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNormalizeLinkedChildArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  1965. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  1966. {
  1967. }
  1968. CHThorNormalizeLinkedChildActivity::~CHThorNormalizeLinkedChildActivity()
  1969. {
  1970. }
  1971. void CHThorNormalizeLinkedChildActivity::ready()
  1972. {
  1973. numProcessedLastGroup = 0;
  1974. CHThorSimpleActivityBase::ready();
  1975. }
  1976. void CHThorNormalizeLinkedChildActivity::stop()
  1977. {
  1978. curParent.clear();
  1979. curChild.clear();
  1980. CHThorSimpleActivityBase::stop();
  1981. }
  1982. const void * CHThorNormalizeLinkedChildActivity::nextRow()
  1983. {
  1984. for (;;)
  1985. {
  1986. if (!curParent)
  1987. {
  1988. if (!advanceInput())
  1989. return NULL;
  1990. }
  1991. try
  1992. {
  1993. const void *ret = curChild.getClear();
  1994. curChild.set(helper.next());
  1995. if (!curChild)
  1996. curParent.clear();
  1997. if (ret)
  1998. {
  1999. processed++;
  2000. return ret;
  2001. }
  2002. }
  2003. catch (IException *E)
  2004. {
  2005. throw makeWrappedException(E);
  2006. }
  2007. }
  2008. }
  2009. //=====================================================================================================
  2010. CHThorProjectActivity::CHThorProjectActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorProjectArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2011. {
  2012. }
  2013. CHThorProjectActivity::~CHThorProjectActivity()
  2014. {
  2015. }
  2016. void CHThorProjectActivity::ready()
  2017. {
  2018. CHThorSimpleActivityBase::ready();
  2019. numProcessedLastGroup = processed;
  2020. }
  2021. const void * CHThorProjectActivity::nextRow()
  2022. {
  2023. for (;;)
  2024. {
  2025. OwnedConstRoxieRow in(input->nextRow());
  2026. if (!in)
  2027. {
  2028. if (numProcessedLastGroup == processed)
  2029. in.setown(input->nextRow());
  2030. if (!in)
  2031. {
  2032. numProcessedLastGroup = processed;
  2033. return NULL;
  2034. }
  2035. }
  2036. try
  2037. {
  2038. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  2039. size32_t outSize = helper.transform(rowBuilder, in);
  2040. if (outSize)
  2041. {
  2042. processed++;
  2043. return rowBuilder.finalizeRowClear(outSize);
  2044. }
  2045. }
  2046. catch(IException * e)
  2047. {
  2048. throw makeWrappedException(e);
  2049. }
  2050. }
  2051. }
  2052. //=====================================================================================================
  2053. CHThorPrefetchProjectActivity::CHThorPrefetchProjectActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorPrefetchProjectArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2054. {
  2055. }
  2056. void CHThorPrefetchProjectActivity::ready()
  2057. {
  2058. CHThorSimpleActivityBase::ready();
  2059. recordCount = 0;
  2060. numProcessedLastGroup = processed;
  2061. eof = !helper.canMatchAny();
  2062. child = helper.queryChild();
  2063. }
  2064. const void * CHThorPrefetchProjectActivity::nextRow()
  2065. {
  2066. if (eof)
  2067. return NULL;
  2068. for (;;)
  2069. {
  2070. try
  2071. {
  2072. OwnedConstRoxieRow row(input->nextRow());
  2073. if (!row)
  2074. {
  2075. if (numProcessedLastGroup == processed)
  2076. row.setown(input->nextRow());
  2077. if (!row)
  2078. {
  2079. numProcessedLastGroup = processed;
  2080. return NULL;
  2081. }
  2082. }
  2083. ++recordCount;
  2084. rtlRowBuilder extract;
  2085. if (helper.preTransform(extract,row,recordCount))
  2086. {
  2087. Owned<IEclGraphResults> results;
  2088. if (child)
  2089. {
  2090. results.setown(child->evaluate(extract.size(), extract.getbytes()));
  2091. }
  2092. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  2093. size32_t outSize = helper.transform(rowBuilder, row, results, recordCount);
  2094. if (outSize)
  2095. {
  2096. processed++;
  2097. return rowBuilder.finalizeRowClear(outSize);
  2098. }
  2099. }
  2100. }
  2101. catch(IException * e)
  2102. {
  2103. throw makeWrappedException(e);
  2104. }
  2105. }
  2106. }
  2107. //=====================================================================================================
  2108. CHThorFilterProjectActivity::CHThorFilterProjectActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorFilterProjectArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2109. {
  2110. }
  2111. CHThorFilterProjectActivity::~CHThorFilterProjectActivity()
  2112. {
  2113. }
  2114. void CHThorFilterProjectActivity::ready()
  2115. {
  2116. CHThorSimpleActivityBase::ready();
  2117. recordCount = 0;
  2118. numProcessedLastGroup = processed;
  2119. eof = !helper.canMatchAny();
  2120. }
  2121. const void * CHThorFilterProjectActivity::nextRow()
  2122. {
  2123. if (eof)
  2124. return NULL;
  2125. for (;;)
  2126. {
  2127. OwnedConstRoxieRow in = input->nextRow();
  2128. if (!in)
  2129. {
  2130. recordCount = 0;
  2131. if (numProcessedLastGroup == processed)
  2132. in.setown(input->nextRow());
  2133. if (!in)
  2134. {
  2135. numProcessedLastGroup = processed;
  2136. return NULL;
  2137. }
  2138. }
  2139. try
  2140. {
  2141. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  2142. size32_t outSize = helper.transform(rowBuilder, in, ++recordCount);
  2143. if (outSize)
  2144. {
  2145. processed++;
  2146. return rowBuilder.finalizeRowClear(outSize);
  2147. }
  2148. }
  2149. catch(IException * e)
  2150. {
  2151. throw makeWrappedException(e);
  2152. }
  2153. }
  2154. }
  2155. //=====================================================================================================
  2156. CHThorCountProjectActivity::CHThorCountProjectActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorCountProjectArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2157. {
  2158. }
  2159. CHThorCountProjectActivity::~CHThorCountProjectActivity()
  2160. {
  2161. }
  2162. void CHThorCountProjectActivity::ready()
  2163. {
  2164. CHThorSimpleActivityBase::ready();
  2165. recordCount = 0;
  2166. numProcessedLastGroup = processed;
  2167. }
  2168. const void * CHThorCountProjectActivity::nextRow()
  2169. {
  2170. for (;;)
  2171. {
  2172. OwnedConstRoxieRow in = input->nextRow();
  2173. if (!in)
  2174. {
  2175. recordCount = 0;
  2176. if (numProcessedLastGroup == processed)
  2177. in.setown(input->nextRow());
  2178. if (!in)
  2179. {
  2180. numProcessedLastGroup = processed;
  2181. return NULL;
  2182. }
  2183. }
  2184. try
  2185. {
  2186. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  2187. size32_t outSize = helper.transform(rowBuilder, in, ++recordCount);
  2188. if (outSize)
  2189. {
  2190. processed++;
  2191. return rowBuilder.finalizeRowClear(outSize);
  2192. }
  2193. }
  2194. catch(IException * e)
  2195. {
  2196. throw makeWrappedException(e);
  2197. }
  2198. }
  2199. }
  2200. //=====================================================================================================
  2201. CHThorRollupActivity::CHThorRollupActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorRollupArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2202. {
  2203. }
  2204. CHThorRollupActivity::~CHThorRollupActivity()
  2205. {
  2206. }
  2207. void CHThorRollupActivity::ready()
  2208. {
  2209. CHThorSimpleActivityBase::ready();
  2210. left.setown(input->nextRow());
  2211. prev.set(left);
  2212. }
  2213. void CHThorRollupActivity::stop()
  2214. {
  2215. left.clear();
  2216. prev.clear();
  2217. right.clear();
  2218. CHThorSimpleActivityBase::stop();
  2219. }
  2220. const void *CHThorRollupActivity::nextRow()
  2221. {
  2222. for (;;)
  2223. {
  2224. right.setown(input->nextRow());
  2225. if(!prev || !right || !helper.matches(prev,right))
  2226. {
  2227. const void * ret = left.getClear();
  2228. if(ret)
  2229. {
  2230. processed++;
  2231. }
  2232. left.setown(right.getClear());
  2233. prev.set(left);
  2234. return ret;
  2235. }
  2236. try
  2237. {
  2238. //MORE: could optimise by reusing buffer, but would have to make sure to call destructor on previous contents before overwriting
  2239. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  2240. if(unsigned outSize = helper.transform(rowBuilder, left, right))
  2241. {
  2242. left.setown(rowBuilder.finalizeRowClear(outSize));
  2243. }
  2244. if (helper.getFlags() & RFrolledismatchleft)
  2245. prev.set(left);
  2246. else
  2247. prev.set(right);
  2248. }
  2249. catch(IException * e)
  2250. {
  2251. throw makeWrappedException(e);
  2252. }
  2253. }
  2254. }
  2255. //=====================================================================================================
  2256. CHThorGroupDedupActivity::CHThorGroupDedupActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDedupArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2257. {
  2258. }
  2259. void CHThorGroupDedupActivity::ready()
  2260. {
  2261. CHThorSimpleActivityBase::ready();
  2262. numToKeep = helper.numToKeep();
  2263. numKept = 0;
  2264. }
  2265. //=====================================================================================================
  2266. CHThorGroupDedupKeepLeftActivity::CHThorGroupDedupKeepLeftActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDedupArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorGroupDedupActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  2267. {
  2268. }
  2269. void CHThorGroupDedupKeepLeftActivity::ready()
  2270. {
  2271. CHThorGroupDedupActivity::ready();
  2272. prev.clear();
  2273. }
  2274. void CHThorGroupDedupKeepLeftActivity::stop()
  2275. {
  2276. prev.clear();
  2277. CHThorSimpleActivityBase::stop();
  2278. }
  2279. const void *CHThorGroupDedupKeepLeftActivity::nextRow()
  2280. {
  2281. OwnedConstRoxieRow next;
  2282. for (;;)
  2283. {
  2284. next.setown(input->nextRow());
  2285. if (!prev || !next || !helper.matches(prev,next))
  2286. {
  2287. numKept = 0;
  2288. break;
  2289. }
  2290. if (numKept < numToKeep-1)
  2291. {
  2292. numKept++;
  2293. break;
  2294. }
  2295. }
  2296. const void * ret = next.getClear();
  2297. prev.set(ret);
  2298. if(ret)
  2299. processed++;
  2300. return ret;
  2301. }
  2302. const void * CHThorGroupDedupKeepLeftActivity::nextRowGE(const void * seek, unsigned numFields, bool &wasCompleteMatch, const SmartStepExtra &stepExtra)
  2303. {
  2304. OwnedConstRoxieRow next;
  2305. for (;;)
  2306. {
  2307. next.setown(input->nextRowGE(seek, numFields, wasCompleteMatch, stepExtra));
  2308. if (!prev || !next || !helper.matches(prev,next))
  2309. {
  2310. numKept = 0;
  2311. break;
  2312. }
  2313. if (numKept < numToKeep-1)
  2314. {
  2315. numKept++;
  2316. break;
  2317. }
  2318. }
  2319. const void * ret = next.getClear();
  2320. prev.set(ret);
  2321. if(ret)
  2322. processed++;
  2323. return ret;
  2324. }
  2325. void CHThorGroupDedupKeepLeftActivity::setInput(unsigned index, IHThorInput *_input)
  2326. {
  2327. CHThorGroupDedupActivity::setInput(index, _input);
  2328. if (input)
  2329. inputStepping = input->querySteppingMeta();
  2330. }
  2331. IInputSteppingMeta * CHThorGroupDedupKeepLeftActivity::querySteppingMeta()
  2332. {
  2333. return inputStepping;
  2334. }
  2335. bool CHThorGroupDedupKeepLeftActivity::gatherConjunctions(ISteppedConjunctionCollector & collector)
  2336. {
  2337. return input->gatherConjunctions(collector);
  2338. }
  2339. void CHThorGroupDedupKeepLeftActivity::resetEOF()
  2340. {
  2341. input->resetEOF();
  2342. }
  2343. //=====================================================================================================
  2344. CHThorGroupDedupKeepRightActivity::CHThorGroupDedupKeepRightActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDedupArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorGroupDedupActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph), compareBest(nullptr)
  2345. {
  2346. }
  2347. void CHThorGroupDedupKeepRightActivity::ready()
  2348. {
  2349. CHThorGroupDedupActivity::ready();
  2350. assertex(numToKeep==1);
  2351. firstDone = false;
  2352. if (helper.keepBest())
  2353. compareBest = helper.queryCompareBest();
  2354. }
  2355. void CHThorGroupDedupKeepRightActivity::stop()
  2356. {
  2357. kept.clear();
  2358. CHThorGroupDedupActivity::stop();
  2359. }
  2360. const void *CHThorGroupDedupKeepRightActivity::nextRow()
  2361. {
  2362. if (!firstDone)
  2363. {
  2364. firstDone = true;
  2365. kept.setown(input->nextRow());
  2366. }
  2367. OwnedConstRoxieRow next;
  2368. for (;;)
  2369. {
  2370. next.setown(input->nextRow());
  2371. if (!kept || !next || !helper.matches(kept,next))
  2372. {
  2373. numKept = 0;
  2374. break;
  2375. }
  2376. if (compareBest)
  2377. {
  2378. if (compareBest->docompare(kept,next) > 0)
  2379. kept.setown(next.getClear());
  2380. }
  2381. else
  2382. {
  2383. if (numKept < numToKeep-1)
  2384. {
  2385. numKept++;
  2386. break;
  2387. }
  2388. kept.setown(next.getClear());
  2389. }
  2390. }
  2391. const void * ret = kept.getClear();
  2392. kept.setown(next.getClear());
  2393. if(ret)
  2394. processed++;
  2395. return ret;
  2396. }
  2397. //=====================================================================================================
  2398. CHThorGroupDedupAllActivity::CHThorGroupDedupAllActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDedupArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2399. {
  2400. }
  2401. void CHThorGroupDedupAllActivity::ready()
  2402. {
  2403. CHThorSimpleActivityBase::ready();
  2404. keepLeft = helper.keepLeft();
  2405. primaryCompare = helper.queryComparePrimary();
  2406. assertex(helper.numToKeep() == 1);
  2407. firstDone = false;
  2408. survivorIndex = 0;
  2409. }
  2410. void CHThorGroupDedupAllActivity::stop()
  2411. {
  2412. survivors.clear();
  2413. CHThorSimpleActivityBase::stop();
  2414. }
  2415. bool CHThorGroupDedupAllActivity::calcNextDedupAll()
  2416. {
  2417. survivors.clear();
  2418. survivorIndex = 0;
  2419. OwnedRowArray group;
  2420. const void * next;
  2421. while((next = input->nextRow()) != NULL)
  2422. group.append(next);
  2423. if(group.ordinality() == 0)
  2424. return false;
  2425. unsigned max = group.ordinality();
  2426. if (primaryCompare)
  2427. {
  2428. //hard, if not impossible, to hit this code once optimisations in place
  2429. MemoryAttr indexbuff(max*sizeof(void *));
  2430. void ** temp = (void **)indexbuff.bufferBase();
  2431. void ** rows = (void * *)group.getArray();
  2432. msortvecstableinplace(rows, max, *primaryCompare, temp);
  2433. unsigned first = 0;
  2434. for (unsigned idx = 1; idx < max; idx++)
  2435. {
  2436. if (primaryCompare->docompare(rows[first], rows[idx]) != 0)
  2437. {
  2438. dedupRange(first, idx, group);
  2439. first = idx;
  2440. }
  2441. }
  2442. dedupRange(first, max, group);
  2443. for(unsigned idx2=0; idx2<max; ++idx2)
  2444. {
  2445. void * cur = rows[idx2];
  2446. if(cur)
  2447. {
  2448. LinkRoxieRow(cur);
  2449. survivors.append(cur);
  2450. }
  2451. }
  2452. }
  2453. else
  2454. {
  2455. dedupRange(0, max, group);
  2456. for(unsigned idx=0; idx<max; ++idx)
  2457. {
  2458. const void * cur = group.itemClear(idx);
  2459. if(cur)
  2460. survivors.append(cur);
  2461. }
  2462. }
  2463. return true;
  2464. }
  2465. void CHThorGroupDedupAllActivity::dedupRange(unsigned first, unsigned last, OwnedRowArray & group)
  2466. {
  2467. for (unsigned idxL = first; idxL < last; idxL++)
  2468. {
  2469. const void * left = group.item(idxL);
  2470. if (left)
  2471. {
  2472. for (unsigned idxR = first; idxR < last; idxR++)
  2473. {
  2474. const void * right = group.item(idxR);
  2475. if ((idxL != idxR) && right)
  2476. {
  2477. if (helper.matches(left, right))
  2478. {
  2479. if (keepLeft)
  2480. {
  2481. group.replace(NULL, idxR);
  2482. }
  2483. else
  2484. {
  2485. group.replace(NULL, idxL);
  2486. break;
  2487. }
  2488. }
  2489. }
  2490. }
  2491. }
  2492. }
  2493. }
  2494. const void *CHThorGroupDedupAllActivity::nextRow()
  2495. {
  2496. if (!firstDone)
  2497. {
  2498. firstDone = true;
  2499. calcNextDedupAll();
  2500. }
  2501. if(survivors.isItem(survivorIndex))
  2502. {
  2503. processed++;
  2504. return survivors.itemClear(survivorIndex++);
  2505. }
  2506. calcNextDedupAll();
  2507. return NULL;
  2508. }
  2509. //=====================================================================================================
  2510. bool HashDedupTable::insert(const void * row)
  2511. {
  2512. unsigned hash = helper.queryHash()->hash(row);
  2513. RtlDynamicRowBuilder keyRowBuilder(keyRowAllocator, true);
  2514. size32_t thisKeySize = helper.recordToKey(keyRowBuilder, row);
  2515. OwnedConstRoxieRow keyRow = keyRowBuilder.finalizeRowClear(thisKeySize);
  2516. if (find(hash, keyRow.get()))
  2517. return false;
  2518. addNew(new HashDedupElement(hash, keyRow.getClear()), hash);
  2519. return true;
  2520. }
  2521. bool HashDedupTable::insertBest(const void * nextrow)
  2522. {
  2523. unsigned hash = helper.queryHash()->hash(nextrow);
  2524. const void *et = find(hash, nextrow);
  2525. if (et)
  2526. {
  2527. const HashDedupElement *element = reinterpret_cast<const HashDedupElement *>(et);
  2528. const void * row = element->queryRow();
  2529. if (queryBestCompare->docompare(row,nextrow) <= 0)
  2530. return false;
  2531. removeExact( const_cast<void *>(et));
  2532. // drop-through to add new row
  2533. }
  2534. LinkRoxieRow(nextrow);
  2535. addNew(new HashDedupElement(hash, nextrow), hash);
  2536. return true;
  2537. }
  2538. CHThorHashDedupActivity::CHThorHashDedupActivity(IAgentContext & _agent, unsigned _activityId, unsigned _subgraphId, IHThorHashDedupArg & _arg, ThorActivityKind _kind, EclGraph & _graph)
  2539. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), table(_arg), hashTableFilled(false), hashDedupTableIter(table)
  2540. {
  2541. keepBest = helper.keepBest();
  2542. }
  2543. void CHThorHashDedupActivity::ready()
  2544. {
  2545. CHThorSimpleActivityBase::ready();
  2546. table.setRowAllocator(agent.queryCodeContext()->getRowAllocator(helper.queryKeySize(), activityId));
  2547. }
  2548. void CHThorHashDedupActivity::stop()
  2549. {
  2550. table.kill();
  2551. CHThorSimpleActivityBase::stop();
  2552. }
  2553. const void * CHThorHashDedupActivity::nextRow()
  2554. {
  2555. if (keepBest)
  2556. {
  2557. // Populate hash table with best rows
  2558. if (!hashTableFilled)
  2559. {
  2560. OwnedConstRoxieRow next(input->nextRow());
  2561. while(next)
  2562. {
  2563. table.insertBest(next);
  2564. next.setown(input->nextRow());
  2565. }
  2566. hashTableFilled = true;
  2567. hashDedupTableIter.first();
  2568. }
  2569. // Iterate through hash table returning rows
  2570. if (hashDedupTableIter.isValid())
  2571. {
  2572. HashDedupElement &el = hashDedupTableIter.query();
  2573. OwnedConstRoxieRow row(el.getRow());
  2574. hashDedupTableIter.next();
  2575. return row.getClear();
  2576. }
  2577. table.kill();
  2578. hashTableFilled = false;
  2579. return NULL;
  2580. }
  2581. else
  2582. {
  2583. while(true)
  2584. {
  2585. OwnedConstRoxieRow next(input->nextRow());
  2586. if(!next)
  2587. {
  2588. table.kill();
  2589. return NULL;
  2590. }
  2591. if(table.insert(next))
  2592. return next.getClear();
  2593. }
  2594. }
  2595. }
  2596. //=====================================================================================================
  2597. CHThorSteppableActivityBase::CHThorSteppableActivityBase(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorArg & _help, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _help, _kind, _graph)
  2598. {
  2599. inputStepping = NULL;
  2600. stepCompare = NULL;
  2601. }
  2602. void CHThorSteppableActivityBase::setInput(unsigned index, IHThorInput *_input)
  2603. {
  2604. CHThorSimpleActivityBase::setInput(index, _input);
  2605. if (input && index == 0)
  2606. {
  2607. inputStepping = input->querySteppingMeta();
  2608. if (inputStepping)
  2609. stepCompare = inputStepping->queryCompare();
  2610. }
  2611. }
  2612. IInputSteppingMeta * CHThorSteppableActivityBase::querySteppingMeta()
  2613. {
  2614. return inputStepping;
  2615. }
  2616. //=====================================================================================================
  2617. CHThorFilterActivity::CHThorFilterActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorFilterArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSteppableActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2618. {
  2619. }
  2620. void CHThorFilterActivity::ready()
  2621. {
  2622. CHThorSimpleActivityBase::ready();
  2623. anyThisGroup = false;
  2624. eof = !helper.canMatchAny();
  2625. }
  2626. const void * CHThorFilterActivity::nextRow()
  2627. {
  2628. if (eof)
  2629. return NULL;
  2630. for (;;)
  2631. {
  2632. OwnedConstRoxieRow ret(input->nextRow());
  2633. if (!ret)
  2634. {
  2635. //stop returning two NULLs in a row.
  2636. if (anyThisGroup)
  2637. {
  2638. anyThisGroup = false;
  2639. return NULL;
  2640. }
  2641. ret.setown(input->nextRow());
  2642. if (!ret)
  2643. return NULL; // eof...
  2644. }
  2645. if (helper.isValid(ret))
  2646. {
  2647. anyThisGroup = true;
  2648. processed++;
  2649. return ret.getClear();
  2650. }
  2651. }
  2652. }
  2653. const void * CHThorFilterActivity::nextRowGE(const void * seek, unsigned numFields, bool &wasCompleteMatch, const SmartStepExtra &stepExtra)
  2654. {
  2655. if (eof)
  2656. return NULL;
  2657. OwnedConstRoxieRow ret(input->nextRowGE(seek, numFields, wasCompleteMatch, stepExtra));
  2658. if (!ret)
  2659. return NULL;
  2660. if (helper.isValid(ret))
  2661. {
  2662. anyThisGroup = true;
  2663. processed++;
  2664. return ret.getClear();
  2665. }
  2666. return ungroupedNextRow();
  2667. }
  2668. bool CHThorFilterActivity::gatherConjunctions(ISteppedConjunctionCollector & collector)
  2669. {
  2670. return input->gatherConjunctions(collector);
  2671. }
  2672. void CHThorFilterActivity::resetEOF()
  2673. {
  2674. //Sometimes the smart stepping code returns a premature eof indicator (two nulls) and will
  2675. //therefore call resetEOF so the activity can reset its eof without resetting the activity itself.
  2676. //Note that resetEOF only needs to be implemented by activities that implement gatherConjunctions()
  2677. //and that cache eof.
  2678. eof = false;
  2679. anyThisGroup = false;
  2680. input->resetEOF();
  2681. }
  2682. //=====================================================================================================
  2683. CHThorFilterGroupActivity::CHThorFilterGroupActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorFilterGroupArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSteppableActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2684. {
  2685. }
  2686. void CHThorFilterGroupActivity::ready()
  2687. {
  2688. CHThorSimpleActivityBase::ready();
  2689. eof = !helper.canMatchAny();
  2690. nextIndex = 0;
  2691. }
  2692. void CHThorFilterGroupActivity::stop()
  2693. {
  2694. CHThorSimpleActivityBase::stop();
  2695. pending.clear();
  2696. }
  2697. const void * CHThorFilterGroupActivity::nextRow()
  2698. {
  2699. for (;;)
  2700. {
  2701. if (eof)
  2702. return NULL;
  2703. if (pending.ordinality())
  2704. {
  2705. if (pending.isItem(nextIndex))
  2706. {
  2707. processed++;
  2708. return pending.itemClear(nextIndex++);
  2709. }
  2710. nextIndex = 0;
  2711. pending.clear();
  2712. return NULL;
  2713. }
  2714. const void * ret = input->nextRow();
  2715. while (ret)
  2716. {
  2717. pending.append(ret);
  2718. ret = input->nextRow();
  2719. }
  2720. unsigned num = pending.ordinality();
  2721. if (num != 0)
  2722. {
  2723. if (!helper.isValid(num, (const void * *)pending.getArray()))
  2724. pending.clear(); // read next group
  2725. }
  2726. else
  2727. eof = true;
  2728. }
  2729. }
  2730. const void * CHThorFilterGroupActivity::nextRowGE(const void * seek, unsigned numFields, bool &wasCompleteMatch, const SmartStepExtra &stepExtra)
  2731. {
  2732. if (eof)
  2733. return NULL;
  2734. if (pending.ordinality())
  2735. {
  2736. while (pending.isItem(nextIndex))
  2737. {
  2738. OwnedConstRoxieRow ret(pending.itemClear(nextIndex++));
  2739. if (stepCompare->docompare(ret, seek, numFields) >= 0)
  2740. {
  2741. processed++;
  2742. return ret.getClear();
  2743. }
  2744. }
  2745. nextIndex = 0;
  2746. pending.clear();
  2747. }
  2748. const void * ret = input->nextRowGE(seek, numFields, wasCompleteMatch, stepExtra);
  2749. while (ret)
  2750. {
  2751. pending.append(ret);
  2752. ret = input->nextRow();
  2753. }
  2754. unsigned num = pending.ordinality();
  2755. if (num != 0)
  2756. {
  2757. if (!helper.isValid(num, (const void * *)pending.getArray()))
  2758. pending.clear(); // read next group
  2759. }
  2760. else
  2761. eof = true;
  2762. return ungroupedNextRow();
  2763. }
  2764. //=====================================================================================================
  2765. CHThorLimitActivity::CHThorLimitActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorLimitArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSteppableActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2766. {
  2767. }
  2768. void CHThorLimitActivity::ready()
  2769. {
  2770. CHThorSimpleActivityBase::ready();
  2771. rowLimit = helper.getRowLimit();
  2772. numGot = 0;
  2773. }
  2774. const void * CHThorLimitActivity::nextRow()
  2775. {
  2776. OwnedConstRoxieRow ret(input->nextRow());
  2777. if (ret)
  2778. {
  2779. if (++numGot > rowLimit)
  2780. {
  2781. if ( agent.queryCodeContext()->queryDebugContext())
  2782. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  2783. helper.onLimitExceeded();
  2784. return NULL;
  2785. }
  2786. processed++;
  2787. }
  2788. return ret.getClear();
  2789. }
  2790. const void * CHThorLimitActivity::nextRowGE(const void * seek, unsigned numFields, bool &wasCompleteMatch, const SmartStepExtra &stepExtra)
  2791. {
  2792. OwnedConstRoxieRow ret(input->nextRowGE(seek, numFields, wasCompleteMatch, stepExtra));
  2793. if (ret)
  2794. {
  2795. if (++numGot > rowLimit)
  2796. {
  2797. if ( agent.queryCodeContext()->queryDebugContext())
  2798. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  2799. helper.onLimitExceeded();
  2800. return NULL;
  2801. }
  2802. processed++;
  2803. }
  2804. return ret.getClear();
  2805. }
  2806. //=====================================================================================================
  2807. CHThorSkipLimitActivity::CHThorSkipLimitActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorLimitArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2808. {
  2809. }
  2810. void CHThorSkipLimitActivity::ready()
  2811. {
  2812. CHThorSimpleActivityBase::ready();
  2813. rowLimit = helper.getRowLimit();
  2814. }
  2815. void CHThorSkipLimitActivity::stop()
  2816. {
  2817. CHThorSimpleActivityBase::stop();
  2818. buffer.clear();
  2819. }
  2820. const void * CHThorSkipLimitActivity::nextRow()
  2821. {
  2822. if(!buffer)
  2823. {
  2824. buffer.setown(new CRowBuffer(input->queryOutputMeta(), true));
  2825. if(!buffer->pull(input, rowLimit))
  2826. {
  2827. if ( agent.queryCodeContext()->queryDebugContext())
  2828. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  2829. onLimitExceeded();
  2830. }
  2831. }
  2832. const void * next = buffer->next();
  2833. if(next)
  2834. processed++;
  2835. return next;
  2836. }
  2837. //=====================================================================================================
  2838. CHThorCatchActivity::CHThorCatchActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorCatchArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSteppableActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2839. {
  2840. }
  2841. const void * CHThorCatchActivity::nextRow()
  2842. {
  2843. try
  2844. {
  2845. OwnedConstRoxieRow ret(input->nextRow());
  2846. if (ret)
  2847. processed++;
  2848. return ret.getClear();
  2849. }
  2850. catch (IException *E)
  2851. {
  2852. E->Release();
  2853. helper.onExceptionCaught();
  2854. }
  2855. catch (...)
  2856. {
  2857. helper.onExceptionCaught();
  2858. }
  2859. throwUnexpected(); // onExceptionCaught should have thrown something
  2860. }
  2861. const void * CHThorCatchActivity::nextRowGE(const void * seek, unsigned numFields, bool &wasCompleteMatch, const SmartStepExtra &stepExtra)
  2862. {
  2863. try
  2864. {
  2865. OwnedConstRoxieRow ret(input->nextRowGE(seek, numFields, wasCompleteMatch, stepExtra));
  2866. if (ret)
  2867. processed++;
  2868. return ret.getClear();
  2869. }
  2870. catch (IException *E)
  2871. {
  2872. E->Release();
  2873. helper.onExceptionCaught();
  2874. }
  2875. catch (...)
  2876. {
  2877. helper.onExceptionCaught();
  2878. }
  2879. throwUnexpected(); // onExceptionCaught should have thrown something
  2880. }
  2881. //=====================================================================================================
  2882. CHThorSkipCatchActivity::CHThorSkipCatchActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorCatchArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2883. {
  2884. }
  2885. void CHThorSkipCatchActivity::stop()
  2886. {
  2887. CHThorSimpleActivityBase::stop();
  2888. buffer.clear();
  2889. }
  2890. void CHThorSkipCatchActivity::onException(IException *E)
  2891. {
  2892. buffer->clear();
  2893. if (kind == TAKcreaterowcatch)
  2894. {
  2895. createRowAllocator();
  2896. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  2897. size32_t newSize = helper.transformOnExceptionCaught(rowBuilder, E);
  2898. if (newSize)
  2899. buffer->insert(rowBuilder.finalizeRowClear(newSize));
  2900. }
  2901. E->Release();
  2902. }
  2903. const void * CHThorSkipCatchActivity::nextRow()
  2904. {
  2905. if(!buffer)
  2906. {
  2907. buffer.setown(new CRowBuffer(input->queryOutputMeta(), true));
  2908. try
  2909. {
  2910. buffer->pull(input, (unsigned __int64) -1);
  2911. }
  2912. catch (IException *E)
  2913. {
  2914. onException(E);
  2915. }
  2916. catch (...)
  2917. {
  2918. onException(MakeStringException(2, "Unknown exception caught"));
  2919. }
  2920. }
  2921. const void * next = buffer->next();
  2922. if(next)
  2923. processed++;
  2924. return next;
  2925. }
  2926. //=====================================================================================================
  2927. CHThorOnFailLimitActivity::CHThorOnFailLimitActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorLimitArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSkipLimitActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  2928. {
  2929. }
  2930. void CHThorOnFailLimitActivity::onLimitExceeded()
  2931. {
  2932. buffer->clear();
  2933. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  2934. size32_t newSize = helper.transformOnLimitExceeded(rowBuilder);
  2935. if (newSize)
  2936. buffer->insert(rowBuilder.finalizeRowClear(newSize));
  2937. }
  2938. //=====================================================================================================
  2939. CHThorIfActivity::CHThorIfActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorIfArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2940. {
  2941. inputTrue = NULL;
  2942. inputFalse = NULL;
  2943. selectedInput = NULL;
  2944. }
  2945. void CHThorIfActivity::stop()
  2946. {
  2947. if (selectedInput)
  2948. selectedInput->stop();
  2949. CHThorSimpleActivityBase::stop();
  2950. }
  2951. void CHThorIfActivity::ready()
  2952. {
  2953. CHThorSimpleActivityBase::ready();
  2954. selectedInput = helper.getCondition() ? inputTrue : inputFalse;
  2955. if (selectedInput)
  2956. selectedInput->ready();
  2957. }
  2958. void CHThorIfActivity::setInput(unsigned index, IHThorInput *_input)
  2959. {
  2960. if (index==0)
  2961. inputTrue = _input;
  2962. else if (index == 1)
  2963. inputFalse = _input;
  2964. else
  2965. CHThorActivityBase::setInput(index, _input);
  2966. }
  2967. const void * CHThorIfActivity::nextRow()
  2968. {
  2969. if (!selectedInput)
  2970. return NULL;
  2971. const void *ret = selectedInput->nextRow();
  2972. if (ret)
  2973. processed++;
  2974. return ret;
  2975. }
  2976. //=====================================================================================================
  2977. CHThorCaseActivity::CHThorCaseActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorCaseArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorMultiInputActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  2978. {
  2979. }
  2980. void CHThorCaseActivity::ready()
  2981. {
  2982. //Evaluate the condition here to avoid calling ready() on the unused branch?
  2983. initialProcessed = processed;
  2984. selectedInput = NULL;
  2985. unsigned whichBranch = helper.getBranch();
  2986. if (whichBranch >= inputs.ordinality())
  2987. whichBranch = inputs.ordinality()-1;
  2988. selectedInput = inputs.item(whichBranch);
  2989. selectedInput->ready();
  2990. }
  2991. void CHThorCaseActivity::stop()
  2992. {
  2993. if (selectedInput)
  2994. selectedInput->stop();
  2995. }
  2996. const void *CHThorCaseActivity::nextRow()
  2997. {
  2998. if (!selectedInput)
  2999. return NULL;
  3000. const void *ret = selectedInput->nextRow();
  3001. if (ret)
  3002. processed++;
  3003. return ret;
  3004. }
  3005. //=====================================================================================================
  3006. CHThorSampleActivity::CHThorSampleActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorSampleArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  3007. {
  3008. }
  3009. void CHThorSampleActivity::ready()
  3010. {
  3011. CHThorSimpleActivityBase::ready();
  3012. numSamples = helper.getProportion();
  3013. whichSample = helper.getSampleNumber();
  3014. numToSkip = (whichSample ? whichSample-1 : 0);
  3015. anyThisGroup = false;
  3016. }
  3017. const void * CHThorSampleActivity::nextRow()
  3018. {
  3019. for (;;)
  3020. {
  3021. OwnedConstRoxieRow ret(input->nextRow());
  3022. if (!ret)
  3023. {
  3024. //this does work with groups - may or may not be useful...
  3025. //reset the sample for each group.... probably best.
  3026. numToSkip = (whichSample ? whichSample-1 : 0);
  3027. if (anyThisGroup)
  3028. {
  3029. anyThisGroup = false;
  3030. return NULL;
  3031. }
  3032. ret.setown(input->nextRow());
  3033. if (!ret)
  3034. return NULL; // eof...
  3035. }
  3036. if (numToSkip == 0)
  3037. {
  3038. anyThisGroup = true;
  3039. numToSkip = numSamples-1;
  3040. processed++;
  3041. return ret.getClear();
  3042. }
  3043. numToSkip--;
  3044. }
  3045. }
  3046. //=====================================================================================================
  3047. CHThorAggregateActivity::CHThorAggregateActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorAggregateArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  3048. {
  3049. }
  3050. void CHThorAggregateActivity::ready()
  3051. {
  3052. CHThorSimpleActivityBase::ready();
  3053. eof = false;
  3054. }
  3055. const void * CHThorAggregateActivity::nextRow()
  3056. {
  3057. if (eof)
  3058. return NULL;
  3059. const void * next = input->nextRow();
  3060. if (!next && input->isGrouped())
  3061. {
  3062. eof = true;
  3063. return NULL;
  3064. }
  3065. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  3066. helper.clearAggregate(rowBuilder);
  3067. if (next)
  3068. {
  3069. helper.processFirst(rowBuilder, next);
  3070. ReleaseRoxieRow(next);
  3071. bool abortEarly = (kind == TAKexistsaggregate) && !input->isGrouped();
  3072. if (!abortEarly)
  3073. {
  3074. for (;;)
  3075. {
  3076. next = input->nextRow();
  3077. if (!next)
  3078. break;
  3079. helper.processNext(rowBuilder, next);
  3080. ReleaseRoxieRow(next);
  3081. }
  3082. }
  3083. }
  3084. if (!input->isGrouped()) // either read all, or aborted early
  3085. eof = true;
  3086. processed++;
  3087. size32_t finalSize = outputMeta.getRecordSize(rowBuilder.getSelf());
  3088. return rowBuilder.finalizeRowClear(finalSize);
  3089. }
  3090. //=====================================================================================================
  3091. CHThorHashAggregateActivity::CHThorHashAggregateActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorHashAggregateArg &_arg, ThorActivityKind _kind, EclGraph & _graph, bool _isGroupedAggregate)
  3092. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph),
  3093. aggregated(_arg, _arg),
  3094. isGroupedAggregate(_isGroupedAggregate)
  3095. {
  3096. }
  3097. void CHThorHashAggregateActivity::ready()
  3098. {
  3099. CHThorSimpleActivityBase::ready();
  3100. eof = false;
  3101. gathered = false;
  3102. }
  3103. void CHThorHashAggregateActivity::stop()
  3104. {
  3105. aggregated.reset();
  3106. CHThorSimpleActivityBase::stop();
  3107. }
  3108. const void * CHThorHashAggregateActivity::nextRow()
  3109. {
  3110. if (eof)
  3111. return NULL;
  3112. if (!gathered)
  3113. {
  3114. bool eog = true;
  3115. aggregated.start(rowAllocator, agent.queryCodeContext(), activityId);
  3116. for (;;)
  3117. {
  3118. OwnedConstRoxieRow next(input->nextRow());
  3119. if (!next)
  3120. {
  3121. if (isGroupedAggregate)
  3122. {
  3123. if (eog)
  3124. eof = true;
  3125. break;
  3126. }
  3127. next.setown(input->nextRow());
  3128. if (!next)
  3129. break;
  3130. }
  3131. eog = false;
  3132. try
  3133. {
  3134. aggregated.addRow(next);
  3135. }
  3136. catch(IException * e)
  3137. {
  3138. throw makeWrappedException(e);
  3139. }
  3140. }
  3141. gathered = true;
  3142. }
  3143. Owned<AggregateRowBuilder> next = aggregated.nextResult();
  3144. if (next)
  3145. {
  3146. processed++;
  3147. return next->finalizeRowClear();
  3148. }
  3149. if (!isGroupedAggregate)
  3150. eof = true;
  3151. aggregated.reset();
  3152. gathered = false;
  3153. return NULL;
  3154. }
  3155. //=====================================================================================================
  3156. CHThorSelectNActivity::CHThorSelectNActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorSelectNArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  3157. {
  3158. }
  3159. const void * CHThorSelectNActivity::defaultRow()
  3160. {
  3161. if (!rowAllocator)
  3162. createRowAllocator(); //We delay as often not needed...
  3163. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  3164. size32_t thisSize = helper.createDefault(rowBuilder);
  3165. return rowBuilder.finalizeRowClear(thisSize);
  3166. }
  3167. void CHThorSelectNActivity::ready()
  3168. {
  3169. CHThorSimpleActivityBase::ready();
  3170. finished = false;
  3171. }
  3172. const void * CHThorSelectNActivity::nextRow()
  3173. {
  3174. if (finished)
  3175. return NULL;
  3176. finished = true;
  3177. unsigned __int64 index = helper.getRowToSelect();
  3178. while (--index)
  3179. {
  3180. OwnedConstRoxieRow next(input->nextRow());
  3181. if (!next)
  3182. next.setown(input->nextRow());
  3183. if (!next)
  3184. {
  3185. processed++;
  3186. return defaultRow();
  3187. }
  3188. }
  3189. OwnedConstRoxieRow next(input->nextRow());
  3190. if (!next)
  3191. next.setown(input->nextRow());
  3192. if (!next)
  3193. next.setown(defaultRow());
  3194. processed++;
  3195. return next.getClear();
  3196. }
  3197. //=====================================================================================================
  3198. CHThorFirstNActivity::CHThorFirstNActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorFirstNArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  3199. {
  3200. grouped = outputMeta.isGrouped();
  3201. }
  3202. void CHThorFirstNActivity::ready()
  3203. {
  3204. CHThorSimpleActivityBase::ready();
  3205. skip = helper.numToSkip();
  3206. limit = helper.getLimit();
  3207. doneThisGroup = 0;
  3208. finished = (limit == 0);
  3209. if (limit + skip >= limit)
  3210. limit += skip;
  3211. }
  3212. const void * CHThorFirstNActivity::nextRow()
  3213. {
  3214. if (finished)
  3215. return NULL;
  3216. OwnedConstRoxieRow ret;
  3217. for (;;)
  3218. {
  3219. ret.setown(input->nextRow());
  3220. if (!ret)
  3221. {
  3222. if (grouped)
  3223. {
  3224. if (doneThisGroup > skip)
  3225. {
  3226. doneThisGroup = 0;
  3227. return NULL;
  3228. }
  3229. doneThisGroup = 0;
  3230. }
  3231. ret.setown(input->nextRow());
  3232. if (!ret)
  3233. {
  3234. finished = true;
  3235. return NULL;
  3236. }
  3237. }
  3238. doneThisGroup++;
  3239. if (doneThisGroup > skip)
  3240. break;
  3241. }
  3242. if (doneThisGroup <= limit)
  3243. {
  3244. processed++;
  3245. return ret.getClear();
  3246. }
  3247. if (grouped)
  3248. {
  3249. ret.setown(input->nextRow());
  3250. while (ret)
  3251. ret.setown(input->nextRow());
  3252. doneThisGroup = 0;
  3253. }
  3254. else
  3255. finished = true;
  3256. return NULL;
  3257. }
  3258. //=====================================================================================================
  3259. CHThorChooseSetsActivity::CHThorChooseSetsActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorChooseSetsArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  3260. {
  3261. numSets = helper.getNumSets();
  3262. setCounts = new unsigned[numSets];
  3263. }
  3264. CHThorChooseSetsActivity::~CHThorChooseSetsActivity()
  3265. {
  3266. delete [] setCounts;
  3267. }
  3268. void CHThorChooseSetsActivity::ready()
  3269. {
  3270. CHThorSimpleActivityBase::ready();
  3271. finished = false;
  3272. memset(setCounts, 0, sizeof(unsigned)*numSets);
  3273. helper.setCounts(setCounts);
  3274. }
  3275. const void * CHThorChooseSetsActivity::nextRow()
  3276. {
  3277. if (finished)
  3278. return NULL;
  3279. for (;;)
  3280. {
  3281. OwnedConstRoxieRow ret(input->nextRow());
  3282. if (!ret)
  3283. {
  3284. ret.setown(input->nextRow());
  3285. if (!ret)
  3286. return NULL;
  3287. }
  3288. processed++;
  3289. switch (helper.getRecordAction(ret))
  3290. {
  3291. case 2:
  3292. finished = true;
  3293. return ret.getClear();
  3294. case 1:
  3295. return ret.getClear();
  3296. }
  3297. }
  3298. }
  3299. //=====================================================================================================
  3300. CHThorChooseSetsExActivity::CHThorChooseSetsExActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorChooseSetsExArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  3301. {
  3302. numSets = helper.getNumSets();
  3303. setCounts = new unsigned[numSets];
  3304. memset(setCounts, 0, sizeof(unsigned)*numSets);
  3305. limits = (count_t *)checked_calloc(sizeof(count_t), numSets, "choose sets ex");
  3306. helper.getLimits(limits);
  3307. }
  3308. CHThorChooseSetsExActivity::~CHThorChooseSetsExActivity()
  3309. {
  3310. delete [] setCounts;
  3311. free(limits);
  3312. }
  3313. void CHThorChooseSetsExActivity::ready()
  3314. {
  3315. CHThorSimpleActivityBase::ready();
  3316. finished = false;
  3317. curIndex = 0;
  3318. memset(setCounts, 0, sizeof(unsigned)*numSets);
  3319. }
  3320. void CHThorChooseSetsExActivity::stop()
  3321. {
  3322. gathered.clear();
  3323. CHThorSimpleActivityBase::stop();
  3324. }
  3325. const void * CHThorChooseSetsExActivity::nextRow()
  3326. {
  3327. if (gathered.ordinality() == 0)
  3328. {
  3329. curIndex = 0;
  3330. const void * next = input->nextRow();
  3331. while(next)
  3332. {
  3333. gathered.append(next);
  3334. next = input->nextRow();
  3335. }
  3336. if(gathered.ordinality() == 0)
  3337. {
  3338. finished = true;
  3339. return NULL;
  3340. }
  3341. ForEachItemIn(idx1, gathered)
  3342. {
  3343. unsigned category = helper.getCategory(gathered.item(idx1));
  3344. if (category)
  3345. setCounts[category-1]++;
  3346. }
  3347. calculateSelection();
  3348. }
  3349. while (gathered.isItem(curIndex))
  3350. {
  3351. OwnedConstRoxieRow row(gathered.itemClear(curIndex++));
  3352. if (includeRow(row))
  3353. {
  3354. processed++;
  3355. return row.getClear();
  3356. }
  3357. }
  3358. gathered.clear();
  3359. return NULL;
  3360. }
  3361. //=====================================================================================================
  3362. CHThorChooseSetsLastActivity::CHThorChooseSetsLastActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorChooseSetsExArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorChooseSetsExActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  3363. {
  3364. numToSkip = (unsigned *)checked_calloc(sizeof(unsigned), numSets, "choose sets last");
  3365. }
  3366. CHThorChooseSetsLastActivity::~CHThorChooseSetsLastActivity()
  3367. {
  3368. free(numToSkip);
  3369. }
  3370. void CHThorChooseSetsLastActivity::ready()
  3371. {
  3372. CHThorChooseSetsExActivity::ready();
  3373. memset(numToSkip, 0, sizeof(unsigned) * numSets);
  3374. }
  3375. void CHThorChooseSetsLastActivity::calculateSelection()
  3376. {
  3377. for (unsigned idx=0; idx < numSets; idx++)
  3378. {
  3379. if (setCounts[idx] < limits[idx])
  3380. numToSkip[idx] = 0;
  3381. else
  3382. numToSkip[idx] = (unsigned)(setCounts[idx] - limits[idx]);
  3383. }
  3384. }
  3385. bool CHThorChooseSetsLastActivity::includeRow(const void * row)
  3386. {
  3387. unsigned category = helper.getCategory(row);
  3388. if (category)
  3389. {
  3390. if (numToSkip[category-1] == 0)
  3391. return true;
  3392. numToSkip[category-1]--;
  3393. }
  3394. return false;
  3395. }
  3396. //=====================================================================================================
  3397. CHThorChooseSetsEnthActivity::CHThorChooseSetsEnthActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorChooseSetsExArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorChooseSetsExActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  3398. {
  3399. counter = (unsigned __int64 *)checked_calloc(sizeof(unsigned __int64), numSets, "choose sets enth");
  3400. }
  3401. CHThorChooseSetsEnthActivity::~CHThorChooseSetsEnthActivity()
  3402. {
  3403. free(counter);
  3404. }
  3405. void CHThorChooseSetsEnthActivity::ready()
  3406. {
  3407. CHThorChooseSetsExActivity::ready();
  3408. memset(counter, 0, sizeof(unsigned __int64) * numSets);
  3409. }
  3410. void CHThorChooseSetsEnthActivity::calculateSelection()
  3411. {
  3412. }
  3413. bool CHThorChooseSetsEnthActivity::includeRow(const void * row)
  3414. {
  3415. unsigned category = helper.getCategory(row);
  3416. if (category)
  3417. {
  3418. counter[category-1] += limits[category-1];
  3419. if(counter[category-1] >= setCounts[category-1])
  3420. {
  3421. counter[category-1] -= setCounts[category-1];
  3422. return true;
  3423. }
  3424. }
  3425. return false;
  3426. }
  3427. //=====================================================================================================
  3428. CHThorDegroupActivity::CHThorDegroupActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDegroupArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSteppableActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  3429. {
  3430. }
  3431. const void * CHThorDegroupActivity::nextRow()
  3432. {
  3433. const void * ret = input->ungroupedNextRow();
  3434. if (ret)
  3435. processed++;
  3436. return ret;
  3437. }
  3438. const void * CHThorDegroupActivity::nextRowGE(const void * seek, unsigned numFields, bool &wasCompleteMatch, const SmartStepExtra &stepExtra)
  3439. {
  3440. const void * ret = input->nextRowGE(seek, numFields, wasCompleteMatch, stepExtra);
  3441. if (ret)
  3442. processed++;
  3443. return ret;
  3444. }
  3445. bool CHThorDegroupActivity::isGrouped()
  3446. {
  3447. return false;
  3448. }
  3449. //=====================================================================================================
  3450. CHThorGroupActivity::CHThorGroupActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorGroupArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSteppableActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  3451. {
  3452. }
  3453. bool CHThorGroupActivity::isGrouped()
  3454. {
  3455. return true;
  3456. }
  3457. void CHThorGroupActivity::ready()
  3458. {
  3459. CHThorSimpleActivityBase::ready();
  3460. next.clear();
  3461. endPending = false;
  3462. firstDone = false;
  3463. }
  3464. void CHThorGroupActivity::stop()
  3465. {
  3466. CHThorSimpleActivityBase::stop();
  3467. next.clear();
  3468. }
  3469. const void *CHThorGroupActivity::nextRow()
  3470. {
  3471. if (!firstDone)
  3472. {
  3473. firstDone = true;
  3474. next.setown(input->nextRow());
  3475. }
  3476. if (endPending)
  3477. {
  3478. endPending = false;
  3479. return NULL;
  3480. }
  3481. OwnedConstRoxieRow prev(next.getClear());
  3482. next.setown(input->nextRow());
  3483. if (!next) // skip incoming groups. (should it sub-group??)
  3484. next.setown(input->nextRow());
  3485. if (next)
  3486. {
  3487. assertex(prev); // If this fails, you have an initial empty group. That is not legal.
  3488. if (!helper.isSameGroup(prev, next))
  3489. endPending = true;
  3490. }
  3491. if (prev)
  3492. processed++;
  3493. return prev.getClear();
  3494. }
  3495. const void * CHThorGroupActivity::nextRowGE(const void * seek, unsigned numFields, bool &wasCompleteMatch, const SmartStepExtra &stepExtra)
  3496. {
  3497. if (firstDone)
  3498. {
  3499. if (next)
  3500. {
  3501. if (stepCompare->docompare(next, seek, numFields) >= 0)
  3502. return nextRow();
  3503. }
  3504. }
  3505. next.setown(input->nextRowGE(seek, numFields, wasCompleteMatch, stepExtra));
  3506. firstDone = true;
  3507. return nextRow();
  3508. }
  3509. //=====================================================================================================
  3510. CHThorGroupSortActivity::CHThorGroupSortActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorSortArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  3511. {
  3512. gotSorted = false;
  3513. }
  3514. void CHThorGroupSortActivity::ready()
  3515. {
  3516. CHThorSimpleActivityBase::ready();
  3517. if(!sorter)
  3518. createSorter();
  3519. }
  3520. void CHThorGroupSortActivity::stop()
  3521. {
  3522. if(sorter)
  3523. {
  3524. if(sorterIsConst)
  3525. sorter->killSorted();
  3526. else
  3527. sorter.clear();
  3528. }
  3529. gotSorted = false;
  3530. diskReader.clear();
  3531. CHThorSimpleActivityBase::stop();
  3532. }
  3533. const void *CHThorGroupSortActivity::nextRow()
  3534. {
  3535. if(!gotSorted)
  3536. getSorted();
  3537. if(diskReader)
  3538. {
  3539. const void *row = diskReader->nextRow();
  3540. if (row)
  3541. return row;
  3542. diskReader.clear();
  3543. }
  3544. else
  3545. {
  3546. const void * ret = sorter->getNextSorted();
  3547. if(ret)
  3548. {
  3549. processed++;
  3550. return ret;
  3551. }
  3552. }
  3553. sorter->killSorted();
  3554. gotSorted = false;
  3555. return NULL;
  3556. }
  3557. void CHThorGroupSortActivity::createSorter()
  3558. {
  3559. unsigned flags = helper.getAlgorithmFlags();
  3560. sorterIsConst = ((flags & TAFconstant) != 0);
  3561. OwnedRoxieString algoname(helper.getAlgorithm());
  3562. if(!algoname)
  3563. {
  3564. if((flags & TAFunstable) != 0)
  3565. sorter.setown(new CQuickSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep));
  3566. else
  3567. sorter.setown(new CHeapSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep));
  3568. return;
  3569. }
  3570. if(stricmp(algoname, "quicksort") == 0)
  3571. {
  3572. if((flags & TAFstable) != 0)
  3573. sorter.setown(new CStableQuickSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep, this));
  3574. else
  3575. sorter.setown(new CQuickSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep));
  3576. }
  3577. else if(stricmp(algoname, "parquicksort") == 0)
  3578. {
  3579. if((flags & TAFstable) != 0)
  3580. sorter.setown(new CParallelStableQuickSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep, this));
  3581. else
  3582. sorter.setown(new CParallelQuickSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep));
  3583. }
  3584. else if(stricmp(algoname, "taskquicksort") == 0)
  3585. {
  3586. if((flags & TAFstable) != 0)
  3587. sorter.setown(new CParallelTaskStableQuickSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep, this));
  3588. else
  3589. sorter.setown(new CParallelTaskQuickSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep));
  3590. }
  3591. else if(stricmp(algoname, "mergesort") == 0)
  3592. {
  3593. if((flags & TAFparallel) != 0)
  3594. sorter.setown(new CParallelStableMergeSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep, this));
  3595. else
  3596. sorter.setown(new CStableMergeSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep, this));
  3597. }
  3598. else if(stricmp(algoname, "parmergesort") == 0)
  3599. sorter.setown(new CParallelStableMergeSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep, this));
  3600. else if(stricmp(algoname, "heapsort") == 0)
  3601. sorter.setown(new CHeapSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep));
  3602. else if(stricmp(algoname, "insertionsort") == 0)
  3603. {
  3604. if((flags & TAFstable) != 0)
  3605. sorter.setown(new CStableInsertionSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep));
  3606. else
  3607. sorter.setown(new CInsertionSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep));
  3608. }
  3609. else
  3610. {
  3611. StringBuffer sb;
  3612. sb.appendf("Ignoring unsupported sort order algorithm '%s', using default", algoname.get());
  3613. agent.addWuExceptionEx(sb.str(),WRN_UnsupportedAlgorithm,SeverityWarning,MSGAUD_user,"hthor");
  3614. if((flags & TAFunstable) != 0)
  3615. sorter.setown(new CQuickSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep));
  3616. else
  3617. sorter.setown(new CHeapSorter(helper.queryCompare(), queryRowManager(), InitialSortElements, CommitStep));
  3618. }
  3619. sorter->setActivityId(activityId);
  3620. }
  3621. void CHThorGroupSortActivity::getSorted()
  3622. {
  3623. diskMerger.clear();
  3624. diskReader.clear();
  3625. queryRowManager()->addRowBuffer(this);//register for OOM callbacks
  3626. const void * next;
  3627. while((next = input->nextRow()) != NULL)
  3628. {
  3629. if (!sorter->addRow(next))
  3630. {
  3631. {
  3632. //Unlikely that this code will ever be executed but added for comfort
  3633. roxiemem::RoxieOutputRowArrayLock block(sorter->getRowArray());
  3634. sorter->flushRows();
  3635. sortAndSpillRows();
  3636. //Ensure new rows are written to the head of the array. It needs to be a separate call because
  3637. //performSort() cannot shift active row pointer since it can be called from any thread
  3638. sorter->flushRows();
  3639. }
  3640. if (!sorter->addRow(next))
  3641. {
  3642. ReleaseRoxieRow(next);
  3643. throw MakeStringException(0, "Insufficient memory to append sort row");
  3644. }
  3645. }
  3646. }
  3647. queryRowManager()->removeRowBuffer(this);//unregister for OOM callbacks
  3648. sorter->flushRows();
  3649. if(diskMerger)
  3650. {
  3651. sortAndSpillRows();
  3652. sorter->killSorted();
  3653. ICompare *compare = helper.queryCompare();
  3654. diskReader.setown(diskMerger->merge(compare));
  3655. }
  3656. else
  3657. {
  3658. sorter->performSort();
  3659. }
  3660. gotSorted = true;
  3661. }
  3662. //interface roxiemem::IBufferedRowCallback
  3663. unsigned CHThorGroupSortActivity::getSpillCost() const
  3664. {
  3665. return 10;
  3666. }
  3667. unsigned CHThorGroupSortActivity::getActivityId() const
  3668. {
  3669. return activityId;
  3670. }
  3671. bool CHThorGroupSortActivity::freeBufferedRows(bool critical)
  3672. {
  3673. roxiemem::RoxieOutputRowArrayLock block(sorter->getRowArray());
  3674. return sortAndSpillRows();
  3675. }
  3676. bool CHThorGroupSortActivity::sortAndSpillRows()
  3677. {
  3678. if (0 == sorter->numCommitted())
  3679. return false;
  3680. if(!diskMerger)
  3681. {
  3682. StringBuffer fbase;
  3683. agent.getTempfileBase(fbase).append(PATHSEPCHAR).appendf("spill_sort_%p", this);
  3684. PROGLOG("SORT: spilling to disk, filename base %s", fbase.str());
  3685. class CHThorRowLinkCounter : implements IRowLinkCounter, public CSimpleInterface
  3686. {
  3687. public:
  3688. IMPLEMENT_IINTERFACE_USING(CSimpleInterface);
  3689. virtual void releaseRow(const void *row)
  3690. {
  3691. ReleaseRoxieRow(row);
  3692. }
  3693. virtual void linkRow(const void *row)
  3694. {
  3695. LinkRoxieRow(row);
  3696. }
  3697. };
  3698. Owned<IRowLinkCounter> linker = new CHThorRowLinkCounter();
  3699. Owned<IRowInterfaces> rowInterfaces = createRowInterfaces(input->queryOutputMeta(), activityId, 0, agent.queryCodeContext());
  3700. diskMerger.setown(createDiskMerger(rowInterfaces, linker, fbase.str()));
  3701. }
  3702. sorter->performSort();
  3703. sorter->spillSortedToDisk(diskMerger);
  3704. return true;
  3705. }
  3706. // Base for Quick sort and both Insertion sorts
  3707. void CSimpleSorterBase::spillSortedToDisk(IDiskMerger * merger)
  3708. {
  3709. Owned<IRowWriter> out = merger->createWriteBlock();
  3710. for (;;)
  3711. {
  3712. const void *row = getNextSorted();
  3713. if (!row)
  3714. break;
  3715. out->putRow(row);
  3716. }
  3717. finger = 0;
  3718. out->flush();
  3719. rowsToSort.noteSpilled(rowsToSort.numCommitted());
  3720. }
  3721. // Quick sort
  3722. void CQuickSorter::performSort()
  3723. {
  3724. size32_t numRows = rowsToSort.numCommitted();
  3725. if (numRows)
  3726. {
  3727. const void * * rows = rowsToSort.getBlock(numRows);
  3728. qsortvec((void * *)rows, numRows, *compare);
  3729. finger = 0;
  3730. }
  3731. }
  3732. // Quick sort
  3733. void CParallelQuickSorter::performSort()
  3734. {
  3735. size32_t numRows = rowsToSort.numCommitted();
  3736. if (numRows)
  3737. {
  3738. const void * * rows = rowsToSort.getBlock(numRows);
  3739. parqsortvec((void * *)rows, numRows, *compare);
  3740. finger = 0;
  3741. }
  3742. }
  3743. void CParallelTaskQuickSorter::performSort()
  3744. {
  3745. size32_t numRows = rowsToSort.numCommitted();
  3746. if (numRows)
  3747. {
  3748. const void * * rows = rowsToSort.getBlock(numRows);
  3749. taskqsortvec((void * *)rows, numRows, *compare);
  3750. finger = 0;
  3751. }
  3752. }
  3753. // StableQuick sort
  3754. bool CStableSorter::addRow(const void * next)
  3755. {
  3756. roxiemem::rowidx_t nextRowCapacity = rowsToSort.rowCapacity() + 1;//increment capacity for the row we are about to add
  3757. if (nextRowCapacity > indexCapacity)
  3758. {
  3759. void *** newIndex = (void ***)rowManager->allocate(nextRowCapacity * sizeof(void*), activityId);//could force an OOM callback
  3760. if (newIndex)
  3761. {
  3762. roxiemem::RoxieOutputRowArrayLock block(getRowArray());//could force an OOM callback after index is freed but before index,indexCapacity is updated
  3763. ReleaseRoxieRow(index);
  3764. index = newIndex;
  3765. indexCapacity = RoxieRowCapacity(index) / sizeof(void*);
  3766. }
  3767. else
  3768. {
  3769. killSorted();
  3770. ReleaseRoxieRow(next);
  3771. throw MakeStringException(0, "Insufficient memory to allocate StableQuickSorter index");
  3772. }
  3773. }
  3774. return CSimpleSorterBase::addRow(next);
  3775. }
  3776. void CStableSorter::spillSortedToDisk(IDiskMerger * merger)
  3777. {
  3778. CSimpleSorterBase::spillSortedToDisk(merger);
  3779. ReleaseRoxieRow(index);
  3780. index = NULL;
  3781. indexCapacity = 0;
  3782. }
  3783. void CStableSorter::killSorted()
  3784. {
  3785. CSimpleSorterBase::killSorted();
  3786. ReleaseRoxieRow(index);
  3787. index = NULL;
  3788. indexCapacity = 0;
  3789. }
  3790. // StableQuick sort
  3791. void CStableQuickSorter::performSort()
  3792. {
  3793. size32_t numRows = rowsToSort.numCommitted();
  3794. if (numRows)
  3795. {
  3796. const void * * rows = rowsToSort.getBlock(numRows);
  3797. qsortvecstableinplace((void * *)rows, numRows, *compare, (void * *)index);
  3798. finger = 0;
  3799. }
  3800. }
  3801. void CParallelStableQuickSorter::performSort()
  3802. {
  3803. size32_t numRows = rowsToSort.numCommitted();
  3804. if (numRows)
  3805. {
  3806. const void * * rows = rowsToSort.getBlock(numRows);
  3807. parqsortvecstableinplace((void * *)rows, numRows, *compare, (void * *)index);
  3808. finger = 0;
  3809. }
  3810. }
  3811. void CParallelTaskStableQuickSorter::performSort()
  3812. {
  3813. size32_t numRows = rowsToSort.numCommitted();
  3814. if (numRows)
  3815. {
  3816. const void * * rows = rowsToSort.getBlock(numRows);
  3817. taskqsortvecstableinplace((void * *)rows, numRows, *compare, (void * *)index);
  3818. finger = 0;
  3819. }
  3820. }
  3821. // StableMerge sort
  3822. void CStableMergeSorter::performSort()
  3823. {
  3824. size32_t numRows = rowsToSort.numCommitted();
  3825. if (numRows)
  3826. {
  3827. const void * * rows = rowsToSort.getBlock(numRows);
  3828. msortvecstableinplace((void * *)rows, numRows, *compare, (void * *)index);
  3829. finger = 0;
  3830. }
  3831. }
  3832. void CParallelStableMergeSorter::performSort()
  3833. {
  3834. size32_t numRows = rowsToSort.numCommitted();
  3835. if (numRows)
  3836. {
  3837. const void * * rows = rowsToSort.getBlock(numRows);
  3838. parmsortvecstableinplace((void * *)rows, numRows, *compare, (void * *)index);
  3839. finger = 0;
  3840. }
  3841. }
  3842. // Heap sort
  3843. void CHeapSorter::performSort()
  3844. {
  3845. size32_t numRows = rowsToSort.numCommitted();
  3846. if (numRows)
  3847. {
  3848. const void * * rows = rowsToSort.getBlock(numRows);
  3849. heapsize = numRows;
  3850. for (unsigned i = 0; i < numRows; i++)
  3851. {
  3852. heap.append(i);
  3853. heap_push_up(i, heap.getArray(), rows, compare);
  3854. }
  3855. }
  3856. }
  3857. void CHeapSorter::spillSortedToDisk(IDiskMerger * merger)
  3858. {
  3859. CSimpleSorterBase::spillSortedToDisk(merger);
  3860. heap.kill();
  3861. heapsize = 0;
  3862. }
  3863. const void * CHeapSorter::getNextSorted()
  3864. {
  3865. if(heapsize)
  3866. {
  3867. size32_t numRows = rowsToSort.numCommitted();
  3868. if (numRows)
  3869. {
  3870. const void * * rows = rowsToSort.getBlock(numRows);
  3871. unsigned top = heap.item(0);
  3872. --heapsize;
  3873. heap.replace(heap.item(heapsize), 0);
  3874. heap_push_down(0, heapsize, heap.getArray(), rows, compare);
  3875. const void * row = rows[top];
  3876. rows[top] = NULL;
  3877. return row;
  3878. }
  3879. }
  3880. return NULL;
  3881. }
  3882. void CHeapSorter::killSorted()
  3883. {
  3884. CSimpleSorterBase::killSorted();
  3885. heap.kill();
  3886. heapsize = 0;
  3887. }
  3888. // Insertion sorts
  3889. void CInsertionSorter::performSort()
  3890. {
  3891. size32_t numRows = rowsToSort.numCommitted();
  3892. if (numRows)
  3893. {
  3894. const void * * rows = rowsToSort.getBlock(numRows);
  3895. for (unsigned i = 0; i < numRows; i++)
  3896. {
  3897. binary_vec_insert(rowsToSort.query(i), rows, i, *compare);
  3898. }
  3899. finger = 0;
  3900. }
  3901. }
  3902. void CStableInsertionSorter::performSort()
  3903. {
  3904. size32_t numRows = rowsToSort.numCommitted();
  3905. if (numRows)
  3906. {
  3907. const void * * rows = rowsToSort.getBlock(numRows);
  3908. for (unsigned i = 0; i < numRows; i++)
  3909. {
  3910. binary_vec_insert_stable(rowsToSort.query(i), rows, i, *compare);
  3911. }
  3912. finger = 0;
  3913. }
  3914. }
  3915. //=====================================================================================================
  3916. CHThorGroupedActivity::CHThorGroupedActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorGroupedArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  3917. {
  3918. }
  3919. void CHThorGroupedActivity::ready()
  3920. {
  3921. CHThorSimpleActivityBase::ready();
  3922. firstDone = false;
  3923. nextRowIndex = 0;
  3924. }
  3925. void CHThorGroupedActivity::stop()
  3926. {
  3927. CHThorSimpleActivityBase::stop();
  3928. next[0].clear();
  3929. next[1].clear();
  3930. next[2].clear();
  3931. }
  3932. const void *CHThorGroupedActivity::nextRow()
  3933. {
  3934. if (!firstDone)
  3935. {
  3936. next[0].setown(input->nextRow());
  3937. next[1].setown(input->nextRow());
  3938. nextRowIndex = 0;
  3939. }
  3940. unsigned nextToCompare = (nextRowIndex + 1) % 3;
  3941. unsigned nextToFill = (nextRowIndex + 2) % 3;
  3942. next[nextToFill].setown(input->nextRow());
  3943. OwnedConstRoxieRow ret(next[nextRowIndex].getClear());
  3944. if (ret)
  3945. {
  3946. if (next[nextToCompare])
  3947. {
  3948. if (!helper.isSameGroup(ret, next[nextToCompare]))
  3949. throw MakeStringException(100, "GROUPED(%u), expected a group break between adjacent rows (rows %" I64F "d, %" I64F "d) ", activityId, processed+1, processed+2);
  3950. }
  3951. else if (next[nextToFill])
  3952. {
  3953. if (helper.isSameGroup(ret, next[nextToFill]))
  3954. throw MakeStringException(100, "GROUPED(%u), unexpected group break found between rows %" I64F "d and %" I64F "d)", activityId, processed+1, processed+2);
  3955. }
  3956. processed++;
  3957. }
  3958. nextRowIndex = nextToCompare;
  3959. return ret.getClear();
  3960. }
  3961. //=====================================================================================================
  3962. CHThorSortedActivity::CHThorSortedActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorSortedArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSteppableActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  3963. {
  3964. //MORE: Should probably have a inter group and intra group sort functions
  3965. compare = helper.queryCompare();
  3966. }
  3967. void CHThorSortedActivity::ready()
  3968. {
  3969. CHThorSimpleActivityBase::ready();
  3970. firstDone = false;
  3971. }
  3972. void CHThorSortedActivity::stop()
  3973. {
  3974. CHThorSimpleActivityBase::stop();
  3975. next.clear();
  3976. }
  3977. const void *CHThorSortedActivity::nextRow()
  3978. {
  3979. if (!firstDone)
  3980. {
  3981. firstDone = true;
  3982. next.setown(input->nextRow());
  3983. }
  3984. OwnedConstRoxieRow prev(next.getClear());
  3985. next.setown(input->nextRow());
  3986. if (prev && next)
  3987. if (compare->docompare(prev, next) > 0)
  3988. throw MakeStringException(100, "SORTED(%u) detected incorrectly sorted rows (row %" I64F "d, %" I64F "d))", activityId, processed+1, processed+2);
  3989. if (prev)
  3990. processed++;
  3991. return prev.getClear();
  3992. }
  3993. const void * CHThorSortedActivity::nextRowGE(const void * seek, unsigned numFields, bool &wasCompleteMatch, const SmartStepExtra &stepExtra)
  3994. {
  3995. if (next)
  3996. {
  3997. if (stepCompare->docompare(next, seek, numFields) >= 0)
  3998. return nextRow();
  3999. }
  4000. firstDone = true;
  4001. next.setown(input->nextRowGE(seek, numFields, wasCompleteMatch, stepExtra));
  4002. return nextRow();
  4003. }
  4004. //=====================================================================================================
  4005. CHThorTraceActivity::CHThorTraceActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorTraceArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  4006. : CHThorSteppableActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph),
  4007. helper(_arg), keepLimit(0), skip(0), sample(0), traceEnabled(false)
  4008. {
  4009. }
  4010. void CHThorTraceActivity::ready()
  4011. {
  4012. CHThorSimpleActivityBase::ready();
  4013. traceEnabled = agent.queryWorkUnit()->getDebugValueBool("traceEnabled", false);
  4014. if (traceEnabled && helper.canMatchAny())
  4015. {
  4016. keepLimit = helper.getKeepLimit();
  4017. if (keepLimit==(unsigned) -1)
  4018. keepLimit = agent.queryWorkUnit()->getDebugValueInt("traceLimit", 10);
  4019. skip = helper.getSkip();
  4020. sample = helper.getSample();
  4021. if (sample)
  4022. sample--;
  4023. name.setown(helper.getName());
  4024. if (!name)
  4025. name.set("Row");
  4026. }
  4027. else
  4028. keepLimit = 0;
  4029. }
  4030. void CHThorTraceActivity::stop()
  4031. {
  4032. CHThorSimpleActivityBase::stop();
  4033. name.clear();
  4034. }
  4035. const void *CHThorTraceActivity::nextRow()
  4036. {
  4037. OwnedConstRoxieRow ret(input->nextRow());
  4038. if (!ret)
  4039. return NULL;
  4040. onTrace(ret);
  4041. processed++;
  4042. return ret.getClear();
  4043. }
  4044. const void * CHThorTraceActivity::nextRowGE(const void * seek, unsigned numFields, bool &wasCompleteMatch, const SmartStepExtra &stepExtra)
  4045. {
  4046. OwnedConstRoxieRow ret(input->nextRowGE(seek, numFields, wasCompleteMatch, stepExtra));
  4047. if (ret)
  4048. {
  4049. onTrace(ret);
  4050. processed++;
  4051. }
  4052. return ret.getClear();
  4053. }
  4054. void CHThorTraceActivity::onTrace(const void *row)
  4055. {
  4056. if (keepLimit && helper.isValid(row))
  4057. {
  4058. if (skip)
  4059. skip--;
  4060. else if (sample)
  4061. sample--;
  4062. else
  4063. {
  4064. CommonXmlWriter xmlwrite(XWFnoindent);
  4065. outputMeta.toXML((const byte *) row, xmlwrite);
  4066. DBGLOG("TRACE: <%s>%s<%s>", name.get(), xmlwrite.str(), name.get());
  4067. keepLimit--;
  4068. sample = helper.getSample();
  4069. if (sample)
  4070. sample--;
  4071. }
  4072. }
  4073. }
  4074. //=====================================================================================================
  4075. void getLimitType(unsigned flags, bool & limitFail, bool & limitOnFail)
  4076. {
  4077. if((flags & JFmatchAbortLimitSkips) != 0)
  4078. {
  4079. limitFail = false;
  4080. limitOnFail = false;
  4081. }
  4082. else
  4083. {
  4084. limitOnFail = ((flags & JFonfail) != 0);
  4085. limitFail = !limitOnFail;
  4086. }
  4087. }
  4088. CHThorJoinActivity::CHThorJoinActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorJoinArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  4089. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), outBuilder(NULL)
  4090. {
  4091. }
  4092. void CHThorJoinActivity::ready()
  4093. {
  4094. CHThorActivityBase::ready();
  4095. input1->ready();
  4096. bool isStable = (helper.getJoinFlags() & JFunstable) == 0;
  4097. RoxieSortAlgorithm sortAlgorithm = isStable ? stableSpillingQuickSortAlgorithm : spillingQuickSortAlgorithm;
  4098. StringBuffer tempBase;
  4099. agent.getTempfileBase(tempBase);
  4100. if (helper.isLeftAlreadySorted())
  4101. sortedLeftInput.setown(createDegroupedInputReader(&input->queryStream()));
  4102. else
  4103. sortedLeftInput.setown(createSortedInputReader(&input->queryStream(), createSortAlgorithm(sortAlgorithm, helper.queryCompareLeft(), *queryRowManager(), input->queryOutputMeta(), agent.queryCodeContext(), tempBase, activityId)));
  4104. ICompare *compareRight = helper.queryCompareRight();
  4105. if (helper.isRightAlreadySorted())
  4106. groupedSortedRightInput.setown(createGroupedInputReader(&input1->queryStream(), compareRight));
  4107. else
  4108. groupedSortedRightInput.setown(createSortedGroupedInputReader(&input1->queryStream(), compareRight, createSortAlgorithm(sortAlgorithm, compareRight, *queryRowManager(), input1->queryOutputMeta(), agent.queryCodeContext(), tempBase, activityId)));
  4109. outBuilder.setAllocator(rowAllocator);
  4110. leftOuterJoin = (helper.getJoinFlags() & JFleftouter) != 0;
  4111. rightOuterJoin = (helper.getJoinFlags() & JFrightouter) != 0;
  4112. exclude = (helper.getJoinFlags() & JFexclude) != 0;
  4113. getLimitType(helper.getJoinFlags(), limitFail, limitOnFail);
  4114. if (rightOuterJoin && !defaultLeft)
  4115. createDefaultLeft();
  4116. if ((leftOuterJoin || limitOnFail) && !defaultRight)
  4117. createDefaultRight();
  4118. betweenjoin = ((helper.getJoinFlags() & JFslidingmatch) != 0);
  4119. assertex(!(betweenjoin && rightOuterJoin));
  4120. keepLimit = helper.getKeepLimit();
  4121. if (keepLimit == 0)
  4122. keepLimit = (unsigned)-1;
  4123. atmostLimit = helper.getJoinLimit();
  4124. if(atmostLimit == 0)
  4125. atmostLimit = (unsigned)-1;
  4126. else
  4127. assertex(!rightOuterJoin && !betweenjoin);
  4128. abortLimit = helper.getMatchAbortLimit();
  4129. if (abortLimit == 0)
  4130. abortLimit = (unsigned)-1;
  4131. assertex((helper.getJoinFlags() & (JFfirst | JFfirstleft | JFfirstright)) == 0); // no longer supported
  4132. if(betweenjoin)
  4133. {
  4134. collate = helper.queryCompareLeftRightLower();
  4135. collateupper = helper.queryCompareLeftRightUpper();
  4136. }
  4137. else
  4138. {
  4139. collate = collateupper = helper.queryCompareLeftRight();
  4140. }
  4141. rightIndex = 0;
  4142. joinCounter = 0;
  4143. failingLimit.clear();
  4144. state = JSfill;
  4145. if ((helper.getJoinFlags() & JFlimitedprefixjoin) && helper.getJoinLimit())
  4146. { //Limited Match Join (s[1..n])
  4147. limitedhelper.setown(createRHLimitedCompareHelper());
  4148. limitedhelper->init( helper.getJoinLimit(), groupedSortedRightInput, collate, helper.queryPrefixCompare() );
  4149. }
  4150. }
  4151. void CHThorJoinActivity::stop()
  4152. {
  4153. outBuilder.clear();
  4154. right.clear();
  4155. left.clear();
  4156. pendingRight.clear();
  4157. sortedLeftInput.clear();
  4158. groupedSortedRightInput.clear();
  4159. CHThorActivityBase::stop();
  4160. input1->stop();
  4161. }
  4162. void CHThorJoinActivity::setInput(unsigned index, IHThorInput *_input)
  4163. {
  4164. if (index==1)
  4165. input1 = _input;
  4166. else
  4167. CHThorActivityBase::setInput(index, _input);
  4168. }
  4169. void CHThorJoinActivity::createDefaultLeft()
  4170. {
  4171. if (!defaultLeft)
  4172. {
  4173. if (!defaultLeftAllocator)
  4174. defaultLeftAllocator.setown(agent.queryCodeContext()->getRowAllocator(input->queryOutputMeta(), activityId));
  4175. RtlDynamicRowBuilder rowBuilder(defaultLeftAllocator);
  4176. size32_t thisSize = helper.createDefaultLeft(rowBuilder);
  4177. defaultLeft.setown(rowBuilder.finalizeRowClear(thisSize));
  4178. }
  4179. }
  4180. void CHThorJoinActivity::createDefaultRight()
  4181. {
  4182. if (!defaultRight)
  4183. {
  4184. if (!defaultRightAllocator)
  4185. defaultRightAllocator.setown(agent.queryCodeContext()->getRowAllocator(input1->queryOutputMeta(), activityId));
  4186. RtlDynamicRowBuilder rowBuilder(defaultRightAllocator);
  4187. size32_t thisSize = helper.createDefaultRight(rowBuilder);
  4188. defaultRight.setown(rowBuilder.finalizeRowClear(thisSize));
  4189. }
  4190. }
  4191. void CHThorJoinActivity::fillLeft()
  4192. {
  4193. matchedLeft = false;
  4194. left.setown(sortedLeftInput->nextRow()); // NOTE: already degrouped
  4195. if(betweenjoin && left && pendingRight && (collate->docompare(left, pendingRight) >= 0))
  4196. fillRight();
  4197. if (limitedhelper && 0==rightIndex)
  4198. {
  4199. rightIndex = 0;
  4200. joinCounter = 0;
  4201. right.clear();
  4202. matchedRight.kill();
  4203. if (left)
  4204. {
  4205. limitedhelper->getGroup(right,left);
  4206. ForEachItemIn(idx, right)
  4207. matchedRight.append(false);
  4208. }
  4209. }
  4210. }
  4211. void CHThorJoinActivity::fillRight()
  4212. {
  4213. if (limitedhelper)
  4214. return;
  4215. failingLimit.clear();
  4216. if(betweenjoin && left)
  4217. {
  4218. aindex_t start = 0;
  4219. while(right.isItem(start) && (collateupper->docompare(left, right.item(start)) > 0))
  4220. start++;
  4221. if(start>0)
  4222. right.clearPart(0, start);
  4223. }
  4224. else
  4225. right.clear();
  4226. rightIndex = 0;
  4227. joinCounter = 0;
  4228. unsigned groupCount = 0;
  4229. while(true)
  4230. {
  4231. OwnedConstRoxieRow next;
  4232. if(pendingRight)
  4233. {
  4234. next.setown(pendingRight.getClear());
  4235. }
  4236. else
  4237. {
  4238. next.setown(groupedSortedRightInput->nextRow());
  4239. }
  4240. if(!rightOuterJoin && next && (!left || (collateupper->docompare(left, next) > 0))) // if right is less than left, and not right outer, can skip group
  4241. {
  4242. while(next)
  4243. next.setown(groupedSortedRightInput->nextRow());
  4244. continue;
  4245. }
  4246. while(next)
  4247. {
  4248. if(groupCount==abortLimit)
  4249. {
  4250. if(limitFail)
  4251. failLimit();
  4252. if ( agent.queryCodeContext()->queryDebugContext())
  4253. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  4254. if(limitOnFail)
  4255. {
  4256. assertex(!failingLimit);
  4257. try
  4258. {
  4259. failLimit();
  4260. }
  4261. catch(IException * except)
  4262. {
  4263. failingLimit.setown(except);
  4264. }
  4265. assertex(failingLimit);
  4266. }
  4267. right.append(next.getClear());
  4268. do
  4269. {
  4270. next.setown(groupedSortedRightInput->nextRow());
  4271. } while(next);
  4272. break;
  4273. }
  4274. else if(groupCount==atmostLimit)
  4275. {
  4276. right.clear();
  4277. groupCount = 0;
  4278. while(next)
  4279. {
  4280. next.setown(groupedSortedRightInput->nextRow());
  4281. }
  4282. }
  4283. else
  4284. {
  4285. right.append(next.getClear());
  4286. groupCount++;
  4287. }
  4288. next.setown(groupedSortedRightInput->nextRow());
  4289. }
  4290. // normally only want to read one right group, but if is between join and next right group is in window for left, need to continue
  4291. if(betweenjoin && left)
  4292. {
  4293. pendingRight.setown(groupedSortedRightInput->nextRow());
  4294. if(!pendingRight || (collate->docompare(left, pendingRight) < 0))
  4295. break;
  4296. }
  4297. else
  4298. break;
  4299. }
  4300. matchedRight.kill();
  4301. ForEachItemIn(idx, right)
  4302. matchedRight.append(false);
  4303. }
  4304. const void * CHThorJoinActivity::joinRecords(const void * curLeft, const void * curRight, unsigned counter, unsigned flags)
  4305. {
  4306. try
  4307. {
  4308. outBuilder.ensureRow();
  4309. size32_t thisSize = helper.transform(outBuilder, curLeft, curRight, counter, flags);
  4310. if(thisSize)
  4311. return outBuilder.finalizeRowClear(thisSize);
  4312. else
  4313. return NULL;
  4314. }
  4315. catch(IException * e)
  4316. {
  4317. throw makeWrappedException(e);
  4318. }
  4319. }
  4320. const void * CHThorJoinActivity::groupDenormalizeRecords(const void * curLeft, ConstPointerArray & rows, unsigned flags)
  4321. {
  4322. try
  4323. {
  4324. outBuilder.ensureRow();
  4325. unsigned numRows = rows.ordinality();
  4326. const void * rhs = numRows ? rows.item(0) : defaultRight.get();
  4327. if (numRows>0)
  4328. flags |= JTFmatchedright;
  4329. memsize_t thisSize = helper.transform(outBuilder, curLeft, rhs, numRows, (const void * *)rows.getArray(), flags);
  4330. if(thisSize)
  4331. return outBuilder.finalizeRowClear(thisSize);
  4332. else
  4333. return NULL;
  4334. }
  4335. catch(IException * e)
  4336. {
  4337. throw makeWrappedException(e);
  4338. }
  4339. }
  4340. const void * CHThorJoinActivity::joinException(const void * curLeft, IException * except)
  4341. {
  4342. try
  4343. {
  4344. outBuilder.ensureRow();
  4345. size32_t thisSize = helper.onFailTransform(outBuilder, curLeft, defaultRight, except, JTFmatchedleft);
  4346. if(thisSize)
  4347. return outBuilder.finalizeRowClear(thisSize);
  4348. else
  4349. return NULL;
  4350. }
  4351. catch(IException * e)
  4352. {
  4353. throw makeWrappedException(e);
  4354. }
  4355. }
  4356. void CHThorJoinActivity::failLimit()
  4357. {
  4358. helper.onMatchAbortLimitExceeded();
  4359. CommonXmlWriter xmlwrite(0);
  4360. if (input->queryOutputMeta() && input->queryOutputMeta()->hasXML())
  4361. {
  4362. input->queryOutputMeta()->toXML((byte *)left.get(), xmlwrite);
  4363. }
  4364. throw MakeStringException(0, "More than %d match candidates in join for row %s", abortLimit, xmlwrite.str());
  4365. }
  4366. const void *CHThorJoinActivity::nextRow()
  4367. {
  4368. for (;;)
  4369. {
  4370. switch (state)
  4371. {
  4372. case JSfill:
  4373. fillLeft();
  4374. state = JSfillright;
  4375. break;
  4376. case JSfillright:
  4377. fillRight();
  4378. state = JScollate;
  4379. break;
  4380. case JSfillleft:
  4381. fillLeft();
  4382. state = JScollate;
  4383. break;
  4384. case JScollate:
  4385. if (right.ordinality() == 0)
  4386. {
  4387. if (!left)
  4388. return NULL;
  4389. state = JSleftonly;
  4390. }
  4391. else
  4392. {
  4393. if (!left)
  4394. state = JSrightonly;
  4395. else
  4396. {
  4397. int diff;
  4398. if(betweenjoin)
  4399. diff = ((collate->docompare(left, right.item(0)) < 0) ? -1 : ((collateupper->docompare(left, right.item(right.ordinality()-1)) > 0) ? +1 : 0));
  4400. else
  4401. diff = collate->docompare(left, right.item(0));
  4402. bool limitExceeded = right.ordinality()>abortLimit;
  4403. if (diff == 0)
  4404. {
  4405. if (limitExceeded)
  4406. {
  4407. const void * ret = NULL;
  4408. if(failingLimit)
  4409. {
  4410. if ( agent.queryCodeContext()->queryDebugContext())
  4411. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  4412. ret = joinException(left, failingLimit);
  4413. }
  4414. left.clear();
  4415. state = JSfillleft;
  4416. ForEachItemIn(idx, right)
  4417. matchedRight.replace(true, idx);
  4418. if(ret)
  4419. {
  4420. processed++;
  4421. return ret;
  4422. }
  4423. }
  4424. else
  4425. {
  4426. state = JScompare;
  4427. joinLimit = keepLimit;
  4428. }
  4429. }
  4430. else if (diff < 0)
  4431. state = JSleftonly;
  4432. else if (limitExceeded)
  4433. {
  4434. // MORE - Roxie code seems to think there should be a destroyRowset(right) here....
  4435. state = JSfillright;
  4436. }
  4437. else
  4438. state = JSrightonly;
  4439. }
  4440. }
  4441. break;
  4442. case JSrightonly:
  4443. if (rightOuterJoin)
  4444. {
  4445. switch (kind)
  4446. {
  4447. case TAKjoin:
  4448. {
  4449. while (right.isItem(rightIndex))
  4450. {
  4451. if (!matchedRight.item(rightIndex))
  4452. {
  4453. const void * rhs = right.item(rightIndex++);
  4454. const void * ret = joinRecords(defaultLeft, rhs, 0, JTFmatchedright);
  4455. if (ret)
  4456. {
  4457. processed++;
  4458. return ret;
  4459. }
  4460. }
  4461. else
  4462. rightIndex++;
  4463. }
  4464. break;
  4465. }
  4466. //Probably excessive to implement the following, but possibly useful
  4467. case TAKdenormalize:
  4468. {
  4469. OwnedConstRoxieRow newLeft(defaultLeft.getLink());
  4470. unsigned rowSize = 0;
  4471. unsigned leftCount = 0;
  4472. while (right.isItem(rightIndex))
  4473. {
  4474. if (!matchedRight.item(rightIndex))
  4475. {
  4476. const void * rhs = right.item(rightIndex);
  4477. try
  4478. {
  4479. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  4480. size32_t thisSize = helper.transform(rowBuilder, newLeft, rhs, ++leftCount, JTFmatchedright);
  4481. if (thisSize)
  4482. {
  4483. rowSize = thisSize;
  4484. newLeft.setown(rowBuilder.finalizeRowClear(rowSize));
  4485. }
  4486. }
  4487. catch(IException * e)
  4488. {
  4489. throw makeWrappedException(e);
  4490. }
  4491. }
  4492. rightIndex++;
  4493. }
  4494. state = JSfillright;
  4495. if (rowSize)
  4496. {
  4497. processed++;
  4498. return newLeft.getClear();
  4499. }
  4500. break;
  4501. }
  4502. case TAKdenormalizegroup:
  4503. {
  4504. filteredRight.kill();
  4505. while (right.isItem(rightIndex))
  4506. {
  4507. if (!matchedRight.item(rightIndex))
  4508. filteredRight.append(right.item(rightIndex));
  4509. rightIndex++;
  4510. }
  4511. state = JSfillright;
  4512. if (filteredRight.ordinality())
  4513. {
  4514. const void * ret = groupDenormalizeRecords(defaultLeft, filteredRight, 0);
  4515. filteredRight.kill();
  4516. if (ret)
  4517. {
  4518. processed++;
  4519. return ret;
  4520. }
  4521. }
  4522. break;
  4523. }
  4524. default:
  4525. throwUnexpected();
  4526. }
  4527. }
  4528. state = JSfillright;
  4529. break;
  4530. case JSleftonly:
  4531. {
  4532. const void * ret = NULL;
  4533. if (!matchedLeft && leftOuterJoin)
  4534. {
  4535. switch (kind)
  4536. {
  4537. case TAKjoin:
  4538. ret = joinRecords(left, defaultRight, 0, JTFmatchedleft);
  4539. break;
  4540. case TAKdenormalize:
  4541. ret = left.getClear();
  4542. break;
  4543. case TAKdenormalizegroup:
  4544. filteredRight.kill();
  4545. ret = groupDenormalizeRecords(left, filteredRight, JTFmatchedleft);
  4546. break;
  4547. default:
  4548. throwUnexpected();
  4549. }
  4550. }
  4551. left.clear();
  4552. state = JSfillleft;
  4553. if (ret)
  4554. {
  4555. processed++;
  4556. return ret;
  4557. }
  4558. break;
  4559. }
  4560. case JScompare:
  4561. if (joinLimit != 0)
  4562. {
  4563. switch (kind)
  4564. {
  4565. case TAKjoin:
  4566. {
  4567. while (right.isItem(rightIndex))
  4568. {
  4569. const void * rhs = right.item(rightIndex++);
  4570. if (helper.match(left, rhs))
  4571. {
  4572. matchedRight.replace(true, rightIndex-1);
  4573. matchedLeft = true;
  4574. if (!exclude)
  4575. {
  4576. const void *ret = joinRecords(left, rhs, ++joinCounter, JTFmatchedleft|JTFmatchedright);
  4577. if (ret)
  4578. {
  4579. processed++;
  4580. joinLimit--;
  4581. return ret;
  4582. }
  4583. }
  4584. }
  4585. }
  4586. break;
  4587. }
  4588. case TAKdenormalize:
  4589. {
  4590. OwnedConstRoxieRow newLeft;
  4591. newLeft.set(left);
  4592. unsigned rowSize = 0;
  4593. unsigned leftCount = 0;
  4594. while (right.isItem(rightIndex) && joinLimit)
  4595. {
  4596. const void * rhs = right.item(rightIndex++);
  4597. if (helper.match(left, rhs))
  4598. {
  4599. matchedRight.replace(true, rightIndex-1);
  4600. matchedLeft = true;
  4601. if (!exclude)
  4602. {
  4603. try
  4604. {
  4605. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  4606. unsigned thisSize = helper.transform(rowBuilder, newLeft, rhs, ++leftCount, JTFmatchedleft|JTFmatchedright);
  4607. if (thisSize)
  4608. {
  4609. rowSize = thisSize;
  4610. newLeft.setown(rowBuilder.finalizeRowClear(rowSize));
  4611. joinLimit--;
  4612. }
  4613. }
  4614. catch(IException * e)
  4615. {
  4616. throw makeWrappedException(e);
  4617. }
  4618. }
  4619. }
  4620. }
  4621. state = JSleftonly;
  4622. rightIndex = 0;
  4623. if (rowSize)
  4624. {
  4625. processed++;
  4626. return newLeft.getClear();
  4627. }
  4628. break;
  4629. }
  4630. case TAKdenormalizegroup:
  4631. {
  4632. filteredRight.kill();
  4633. while (right.isItem(rightIndex))
  4634. {
  4635. const void * rhs = right.item(rightIndex++);
  4636. if (helper.match(left, rhs))
  4637. {
  4638. matchedRight.replace(true, rightIndex-1);
  4639. filteredRight.append(rhs);
  4640. matchedLeft = true;
  4641. if (filteredRight.ordinality()==joinLimit)
  4642. break;
  4643. }
  4644. }
  4645. state = JSleftonly;
  4646. rightIndex = 0;
  4647. if (!exclude && filteredRight.ordinality())
  4648. {
  4649. const void * ret = groupDenormalizeRecords(left, filteredRight, JTFmatchedleft);
  4650. filteredRight.kill();
  4651. if (ret)
  4652. {
  4653. processed++;
  4654. return ret;
  4655. }
  4656. }
  4657. break;
  4658. }
  4659. default:
  4660. throwUnexpected();
  4661. }
  4662. }
  4663. state = JSleftonly;
  4664. rightIndex = 0;
  4665. joinCounter = 0;
  4666. break;
  4667. }
  4668. }
  4669. }
  4670. bool CHThorJoinActivity::isGrouped()
  4671. {
  4672. return false;
  4673. }
  4674. //=====================================================================================================
  4675. CHThorSelfJoinActivity::CHThorSelfJoinActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorJoinArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  4676. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), outBuilder(NULL)
  4677. {
  4678. dualCacheInput = NULL;
  4679. }
  4680. void CHThorSelfJoinActivity::ready()
  4681. {
  4682. CHThorActivityBase::ready();
  4683. outBuilder.setAllocator(rowAllocator);
  4684. ICompare *compareLeft = helper.queryCompareLeft();
  4685. if (helper.isLeftAlreadySorted())
  4686. groupedInput.setown(createGroupedInputReader(&input->queryStream(), compareLeft));
  4687. else
  4688. {
  4689. bool isStable = (helper.getJoinFlags() & JFunstable) == 0;
  4690. RoxieSortAlgorithm sortAlgorithm = isStable ? stableSpillingQuickSortAlgorithm : spillingQuickSortAlgorithm;
  4691. StringBuffer tempBase;
  4692. agent.getTempfileBase(tempBase);
  4693. groupedInput.setown(createSortedGroupedInputReader(&input->queryStream(), compareLeft, createSortAlgorithm(sortAlgorithm, compareLeft, *queryRowManager(), input->queryOutputMeta(), agent.queryCodeContext(), tempBase, activityId)));
  4694. }
  4695. leftOuterJoin = (helper.getJoinFlags() & JFleftouter) != 0;
  4696. rightOuterJoin = (helper.getJoinFlags() & JFrightouter) != 0;
  4697. exclude = (helper.getJoinFlags() & JFexclude) != 0;
  4698. getLimitType(helper.getJoinFlags(), limitFail, limitOnFail);
  4699. if (rightOuterJoin && !defaultLeft)
  4700. {
  4701. if (!defaultAllocator)
  4702. defaultAllocator.setown(agent.queryCodeContext()->getRowAllocator(input->queryOutputMeta(), activityId));
  4703. RtlDynamicRowBuilder rowBuilder(defaultAllocator);
  4704. size32_t thisSize = helper.createDefaultLeft(rowBuilder);
  4705. defaultLeft.setown(rowBuilder.finalizeRowClear(thisSize));
  4706. }
  4707. if ((leftOuterJoin || limitOnFail) && !defaultRight)
  4708. {
  4709. if (!defaultAllocator)
  4710. defaultAllocator.setown(agent.queryCodeContext()->getRowAllocator(input->queryOutputMeta(), activityId));
  4711. RtlDynamicRowBuilder rowBuilder(defaultAllocator);
  4712. size32_t thisSize = helper.createDefaultRight(rowBuilder);
  4713. defaultRight.setown(rowBuilder.finalizeRowClear(thisSize));
  4714. }
  4715. if((helper.getJoinFlags() & JFslidingmatch) != 0)
  4716. throw MakeStringException(99, "Sliding self join not supported");
  4717. keepLimit = helper.getKeepLimit();
  4718. if(keepLimit == 0)
  4719. keepLimit = (unsigned)-1;
  4720. atmostLimit = helper.getJoinLimit();
  4721. if(atmostLimit == 0)
  4722. atmostLimit = (unsigned)-1;
  4723. else
  4724. assertex(!rightOuterJoin);
  4725. abortLimit = helper.getMatchAbortLimit();
  4726. if (abortLimit == 0)
  4727. abortLimit = (unsigned)-1;
  4728. assertex((helper.getJoinFlags() & (JFfirst | JFfirstleft | JFfirstright)) == 0); // no longer supported
  4729. collate = helper.queryCompareLeftRight();
  4730. eof = false;
  4731. doneFirstFill = false;
  4732. failingLimit.clear();
  4733. if ((helper.getJoinFlags() & JFlimitedprefixjoin) && helper.getJoinLimit())
  4734. { //Limited Match Join (s[1..n])
  4735. dualcache.setown(new CRHDualCache());
  4736. dualcache->init(groupedInput);
  4737. dualCacheInput = dualcache->queryOut1();
  4738. failingOuterAtmost = false;
  4739. matchedLeft = false;
  4740. leftIndex = 0;
  4741. rightOuterIndex = 0;
  4742. limitedhelper.setown(createRHLimitedCompareHelper());
  4743. limitedhelper->init( helper.getJoinLimit(), dualcache->queryOut2(), collate, helper.queryPrefixCompare() );
  4744. }
  4745. joinCounter = 0;
  4746. }
  4747. void CHThorSelfJoinActivity::stop()
  4748. {
  4749. outBuilder.clear();
  4750. group.clear();
  4751. groupedInput.clear();
  4752. CHThorActivityBase::stop();
  4753. }
  4754. bool CHThorSelfJoinActivity::fillGroup()
  4755. {
  4756. group.clear();
  4757. matchedLeft = false;
  4758. matchedRight.kill();
  4759. failingOuterAtmost = false;
  4760. OwnedConstRoxieRow next;
  4761. unsigned groupCount = 0;
  4762. next.setown(groupedInput->nextRow());
  4763. while(next)
  4764. {
  4765. if(groupCount==abortLimit)
  4766. {
  4767. if(limitFail)
  4768. failLimit(next);
  4769. if ( agent.queryCodeContext()->queryDebugContext())
  4770. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  4771. if(limitOnFail)
  4772. {
  4773. assertex(!failingLimit);
  4774. try
  4775. {
  4776. failLimit(next);
  4777. }
  4778. catch(IException * except)
  4779. {
  4780. failingLimit.setown(except);
  4781. }
  4782. assertex(failingLimit);
  4783. group.append(next.getClear());
  4784. groupCount++;
  4785. break;
  4786. }
  4787. group.clear();
  4788. groupCount = 0;
  4789. while(next)
  4790. next.setown(groupedInput->nextRow());
  4791. }
  4792. else if(groupCount==atmostLimit)
  4793. {
  4794. if(leftOuterJoin)
  4795. {
  4796. group.append(next.getClear());
  4797. groupCount++;
  4798. failingOuterAtmost = true;
  4799. break;
  4800. }
  4801. else
  4802. {
  4803. group.clear();
  4804. groupCount = 0;
  4805. while(next)
  4806. next.setown(groupedInput->nextRow());
  4807. }
  4808. }
  4809. else
  4810. {
  4811. group.append(next.getClear());
  4812. groupCount++;
  4813. }
  4814. next.setown(groupedInput->nextRow());
  4815. }
  4816. if(group.ordinality()==0)
  4817. {
  4818. eof = true;
  4819. return false;
  4820. }
  4821. leftIndex = 0;
  4822. rightIndex = 0;
  4823. joinCounter = 0;
  4824. rightOuterIndex = 0;
  4825. joinLimit = keepLimit;
  4826. ForEachItemIn(idx, group)
  4827. matchedRight.append(false);
  4828. return true;
  4829. }
  4830. const void * CHThorSelfJoinActivity::nextRow()
  4831. {
  4832. if (limitedhelper) {
  4833. while(!eof) //limited match join
  4834. {
  4835. if (!group.isItem(rightIndex))
  4836. {
  4837. lhs.setown(dualCacheInput->nextRow());
  4838. if (lhs)
  4839. {
  4840. rightIndex = 0;
  4841. joinCounter = 0;
  4842. group.clear();
  4843. limitedhelper->getGroup(group,lhs);
  4844. }
  4845. else
  4846. eof = true;
  4847. }
  4848. if (group.isItem(rightIndex))
  4849. {
  4850. const void * rhs = group.item(rightIndex++);
  4851. if(helper.match(lhs, rhs))
  4852. {
  4853. const void * ret = joinRecords(lhs, rhs, ++joinCounter, JTFmatchedleft|JTFmatchedright, NULL);
  4854. if(ret)
  4855. {
  4856. processed++;
  4857. return ret;
  4858. }
  4859. }
  4860. }
  4861. }
  4862. return NULL;
  4863. }
  4864. if(!doneFirstFill)
  4865. {
  4866. fillGroup();
  4867. doneFirstFill = true;
  4868. }
  4869. while(!eof)
  4870. {
  4871. if(failingOuterAtmost)
  4872. while(group.isItem(leftIndex))
  4873. {
  4874. const void * ret = joinRecords(group.item(leftIndex++), defaultRight, 0, JTFmatchedleft, NULL);
  4875. if(ret)
  4876. {
  4877. processed++;
  4878. return ret;
  4879. }
  4880. }
  4881. if((joinLimit == 0) || !group.isItem(rightIndex))
  4882. {
  4883. if(leftOuterJoin && !matchedLeft && !failingLimit)
  4884. {
  4885. const void * ret = joinRecords(group.item(leftIndex), defaultRight, 0, JTFmatchedleft, NULL);
  4886. if(ret)
  4887. {
  4888. matchedLeft = true;
  4889. processed++;
  4890. return ret;
  4891. }
  4892. }
  4893. leftIndex++;
  4894. matchedLeft = false;
  4895. rightIndex = 0;
  4896. joinCounter = 0;
  4897. joinLimit = keepLimit;
  4898. }
  4899. if(!group.isItem(leftIndex))
  4900. {
  4901. if(failingLimit || failingOuterAtmost)
  4902. {
  4903. OwnedConstRoxieRow lhs(groupedInput->nextRow()); // dualCache never active here
  4904. while(lhs)
  4905. {
  4906. const void * ret = joinRecords(lhs, defaultRight, 0, JTFmatchedleft, failingLimit);
  4907. if(ret)
  4908. {
  4909. processed++;
  4910. return ret;
  4911. }
  4912. lhs.setown(groupedInput->nextRow());
  4913. }
  4914. failingLimit.clear();
  4915. }
  4916. if(rightOuterJoin && !failingLimit)
  4917. while(group.isItem(rightOuterIndex))
  4918. if(!matchedRight.item(rightOuterIndex++))
  4919. {
  4920. const void * ret = joinRecords(defaultLeft, group.item(rightOuterIndex-1), 0, JTFmatchedright, NULL);
  4921. if(ret)
  4922. {
  4923. processed++;
  4924. return ret;
  4925. }
  4926. }
  4927. if(!fillGroup())
  4928. return NULL;
  4929. continue;
  4930. }
  4931. const void * lhs = group.item(leftIndex);
  4932. if(failingLimit)
  4933. {
  4934. leftIndex++;
  4935. const void * ret = joinRecords(lhs, defaultRight, 0, JTFmatchedleft, failingLimit);
  4936. if(ret)
  4937. {
  4938. processed++;
  4939. return ret;
  4940. }
  4941. }
  4942. else
  4943. {
  4944. const void * rhs = group.item(rightIndex++);
  4945. if(helper.match(lhs, rhs))
  4946. {
  4947. matchedLeft = true;
  4948. matchedRight.replace(true, rightIndex-1);
  4949. if(!exclude)
  4950. {
  4951. const void * ret = joinRecords(lhs, rhs, ++joinCounter, JTFmatchedleft|JTFmatchedright, NULL);
  4952. if(ret)
  4953. {
  4954. processed++;
  4955. joinLimit--;
  4956. return ret;
  4957. }
  4958. }
  4959. }
  4960. }
  4961. }
  4962. return NULL;
  4963. }
  4964. const void * CHThorSelfJoinActivity::joinRecords(const void * curLeft, const void * curRight, unsigned counter, unsigned flags, IException * except)
  4965. {
  4966. outBuilder.ensureRow();
  4967. try
  4968. {
  4969. size32_t thisSize = (except ? helper.onFailTransform(outBuilder, curLeft, curRight, except, flags) : helper.transform(outBuilder, curLeft, curRight, counter, flags));
  4970. if(thisSize){
  4971. return outBuilder.finalizeRowClear(thisSize);
  4972. }
  4973. else
  4974. return NULL;
  4975. }
  4976. catch(IException * e)
  4977. {
  4978. throw makeWrappedException(e);
  4979. }
  4980. }
  4981. void CHThorSelfJoinActivity::failLimit(const void * next)
  4982. {
  4983. helper.onMatchAbortLimitExceeded();
  4984. CommonXmlWriter xmlwrite(0);
  4985. if (input->queryOutputMeta() && input->queryOutputMeta()->hasXML())
  4986. {
  4987. input->queryOutputMeta()->toXML((byte *) next, xmlwrite);
  4988. }
  4989. throw MakeStringException(0, "More than %d match candidates in self-join for row %s", abortLimit, xmlwrite.str());
  4990. }
  4991. bool CHThorSelfJoinActivity::isGrouped()
  4992. {
  4993. return false;
  4994. }
  4995. //=====================================================================================================
  4996. CHThorLookupJoinActivity::LookupTable::LookupTable(unsigned _size, ICompare * _leftRightCompare, ICompare * _rightCompare, IHash * _leftHash, IHash * _rightHash, bool _dedupOnAdd)
  4997. : leftRightCompare(_leftRightCompare), rightCompare(_rightCompare), leftHash(_leftHash), rightHash(_rightHash), dedupOnAdd(_dedupOnAdd)
  4998. {
  4999. unsigned minsize = (4*_size)/3;
  5000. size = 2;
  5001. while((minsize >>= 1) > 0)
  5002. size <<= 1;
  5003. mask = size - 1;
  5004. table = new OwnedConstRoxieRow[size];
  5005. findex = BadIndex;
  5006. }
  5007. CHThorLookupJoinActivity::LookupTable::~LookupTable()
  5008. {
  5009. delete [] table;
  5010. }
  5011. bool CHThorLookupJoinActivity::LookupTable::add(const void * _right)
  5012. {
  5013. OwnedConstRoxieRow right(_right);
  5014. findex = BadIndex;
  5015. unsigned start = rightHash->hash(right) & mask;
  5016. unsigned index = start;
  5017. while(table[index])
  5018. {
  5019. if(dedupOnAdd && (rightCompare->docompare(table[index], right) == 0))
  5020. return false;
  5021. index++;
  5022. if(index==size)
  5023. index = 0;
  5024. if(index==start)
  5025. return false; //table is full, should never happen
  5026. }
  5027. table[index].setown(right.getClear());
  5028. return true;
  5029. }
  5030. const void * CHThorLookupJoinActivity::LookupTable::find(const void * left) const
  5031. {
  5032. fstart = leftHash->hash(left) & mask;
  5033. findex = fstart;
  5034. return doFind(left);
  5035. }
  5036. const void * CHThorLookupJoinActivity::LookupTable::findNext(const void * left) const
  5037. {
  5038. if(findex == BadIndex)
  5039. return NULL;
  5040. advance();
  5041. return doFind(left);
  5042. }
  5043. void CHThorLookupJoinActivity::LookupTable::advance() const
  5044. {
  5045. findex++;
  5046. if(findex==size)
  5047. findex = 0;
  5048. if(findex==fstart)
  5049. throw MakeStringException(0, "Internal error hthor lookup join activity (hash table full on lookup)");
  5050. }
  5051. const void * CHThorLookupJoinActivity::LookupTable::doFind(const void * left) const
  5052. {
  5053. while(table[findex])
  5054. {
  5055. if(leftRightCompare->docompare(left, table[findex]) == 0)
  5056. return table[findex];
  5057. advance();
  5058. }
  5059. findex = BadIndex;
  5060. return NULL;
  5061. }
  5062. CHThorLookupJoinActivity::CHThorLookupJoinActivity(IAgentContext & _agent, unsigned _activityId, unsigned _subgraphId, IHThorHashJoinArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  5063. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), outBuilder(NULL), table(0)
  5064. {
  5065. }
  5066. void CHThorLookupJoinActivity::ready()
  5067. {
  5068. CHThorActivityBase::ready();
  5069. input1->ready();
  5070. outBuilder.setAllocator(rowAllocator);
  5071. leftOuterJoin = (helper.getJoinFlags() & JFleftouter) != 0;
  5072. assertex((helper.getJoinFlags() & JFrightouter) == 0);
  5073. exclude = (helper.getJoinFlags() & JFexclude) != 0;
  5074. many = (helper.getJoinFlags() & JFmanylookup) != 0;
  5075. dedupRHS = (helper.getJoinFlags() & (JFmanylookup | JFmatchrequired | JFtransformMaySkip)) == 0; // optimisation: can implicitly dedup RHS unless is many lookup, or match required, or transform may skip
  5076. if((helper.getJoinFlags() & (JFfirst | JFfirstleft | JFfirstright | JFslidingmatch)) != 0)
  5077. throwUnexpected(); // compiler should have rejected
  5078. keepLimit = helper.getKeepLimit();
  5079. if(keepLimit==0)
  5080. keepLimit = static_cast<unsigned>(-1);
  5081. atmostLimit = helper.getJoinLimit();
  5082. limitLimit = helper.getMatchAbortLimit();
  5083. hasGroupLimit = ((atmostLimit > 0) || (limitLimit > 0));
  5084. if(atmostLimit==0)
  5085. atmostLimit = static_cast<unsigned>(-1);
  5086. if(limitLimit==0)
  5087. limitLimit = static_cast<unsigned>(-1);
  5088. isSmartJoin = (helper.getJoinFlags() & JFsmart) != 0;
  5089. getLimitType(helper.getJoinFlags(), limitFail, limitOnFail);
  5090. if((leftOuterJoin || limitOnFail) && !defaultRight)
  5091. createDefaultRight();
  5092. eog = false;
  5093. matchedGroup = false;
  5094. joinCounter = 0;
  5095. }
  5096. void CHThorLookupJoinActivity::stop()
  5097. {
  5098. outBuilder.clear();
  5099. left.clear();
  5100. table.clear();
  5101. CHThorActivityBase::stop();
  5102. input1->stop();
  5103. }
  5104. void CHThorLookupJoinActivity::createDefaultRight()
  5105. {
  5106. if (!defaultRight)
  5107. {
  5108. if (!defaultRightAllocator)
  5109. defaultRightAllocator.setown(agent.queryCodeContext()->getRowAllocator(input1->queryOutputMeta(), activityId));
  5110. RtlDynamicRowBuilder rowBuilder(defaultRightAllocator);
  5111. size32_t thisSize = helper.createDefaultRight(rowBuilder);
  5112. defaultRight.setown(rowBuilder.finalizeRowClear(thisSize));
  5113. }
  5114. }
  5115. void CHThorLookupJoinActivity::loadRight()
  5116. {
  5117. OwnedRowArray rightset;
  5118. const void * next;
  5119. while(true)
  5120. {
  5121. next = input1->nextRow();
  5122. if(!next)
  5123. next = input1->nextRow();
  5124. if(!next)
  5125. break;
  5126. rightset.append(next);
  5127. }
  5128. unsigned rightord = rightset.ordinality();
  5129. table.setown(new LookupTable(rightord, helper.queryCompareLeftRight(), helper.queryCompareRight(), helper.queryHashLeft(), helper.queryHashRight(), dedupRHS));
  5130. unsigned i;
  5131. for(i=0; i<rightord; i++)
  5132. table->add(rightset.itemClear(i));
  5133. };
  5134. void CHThorLookupJoinActivity::setInput(unsigned index, IHThorInput * _input)
  5135. {
  5136. if (index==1)
  5137. input1 = _input;
  5138. else
  5139. CHThorActivityBase::setInput(index, _input);
  5140. }
//The following helpers are all copied from CHThorJoinActivity - they should be commoned up.
  5142. const void * CHThorLookupJoinActivity::joinRecords(const void * left, const void * right, unsigned counter, unsigned flags)
  5143. {
  5144. try
  5145. {
  5146. outBuilder.ensureRow();
  5147. size32_t thisSize = helper.transform(outBuilder, left, right, counter, flags);
  5148. if(thisSize)
  5149. return outBuilder.finalizeRowClear(thisSize);
  5150. else
  5151. return NULL;
  5152. }
  5153. catch(IException * e)
  5154. {
  5155. throw makeWrappedException(e);
  5156. }
  5157. }
  5158. const void * CHThorLookupJoinActivity::joinException(const void * left, IException * except)
  5159. {
  5160. try
  5161. {
  5162. outBuilder.ensureRow();
  5163. memsize_t thisSize = helper.onFailTransform(outBuilder, left, defaultRight, except, JTFmatchedleft);
  5164. if(thisSize)
  5165. return outBuilder.finalizeRowClear(thisSize);
  5166. else
  5167. return NULL;
  5168. }
  5169. catch(IException * e)
  5170. {
  5171. throw makeWrappedException(e);
  5172. }
  5173. }
  5174. const void * CHThorLookupJoinActivity::groupDenormalizeRecords(const void * left, ConstPointerArray & rows, unsigned flags)
  5175. {
  5176. try
  5177. {
  5178. outBuilder.ensureRow();
  5179. unsigned numRows = rows.ordinality();
  5180. const void * right = numRows ? rows.item(0) : defaultRight.get();
  5181. if (numRows>0)
  5182. flags |= JTFmatchedright;
  5183. memsize_t thisSize = helper.transform(outBuilder, left, right, numRows, (const void * *)rows.getArray(), flags);
  5184. if(thisSize)
  5185. return outBuilder.finalizeRowClear(thisSize);
  5186. else
  5187. return NULL;
  5188. }
  5189. catch(IException * e)
  5190. {
  5191. throw makeWrappedException(e);
  5192. }
  5193. }
  5194. const void * CHThorLookupJoinActivity::nextRow()
  5195. {
  5196. if(!table)
  5197. loadRight();
  5198. switch (kind)
  5199. {
  5200. case TAKlookupjoin:
  5201. case TAKsmartjoin:
  5202. return nextRowJoin();
  5203. case TAKlookupdenormalize:
  5204. case TAKlookupdenormalizegroup:
  5205. case TAKsmartdenormalize:
  5206. case TAKsmartdenormalizegroup:
  5207. return nextRowDenormalize();
  5208. }
  5209. throwUnexpected();
  5210. }
  5211. const void * CHThorLookupJoinActivity::nextRowJoin()
  5212. {
  5213. while(true)
  5214. {
  5215. const void * right = NULL;
  5216. if(!left)
  5217. {
  5218. left.setown(input->nextRow());
  5219. keepCount = keepLimit;
  5220. if(!left)
  5221. {
  5222. if (isSmartJoin)
  5223. left.setown(input->nextRow());
  5224. if(!left)
  5225. {
  5226. if(matchedGroup || eog)
  5227. {
  5228. matchedGroup = false;
  5229. eog = true;
  5230. return NULL;
  5231. }
  5232. eog = true;
  5233. continue;
  5234. }
  5235. }
  5236. eog = false;
  5237. gotMatch = false;
  5238. right = getRightFirst();
  5239. }
  5240. else
  5241. right = getRightNext();
  5242. const void * ret = NULL;
  5243. if(failingLimit)
  5244. {
  5245. ret = joinException(left, failingLimit);
  5246. }
  5247. else
  5248. {
  5249. while(right)
  5250. {
  5251. if(helper.match(left, right))
  5252. {
  5253. gotMatch = true;
  5254. if(exclude)
  5255. break;
  5256. ret = joinRecords(left, right, ++joinCounter, JTFmatchedleft|JTFmatchedright);
  5257. if(ret)
  5258. {
  5259. processed++;
  5260. break;
  5261. }
  5262. }
  5263. right = getRightNext();
  5264. ret = NULL;
  5265. }
  5266. if(leftOuterJoin && !gotMatch)
  5267. {
  5268. ret = joinRecords(left, defaultRight, 0, JTFmatchedleft);
  5269. gotMatch = true;
  5270. }
  5271. }
  5272. if(ret)
  5273. {
  5274. matchedGroup = true;
  5275. processed++;
  5276. if(!many || (--keepCount == 0) || failingLimit)
  5277. {
  5278. left.clear();
  5279. joinCounter = 0;
  5280. failingLimit.clear();
  5281. }
  5282. return ret;
  5283. }
  5284. left.clear();
  5285. joinCounter = 0;
  5286. }
  5287. }
  5288. const void * CHThorLookupJoinActivity::nextRowDenormalize()
  5289. {
  5290. while(true)
  5291. {
  5292. left.setown(input->nextRow());
  5293. if(!left)
  5294. {
  5295. if (!matchedGroup || isSmartJoin)
  5296. left.setown(input->nextRow());
  5297. if (!left)
  5298. {
  5299. matchedGroup = false;
  5300. return NULL;
  5301. }
  5302. }
  5303. gotMatch = false;
  5304. const void * right = getRightFirst();
  5305. const void * ret = NULL;
  5306. if (failingLimit)
  5307. ret = joinException(left, failingLimit);
  5308. else if (kind == TAKlookupdenormalize || kind == TAKsmartdenormalize)
  5309. {
  5310. OwnedConstRoxieRow newLeft(left.getLink());
  5311. unsigned rowSize = 0;
  5312. unsigned leftCount = 0;
  5313. keepCount = keepLimit;
  5314. while (right)
  5315. {
  5316. if (helper.match(left, right))
  5317. {
  5318. gotMatch = true;
  5319. if (exclude)
  5320. break;
  5321. try
  5322. {
  5323. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  5324. unsigned thisSize = helper.transform(rowBuilder, newLeft, right, ++leftCount, JTFmatchedleft|JTFmatchedright);
  5325. if (thisSize)
  5326. {
  5327. rowSize = thisSize;
  5328. newLeft.setown(rowBuilder.finalizeRowClear(rowSize));
  5329. }
  5330. }
  5331. catch(IException * e)
  5332. {
  5333. throw makeWrappedException(e);
  5334. }
  5335. if(!many || (--keepCount == 0))
  5336. break;
  5337. }
  5338. right = getRightNext();
  5339. }
  5340. //Is this rowSize test correct?? Is there any situation where it shouldn't just return newLeft?
  5341. if (rowSize)
  5342. ret = newLeft.getClear();
  5343. else if (leftOuterJoin && !gotMatch)
  5344. ret = left.getClear();
  5345. }
  5346. else
  5347. {
  5348. filteredRight.kill();
  5349. keepCount = keepLimit;
  5350. while (right)
  5351. {
  5352. if (helper.match(left, right))
  5353. {
  5354. gotMatch = true;
  5355. if(exclude)
  5356. break;
  5357. filteredRight.append(right);
  5358. if(!many || (--keepCount == 0))
  5359. break;
  5360. }
  5361. right = getRightNext();
  5362. }
  5363. if((filteredRight.ordinality() > 0) || (leftOuterJoin && !gotMatch))
  5364. ret = groupDenormalizeRecords(left, filteredRight, JTFmatchedleft);
  5365. filteredRight.kill();
  5366. }
  5367. left.clear();
  5368. failingLimit.clear();
  5369. if(ret)
  5370. {
  5371. matchedGroup = true;
  5372. processed++;
  5373. return ret;
  5374. }
  5375. }
  5376. }
  5377. bool CHThorLookupJoinActivity::isGrouped()
  5378. {
  5379. return input ? input->isGrouped() : false;
  5380. }
  5381. const void * CHThorLookupJoinActivity::fillRightGroup()
  5382. {
  5383. rightGroup.kill();
  5384. for(const void * right = table->find(left); right; right = table->findNext(left))
  5385. {
  5386. rightGroup.append(right);
  5387. if(rightGroup.ordinality() > limitLimit)
  5388. {
  5389. if(limitFail)
  5390. failLimit();
  5391. if ( agent.queryCodeContext()->queryDebugContext())
  5392. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  5393. gotMatch = true;
  5394. if(limitOnFail)
  5395. {
  5396. assertex(!failingLimit);
  5397. try
  5398. {
  5399. failLimit();
  5400. }
  5401. catch(IException * e)
  5402. {
  5403. failingLimit.setown(e);
  5404. }
  5405. assertex(failingLimit);
  5406. }
  5407. else
  5408. {
  5409. rightGroup.kill();
  5410. }
  5411. break;
  5412. }
  5413. if(rightGroup.ordinality() > atmostLimit)
  5414. {
  5415. rightGroup.kill();
  5416. break;
  5417. }
  5418. }
  5419. rightGroupIndex = 0;
  5420. return readRightGroup();
  5421. }
  5422. void CHThorLookupJoinActivity::failLimit()
  5423. {
  5424. helper.onMatchAbortLimitExceeded();
  5425. CommonXmlWriter xmlwrite(0);
  5426. if(input->queryOutputMeta() && input->queryOutputMeta()->hasXML())
  5427. {
  5428. input->queryOutputMeta()->toXML(static_cast<const unsigned char *>(left.get()), xmlwrite);
  5429. }
  5430. throw MakeStringException(0, "More than %u match candidates in join for row %s", limitLimit, xmlwrite.str());
  5431. }
  5432. unsigned const CHThorLookupJoinActivity::LookupTable::BadIndex(static_cast<unsigned>(-1));
  5433. //=====================================================================================================
  5434. CHThorAllJoinActivity::CHThorAllJoinActivity(IAgentContext & _agent, unsigned _activityId, unsigned _subgraphId, IHThorAllJoinArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), outBuilder(NULL)
  5435. {
  5436. }
  5437. void CHThorAllJoinActivity::ready()
  5438. {
  5439. CHThorActivityBase::ready();
  5440. input1->ready();
  5441. outBuilder.setAllocator(rowAllocator);
  5442. leftOuterJoin = (helper.getJoinFlags() & JFleftouter) != 0;
  5443. exclude = (helper.getJoinFlags() & JFexclude) != 0;
  5444. if(leftOuterJoin && !defaultRight)
  5445. createDefaultRight();
  5446. if((helper.getJoinFlags() & (JFrightouter | JFfirst | JFfirstleft | JFfirstright)) != 0)
  5447. throwUnexpected();
  5448. keepLimit = helper.getKeepLimit();
  5449. if(keepLimit==0)
  5450. keepLimit = (unsigned)-1;
  5451. started = false;
  5452. countForLeft = keepLimit;
  5453. matchedLeft = false;
  5454. matchedGroup = false;
  5455. eog = false;
  5456. eos = false;
  5457. }
  5458. void CHThorAllJoinActivity::stop()
  5459. {
  5460. outBuilder.clear();
  5461. left.clear();
  5462. rightset.clear();
  5463. matchedRight.kill();
  5464. CHThorActivityBase::stop();
  5465. input1->stop();
  5466. }
  5467. void CHThorAllJoinActivity::createDefaultRight()
  5468. {
  5469. if (!defaultRight)
  5470. {
  5471. if (!defaultRightAllocator)
  5472. defaultRightAllocator.setown(agent.queryCodeContext()->getRowAllocator(input1->queryOutputMeta(), activityId));
  5473. RtlDynamicRowBuilder rowBuilder(defaultRightAllocator);
  5474. size32_t thisSize = helper.createDefaultRight(rowBuilder);
  5475. defaultRight.setown(rowBuilder.finalizeRowClear(thisSize));
  5476. }
  5477. }
  5478. void CHThorAllJoinActivity::loadRight()
  5479. {
  5480. const void * next;
  5481. while(true)
  5482. {
  5483. next = input1->nextRow();
  5484. if(!next)
  5485. next = input1->nextRow();
  5486. if(!next)
  5487. break;
  5488. rightset.append(next);
  5489. matchedRight.append(false);
  5490. }
  5491. rightIndex = 0;
  5492. joinCounter = 0;
  5493. rightOrdinality = rightset.ordinality();
  5494. }
  5495. const void * CHThorAllJoinActivity::joinRecords(const void * left, const void * right, unsigned counter, unsigned flags)
  5496. {
  5497. try
  5498. {
  5499. outBuilder.ensureRow();
  5500. memsize_t thisSize = helper.transform(outBuilder, left, right, counter, flags);
  5501. if(thisSize)
  5502. return outBuilder.finalizeRowClear(thisSize);
  5503. else
  5504. return NULL;
  5505. }
  5506. catch(IException * e)
  5507. {
  5508. throw makeWrappedException(e);
  5509. }
  5510. }
  5511. const void * CHThorAllJoinActivity::groupDenormalizeRecords(const void * curLeft, ConstPointerArray & rows, unsigned flags)
  5512. {
  5513. try
  5514. {
  5515. outBuilder.ensureRow();
  5516. unsigned numRows = rows.ordinality();
  5517. const void * right = numRows ? rows.item(0) : defaultRight.get();
  5518. if (numRows>0)
  5519. flags |= JTFmatchedright;
  5520. memsize_t thisSize = helper.transform(outBuilder, curLeft, right, numRows, (const void * *)rows.getArray(), flags);
  5521. if(thisSize)
  5522. return outBuilder.finalizeRowClear(thisSize);
  5523. else
  5524. return NULL;
  5525. }
  5526. catch(IException * e)
  5527. {
  5528. throw makeWrappedException(e);
  5529. }
  5530. }
  5531. void CHThorAllJoinActivity::setInput(unsigned index, IHThorInput * _input)
  5532. {
  5533. if (index==1)
  5534. input1 = _input;
  5535. else
  5536. {
  5537. CHThorActivityBase::setInput(index, _input);
  5538. leftIsGrouped = true; // input->isGrouped() is unreliable and it is just as good to always behave as if input is grouped
  5539. }
  5540. }
  5541. const void * CHThorAllJoinActivity::nextRow()
  5542. {
  5543. if(!started)
  5544. {
  5545. started = true;
  5546. left.setown(input->nextRow());
  5547. matchedLeft = false;
  5548. countForLeft = keepLimit;
  5549. if(!left)
  5550. {
  5551. eos = true;
  5552. return NULL;
  5553. }
  5554. loadRight();
  5555. }
  5556. const void * ret;
  5557. const void * right;
  5558. if(eos)
  5559. return NULL;
  5560. while(true)
  5561. {
  5562. ret = NULL;
  5563. if((rightIndex == rightOrdinality) || (countForLeft==0))
  5564. {
  5565. if(leftOuterJoin && left && !matchedLeft)
  5566. {
  5567. switch(kind)
  5568. {
  5569. case TAKalljoin:
  5570. ret = joinRecords(left, defaultRight, 0, JTFmatchedleft);
  5571. break;
  5572. case TAKalldenormalize:
  5573. ret = left.getClear();
  5574. break;
  5575. case TAKalldenormalizegroup:
  5576. filteredRight.kill();
  5577. ret = groupDenormalizeRecords(left, filteredRight, JTFmatchedleft);
  5578. break;
  5579. default:
  5580. throwUnexpected();
  5581. }
  5582. }
  5583. rightIndex = 0;
  5584. joinCounter = 0;
  5585. left.clear();
  5586. if(ret)
  5587. {
  5588. matchedGroup = true;
  5589. processed++;
  5590. return ret;
  5591. }
  5592. }
  5593. if(!left)
  5594. {
  5595. left.setown(input->nextRow());
  5596. matchedLeft = false;
  5597. countForLeft = keepLimit;
  5598. }
  5599. if(!left)
  5600. {
  5601. if(eog)
  5602. {
  5603. eos = true;
  5604. matchedGroup = false;
  5605. return NULL;
  5606. }
  5607. eog = true;
  5608. if(matchedGroup && leftIsGrouped)
  5609. {
  5610. matchedGroup = false;
  5611. return NULL;
  5612. }
  5613. matchedGroup = false;
  5614. continue;
  5615. }
  5616. eog = false;
  5617. switch(kind)
  5618. {
  5619. case TAKalljoin:
  5620. while(rightIndex < rightOrdinality)
  5621. {
  5622. right = rightset.item(rightIndex);
  5623. if(helper.match(left, right))
  5624. {
  5625. matchedLeft = true;
  5626. matchedRight.replace(true, rightIndex);
  5627. if(!exclude)
  5628. ret = joinRecords(left, right, ++joinCounter, JTFmatchedleft|JTFmatchedright);
  5629. }
  5630. rightIndex++;
  5631. if(ret)
  5632. {
  5633. countForLeft--;
  5634. matchedGroup = true;
  5635. processed++;
  5636. return ret;
  5637. }
  5638. }
  5639. case TAKalldenormalize:
  5640. {
  5641. OwnedConstRoxieRow newLeft;
  5642. newLeft.set(left);
  5643. unsigned rowSize = 0;
  5644. unsigned leftCount = 0;
  5645. while((rightIndex < rightOrdinality) && countForLeft)
  5646. {
  5647. right = rightset.item(rightIndex);
  5648. if(helper.match(left, right))
  5649. {
  5650. matchedLeft = true;
  5651. matchedRight.replace(true, rightIndex);
  5652. if(!exclude)
  5653. {
  5654. try
  5655. {
  5656. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  5657. unsigned thisSize = helper.transform(rowBuilder, newLeft, right, ++leftCount, JTFmatchedleft|JTFmatchedright);
  5658. if(thisSize)
  5659. {
  5660. rowSize = thisSize;
  5661. newLeft.setown(rowBuilder.finalizeRowClear(rowSize));
  5662. --countForLeft;
  5663. }
  5664. }
  5665. catch(IException * e)
  5666. {
  5667. throw makeWrappedException(e);
  5668. }
  5669. }
  5670. }
  5671. rightIndex++;
  5672. }
  5673. if(rowSize)
  5674. {
  5675. processed++;
  5676. return newLeft.getClear();
  5677. }
  5678. }
  5679. break;
  5680. case TAKalldenormalizegroup:
  5681. filteredRight.kill();
  5682. while((rightIndex < rightOrdinality) && countForLeft)
  5683. {
  5684. right = rightset.item(rightIndex);
  5685. if(helper.match(left, right))
  5686. {
  5687. matchedLeft = true;
  5688. matchedRight.replace(true, rightIndex);
  5689. filteredRight.append(right);
  5690. --countForLeft;
  5691. }
  5692. ++rightIndex;
  5693. }
  5694. if(!exclude && filteredRight.ordinality())
  5695. {
  5696. const void * ret = groupDenormalizeRecords(left, filteredRight, JTFmatchedleft);
  5697. filteredRight.kill();
  5698. if(ret)
  5699. {
  5700. processed++;
  5701. return ret;
  5702. }
  5703. }
  5704. break;
  5705. default:
  5706. throwUnexpected();
  5707. }
  5708. }
  5709. }
  5710. bool CHThorAllJoinActivity::isGrouped()
  5711. {
  5712. return input ? input->isGrouped() : false;
  5713. }
  5714. //=====================================================================================================
  5715. //=====================================================================================================
  5716. CHThorWorkUnitWriteActivity::CHThorWorkUnitWriteActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorWorkUnitWriteArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  5717. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  5718. {
  5719. }
  5720. static void throwWuResultTooLarge(size32_t outputLimit, IHThorWorkUnitWriteArg &helper)
  5721. {
  5722. StringBuffer errMsg("Dataset too large to output to workunit (limit ");
  5723. errMsg.append(outputLimit/0x100000).append(" megabytes), in result (");
  5724. const char *name = helper.queryName();
  5725. if (name)
  5726. errMsg.append("name=").append(name);
  5727. else
  5728. errMsg.append("sequence=").append(helper.getSequence());
  5729. errMsg.append(")");
  5730. throw MakeStringExceptionDirect(0, errMsg.str());
  5731. }
  5732. void CHThorWorkUnitWriteActivity::execute()
  5733. {
  5734. unsigned flags = helper.getFlags();
  5735. grouped = (POFgrouped & flags) != 0;
  5736. // In absense of OPT_OUTPUTLIMIT check pre 5.2 legacy name OPT_OUTPUTLIMIT_LEGACY
  5737. size32_t outputLimit = agent.queryWorkUnit()->getDebugValueInt(OPT_OUTPUTLIMIT, agent.queryWorkUnit()->getDebugValueInt(OPT_OUTPUTLIMIT_LEGACY, defaultDaliResultLimit));
  5738. if (flags & POFmaxsize)
  5739. outputLimit = helper.getMaxSize();
  5740. if (outputLimit>daliResultOutputMax)
  5741. throw MakeStringException(0, "Dali result outputs are restricted to a maximum of %d MB, the current limit is %d MB. A huge dali result usually indicates the ECL needs altering.", daliResultOutputMax, defaultDaliResultLimit);
  5742. assertex(outputLimit<=0x1000); // 32bit limit because MemoryBuffer/CMessageBuffers involved etc.
  5743. outputLimit *= 0x100000;
  5744. MemoryBuffer rowdata;
  5745. __int64 rows = 0;
  5746. IRecordSize * inputMeta = input->queryOutputMeta();
  5747. if (0 != (POFextend & helper.getFlags()))
  5748. {
  5749. WorkunitUpdate w = agent.updateWorkUnit();
  5750. Owned<IWUResult> result = updateWorkUnitResult(w, helper.queryName(), helper.getSequence());
  5751. rows = result->getResultRowCount();
  5752. }
  5753. __int64 initialRows = rows;
  5754. Owned<IOutputRowSerializer> rowSerializer;
  5755. if (input->queryOutputMeta()->getMetaFlags() & MDFneedserializedisk)
  5756. rowSerializer.setown( input->queryOutputMeta()->createDiskSerializer(agent.queryCodeContext(), activityId) );
  5757. int seq = helper.getSequence();
  5758. bool toStdout = (seq >= 0) && agent.queryWriteResultsToStdout();
  5759. Owned<SimpleOutputWriter> writer;
  5760. if (toStdout)
  5761. writer.setown(new SimpleOutputWriter);
  5762. if (agent.queryOutputFmt() == ofXML && seq >= 0)
  5763. {
  5764. StringBuffer sb;
  5765. const char *name = helper.queryName();
  5766. if (name && *name)
  5767. sb.appendf("<Dataset name='%s'>\n", name);
  5768. else
  5769. sb.appendf("<Dataset name='Result %d'>\n", seq+1);
  5770. agent.queryOutputSerializer()->fwrite(seq, (const void*)sb.str(), 1, sb.length());
  5771. }
  5772. for (;;)
  5773. {
  5774. if ((unsigned __int64)rows >= agent.queryStopAfter())
  5775. break;
  5776. OwnedConstRoxieRow nextrec(input->nextRow());
  5777. if (grouped && (rows != initialRows))
  5778. rowdata.append(nextrec == NULL);
  5779. if (!nextrec)
  5780. {
  5781. nextrec.setown(input->nextRow());
  5782. if (!nextrec)
  5783. break;
  5784. }
  5785. size32_t thisSize = inputMeta->getRecordSize(nextrec);
  5786. if (outputLimit && ((rowdata.length() + thisSize) > outputLimit))
  5787. throwWuResultTooLarge(outputLimit, helper);
  5788. if (rowSerializer)
  5789. {
  5790. CThorDemoRowSerializer serializerTarget(rowdata);
  5791. rowSerializer->serialize(serializerTarget, (const byte *) nextrec.get() );
  5792. }
  5793. else
  5794. rowdata.append(thisSize, nextrec);
  5795. if (toStdout && seq >= 0)
  5796. {
  5797. if (agent.queryOutputFmt() == ofSTD)
  5798. {
  5799. helper.serializeXml((byte *) nextrec.get(), *writer);
  5800. writer->newline();
  5801. agent.queryOutputSerializer()->fwrite(seq, (const void*)writer->str(), 1, writer->length());
  5802. writer->clear();
  5803. }
  5804. else if (agent.queryOutputFmt() == ofXML)
  5805. {
  5806. CommonXmlWriter xmlwrite(0,1);
  5807. xmlwrite.outputBeginNested(DEFAULTXMLROWTAG, false);
  5808. helper.serializeXml((byte *) nextrec.get(), xmlwrite);
  5809. xmlwrite.outputEndNested(DEFAULTXMLROWTAG);
  5810. agent.queryOutputSerializer()->fwrite(seq, (const void*)xmlwrite.str(), 1, xmlwrite.length());
  5811. }
  5812. }
  5813. rows++;
  5814. }
  5815. WorkunitUpdate w = agent.updateWorkUnit();
  5816. Owned<IWUResult> result = updateWorkUnitResult(w, helper.queryName(), helper.getSequence());
  5817. if (0 != (POFextend & helper.getFlags()))
  5818. {
  5819. __int64 existingSz = result->getResultRawSize(nullptr, nullptr);
  5820. if (outputLimit && ((rowdata.length() + existingSz) > outputLimit))
  5821. throwWuResultTooLarge(outputLimit, helper);
  5822. result->addResultRaw(rowdata.length(), rowdata.toByteArray(), ResultFormatRaw);
  5823. }
  5824. else
  5825. result->setResultRaw(rowdata.length(), rowdata.toByteArray(), ResultFormatRaw);
  5826. result->setResultStatus(ResultStatusCalculated);
  5827. result->setResultRowCount(rows);
  5828. result->setResultTotalRowCount(rows); // Is this right??
  5829. if (toStdout && seq >= 0)
  5830. {
  5831. if (agent.queryOutputFmt() == ofXML)
  5832. {
  5833. StringBuffer sb;
  5834. sb.appendf(DEFAULTXMLFOOTER).newline();
  5835. agent.queryOutputSerializer()->fwrite(seq, (const void*)sb.str(), 1, sb.length());
  5836. }
  5837. else if (agent.queryOutputFmt() != ofSTD)
  5838. agent.outputFormattedResult(helper.queryName(), seq, false);
  5839. if (!(POFextend & helper.getFlags()))//POextend results will never get closed, so wont flush until serializer dtor
  5840. agent.queryOutputSerializer()->close(seq, false);
  5841. }
  5842. }
  5843. //=====================================================================================================
  5844. CHThorDictionaryWorkUnitWriteActivity::CHThorDictionaryWorkUnitWriteActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDictionaryWorkUnitWriteArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  5845. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  5846. {
  5847. }
  5848. void CHThorDictionaryWorkUnitWriteActivity::execute()
  5849. {
  5850. int sequence = helper.getSequence();
  5851. const char *storedName = helper.queryName();
  5852. assertex(storedName && *storedName);
  5853. assertex(sequence < 0);
  5854. RtlLinkedDictionaryBuilder builder(rowAllocator, helper.queryHashLookupInfo());
  5855. for (;;)
  5856. {
  5857. const void *row = input->nextRow();
  5858. if (!row)
  5859. {
  5860. row = input->nextRow();
  5861. if (!row)
  5862. break;
  5863. }
  5864. builder.appendOwn(row);
  5865. processed++;
  5866. }
  5867. unsigned __int64 usedCount = rtlDictionaryCount(builder.getcount(), builder.queryrows());
  5868. // In absense of OPT_OUTPUTLIMIT check pre 5.2 legacy name OPT_OUTPUTLIMIT_LEGACY
  5869. size32_t outputLimit = agent.queryWorkUnit()->getDebugValueInt(OPT_OUTPUTLIMIT, agent.queryWorkUnit()->getDebugValueInt(OPT_OUTPUTLIMIT_LEGACY, defaultDaliResultLimit)) * 0x100000;
  5870. MemoryBuffer rowdata;
  5871. CThorDemoRowSerializer out(rowdata);
  5872. Owned<IOutputRowSerializer> serializer = input->queryOutputMeta()->createDiskSerializer(agent.queryCodeContext(), activityId);
  5873. rtlSerializeDictionary(out, serializer, builder.getcount(), builder.queryrows());
  5874. if(outputLimit && (rowdata.length() > outputLimit))
  5875. {
  5876. StringBuffer errMsg("Dictionary too large to output to workunit (limit ");
  5877. errMsg.append(outputLimit/0x100000).append(" megabytes), in result (");
  5878. const char *name = helper.queryName();
  5879. if (name)
  5880. errMsg.append("name=").append(name);
  5881. else
  5882. errMsg.append("sequence=").append(helper.getSequence());
  5883. errMsg.append(")");
  5884. throw MakeStringExceptionDirect(0, errMsg.str());
  5885. }
  5886. WorkunitUpdate w = agent.updateWorkUnit();
  5887. Owned<IWUResult> result = updateWorkUnitResult(w, helper.queryName(), helper.getSequence());
  5888. result->setResultRaw(rowdata.length(), rowdata.toByteArray(), ResultFormatRaw);
  5889. result->setResultStatus(ResultStatusCalculated);
  5890. result->setResultRowCount(usedCount);
  5891. result->setResultTotalRowCount(usedCount); // Is this right??
  5892. }
  5893. //=====================================================================================================
  5894. CHThorRemoteResultActivity::CHThorRemoteResultActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorRemoteResultArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  5895. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  5896. {
  5897. }
  5898. void CHThorRemoteResultActivity::execute()
  5899. {
  5900. OwnedConstRoxieRow result(input->nextRow());
  5901. helper.sendResult(result);
  5902. }
  5903. //=====================================================================================================
  5904. CHThorInlineTableActivity::CHThorInlineTableActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorInlineTableArg &_arg, ThorActivityKind _kind, EclGraph & _graph) :
  5905. CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  5906. {
  5907. }
  5908. void CHThorInlineTableActivity::ready()
  5909. {
  5910. CHThorSimpleActivityBase::ready();
  5911. curRow = 0;
  5912. numRows = helper.numRows();
  5913. }
  5914. const void *CHThorInlineTableActivity::nextRow()
  5915. {
  5916. // Filtering empty rows, returns the next valid row
  5917. while (curRow < numRows)
  5918. {
  5919. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  5920. size32_t size = helper.getRow(rowBuilder, curRow++);
  5921. if (size)
  5922. {
  5923. processed++;
  5924. return rowBuilder.finalizeRowClear(size);
  5925. }
  5926. }
  5927. return NULL;
  5928. }
  5929. //=====================================================================================================
  5930. CHThorNullActivity::CHThorNullActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  5931. {
  5932. }
  5933. const void *CHThorNullActivity::nextRow()
  5934. {
  5935. return NULL;
  5936. }
  5937. //=====================================================================================================
  5938. CHThorActionActivity::CHThorActionActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorActionArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  5939. {
  5940. }
  5941. void CHThorActionActivity::execute()
  5942. {
  5943. helper.action();
  5944. }
  5945. const void *CHThorActionActivity::nextRow()
  5946. {
  5947. return NULL;
  5948. }
  5949. //=====================================================================================================
  5950. CHThorSideEffectActivity::CHThorSideEffectActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorSideEffectArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  5951. {
  5952. }
  5953. const void *CHThorSideEffectActivity::nextRow()
  5954. {
  5955. try
  5956. {
  5957. helper.action();
  5958. }
  5959. catch(IException * e)
  5960. {
  5961. throw makeWrappedException(e);
  5962. }
  5963. return NULL;
  5964. }
  5965. //=====================================================================================================
  5966. CHThorDummyActivity::CHThorDummyActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  5967. {
  5968. }
  5969. void CHThorDummyActivity::execute()
  5970. {
  5971. }
  5972. const void *CHThorDummyActivity::nextRow()
  5973. {
  5974. return input ? input->nextRow() : NULL;
  5975. }
  5976. //=====================================================================================================
  5977. CHThorWhenActionActivity::CHThorWhenActionActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorArg &_arg, ThorActivityKind _kind, EclGraph & _graph, EclGraphElement * _graphElement)
  5978. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), graphElement(_graphElement)
  5979. {
  5980. }
  5981. void CHThorWhenActionActivity::ready()
  5982. {
  5983. CHThorSimpleActivityBase::ready();
  5984. graphElement->executeDependentActions(agent, NULL, WhenBeforeId);
  5985. graphElement->executeDependentActions(agent, NULL, WhenParallelId);
  5986. }
  5987. void CHThorWhenActionActivity::execute()
  5988. {
  5989. graphElement->executeDependentActions(agent, NULL, 1);
  5990. }
  5991. const void * CHThorWhenActionActivity::nextRow()
  5992. {
  5993. return input->nextRow();
  5994. }
  5995. void CHThorWhenActionActivity::stop()
  5996. {
  5997. graphElement->executeDependentActions(agent, NULL, WhenSuccessId);
  5998. CHThorSimpleActivityBase::stop();
  5999. }
  6000. //=====================================================================================================
  6001. CHThorMultiInputActivity::CHThorMultiInputActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  6002. {
  6003. }
  6004. void CHThorMultiInputActivity::ready()
  6005. {
  6006. CHThorSimpleActivityBase::ready();
  6007. ForEachItemIn(idx, inputs)
  6008. inputs.item(idx)->ready();
  6009. }
  6010. void CHThorMultiInputActivity::stop()
  6011. {
  6012. CHThorSimpleActivityBase::stop();
  6013. ForEachItemIn(idx, inputs)
  6014. inputs.item(idx)->stop();
  6015. }
  6016. void CHThorMultiInputActivity::resetEOF()
  6017. {
  6018. CHThorSimpleActivityBase::resetEOF();
  6019. ForEachItemIn(idx, inputs)
  6020. inputs.item(idx)->resetEOF();
  6021. }
  6022. void CHThorMultiInputActivity::setInput(unsigned index, IHThorInput *_input)
  6023. {
  6024. if (index==inputs.length())
  6025. {
  6026. inputs.append(_input);
  6027. }
  6028. else
  6029. {
  6030. while (!inputs.isItem(index))
  6031. inputs.append(NULL);
  6032. inputs.replace(_input, index);
  6033. }
  6034. }
  6035. void CHThorMultiInputActivity::updateProgress(IStatisticGatherer &progress) const
  6036. {
  6037. CHThorSimpleActivityBase::updateProgress(progress);
  6038. ForEachItemIn(idx, inputs)
  6039. {
  6040. IHThorInput *i = inputs.item(idx);
  6041. if (i)
  6042. i->updateProgress(progress);
  6043. }
  6044. }
  6045. //=====================================================================================================
  6046. CHThorConcatActivity::CHThorConcatActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorFunnelArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorMultiInputActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6047. {
  6048. }
  6049. void CHThorConcatActivity::ready()
  6050. {
  6051. grouped = helper.queryOutputMeta()->isGrouped();
  6052. inputIdx = 0;
  6053. curInput = inputs.item(inputIdx);
  6054. eogSeen = false;
  6055. anyThisGroup = false;
  6056. CHThorMultiInputActivity::ready();
  6057. }
  6058. const void *CHThorConcatActivity::nextRow()
  6059. {
  6060. if (!curInput)
  6061. return NULL; // eof
  6062. const void * next = curInput->nextRow();
  6063. if (next)
  6064. {
  6065. anyThisGroup = true;
  6066. eogSeen = false;
  6067. processed++;
  6068. return next;
  6069. }
  6070. else if (!eogSeen)
  6071. {
  6072. eogSeen = true;
  6073. if (grouped)
  6074. {
  6075. if (anyThisGroup)
  6076. {
  6077. anyThisGroup = false;
  6078. return NULL;
  6079. }
  6080. else
  6081. return nextRow();
  6082. }
  6083. else
  6084. return nextRow();
  6085. }
  6086. else if (inputIdx < inputs.length()-1)
  6087. {
  6088. inputIdx++;
  6089. curInput = inputs.item(inputIdx);
  6090. eogSeen = false;
  6091. anyThisGroup = false;
  6092. return nextRow();
  6093. }
  6094. else
  6095. {
  6096. curInput = NULL;
  6097. return NULL;
  6098. }
  6099. }
  6100. //=====================================================================================================
  6101. CHThorNonEmptyActivity::CHThorNonEmptyActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNonEmptyArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorMultiInputActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6102. {
  6103. }
  6104. void CHThorNonEmptyActivity::ready()
  6105. {
  6106. grouped = helper.queryOutputMeta()->isGrouped();
  6107. selectedInput = NULL;
  6108. CHThorMultiInputActivity::ready();
  6109. }
  6110. const void *CHThorNonEmptyActivity::nextRow()
  6111. {
  6112. if (!selectedInput)
  6113. {
  6114. ForEachItemIn(i, inputs)
  6115. {
  6116. IHThorInput * cur = inputs.item(i);
  6117. const void * next = cur->nextRow();
  6118. if (next)
  6119. {
  6120. selectedInput = cur;
  6121. processed++;
  6122. return next;
  6123. }
  6124. }
  6125. return NULL;
  6126. }
  6127. const void * next = selectedInput->nextRow();
  6128. if (next)
  6129. processed++;
  6130. return next;
  6131. }
  6132. //=====================================================================================================
  6133. CHThorRegroupActivity::CHThorRegroupActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorRegroupArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorMultiInputActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  6134. {
  6135. }
  6136. void CHThorRegroupActivity::ready()
  6137. {
  6138. inputIndex = 0;
  6139. eof = false;
  6140. numProcessedLastGroup = processed;
  6141. CHThorMultiInputActivity::ready();
  6142. }
  6143. const void * CHThorRegroupActivity::nextFromInputs()
  6144. {
  6145. unsigned initialInput = inputIndex;
  6146. while (inputs.isItem(inputIndex))
  6147. {
  6148. OwnedConstRoxieRow next(inputs.item(inputIndex)->nextRow());
  6149. if (next)
  6150. {
  6151. if ((inputIndex != initialInput) && (inputIndex != initialInput+1))
  6152. {
  6153. throw MakeStringException(100, "Mismatched groups supplied to regroup %u", activityId);
  6154. }
  6155. return next.getClear();
  6156. }
  6157. inputIndex++;
  6158. }
  6159. if ((initialInput != 0) && (initialInput+1 != inputs.ordinality()))
  6160. throw MakeStringException(100, "Mismatched groups supplied to Regroup Activity(%u)", activityId);
  6161. inputIndex = 0;
  6162. return NULL;
  6163. }
  6164. const void * CHThorRegroupActivity::nextRow()
  6165. {
  6166. if (eof)
  6167. return NULL;
  6168. const void * ret = nextFromInputs();
  6169. if (ret)
  6170. {
  6171. processed++;
  6172. return ret;
  6173. }
  6174. if (numProcessedLastGroup != processed)
  6175. {
  6176. numProcessedLastGroup = processed;
  6177. return NULL;
  6178. }
  6179. eof = true;
  6180. return NULL;
  6181. }
  6182. //=====================================================================================================
  6183. CHThorRollupGroupActivity::CHThorRollupGroupActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorRollupGroupArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6184. {
  6185. }
  6186. void CHThorRollupGroupActivity::ready()
  6187. {
  6188. CHThorSimpleActivityBase::ready();
  6189. eof = false;
  6190. }
  6191. const void * CHThorRollupGroupActivity::nextRow()
  6192. {
  6193. if (eof)
  6194. return NULL;
  6195. for (;;)
  6196. {
  6197. OwnedRowArray group;
  6198. for (;;)
  6199. {
  6200. const void * in = input->nextRow();
  6201. if (!in)
  6202. break;
  6203. group.append(in);
  6204. }
  6205. if (group.ordinality() == 0)
  6206. {
  6207. eof = true;
  6208. return NULL;
  6209. }
  6210. try
  6211. {
  6212. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  6213. size32_t outSize = helper.transform(rowBuilder, group.ordinality(), (const void * *)group.getArray());
  6214. if (outSize)
  6215. {
  6216. processed++;
  6217. return rowBuilder.finalizeRowClear(outSize);
  6218. }
  6219. }
  6220. catch(IException * e)
  6221. {
  6222. throw makeWrappedException(e);
  6223. }
  6224. }
  6225. }
  6226. //=====================================================================================================
  6227. CHThorCombineActivity::CHThorCombineActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorCombineArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorMultiInputActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6228. {
  6229. }
  6230. void CHThorCombineActivity::ready()
  6231. {
  6232. numProcessedLastGroup = processed;
  6233. CHThorMultiInputActivity::ready();
  6234. }
  6235. void CHThorCombineActivity::nextInputs(OwnedRowArray & out)
  6236. {
  6237. ForEachItemIn(i, inputs)
  6238. {
  6239. const void * next = inputs.item(i)->nextRow();
  6240. if (next)
  6241. out.append(next);
  6242. }
  6243. }
  6244. const void *CHThorCombineActivity::nextRow()
  6245. {
  6246. for (;;)
  6247. {
  6248. OwnedRowArray group;
  6249. nextInputs(group);
  6250. if ((group.ordinality() == 0) && (numProcessedLastGroup == processed))
  6251. nextInputs(group);
  6252. if (group.ordinality() == 0)
  6253. {
  6254. numProcessedLastGroup = processed;
  6255. return NULL;
  6256. }
  6257. else if (group.ordinality() != inputs.ordinality())
  6258. {
  6259. throw MakeStringException(101, "Mismatched group input for Combine Activity(%u)", activityId);
  6260. }
  6261. try
  6262. {
  6263. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  6264. size32_t outSize = helper.transform(rowBuilder, group.ordinality(), (const void * *)group.getArray());
  6265. if (outSize)
  6266. {
  6267. processed++;
  6268. return rowBuilder.finalizeRowClear(outSize);
  6269. }
  6270. }
  6271. catch(IException * e)
  6272. {
  6273. throw makeWrappedException(e);
  6274. }
  6275. }
  6276. }
  6277. //=====================================================================================================
  6278. CHThorCombineGroupActivity::CHThorCombineGroupActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorCombineGroupArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6279. {
  6280. }
  6281. void CHThorCombineGroupActivity::ready()
  6282. {
  6283. numProcessedLastGroup = processed;
  6284. CHThorSimpleActivityBase::ready();
  6285. input1->ready();
  6286. }
  6287. void CHThorCombineGroupActivity::stop()
  6288. {
  6289. CHThorSimpleActivityBase::stop();
  6290. input1->stop();
  6291. }
  6292. void CHThorCombineGroupActivity::setInput(unsigned index, IHThorInput *_input)
  6293. {
  6294. if (index==1)
  6295. input1 = _input;
  6296. else
  6297. CHThorSimpleActivityBase::setInput(index, _input);
  6298. }
  6299. const void *CHThorCombineGroupActivity::nextRow()
  6300. {
  6301. for (;;)
  6302. {
  6303. OwnedConstRoxieRow left(input->nextRow());
  6304. if (!left && (numProcessedLastGroup == processed))
  6305. left.setown(input->nextRow());
  6306. if (!left)
  6307. {
  6308. if (numProcessedLastGroup == processed)
  6309. {
  6310. OwnedConstRoxieRow nextRight(input1->nextRow());
  6311. if (nextRight)
  6312. throw MakeStringException(101, "Missing LEFT record for Combine group Activity(%u)", activityId);
  6313. }
  6314. else
  6315. numProcessedLastGroup = processed;
  6316. return NULL;
  6317. }
  6318. OwnedRowArray group;
  6319. for (;;)
  6320. {
  6321. const void * in = input1->nextRow();
  6322. if (!in)
  6323. break;
  6324. group.append(in);
  6325. }
  6326. if (group.ordinality() == 0)
  6327. {
  6328. throw MakeStringException(101, "Missing RIGHT group for Combine Group Activity(%u)", activityId);
  6329. }
  6330. try
  6331. {
  6332. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  6333. size32_t outSize = helper.transform(rowBuilder, left, group.ordinality(), (const void * *)group.getArray());
  6334. if (outSize)
  6335. {
  6336. processed++;
  6337. return rowBuilder.finalizeRowClear(outSize);
  6338. }
  6339. }
  6340. catch(IException * e)
  6341. {
  6342. throw makeWrappedException(e);
  6343. }
  6344. }
  6345. }
  6346. //=====================================================================================================
  6347. CHThorApplyActivity::CHThorApplyActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorApplyArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6348. {
  6349. }
  6350. void CHThorApplyActivity::execute()
  6351. {
  6352. try
  6353. {
  6354. helper.start();
  6355. for (;;)
  6356. {
  6357. OwnedConstRoxieRow next(input->nextRow());
  6358. if (!next)
  6359. {
  6360. next.setown(input->nextRow());
  6361. if (!next)
  6362. break;
  6363. }
  6364. helper.apply(next);
  6365. }
  6366. helper.end();
  6367. }
  6368. catch (IException *e)
  6369. {
  6370. throw makeWrappedException(e);
  6371. }
  6372. }
  6373. //=====================================================================================================
  6374. CHThorDistributionActivity::CHThorDistributionActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDistributionArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  6375. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6376. {
  6377. }
  6378. void CHThorDistributionActivity::execute()
  6379. {
  6380. MemoryAttr ma;
  6381. IDistributionTable * * accumulator = (IDistributionTable * *)ma.allocate(helper.queryInternalRecordSize()->getMinRecordSize());
  6382. helper.clearAggregate(accumulator);
  6383. OwnedConstRoxieRow nextrec(input->nextRow());
  6384. for (;;)
  6385. {
  6386. if (!nextrec)
  6387. {
  6388. nextrec.setown(input->nextRow());
  6389. if (!nextrec)
  6390. break;
  6391. }
  6392. helper.process(accumulator, nextrec);
  6393. nextrec.setown(input->nextRow());
  6394. }
  6395. StringBuffer result;
  6396. result.append("<XML>");
  6397. helper.gatherResult(accumulator, result);
  6398. result.append("</XML>");
  6399. helper.sendResult(result.length(), result.str());
  6400. helper.destruct(accumulator);
  6401. }
  6402. //---------------------------------------------------------------------------
  6403. CHThorWorkunitReadActivity::CHThorWorkunitReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorWorkunitReadArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6404. {
  6405. first = true;
  6406. bufferStream.setown(createMemoryBufferSerialStream(resultBuffer));
  6407. deserializer.setStream(bufferStream);
  6408. }
  6409. CHThorWorkunitReadActivity::~CHThorWorkunitReadActivity()
  6410. {
  6411. }
  6412. void CHThorWorkunitReadActivity::ready()
  6413. {
  6414. CHThorSimpleActivityBase::ready();
  6415. rowDeserializer.setown(rowAllocator->createDiskDeserializer(agent.queryCodeContext()));
  6416. if(first)
  6417. {
  6418. checkForDiskRead();
  6419. first = false;
  6420. }
  6421. if(diskread)
  6422. {
  6423. diskread->ready();
  6424. return;
  6425. }
  6426. grouped = outputMeta.isGrouped();
  6427. unsigned lenData;
  6428. void * tempData;
  6429. OwnedRoxieString fromWuid(helper.getWUID());
  6430. ICsvToRowTransformer * csvTransformer = helper.queryCsvTransformer();
  6431. IXmlToRowTransformer * xmlTransformer = helper.queryXmlTransformer();
  6432. if (fromWuid)
  6433. agent.queryCodeContext()->getExternalResultRaw(lenData, tempData, fromWuid, helper.queryName(), helper.querySequence(), xmlTransformer, csvTransformer);
  6434. else
  6435. agent.queryCodeContext()->getResultRaw(lenData, tempData, helper.queryName(), helper.querySequence(), xmlTransformer, csvTransformer);
  6436. resultBuffer.setBuffer(lenData, tempData, true);
  6437. eogPending = false;
  6438. }
  6439. void CHThorWorkunitReadActivity::checkForDiskRead()
  6440. {
  6441. StringBuffer diskFilename;
  6442. OwnedRoxieString fromWuid(helper.getWUID());
  6443. if (agent.getWorkunitResultFilename(diskFilename, fromWuid, helper.queryName(), helper.querySequence()))
  6444. {
  6445. diskreadHelper.setown(createWorkUnitReadArg(diskFilename.str(), &helper));
  6446. try
  6447. {
  6448. diskreadHelper->onCreate(agent.queryCodeContext(), NULL, NULL);
  6449. }
  6450. catch(IException * e)
  6451. {
  6452. throw makeWrappedException(e);
  6453. }
  6454. diskread.setown(new CHThorDiskReadActivity(agent, activityId, subgraphId, *diskreadHelper, TAKdiskread, graph, nullptr));
  6455. }
  6456. }
  6457. void CHThorWorkunitReadActivity::stop()
  6458. {
  6459. if(diskread)
  6460. diskread->stop();
  6461. resultBuffer.resetBuffer();
  6462. CHThorSimpleActivityBase::stop();
  6463. }
  6464. const void *CHThorWorkunitReadActivity::nextRow()
  6465. {
  6466. if(diskread)
  6467. {
  6468. const void * ret = diskread->nextRow();
  6469. processed = diskread->queryProcessed();
  6470. return ret;
  6471. }
  6472. if (deserializer.eos())
  6473. return NULL;
  6474. if (eogPending)
  6475. {
  6476. eogPending = false;
  6477. return NULL;
  6478. }
  6479. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  6480. size32_t newSize = rowDeserializer->deserialize(rowBuilder, deserializer);
  6481. if (grouped)
  6482. deserializer.read(sizeof(bool), &eogPending);
  6483. processed++;
  6484. return rowBuilder.finalizeRowClear(newSize);
  6485. }
  6486. //=====================================================================================================
  6487. CHThorParseActivity::CHThorParseActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorParseArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6488. {
  6489. //DebugBreak();
  6490. anyThisGroup = false;
  6491. curSearchTextLen = 0;
  6492. curSearchText = NULL;
  6493. algorithm = createThorParser(agent.queryCodeContext(), helper);
  6494. parser = algorithm->createParser(agent.queryCodeContext(), activityId, helper.queryHelper(), &helper);
  6495. rowIter = parser->queryResultIter();
  6496. }
  6497. CHThorParseActivity::~CHThorParseActivity()
  6498. {
  6499. if (curSearchText && helper.searchTextNeedsFree())
  6500. rtlFree(curSearchText);
  6501. parser->Release();
  6502. algorithm->Release();
  6503. }
  6504. void CHThorParseActivity::ready()
  6505. {
  6506. CHThorSimpleActivityBase::ready();
  6507. anyThisGroup = false;
  6508. parser->reset();
  6509. }
  6510. void CHThorParseActivity::stop()
  6511. {
  6512. CHThorSimpleActivityBase::stop();
  6513. if (curSearchText && helper.searchTextNeedsFree())
  6514. rtlFree(curSearchText);
  6515. curSearchText = NULL;
  6516. in.clear();
  6517. }
  6518. bool CHThorParseActivity::processRecord(const void * in)
  6519. {
  6520. if (curSearchText && helper.searchTextNeedsFree())
  6521. rtlFree(curSearchText);
  6522. curSearchTextLen = 0;
  6523. curSearchText = NULL;
  6524. helper.getSearchText(curSearchTextLen, curSearchText, in);
  6525. return parser->performMatch(*this, in, curSearchTextLen, curSearchText);
  6526. }
  6527. unsigned CHThorParseActivity::onMatch(ARowBuilder & self, const void * curRecord, IMatchedResults * results, IMatchWalker * walker)
  6528. {
  6529. try
  6530. {
  6531. return helper.transform(self, curRecord, results, walker);
  6532. }
  6533. catch(IException * e)
  6534. {
  6535. throw makeWrappedException(e);
  6536. }
  6537. }
  6538. const void * CHThorParseActivity::nextRow()
  6539. {
  6540. for (;;)
  6541. {
  6542. if (rowIter->isValid())
  6543. {
  6544. anyThisGroup = true;
  6545. OwnedConstRoxieRow out = rowIter->getRow();
  6546. rowIter->next();
  6547. processed++;
  6548. return out.getClear();
  6549. }
  6550. in.setown(input->nextRow());
  6551. if (!in)
  6552. {
  6553. if (anyThisGroup)
  6554. {
  6555. anyThisGroup = false;
  6556. return NULL;
  6557. }
  6558. in.setown(input->nextRow());
  6559. if (!in)
  6560. return NULL;
  6561. }
  6562. processRecord(in);
  6563. rowIter->first();
  6564. }
  6565. }
  6566. //=====================================================================================================
  6567. CHThorEnthActivity::CHThorEnthActivity(IAgentContext & _agent, unsigned _activityId, unsigned _subgraphId, IHThorEnthArg & _arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), outBuilder(NULL)
  6568. {
  6569. }
  6570. void CHThorEnthActivity::ready()
  6571. {
  6572. CHThorSimpleActivityBase::ready();
  6573. outBuilder.setAllocator(rowAllocator);
  6574. numerator = helper.getProportionNumerator();
  6575. denominator = helper.getProportionDenominator();
  6576. started = false;
  6577. }
  6578. void CHThorEnthActivity::stop()
  6579. {
  6580. outBuilder.clear();
  6581. }
  6582. void CHThorEnthActivity::start()
  6583. {
  6584. if(denominator == 0) denominator = 1;
  6585. counter = (helper.getSampleNumber()-1) * greatestCommonDivisor(numerator, denominator);
  6586. if (counter >= denominator)
  6587. counter %= denominator;
  6588. started = true;
  6589. }
  6590. const void * CHThorEnthActivity::nextRow()
  6591. {
  6592. if(!started)
  6593. start();
  6594. OwnedConstRoxieRow ret;
  6595. for (;;)
  6596. {
  6597. ret.setown(input->nextRow());
  6598. if(!ret) //end of group
  6599. ret.setown(input->nextRow());
  6600. if(!ret) //eof
  6601. return NULL;
  6602. if (wanted())
  6603. {
  6604. processed++;
  6605. return ret.getClear();
  6606. }
  6607. }
  6608. }
  6609. //=====================================================================================================
  6610. CHThorTopNActivity::CHThorTopNActivity(IAgentContext & _agent, unsigned _activityId, unsigned _subgraphId, IHThorTopNArg & _arg, ThorActivityKind _kind, EclGraph & _graph)
  6611. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), compare(*helper.queryCompare())
  6612. {
  6613. hasBest = helper.hasBest();
  6614. grouped = outputMeta.isGrouped();
  6615. curIndex = 0;
  6616. sortedCount = 0;
  6617. limit = 0;
  6618. sorted = NULL;
  6619. }
  6620. CHThorTopNActivity::~CHThorTopNActivity()
  6621. {
  6622. roxiemem::ReleaseRoxieRowRange(sorted, curIndex, sortedCount);
  6623. free(sorted);
  6624. }
  6625. void CHThorTopNActivity::ready()
  6626. {
  6627. CHThorSimpleActivityBase::ready();
  6628. limit = helper.getLimit();
  6629. assertex(limit == (__int64)(size_t)limit);
  6630. sorted = (const void * *)checked_calloc((size_t)(limit+1), sizeof(void *), "topn");
  6631. sortedCount = 0;
  6632. curIndex = 0;
  6633. eof = false;
  6634. eoi = false;
  6635. }
  6636. void CHThorTopNActivity::stop()
  6637. {
  6638. CHThorSimpleActivityBase::stop();
  6639. roxiemem::ReleaseRoxieRowRange(sorted, curIndex, sortedCount);
  6640. free(sorted);
  6641. sorted = NULL;
  6642. curIndex = 0;
  6643. sortedCount = 0;
  6644. }
  6645. const void * CHThorTopNActivity::nextRow()
  6646. {
  6647. if(eof)
  6648. return NULL;
  6649. if(curIndex >= sortedCount)
  6650. {
  6651. bool eog = sortedCount != 0;
  6652. getSorted();
  6653. if(sortedCount == 0)
  6654. {
  6655. eof = true;
  6656. return NULL;
  6657. }
  6658. if (eog)
  6659. return NULL;
  6660. }
  6661. processed++;
  6662. return sorted[curIndex++];
  6663. }
  6664. bool CHThorTopNActivity::abortEarly()
  6665. {
  6666. if (hasBest && (sortedCount == limit))
  6667. {
  6668. int compare = helper.compareBest(sorted[sortedCount-1]);
  6669. if (compare == 0)
  6670. {
  6671. if (grouped)
  6672. {
  6673. //MORE: This would be more efficient if we had a away of skipping to the end of the incomming group.
  6674. OwnedConstRoxieRow next;
  6675. do
  6676. {
  6677. next.setown(input->nextRow());
  6678. } while(next);
  6679. }
  6680. else
  6681. eoi = true;
  6682. return true;
  6683. }
  6684. //This only checks the lowest element - we could check all elements inserted, but it would increase the number of compares
  6685. if (compare < 0)
  6686. throw MakeStringException(0, "TOPN: row found that exceeds the best value");
  6687. }
  6688. return false;
  6689. }
  6690. void CHThorTopNActivity::getSorted()
  6691. {
  6692. curIndex = 0;
  6693. sortedCount = 0;
  6694. if (eoi)
  6695. return;
  6696. OwnedConstRoxieRow next(input->nextRow());
  6697. while(next)
  6698. {
  6699. if(sortedCount < limit)
  6700. {
  6701. binary_vec_insert_stable(next.getClear(), sorted, sortedCount, compare);
  6702. sortedCount++;
  6703. if (abortEarly())
  6704. return;
  6705. }
  6706. else
  6707. {
  6708. // do not bother with insertion sort if we know next will fall off the end
  6709. if(limit && compare.docompare(sorted[sortedCount-1], next) > 0)
  6710. {
  6711. binary_vec_insert_stable(next.getClear(), sorted, sortedCount, compare);
  6712. ReleaseRoxieRow(sorted[sortedCount]);
  6713. if (abortEarly())
  6714. return;
  6715. }
  6716. }
  6717. next.setown(input->nextRow());
  6718. }
  6719. }
  6720. //=====================================================================================================
  6721. CHThorXmlParseActivity::CHThorXmlParseActivity(IAgentContext & _agent, unsigned _activityId, unsigned _subgraphId, IHThorXmlParseArg & _arg, ThorActivityKind _kind, EclGraph & _graph)
  6722. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6723. {
  6724. srchStrNeedsFree = helper.searchTextNeedsFree();
  6725. srchStr = NULL;
  6726. }
  6727. CHThorXmlParseActivity::~CHThorXmlParseActivity()
  6728. {
  6729. if(srchStrNeedsFree) rtlFree(srchStr);
  6730. }
  6731. void CHThorXmlParseActivity::ready()
  6732. {
  6733. CHThorSimpleActivityBase::ready();
  6734. numProcessedLastGroup = processed;
  6735. }
  6736. void CHThorXmlParseActivity::stop()
  6737. {
  6738. CHThorSimpleActivityBase::stop();
  6739. if(srchStrNeedsFree) rtlFree(srchStr);
  6740. srchStr = NULL;
  6741. in.clear();
  6742. }
  6743. const void * CHThorXmlParseActivity::nextRow()
  6744. {
  6745. for (;;)
  6746. {
  6747. if(xmlParser)
  6748. {
  6749. for (;;)
  6750. {
  6751. bool gotNext = false;
  6752. try
  6753. {
  6754. gotNext = xmlParser->next();
  6755. }
  6756. catch(IException * e)
  6757. {
  6758. throw makeWrappedException(e);
  6759. }
  6760. if(!gotNext)
  6761. {
  6762. if(srchStrNeedsFree)
  6763. {
  6764. rtlFree(srchStr);
  6765. srchStr = NULL;
  6766. }
  6767. xmlParser.clear();
  6768. break;
  6769. }
  6770. if(lastMatch)
  6771. {
  6772. try
  6773. {
  6774. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  6775. unsigned sizeGot = helper.transform(rowBuilder, in, lastMatch);
  6776. lastMatch.clear();
  6777. if (sizeGot)
  6778. {
  6779. processed++;
  6780. return rowBuilder.finalizeRowClear(sizeGot);
  6781. }
  6782. }
  6783. catch(IException * e)
  6784. {
  6785. throw makeWrappedException(e);
  6786. }
  6787. }
  6788. }
  6789. }
  6790. in.setown(input->nextRow());
  6791. if(!in)
  6792. {
  6793. if(numProcessedLastGroup == processed)
  6794. in.setown(input->nextRow());
  6795. if(!in)
  6796. {
  6797. numProcessedLastGroup = processed;
  6798. return NULL;
  6799. }
  6800. }
  6801. size32_t srchLen;
  6802. helper.getSearchText(srchLen, srchStr, in);
  6803. OwnedRoxieString xmlIteratorPath(helper.getXmlIteratorPath());
  6804. xmlParser.setown(createXMLParse(srchStr, srchLen, xmlIteratorPath, *this, ptr_noRoot, helper.requiresContents()));
  6805. }
  6806. }
  6807. //=====================================================================================================
  6808. class CHThorMergeActivity : public CHThorMultiInputActivity
  6809. {
  6810. protected:
  6811. IHThorMergeArg &helper;
  6812. CHThorStreamMerger merger;
  6813. public:
  6814. CHThorMergeActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorMergeArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorMultiInputActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6815. {
  6816. merger.init(helper.queryCompare(), helper.dedup(), NULL); // can mass null for range because merger.nextGE() never called
  6817. }
  6818. ~CHThorMergeActivity()
  6819. {
  6820. merger.cleanup();
  6821. }
  6822. virtual void ready()
  6823. {
  6824. CHThorMultiInputActivity::ready();
  6825. merger.initInputs(inputs.length(), inputs.getArray());
  6826. }
  6827. virtual void stop()
  6828. {
  6829. merger.done();
  6830. CHThorMultiInputActivity::stop();
  6831. }
  6832. virtual const void * nextRow()
  6833. {
  6834. const void * ret = merger.nextRow();
  6835. if (ret)
  6836. processed++;
  6837. return ret;
  6838. }
  6839. };
  6840. //=====================================================================================================
  6841. //Web Service Call base
  6842. CHThorWSCBaseActivity::CHThorWSCBaseActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorWebServiceCallArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6843. {
  6844. callHelper = &_arg;
  6845. init();
  6846. }
  6847. CHThorWSCBaseActivity::CHThorWSCBaseActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorWebServiceCallActionArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  6848. {
  6849. callHelper = NULL;
  6850. init();
  6851. }
  6852. void CHThorWSCBaseActivity::stop()
  6853. {
  6854. WSChelper.clear();//doesn't return until helper threads terminate
  6855. CHThorSimpleActivityBase::stop();
  6856. }
  6857. void CHThorWSCBaseActivity::init()
  6858. {
  6859. // Build authentication token
  6860. StringBuffer uidpair;
  6861. IUserDescriptor *userDesc = agent.queryCodeContext()->queryUserDescriptor();
  6862. if (userDesc)//NULL if standalone
  6863. {
  6864. userDesc->getUserName(uidpair);
  6865. uidpair.append(":");
  6866. userDesc->getPassword(uidpair);
  6867. JBASE64_Encode(uidpair.str(), uidpair.length(), authToken, false);
  6868. }
  6869. soapTraceLevel = agent.queryWorkUnit()->getDebugValueInt("soapTraceLevel", 1);
  6870. }
  6871. //---------------------------------------------------------------------------
  6872. CHThorWSCRowCallActivity::CHThorWSCRowCallActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorWebServiceCallArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorWSCBaseActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  6873. {
  6874. }
  6875. const void *CHThorWSCRowCallActivity::nextRow()
  6876. {
  6877. try
  6878. {
  6879. assertex(WSChelper);
  6880. OwnedConstRoxieRow ret = WSChelper->getRow();
  6881. if (!ret)
  6882. return NULL;
  6883. ++processed;
  6884. return ret.getClear();
  6885. }
  6886. catch(IException * e)
  6887. {
  6888. throw makeWrappedException(e);
  6889. }
  6890. }
  6891. //---------------------------------------------------------------------------
  6892. const void *CHThorHttpRowCallActivity::nextRow()
  6893. {
  6894. try
  6895. {
  6896. if (WSChelper == NULL)
  6897. {
  6898. WSChelper.setown(createHttpCallHelper(this, rowAllocator, authToken.str(), SCrow, NULL, queryDummyContextLogger(),NULL));
  6899. WSChelper->start();
  6900. }
  6901. return CHThorWSCRowCallActivity::nextRow();
  6902. }
  6903. catch(IException * e)
  6904. {
  6905. throw makeWrappedException(e);
  6906. }
  6907. }
  6908. //---------------------------------------------------------------------------
  6909. const void *CHThorSoapRowCallActivity::nextRow()
  6910. {
  6911. try
  6912. {
  6913. if (WSChelper == NULL)
  6914. {
  6915. WSChelper.setown(createSoapCallHelper(this, rowAllocator, authToken.str(), SCrow, NULL, queryDummyContextLogger(),NULL));
  6916. WSChelper->start();
  6917. }
  6918. return CHThorWSCRowCallActivity::nextRow();
  6919. }
  6920. catch(IException * e)
  6921. {
  6922. throw makeWrappedException(e);
  6923. }
  6924. }
  6925. //---------------------------------------------------------------------------
  6926. //---------------------------------------------------------------------------
  6927. //---------------------------------------------------------------------------
  6928. CHThorSoapRowActionActivity::CHThorSoapRowActionActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorSoapActionArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorWSCBaseActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  6929. {
  6930. }
  6931. void CHThorSoapRowActionActivity::execute()
  6932. {
  6933. try
  6934. {
  6935. WSChelper.setown(createSoapCallHelper(this, NULL, authToken.str(), SCrow, NULL, queryDummyContextLogger(),NULL));
  6936. WSChelper->start();
  6937. WSChelper->waitUntilDone();
  6938. }
  6939. catch(IException * e)
  6940. {
  6941. throw makeWrappedException(e);
  6942. }
  6943. IException *e = WSChelper->getError();
  6944. if(e)
  6945. throw makeWrappedException(e);
  6946. }
  6947. //---------------------------------------------------------------------------
  6948. CHThorSoapDatasetCallActivity::CHThorSoapDatasetCallActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorSoapCallArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorWSCBaseActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  6949. {
  6950. }
  6951. const void * CHThorSoapDatasetCallActivity::nextRow()
  6952. {
  6953. try
  6954. {
  6955. if (WSChelper == NULL)
  6956. {
  6957. WSChelper.setown(createSoapCallHelper(this, rowAllocator, authToken.str(), SCdataset, NULL, queryDummyContextLogger(),NULL));
  6958. WSChelper->start();
  6959. }
  6960. OwnedConstRoxieRow ret = WSChelper->getRow();
  6961. if (!ret)
  6962. return NULL;
  6963. ++processed;
  6964. return ret.getClear();
  6965. }
  6966. catch(IException * e)
  6967. {
  6968. throw makeWrappedException(e);
  6969. }
  6970. }
  6971. const void * CHThorSoapDatasetCallActivity::getNextRow()
  6972. {
  6973. CriticalBlock b(crit);
  6974. const void *nextrec = input->nextRow();
  6975. if (!nextrec)
  6976. {
  6977. nextrec = input->nextRow();
  6978. }
  6979. return nextrec;
  6980. };
  6981. //---------------------------------------------------------------------------
  6982. CHThorSoapDatasetActionActivity::CHThorSoapDatasetActionActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorSoapActionArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorWSCBaseActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  6983. {
  6984. }
  6985. void CHThorSoapDatasetActionActivity::execute()
  6986. {
  6987. try
  6988. {
  6989. WSChelper.setown(createSoapCallHelper(this, NULL, authToken.str(), SCdataset, NULL, queryDummyContextLogger(),NULL));
  6990. WSChelper->start();
  6991. WSChelper->waitUntilDone();
  6992. }
  6993. catch(IException * e)
  6994. {
  6995. throw makeWrappedException(e);
  6996. }
  6997. IException *e = WSChelper->getError();
  6998. if(e)
  6999. throw makeWrappedException(e);
  7000. }
  7001. const void * CHThorSoapDatasetActionActivity::getNextRow()
  7002. {
  7003. CriticalBlock b(crit);
  7004. const void *nextrec = input->nextRow();
  7005. if (!nextrec)
  7006. {
  7007. nextrec = input->nextRow();
  7008. }
  7009. if (nextrec)
  7010. {
  7011. processed++;
  7012. }
  7013. return nextrec;
  7014. };
  7015. //=====================================================================================================
  7016. CHThorResultActivity::CHThorResultActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  7017. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  7018. {
  7019. }
  7020. void CHThorResultActivity::extractResult(unsigned & retSize, void * & ret)
  7021. {
  7022. unsigned len = rowdata.length();
  7023. retSize = len;
  7024. if (len)
  7025. {
  7026. void * temp = rtlMalloc(len);
  7027. memcpy(temp, rowdata.toByteArray(), len);
  7028. ret = temp;
  7029. }
  7030. else
  7031. ret = NULL;
  7032. }
  7033. //=====================================================================================================
  7034. CHThorDatasetResultActivity::CHThorDatasetResultActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDatasetResultArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  7035. : CHThorResultActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  7036. {
  7037. }
  7038. void CHThorDatasetResultActivity::execute()
  7039. {
  7040. rowdata.clear();
  7041. IRecordSize * inputMeta = input->queryOutputMeta();
  7042. for (;;)
  7043. {
  7044. OwnedConstRoxieRow nextrec(input->nextRow());
  7045. if (!nextrec)
  7046. {
  7047. nextrec.setown(input->nextRow());
  7048. if (!nextrec)
  7049. break;
  7050. }
  7051. rowdata.append(inputMeta->getRecordSize(nextrec), nextrec);
  7052. }
  7053. }
  7054. //=====================================================================================================
  7055. CHThorRowResultActivity::CHThorRowResultActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorRowResultArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  7056. : CHThorResultActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  7057. {
  7058. }
  7059. void CHThorRowResultActivity::execute()
  7060. {
  7061. OwnedConstRoxieRow nextrec(input->nextRow());
  7062. assertex(nextrec);
  7063. IRecordSize * inputMeta = input->queryOutputMeta();
  7064. unsigned length = inputMeta->getRecordSize(nextrec);
  7065. rowdata.clear().append(length, nextrec);
  7066. }
  7067. //=====================================================================================================
  7068. CHThorChildIteratorActivity::CHThorChildIteratorActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorChildIteratorArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  7069. {
  7070. }
  7071. const void *CHThorChildIteratorActivity::nextRow()
  7072. {
  7073. if (eof)
  7074. return NULL;
  7075. bool ok;
  7076. if (!started)
  7077. {
  7078. ok = helper.first();
  7079. started = true;
  7080. }
  7081. else
  7082. ok = helper.next();
  7083. try
  7084. {
  7085. while(ok)
  7086. {
  7087. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  7088. size32_t outSize = helper.transform(rowBuilder);
  7089. if(outSize)
  7090. {
  7091. processed++;
  7092. return rowBuilder.finalizeRowClear(outSize);
  7093. }
  7094. ok = helper.next();
  7095. }
  7096. }
  7097. catch(IException * e)
  7098. {
  7099. throw makeWrappedException(e);
  7100. }
  7101. eof = true;
  7102. return NULL;
  7103. }
  7104. void CHThorChildIteratorActivity::ready()
  7105. {
  7106. CHThorSimpleActivityBase::ready();
  7107. started = false;
  7108. eof = false;
  7109. }
  7110. //=====================================================================================================
  7111. CHThorLinkedRawIteratorActivity::CHThorLinkedRawIteratorActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorLinkedRawIteratorArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  7112. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  7113. {
  7114. }
  7115. const void *CHThorLinkedRawIteratorActivity::nextRow()
  7116. {
  7117. const void *ret =helper.next();
  7118. if (ret)
  7119. {
  7120. LinkRoxieRow(ret);
  7121. processed++;
  7122. }
  7123. return ret;
  7124. }
  7125. //=====================================================================================================
  7126. //=====================================================================================================
  7127. //== New implementations - none are currently used, created or tested =================================
  7128. //=====================================================================================================
  7129. CHThorChildNormalizeActivity::CHThorChildNormalizeActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorChildNormalizeArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  7130. {
  7131. }
  7132. const void *CHThorChildNormalizeActivity::nextRow()
  7133. {
  7134. if (eof)
  7135. return NULL;
  7136. bool ok;
  7137. if (!started)
  7138. {
  7139. ok = helper.first();
  7140. started = true;
  7141. }
  7142. else
  7143. ok = helper.next();
  7144. try
  7145. {
  7146. if (ok)
  7147. {
  7148. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  7149. do {
  7150. unsigned thisSize = helper.transform(rowBuilder);
  7151. if (thisSize)
  7152. {
  7153. processed++;
  7154. return rowBuilder.finalizeRowClear(thisSize);
  7155. }
  7156. ok = helper.next();
  7157. }
  7158. while (ok);
  7159. }
  7160. }
  7161. catch(IException * e)
  7162. {
  7163. throw makeWrappedException(e);
  7164. }
  7165. eof = true;
  7166. return NULL;
  7167. }
  7168. void CHThorChildNormalizeActivity::ready()
  7169. {
  7170. CHThorSimpleActivityBase::ready();
  7171. started = false;
  7172. eof = false;
  7173. }
  7174. //=====================================================================================================
  7175. CHThorChildAggregateActivity::CHThorChildAggregateActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorChildAggregateArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  7176. {
  7177. }
  7178. const void *CHThorChildAggregateActivity::nextRow()
  7179. {
  7180. if (eof)
  7181. return NULL;
  7182. eof = true;
  7183. processed++;
  7184. try
  7185. {
  7186. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  7187. helper.clearAggregate(rowBuilder);
  7188. helper.processRows(rowBuilder);
  7189. size32_t finalSize = outputMeta.getRecordSize(rowBuilder.getSelf());
  7190. return rowBuilder.finalizeRowClear(finalSize);
  7191. }
  7192. catch(IException * e)
  7193. {
  7194. throw makeWrappedException(e);
  7195. }
  7196. }
  7197. void CHThorChildAggregateActivity::ready()
  7198. {
  7199. CHThorSimpleActivityBase::ready();
  7200. eof = false;
  7201. }
  7202. //=====================================================================================================
  7203. CHThorChildGroupAggregateActivity::CHThorChildGroupAggregateActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorChildGroupAggregateArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  7204. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph),
  7205. helper(_arg),
  7206. aggregated(_arg, _arg)
  7207. {
  7208. }
  7209. void CHThorChildGroupAggregateActivity::ready()
  7210. {
  7211. CHThorSimpleActivityBase::ready();
  7212. eof = false;
  7213. gathered = false;
  7214. aggregated.start(rowAllocator, agent.queryCodeContext(), activityId);
  7215. }
  7216. void CHThorChildGroupAggregateActivity::stop()
  7217. {
  7218. aggregated.reset();
  7219. CHThorSimpleActivityBase::stop();
  7220. }
  7221. void CHThorChildGroupAggregateActivity::processRow(const void * next)
  7222. {
  7223. aggregated.addRow(next);
  7224. }
  7225. const void * CHThorChildGroupAggregateActivity::nextRow()
  7226. {
  7227. if (eof)
  7228. return NULL;
  7229. if (!gathered)
  7230. {
  7231. helper.processRows(this);
  7232. gathered = true;
  7233. }
  7234. Owned<AggregateRowBuilder> next = aggregated.nextResult();
  7235. if (next)
  7236. {
  7237. processed++;
  7238. return next->finalizeRowClear();
  7239. }
  7240. eof = true;
  7241. return NULL;
  7242. }
  7243. //=====================================================================================================
  7244. CHThorChildThroughNormalizeActivity::CHThorChildThroughNormalizeActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorChildThroughNormalizeArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), outBuilder(NULL)
  7245. {
  7246. }
  7247. void CHThorChildThroughNormalizeActivity::stop()
  7248. {
  7249. outBuilder.clear();
  7250. lastInput.clear();
  7251. CHThorSimpleActivityBase::stop();
  7252. }
  7253. void CHThorChildThroughNormalizeActivity::ready()
  7254. {
  7255. CHThorSimpleActivityBase::ready();
  7256. outBuilder.setAllocator(rowAllocator);
  7257. numProcessedLastGroup = processed;
  7258. ok = false;
  7259. }
  7260. const void *CHThorChildThroughNormalizeActivity::nextRow()
  7261. {
  7262. try
  7263. {
  7264. for (;;)
  7265. {
  7266. if (ok)
  7267. ok = helper.next();
  7268. while (!ok)
  7269. {
  7270. lastInput.setown(input->nextRow());
  7271. if (!lastInput)
  7272. {
  7273. if (numProcessedLastGroup != processed)
  7274. {
  7275. numProcessedLastGroup = processed;
  7276. return NULL;
  7277. }
  7278. lastInput.setown(input->nextRow());
  7279. if (!lastInput)
  7280. return NULL;
  7281. }
  7282. ok = helper.first(lastInput);
  7283. }
  7284. outBuilder.ensureRow();
  7285. do
  7286. {
  7287. size32_t thisSize = helper.transform(outBuilder);
  7288. if (thisSize)
  7289. {
  7290. processed++;
  7291. return outBuilder.finalizeRowClear(thisSize);
  7292. }
  7293. ok = helper.next();
  7294. } while (ok);
  7295. }
  7296. }
  7297. catch(IException * e)
  7298. {
  7299. throw makeWrappedException(e);
  7300. }
  7301. }
  7302. //=====================================================================================================
  7303. CHThorDiskReadBaseActivity::CHThorDiskReadBaseActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDiskReadBaseArg &_arg, ThorActivityKind _kind, IPropertyTree *_node, EclGraph & _graph) : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  7304. {
  7305. helper.setCallback(this);
  7306. expectedDiskMeta = helper.queryDiskRecordSize();
  7307. projectedDiskMeta = helper.queryProjectedDiskRecordSize();
  7308. actualDiskMeta.set(helper.queryDiskRecordSize()->querySerializedDiskMeta());
  7309. isCodeSigned = false;
  7310. if (_node)
  7311. {
  7312. const char *recordTranslationModeHintText = _node->queryProp("hint[@name='layouttranslation']/@value");
  7313. if (recordTranslationModeHintText)
  7314. recordTranslationModeHint = getTranslationMode(recordTranslationModeHintText, true);
  7315. isCodeSigned = isActivityCodeSigned(*_node);
  7316. }
  7317. }
  7318. CHThorDiskReadBaseActivity::~CHThorDiskReadBaseActivity()
  7319. {
  7320. close();
  7321. }
  7322. void CHThorDiskReadBaseActivity::ready()
  7323. {
  7324. CHThorActivityBase::ready();
  7325. grouped = false;
  7326. fixedDiskRecordSize = 0;
  7327. eofseen = false;
  7328. opened = false;
  7329. compressed = false;
  7330. rowcompressed = false;
  7331. blockcompressed = false;
  7332. persistent = false;
  7333. localOffset = 0;
  7334. offsetOfPart = 0;
  7335. partNum = (unsigned)-1;
  7336. resolve();
  7337. unsigned expectedCrc = helper.getDiskFormatCrc();
  7338. unsigned projectedCrc = helper.getProjectedFormatCrc();
  7339. IDistributedFile *dFile = nullptr;
  7340. if (ldFile)
  7341. dFile = ldFile->queryDistributedFile(); // Null for local file usage
  7342. Owned<IOutputMetaData> publishedMeta;
  7343. unsigned publishedCrc = 0;
  7344. RecordTranslationMode translationMode = getLayoutTranslationMode();
  7345. StringBuffer traceName;
  7346. if (dFile)
  7347. {
  7348. const char *kind = queryFileKind(dFile);
  7349. if (strisame(kind, "flat") || (RecordTranslationMode::AlwaysDisk == translationMode))
  7350. {
  7351. IPropertyTree &props = dFile->queryAttributes();
  7352. publishedMeta.setown(getDaliLayoutInfo(props));
  7353. if (publishedMeta)
  7354. publishedCrc = props.getPropInt("@formatCrc");
  7355. }
  7356. dFile->getLogicalName(traceName);
  7357. }
  7358. else
  7359. traceName.set("hthor-diskread");
  7360. translators.setown(::getTranslators(traceName.str(), expectedCrc, expectedDiskMeta, publishedCrc, publishedMeta, projectedCrc, projectedDiskMeta, translationMode));
  7361. if (translators)
  7362. {
  7363. if (publishedCrc && expectedCrc && publishedCrc != expectedCrc)
  7364. {
  7365. VStringBuffer msg("Record layout translation required for %s", traceName.str());
  7366. agent.addWuExceptionEx(msg.str(), WRN_UseLayoutTranslation, SeverityInformation, MSGAUD_user, "hthor");
  7367. }
  7368. translator = &translators->queryTranslator();
  7369. keyedTranslator = translators->queryKeyedTranslator();
  7370. actualDiskMeta.set(&translators->queryActualFormat());
  7371. }
  7372. else
  7373. {
  7374. translator = nullptr;
  7375. keyedTranslator = nullptr;
  7376. actualDiskMeta.set(helper.queryDiskRecordSize()->querySerializedDiskMeta());
  7377. }
  7378. }
  7379. void CHThorDiskReadBaseActivity::stop()
  7380. {
  7381. close();
  7382. CHThorActivityBase::stop();
  7383. }
  7384. #define TE_FileTypeMismatch 10138 // NB: duplicated from thorlcr/shared/thexception.hpp, but be moved to common header
  7385. void CHThorDiskReadBaseActivity::checkFileType(IDistributedFile *file)
  7386. {
  7387. if (rt_csv == readType)
  7388. return; // CSV read is permitted to read any type
  7389. if (!agent.queryWorkUnit()->getDebugValueInt(OPT_VALIDATE_FILE_TYPE, true))
  7390. return;
  7391. bool warningOnly = false;
  7392. const char *expectedType = nullptr;
  7393. switch (readType)
  7394. {
  7395. case rt_binary:
  7396. if (fixedDiskRecordSize) // we allow fixed width reads of other formats
  7397. return;
  7398. expectedType = "flat";
  7399. break;
  7400. case rt_xml:
  7401. expectedType = "xml";
  7402. warningOnly = true;
  7403. break;
  7404. case rt_json:
  7405. expectedType = "json";
  7406. warningOnly = true;
  7407. break;
  7408. default:
  7409. throwUnexpected();
  7410. }
  7411. const char *kind = queryFileKind(file);
  7412. if (isEmptyString(kind)) // file has no published kind, can't validate
  7413. return;
  7414. if (!strieq(kind, expectedType))
  7415. {
  7416. Owned<IException> e = makeStringExceptionV(TE_FileTypeMismatch, "File format mismatch reading file: '%s'. Expected type '%s', but file is type '%s'", file->queryLogicalName(), expectedType, kind);
  7417. if (!warningOnly)
  7418. throw e.getClear();
  7419. StringBuffer tmp;
  7420. agent.addWuException(e->errorMessage(tmp), e->errorCode(), SeverityWarning, "eclagent");
  7421. }
  7422. }
  7423. void CHThorDiskReadBaseActivity::resolve()
  7424. {
  7425. OwnedRoxieString fileName(helper.getFileName());
  7426. mangleHelperFileName(mangledHelperFileName, fileName, agent.queryWuid(), helper.getFlags());
  7427. if (helper.getFlags() & (TDXtemporary | TDXjobtemp))
  7428. {
  7429. StringBuffer mangledFilename;
  7430. mangleLocalTempFilename(mangledFilename, mangledHelperFileName.str(), nullptr);
  7431. tempFileName.set(agent.queryTemporaryFile(mangledFilename.str()));
  7432. logicalFileName.set(tempFileName);
  7433. gatherInfo(NULL);
  7434. }
  7435. else
  7436. {
  7437. ldFile.setown(resolveLFNFlat(agent, mangledHelperFileName.str(), "Read", 0 != (helper.getFlags() & TDRoptional), isCodeSigned));
  7438. if ( mangledHelperFileName.charAt(0) == '~')
  7439. logicalFileName.set(mangledHelperFileName.str()+1);
  7440. else
  7441. logicalFileName.set(mangledHelperFileName.str());
  7442. if (ldFile)
  7443. {
  7444. Owned<IFileDescriptor> fdesc;
  7445. fdesc.setown(ldFile->getFileDescriptor());
  7446. gatherInfo(fdesc);
  7447. if (ldFile->isExternal())
  7448. compressed = checkWriteIsCompressed(helper.getFlags(), fixedDiskRecordSize, false);//grouped=FALSE because fixedDiskRecordSize already includes grouped
  7449. IDistributedFile *dFile = ldFile->queryDistributedFile();
  7450. if (dFile) //only makes sense for distributed (non local) files
  7451. {
  7452. checkFileType(dFile); // throws an exception if file types mismatch
  7453. persistent = dFile->queryAttributes().getPropBool("@persistent");
  7454. dfsParts.setown(dFile->getIterator());
  7455. IDistributedSuperFile *super = dFile->querySuperFile();
  7456. if (super)
  7457. {
  7458. assertex(fdesc);
  7459. superfile.set(fdesc->querySuperFileDescriptor());
  7460. if (helper.getFlags() & TDRfilenamecallback)
  7461. {
  7462. unsigned numsubs = super->numSubFiles(true);
  7463. unsigned s=0;
  7464. for (; s<numsubs; s++)
  7465. {
  7466. IDistributedFile &subfile = super->querySubFile(s, true);
  7467. subfileLogicalFilenames.append(subfile.queryLogicalName());
  7468. }
  7469. if (!superfile && numsubs>0)
  7470. logicalFileName.set(subfileLogicalFilenames.item(0));
  7471. }
  7472. }
  7473. if((helper.getFlags() & (TDXtemporary | TDXjobtemp)) == 0)
  7474. agent.logFileAccess(dFile, "HThor", "READ", graph);
  7475. if(getLayoutTranslationMode()==RecordTranslationMode::None)
  7476. verifyRecordFormatCrc();
  7477. }
  7478. }
  7479. if (!ldFile)
  7480. {
  7481. StringBuffer buff;
  7482. buff.appendf("Input file '%s' was missing but declared optional", mangledHelperFileName.str());
  7483. agent.addWuExceptionEx(buff.str(), WRN_SkipMissingOptFile, SeverityInformation, MSGAUD_user, "hthor");
  7484. }
  7485. }
  7486. }
  7487. void CHThorDiskReadBaseActivity::gatherInfo(IFileDescriptor * fileDesc)
  7488. {
  7489. if (fileDesc)
  7490. {
  7491. if (!agent.queryResolveFilesLocally())
  7492. {
  7493. grouped = fileDesc->isGrouped();
  7494. if (grouped != ((helper.getFlags() & TDXgrouped) != 0))
  7495. {
  7496. StringBuffer msg;
  7497. msg.append("DFS and code generated group info. differs: DFS(").append(grouped ? "grouped" : "ungrouped").append("), CodeGen(").append(grouped ? "ungrouped" : "grouped").append("), using DFS info");
  7498. agent.addWuExceptionEx(msg.str(), WRN_MismatchGroupInfo, SeverityError, MSGAUD_user, "hthor");
  7499. }
  7500. }
  7501. else
  7502. grouped = ((helper.getFlags() & TDXgrouped) != 0);
  7503. }
  7504. else
  7505. {
  7506. grouped = ((helper.getFlags() & TDXgrouped) != 0);
  7507. }
  7508. calcFixedDiskRecordSize();
  7509. if (fileDesc)
  7510. {
  7511. compressed = fileDesc->isCompressed(&blockcompressed); //try new decompression, fall back to old unless marked as block
  7512. if (fixedDiskRecordSize)
  7513. {
  7514. if (!compressed && (((helper.getFlags() & TDXcompress) != 0) && (fixedDiskRecordSize >= MIN_ROWCOMPRESS_RECSIZE)))
  7515. {
  7516. StringBuffer msg;
  7517. msg.append("Ignoring compression attribute on file ").append(mangledHelperFileName.str()).append(", which is not published as compressed");
  7518. agent.addWuExceptionEx(msg.str(), WRN_MismatchCompressInfo, SeverityWarning, MSGAUD_user, "hthor");
  7519. compressed = true;
  7520. }
  7521. }
  7522. }
  7523. else
  7524. {
  7525. compressed = checkReadIsCompressed(helper.getFlags(), fixedDiskRecordSize, false); //grouped=FALSE because fixedDiskRecordSize already includes grouped
  7526. }
  7527. void *k;
  7528. size32_t kl;
  7529. helper.getEncryptKey(kl,k);
  7530. encryptionkey.setOwn(kl,k);
  7531. if (encryptionkey.length()!=0)
  7532. {
  7533. blockcompressed = true;
  7534. compressed = true;
  7535. }
  7536. }
  7537. void CHThorDiskReadBaseActivity::close()
  7538. {
  7539. closepart();
  7540. tempFileName.clear();
  7541. dfsParts.clear();
  7542. if(ldFile)
  7543. {
  7544. IDistributedFile * dFile = ldFile->queryDistributedFile();
  7545. if(dFile)
  7546. dFile->setAccessed();
  7547. ldFile.clear();
  7548. }
  7549. }
  7550. unsigned __int64 CHThorDiskReadBaseActivity::getFilePosition(const void * row)
  7551. {
  7552. return localOffset + offsetOfPart;
  7553. }
  7554. unsigned __int64 CHThorDiskReadBaseActivity::getLocalFilePosition(const void * row)
  7555. {
  7556. return makeLocalFposOffset(partNum-1, localOffset);
  7557. }
  7558. void CHThorDiskReadBaseActivity::closepart()
  7559. {
  7560. if (opened && inputfileio && ldFile && partNum > 0)
  7561. {
  7562. unsigned previousPartNum = partNum-1;
  7563. if (previousPartNum < ldFile->numParts())
  7564. {
  7565. stat_type curDiskReads = inputfileio->getStatistic(StNumDiskReads);
  7566. IDistributedFile * dFile = ldFile->queryDistributedFile();
  7567. if (dFile)
  7568. {
  7569. if (superfile)
  7570. {
  7571. unsigned subfile, lnum;
  7572. if (superfile->mapSubPart(previousPartNum, subfile, lnum))
  7573. {
  7574. IDistributedSuperFile *super = dFile->querySuperFile();
  7575. dFile = &(super->querySubFile(subfile, true));
  7576. }
  7577. }
  7578. dFile->addAttrValue("@numDiskReads", curDiskReads);
  7579. StringBuffer clusterName;
  7580. dFile->getClusterName(0, clusterName);
  7581. diskAccessCost = money2cost_type(calcFileAccessCost(clusterName, 0, curDiskReads));
  7582. }
  7583. numDiskReads += curDiskReads;
  7584. }
  7585. }
  7586. inputstream.clear();
  7587. inputfileio.clear();
  7588. inputfile.clear();
  7589. }
  7590. bool CHThorDiskReadBaseActivity::openNext()
  7591. {
  7592. offsetOfPart += localOffset;
  7593. localOffset = 0;
  7594. saveOpenExc.clear();
  7595. actualFilter.clear();
  7596. if (translators)
  7597. {
  7598. /* If previous part was remotely accessed, the format used (actualDiskMeta), became the projected meta.
  7599. * Reset for local/direct access.
  7600. */
  7601. translator = &translators->queryTranslator();
  7602. keyedTranslator = translators->queryKeyedTranslator();
  7603. actualDiskMeta.set(&translators->queryActualFormat());
  7604. }
  7605. if (dfsParts||ldFile)
  7606. {
  7607. // open next part of a multipart, if there is one
  7608. while ((dfsParts&&dfsParts->isValid())||
  7609. (!dfsParts&&(partNum<ldFile->numParts())))
  7610. {
  7611. IDistributedFilePart * curPart = dfsParts?&dfsParts->query():NULL;
  7612. unsigned numCopies = curPart?curPart->numCopies():ldFile->numPartCopies(partNum);
  7613. //MORE: Order of copies should be optimized at this point....
  7614. StringBuffer file, filelist;
  7615. closepart();
  7616. if (dfsParts && superfile && curPart && !subfileLogicalFilenames.empty())
  7617. {
  7618. unsigned subfile;
  7619. unsigned lnum;
  7620. if (superfile->mapSubPart(partNum, subfile, lnum))
  7621. {
  7622. logicalFileName.set(subfileLogicalFilenames.item(subfile));
  7623. // MORE - need to set dFile = superfile->getSubFilePart(subfile) to support different formats on different file parts
  7624. }
  7625. }
  7626. if (keyedTranslator && keyedTranslator->needsTranslate())
  7627. keyedTranslator->translate(actualFilter, fieldFilters);
  7628. else
  7629. actualFilter.appendFilters(fieldFilters);
  7630. bool tryRemoteStream = actualDiskMeta->queryTypeInfo()->canInterpret() && actualDiskMeta->queryTypeInfo()->canSerialize() &&
  7631. projectedDiskMeta->queryTypeInfo()->canInterpret() && projectedDiskMeta->queryTypeInfo()->canSerialize();
  7632. /* If part can potentially be remotely streamed, 1st check if any part is local,
  7633. * then try to remote stream, and otherwise failover to legacy remote access
  7634. */
  7635. unsigned startCopy = 0;
  7636. if (tryRemoteStream && (rt_binary == readType))
  7637. {
  7638. std::vector<unsigned> remoteCandidates;
  7639. // scan for local part 1st
  7640. for (unsigned copy=0; copy<numCopies; copy++)
  7641. {
  7642. RemoteFilename rfn;
  7643. if (curPart)
  7644. curPart->getFilename(rfn,copy);
  7645. else
  7646. ldFile->getPartFilename(rfn, partNum, copy);
  7647. if (!isRemoteReadCandidate(agent, rfn))
  7648. {
  7649. StringBuffer path;
  7650. rfn.getPath(path);
  7651. Owned<IFile> iFile = createIFile(path);
  7652. try
  7653. {
  7654. if (iFile->exists())
  7655. {
  7656. startCopy = copy;
  7657. remoteCandidates.clear();
  7658. break;
  7659. }
  7660. }
  7661. catch (IException *e)
  7662. {
  7663. EXCLOG(e, "CHThorDiskReadBaseActivity::openNext()");
  7664. e->Release();
  7665. }
  7666. }
  7667. else
  7668. remoteCandidates.push_back(copy);
  7669. }
  7670. for (unsigned &copy: remoteCandidates)
  7671. {
  7672. RemoteFilename rfilename;
  7673. if (curPart)
  7674. curPart->getFilename(rfilename,copy);
  7675. else
  7676. ldFile->getPartFilename(rfilename,partNum,copy);
  7677. rfilename.getPath(file.clear());
  7678. filelist.append('\n').append(file);
  7679. try
  7680. {
  7681. // NB: only binary handles can be remotely processed by dafilesrv at the moment
  7682. // Open a stream from remote file, having passed actual, expected, projected, and filters to it
  7683. SocketEndpoint ep(rfilename.queryEndpoint());
  7684. setDafsEndpointPort(ep);
  7685. StringBuffer localPath;
  7686. rfilename.getLocalPath(localPath);
  7687. Owned<IRemoteFileIO> remoteFileIO = createRemoteFilteredFile(ep, localPath, actualDiskMeta, projectedDiskMeta, actualFilter, compressed, grouped, remoteLimit);
  7688. if (remoteFileIO)
  7689. {
  7690. StringBuffer tmp;
  7691. remoteFileIO->addVirtualFieldMapping("logicalFilename", logicalFileName.str());
  7692. remoteFileIO->addVirtualFieldMapping("baseFpos", tmp.clear().append(offsetOfPart).str());
  7693. remoteFileIO->addVirtualFieldMapping("partNum", tmp.clear().append(curPart->getPartIndex()).str());
  7694. try
  7695. {
  7696. remoteFileIO->ensureAvailable(); // force open now, because want to failover to other copies or legacy if fails
  7697. }
  7698. catch (IException *e)
  7699. {
  7700. #ifdef _DEBUG
  7701. EXCLOG(e, nullptr);
  7702. #endif
  7703. VStringBuffer msg("Remote streaming failure, failing over to direct read for: '%s'. ", file.str());
  7704. e->errorMessage(msg);
  7705. agent.addWuExceptionEx(msg.str(), WRN_RemoteReadFailure, SeverityWarning, MSGAUD_user, "hthor");
  7706. e->Release();
  7707. continue; // try next copy and ultimately failover to local when no more copies
  7708. }
  7709. Owned<IFile> iFile = createIFile(rfilename);
  7710. // remote side does projection/translation/filtering
  7711. actualDiskMeta.set(projectedDiskMeta);
  7712. translator = nullptr;
  7713. keyedTranslator = nullptr;
  7714. actualFilter.clear();
  7715. inputfileio.setown(remoteFileIO.getClear());
  7716. if (inputfileio)
  7717. {
  7718. inputfile.setown(iFile.getClear());
  7719. break;
  7720. }
  7721. }
  7722. }
  7723. catch (IException *E)
  7724. {
  7725. if (saveOpenExc.get())
  7726. E->Release();
  7727. else
  7728. saveOpenExc.setown(E);
  7729. }
  7730. closepart();
  7731. }
  7732. }
  7733. if (!inputfile)
  7734. {
  7735. unsigned copy = startCopy;
  7736. while (true)
  7737. {
  7738. RemoteFilename rfilename;
  7739. if (curPart)
  7740. curPart->getFilename(rfilename,copy);
  7741. else
  7742. ldFile->getPartFilename(rfilename,partNum,copy);
  7743. rfilename.getPath(file.clear());
  7744. filelist.append('\n').append(file);
  7745. try
  7746. {
  7747. inputfile.setown(createIFile(rfilename));
  7748. if (compressed)
  7749. {
  7750. Owned<IExpander> eexp;
  7751. if (encryptionkey.length()!=0)
  7752. eexp.setown(createAESExpander256((size32_t)encryptionkey.length(),encryptionkey.bufferBase()));
  7753. inputfileio.setown(createCompressedFileReader(inputfile,eexp));
  7754. if(!inputfileio && !blockcompressed) //fall back to old decompression, unless dfs marked as new
  7755. {
  7756. inputfileio.setown(inputfile->open(IFOread));
  7757. if(inputfileio)
  7758. rowcompressed = true;
  7759. }
  7760. }
  7761. else
  7762. inputfileio.setown(inputfile->open(IFOread));
  7763. if (inputfileio)
  7764. break;
  7765. }
  7766. catch (IException *E)
  7767. {
  7768. if (saveOpenExc.get())
  7769. E->Release();
  7770. else
  7771. saveOpenExc.setown(E);
  7772. }
  7773. if (++copy == numCopies) // wrap
  7774. copy = 0;
  7775. if (copy == startCopy) // reached starting copy, so scanned all and failed to open any.
  7776. break;
  7777. }
  7778. }
  7779. calcFixedDiskRecordSize();
  7780. if (dfsParts)
  7781. dfsParts->next();
  7782. partNum++;
  7783. if (checkOpenedFile(file.str(), filelist.str()))
  7784. {
  7785. opened = true;
  7786. return true;
  7787. }
  7788. }
  7789. return false;
  7790. }
  7791. else if (!tempFileName.isEmpty())
  7792. {
  7793. StringBuffer file(tempFileName.get());
  7794. tempFileName.clear();
  7795. closepart();
  7796. try
  7797. {
  7798. inputfile.setown(createIFile(file.str()));
  7799. if(compressed)
  7800. {
  7801. Owned<IExpander> eexp;
  7802. if (encryptionkey.length())
  7803. eexp.setown(createAESExpander256((size32_t) encryptionkey.length(),encryptionkey.bufferBase()));
  7804. inputfileio.setown(createCompressedFileReader(inputfile,eexp));
  7805. if(!inputfileio && !blockcompressed) //fall back to old decompression, unless dfs marked as new
  7806. {
  7807. inputfileio.setown(inputfile->open(IFOread));
  7808. if(inputfileio)
  7809. rowcompressed = true;
  7810. }
  7811. }
  7812. else
  7813. inputfileio.setown(inputfile->open(IFOread));
  7814. }
  7815. catch (IException *E)
  7816. {
  7817. closepart();
  7818. StringBuffer msg;
  7819. IWARNLOG("%s", E->errorMessage(msg).str());
  7820. if (saveOpenExc.get())
  7821. E->Release();
  7822. else
  7823. saveOpenExc.setown(E);
  7824. }
  7825. partNum++;
  7826. if (checkOpenedFile(file.str(), NULL))
  7827. {
  7828. actualFilter.appendFilters(fieldFilters);
  7829. opened = true;
  7830. return true;
  7831. }
  7832. }
  7833. return false;
  7834. }
  7835. bool CHThorDiskReadBaseActivity::checkOpenedFile(char const * filename, char const * filenamelist)
  7836. {
  7837. unsigned __int64 filesize = 0;
  7838. if (!inputfileio)
  7839. {
  7840. if (!(helper.getFlags() & TDRoptional))
  7841. {
  7842. StringBuffer s;
  7843. if(filenamelist) {
  7844. if (saveOpenExc.get())
  7845. {
  7846. if (strstr(mangledHelperFileName.str(),"::>")!=NULL) // if a 'special' filename just use saved exception
  7847. saveOpenExc->errorMessage(s);
  7848. else
  7849. {
  7850. s.append("Could not open logical file ").append(mangledHelperFileName.str()).append(" in any of these locations:").append(filenamelist).append(" (");
  7851. saveOpenExc->errorMessage(s).append(")");
  7852. }
  7853. }
  7854. else
  7855. s.append("Could not open logical file ").append(mangledHelperFileName.str()).append(" in any of these locations:").append(filenamelist).append(" (").append((unsigned)GetLastError()).append(")");
  7856. }
  7857. else
  7858. s.append("Could not open local physical file ").append(filename).append(" (").append((unsigned)GetLastError()).append(")");
  7859. agent.fail(1, s.str());
  7860. }
  7861. }
  7862. else
  7863. filesize = inputfileio->size();
  7864. saveOpenExc.clear();
  7865. if (filesize)
  7866. {
  7867. if (!compressed && fixedDiskRecordSize && ((offset_t)-1 != filesize) && (filesize % fixedDiskRecordSize) != 0)
  7868. {
  7869. StringBuffer s;
  7870. s.append("File ").append(filename).append(" size is ").append(filesize).append(" which is not a multiple of ").append(fixedDiskRecordSize);
  7871. agent.fail(1, s.str());
  7872. }
  7873. unsigned readBufferSize = queryReadBufferSize();
  7874. inputstream.setown(createFileSerialStream(inputfileio, 0, filesize, readBufferSize));
  7875. StringBuffer report("Reading file ");
  7876. report.append(inputfile->queryFilename());
  7877. agent.reportProgress(report.str());
  7878. }
  7879. return (filesize != 0);
  7880. }
  7881. void CHThorDiskReadBaseActivity::open()
  7882. {
  7883. assertex(!opened);
  7884. partNum = 0;
  7885. if (dfsParts)
  7886. eofseen = !dfsParts->first() || !openNext();
  7887. else if (ldFile||tempFileName.length())
  7888. eofseen = !openNext();
  7889. else
  7890. eofseen = true;
  7891. opened = true;
  7892. }
  7893. void CHThorDiskReadBaseActivity::updateProgress(IStatisticGatherer &progress) const
  7894. {
  7895. CHThorActivityBase::updateProgress(progress);
  7896. StatsActivityScope scope(progress, activityId);
  7897. progress.addStatistic(StNumDiskReads, numDiskReads);
  7898. progress.addStatistic(StCostFileAccess, diskAccessCost);
  7899. }
  7900. //=====================================================================================================
  7901. CHThorBinaryDiskReadBase::CHThorBinaryDiskReadBase(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDiskReadBaseArg &_arg, IHThorCompoundBaseArg & _segHelper, ThorActivityKind _kind, IPropertyTree *_node, EclGraph & _graph)
  7902. : CHThorDiskReadBaseActivity(_agent, _activityId, _subgraphId, _arg, _kind, _node, _graph),
  7903. segHelper(_segHelper), prefetchBuffer(NULL)
  7904. {
  7905. readType = rt_binary;
  7906. }
  7907. void CHThorBinaryDiskReadBase::calcFixedDiskRecordSize()
  7908. {
  7909. fixedDiskRecordSize = actualDiskMeta->getFixedSize();
  7910. if (fixedDiskRecordSize && grouped)
  7911. fixedDiskRecordSize += 1;
  7912. }
  7913. void CHThorBinaryDiskReadBase::append(FFoption option, const IFieldFilter * filter)
  7914. {
  7915. if (filter->isWild())
  7916. filter->Release();
  7917. else
  7918. fieldFilters.append(*filter);
  7919. }
  7920. void CHThorBinaryDiskReadBase::ready()
  7921. {
  7922. CHThorDiskReadBaseActivity::ready();
  7923. fieldFilters.kill();
  7924. segHelper.createSegmentMonitors(this);
  7925. }
  7926. bool CHThorBinaryDiskReadBase::openNext()
  7927. {
  7928. if (CHThorDiskReadBaseActivity::openNext())
  7929. {
  7930. if(rowcompressed && fixedDiskRecordSize)
  7931. {
  7932. throwUnexpected();
  7933. //MORE: What happens here
  7934. PROGLOG("Disk read falling back to legacy decompression routine");
  7935. //in.setown(createRowCompReadSeq(*inputfileiostream, 0, fixedDiskRecordSize));
  7936. }
  7937. //Only one of these will actually be used.
  7938. prefetcher.setown(actualDiskMeta->createDiskPrefetcher());
  7939. deserializer.setown(actualDiskMeta->createDiskDeserializer(agent.queryCodeContext(), activityId));
  7940. prefetchBuffer.setStream(inputstream);
  7941. deserializeSource.setStream(inputstream);
  7942. return true;
  7943. }
  7944. return false;
  7945. }
  7946. void CHThorBinaryDiskReadBase::closepart()
  7947. {
  7948. prefetchBuffer.clearStream();
  7949. deserializeSource.clearStream();
  7950. CHThorDiskReadBaseActivity::closepart();
  7951. }
  7952. unsigned CHThorBinaryDiskReadBase::queryReadBufferSize()
  7953. {
  7954. return hthorReadBufferSize;
  7955. }
  7956. void CHThorBinaryDiskReadBase::open()
  7957. {
  7958. if (!segHelper.canMatchAny())
  7959. {
  7960. eofseen = true;
  7961. opened = true;
  7962. }
  7963. else
  7964. CHThorDiskReadBaseActivity::open();
  7965. }
  7966. //=====================================================================================================
  7967. CHThorDiskReadActivity::CHThorDiskReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDiskReadArg &_arg, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree *_node) : CHThorBinaryDiskReadBase(_agent, _activityId, _subgraphId, _arg, _arg, _kind, _node, _graph), helper(_arg), outBuilder(NULL)
  7968. {
  7969. needTransform = false;
  7970. eogPending = 0;
  7971. lastGroupProcessed = 0;
  7972. }
  7973. void CHThorDiskReadActivity::ready()
  7974. {
  7975. PARENT::ready();
  7976. outBuilder.setAllocator(rowAllocator);
  7977. eogPending = false;
  7978. lastGroupProcessed = processed;
  7979. needTransform = helper.needTransform() || fieldFilters.length();
  7980. limit = helper.getRowLimit();
  7981. if (helper.getFlags() & TDRlimitskips)
  7982. limit = (unsigned __int64) -1;
  7983. stopAfter = helper.getChooseNLimit();
  7984. if (!helper.transformMayFilter() && !helper.hasMatchFilter())
  7985. remoteLimit = stopAfter;
  7986. }
  7987. void CHThorDiskReadActivity::stop()
  7988. {
  7989. outBuilder.clear();
  7990. PARENT::stop();
  7991. }
  7992. const void *CHThorDiskReadActivity::nextRow()
  7993. {
  7994. if (!opened) open();
  7995. if (eogPending && (lastGroupProcessed != processed))
  7996. {
  7997. eogPending = false;
  7998. lastGroupProcessed = processed;
  7999. return NULL;
  8000. }
  8001. try
  8002. {
  8003. if (needTransform || grouped || translator || keyedTranslator)
  8004. {
  8005. while (!eofseen && ((stopAfter == 0) || ((processed - initialProcessed) < stopAfter)))
  8006. {
  8007. queryUpdateProgress();
  8008. while (!prefetchBuffer.eos())
  8009. {
  8010. queryUpdateProgress();
  8011. prefetcher->readAhead(prefetchBuffer);
  8012. const byte * next = prefetchBuffer.queryRow();
  8013. size32_t sizeRead = prefetchBuffer.queryRowSize();
  8014. size32_t thisSize = 0;
  8015. if (likely(segMonitorsMatch(next))) // NOTE - keyed fields are checked pre-translation
  8016. {
  8017. MemoryBuffer translated;
  8018. if (translator)
  8019. {
  8020. MemoryBufferBuilder aBuilder(translated, 0);
  8021. translator->translate(aBuilder, *this, next);
  8022. next = aBuilder.getSelf();
  8023. }
  8024. if (likely(helper.canMatch(next)))
  8025. thisSize = helper.transform(outBuilder.ensureRow(), next);
  8026. }
  8027. bool eog = false;
  8028. if (grouped)
  8029. prefetchBuffer.read(sizeof(eog), &eog);
  8030. prefetchBuffer.finishedRow();
  8031. localOffset += sizeRead;
  8032. if (thisSize)
  8033. {
  8034. if (grouped)
  8035. eogPending = eog;
  8036. if ((processed - initialProcessed) >= limit)
  8037. {
  8038. outBuilder.clear();
  8039. if ( agent.queryCodeContext()->queryDebugContext())
  8040. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  8041. helper.onLimitExceeded();
  8042. return NULL;
  8043. }
  8044. processed++;
  8045. return outBuilder.finalizeRowClear(thisSize);
  8046. }
  8047. if (eog && (lastGroupProcessed != processed))
  8048. {
  8049. lastGroupProcessed = processed;
  8050. return NULL;
  8051. }
  8052. }
  8053. eofseen = !openNext();
  8054. }
  8055. }
  8056. else
  8057. {
  8058. while(!eofseen && ((stopAfter == 0) || (processed - initialProcessed) < stopAfter))
  8059. {
  8060. queryUpdateProgress();
  8061. while (!inputstream->eos())
  8062. {
  8063. size32_t sizeRead = deserializer->deserialize(outBuilder.ensureRow(), deserializeSource);
  8064. //In this case size read from disk == size created in memory
  8065. localOffset += sizeRead;
  8066. OwnedConstRoxieRow ret = outBuilder.finalizeRowClear(sizeRead);
  8067. if ((processed - initialProcessed)>=limit)
  8068. {
  8069. if ( agent.queryCodeContext()->queryDebugContext())
  8070. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  8071. helper.onLimitExceeded();
  8072. return NULL;
  8073. }
  8074. if (likely(helper.canMatch(ret)))
  8075. {
  8076. processed++;
  8077. return ret.getClear();
  8078. }
  8079. }
  8080. eofseen = !openNext();
  8081. }
  8082. }
  8083. close();
  8084. }
  8085. catch(IException * e)
  8086. {
  8087. throw makeWrappedException(e);
  8088. }
  8089. return NULL;
  8090. }
  8091. //=====================================================================================================
  8092. CHThorDiskNormalizeActivity::CHThorDiskNormalizeActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDiskNormalizeArg &_arg, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree *_node) : CHThorBinaryDiskReadBase(_agent, _activityId, _subgraphId, _arg, _arg, _kind, _node, _graph), helper(_arg), outBuilder(NULL)
  8093. {
  8094. }
  8095. void CHThorDiskNormalizeActivity::stop()
  8096. {
  8097. outBuilder.clear();
  8098. PARENT::stop();
  8099. }
  8100. void CHThorDiskNormalizeActivity::ready()
  8101. {
  8102. PARENT::ready();
  8103. outBuilder.setAllocator(rowAllocator);
  8104. limit = helper.getRowLimit();
  8105. if (helper.getFlags() & TDRlimitskips)
  8106. limit = (unsigned __int64) -1;
  8107. stopAfter = helper.getChooseNLimit();
  8108. lastSizeRead = 0;
  8109. expanding = false;
  8110. }
  8111. void CHThorDiskNormalizeActivity::gatherInfo(IFileDescriptor * fd)
  8112. {
  8113. PARENT::gatherInfo(fd);
  8114. assertex(!grouped);
  8115. }
  8116. const void *CHThorDiskNormalizeActivity::nextRow()
  8117. {
  8118. if (!opened) open();
  8119. for (;;)
  8120. {
  8121. if (eofseen || (stopAfter && (processed - initialProcessed) >= stopAfter))
  8122. break;
  8123. for (;;)
  8124. {
  8125. if (expanding)
  8126. {
  8127. for (;;)
  8128. {
  8129. expanding = helper.next();
  8130. if (!expanding)
  8131. break;
  8132. const void * ret = createNextRow();
  8133. if (ret)
  8134. return ret;
  8135. }
  8136. }
  8137. localOffset += lastSizeRead;
  8138. prefetchBuffer.finishedRow();
  8139. if (prefetchBuffer.eos())
  8140. {
  8141. lastSizeRead = 0;
  8142. break;
  8143. }
  8144. prefetcher->readAhead(prefetchBuffer);
  8145. const byte * next = prefetchBuffer.queryRow();
  8146. lastSizeRead = prefetchBuffer.queryRowSize();
  8147. queryUpdateProgress();
  8148. if (segMonitorsMatch(next))
  8149. {
  8150. try
  8151. {
  8152. if (unlikely(translator))
  8153. {
  8154. MemoryBufferBuilder aBuilder(translatedRow.clear(), 0);
  8155. translator->translate(aBuilder, *this, next);
  8156. next = aBuilder.getSelf();
  8157. }
  8158. expanding = helper.first(next);
  8159. }
  8160. catch(IException * e)
  8161. {
  8162. throw makeWrappedException(e);
  8163. }
  8164. if (expanding)
  8165. {
  8166. const void * ret = createNextRow();
  8167. if (ret)
  8168. return ret;
  8169. }
  8170. }
  8171. }
  8172. eofseen = !openNext();
  8173. }
  8174. close();
  8175. return NULL;
  8176. }
  8177. const void * CHThorDiskNormalizeActivity::createNextRow()
  8178. {
  8179. try
  8180. {
  8181. size32_t thisSize = helper.transform(outBuilder.ensureRow());
  8182. if (thisSize == 0)
  8183. return NULL;
  8184. if ((processed - initialProcessed) >=limit)
  8185. {
  8186. outBuilder.clear();
  8187. if ( agent.queryCodeContext()->queryDebugContext())
  8188. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  8189. helper.onLimitExceeded();
  8190. return NULL;
  8191. }
  8192. processed++;
  8193. return outBuilder.finalizeRowClear(thisSize);
  8194. }
  8195. catch(IException * e)
  8196. {
  8197. throw makeWrappedException(e);
  8198. }
  8199. }
  8200. //=====================================================================================================
  8201. CHThorDiskAggregateActivity::CHThorDiskAggregateActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDiskAggregateArg &_arg, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree *_node) : CHThorBinaryDiskReadBase(_agent, _activityId, _subgraphId, _arg, _arg, _kind, _node, _graph), helper(_arg), outBuilder(NULL)
  8202. {
  8203. }
  8204. void CHThorDiskAggregateActivity::stop()
  8205. {
  8206. outBuilder.clear();
  8207. PARENT::stop();
  8208. }
  8209. void CHThorDiskAggregateActivity::ready()
  8210. {
  8211. PARENT::ready();
  8212. outBuilder.setAllocator(rowAllocator);
  8213. finished = false;
  8214. }
  8215. void CHThorDiskAggregateActivity::gatherInfo(IFileDescriptor * fd)
  8216. {
  8217. PARENT::gatherInfo(fd);
  8218. assertex(!grouped);
  8219. }
  8220. const void *CHThorDiskAggregateActivity::nextRow()
  8221. {
  8222. if (finished) return NULL;
  8223. try
  8224. {
  8225. if (!opened) open();
  8226. outBuilder.ensureRow();
  8227. helper.clearAggregate(outBuilder);
  8228. while (!eofseen)
  8229. {
  8230. while (!prefetchBuffer.eos())
  8231. {
  8232. queryUpdateProgress();
  8233. prefetcher->readAhead(prefetchBuffer);
  8234. const byte * next = prefetchBuffer.queryRow();
  8235. size32_t sizeRead = prefetchBuffer.queryRowSize();
  8236. if (segMonitorsMatch(next))
  8237. {
  8238. if (unlikely(translator))
  8239. {
  8240. MemoryBufferBuilder aBuilder(translatedRow.clear(), 0);
  8241. translator->translate(aBuilder, *this, next);
  8242. helper.processRow(outBuilder, aBuilder.getSelf());
  8243. }
  8244. else
  8245. helper.processRow(outBuilder, next);
  8246. }
  8247. prefetchBuffer.finishedRow();
  8248. localOffset += sizeRead;
  8249. }
  8250. eofseen = !openNext();
  8251. }
  8252. close();
  8253. processed++;
  8254. finished = true;
  8255. unsigned retSize = outputMeta.getRecordSize(outBuilder.getSelf());
  8256. return outBuilder.finalizeRowClear(retSize);
  8257. }
  8258. catch(IException * e)
  8259. {
  8260. throw makeWrappedException(e);
  8261. }
  8262. }
  8263. //=====================================================================================================
  8264. CHThorDiskCountActivity::CHThorDiskCountActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDiskCountArg &_arg, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree *_node) : CHThorBinaryDiskReadBase(_agent, _activityId, _subgraphId, _arg, _arg, _kind, _node, _graph), helper(_arg)
  8265. {
  8266. finished = true;
  8267. }
  8268. CHThorDiskCountActivity::~CHThorDiskCountActivity()
  8269. {
  8270. }
  8271. void CHThorDiskCountActivity::ready()
  8272. {
  8273. PARENT::ready();
  8274. finished = false;
  8275. stopAfter = helper.getChooseNLimit();
  8276. if (!helper.hasFilter())
  8277. remoteLimit = stopAfter;
  8278. }
  8279. void CHThorDiskCountActivity::gatherInfo(IFileDescriptor * fd)
  8280. {
  8281. PARENT::gatherInfo(fd);
  8282. assertex(!grouped);
  8283. }
  8284. const void *CHThorDiskCountActivity::nextRow()
  8285. {
  8286. if (finished) return NULL;
  8287. unsigned __int64 totalCount = 0;
  8288. if (fieldFilters.ordinality() == 0 && !helper.hasFilter() &&
  8289. (fixedDiskRecordSize != 0) && !(helper.getFlags() & (TDXtemporary | TDXjobtemp)) &&
  8290. !((helper.getFlags() & TDXcompress) && agent.queryResolveFilesLocally()) )
  8291. {
  8292. resolve();
  8293. if (segHelper.canMatchAny() && ldFile)
  8294. {
  8295. try
  8296. {
  8297. unsigned __int64 size = ldFile->getFileSize();
  8298. if (size % fixedDiskRecordSize)
  8299. throw MakeStringException(0, "Physical file %s has size %" I64F "d which is not a multiple of record size %d", ldFile->queryLogicalName(), size, fixedDiskRecordSize);
  8300. totalCount = size / fixedDiskRecordSize;
  8301. }
  8302. catch (IException * e)
  8303. {
  8304. if (!(helper.getFlags() & TDRoptional) || (e->errorCode() != DFSERR_CannotFindPartFileSize))
  8305. throw;
  8306. e->Release();
  8307. }
  8308. }
  8309. }
  8310. else
  8311. {
  8312. if (!opened) open();
  8313. for (;;)
  8314. {
  8315. if (eofseen)
  8316. break;
  8317. while (!prefetchBuffer.eos())
  8318. {
  8319. queryUpdateProgress();
  8320. prefetcher->readAhead(prefetchBuffer);
  8321. const byte * next = prefetchBuffer.queryRow();
  8322. size32_t sizeRead = prefetchBuffer.queryRowSize();
  8323. if (segMonitorsMatch(next))
  8324. totalCount += helper.numValid(next);
  8325. prefetchBuffer.finishedRow();
  8326. localOffset += sizeRead;
  8327. if (totalCount > stopAfter)
  8328. break;
  8329. }
  8330. if (totalCount > stopAfter)
  8331. break;
  8332. eofseen = !openNext();
  8333. }
  8334. close();
  8335. }
  8336. if (totalCount > stopAfter)
  8337. totalCount = stopAfter;
  8338. finished = true;
  8339. processed++;
  8340. size32_t outSize = outputMeta.getFixedSize();
  8341. void * ret = rowAllocator->createRow();
  8342. if (outSize == 1)
  8343. {
  8344. assertex(stopAfter == 1);
  8345. *(byte *)ret = (byte)totalCount;
  8346. }
  8347. else
  8348. {
  8349. assertex(outSize == sizeof(unsigned __int64));
  8350. *(unsigned __int64 *)ret = totalCount;
  8351. }
  8352. return rowAllocator->finalizeRow(outSize, ret, outSize);
  8353. }
  8354. //=====================================================================================================
  8355. CHThorDiskGroupAggregateActivity::CHThorDiskGroupAggregateActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDiskGroupAggregateArg &_arg, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree *_node)
  8356. : CHThorBinaryDiskReadBase(_agent, _activityId, _subgraphId, _arg, _arg, _kind, _node, _graph),
  8357. helper(_arg),
  8358. aggregated(_arg, _arg)
  8359. {
  8360. }
  8361. void CHThorDiskGroupAggregateActivity::ready()
  8362. {
  8363. PARENT::ready();
  8364. eof = false;
  8365. gathered = false;
  8366. }
  8367. void CHThorDiskGroupAggregateActivity::gatherInfo(IFileDescriptor * fd)
  8368. {
  8369. PARENT::gatherInfo(fd);
  8370. assertex(!grouped);
  8371. aggregated.start(rowAllocator, agent.queryCodeContext(), activityId);
  8372. }
  8373. void CHThorDiskGroupAggregateActivity::processRow(const void * next)
  8374. {
  8375. aggregated.addRow(next);
  8376. }
  8377. const void *CHThorDiskGroupAggregateActivity::nextRow()
  8378. {
  8379. if (eof)
  8380. return NULL;
  8381. try
  8382. {
  8383. if (!gathered)
  8384. {
  8385. if (!opened) open();
  8386. while (!eofseen)
  8387. {
  8388. while (!prefetchBuffer.eos())
  8389. {
  8390. queryUpdateProgress();
  8391. prefetcher->readAhead(prefetchBuffer);
  8392. const byte * next = prefetchBuffer.queryRow();
  8393. size32_t sizeRead = prefetchBuffer.queryRowSize();
  8394. if (segMonitorsMatch(next))
  8395. {
  8396. if (unlikely(translator))
  8397. {
  8398. MemoryBufferBuilder aBuilder(translatedRow.clear(), 0);
  8399. translator->translate(aBuilder, *this, next);
  8400. helper.processRow(aBuilder.getSelf(), this);
  8401. }
  8402. else
  8403. helper.processRow(next, this);
  8404. }
  8405. prefetchBuffer.finishedRow();
  8406. localOffset += sizeRead;
  8407. }
  8408. eofseen = !openNext();
  8409. }
  8410. close();
  8411. gathered = true;
  8412. }
  8413. }
  8414. catch(IException * e)
  8415. {
  8416. throw makeWrappedException(e);
  8417. }
  8418. Owned<AggregateRowBuilder> next = aggregated.nextResult();
  8419. if (next)
  8420. {
  8421. processed++;
  8422. return next->finalizeRowClear();
  8423. }
  8424. eof = true;
  8425. return NULL;
  8426. }
  8427. //=====================================================================================================
  8428. CHThorCsvReadActivity::CHThorCsvReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorCsvReadArg &_arg, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree *_node) : CHThorDiskReadBaseActivity(_agent, _activityId, _subgraphId, _arg, _kind, _node, _graph), helper(_arg)
  8429. {
  8430. maxRowSize = agent.queryWorkUnit()->getDebugValueInt(OPT_MAXCSVROWSIZE, defaultMaxCsvRowSize) * 1024 * 1024;
  8431. readType = rt_csv;
  8432. }
  8433. CHThorCsvReadActivity::~CHThorCsvReadActivity()
  8434. {
  8435. }
  8436. void CHThorCsvReadActivity::ready()
  8437. {
  8438. PARENT::ready();
  8439. }
  8440. void CHThorCsvReadActivity::stop()
  8441. {
  8442. csvSplitter.reset();
  8443. PARENT::stop();
  8444. }
  8445. void CHThorCsvReadActivity::gatherInfo(IFileDescriptor * fd)
  8446. {
  8447. PARENT::gatherInfo(fd);
  8448. ICsvParameters * csvInfo = helper.queryCsvParameters();
  8449. headerLines = csvInfo->queryHeaderLen();
  8450. maxDiskSize = csvInfo->queryMaxSize();
  8451. limit = helper.getRowLimit();
  8452. if (helper.getFlags() & TDRlimitskips)
  8453. limit = (unsigned __int64) -1;
  8454. stopAfter = helper.getChooseNLimit();
  8455. const char * quotes = NULL;
  8456. const char * separators = NULL;
  8457. const char * terminators = NULL;
  8458. const char * escapes = NULL;
  8459. IDistributedFile * dFile = ldFile?ldFile->queryDistributedFile():NULL;
  8460. if (dFile) //only makes sense for distributed (non local) files
  8461. {
  8462. IPropertyTree & options = dFile->queryAttributes();
  8463. quotes = options.queryProp("@csvQuote");
  8464. separators = options.queryProp("@csvSeparate");
  8465. terminators = options.queryProp("@csvTerminate");
  8466. escapes = options.queryProp("@csvEscape");
  8467. }
  8468. csvSplitter.init(helper.getMaxColumns(), csvInfo, quotes, separators, terminators, escapes);
  8469. }
  8470. void CHThorCsvReadActivity::calcFixedDiskRecordSize()
  8471. {
  8472. fixedDiskRecordSize = 0;
  8473. }
  8474. const void *CHThorCsvReadActivity::nextRow()
  8475. {
  8476. while (!stopAfter || (processed - initialProcessed) < stopAfter)
  8477. {
  8478. checkOpenNext();
  8479. if (eofseen)
  8480. break;
  8481. size32_t thisLineLength = csvSplitter.splitLine(inputstream, maxRowSize);
  8482. if (thisLineLength)
  8483. {
  8484. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  8485. unsigned thisSize;
  8486. try
  8487. {
  8488. thisSize = helper.transform(rowBuilder, csvSplitter.queryLengths(), (const char * *)csvSplitter.queryData());
  8489. }
  8490. catch(IException * e)
  8491. {
  8492. throw makeWrappedException(e);
  8493. }
  8494. inputstream->skip(thisLineLength);
  8495. localOffset += thisLineLength;
  8496. if (thisSize)
  8497. {
  8498. OwnedConstRoxieRow ret = rowBuilder.finalizeRowClear(thisSize);
  8499. if ((processed - initialProcessed) >= limit)
  8500. {
  8501. if ( agent.queryCodeContext()->queryDebugContext())
  8502. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  8503. helper.onLimitExceeded();
  8504. return NULL;
  8505. }
  8506. processed++;
  8507. return ret.getClear();
  8508. }
  8509. }
  8510. }
  8511. close();
  8512. return NULL;
  8513. }
  8514. bool CHThorCsvReadActivity::openNext()
  8515. {
  8516. if (CHThorDiskReadBaseActivity::openNext())
  8517. {
  8518. unsigned lines = headerLines;
  8519. while (lines-- && !inputstream->eos())
  8520. {
  8521. size32_t numAvailable;
  8522. const void * next = inputstream->peek(maxDiskSize, numAvailable);
  8523. inputstream->skip(csvSplitter.splitLine(numAvailable, (const byte *)next));
  8524. }
  8525. // only skip header in the first file - since spray doesn't duplicate the header.
  8526. headerLines = 0;
  8527. return true;
  8528. }
  8529. return false;
  8530. }
  8531. void CHThorCsvReadActivity::checkOpenNext()
  8532. {
  8533. agent.reportProgress(NULL);
  8534. if (!opened)
  8535. {
  8536. agent.reportProgress(NULL);
  8537. if (!helper.canMatchAny())
  8538. {
  8539. eofseen = true;
  8540. opened = true;
  8541. }
  8542. else
  8543. open();
  8544. }
  8545. for (;;)
  8546. {
  8547. if (eofseen || !inputstream->eos())
  8548. return;
  8549. eofseen = !openNext();
  8550. }
  8551. }
  8552. //=====================================================================================================
  8553. CHThorXmlReadActivity::CHThorXmlReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorXmlReadArg &_arg, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree *_node) : CHThorDiskReadBaseActivity(_agent, _activityId, _subgraphId, _arg, _kind, _node, _graph), helper(_arg)
  8554. {
  8555. readType = (kind==TAKjsonread) ? rt_json : rt_xml;
  8556. }
  8557. void CHThorXmlReadActivity::ready()
  8558. {
  8559. CHThorDiskReadBaseActivity::ready();
  8560. rowTransformer.set(helper.queryTransformer());
  8561. localOffset = 0;
  8562. limit = helper.getRowLimit();
  8563. if (helper.getFlags() & TDRlimitskips)
  8564. limit = (unsigned __int64) -1;
  8565. stopAfter = helper.getChooseNLimit();
  8566. }
  8567. void CHThorXmlReadActivity::stop()
  8568. {
  8569. xmlParser.clear();
  8570. CHThorDiskReadBaseActivity::stop();
  8571. }
  8572. void CHThorXmlReadActivity::gatherInfo(IFileDescriptor * fd)
  8573. {
  8574. PARENT::gatherInfo(fd);
  8575. }
  8576. void CHThorXmlReadActivity::calcFixedDiskRecordSize()
  8577. {
  8578. fixedDiskRecordSize = 0;
  8579. }
  8580. const void *CHThorXmlReadActivity::nextRow()
  8581. {
  8582. if(!opened) open();
  8583. while (!eofseen && (!stopAfter || (processed - initialProcessed) < stopAfter))
  8584. {
  8585. agent.reportProgress(NULL);
  8586. //call to next() will callback on the IXmlSelect interface
  8587. bool gotNext = false;
  8588. try
  8589. {
  8590. gotNext = xmlParser->next();
  8591. }
  8592. catch(IException * e)
  8593. {
  8594. throw makeWrappedException(e, inputfile->queryFilename());
  8595. }
  8596. if(!gotNext)
  8597. eofseen = !openNext();
  8598. else if (lastMatch)
  8599. {
  8600. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  8601. unsigned sizeGot;
  8602. try
  8603. {
  8604. sizeGot = rowTransformer->transform(rowBuilder, lastMatch, this);
  8605. }
  8606. catch(IException * e)
  8607. {
  8608. throw makeWrappedException(e);
  8609. }
  8610. lastMatch.clear();
  8611. localOffset = 0;
  8612. if (sizeGot)
  8613. {
  8614. OwnedConstRoxieRow ret = rowBuilder.finalizeRowClear(sizeGot);
  8615. if ((processed - initialProcessed) >= limit)
  8616. {
  8617. if ( agent.queryCodeContext()->queryDebugContext())
  8618. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  8619. helper.onLimitExceeded();
  8620. return NULL;
  8621. }
  8622. processed++;
  8623. return ret.getClear();
  8624. }
  8625. }
  8626. }
  8627. return NULL;
  8628. }
  8629. bool CHThorXmlReadActivity::openNext()
  8630. {
  8631. if (inputfileio)
  8632. offsetOfPart += inputfileio->size();
  8633. localOffset = 0;
  8634. if (CHThorDiskReadBaseActivity::openNext())
  8635. {
  8636. unsigned readBufferSize = queryReadBufferSize();
  8637. OwnedIFileIOStream inputfileiostream;
  8638. if(readBufferSize)
  8639. inputfileiostream.setown(createBufferedIOStream(inputfileio, readBufferSize));
  8640. else
  8641. inputfileiostream.setown(createIOStream(inputfileio));
  8642. OwnedRoxieString xmlIterator(helper.getXmlIteratorPath());
  8643. if (kind==TAKjsonread)
  8644. xmlParser.setown(createJSONParse(*inputfileiostream, xmlIterator, *this, (0 != (TDRxmlnoroot & helper.getFlags()))?ptr_noRoot:ptr_none, (helper.getFlags() & TDRusexmlcontents) != 0));
  8645. else
  8646. xmlParser.setown(createXMLParse(*inputfileiostream, xmlIterator, *this, (0 != (TDRxmlnoroot & helper.getFlags()))?ptr_noRoot:ptr_none, (helper.getFlags() & TDRusexmlcontents) != 0));
  8647. return true;
  8648. }
  8649. return false;
  8650. }
  8651. void CHThorXmlReadActivity::closepart()
  8652. {
  8653. xmlParser.clear();
  8654. CHThorDiskReadBaseActivity::closepart();
  8655. }
  8656. //---------------------------------------------------------------------------
  8657. CHThorLocalResultReadActivity::CHThorLocalResultReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorLocalResultReadArg &_arg, ThorActivityKind _kind, EclGraph & _graph, __int64 graphId) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  8658. {
  8659. physicalRecordSize = outputMeta;
  8660. grouped = outputMeta.isGrouped();
  8661. graph = resolveLocalQuery(graphId);
  8662. result = NULL;
  8663. }
  8664. void CHThorLocalResultReadActivity::ready()
  8665. {
  8666. CHThorSimpleActivityBase::ready();
  8667. result = graph->queryResult(helper.querySequence());
  8668. curRow = 0;
  8669. }
  8670. const void *CHThorLocalResultReadActivity::nextRow()
  8671. {
  8672. const void * next = result->queryRow(curRow++);
  8673. if (next)
  8674. {
  8675. processed++;
  8676. LinkRoxieRow(next);
  8677. return next;
  8678. }
  8679. return NULL;
  8680. }
  8681. //=====================================================================================================
  8682. CHThorLocalResultWriteActivity::CHThorLocalResultWriteActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorLocalResultWriteArg &_arg, ThorActivityKind _kind, EclGraph & _graph, __int64 graphId)
  8683. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  8684. {
  8685. graph = resolveLocalQuery(graphId);
  8686. }
  8687. void CHThorLocalResultWriteActivity::execute()
  8688. {
  8689. IHThorGraphResult * result = graph->createResult(helper.querySequence(), LINK(rowAllocator));
  8690. for (;;)
  8691. {
  8692. const void *nextrec = input->nextRow();
  8693. if (!nextrec)
  8694. {
  8695. nextrec = input->nextRow();
  8696. if (!nextrec)
  8697. break;
  8698. result->addRowOwn(NULL);
  8699. }
  8700. result->addRowOwn(nextrec);
  8701. }
  8702. }
  8703. //=====================================================================================================
  8704. CHThorDictionaryResultWriteActivity::CHThorDictionaryResultWriteActivity (IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDictionaryResultWriteArg &_arg, ThorActivityKind _kind, EclGraph & _graph, __int64 graphId)
  8705. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  8706. {
  8707. graph = resolveLocalQuery(graphId);
  8708. }
  8709. void CHThorDictionaryResultWriteActivity::execute()
  8710. {
  8711. RtlLinkedDictionaryBuilder builder(rowAllocator, helper.queryHashLookupInfo());
  8712. for (;;)
  8713. {
  8714. const void *row = input->nextRow();
  8715. if (!row)
  8716. {
  8717. row = input->nextRow();
  8718. if (!row)
  8719. break;
  8720. }
  8721. builder.appendOwn(row);
  8722. }
  8723. IHThorGraphResult * result = graph->createResult(helper.querySequence(), LINK(rowAllocator));
  8724. size32_t dictSize = builder.getcount();
  8725. const byte ** dictRows = builder.queryrows();
  8726. for (size32_t row = 0; row < dictSize; row++)
  8727. {
  8728. const byte *thisRow = dictRows[row];
  8729. if (thisRow)
  8730. LinkRoxieRow(thisRow);
  8731. result->addRowOwn(thisRow);
  8732. }
  8733. }
  8734. //=====================================================================================================
  8735. CHThorLocalResultSpillActivity::CHThorLocalResultSpillActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorLocalResultSpillArg &_arg, ThorActivityKind _kind, EclGraph & _graph, __int64 graphId)
  8736. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  8737. {
  8738. result = NULL;
  8739. nullPending = false;
  8740. graph = resolveLocalQuery(graphId);
  8741. assertex(graph);
  8742. }
  8743. void CHThorLocalResultSpillActivity::ready()
  8744. {
  8745. CHThorSimpleActivityBase::ready();
  8746. result = graph->createResult(helper.querySequence(), LINK(rowAllocator));
  8747. nullPending = false;
  8748. }
  8749. const void * CHThorLocalResultSpillActivity::nextRow()
  8750. {
  8751. const void * ret = input->nextRow();
  8752. if (ret)
  8753. {
  8754. if (nullPending)
  8755. {
  8756. result->addRowOwn(NULL);
  8757. nullPending = false;
  8758. }
  8759. LinkRoxieRow(ret);
  8760. result->addRowOwn(ret);
  8761. processed++;
  8762. }
  8763. else
  8764. nullPending = true;
  8765. return ret;
  8766. }
  8767. void CHThorLocalResultSpillActivity::stop()
  8768. {
  8769. for (;;)
  8770. {
  8771. const void * ret = input->nextRow();
  8772. if (!ret)
  8773. {
  8774. if (nullPending)
  8775. break;
  8776. nullPending = true;
  8777. }
  8778. else
  8779. {
  8780. if (nullPending)
  8781. {
  8782. result->addRowOwn(NULL);
  8783. nullPending = false;
  8784. }
  8785. result->addRowOwn(ret);
  8786. }
  8787. }
  8788. CHThorSimpleActivityBase::stop();
  8789. }
  8790. //=====================================================================================================
  8791. CHThorLoopActivity::CHThorLoopActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorLoopArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  8792. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  8793. {
  8794. flags = helper.getFlags();
  8795. maxIterations = 0;
  8796. }
  8797. CHThorLoopActivity::~CHThorLoopActivity()
  8798. {
  8799. ForEachItemIn(idx, loopPending)
  8800. ReleaseRoxieRow(loopPending.item(idx));
  8801. }
  8802. void CHThorLoopActivity::ready()
  8803. {
  8804. curInput = &input->queryStream();
  8805. eof = false;
  8806. loopCounter = 1;
  8807. CHThorSimpleActivityBase::ready();
  8808. maxIterations = helper.numIterations();
  8809. if ((int)maxIterations < 0) maxIterations = 0;
  8810. finishedLooping = ((kind == TAKloopcount) && (maxIterations == 0));
  8811. if ((flags & IHThorLoopArg::LFnewloopagain) && !helper.loopFirstTime())
  8812. finishedLooping = true;
  8813. extractBuilder.clear();
  8814. helper.createParentExtract(extractBuilder);
  8815. }
  8816. const void * CHThorLoopActivity::nextRow()
  8817. {
  8818. if (eof)
  8819. return NULL;
  8820. unsigned emptyIterations = 0;
  8821. for (;;)
  8822. {
  8823. for (;;)
  8824. {
  8825. const void * ret = curInput->nextRow();
  8826. if (!ret)
  8827. {
  8828. ret = curInput->nextRow(); // more cope with groups somehow....
  8829. if (!ret)
  8830. {
  8831. if (finishedLooping)
  8832. {
  8833. eof = true;
  8834. return NULL;
  8835. }
  8836. break;
  8837. }
  8838. }
  8839. if (finishedLooping ||
  8840. ((flags & IHThorLoopArg::LFfiltered) && !helper.sendToLoop(loopCounter, ret)))
  8841. {
  8842. processed++;
  8843. return ret;
  8844. }
  8845. loopPending.append(ret);
  8846. }
  8847. switch (kind)
  8848. {
  8849. case TAKloopdataset:
  8850. {
  8851. if (!(flags & IHThorLoopArg::LFnewloopagain))
  8852. {
  8853. if (!helper.loopAgain(loopCounter, loopPending.ordinality(), (const void * *)loopPending.getArray()))
  8854. {
  8855. if (loopPending.ordinality() == 0)
  8856. {
  8857. eof = true;
  8858. return NULL;
  8859. }
  8860. arrayInput.init(&loopPending);
  8861. curInput = &arrayInput;
  8862. finishedLooping = true;
  8863. continue; // back to the input loop again
  8864. }
  8865. }
  8866. break;
  8867. }
  8868. case TAKlooprow:
  8869. if (loopPending.empty())
  8870. {
  8871. finishedLooping = true;
  8872. eof = true;
  8873. return NULL;
  8874. }
  8875. break;
  8876. }
  8877. if (loopPending.ordinality())
  8878. emptyIterations = 0;
  8879. else
  8880. {
  8881. //note: any outputs which didn't go around the loop again, would return the record, reinitializing emptyIterations
  8882. emptyIterations++;
  8883. if (emptyIterations > EMPTY_LOOP_LIMIT)
  8884. throw MakeStringException(0, "Executed LOOP with empty input and output %u times", emptyIterations);
  8885. if (emptyIterations % 32 == 0)
  8886. DBGLOG("Executing LOOP with empty input and output %u times", emptyIterations);
  8887. }
  8888. void * counterRow = NULL;
  8889. if (flags & IHThorLoopArg::LFcounter)
  8890. {
  8891. counterRow = queryRowManager()->allocate(sizeof(thor_loop_counter_t), activityId);
  8892. *((thor_loop_counter_t *)counterRow) = loopCounter;
  8893. }
  8894. Owned<IHThorGraphResults> curResults = loopGraph->execute(counterRow, loopPending, extractBuilder.getbytes());
  8895. if (flags & IHThorLoopArg::LFnewloopagain)
  8896. {
  8897. IHThorGraphResult * result = curResults->queryResult(helper.loopAgainResult());
  8898. assertex(result);
  8899. const void * row = result->queryRow(0);
  8900. assertex(row);
  8901. //Result is a row which contains a single boolean field.
  8902. if (!((const bool *)row)[0])
  8903. finishedLooping = true;
  8904. }
  8905. resultInput.init(curResults->queryResult(0));
  8906. curInput = &resultInput;
  8907. loopCounter++;
  8908. if ((kind == TAKloopcount) && (loopCounter > maxIterations))
  8909. finishedLooping = true;
  8910. }
  8911. }
  8912. void CHThorLoopActivity::stop()
  8913. {
  8914. ForEachItemIn(idx, loopPending)
  8915. ReleaseRoxieRow(loopPending.item(idx));
  8916. loopPending.kill();
  8917. CHThorSimpleActivityBase::stop();
  8918. }
  8919. //---------------------------------------------------------------------------
  8920. CHThorGraphLoopResultReadActivity::CHThorGraphLoopResultReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorGraphLoopResultReadArg &_arg, ThorActivityKind _kind, EclGraph & _graph, __int64 graphId) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(&_arg)
  8921. {
  8922. physicalRecordSize = outputMeta;
  8923. grouped = outputMeta.isGrouped();
  8924. result = NULL;
  8925. graph = resolveLocalQuery(graphId);
  8926. }
  8927. CHThorGraphLoopResultReadActivity::CHThorGraphLoopResultReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorArg & _arg, ThorActivityKind _kind, EclGraph & _graph, __int64 graphId, unsigned _sequence, bool _grouped) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(NULL)
  8928. {
  8929. physicalRecordSize = outputMeta;
  8930. sequence = _sequence;
  8931. grouped = _grouped;
  8932. result = NULL;
  8933. graph = resolveLocalQuery(graphId);
  8934. }
  8935. void CHThorGraphLoopResultReadActivity::ready()
  8936. {
  8937. CHThorSimpleActivityBase::ready();
  8938. if (helper)
  8939. sequence = helper->querySequence();
  8940. if ((int)sequence >= 0)
  8941. result = graph->queryGraphLoopResult(sequence);
  8942. else
  8943. result = NULL;
  8944. curRow = 0;
  8945. }
  8946. const void *CHThorGraphLoopResultReadActivity::nextRow()
  8947. {
  8948. if (result)
  8949. {
  8950. const void * next = result->queryRow(curRow++);
  8951. if (next)
  8952. {
  8953. processed++;
  8954. LinkRoxieRow(next);
  8955. return (void *)next;
  8956. }
  8957. }
  8958. return NULL;
  8959. }
  8960. //=====================================================================================================
  8961. CHThorGraphLoopResultWriteActivity::CHThorGraphLoopResultWriteActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorGraphLoopResultWriteArg &_arg, ThorActivityKind _kind, EclGraph & _graph, __int64 graphId)
  8962. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph)
  8963. {
  8964. graph = resolveLocalQuery(graphId);
  8965. }
  8966. void CHThorGraphLoopResultWriteActivity::execute()
  8967. {
  8968. IHThorGraphResult * result = graph->createGraphLoopResult(LINK(rowAllocator));
  8969. for (;;)
  8970. {
  8971. const void *nextrec = input->nextRow();
  8972. if (!nextrec)
  8973. {
  8974. nextrec = input->nextRow();
  8975. if (!nextrec)
  8976. break;
  8977. result->addRowOwn(NULL);
  8978. }
  8979. result->addRowOwn(nextrec);
  8980. }
  8981. }
  8982. //=====================================================================================================
  8983. class CCounterMeta : implements IOutputMetaData, public CInterface
  8984. {
  8985. public:
  8986. IMPLEMENT_IINTERFACE
  8987. virtual size32_t getRecordSize(const void *rec) { return sizeof(thor_loop_counter_t); }
  8988. virtual size32_t getMinRecordSize() const { return sizeof(thor_loop_counter_t); }
  8989. virtual size32_t getFixedSize() const { return sizeof(thor_loop_counter_t); }
  8990. virtual void toXML(const byte * self, IXmlWriter & out) { }
  8991. virtual unsigned getVersion() const { return OUTPUTMETADATA_VERSION; }
  8992. virtual unsigned getMetaFlags() { return 0; }
  8993. virtual const RtlTypeInfo * queryTypeInfo() const { return nullptr; }
  8994. virtual void destruct(byte * self) {}
  8995. virtual IOutputRowSerializer * createDiskSerializer(ICodeContext * ctx, unsigned activityId) { return NULL; }
  8996. virtual IOutputRowDeserializer * createDiskDeserializer(ICodeContext * ctx, unsigned activityId) { return NULL; }
  8997. virtual ISourceRowPrefetcher * createDiskPrefetcher() { return NULL; }
  8998. virtual IOutputMetaData * querySerializedDiskMeta() { return this; }
  8999. virtual IOutputRowSerializer * createInternalSerializer(ICodeContext * ctx, unsigned activityId) { return NULL; }
  9000. virtual IOutputRowDeserializer * createInternalDeserializer(ICodeContext * ctx, unsigned activityId) { return NULL; }
  9001. virtual void process(const byte * self, IFieldProcessor & target, unsigned from, unsigned to) {}
  9002. virtual void walkIndirectMembers(const byte * self, IIndirectMemberVisitor & visitor) {}
  9003. virtual IOutputMetaData * queryChildMeta(unsigned i) { return NULL; }
  9004. virtual const RtlRecord &queryRecordAccessor(bool expand) const { throwUnexpected(); } // could provide a static implementation if needed
  9005. };
  9006. //=====================================================================================================
  9007. CHThorGraphLoopActivity::CHThorGraphLoopActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorGraphLoopArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  9008. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  9009. {
  9010. flags = helper.getFlags();
  9011. maxIterations = 0;
  9012. counterMeta.setown(new CCounterMeta);
  9013. }
  9014. void CHThorGraphLoopActivity::ready()
  9015. {
  9016. executed = false;
  9017. resultIndex = 0;
  9018. CHThorSimpleActivityBase::ready();
  9019. maxIterations = helper.numIterations();
  9020. if ((int)maxIterations < 0) maxIterations = 0;
  9021. loopResults.setown(agent.createGraphLoopResults());
  9022. extractBuilder.clear();
  9023. helper.createParentExtract(extractBuilder);
  9024. rowAllocator.setown(agent.queryCodeContext()->getRowAllocator(queryOutputMeta(), activityId));
  9025. rowAllocatorCounter.setown(agent.queryCodeContext()->getRowAllocator(counterMeta, activityId));
  9026. }
  9027. const void * CHThorGraphLoopActivity::nextRow()
  9028. {
  9029. if (!executed)
  9030. {
  9031. executed = true;
  9032. IHThorGraphResult * inputResult = loopResults->createResult(0, LINK(rowAllocator));
  9033. for (;;)
  9034. {
  9035. const void * ret = input->nextRow();
  9036. if (!ret)
  9037. {
  9038. ret = input->nextRow();
  9039. if (!ret)
  9040. break;
  9041. inputResult->addRowOwn(NULL);
  9042. }
  9043. inputResult->addRowOwn(ret);
  9044. }
  9045. for (unsigned loopCounter = 1; loopCounter <= maxIterations; loopCounter++)
  9046. {
  9047. void * counterRow = NULL;
  9048. if (flags & IHThorGraphLoopArg::GLFcounter)
  9049. {
  9050. counterRow = rowAllocatorCounter->createRow();
  9051. *((thor_loop_counter_t *)counterRow) = loopCounter;
  9052. counterRow = rowAllocatorCounter->finalizeRow(sizeof(thor_loop_counter_t), counterRow, sizeof(thor_loop_counter_t));
  9053. }
  9054. loopGraph->execute(counterRow, loopResults, extractBuilder.getbytes());
  9055. }
  9056. int iNumResults = loopResults->ordinality();
  9057. finalResult = loopResults->queryResult(iNumResults-1); //Get the last result, which isnt necessarily 'maxIterations'
  9058. }
  9059. const void * next = finalResult->getOwnRow(resultIndex++);
  9060. if (next)
  9061. processed++;
  9062. return next;
  9063. }
  9064. void CHThorGraphLoopActivity::stop()
  9065. {
  9066. rowAllocator.clear();
  9067. finalResult = NULL;
  9068. loopResults.clear();
  9069. CHThorSimpleActivityBase::stop();
  9070. }
  9071. //=====================================================================================================
  9072. CHThorParallelGraphLoopActivity::CHThorParallelGraphLoopActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorGraphLoopArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  9073. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  9074. {
  9075. flags = helper.getFlags();
  9076. maxIterations = 0;
  9077. }
  9078. void CHThorParallelGraphLoopActivity::ready()
  9079. {
  9080. executed = false;
  9081. resultIndex = 0;
  9082. CHThorSimpleActivityBase::ready();
  9083. maxIterations = helper.numIterations();
  9084. if ((int)maxIterations < 0) maxIterations = 0;
  9085. loopResults.setown(agent.createGraphLoopResults());
  9086. extractBuilder.clear();
  9087. helper.createParentExtract(extractBuilder);
  9088. rowAllocator.setown(agent.queryCodeContext()->getRowAllocator(queryOutputMeta(), activityId));
  9089. }
  9090. const void * CHThorParallelGraphLoopActivity::nextRow()
  9091. {
  9092. if (!executed)
  9093. {
  9094. executed = true;
  9095. IHThorGraphResult * inputResult = loopResults->createResult(0, LINK(rowAllocator));
  9096. for (;;)
  9097. {
  9098. const void * ret = input->nextRow();
  9099. if (!ret)
  9100. {
  9101. ret = input->nextRow();
  9102. if (!ret)
  9103. break;
  9104. inputResult->addRowOwn(NULL);
  9105. }
  9106. inputResult->addRowOwn(ret);
  9107. }
  9108. // The lack of separation between pre-creation and creation means this would require cloning lots of structures.
  9109. // not implemented for the moment.
  9110. // loopGraph->executeParallel(loopResults, extractBuilder.getbytes(), maxIterations);
  9111. finalResult = loopResults->queryResult(maxIterations);
  9112. }
  9113. const void * next = finalResult->getOwnRow(resultIndex++);
  9114. if (next)
  9115. processed++;
  9116. return next;
  9117. }
  9118. void CHThorParallelGraphLoopActivity::stop()
  9119. {
  9120. rowAllocator.clear();
  9121. finalResult = NULL;
  9122. loopResults.clear();
  9123. CHThorSimpleActivityBase::stop();
  9124. }
  9125. //=====================================================================================================
  9126. LibraryCallOutput::LibraryCallOutput(CHThorLibraryCallActivity * _owner, unsigned _output, IOutputMetaData * _meta) : owner(_owner), output(_output), meta(_meta)
  9127. {
  9128. processed = 0;
  9129. }
  9130. const void * LibraryCallOutput::nextRow()
  9131. {
  9132. if (!gotRows)
  9133. {
  9134. result.set(owner->getResultRows(output));
  9135. gotRows = true;
  9136. }
  9137. const void * ret = result->getOwnRow(curRow++);
  9138. if (ret)
  9139. processed++;
  9140. return ret;
  9141. }
  9142. bool LibraryCallOutput::isGrouped()
  9143. {
  9144. return meta->isGrouped();
  9145. }
  9146. IOutputMetaData * LibraryCallOutput::queryOutputMeta() const
  9147. {
  9148. return meta;
  9149. }
  9150. void LibraryCallOutput::ready()
  9151. {
  9152. owner->ready();
  9153. gotRows = false;
  9154. result.clear();
  9155. curRow = 0;
  9156. }
  9157. void LibraryCallOutput::stop()
  9158. {
  9159. owner->stop();
  9160. result.clear();
  9161. }
  9162. void LibraryCallOutput::resetEOF()
  9163. {
  9164. throwUnexpected();
  9165. }
  9166. void LibraryCallOutput::updateProgress(IStatisticGatherer &progress) const
  9167. {
  9168. owner->updateOutputProgress(progress, *this, processed);
  9169. }
  9170. CHThorLibraryCallActivity::CHThorLibraryCallActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorLibraryCallArg &_arg, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree * node)
  9171. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  9172. {
  9173. libraryName.set(node->queryProp("att[@name=\"libname\"]/@value"));
  9174. interfaceHash = node->getPropInt("att[@name=\"_interfaceHash\"]/@value", 0);
  9175. bool embedded = node->getPropBool("att[@name=\"embedded\"]/@value", false) ;
  9176. if (embedded)
  9177. {
  9178. embeddedGraphName.set(node->queryProp("att[@name=\"graph\"]/@value"));
  9179. if (!embeddedGraphName)
  9180. embeddedGraphName.set(libraryName);
  9181. }
  9182. Owned<IPropertyTreeIterator> iter = node->getElements("att[@name=\"_outputUsed\"]");
  9183. ForEach(*iter)
  9184. {
  9185. unsigned whichOutput = iter->query().getPropInt("@value");
  9186. IOutputMetaData * meta = helper.queryOutputMeta(whichOutput);
  9187. outputs.append(*new LibraryCallOutput(this, whichOutput, meta));
  9188. }
  9189. state = StateCreated;
  9190. }
  9191. IHThorGraphResult * CHThorLibraryCallActivity::getResultRows(unsigned whichOutput)
  9192. {
  9193. CriticalBlock procedure(cs);
  9194. if (!results)
  9195. {
  9196. if (libraryName.length() == 0)
  9197. libraryName.setown(helper.getLibraryName());
  9198. helper.createParentExtract(extractBuilder);
  9199. results.setown(agent.executeLibraryGraph(libraryName, interfaceHash, activityId, embeddedGraphName, extractBuilder.getbytes()));
  9200. }
  9201. return results->queryResult(whichOutput);
  9202. }
  9203. IHThorInput * CHThorLibraryCallActivity::queryOutput(unsigned idx)
  9204. {
  9205. assert(outputs.isItem(idx));
  9206. return &outputs.item(idx);
  9207. }
  9208. void CHThorLibraryCallActivity::updateOutputProgress(IStatisticGatherer &progress, const LibraryCallOutput & _output, unsigned __int64 numProcessed) const
  9209. {
  9210. LibraryCallOutput & output = const_cast<LibraryCallOutput &>(_output);
  9211. updateProgressForOther(progress, activityId, subgraphId, outputs.find(output), numProcessed);
  9212. }
  9213. void CHThorLibraryCallActivity::ready()
  9214. {
  9215. CriticalBlock procedure(cs);
  9216. if (state != StateReady)
  9217. {
  9218. results.clear();
  9219. CHThorSimpleActivityBase::ready();
  9220. state = StateReady;
  9221. }
  9222. }
  9223. const void * CHThorLibraryCallActivity::nextRow()
  9224. {
  9225. throwUnexpected();
  9226. }
  9227. void CHThorLibraryCallActivity::stop()
  9228. {
  9229. CriticalBlock procedure(cs);
  9230. if (state != StateDone)
  9231. {
  9232. results.clear();
  9233. CHThorSimpleActivityBase::stop();
  9234. }
  9235. }
  9236. //=====================================================================================================
  9237. class CHThorNWayInputActivity : public CHThorSimpleActivityBase, implements IHThorNWayInput
  9238. {
  9239. IHThorNWayInputArg & helper;
  9240. InputArrayType inputs;
  9241. InputArrayType selectedInputs;
  9242. public:
  9243. CHThorNWayInputActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNWayInputArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  9244. {
  9245. }
  9246. virtual void ready()
  9247. {
  9248. bool selectionIsAll;
  9249. size32_t selectionLen;
  9250. rtlDataAttr selection;
  9251. helper.getInputSelection(selectionIsAll, selectionLen, selection.refdata());
  9252. selectedInputs.kill();
  9253. if (selectionIsAll)
  9254. {
  9255. ForEachItemIn(i, inputs)
  9256. selectedInputs.append(inputs.item(i));
  9257. }
  9258. else
  9259. {
  9260. const size32_t * selections = (const size32_t *)selection.getdata();
  9261. unsigned max = selectionLen/sizeof(size32_t);
  9262. for (unsigned i = 0; i < max; i++)
  9263. {
  9264. unsigned nextIndex = selections[i];
  9265. //Check there are no duplicates..... Assumes there are a fairly small number of inputs, so n^2 search is ok.
  9266. for (unsigned j=i+1; j < max; j++)
  9267. {
  9268. if (nextIndex == selections[j])
  9269. throw MakeStringException(100, "Selection list for nway input can not contain duplicates");
  9270. }
  9271. if (!inputs.isItem(nextIndex-1))
  9272. throw MakeStringException(100, "Index %d in RANGE selection list is out of range", nextIndex);
  9273. selectedInputs.append(inputs.item(nextIndex-1));
  9274. }
  9275. }
  9276. ForEachItemIn(i2, selectedInputs)
  9277. selectedInputs.item(i2)->ready();
  9278. }
  9279. virtual void setInput(unsigned idx, IHThorInput *_in)
  9280. {
  9281. assertex(idx == inputs.ordinality());
  9282. inputs.append(_in);
  9283. }
  9284. virtual const void * nextRow()
  9285. {
  9286. throwUnexpected();
  9287. }
  9288. virtual void updateProgress(IStatisticGatherer &progress) const
  9289. {
  9290. // CHThorSimpleActivityBase::updateProgress(progress);
  9291. ForEachItemIn(i, inputs)
  9292. inputs.item(i)->updateProgress(progress);
  9293. }
  9294. virtual unsigned numConcreteOutputs() const
  9295. {
  9296. return selectedInputs.ordinality();
  9297. }
  9298. virtual IHThorInput * queryConcreteInput(unsigned idx) const
  9299. {
  9300. if (selectedInputs.isItem(idx))
  9301. return selectedInputs.item(idx);
  9302. return NULL;
  9303. }
  9304. };
  9305. //=====================================================================================================
  9306. class CHThorNWayGraphLoopResultReadActivity : public CHThorSimpleActivityBase, implements IHThorNWayInput
  9307. {
  9308. IHThorNWayGraphLoopResultReadArg & helper;
  9309. CIArrayOf<CHThorActivityBase> inputs;
  9310. __int64 graphId;
  9311. bool grouped;
  9312. public:
  9313. CHThorNWayGraphLoopResultReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNWayGraphLoopResultReadArg &_arg, ThorActivityKind _kind, EclGraph & _graph, __int64 _graphId) : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  9314. {
  9315. grouped = helper.isGrouped();
  9316. graphId = _graphId;
  9317. }
  9318. virtual bool isGrouped()
  9319. {
  9320. return grouped;
  9321. }
  9322. virtual void ready()
  9323. {
  9324. bool selectionIsAll;
  9325. size32_t selectionLen;
  9326. rtlDataAttr selection;
  9327. helper.getInputSelection(selectionIsAll, selectionLen, selection.refdata());
  9328. if (selectionIsAll)
  9329. throw MakeStringException(100, "ALL not yet supported for NWay graph inputs");
  9330. unsigned max = selectionLen / sizeof(size32_t);
  9331. const size32_t * selections = (const size32_t *)selection.getdata();
  9332. for (unsigned i = 0; i < max; i++)
  9333. {
  9334. CHThorActivityBase * resultInput = new CHThorGraphLoopResultReadActivity(agent, activityId, subgraphId, helper, kind, graph, graphId, selections[i], grouped);
  9335. inputs.append(*resultInput);
  9336. resultInput->ready();
  9337. }
  9338. }
  9339. virtual void stop()
  9340. {
  9341. inputs.kill();
  9342. }
  9343. virtual void setInput(unsigned idx, IHThorInput *_in)
  9344. {
  9345. throwUnexpected();
  9346. }
  9347. virtual const void * nextRow()
  9348. {
  9349. throwUnexpected();
  9350. }
  9351. virtual unsigned numConcreteOutputs() const
  9352. {
  9353. return inputs.ordinality();
  9354. }
  9355. virtual IHThorInput * queryConcreteInput(unsigned idx) const
  9356. {
  9357. if (inputs.isItem(idx))
  9358. return &inputs.item(idx);
  9359. return NULL;
  9360. }
  9361. };
  9362. //=====================================================================================================
  9363. CHThorNWaySelectActivity::CHThorNWaySelectActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNWaySelectArg &_arg, ThorActivityKind _kind, EclGraph & _graph) : CHThorMultiInputActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  9364. {
  9365. selectedInput = NULL;
  9366. }
  9367. void CHThorNWaySelectActivity::stop()
  9368. {
  9369. selectedInput = NULL;
  9370. CHThorMultiInputActivity::stop();
  9371. }
  9372. void CHThorNWaySelectActivity::ready()
  9373. {
  9374. CHThorMultiInputActivity::ready();
  9375. unsigned whichInput = helper.getInputIndex();
  9376. selectedInput = NULL;
  9377. if (whichInput--)
  9378. {
  9379. ForEachItemIn(i, inputs)
  9380. {
  9381. IHThorInput * cur = inputs.item(i);
  9382. IHThorNWayInput * nWayInput = dynamic_cast<IHThorNWayInput *>(cur);
  9383. if (nWayInput)
  9384. {
  9385. unsigned numRealInputs = nWayInput->numConcreteOutputs();
  9386. if (whichInput < numRealInputs)
  9387. selectedInput = nWayInput->queryConcreteInput(whichInput);
  9388. whichInput -= numRealInputs;
  9389. }
  9390. else
  9391. {
  9392. if (whichInput == 0)
  9393. selectedInput = cur;
  9394. whichInput -= 1;
  9395. }
  9396. if (selectedInput)
  9397. break;
  9398. }
  9399. }
  9400. }
  9401. const void * CHThorNWaySelectActivity::nextRow()
  9402. {
  9403. if (!selectedInput)
  9404. return NULL;
  9405. return selectedInput->nextRow();
  9406. }
  9407. const void * CHThorNWaySelectActivity::nextRowGE(const void * seek, unsigned numFields, bool &wasCompleteMatch, const SmartStepExtra &stepExtra)
  9408. {
  9409. if (!selectedInput)
  9410. return NULL;
  9411. return selectedInput->nextRowGE(seek, numFields, wasCompleteMatch, stepExtra);
  9412. }
  9413. IInputSteppingMeta * CHThorNWaySelectActivity::querySteppingMeta()
  9414. {
  9415. if (selectedInput)
  9416. return selectedInput->querySteppingMeta();
  9417. return NULL;
  9418. }
  9419. //=====================================================================================================
  9420. CHThorStreamedIteratorActivity::CHThorStreamedIteratorActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorStreamedIteratorArg &_arg, ThorActivityKind _kind, EclGraph & _graph)
  9421. : CHThorSimpleActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg)
  9422. {
  9423. }
  9424. void CHThorStreamedIteratorActivity::ready()
  9425. {
  9426. CHThorSimpleActivityBase::ready();
  9427. rows.setown(helper.createInput());
  9428. }
  9429. const void *CHThorStreamedIteratorActivity::nextRow()
  9430. {
  9431. assertex(rows);
  9432. const void * next = rows->nextRow();
  9433. if (next)
  9434. processed++;
  9435. return next;
  9436. }
  9437. void CHThorStreamedIteratorActivity::stop()
  9438. {
  9439. if (rows)
  9440. {
  9441. rows->stop();
  9442. rows.clear();
  9443. }
  9444. }
  9445. //=====================================================================================================
  9446. CHThorExternalActivity::CHThorExternalActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorExternalArg &_arg, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree * _graphNode)
  9447. : CHThorMultiInputActivity(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), graphNode(_graphNode), activityContext(1, 0)
  9448. {
  9449. }
  9450. void CHThorExternalActivity::setInput(unsigned index, IHThorInput *_input)
  9451. {
  9452. CHThorMultiInputActivity::setInput(index, _input);
  9453. CHThorInputAdaptor * adaptedInput = new CHThorInputAdaptor(_input);
  9454. inputAdaptors.append(*adaptedInput);
  9455. helper.setInput(index, adaptedInput);
  9456. }
  9457. void CHThorExternalActivity::ready()
  9458. {
  9459. CHThorMultiInputActivity::ready();
  9460. if (kind != TAKexternalsink)
  9461. rows.setown(helper.createOutput(&activityContext));
  9462. }
  9463. const void *CHThorExternalActivity::nextRow()
  9464. {
  9465. assertex(rows);
  9466. const void * next = rows->nextRow();
  9467. if (next)
  9468. processed++;
  9469. return next;
  9470. }
  9471. void CHThorExternalActivity::execute()
  9472. {
  9473. assertex(!rows);
  9474. helper.execute(&activityContext);
  9475. }
  9476. void CHThorExternalActivity::stop()
  9477. {
  9478. if (rows)
  9479. {
  9480. rows->stop();
  9481. rows.clear();
  9482. }
  9483. CHThorMultiInputActivity::stop();
  9484. }
  9485. //=====================================================================================================
  9486. CHThorNewDiskReadBaseActivity::CHThorNewDiskReadBaseActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNewDiskReadBaseArg &_arg, IHThorCompoundBaseArg & _segHelper, ThorActivityKind _kind, IPropertyTree *_node, EclGraph & _graph)
  9487. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), segHelper(_segHelper)
  9488. {
  9489. helper.setCallback(this);
  9490. expectedDiskMeta = helper.queryDiskRecordSize();
  9491. projectedDiskMeta = helper.queryProjectedDiskRecordSize();
  9492. formatOptions.setown(createPTree());
  9493. isCodeSigned = false;
  9494. if (_node)
  9495. {
  9496. const char *recordTranslationModeHintText = _node->queryProp("hint[@name='layouttranslation']/@value");
  9497. if (recordTranslationModeHintText)
  9498. recordTranslationModeHint = getTranslationMode(recordTranslationModeHintText, true);
  9499. isCodeSigned = isActivityCodeSigned(*_node);
  9500. }
  9501. CPropertyTreeWriter writer(formatOptions);
  9502. helper.getFormatOptions(writer);
  9503. }
  9504. CHThorNewDiskReadBaseActivity::~CHThorNewDiskReadBaseActivity()
  9505. {
  9506. close();
  9507. }
  9508. void CHThorNewDiskReadBaseActivity::ready()
  9509. {
  9510. CHThorActivityBase::ready();
  9511. opened = false;
  9512. offsetOfPart = 0;
  9513. partNum = (unsigned)-1;
  9514. resolveFile();
  9515. fieldFilters.kill();
  9516. segHelper.createSegmentMonitors(this);
  9517. }
  9518. void CHThorNewDiskReadBaseActivity::stop()
  9519. {
  9520. close();
  9521. CHThorActivityBase::stop();
  9522. }
  9523. unsigned __int64 CHThorNewDiskReadBaseActivity::getFilePosition(const void * row)
  9524. {
  9525. //Ideally these functions would not need to be implemented - they should always be implemented by the translation layer
  9526. throwUnexpected();
  9527. }
  9528. unsigned __int64 CHThorNewDiskReadBaseActivity::getLocalFilePosition(const void * row)
  9529. {
  9530. throwUnexpected();
  9531. }
  9532. const char * CHThorNewDiskReadBaseActivity::queryLogicalFilename(const void * row)
  9533. {
  9534. throwUnexpected();
  9535. }
  9536. void CHThorNewDiskReadBaseActivity::resolveFile()
  9537. {
  9538. //If in a child query, and the filenames haven't changed, the information about the resolved filenames will also not have changed
  9539. //MORE: Is this ever untrue?
  9540. if (subfiles && !(helper.getFlags() & (TDXvarfilename|TDRdynformatoptions)))
  9541. return;
  9542. //Only clear these members if we are re-resolving the file - otherwise the previous entries are still valid
  9543. ldFile.clear();
  9544. tempFileName.clear();
  9545. dfsParts.clear();
  9546. subfiles.kill();
  9547. Owned<IPropertyTree> curFormatOptions;
  9548. if (helper.getFlags() & TDRdynformatoptions)
  9549. {
  9550. curFormatOptions.setown(createPTreeFromIPT(formatOptions));
  9551. CPropertyTreeWriter writer(curFormatOptions);
  9552. helper.getFormatDynOptions(writer);
  9553. }
  9554. else
  9555. curFormatOptions.set(formatOptions);
  9556. OwnedRoxieString fileName(helper.getFileName());
  9557. mangleHelperFileName(mangledHelperFileName, fileName, agent.queryWuid(), helper.getFlags());
  9558. if (helper.getFlags() & (TDXtemporary | TDXjobtemp))
  9559. {
  9560. StringBuffer mangledFilename;
  9561. mangleLocalTempFilename(mangledFilename, mangledHelperFileName.str(), nullptr);
  9562. tempFileName.set(agent.queryTemporaryFile(mangledFilename.str()));
  9563. logicalFileName = tempFileName.str();
  9564. gatherInfo(NULL);
  9565. subfiles.append(*extractFileInformation(nullptr, curFormatOptions));
  9566. }
  9567. else
  9568. {
  9569. ldFile.setown(resolveLFNFlat(agent, mangledHelperFileName.str(), "Read", 0 != (helper.getFlags() & TDRoptional), isCodeSigned));
  9570. if ( mangledHelperFileName.charAt(0) == '~')
  9571. logicalFileName = mangledHelperFileName.str()+1;
  9572. else
  9573. logicalFileName = mangledHelperFileName.str();
  9574. if (ldFile)
  9575. {
  9576. Owned<IFileDescriptor> fdesc;
  9577. fdesc.setown(ldFile->getFileDescriptor());
  9578. gatherInfo(fdesc);
  9579. IDistributedFile *dFile = ldFile->queryDistributedFile();
  9580. if (dFile) //only makes sense for distributed (non local) files
  9581. {
  9582. dfsParts.setown(dFile->getIterator());
  9583. IDistributedSuperFile *super = dFile->querySuperFile();
  9584. if (super)
  9585. {
  9586. unsigned numsubs = super->numSubFiles(true);
  9587. unsigned s=0;
  9588. for (; s<numsubs; s++)
  9589. {
  9590. IDistributedFile &subfile = super->querySubFile(s, true);
  9591. subfiles.append(*extractFileInformation(&subfile, curFormatOptions));
  9592. }
  9593. assertex(fdesc);
  9594. superfile.set(fdesc->querySuperFileDescriptor());
  9595. }
  9596. else
  9597. subfiles.append(*extractFileInformation(dFile, curFormatOptions));
  9598. if((helper.getFlags() & (TDXtemporary | TDXjobtemp)) == 0)
  9599. agent.logFileAccess(dFile, "HThor", "READ", graph);
  9600. }
  9601. else
  9602. subfiles.append(*extractFileInformation(nullptr, curFormatOptions));
  9603. }
  9604. else
  9605. subfiles.append(*extractFileInformation(nullptr, curFormatOptions));
  9606. if (!ldFile)
  9607. {
  9608. StringBuffer buff;
  9609. buff.appendf("Input file '%s' was missing but declared optional", mangledHelperFileName.str());
  9610. agent.addWuExceptionEx(buff.str(), WRN_SkipMissingOptFile, SeverityInformation, MSGAUD_user, "hthor");
  9611. }
  9612. }
  9613. assertex(subfiles.ordinality() != 0);
  9614. }
  9615. void CHThorNewDiskReadBaseActivity::gatherInfo(IFileDescriptor * fileDesc)
  9616. {
  9617. if (fileDesc)
  9618. {
  9619. if (!agent.queryResolveFilesLocally())
  9620. {
  9621. grouped = fileDesc->isGrouped();
  9622. if (grouped != ((helper.getFlags() & TDXgrouped) != 0))
  9623. {
  9624. StringBuffer msg;
  9625. msg.append("DFS and code generated group info. differs: DFS(").append(grouped ? "grouped" : "ungrouped").append("), CodeGen(").append(grouped ? "ungrouped" : "grouped").append("), using DFS info");
  9626. agent.addWuExceptionEx(msg.str(), WRN_MismatchGroupInfo, SeverityError, MSGAUD_user, "hthor");
  9627. }
  9628. }
  9629. else
  9630. grouped = ((helper.getFlags() & TDXgrouped) != 0);
  9631. }
  9632. else
  9633. {
  9634. grouped = ((helper.getFlags() & TDXgrouped) != 0);
  9635. }
  9636. }
  9637. static void queryInheritProp(IPropertyTree & target, const char * targetName, IPropertyTree & source, const char * sourceName)
  9638. {
  9639. if (source.hasProp(sourceName) && !target.hasProp(targetName))
  9640. target.setProp(targetName, source.queryProp(sourceName));
  9641. }
  9642. static void queryInheritSeparatorProp(IPropertyTree & target, const char * targetName, IPropertyTree & source, const char * sourceName)
  9643. {
  9644. //Legacy - commas are quoted if they occur in a separator list, so need to remove the leading backslashes
  9645. if (source.hasProp(sourceName) && !target.hasProp(targetName))
  9646. {
  9647. StringBuffer unquoted;
  9648. const char * text = source.queryProp(sourceName);
  9649. while (*text)
  9650. {
  9651. if ((text[0] == '\\') && (text[1] == ','))
  9652. text++;
  9653. unquoted.append(*text++);
  9654. }
  9655. target.setProp(targetName, unquoted);
  9656. }
  9657. }
  9658. CHThorNewDiskReadBaseActivity::InputFileInfo * CHThorNewDiskReadBaseActivity::extractFileInformation(IDistributedFile * distributedFile, const IPropertyTree * curFormatOptions)
  9659. {
  9660. Owned<IPropertyTree> meta = createPTree();
  9661. unsigned actualCrc = helper.getDiskFormatCrc();
  9662. Linked<IOutputMetaData> actualDiskMeta = expectedDiskMeta;
  9663. Linked<IPropertyTree> fileFormatOptions = createPTreeFromIPT(curFormatOptions);
  9664. bool compressed = false;
  9665. bool blockcompressed = false;
  9666. const char * readFormat = helper.queryFormat();
  9667. if (distributedFile)
  9668. {
  9669. const char *kind = queryFileKind(distributedFile);
  9670. //Do not use the field translation if the file was originally csv/xml - unless explicitly set
  9671. if ((strisame(kind, "flat") || (RecordTranslationMode::AlwaysDisk == getLayoutTranslationMode())) &&
  9672. // (strisame(readFormat, "flat") || strisame(kind, readFormat)))
  9673. (strisame(readFormat, "flat"))) // Not sure about this - only allow fixed source format if reading as flat
  9674. {
  9675. //Yuk this will be horrible - it needs to cache it for each distributed file
  9676. //and also common them up if they are the same.
  9677. IPropertyTree &props = distributedFile->queryAttributes();
  9678. Owned<IOutputMetaData> publishedMeta = getDaliLayoutInfo(props);
  9679. if (publishedMeta)
  9680. {
  9681. actualDiskMeta.setown(publishedMeta.getClear());
  9682. actualCrc = props.getPropInt("@formatCrc");
  9683. }
  9684. size32_t dfsSize = props.getPropInt("@recordSize");
  9685. if (dfsSize != 0)
  9686. meta->setPropInt("@recordSize", dfsSize);
  9687. }
  9688. compressed = distributedFile->isCompressed(&blockcompressed); //try new decompression, fall back to old unless marked as block
  9689. //Check for encryption key
  9690. void *k;
  9691. size32_t kl;
  9692. helper.getEncryptKey(kl,k);
  9693. if (kl)
  9694. {
  9695. meta->setPropBin("encryptionKey", kl, k);
  9696. blockcompressed = true;
  9697. compressed = true;
  9698. }
  9699. //MORE: There should probably be a generic way of storing and extracting format options for a file
  9700. IPropertyTree & options = distributedFile->queryAttributes();
  9701. queryInheritProp(*fileFormatOptions, "quote", options, "@csvQuote");
  9702. queryInheritSeparatorProp(*fileFormatOptions, "separator", options, "@csvSeparate");
  9703. queryInheritProp(*fileFormatOptions, "terminator", options, "@csvTerminate");
  9704. queryInheritProp(*fileFormatOptions, "escape", options, "@csvEscape");
  9705. dbglogXML(fileFormatOptions);
  9706. dbglogXML(fileFormatOptions);
  9707. }
  9708. meta->setPropBool("@grouped", grouped);
  9709. meta->setPropBool("@compressed", compressed);
  9710. meta->setPropBool("@blockCompressed", blockcompressed);
  9711. meta->setPropBool("@forceCompressed", (helper.getFlags() & TDXcompress) != 0);
  9712. meta->setPropTree("formatOptions", fileFormatOptions.getClear());
  9713. InputFileInfo & target = * new InputFileInfo;
  9714. target.file = distributedFile;
  9715. target.meta.setown(meta.getClear());
  9716. target.actualCrc = actualCrc;
  9717. target.actualMeta.swap(actualDiskMeta);
  9718. return &target;
  9719. }
  9720. void CHThorNewDiskReadBaseActivity::close()
  9721. {
  9722. closepart();
  9723. if(ldFile)
  9724. {
  9725. IDistributedFile * dFile = ldFile->queryDistributedFile();
  9726. if(dFile)
  9727. dFile->setAccessed();
  9728. }
  9729. }
  9730. void CHThorNewDiskReadBaseActivity::closepart()
  9731. {
  9732. if (activeReader)
  9733. {
  9734. activeReader->clearInput();
  9735. activeReader = nullptr;
  9736. }
  9737. logicalFileName = "";
  9738. }
  9739. static void saveOrRelease(Owned<IException> & target, IException * e)
  9740. {
  9741. if (target.get())
  9742. ::Release(e);
  9743. else
  9744. target.setown(e);
  9745. }
  9746. static void getFilename(RemoteFilename & rfilename, IDistributedFilePart * curPart, ILocalOrDistributedFile * localFile, unsigned partNum, unsigned copy)
  9747. {
  9748. if (curPart)
  9749. curPart->getFilename(rfilename,copy);
  9750. else
  9751. localFile->getPartFilename(rfilename,partNum,copy);
  9752. }
  9753. bool CHThorNewDiskReadBaseActivity::openFirstPart()
  9754. {
  9755. partNum = 0;
  9756. if (dfsParts) // more should really be fileDesc or something
  9757. {
  9758. if (dfsParts->first())
  9759. {
  9760. if (openFilePart(ldFile, &dfsParts->query(), 0))
  9761. return true;
  9762. return openNextPart(true);
  9763. }
  9764. }
  9765. else if (ldFile)
  9766. {
  9767. if (ldFile->numParts() != 0)
  9768. {
  9769. if (openFilePart(ldFile, nullptr, 0))
  9770. return true;
  9771. return openNextPart(true);
  9772. }
  9773. }
  9774. else if (!tempFileName.isEmpty())
  9775. {
  9776. if (openFilePart(tempFileName))
  9777. return true;
  9778. }
  9779. setEmptyStream();
  9780. return false;
  9781. }
  9782. bool CHThorNewDiskReadBaseActivity::openNextPart(bool prevWasMissing)
  9783. {
  9784. if (finishedParts)
  9785. return false;
  9786. if (!prevWasMissing)
  9787. {
  9788. offset_t sizeFilePart = 0;
  9789. if (dfsParts)
  9790. sizeFilePart = dfsParts->query().getFileSize(true, false);
  9791. else if (ldFile)
  9792. sizeFilePart = ldFile->getPartFileSize(partNum);
  9793. offsetOfPart += sizeFilePart;
  9794. closepart();
  9795. }
  9796. for (;;)
  9797. {
  9798. partNum++;
  9799. if (dfsParts)
  9800. {
  9801. if (dfsParts->next())
  9802. {
  9803. if (openFilePart(ldFile, &dfsParts->query(), partNum))
  9804. return true;
  9805. continue; // try the next file part
  9806. }
  9807. }
  9808. else if (ldFile)
  9809. {
  9810. if (partNum < ldFile->numParts())
  9811. {
  9812. if (openFilePart(ldFile, nullptr, partNum))
  9813. return true;
  9814. continue; // try the next file part
  9815. }
  9816. }
  9817. setEmptyStream();
  9818. return false;
  9819. }
  9820. }
  9821. void CHThorNewDiskReadBaseActivity::initStream(IDiskRowReader * reader, const char * filename)
  9822. {
  9823. activeReader = reader;
  9824. inputRowStream = reader->queryAllocatedRowStream(rowAllocator);
  9825. StringBuffer report("Reading file ");
  9826. report.append(filename);
  9827. agent.reportProgress(report.str());
  9828. }
  9829. void CHThorNewDiskReadBaseActivity::setEmptyStream()
  9830. {
  9831. inputRowStream = queryNullDiskRowStream();
  9832. finishedParts = true;
  9833. }
  9834. IDiskRowReader * CHThorNewDiskReadBaseActivity::ensureRowReader(const char * format, bool streamRemote, unsigned expectedCrc, IOutputMetaData & expected, unsigned projectedCrc, IOutputMetaData & projected, unsigned actualCrc, IOutputMetaData & actual, const IPropertyTree * options)
  9835. {
  9836. Owned<IDiskReadMapping> mapping = createDiskReadMapping(getLayoutTranslationMode(), format, actualCrc, actual, expectedCrc, expected, projectedCrc, projected, options);
  9837. ForEachItemIn(i, readers)
  9838. {
  9839. IDiskRowReader & cur = readers.item(i);
  9840. if (cur.matches(format, streamRemote, mapping))
  9841. return &cur;
  9842. }
  9843. IDiskRowReader * reader = createDiskReader(format, streamRemote, mapping);
  9844. readers.append(*reader);
  9845. return reader;
  9846. }
  9847. bool CHThorNewDiskReadBaseActivity::openFilePart(const char * filename)
  9848. {
  9849. const char * format = helper.queryFormat(); // more - should extract from the current file (could even mix flat and csv...)
  9850. InputFileInfo * fileInfo = &subfiles.item(0);
  9851. unsigned expectedCrc = helper.getDiskFormatCrc();
  9852. unsigned projectedCrc = helper.getProjectedFormatCrc();
  9853. IDiskRowReader * reader = ensureRowReader(format, false, expectedCrc, *expectedDiskMeta, projectedCrc, *projectedDiskMeta, expectedCrc, *expectedDiskMeta, fileInfo->meta);
  9854. if (reader->setInputFile(filename, logicalFileName, 0, offsetOfPart, fileInfo->meta, fieldFilters))
  9855. {
  9856. initStream(reader, filename);
  9857. return true;
  9858. }
  9859. return false;
  9860. }
  9861. bool CHThorNewDiskReadBaseActivity::openFilePart(ILocalOrDistributedFile * localFile, IDistributedFilePart * filePart, unsigned whichPart)
  9862. {
  9863. IDistributedFile * distributedFile = localFile->queryDistributedFile();
  9864. InputFileInfo * fileInfo = &subfiles.item(0);
  9865. if (superfile && filePart)
  9866. {
  9867. unsigned subfile;
  9868. unsigned lnum;
  9869. if (superfile->mapSubPart(partNum, subfile, lnum))
  9870. {
  9871. fileInfo = &subfiles.item(subfile);
  9872. distributedFile = fileInfo->file;
  9873. logicalFileName = distributedFile->queryLogicalName();
  9874. }
  9875. }
  9876. unsigned expectedCrc = helper.getDiskFormatCrc();
  9877. unsigned projectedCrc = helper.getProjectedFormatCrc();
  9878. unsigned actualCrc = fileInfo->actualCrc;
  9879. IOutputMetaData * actualDiskMeta = fileInfo->actualMeta;
  9880. bool tryRemoteStream = actualDiskMeta->queryTypeInfo()->canInterpret() && actualDiskMeta->queryTypeInfo()->canSerialize() &&
  9881. projectedDiskMeta->queryTypeInfo()->canInterpret() && projectedDiskMeta->queryTypeInfo()->canSerialize();
  9882. /*
  9883. * If a file part can be accessed local, then read it locally
  9884. * If a file part supports a remote stream, then use that
  9885. * Otherwise failover to the legacy remote access.
  9886. */
  9887. const char * format = helper.queryFormat(); // more - should extract from the current file (could even mix flat and csv...)
  9888. Owned<IException> saveOpenExc;
  9889. StringBuffer filename, filenamelist;
  9890. std::vector<unsigned> remoteCandidates;
  9891. // scan for local part 1st
  9892. //MORE: Order of copies should be optimized at this point....
  9893. unsigned numCopies = filePart?filePart->numCopies():ldFile->numPartCopies(partNum);
  9894. for (unsigned copy=0; copy<numCopies; copy++)
  9895. {
  9896. RemoteFilename rfn;
  9897. getFilename(rfn, filePart, localFile, partNum, copy);
  9898. if (!isRemoteReadCandidate(agent, rfn))
  9899. {
  9900. StringBuffer path;
  9901. rfn.getPath(path);
  9902. IDiskRowReader * reader = ensureRowReader(format, false, expectedCrc, *expectedDiskMeta, projectedCrc, *projectedDiskMeta, actualCrc, *actualDiskMeta, fileInfo->meta);
  9903. if (reader->setInputFile(path.str(), logicalFileName, whichPart, offsetOfPart, fileInfo->meta, fieldFilters))
  9904. {
  9905. initStream(reader, path.str());
  9906. return true;
  9907. }
  9908. }
  9909. else
  9910. remoteCandidates.push_back(copy);
  9911. }
  9912. //First try remote streaming, and if that does not succeed, fall back to remote reading.
  9913. bool allowFallbackToNonStreaming = false;
  9914. for (;;)
  9915. {
  9916. for (unsigned copy: remoteCandidates)
  9917. {
  9918. RemoteFilename rfilename;
  9919. getFilename(rfilename, filePart, localFile, partNum, copy);
  9920. rfilename.getPath(filename.clear());
  9921. filenamelist.append('\n').append(filename);
  9922. try
  9923. {
  9924. IDiskRowReader * reader = ensureRowReader(format, tryRemoteStream, expectedCrc, *expectedDiskMeta, projectedCrc, *projectedDiskMeta, actualCrc, *actualDiskMeta, fileInfo->meta);
  9925. if (reader->setInputFile(rfilename, logicalFileName, whichPart, offsetOfPart, fileInfo->meta, fieldFilters))
  9926. {
  9927. initStream(reader, filename);
  9928. return true;
  9929. }
  9930. }
  9931. catch (IException *E)
  9932. {
  9933. saveOrRelease(saveOpenExc, E);
  9934. }
  9935. }
  9936. if (!tryRemoteStream || !allowFallbackToNonStreaming)
  9937. break;
  9938. tryRemoteStream = false;
  9939. }
  9940. if (!(helper.getFlags() & TDRoptional))
  9941. {
  9942. StringBuffer s;
  9943. if (filenamelist)
  9944. {
  9945. if (saveOpenExc.get())
  9946. {
  9947. if (strstr(mangledHelperFileName.str(),"::>")!=NULL) // if a 'special' filename just use saved exception
  9948. saveOpenExc->errorMessage(s);
  9949. else
  9950. {
  9951. s.append("Could not open logical file ").append(mangledHelperFileName.str()).append(" in any of these locations:").append(filenamelist).append(" (");
  9952. saveOpenExc->errorMessage(s).append(")");
  9953. }
  9954. }
  9955. else
  9956. s.append("Could not open logical file ").append(mangledHelperFileName.str()).append(" in any of these locations:").append(filenamelist).append(" (").append((unsigned)GetLastError()).append(")");
  9957. }
  9958. else
  9959. s.append("Could not open local physical file ").append(filename).append(" (").append((unsigned)GetLastError()).append(")");
  9960. agent.fail(1, s.str());
  9961. }
  9962. return false;
  9963. }
  9964. bool CHThorNewDiskReadBaseActivity::openNext()
  9965. {
  9966. return openNextPart(false);
  9967. }
  9968. void CHThorNewDiskReadBaseActivity::open()
  9969. {
  9970. assertex(!opened);
  9971. opened = true;
  9972. if (!segHelper.canMatchAny())
  9973. {
  9974. setEmptyStream();
  9975. }
  9976. else
  9977. {
  9978. if (!openFirstPart())
  9979. setEmptyStream();
  9980. }
  9981. }
  9982. void CHThorNewDiskReadBaseActivity::verifyRecordFormatCrc()
  9983. {
  9984. //MORE: Need to configure based on csv/xml
  9985. ::verifyFormatCrcSuper(helper.getDiskFormatCrc(), ldFile?ldFile->queryDistributedFile():NULL, false, true);
  9986. }
  9987. void CHThorNewDiskReadBaseActivity::append(FFoption option, const IFieldFilter * filter)
  9988. {
  9989. if (filter->isWild())
  9990. filter->Release();
  9991. else
  9992. fieldFilters.append(*filter);
  9993. }
  9994. //=====================================================================================================
  9995. CHThorNewDiskReadActivity::CHThorNewDiskReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNewDiskReadArg &_arg, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree *_node)
  9996. : CHThorNewDiskReadBaseActivity(_agent, _activityId, _subgraphId, _arg, _arg, _kind, _node, _graph), helper(_arg), outBuilder(NULL)
  9997. {
  9998. needTransform = false;
  9999. lastGroupProcessed = 0;
  10000. hasMatchFilter = helper.hasMatchFilter();
  10001. useRawStream = hasMatchFilter || helper.needTransform();
  10002. }
  10003. void CHThorNewDiskReadActivity::ready()
  10004. {
  10005. PARENT::ready();
  10006. outBuilder.setAllocator(rowAllocator);
  10007. lastGroupProcessed = processed;
  10008. needTransform = helper.needTransform() || fieldFilters.length();
  10009. limit = helper.getRowLimit();
  10010. if (helper.getFlags() & TDRlimitskips)
  10011. limit = (unsigned __int64) -1;
  10012. stopAfter = helper.getChooseNLimit();
  10013. if (!helper.transformMayFilter() && !helper.hasMatchFilter())
  10014. remoteLimit = stopAfter;
  10015. finishedParts = false;
  10016. }
  10017. void CHThorNewDiskReadActivity::stop()
  10018. {
  10019. outBuilder.clear();
  10020. PARENT::stop();
  10021. }
  10022. void CHThorNewDiskReadActivity::onLimitExceeded()
  10023. {
  10024. if ( agent.queryCodeContext()->queryDebugContext())
  10025. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  10026. helper.onLimitExceeded();
  10027. }
  10028. const void *CHThorNewDiskReadActivity::nextRow()
  10029. {
  10030. //Avoid this check on each row- e.g., initialising streams with a null stream, which returns eof, and falls through to eof processing
  10031. if (!opened) open();
  10032. // Only check once per row returned. Potentially means that heavily filtered datasets may wait a long time to check for abort
  10033. queryUpdateProgress();
  10034. //Avoid this test... Combine the limit checking with choosen, and have choosen/limit triggering set the
  10035. //stream to a special no more rows stream so that subsequent calls do not read records.
  10036. if ((processed - initialProcessed) >= stopAfter)
  10037. return nullptr;
  10038. try
  10039. {
  10040. if (useRawStream)
  10041. {
  10042. for (;;)
  10043. {
  10044. //Returns a row in the serialized form of the projected format
  10045. size32_t nextSize;
  10046. const byte * next = (const byte *)inputRowStream->nextRow(nextSize);
  10047. if (!isSpecialRow(next))
  10048. {
  10049. if (likely(!hasMatchFilter || helper.canMatch(next)))
  10050. {
  10051. size32_t thisSize = helper.transform(outBuilder.ensureRow(), next);
  10052. if (thisSize != 0)
  10053. {
  10054. if (unlikely((processed - initialProcessed) >= limit))
  10055. {
  10056. outBuilder.clear();
  10057. onLimitExceeded();
  10058. return nullptr;
  10059. }
  10060. processed++;
  10061. return outBuilder.finalizeRowClear(thisSize);
  10062. }
  10063. }
  10064. }
  10065. else
  10066. {
  10067. switch (getSpecialRowType(next))
  10068. {
  10069. case SpecialRow::eof:
  10070. if (!openNext())
  10071. return next; // i.e. eof
  10072. //rawStream will have changed, but it cannot change into a rowStream
  10073. break;
  10074. case SpecialRow::eos:
  10075. return next;
  10076. case SpecialRow::eog:
  10077. if (processed != lastGroupProcessed)
  10078. {
  10079. lastGroupProcessed = processed;
  10080. //MORE: Change to return next - i.e. an eog marker
  10081. return nullptr;
  10082. }
  10083. break;
  10084. default:
  10085. throwUnexpected();
  10086. }
  10087. }
  10088. }
  10089. }
  10090. else
  10091. {
  10092. //This branch avoids a memcpy from actual to projected followed by a deserialize - since it can map directly
  10093. //May be more efficient to use this branch if serialized==deserialized and there is a filter, but no transform.
  10094. //It would be possibel to have two (or more) different implementations, which were created based on
  10095. //whether there was a limit, a transform etc., but unlikely to save more than a couple of boolean tests.
  10096. for (;;)
  10097. {
  10098. const byte * next = (const byte *)inputRowStream->nextRow();
  10099. if (!isSpecialRow(next))
  10100. {
  10101. if (unlikely((processed - initialProcessed) >= limit))
  10102. {
  10103. ReleaseRoxieRow(next);
  10104. onLimitExceeded();
  10105. return nullptr;
  10106. }
  10107. processed++;
  10108. return next;
  10109. }
  10110. else
  10111. {
  10112. switch (getSpecialRowType(next))
  10113. {
  10114. case SpecialRow::eof:
  10115. if (!openNext())
  10116. return next;
  10117. //rowStream will have changed
  10118. break;
  10119. case SpecialRow::eos:
  10120. return next;
  10121. case SpecialRow::eog:
  10122. if (processed != lastGroupProcessed)
  10123. {
  10124. lastGroupProcessed = processed;
  10125. return nullptr;
  10126. }
  10127. break;
  10128. default:
  10129. throwUnexpected();
  10130. }
  10131. }
  10132. }
  10133. }
  10134. }
  10135. catch(IException * e)
  10136. {
  10137. throw makeWrappedException(e);
  10138. }
  10139. return NULL;
  10140. }
  10141. //=====================================================================================================
  10142. bool RemoteReadChecker::onlyReadLocally(const CLogicalFileSlice & slice, unsigned copy)
  10143. {
  10144. //Allow all operations to be forced to be executed locally.
  10145. if (forceRemoteDisabled.getValue(false))
  10146. return true;
  10147. //If not locally attached then there is no benefit in reading remotely
  10148. if (!slice.onAttachedStorage(copy))
  10149. return true;
  10150. //If the file is not local then execute it remotely
  10151. if (!slice.isLocal(copy))
  10152. return false;
  10153. StringBuffer localPath;
  10154. slice.getURL(localPath, copy);
  10155. if (forceRemoteRead.getValue(testForceRemote(localPath)))
  10156. return false;
  10157. return true;
  10158. }
  10159. CHThorGenericDiskReadBaseActivity::CHThorGenericDiskReadBaseActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNewDiskReadBaseArg &_arg, IHThorCompoundBaseArg & _segHelper, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree *_node)
  10160. : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind, _graph), helper(_arg), segHelper(_segHelper), remoteReadChecker(_agent.queryWorkUnit())
  10161. {
  10162. helper.setCallback(this);
  10163. expectedDiskMeta = helper.queryDiskRecordSize();
  10164. projectedDiskMeta = helper.queryProjectedDiskRecordSize();
  10165. isCodeSigned = false;
  10166. if (_node)
  10167. {
  10168. const char *recordTranslationModeHintText = _node->queryProp("hint[@name='layouttranslation']/@value");
  10169. if (recordTranslationModeHintText)
  10170. recordTranslationModeHint = getTranslationMode(recordTranslationModeHintText, true);
  10171. isCodeSigned = isActivityCodeSigned(*_node);
  10172. }
  10173. grouped = ((helper.getFlags() & TDXgrouped) != 0);
  10174. inputOptions.setown(createPTree());
  10175. inputOptions->setPropBool("@grouped", grouped);
  10176. inputOptions->setPropBool("@forceCompressed", (helper.getFlags() & TDXcompress) != 0);
  10177. if (helper.getFlags() & TDRoptional)
  10178. inputOptions->setPropBool("@optional", true);
  10179. if ((helper.getFlags() & TDRcloneappendvirtual) != 0)
  10180. inputOptions->setPropBool("@cloneAppendVirtuals", true);
  10181. CPropertyTreeWriter writer(ensurePTree(inputOptions, "formatOptions"));
  10182. helper.getFormatOptions(writer);
  10183. outputGrouped = helper.queryOutputMeta()->isGrouped(); // It is possible for input to be incorrectly marked as grouped, and input not or vice-versa
  10184. bool isTemporary = (helper.getFlags() & (TDXtemporary | TDXjobtemp)) != 0;
  10185. files.init(this, agent.queryWuid(), isTemporary, agent.queryResolveFilesLocally(), isCodeSigned, agent.queryCodeContext()->queryUserDescriptor(), expectedDiskMeta);
  10186. if (isTemporary)
  10187. {
  10188. StringBuffer spillPath;
  10189. agent.getTempfileBase(spillPath);
  10190. //Should probably be in eclagent
  10191. spillPlane.setown(createPTree("planes"));
  10192. spillPlane->setProp("@name", "localspill");
  10193. spillPlane->setProp("@prefix", spillPath);
  10194. }
  10195. }
  10196. CHThorGenericDiskReadBaseActivity::~CHThorGenericDiskReadBaseActivity()
  10197. {
  10198. close();
  10199. }
  10200. void CHThorGenericDiskReadBaseActivity::ready()
  10201. {
  10202. CHThorActivityBase::ready();
  10203. opened = false;
  10204. curSlice = NotFound;
  10205. resolveFile();
  10206. fieldFilters.kill();
  10207. segHelper.createSegmentMonitors(this);
  10208. }
  10209. void CHThorGenericDiskReadBaseActivity::stop()
  10210. {
  10211. close();
  10212. CHThorActivityBase::stop();
  10213. }
  10214. unsigned __int64 CHThorGenericDiskReadBaseActivity::getFilePosition(const void * row)
  10215. {
  10216. //These functions do not need to be implemented - they will be implemented by the translation layer
  10217. throwUnexpected();
  10218. }
  10219. unsigned __int64 CHThorGenericDiskReadBaseActivity::getLocalFilePosition(const void * row)
  10220. {
  10221. throwUnexpected();
  10222. }
  10223. void CHThorGenericDiskReadBaseActivity::noteException(unsigned severity, unsigned code, const char * text)
  10224. {
  10225. //MORE: This should really supply the activity and the scope - a general issue for hthor errors...
  10226. agent.addWuExceptionEx(text, code, severity, MSGAUD_user, "hthor");
  10227. }
  10228. const char * CHThorGenericDiskReadBaseActivity::queryLogicalFilename(const void * row)
  10229. {
  10230. throwUnexpected();
  10231. }
  10232. void CHThorGenericDiskReadBaseActivity::resolveFile()
  10233. {
  10234. //If in a child query, and the filenames haven't changed, the information about the resolved filenames will also not have changed
  10235. //Assume that is also true for format properties - require dynamic if they are to be recalculated.
  10236. if (resolved && !(helper.getFlags() & (TDXvarfilename|TDRdynformatoptions)))
  10237. return;
  10238. resolved = true;
  10239. //Update the inputOptions and formatOptions if they depend on the current context
  10240. curInputOptions.set(inputOptions);
  10241. //Check for encryption key
  10242. void *k;
  10243. size32_t kl;
  10244. helper.getEncryptKey(kl,k);
  10245. if (kl || (helper.getFlags() & TDRdynformatoptions))
  10246. {
  10247. curInputOptions.setown(createPTreeFromIPT(inputOptions));
  10248. if (kl)
  10249. {
  10250. curInputOptions->setPropBin("encryptionKey", kl, k);
  10251. curInputOptions->setPropBool("blockcompressed", true);
  10252. curInputOptions->setPropBool("compressed", true);
  10253. }
  10254. if (helper.getFlags() & TDRdynformatoptions)
  10255. {
  10256. Owned<IPropertyTree> helperFormatOptions = createPTree("formatOptions");
  10257. CPropertyTreeWriter writer(helperFormatOptions);
  10258. helper.getFormatDynOptions(writer);
  10259. IPropertyTree * curFormatOptions = ensurePTree(curInputOptions, "formatOptions");
  10260. mergeConfiguration(*curFormatOptions, *helperFormatOptions, nullptr, true);
  10261. }
  10262. }
  10263. //Extract meta information from the helper. Another (possibly more efficient) alternative to an IPropertyTree would be a class.
  10264. bool isTemporary = (helper.getFlags() & (TDXtemporary | TDXjobtemp)) != 0;
  10265. OwnedRoxieString fileName(helper.getFileName());
  10266. if (isTemporary)
  10267. {
  10268. StringBuffer mangledFilename;
  10269. mangleLocalTempFilename(mangledFilename, fileName, agent.queryWuid()); // should this occur inside setEclFilename?
  10270. curInputOptions->setPropBool("@singlePartNoSuffix", true);
  10271. files.setTempFilename(mangledFilename, curInputOptions, spillPlane);
  10272. }
  10273. else
  10274. {
  10275. StringBuffer lfn;
  10276. expandLogicalFilename(lfn, fileName, agent.queryWorkUnit(), false, false);
  10277. files.setEclFilename(lfn, curInputOptions);
  10278. }
  10279. slices.clear();
  10280. files.calcPartition(slices, 1, 0, false, true);
  10281. curSlice = 0;
  10282. }
  10283. void CHThorGenericDiskReadBaseActivity::close()
  10284. {
  10285. closepart();
  10286. if (activeSlice)
  10287. activeSlice->setAccessed();
  10288. }
  10289. void CHThorGenericDiskReadBaseActivity::closepart()
  10290. {
  10291. if (activeReader)
  10292. {
  10293. activeReader->clearInput();
  10294. activeReader = nullptr;
  10295. activeSlice = nullptr;
  10296. }
  10297. }
  10298. bool CHThorGenericDiskReadBaseActivity::openFirstPart()
  10299. {
  10300. if (openFilePart(0U))
  10301. return true;
  10302. setEmptyStream();
  10303. return false;
  10304. }
  10305. bool CHThorGenericDiskReadBaseActivity::openNextPart()
  10306. {
  10307. if (curSlice == NotFound)
  10308. return false;
  10309. if (activeSlice)
  10310. closepart();
  10311. if (openFilePart(curSlice+1))
  10312. return true;
  10313. setEmptyStream();
  10314. return false;
  10315. }
  10316. void CHThorGenericDiskReadBaseActivity::initStream(CLogicalFileSlice * slice, IDiskRowReader * reader)
  10317. {
  10318. activeSlice = slice;
  10319. activeReader = reader;
  10320. inputRowStream = reader->queryAllocatedRowStream(rowAllocator);
  10321. StringBuffer report("Reading file ");
  10322. activeSlice->getTracingFilename(report);
  10323. agent.reportProgress(report.str());
  10324. }
  10325. void CHThorGenericDiskReadBaseActivity::setEmptyStream()
  10326. {
  10327. inputRowStream = queryNullDiskRowStream();
  10328. finishedParts = true;
  10329. }
  10330. IDiskRowReader * CHThorGenericDiskReadBaseActivity::ensureRowReader(const char * format, bool streamRemote, unsigned expectedCrc, IOutputMetaData & expected, unsigned projectedCrc, IOutputMetaData & projected, unsigned actualCrc, IOutputMetaData & actual, CLogicalFileSlice * slice)
  10331. {
  10332. bool translateFromActual = strsame(format, slice->queryFormat());
  10333. //Backwards compatibility - there should be an option to override this
  10334. if (strsame(format, "csv") || strsame(format, "xml"))
  10335. translateFromActual = false;
  10336. //If the actual and expected file formats do not translate from the actual file format - use the expected format instead
  10337. Owned<IDiskReadMapping> mapping;
  10338. if (translateFromActual)
  10339. mapping.setown(createDiskReadMapping(getLayoutTranslationMode(), format, actualCrc, actual, expectedCrc, expected, projectedCrc, projected, slice->queryFileMeta()));
  10340. else
  10341. mapping.setown(createDiskReadMapping(getLayoutTranslationMode(), format, expectedCrc, expected, expectedCrc, expected, projectedCrc, projected, slice->queryFileMeta()));
  10342. ForEachItemIn(i, readers)
  10343. {
  10344. IDiskRowReader & cur = readers.item(i);
  10345. if (cur.matches(format, streamRemote, mapping))
  10346. return &cur;
  10347. }
  10348. IDiskRowReader * reader = createDiskReader(format, streamRemote, mapping);
  10349. readers.append(*reader);
  10350. return reader;
  10351. }
  10352. bool CHThorGenericDiskReadBaseActivity::openFilePart(unsigned whichSlice)
  10353. {
  10354. for (;;)
  10355. {
  10356. if (whichSlice >= slices.size())
  10357. {
  10358. curSlice = NotFound;
  10359. return false;
  10360. }
  10361. if (openFilePart(&slices[whichSlice]))
  10362. {
  10363. curSlice = whichSlice;
  10364. activeSlice = &slices[whichSlice];
  10365. return true;
  10366. }
  10367. whichSlice++;
  10368. }
  10369. }
  10370. bool CHThorGenericDiskReadBaseActivity::openFilePart(CLogicalFileSlice * nextSlice)
  10371. {
  10372. unsigned expectedCrc = helper.getDiskFormatCrc();
  10373. unsigned projectedCrc = helper.getProjectedFormatCrc();
  10374. unsigned actualCrc = nextSlice->queryFile()->queryActualCrc();
  10375. IOutputMetaData * actualDiskMeta = nextSlice->queryFile()->queryActualMeta();
  10376. bool tryRemoteStream = actualDiskMeta->queryTypeInfo()->canInterpret() && actualDiskMeta->queryTypeInfo()->canSerialize() &&
  10377. projectedDiskMeta->queryTypeInfo()->canInterpret() && projectedDiskMeta->queryTypeInfo()->canSerialize();
  10378. /*
  10379. * If a file part can be accessed local, then read it locally
  10380. * If a file part supports a remote stream, then use that
  10381. * Otherwise failover to the legacy remote access.
  10382. */
  10383. const char * format = helper.queryFormat();
  10384. // If format is not specified in the ECL then it is deduced from the file. It must be the same for all copies of a file part
  10385. if (!format)
  10386. format = nextSlice->queryFormat();
  10387. Owned<IException> saveOpenExc;
  10388. StringBuffer filenamelist;
  10389. std::vector<unsigned> remoteCandidates;
  10390. // scan for local part 1st
  10391. //MORE: Order of copies should be optimized at this point....
  10392. unsigned numCopies = nextSlice->getNumCopies();
  10393. for (unsigned copy=0; copy<numCopies; copy++)
  10394. {
  10395. if (remoteReadChecker.onlyReadLocally(*nextSlice, copy))
  10396. {
  10397. IDiskRowReader * reader = ensureRowReader(format, false, expectedCrc, *expectedDiskMeta, projectedCrc, *projectedDiskMeta, actualCrc, *actualDiskMeta, nextSlice);
  10398. if (reader->setInputFile(*nextSlice, fieldFilters, copy))
  10399. {
  10400. initStream(nextSlice, reader);
  10401. return true;
  10402. }
  10403. }
  10404. else
  10405. remoteCandidates.push_back(copy);
  10406. }
  10407. //First try remote streaming, and if that does not succeed, fall back to remote reading.
  10408. bool allowFallbackToNonStreaming = true;
  10409. for (;;)
  10410. {
  10411. for (unsigned copy: remoteCandidates)
  10412. {
  10413. StringBuffer filename;
  10414. nextSlice->getURL(filename, copy);
  10415. filenamelist.append('\n').append(filename);
  10416. try
  10417. {
  10418. IDiskRowReader * reader = ensureRowReader(format, tryRemoteStream, expectedCrc, *expectedDiskMeta, projectedCrc, *projectedDiskMeta, actualCrc, *actualDiskMeta, nextSlice);
  10419. if (reader->setInputFile(*nextSlice, fieldFilters, copy))
  10420. {
  10421. initStream(nextSlice, reader);
  10422. return true;
  10423. }
  10424. }
  10425. catch (IException *E)
  10426. {
  10427. saveOrRelease(saveOpenExc, E);
  10428. }
  10429. }
  10430. if (!tryRemoteStream || !allowFallbackToNonStreaming)
  10431. break;
  10432. tryRemoteStream = false;
  10433. }
  10434. if (!(helper.getFlags() & TDRoptional))
  10435. {
  10436. //Should this be unconditional? If the logical file exists, but the file can't be opened, it isn't really what OPT means.
  10437. StringBuffer s;
  10438. StringBuffer tracingName;
  10439. nextSlice->getTracingFilename(tracingName);
  10440. if (filenamelist)
  10441. {
  10442. if (saveOpenExc.get())
  10443. {
  10444. if (!nextSlice->isLogicalFile())
  10445. saveOpenExc->errorMessage(s);
  10446. else
  10447. {
  10448. s.append("Could not open logical file ").append(tracingName).append(" in any of these locations:").append(filenamelist).append(" (");
  10449. saveOpenExc->errorMessage(s).append(")");
  10450. }
  10451. }
  10452. else
  10453. s.append("Could not open logical file ").append(tracingName).append(" in any of these locations:").append(filenamelist).append(" (").append((unsigned)GetLastError()).append(")");
  10454. }
  10455. else
  10456. {
  10457. const char * filename = nextSlice->queryFile()->queryLogicalFilename();
  10458. s.append("Could not open local physical file ").append(filename).append(" (").append((unsigned)GetLastError()).append(")");
  10459. }
  10460. agent.fail(1, s.str());
  10461. }
  10462. return false;
  10463. }
  10464. bool CHThorGenericDiskReadBaseActivity::openNext()
  10465. {
  10466. return openNextPart();
  10467. }
  10468. void CHThorGenericDiskReadBaseActivity::open()
  10469. {
  10470. assertex(!opened);
  10471. opened = true;
  10472. if (!segHelper.canMatchAny())
  10473. {
  10474. setEmptyStream();
  10475. }
  10476. else
  10477. {
  10478. if (!openFirstPart())
  10479. setEmptyStream();
  10480. }
  10481. }
  10482. void CHThorGenericDiskReadBaseActivity::append(FFoption option, const IFieldFilter * filter)
  10483. {
  10484. if (filter->isWild())
  10485. filter->Release();
  10486. else
  10487. fieldFilters.append(*filter);
  10488. }
  10489. //=====================================================================================================
  10490. CHThorGenericDiskReadActivity::CHThorGenericDiskReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNewDiskReadArg &_arg, ThorActivityKind _kind, EclGraph & _graph, IPropertyTree *_node)
  10491. : CHThorGenericDiskReadBaseActivity(_agent, _activityId, _subgraphId, _arg, _arg, _kind, _graph, _node), helper(_arg), outBuilder(NULL)
  10492. {
  10493. hasMatchFilter = helper.hasMatchFilter();
  10494. useRawStream = hasMatchFilter || helper.needTransform();
  10495. }
  10496. void CHThorGenericDiskReadActivity::ready()
  10497. {
  10498. PARENT::ready();
  10499. outBuilder.setAllocator(rowAllocator);
  10500. lastGroupProcessed = processed;
  10501. needTransform = helper.needTransform() || fieldFilters.length();
  10502. limit = helper.getRowLimit();
  10503. if (helper.getFlags() & TDRlimitskips)
  10504. limit = (unsigned __int64) -1;
  10505. stopAfter = helper.getChooseNLimit();
  10506. if (!helper.transformMayFilter() && !helper.hasMatchFilter())
  10507. remoteLimit = stopAfter;
  10508. finishedParts = false;
  10509. }
  10510. void CHThorGenericDiskReadActivity::stop()
  10511. {
  10512. outBuilder.clear();
  10513. PARENT::stop();
  10514. }
  10515. void CHThorGenericDiskReadActivity::onLimitExceeded()
  10516. {
  10517. if ( agent.queryCodeContext()->queryDebugContext())
  10518. agent.queryCodeContext()->queryDebugContext()->checkBreakpoint(DebugStateLimit, NULL, static_cast<IActivityBase *>(this));
  10519. helper.onLimitExceeded();
  10520. }
  10521. const void *CHThorGenericDiskReadActivity::nextRow()
  10522. {
  10523. //Avoid this check on each row- e.g., initialising streams with a null stream, which returns eof, and falls through to eof processing
  10524. if (!opened) open();
  10525. // Only check once per row returned. Potentially means that heavily filtered datasets may wait a long time to check for abort
  10526. queryUpdateProgress();
  10527. //Avoid this test... Combine the limit checking with choosen, and have choosen/limit triggering set the
  10528. //stream to a special no more rows stream so that subsequent calls do not read records.
  10529. if ((processed - initialProcessed) >= stopAfter)
  10530. return nullptr;
  10531. try
  10532. {
  10533. if (useRawStream)
  10534. {
  10535. for (;;)
  10536. {
  10537. //Returns a row in the serialized form of the projected format
  10538. size32_t nextSize;
  10539. const byte * next = (const byte *)inputRowStream->nextRow(nextSize);
  10540. if (!isSpecialRow(next))
  10541. {
  10542. if (likely(!hasMatchFilter || helper.canMatch(next)))
  10543. {
  10544. size32_t thisSize = helper.transform(outBuilder.ensureRow(), next);
  10545. if (thisSize != 0)
  10546. {
  10547. if (unlikely((processed - initialProcessed) >= limit))
  10548. {
  10549. outBuilder.clear();
  10550. onLimitExceeded();
  10551. return nullptr;
  10552. }
  10553. processed++;
  10554. return outBuilder.finalizeRowClear(thisSize);
  10555. }
  10556. }
  10557. }
  10558. else
  10559. {
  10560. switch (getSpecialRowType(next))
  10561. {
  10562. case SpecialRow::eof:
  10563. if (!openNext())
  10564. return next; // i.e. eof
  10565. //rawStream will have changed, but it cannot change into a rowStream
  10566. break;
  10567. case SpecialRow::eos:
  10568. return next;
  10569. case SpecialRow::eog:
  10570. if (outputGrouped && (processed != lastGroupProcessed))
  10571. {
  10572. lastGroupProcessed = processed;
  10573. //MORE: Change to return next - i.e. an eog marker
  10574. return nullptr;
  10575. }
  10576. break;
  10577. default:
  10578. throwUnexpected();
  10579. }
  10580. }
  10581. }
  10582. }
  10583. else
  10584. {
  10585. //This branch avoids a memcpy from actual to projected followed by a deserialize - since it can map directly
  10586. //May be more efficient to use this branch if serialized==deserialized and there is a filter, but no transform.
  10587. //It would be possibel to have two (or more) different implementations, which were created based on
  10588. //whether there was a limit, a transform etc., but unlikely to save more than a couple of boolean tests.
  10589. for (;;)
  10590. {
  10591. const byte * next = (const byte *)inputRowStream->nextRow();
  10592. if (!isSpecialRow(next))
  10593. {
  10594. if (unlikely((processed - initialProcessed) >= limit))
  10595. {
  10596. ReleaseRoxieRow(next);
  10597. onLimitExceeded();
  10598. return nullptr;
  10599. }
  10600. processed++;
  10601. return next;
  10602. }
  10603. else
  10604. {
  10605. switch (getSpecialRowType(next))
  10606. {
  10607. case SpecialRow::eof:
  10608. if (!openNext())
  10609. return next;
  10610. //rowStream will have changed
  10611. break;
  10612. case SpecialRow::eos:
  10613. return next;
  10614. case SpecialRow::eog:
  10615. if (processed != lastGroupProcessed)
  10616. {
  10617. lastGroupProcessed = processed;
  10618. return nullptr;
  10619. }
  10620. break;
  10621. default:
  10622. throwUnexpected();
  10623. }
  10624. }
  10625. }
  10626. }
  10627. }
  10628. catch(IException * e)
  10629. {
  10630. throw makeWrappedException(e);
  10631. }
  10632. return NULL;
  10633. }
  10634. //=====================================================================================================
  10635. MAKEFACTORY(DiskWrite);
  10636. MAKEFACTORY(Iterate);
  10637. MAKEFACTORY(Filter);
  10638. MAKEFACTORY(Aggregate);
  10639. MAKEFACTORY(Rollup);
  10640. MAKEFACTORY(Project);
  10641. MAKEFACTORY(PrefetchProject);
  10642. MAKEFACTORY(FilterProject);
  10643. extern HTHOR_API IHThorActivity * createGroupDedupActivity(IAgentContext & _agent, unsigned _activityId, unsigned _subgraphId, IHThorDedupArg & arg, ThorActivityKind kind, EclGraph & _graph)
  10644. {
  10645. if(arg.compareAll())
  10646. return new CHThorGroupDedupAllActivity(_agent, _activityId, _subgraphId, arg, kind, _graph);
  10647. else if (arg.keepLeft() && !arg.keepBest())
  10648. return new CHThorGroupDedupKeepLeftActivity(_agent, _activityId, _subgraphId, arg, kind, _graph);
  10649. else
  10650. return new CHThorGroupDedupKeepRightActivity(_agent, _activityId, _subgraphId, arg, kind, _graph);
  10651. }
  10652. MAKEFACTORY(HashDedup);
  10653. MAKEFACTORY(Group);
  10654. MAKEFACTORY(Degroup);
  10655. MAKEFACTORY_ARG(GroupSort, Sort);
  10656. MAKEFACTORY(Join);
  10657. MAKEFACTORY_ARG(SelfJoin, Join);
  10658. MAKEFACTORY_ARG(LookupJoin, HashJoin);
  10659. MAKEFACTORY(AllJoin);
  10660. MAKEFACTORY(WorkUnitWrite);
  10661. MAKEFACTORY(DictionaryWorkUnitWrite);
  10662. MAKEFACTORY(FirstN);
  10663. MAKEFACTORY(InlineTable);
  10664. MAKEFACTORY_ARG(Concat, Funnel);
  10665. MAKEFACTORY(Apply);
  10666. MAKEFACTORY(Sample);
  10667. MAKEFACTORY(Normalize);
  10668. MAKEFACTORY(NormalizeChild);
  10669. MAKEFACTORY(NormalizeLinkedChild);
  10670. MAKEFACTORY(Distribution);
  10671. MAKEFACTORY(RemoteResult);
  10672. MAKEFACTORY(ChooseSets);
  10673. MAKEFACTORY_ARG(ChooseSetsLast, ChooseSetsEx);
  10674. MAKEFACTORY_ARG(ChooseSetsEnth, ChooseSetsEx);
  10675. MAKEFACTORY(WorkunitRead);
  10676. MAKEFACTORY(PipeRead);
  10677. MAKEFACTORY(PipeWrite);
  10678. MAKEFACTORY(CsvWrite);
  10679. MAKEFACTORY(XmlWrite);
  10680. MAKEFACTORY(PipeThrough);
  10681. MAKEFACTORY(If);
  10682. extern HTHOR_API IHThorActivity *createChildIfActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorIfArg &arg, ThorActivityKind kind, EclGraph & _graph)
  10683. {
  10684. return new CHThorIfActivity(_agent, _activityId, _subgraphId, arg, kind, _graph);
  10685. }
  10686. extern HTHOR_API IHThorActivity *createHashAggregateActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorHashAggregateArg &arg, ThorActivityKind kind, EclGraph & _graph, bool _isGroupedAggregate)
  10687. {
  10688. return new CHThorHashAggregateActivity(_agent, _activityId, _subgraphId, arg, kind, _graph, _isGroupedAggregate);
  10689. }
  10690. extern HTHOR_API IHThorActivity *createGenericDiskReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNewDiskReadArg &arg, ThorActivityKind kind, EclGraph & _graph, IPropertyTree * node)
  10691. {
  10692. return new CHThorGenericDiskReadActivity(_agent, _activityId, _subgraphId, arg, kind, _graph, node);
  10693. }
  10694. MAKEFACTORY(Null);
  10695. MAKEFACTORY(SideEffect);
  10696. MAKEFACTORY(Action);
  10697. MAKEFACTORY(SelectN);
  10698. MAKEFACTORY(Spill);
  10699. MAKEFACTORY(Limit);
  10700. MAKEFACTORY_ARG(SkipLimit, Limit);
  10701. MAKEFACTORY_ARG(OnFailLimit, Limit);
  10702. MAKEFACTORY(Catch);
  10703. MAKEFACTORY_ARG(SkipCatch, Catch);
  10704. MAKEFACTORY(CountProject);
  10705. MAKEFACTORY(IndexWrite);
  10706. MAKEFACTORY(Parse);
  10707. MAKEFACTORY(Enth);
  10708. MAKEFACTORY(TopN);
  10709. MAKEFACTORY(XmlParse);
  10710. MAKEFACTORY(Merge);
  10711. MAKEFACTORY_ARG(HttpRowCall, HttpCall);
  10712. MAKEFACTORY_ARG(SoapRowCall, SoapCall);
  10713. MAKEFACTORY_ARG(SoapRowAction, SoapAction);
  10714. MAKEFACTORY_ARG(SoapDatasetCall, SoapCall);
  10715. MAKEFACTORY_ARG(SoapDatasetAction, SoapAction);
  10716. MAKEFACTORY(DatasetResult);
  10717. MAKEFACTORY(RowResult);
  10718. MAKEFACTORY(ChildIterator);
  10719. extern HTHOR_API IHThorActivity *createDummyActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorArg &arg, ThorActivityKind kind, EclGraph & _graph)
  10720. {
  10721. return new CHThorDummyActivity(_agent, _activityId, _subgraphId, arg, kind, _graph);
  10722. }
  10723. MAKEFACTORY_EXTRA(WhenAction,EclGraphElement *)
  10724. MAKEFACTORY_EXTRA(LibraryCall, IPropertyTree *)
  10725. MAKEFACTORY(ChildNormalize)
  10726. MAKEFACTORY(ChildAggregate)
  10727. MAKEFACTORY(ChildGroupAggregate)
  10728. MAKEFACTORY(ChildThroughNormalize)
  10729. MAKEFACTORY_EXTRA(DiskRead, IPropertyTree *)
  10730. MAKEFACTORY_EXTRA(DiskNormalize, IPropertyTree *)
  10731. MAKEFACTORY_EXTRA(DiskAggregate, IPropertyTree *)
  10732. MAKEFACTORY_EXTRA(DiskCount, IPropertyTree *)
  10733. MAKEFACTORY_EXTRA(DiskGroupAggregate, IPropertyTree *)
  10734. MAKEFACTORY_EXTRA(CsvRead, IPropertyTree *)
  10735. MAKEFACTORY_EXTRA(XmlRead, IPropertyTree *)
  10736. MAKEFACTORY_EXTRA(NewDiskRead, IPropertyTree *)
  10737. MAKEFACTORY_EXTRA(LocalResultRead, __int64)
  10738. MAKEFACTORY_EXTRA(LocalResultWrite, __int64)
  10739. MAKEFACTORY_EXTRA(DictionaryResultWrite, __int64)
  10740. MAKEFACTORY_EXTRA(LocalResultSpill, __int64)
  10741. MAKEFACTORY_EXTRA(GraphLoopResultRead, __int64)
  10742. MAKEFACTORY_EXTRA(GraphLoopResultWrite, __int64)
  10743. MAKEFACTORY_EXTRA(NWayGraphLoopResultRead, __int64)
  10744. MAKEFACTORY(Combine)
  10745. MAKEFACTORY(RollupGroup)
  10746. MAKEFACTORY(Regroup)
  10747. MAKEFACTORY(CombineGroup)
  10748. MAKEFACTORY(Case)
  10749. MAKEFACTORY(LinkedRawIterator)
  10750. MAKEFACTORY(GraphLoop)
  10751. MAKEFACTORY(Loop)
  10752. MAKEFACTORY(Process)
  10753. MAKEFACTORY(Grouped)
  10754. MAKEFACTORY(Sorted)
  10755. MAKEFACTORY(Trace)
  10756. MAKEFACTORY(NWayInput)
  10757. MAKEFACTORY(NWaySelect)
  10758. MAKEFACTORY(NonEmpty)
  10759. MAKEFACTORY(FilterGroup);
  10760. MAKEFACTORY(StreamedIterator);
  10761. MAKEFACTORY_EXTRA(External, IPropertyTree *);
  10762. IHThorException * makeHThorException(ThorActivityKind kind, unsigned activityId, unsigned subgraphId, int code, char const * format, ...)
  10763. {
  10764. va_list args;
  10765. va_start(args, format);
  10766. IHThorException * ret = new CHThorException(code, format, args, MSGAUD_user, kind, activityId, subgraphId);
  10767. va_end(args);
  10768. return ret;
  10769. }
  10770. IHThorException * makeHThorException(ThorActivityKind kind, unsigned activityId, unsigned subgraphId, IException * exc)
  10771. {
  10772. return new CHThorException(exc, kind, activityId, subgraphId);
  10773. }
  10774. IHThorException * makeHThorException(ThorActivityKind kind, unsigned activityId, unsigned subgraphId, IException * exc, char const * extra)
  10775. {
  10776. return new CHThorException(exc, extra, kind, activityId, subgraphId);
  10777. }