dumpkey.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "jliball.hpp"
  14. #include "jhtree.hpp"
  15. #include "ctfile.hpp"
  16. #include "rtlrecord.hpp"
  17. #include "rtlformat.hpp"
  18. #include "rtldynfield.hpp"
  19. #include "eclhelper_dyn.hpp"
  20. #include "hqlexpr.hpp"
  21. #include "hqlutil.hpp"
  22. void fatal(const char *format, ...) __attribute__((format(printf, 1, 2)));
  23. void fatal(const char *format, ...)
  24. {
  25. va_list args;
  26. va_start(args, format);
  27. vfprintf(stderr, format, args);
  28. va_end(args);
  29. fflush(stderr);
  30. releaseAtoms();
  31. ExitModuleObjects();
  32. _exit(2);
  33. }
  34. bool optHex = false;
  35. bool optRaw = false;
  36. bool optFullHeader = false;
  37. bool optHeader = false;
  38. StringArray files;
  39. void usage()
  40. {
  41. fprintf(stderr, "Usage: dumpkey [options] dataset [dataset...]\n"
  42. "Options:\n"
  43. " node=[n] - dump node n (0 = just header)\n"
  44. " fpos=[n] - dump node at offset fpos\n"
  45. " recs=[n] - output n rows\n"
  46. " fields=[fieldnames] - output specified fields only\n"
  47. " filter=[filter] - filter rows\n"
  48. " -H - hex display\n"
  49. " -R - raw output\n"
  50. " -fullheader - output full header info for each file\n"
  51. " -header - output minimal header info for each file\n"
  52. );
  53. fflush(stderr);
  54. releaseAtoms();
  55. ExitModuleObjects();
  56. _exit(2);
  57. }
  58. void doOption(const char *opt)
  59. {
  60. if (streq(opt, "-H"))
  61. optHex = true;
  62. else if (streq(opt, "-R"))
  63. optRaw = true;
  64. else if (streq(opt, "-header"))
  65. optHeader = true;
  66. else if (streq(opt, "-fullheader"))
  67. optFullHeader = true;
  68. else
  69. usage();
  70. }
  71. class MyIndexVirtualFieldCallback : public CInterfaceOf<IVirtualFieldCallback>
  72. {
  73. public:
  74. MyIndexVirtualFieldCallback(IKeyManager *_manager) : manager(_manager)
  75. {
  76. }
  77. virtual const char * queryLogicalFilename(const void * row) override
  78. {
  79. UNIMPLEMENTED;
  80. }
  81. virtual unsigned __int64 getFilePosition(const void * row) override
  82. {
  83. UNIMPLEMENTED;
  84. }
  85. virtual unsigned __int64 getLocalFilePosition(const void * row) override
  86. {
  87. UNIMPLEMENTED;
  88. }
  89. virtual const byte * lookupBlob(unsigned __int64 id) override
  90. {
  91. size32_t blobSize;
  92. return manager->loadBlob(id, blobSize);
  93. }
  94. private:
  95. Linked<IKeyManager> manager;
  96. };
  97. int main(int argc, const char **argv)
  98. {
  99. InitModuleObjects();
  100. #ifdef _WIN32
  101. _setmode( _fileno( stdout ), _O_BINARY );
  102. _setmode( _fileno( stdin ), _O_BINARY );
  103. #endif
  104. Owned<IProperties> globals = createProperties("dumpkey.ini", true);
  105. StringArray filters;
  106. for (int i = 1; i < argc; i++)
  107. {
  108. if (argv[i][0] == '-')
  109. doOption(argv[i]);
  110. else if (strncmp(argv[i], "filter=", 7)==0)
  111. filters.append(argv[i]+7);
  112. else if (strchr(argv[i], '='))
  113. globals->loadProp(argv[i]);
  114. else
  115. files.append(argv[i]);
  116. }
  117. try
  118. {
  119. StringBuffer logname("dumpkey.");
  120. logname.append(GetCachedHostName()).append(".");
  121. StringBuffer lf;
  122. openLogFile(lf, logname.append("log").str());
  123. }
  124. catch (IException *E)
  125. {
  126. // Silently ignore failure to open logfile.
  127. E->Release();
  128. }
  129. ForEachItemIn(idx, files)
  130. {
  131. try
  132. {
  133. Owned <IKeyIndex> index;
  134. const char * keyName = files.item(idx);
  135. index.setown(createKeyIndex(keyName, 0, false));
  136. size32_t key_size = index->keySize(); // NOTE - in variable size case, this is 32767
  137. size32_t keyedSize = index->keyedSize();
  138. unsigned nodeSize = index->getNodeSize();
  139. if (optFullHeader)
  140. {
  141. Owned<IFile> in = createIFile(keyName);
  142. Owned<IFileIO> io = in->open(IFOread);
  143. if (!io)
  144. throw MakeStringException(999, "Failed to open file %s", keyName);
  145. Owned<CKeyHdr> header = new CKeyHdr;
  146. MemoryAttr block(sizeof(KeyHdr));
  147. io->read(0, sizeof(KeyHdr), (void *)block.get());
  148. header->load(*(KeyHdr*)block.get());
  149. if (header->getKeyType() & USE_TRAILING_HEADER)
  150. {
  151. if (io->read(in->size() - header->getNodeSize(), sizeof(KeyHdr), (void *)block.get()) != sizeof(KeyHdr))
  152. throw MakeStringException(4, "Invalid key %s: failed to read trailing key header", keyName);
  153. header->load(*(KeyHdr*)block.get());
  154. }
  155. printf("Key '%s'\nkeySize=%d keyedSize = %d NumParts=%x, Top=%d\n", keyName, key_size, keyedSize, index->numParts(), index->isTopLevelKey());
  156. printf("File size = %" I64F "d, nodes = %" I64F "d\n", in->size(), in->size() / nodeSize - 1);
  157. printf("rootoffset=%" I64F "d[%" I64F "d]\n", header->getRootFPos(), header->getRootFPos()/nodeSize);
  158. Owned<IPropertyTree> metadata = index->getMetadata();
  159. if (metadata)
  160. {
  161. StringBuffer xml;
  162. toXML(metadata, xml);
  163. printf("MetaData:\n%s\n", xml.str());
  164. }
  165. }
  166. else if (optHeader)
  167. {
  168. if (idx)
  169. printf("\n");
  170. printf("%s:\n\n", keyName);
  171. }
  172. if (globals->hasProp("node"))
  173. {
  174. if (stricmp(globals->queryProp("node"), "all")==0)
  175. {
  176. }
  177. else
  178. {
  179. int node = globals->getPropInt("node");
  180. if (node != 0)
  181. index->dumpNode(stdout, node * nodeSize, globals->getPropInt("recs", 0), optRaw);
  182. }
  183. }
  184. else if (globals->hasProp("fpos"))
  185. {
  186. index->dumpNode(stdout, globals->getPropInt("fpos"), globals->getPropInt("recs", 0), optRaw);
  187. }
  188. else
  189. {
  190. Owned<IKeyManager> manager;
  191. Owned<IPropertyTree> metadata = index->getMetadata();
  192. Owned<IOutputMetaData> diskmeta;
  193. Owned<IOutputMetaData> translatedmeta;
  194. ArrayOf<const RtlFieldInfo *> deleteFields;
  195. ArrayOf<const RtlFieldInfo *> fields; // Note - the lifetime of the array needs to extend beyond the lifetime of outmeta. The fields themselves are shared with diskmeta, and do not need to be released.
  196. Owned<IOutputMetaData> outmeta;
  197. Owned<IXmlWriterExt> writer;
  198. Owned<const IDynamicTransform> translator;
  199. RowFilter rowFilter;
  200. unsigned __int64 count = globals->getPropInt("recs", 1);
  201. const RtlRecordTypeInfo *outRecType = nullptr;
  202. if (metadata && metadata->hasProp("_rtlType"))
  203. {
  204. MemoryBuffer layoutBin;
  205. metadata->getPropBin("_rtlType", layoutBin);
  206. try
  207. {
  208. diskmeta.setown(createTypeInfoOutputMetaData(layoutBin, false));
  209. }
  210. catch (IException *E)
  211. {
  212. EXCLOG(E);
  213. E->Release();
  214. }
  215. }
  216. if (!diskmeta && metadata && metadata->hasProp("_record_ECL"))
  217. {
  218. MultiErrorReceiver errs;
  219. Owned<IHqlExpression> expr = parseQuery(metadata->queryProp("_record_ECL"), &errs);
  220. if (errs.errCount() == 0)
  221. {
  222. MemoryBuffer layoutBin;
  223. if (exportBinaryType(layoutBin, expr, true))
  224. diskmeta.setown(createTypeInfoOutputMetaData(layoutBin, false));
  225. }
  226. }
  227. if (diskmeta)
  228. {
  229. writer.setown(new SimpleOutputWriter);
  230. const RtlRecord &inrec = diskmeta->queryRecordAccessor(true);
  231. manager.setown(createLocalKeyManager(inrec, index, nullptr, true, false));
  232. size32_t minRecSize = 0;
  233. if (globals->hasProp("fields"))
  234. {
  235. StringArray fieldNames;
  236. fieldNames.appendList(globals->queryProp("fields"), ",");
  237. ForEachItemIn(idx, fieldNames)
  238. {
  239. unsigned fieldNum = inrec.getFieldNum(fieldNames.item(idx));
  240. if (fieldNum == (unsigned) -1)
  241. throw MakeStringException(0, "Requested output field '%s' not found", fieldNames.item(idx));
  242. const RtlFieldInfo *field = inrec.queryOriginalField(fieldNum);
  243. if (field->type->getType() == type_blob)
  244. {
  245. // We can't just use the original source field in this case (as blobs are only supported in the input)
  246. // So instead, create a field in the target with the original type.
  247. field = new RtlFieldStrInfo(field->name, field->xpath, field->type->queryChildType());
  248. deleteFields.append(field);
  249. }
  250. fields.append(field);
  251. minRecSize += field->type->getMinSize();
  252. }
  253. }
  254. else
  255. {
  256. // Copy all fields from the source record
  257. unsigned numFields = inrec.getNumFields();
  258. for (unsigned idx = 0; idx < numFields;idx++)
  259. {
  260. const RtlFieldInfo *field = inrec.queryOriginalField(idx);
  261. if (field->type->getType() == type_blob)
  262. {
  263. // See above - blob field in source needs special treatment
  264. field = new RtlFieldStrInfo(field->name, field->xpath, field->type->queryChildType());
  265. deleteFields.append(field);
  266. }
  267. fields.append(field);
  268. minRecSize += field->type->getMinSize();
  269. }
  270. }
  271. fields.append(nullptr);
  272. outRecType = new RtlRecordTypeInfo(type_record, minRecSize, fields.getArray(0));
  273. outmeta.setown(new CDynamicOutputMetaData(*outRecType));
  274. translator.setown(createRecordTranslator(outmeta->queryRecordAccessor(true), inrec));
  275. if (filters.ordinality())
  276. {
  277. ForEachItemIn(idx, filters)
  278. {
  279. const IFieldFilter &thisFilter = rowFilter.addFilter(diskmeta->queryRecordAccessor(true), filters.item(idx));
  280. unsigned idx = thisFilter.queryFieldIndex();
  281. const RtlFieldInfo *field = inrec.queryOriginalField(idx);
  282. if (field->flags & RFTMispayloadfield)
  283. throw MakeStringException(0, "Cannot filter on payload field '%s'", field->name);
  284. }
  285. }
  286. rowFilter.createSegmentMonitors(manager);
  287. }
  288. else
  289. {
  290. // We don't have record info - fake it? We could pretend it's a single field...
  291. UNIMPLEMENTED;
  292. // manager.setown(createLocalKeyManager(fake, index, nullptr));
  293. }
  294. manager->finishSegmentMonitors();
  295. manager->reset();
  296. MyIndexVirtualFieldCallback callback(manager);
  297. while (manager->lookup(true) && count--)
  298. {
  299. byte const * buffer = manager->queryKeyBuffer();
  300. size32_t size = manager->queryRowSize();
  301. unsigned __int64 seq = manager->querySequence();
  302. if (optRaw)
  303. {
  304. fwrite(buffer, 1, size, stdout);
  305. }
  306. else if (optHex)
  307. {
  308. for (unsigned i = 0; i < size; i++)
  309. printf("%02x", ((unsigned char) buffer[i]) & 0xff);
  310. printf(" :%" I64F "u\n", seq);
  311. }
  312. else if (translator)
  313. {
  314. MemoryBuffer buf;
  315. MemoryBufferBuilder aBuilder(buf, 0);
  316. if (translator->translate(aBuilder, callback, buffer))
  317. {
  318. outmeta->toXML(aBuilder.getSelf(), *writer.get());
  319. printf("%s\n", writer->str());
  320. writer->clear();
  321. }
  322. else
  323. count++; // Row was postfiltered
  324. }
  325. else
  326. printf("%.*s :%" I64F "u\n", size, buffer, seq);
  327. manager->releaseBlobs();
  328. }
  329. if (outRecType)
  330. outRecType->doDelete();
  331. ForEachItemIn(idx, deleteFields)
  332. {
  333. delete deleteFields.item(idx);
  334. }
  335. }
  336. }
  337. catch (IException *E)
  338. {
  339. StringBuffer msg;
  340. E->errorMessage(msg);
  341. printf("%s\n", msg.str());
  342. E->Release();
  343. }
  344. }
  345. releaseAtoms();
  346. ExitModuleObjects();
  347. return 0;
  348. }