copyexp.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "platform.h"
  14. /* Overloaded and redefined operators */
  15. #include "jlib.hpp"
  16. #include "jiface.hpp"
  17. #include "jfile.hpp"
  18. #include "jlzw.hpp"
  19. #include "jio.hpp"
  20. #include "jflz.hpp"
  21. #include "jlz4.hpp"
  22. IFEflags extraFlags = IFEnone;
  23. void doexit(int err)
  24. {
  25. releaseAtoms();
  26. exit(err);
  27. }
  28. void usage(bool isHelp)
  29. {
  30. printf("Usage:\n");
  31. printf(" copyexp <file> -- returns compress type\n");
  32. printf(" copyexp <file> <destination> -- copies file to destination\n");
  33. printf(" (expanding as needed)\n");
  34. printf(" copyexp -z <file> <dest> -- compresses file (LZW)\n");
  35. printf(" copyexp -r <recsz> <file> <dest> -- compresses file (RowDif)\n");
  36. printf(" copyexp -f <file> <dest> -- compresses file (FastLZ)\n");
  37. printf(" copyexp -fs <file> <dest> -- compresses file (FastLZ stream)\n");
  38. printf(" copyexp -l <file> <dest> -- compresses file (LZ4)\n");
  39. printf(" copyexp -ls <file> <dest> -- compresses file (LZ4 stream)\n");
  40. printf(" -s -- timing stats\n");
  41. printf(" -d -- do not cache files in OS\n");
  42. doexit(isHelp ? 0 : 2);
  43. }
  // Size in bytes (1 MiB) of the transfer buffer used by the copy loops.
  #define BUFFERSIZE (1024*1024)
  45. void printCompDetails(const char *fname,IFileIO *baseio,ICompressedFileIO *cmpio,IFileIOStream *strm, bool flzstrm, bool lz4strm)
  46. {
  47. const char *method = "Unknown Method";
  48. offset_t expsize = 0;
  49. if (!cmpio&&strm)
  50. {
  51. if (flzstrm)
  52. method = "FLZSTREAM";
  53. else if (lz4strm)
  54. method = "LZ4STREAM";
  55. expsize = strm->size();
  56. }
  57. else
  58. {
  59. switch (cmpio->method())
  60. {
  61. case COMPRESS_METHOD_ROWDIF: method = "ROWDIF"; break;
  62. case COMPRESS_METHOD_LZW: method = "LZW"; break;
  63. case COMPRESS_METHOD_FASTLZ: method = "FASTLZ"; break;
  64. case COMPRESS_METHOD_LZ4: method = "LZ4"; break;
  65. }
  66. expsize = cmpio->size();
  67. }
  68. printf("%s: is %s compressed, size= %" I64F "d, expanded= %" I64F "d",fname,method,baseio->size(),expsize);
  69. if (!strm&&cmpio)
  70. printf(", block size= %d",cmpio->blockSize());
  71. if (!strm&&cmpio&&cmpio->recordSize())
  72. printf(", record size= %d",cmpio->recordSize());
  73. printf("\n");
  74. }
  75. static const char *formatTime(unsigned t,StringBuffer &str)
  76. {
  77. str.clear();
  78. if (t>100000)
  79. str.appendf("%ds",t/1000);
  80. else
  81. str.appendf("%dms",t);
  82. return str.str();
  83. }
  84. static const char *formatTimeU(unsigned t,StringBuffer &str)
  85. {
  86. str.clear();
  87. if (t>100000000)
  88. str.appendf("%ds",t/1000000);
  89. else if (t>100000)
  90. str.appendf("%dms",t/1000);
  91. else
  92. str.appendf("%dus",t);
  93. return str.str();
  94. }
  95. static void printStats(offset_t filesize,unsigned start,unsigned startu)
  96. {
  97. StringBuffer tmp;
  98. unsigned elapsed = msTick()-start;
  99. unsigned elapsedu = usTick()-startu;
  100. if (!elapsedu)
  101. elapsedu = 1;
  102. if (elapsed<1000)
  103. printf("%" I64F "d bytes copied, at %.2f MB/s in %s\n",filesize,((((double)filesize)/(1024*1024))/elapsedu)*1000000,formatTimeU(elapsedu,tmp));
  104. else
  105. printf("%" I64F "d bytes copied, at %.2f MB/s in %s\n",filesize,((((double)filesize)/(1024*1024))/elapsed)*1000,formatTime(elapsed,tmp));
  106. }
  107. int copyExpanded(const char *from, const char *to, bool stats)
  108. {
  109. Owned<IFile> srcfile = createIFile(from);
  110. Owned<IFileIO> srcio = srcfile->open(IFOread, extraFlags);
  111. if (!srcio) {
  112. printf("ERROR: could not open '%s' for read\n",from);
  113. doexit(3);
  114. }
  115. Owned<ICompressedFileIO> cmpio = createCompressedFileReader(srcio);
  116. Owned<IFileIOStream> strmsrc;
  117. bool flzstrm = false;
  118. bool lz4strm = false;
  119. if (!cmpio)
  120. {
  121. strmsrc.setown(createFastLZStreamRead(srcio));
  122. if (strmsrc)
  123. flzstrm = true;
  124. else
  125. {
  126. strmsrc.setown(createLZ4StreamRead(srcio));
  127. if (strmsrc)
  128. lz4strm = true;
  129. }
  130. }
  131. int ret = 0;
  132. if (cmpio||strmsrc)
  133. printCompDetails(from,srcio,cmpio,strmsrc,flzstrm,lz4strm);
  134. else {
  135. ret = 1;
  136. printf("%s is not compressed, size= %" I64F "d\n",from,srcio->size());
  137. }
  138. if (!to||!*to)
  139. return ret;
  140. Owned<IFile> dstfile = createIFile(to);
  141. StringBuffer fulldst;
  142. if (dstfile->isDirectory()==fileBool::foundYes) {
  143. dstfile.clear();
  144. addPathSepChar(fulldst.append(to)).append(pathTail(from));
  145. to = fulldst.str();
  146. dstfile.setown(createIFile(to));
  147. }
  148. if (dstfile->exists()) {
  149. printf("ERROR: file '%s' already exists\n",to);
  150. doexit(4);
  151. }
  152. unsigned start = 0;
  153. unsigned startu = 0;
  154. if (stats) {
  155. start = msTick();
  156. startu = usTick();
  157. }
  158. Owned<IFileIO> dstio = dstfile->open(IFOcreate, extraFlags);
  159. if (!dstio) {
  160. printf("ERROR: could not open '%s' for write\n",to);
  161. doexit(5);
  162. }
  163. #ifdef __linux__
  164. // this is not really needed in windows - if it is we will have to
  165. // test the file extension - .exe, .bat
  166. struct stat info;
  167. if (stat(from, &info) == 0) // cannot fail - exception would have been thrown above
  168. dstfile->setCreateFlags(info.st_mode&(S_IRUSR|S_IRGRP|S_IROTH|S_IWUSR|S_IWGRP|S_IWOTH|S_IXUSR|S_IXGRP|S_IXOTH));
  169. #endif
  170. MemoryAttr mb;
  171. void * buffer = mb.allocate(BUFFERSIZE);
  172. offset_t offset = 0;
  173. try
  174. {
  175. for (;;) {
  176. size32_t got = cmpio.get()?cmpio->read(offset,BUFFERSIZE, buffer):
  177. (strmsrc?strmsrc->read(BUFFERSIZE, buffer):
  178. srcio->read(offset, BUFFERSIZE, buffer));
  179. if (got == 0)
  180. break;
  181. dstio->write(offset, got, buffer);
  182. offset += got;
  183. }
  184. }
  185. catch (IException *e)
  186. {
  187. // try to delete partial copy
  188. dstio.clear();
  189. try {
  190. dstfile->remove();
  191. }
  192. catch (IException *e2) {
  193. StringBuffer s;
  194. pexception(s.clear().append("Removing partial copy file: ").append(to).str(),e2);
  195. e2->Release();
  196. }
  197. throw e;
  198. }
  199. dstio.clear();
  200. if (stats)
  201. printStats(offset,start,startu);
  202. CDateTime createTime, modifiedTime;
  203. if (srcfile->getTime(&createTime, &modifiedTime, NULL))
  204. dstfile->setTime(&createTime, &modifiedTime, NULL);
  205. printf("copied %s to %s%s\n",from,to,(cmpio.get()||strmsrc)?" expanding":"");
  206. return 0;
  207. }
  208. void copyCompress(const char *from, const char *to, size32_t rowsize, bool fast, bool flzstrm, bool lz4, bool lz4strm, bool stats)
  209. {
  210. Owned<IFile> srcfile = createIFile(from);
  211. Owned<IFileIO> baseio = srcfile->open(IFOread, extraFlags);
  212. if (!baseio) {
  213. printf("ERROR: could not open '%s' for read\n",from);
  214. doexit(3);
  215. }
  216. Owned<ICompressedFileIO> cmpio = createCompressedFileReader(baseio);
  217. Owned<IFileIOStream> strmsrc;
  218. if (!cmpio)
  219. {
  220. strmsrc.setown(createFastLZStreamRead(baseio));
  221. if (!strmsrc)
  222. strmsrc.setown(createLZ4StreamRead(baseio));
  223. }
  224. bool plaincopy = false;
  225. IFileIO *srcio = NULL;
  226. if (cmpio) {
  227. srcio = cmpio;
  228. if (rowsize&&(cmpio->recordSize()==rowsize))
  229. plaincopy = true;
  230. else if (!rowsize) {
  231. if (fast&&(cmpio->method()==COMPRESS_METHOD_FASTLZ))
  232. plaincopy = true;
  233. else if (!fast&&(cmpio->method()==COMPRESS_METHOD_LZW))
  234. plaincopy = true;
  235. else if (!fast&&(cmpio->method()==COMPRESS_METHOD_LZ4))
  236. plaincopy = true;
  237. }
  238. }
  239. else if (strmsrc) {
  240. if (flzstrm||lz4strm)
  241. plaincopy = true;
  242. }
  243. else
  244. srcio = baseio;
  245. if (plaincopy) {
  246. if(cmpio)
  247. cmpio.clear();
  248. srcio = baseio.get();
  249. }
  250. Owned<IFile> dstfile = createIFile(to);
  251. StringBuffer fulldst;
  252. if (dstfile->isDirectory()==fileBool::foundYes) {
  253. dstfile.clear();
  254. addPathSepChar(fulldst.append(to)).append(pathTail(from));
  255. to = fulldst.str();
  256. dstfile.setown(createIFile(to));
  257. }
  258. if (dstfile->exists()) {
  259. printf("ERROR: file '%s' already exists\n",to);
  260. doexit(4);
  261. }
  262. unsigned start = 0;
  263. unsigned startu = 0;
  264. if (stats) {
  265. start = msTick();
  266. startu = usTick();
  267. }
  268. Owned<IFileIO> dstio;
  269. Owned<IFileIOStream> strmdst;
  270. if (plaincopy||flzstrm||lz4strm) {
  271. dstio.setown(dstfile->open(IFOcreate, extraFlags));
  272. if (dstio&&!plaincopy)
  273. {
  274. if (flzstrm)
  275. strmdst.setown(createFastLZStreamWrite(dstio));
  276. else if (lz4strm)
  277. strmdst.setown(createLZ4StreamWrite(dstio));
  278. }
  279. }
  280. else
  281. {
  282. unsigned compMethod = COMPRESS_METHOD_LZW;
  283. if (fast)
  284. compMethod = COMPRESS_METHOD_FASTLZ;
  285. else if (lz4)
  286. compMethod = COMPRESS_METHOD_LZ4;
  287. dstio.setown(createCompressedFileWriter(dstfile,rowsize,false,true,NULL,compMethod,extraFlags));
  288. }
  289. if (!dstio) {
  290. printf("ERROR: could not open '%s' for write\n",to);
  291. doexit(5);
  292. }
  293. #ifdef __linux__
  294. // this is not really needed in windows - if it is we will have to
  295. // test the file extension - .exe, .bat
  296. struct stat info;
  297. if (stat(from, &info) == 0) // cannot fail - exception would have been thrown above
  298. dstfile->setCreateFlags(info.st_mode&(S_IRUSR|S_IRGRP|S_IROTH|S_IWUSR|S_IWGRP|S_IWOTH|S_IXUSR|S_IXGRP|S_IXOTH));
  299. #endif
  300. MemoryAttr mb;
  301. void * buffer = mb.allocate(BUFFERSIZE);
  302. offset_t offset = 0;
  303. try
  304. {
  305. for (;;) {
  306. size32_t got = cmpio.get()?cmpio->read(offset, BUFFERSIZE, buffer):srcio->read(offset, BUFFERSIZE, buffer);
  307. if (got == 0)
  308. break;
  309. if (strmdst)
  310. strmdst->write(got,buffer);
  311. else
  312. dstio->write(offset, got, buffer);
  313. offset += got;
  314. }
  315. }
  316. catch (IException *e)
  317. {
  318. // try to delete partial copy
  319. dstio.clear();
  320. try {
  321. dstfile->remove();
  322. }
  323. catch (IException *e2) {
  324. StringBuffer s;
  325. pexception(s.clear().append("Removing partial copy file: ").append(to).str(),e2);
  326. e2->Release();
  327. }
  328. throw e;
  329. }
  330. if (strmdst)
  331. strmdst.clear();
  332. dstio.clear();
  333. if (stats)
  334. printStats(offset,start,startu);
  335. CDateTime createTime, modifiedTime;
  336. if (srcfile->getTime(&createTime, &modifiedTime, NULL))
  337. dstfile->setTime(&createTime, &modifiedTime, NULL);
  338. printf("copied %s to %s%s\n",from,to,plaincopy?"":" compressing");
  339. { // print details
  340. dstio.setown(dstfile->open(IFOread, extraFlags));
  341. if (dstio) {
  342. Owned<ICompressedFileIO> cmpio = createCompressedFileReader(dstio);
  343. Owned<IFileIOStream> strmchk;
  344. if (!cmpio)
  345. {
  346. strmchk.setown(createFastLZStreamRead(dstio));
  347. if (!strmchk)
  348. strmchk.setown(createLZ4StreamRead(dstio));
  349. }
  350. if (cmpio||strmchk)
  351. printCompDetails(to,dstio,cmpio,strmchk,flzstrm,lz4strm);
  352. else
  353. printf("destination %s not compressed\n",to);
  354. }
  355. else
  356. printf("destination %s could not be read\n",to);
  357. }
  358. }
  359. int main(int argc, char * const * argv)
  360. {
  361. InitModuleObjects();
  362. int ret = 0;
  363. try
  364. {
  365. bool test=false;
  366. unsigned arg = 1;
  367. StringBuffer fname1;
  368. StringBuffer fname2;
  369. bool lzw = false;
  370. bool fast = false;
  371. bool flzstrm = false;
  372. bool lz4 = false;
  373. bool lz4strm = false;
  374. bool stats = false;
  375. size32_t rowsz = 0;
  376. for (int a = 1; a<argc; a++) {
  377. const char *arg = argv[a];
  378. if (arg[0]=='-') {
  379. if(strcmp(arg, "-t") == 0)
  380. test = true;
  381. else if(strcmp(arg, "-?") == 0)
  382. usage(true);
  383. else if(strcmp(arg, "-h") == 0)
  384. usage(true);
  385. else if(strcmp(arg, "-z") == 0) {
  386. lzw = true;
  387. continue;
  388. }
  389. else if(strcmp(arg, "-d") == 0) {
  390. extraFlags = IFEnocache;
  391. continue;
  392. }
  393. else if(strcmp(arg, "-s") == 0) {
  394. stats = true;
  395. continue;
  396. }
  397. else if(strcmp(arg, "-f") == 0) {
  398. fast = true;
  399. continue;
  400. }
  401. else if(strcmp(arg, "-fs") == 0) {
  402. flzstrm = true;
  403. continue;
  404. }
  405. else if(strcmp(arg, "-l") == 0) {
  406. lz4 = true;
  407. continue;
  408. }
  409. else if(strcmp(arg, "-ls") == 0) {
  410. lz4strm = true;
  411. continue;
  412. }
  413. else if(strcmp(arg, "-r") == 0) {
  414. if (a+1<argc) {
  415. rowsz = atoi(argv[a+1]);
  416. if (rowsz) {
  417. a++;
  418. continue;
  419. }
  420. }
  421. usage(false);
  422. }
  423. else {
  424. printf("ERROR unexpected parameter '%s'",arg);
  425. usage(false);
  426. }
  427. }
  428. if (fname1.length()) {
  429. if (test||fname2.length()) {
  430. printf("ERROR unexpected parameter '%s'",arg);
  431. usage(false);
  432. }
  433. fname2.append(arg);
  434. }
  435. else
  436. fname1.append(arg);
  437. }
  438. if (!fname1.length())
  439. usage(true);
  440. if (!fast&&!lzw&&!rowsz&&!flzstrm&&!lz4&&!lz4strm)
  441. copyExpanded(fname1.str(),fname2.str(),stats);
  442. else
  443. copyCompress(fname1.str(),fname2.str(),rowsz,fast,flzstrm,lz4,lz4strm,stats);
  444. }
  445. catch(IException * e)
  446. {
  447. pexception("copyexp: ",e);
  448. e->Release();
  449. ret = 99;
  450. }
  451. releaseAtoms();
  452. return ret;
  453. }