rtlqstr.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "limits.h"
  14. #include "platform.h"
  15. #include <math.h>
  16. #include <stdio.h>
  17. #include "jexcept.hpp"
  18. #include "jmisc.hpp"
  19. #include "jutil.hpp"
  20. #include "jlib.hpp"
  21. #include "jptree.hpp"
  22. #include "eclrtl.hpp"
  23. #include "rtlbcd.hpp"
  24. #include "jlog.hpp"
  25. #include "jmd5.hpp"
  26. //=============================================================================
  27. // Miscellaneous string functions...
  // Number of 6-bit qstring characters that fit in `size` bytes: floor(size * 4 / 3).
  // Computed as size + size / 3 (the same value) so that the intermediate
  // product size * 4 is never formed and cannot wrap for large sizes.
  inline unsigned QStrLength(unsigned size)
  {
      return size + size / 3;
  }
  // Number of bytes needed to store `length` 6-bit qstring characters:
  // floor((length + 1) * 3 / 4).
  // Computed as length - length / 4 (the same value for every length) so that
  // neither length + 1 nor the multiplication by 3 can wrap for large lengths.
  inline unsigned QStrSize(unsigned length)
  {
      return length - length / 4;
  }
  30. byte lastQStrByteMask(unsigned tlen)
  31. {
  32. switch (tlen & 3)
  33. {
  34. case 1:
  35. return 0xfc;
  36. case 2:
  37. return 0xf0;
  38. case 3:
  39. return 0xc0;
  40. }
  41. return 0xff;
  42. }
  43. inline byte expandQChar(byte c)
  44. {
  45. return ' ' + c;
  46. }
  47. #if 1
  // Lookup table mapping every 8-bit character to its 6-bit qstring code.
  // 0x21..0x5f map to 0x01..0x3f, 'a'..'z' map to the same codes as 'A'..'Z',
  // and every other character (including space) maps to 0.
  static const char compressXlat[256] =
  {
      /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      /* 0x20 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
      /* 0x30 */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
      /* 0x40 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
      /* 0x50 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
      /* 0x60 */ 0x00, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
      /* 0x70 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x00, 0x00, 0x00, 0x00, 0x00,
      /* 0x80 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      /* 0x90 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      /* 0xA0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      /* 0xB0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      /* 0xC0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      /* 0xD0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      /* 0xE0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      /* 0xF0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
  };
  // Maps a character to its 6-bit qstring code by indexing compressXlat.
  // The argument is parenthesised before the cast so that an expression
  // argument (e.g. a + b) is converted as a whole, not just its first operand.
  #define compressQChar(c) compressXlat[(byte)(c)]
  84. #else
  85. inline byte compressQChar(byte c)
  86. {
  87. if (c > 0x20)
  88. {
  89. if (c < 0x60)
  90. return c - 0x20;
  91. if ((c >= 'a') && (c <= 'z'))
  92. return c - 0x40;
  93. }
  94. return 0;
  95. }
  96. #endif
  97. //---------------------------------------------------------------------------
  98. class QStrReader
  99. {
  100. public:
  101. QStrReader(const byte * _buffer) { buffer = _buffer; curLen = 0; offset = 0; }
  102. byte curQChar()
  103. {
  104. switch (curLen & 3)
  105. {
  106. case 0:
  107. return buffer[offset] >> 2;
  108. case 1:
  109. return ((buffer[offset] & 0x3) << 4) | (buffer[offset+1] >> 4);
  110. case 2:
  111. return ((buffer[offset] & 0xf) << 2) | (buffer[offset+1] >> 6);
  112. case 3:
  113. return (buffer[offset] & 0x3f);
  114. }
  115. return 0;
  116. }
  117. byte nextQChar()
  118. {
  119. byte c = curQChar();
  120. if ((curLen & 3) != 0)
  121. offset++;
  122. curLen++;
  123. return c;
  124. }
  125. byte prevQChar()
  126. {
  127. curLen--;
  128. if ((curLen & 3) != 0)
  129. offset--;
  130. return curQChar();
  131. }
  132. char nextChar()
  133. {
  134. return expandQChar(nextQChar());
  135. }
  136. inline void seek(unsigned pos)
  137. {
  138. curLen = pos;
  139. offset = (pos* 3)/4;
  140. }
  141. protected:
  142. const byte * buffer;
  143. unsigned curLen;
  144. unsigned offset;
  145. };
  146. //---------------------------------------------------------------------------
  147. class QStrBuilder
  148. {
  149. public:
  150. QStrBuilder(void * _buffer) { buffer = (byte *)_buffer; curLen = 0; pending = 0; }
  151. void appendChar(char next)
  152. {
  153. appendQChar(compressQChar(next));
  154. }
  155. void appendCharN(unsigned len, char next)
  156. {
  157. byte c = compressQChar(next);
  158. while (len--)
  159. appendQChar(c);
  160. }
  161. void appendQStr(unsigned len, const char * text)
  162. {
  163. QStrReader reader((const byte *)text);
  164. while (len--)
  165. appendQChar(reader.nextQChar());
  166. }
  167. void appendStr(unsigned len, const char * text)
  168. {
  169. while (len--)
  170. appendChar(*text++);
  171. }
  172. void appendQChar(byte c)
  173. {
  174. switch (curLen & 3)
  175. {
  176. case 0:
  177. pending = c << 2;
  178. break;
  179. case 1:
  180. *buffer++ = pending | (c >> 4);
  181. pending = c << 4;
  182. break;
  183. case 2:
  184. *buffer++ = pending | (c >> 2);
  185. pending = c << 6;
  186. break;
  187. case 3:
  188. *buffer++ = pending | c;
  189. pending = 0;
  190. break;
  191. }
  192. curLen++;
  193. }
  194. void finish(unsigned max, byte fill)
  195. {
  196. while (curLen < max)
  197. appendQChar(fill & 0x3F);
  198. //force a final character to be output, but never writes too many.
  199. appendQChar(fill & 0x3F);
  200. //curLen is now undefined.
  201. }
  202. protected:
  203. byte * buffer;
  204. unsigned curLen;
  205. byte pending;
  206. };
  207. //=============================================================================
  208. void copyQStrRange(unsigned tlen, char * tgt, const char * src, unsigned from, unsigned to)
  209. {
  210. unsigned copylen = to - from;
  211. if ((from & 3) == 0)
  212. {
  213. //can index the qstring directly...
  214. rtlQStrToQStr(tlen, tgt, copylen, src+QStrSize(from));
  215. //make sure the contents are in canonical format
  216. if ((copylen & 3) != 0)
  217. {
  218. unsigned copysize = QStrSize(copylen);
  219. tgt[copysize-1] &= lastQStrByteMask(copylen);
  220. }
  221. }
  222. else if (copylen == 0)
  223. {
  224. memset(tgt, 0, QStrSize(tlen));
  225. }
  226. else
  227. {
  228. //More: Could implement this cleverly by shifting and copying, but not worth it at the moment
  229. unsigned tempSrcLen;
  230. char * tempSrcPtr;
  231. rtlQStrToStrX(tempSrcLen, tempSrcPtr, from+copylen, src);
  232. rtlStrToQStr(tlen, tgt, copylen, tempSrcPtr+from);
  233. rtlFree(tempSrcPtr);
  234. }
  235. }
  236. //-----------------------------------------------------------------------------
  237. unsigned rtlQStrLength(unsigned size) { return QStrLength(size); }
  238. unsigned rtlQStrSize(unsigned length) { return QStrSize(length); }
  239. unsigned rtlTrimQStrLen(size32_t l, const char * t)
  240. {
  241. QStrReader reader((const byte *)t);
  242. reader.seek(l);
  243. while (l && (reader.prevQChar() == 0))
  244. l--;
  245. return l;
  246. }
  247. void rtlStrToQStr(size32_t outlen, char * out, size32_t inlen, const void *in)
  248. {
  249. unsigned outSize = QStrSize(outlen);
  250. if (inlen >= outlen)
  251. inlen = outlen;
  252. else
  253. {
  254. size32_t size = QStrSize(inlen);
  255. memset(out+size, 0, outSize-size);
  256. }
  257. byte * curIn = (byte *)in;
  258. byte * endIn = curIn + inlen;
  259. byte * curOut = (byte *)out;
  260. while ((endIn-curIn)>=4)
  261. {
  262. byte c0 = compressQChar(curIn[0]);
  263. byte c1 = compressQChar(curIn[1]);
  264. byte c2 = compressQChar(curIn[2]);
  265. byte c3 = compressQChar(curIn[3]);
  266. curOut[0] = (c0 << 2) | (c1 >> 4);
  267. curOut[1] = (c1 << 4) | (c2 >> 2);
  268. curOut[2] = (c2 << 6) | c3;
  269. curIn += 4;
  270. curOut += 3;
  271. }
  272. byte c0;
  273. byte c1 = 0;
  274. byte c2 = 0;
  275. switch (endIn - curIn)
  276. {
  277. case 3:
  278. c2 = compressQChar(curIn[2]);
  279. curOut[2] = (c2 << 6);
  280. //fallthrough
  281. case 2:
  282. c1 = compressQChar(curIn[1]);
  283. curOut[1] = (c1 << 4) | (c2 >> 2);
  284. //fall through
  285. case 1:
  286. c0 = compressQChar(curIn[0]);
  287. curOut[0] = (c0 << 2) | (c1 >> 4);
  288. break;
  289. case 0:
  290. break;
  291. default:
  292. UNIMPLEMENTED;
  293. }
  294. }
  295. void rtlStrToQStrX(size32_t & outlen, char * & out, size32_t inlen, const void *in)
  296. {
  297. outlen = inlen;
  298. out = (char *)malloc(QStrSize(inlen));
  299. rtlStrToQStr(inlen, out, inlen, in);
  300. }
  301. void rtlStrToQStrNX(size32_t & outlen, char * & out, size32_t inlen, const void * in, size32_t logicalLength)
  302. {
  303. outlen = logicalLength;
  304. out = (char *)malloc(QStrSize(logicalLength));
  305. rtlStrToQStr(logicalLength, out, inlen, in);
  306. }
  307. void rtlQStrToData(size32_t outlen, void * out, size32_t inlen, const char *in)
  308. {
  309. if (inlen >= outlen)
  310. inlen = outlen;
  311. else
  312. memset((char *)out+inlen, 0, outlen-inlen);
  313. rtlQStrToStr(inlen, (char *)out, inlen, in);
  314. }
  315. void rtlQStrToDataX(size32_t & outlen, void * & out, size32_t inlen, const char *in)
  316. {
  317. outlen = inlen;
  318. out = (char *)malloc(inlen);
  319. rtlQStrToStr(inlen, (char *)out, inlen, in);
  320. }
  321. void rtlQStrToVStr(size32_t outlen, char * out, size32_t inlen, const char *in)
  322. {
  323. out[--outlen] = 0;
  324. if (inlen >= outlen)
  325. inlen = outlen;
  326. else
  327. memset((char *)out+inlen, 0, outlen-inlen);
  328. rtlQStrToStr(inlen, out, inlen, in);
  329. }
  330. //NB: Need to be careful when expanding qstring3 to string3, that 4 bytes aren't written.
  331. void rtlQStrToStr(size32_t outlen, char * out, size32_t inlen, const char * in)
  332. {
  333. if (inlen < outlen)
  334. {
  335. memset(out+inlen, ' ', outlen-inlen);
  336. outlen = inlen;
  337. }
  338. const byte * curIn = (const byte *)in;
  339. byte * curOut = (byte *)out;
  340. byte * endOut = curOut + outlen;
  341. while ((endOut-curOut)>=4)
  342. {
  343. byte c0 = curIn[0];
  344. byte c1 = curIn[1];
  345. byte c2 = curIn[2];
  346. curOut[0] = expandQChar(c0 >> 2);
  347. curOut[1] = expandQChar(((c0 & 0x3) << 4) | (c1 >> 4));
  348. curOut[2] = expandQChar(((c1 & 0xF) << 2) | (c2 >> 6));
  349. curOut[3] = expandQChar(c2 & 0x3F);
  350. curIn += 3;
  351. curOut += 4;
  352. }
  353. switch (endOut - curOut)
  354. {
  355. case 3:
  356. curOut[2] = expandQChar(((curIn[1] & 0xF) << 2) | (curIn[2] >> 6));
  357. //fallthrough
  358. case 2:
  359. curOut[1] = expandQChar(((curIn[0] & 0x3) << 4) | (curIn[1] >> 4));
  360. //fallthrough
  361. case 1:
  362. curOut[0] = expandQChar(curIn[0] >> 2);
  363. break;
  364. case 0:
  365. break;
  366. default:
  367. UNIMPLEMENTED;
  368. }
  369. }
  370. void rtlQStrToStrX(size32_t & outlen, char * & out, size32_t inlen, const char *in)
  371. {
  372. outlen = inlen;
  373. out = (char *)malloc(inlen);
  374. rtlQStrToStr(inlen, out, inlen, in);
  375. }
  376. void rtlQStrToQStr(size32_t outlen, char * out, size32_t inlen, const char * in)
  377. {
  378. size32_t inSize = QStrSize(inlen);
  379. size32_t outSize = QStrSize(outlen);
  380. if (inSize >= outSize)
  381. memcpy(out, in, outSize);
  382. else
  383. {
  384. memcpy(out, in, inSize);
  385. memset(out+inSize, 0, outSize-inSize);
  386. }
  387. }
  388. void rtlQStrToQStrX(unsigned & outlen, char * & out, unsigned inlen, const char * in)
  389. {
  390. size32_t inSize = QStrSize(inlen);
  391. char * data = (char *)malloc(inSize);
  392. memcpy(data, in, inSize);
  393. outlen = inlen;
  394. out = data;
  395. }
  396. int rtlCompareQStrQStr(size32_t llen, const void * left, size32_t rlen, const void * right)
  397. {
  398. size32_t lsize = QStrSize(llen);
  399. size32_t rsize = QStrSize(rlen);
  400. if (lsize < rsize)
  401. {
  402. int ret = memcmp(left, right, lsize);
  403. if (ret == 0)
  404. {
  405. const byte * r = (const byte *)right;
  406. while (lsize < rsize)
  407. {
  408. if (r[lsize])
  409. return -1;
  410. lsize++;
  411. }
  412. }
  413. return ret;
  414. }
  415. int ret = memcmp(left, right, rsize);
  416. if (ret == 0)
  417. {
  418. const byte * l = (const byte *)left;
  419. while (lsize > rsize)
  420. {
  421. if (l[rsize])
  422. return +1;
  423. rsize++;
  424. }
  425. }
  426. return ret;
  427. }
  428. void rtlDecPushQStr(size32_t len, const void * data)
  429. {
  430. char * strData = (char *)alloca(len);
  431. rtlQStrToStr(len, strData, len, (const char *)data);
  432. DecPushString(len, strData);
  433. }
  434. bool rtlQStrToBool(size32_t inlen, const char *in)
  435. {
  436. unsigned size = QStrSize(inlen);
  437. while (size--)
  438. if (in[size])
  439. return true;
  440. return false;
  441. }
  442. //---------------------------------------------------------------------------
  443. ECLRTL_API void rtlCreateQStrRange(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * qstr, byte fill)
  444. {
  445. //NB: Keep in sync with rtlCreateRange()
  446. if (compareLen > fieldLen)
  447. {
  448. if ((int)compareLen >= 0)
  449. {
  450. //x[1..m] = y, m is larger than fieldLen, so truncate to fieldLen
  451. compareLen = fieldLen;
  452. }
  453. else
  454. compareLen = 0; // probably m[1..-1] or something silly
  455. }
  456. //y has been trimmed when this function is called. If y is longer than field length, then it is never going to match
  457. //so change the search range to FF,FF,FF .. 00.00.00 which will then never match.
  458. if (len > fieldLen)
  459. {
  460. compareLen = 0;
  461. fill = (fill == 0) ? 255 : 0;
  462. }
  463. outlen = fieldLen;
  464. out = (char *)malloc(QStrSize(fieldLen));
  465. QStrBuilder builder(out);
  466. if (len >= compareLen)
  467. builder.appendQStr(compareLen, qstr);
  468. else
  469. {
  470. builder.appendQStr(len, qstr);
  471. builder.appendCharN(compareLen-len, ' ');
  472. }
  473. builder.finish(fieldLen, fill);
  474. }
  475. ECLRTL_API void rtlCreateQStrRangeLow(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * qstr)
  476. {
  477. len = rtlTrimQStrLen(len, qstr);
  478. rtlCreateQStrRange(outlen, out, fieldLen, compareLen, len, qstr, 0);
  479. }
  480. ECLRTL_API void rtlCreateQStrRangeHigh(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * qstr)
  481. {
  482. len = rtlTrimQStrLen(len, qstr);
  483. rtlCreateQStrRange(outlen, out, fieldLen, compareLen, len, qstr, 255);
  484. }
  485. void serializeQStrX(size32_t len, const char * data, MemoryBuffer &out)
  486. {
  487. out.append(len).append(QStrSize(len), data);
  488. }
  489. void deserializeQStrX(size32_t & len, char * & data, MemoryBuffer &in)
  490. {
  491. free(data);
  492. in.read(sizeof(len), &len);
  493. unsigned size = QStrSize(len);
  494. data = (char *)malloc(size);
  495. in.read(size, data);
  496. }