eclrtl.cpp 157 KB


  1. /*##############################################################################
  2. Copyright (C) 2011 HPCC Systems.
  3. All rights reserved. This program is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU Affero General Public License as
  5. published by the Free Software Foundation, either version 3 of the
  6. License, or (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Affero General Public License for more details.
  11. You should have received a copy of the GNU Affero General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>.
  13. ############################################################################## */
  14. #include "limits.h"
  15. #ifdef _USE_BOOST_REGEX
  16. #include "boost/regex.hpp" // must precede platform.h ; n.b. this uses a #pragma comment(lib, ...) to link the appropriate .lib in MSVC
  17. #endif
  18. #include "platform.h"
  19. #include <math.h>
  20. #include <stdio.h>
  21. #include "jexcept.hpp"
  22. #include "jmisc.hpp"
  23. #include "jutil.hpp"
  24. #include "jlib.hpp"
  25. #include "jptree.hpp"
  26. #include "junicode.hpp"
  27. #include "eclrtl.hpp"
  28. #include "bcd.hpp"
  29. #include "eclrtl_imp.hpp"
  30. #include "unicode/uchar.h"
  31. #include "unicode/ucol.h"
  32. #include "unicode/ustring.h"
  33. #include "unicode/ucnv.h"
  34. #include "unicode/schriter.h"
  35. #include "unicode/regex.h"
  36. #include "unicode/normlzr.h"
  37. #include "unicode/locid.h"
  38. #include "jlog.hpp"
  39. #include "jmd5.hpp"
  40. #include "rtlqstr.ipp"
  41. #ifndef _WIN32
  42. //typedef long long __int64;
  43. #define _fastcall
  44. #define __fastcall
  45. #define _stdcall
  46. #define __stdcall
  47. #endif
  48. #define UTF8_CODEPAGE "UTF-8"
  49. #define UTF8_MAXSIZE 4
  50. IRandomNumberGenerator * random_;
  51. static CriticalSection random_Sect;
  52. MODULE_INIT(INIT_PRIORITY_ECLRTL_ECLRTL)
  53. {
  54. random_ = createRandomNumberGenerator();
  55. random_->seed((unsigned)get_cycles_now());
  56. return true;
  57. }
  58. MODULE_EXIT()
  59. {
  60. random_->Release();
  61. }
  62. //=============================================================================
  63. // Miscellaneous string functions...
  64. ECLRTL_API void * rtlMalloc(size32_t size)
  65. {
  66. return malloc(size);
  67. }
  68. void rtlFree(void *ptr)
  69. {
  70. free(ptr);
  71. }
  72. ECLRTL_API void * rtlRealloc(void * _ptr, size32_t size)
  73. {
  74. return realloc(_ptr, size);
  75. }
  76. //=============================================================================
  77. static IRtlRowCallback * rowCallback = NULL;
  78. ECLRTL_API void rtlReleaseRow(const void * row)
  79. {
  80. if (row)
  81. rowCallback->releaseRow(row);
  82. }
  83. ECLRTL_API void rtlReleaseRowset(unsigned count, byte * * rowset)
  84. {
  85. rowCallback->releaseRowset(count, rowset);
  86. }
  87. ECLRTL_API IRtlRowCallback * rtlSetReleaseRowHook(IRtlRowCallback * hook)
  88. {
  89. IRtlRowCallback * prev = rowCallback;
  90. rowCallback = hook;
  91. return prev;
  92. }
  93. ECLRTL_API void * rtlLinkRow(const void * row)
  94. {
  95. return rowCallback->linkRow(row);
  96. }
  97. ECLRTL_API byte * * rtlLinkRowset(byte * * rowset)
  98. {
  99. return rowCallback->linkRowset(rowset);
  100. }
  101. //=============================================================================
  102. // Unicode helper classes and functions
  103. // escape
  104. void escapeUnicode(unsigned inlen, UChar const * in, StringBuffer & out)
  105. {
  106. UCharCharacterIterator iter(in, inlen);
  107. for(iter.first32(); iter.hasNext(); iter.next32())
  108. {
  109. UChar32 c = iter.current32();
  110. if(c < 0x80)
  111. out.append((char) c);
  112. else if (c < 0x10000)
  113. out.appendf("\\u%04X", c);
  114. else
  115. out.appendf("\\U%08X", c);
  116. }
  117. }
  118. // locales and collators
  119. static unsigned const unicodeStrengthLimit = 5;
  120. static UCollationStrength unicodeStrength[unicodeStrengthLimit] =
  121. {
  122. UCOL_PRIMARY,
  123. UCOL_SECONDARY,
  124. UCOL_TERTIARY,
  125. UCOL_QUATERNARY,
  126. UCOL_IDENTICAL
  127. };
  128. class RTLLocale : public CInterface
  129. {
  130. public:
  131. RTLLocale(char const * _locale) : locale(_locale)
  132. {
  133. for(unsigned i=0; i<unicodeStrengthLimit; i++)
  134. colls[i] = NULL;
  135. UErrorCode err = U_ZERO_ERROR;
  136. colls[2] = ucol_open(locale.get(), &err);
  137. assertex(U_SUCCESS(err));
  138. }
  139. ~RTLLocale()
  140. {
  141. for(unsigned i=0; i<unicodeStrengthLimit; i++)
  142. if(colls[i]) ucol_close(colls[i]);
  143. }
  144. UCollator * queryCollator() const { return colls[2]; }
  145. UCollator * queryCollator(unsigned strength) const
  146. {
  147. if(strength == 0) strength = 1;
  148. if(strength > unicodeStrengthLimit) strength = unicodeStrengthLimit;
  149. if(!colls[strength-1])
  150. {
  151. UErrorCode err = U_ZERO_ERROR;
  152. const_cast<UCollator * *>(colls)[strength-1] = ucol_open(locale.get(), &err);
  153. assertex(U_SUCCESS(err));
  154. ucol_setStrength(colls[strength-1], unicodeStrength[strength-1]);
  155. }
  156. return colls[strength-1];
  157. }
  158. private:
  159. StringAttr locale;
  160. UCollator * colls[unicodeStrengthLimit];
  161. };
  162. typedef MapStringTo<RTLLocale, char const *> MapStrToLocale;
  163. MapStrToLocale *localeMap;
  164. CriticalSection localeCrit;
  165. MODULE_INIT(INIT_PRIORITY_STANDARD)
  166. {
  167. localeMap = new MapStrToLocale;
  168. return true;
  169. }
  170. MODULE_EXIT()
  171. {
  172. delete localeMap;
  173. }
  174. bool rtlGetNormalizedUnicodeLocaleName(unsigned len, char const * in, char * out)
  175. {
  176. bool isPrimary = true;
  177. bool ok = true;
  178. unsigned i;
  179. for(i=0; i<len; i++)
  180. if(in[i] == '_')
  181. {
  182. out[i] = '_';
  183. isPrimary = false;
  184. }
  185. else if(isalpha(in[i]))
  186. {
  187. out[i] = (isPrimary ? tolower(in[i]) : toupper(in[i]));
  188. }
  189. else
  190. {
  191. out[i] = 0;
  192. ok = false;
  193. }
  194. return ok;
  195. }
  196. RTLLocale * queryRTLLocale(char const * locale)
  197. {
  198. if (!locale) locale = "";
  199. CriticalBlock b(localeCrit);
  200. RTLLocale * loc = localeMap->getValue(locale);
  201. if(!loc)
  202. {
  203. unsigned ll = strlen(locale);
  204. StringBuffer lnorm;
  205. rtlGetNormalizedUnicodeLocaleName(ll, locale, lnorm.reserve(ll));
  206. localeMap->setValue(locale, lnorm.str());
  207. loc = localeMap->getValue(locale);
  208. }
  209. return loc;
  210. }
  211. // converters
  212. class RTLUnicodeConverter : public CInterface
  213. {
  214. public:
  215. RTLUnicodeConverter(char const * codepage)
  216. {
  217. UErrorCode err = U_ZERO_ERROR;
  218. conv = ucnv_open(codepage, &err);
  219. if (!U_SUCCESS(err))
  220. {
  221. StringBuffer msg;
  222. msg.append("Unrecognised codepage '").append(codepage).append("'");
  223. rtlFail(0, msg.str());
  224. }
  225. }
  226. ~RTLUnicodeConverter()
  227. {
  228. ucnv_close(conv);
  229. }
  230. UConverter * query() const { return conv; }
  231. private:
  232. UConverter * conv;
  233. };
  234. typedef MapStringTo<RTLUnicodeConverter, char const *> MapStrToUnicodeConverter;
  235. MapStrToUnicodeConverter *unicodeConverterMap;
  236. CriticalSection ucmCrit;
  237. MODULE_INIT(INIT_PRIORITY_STANDARD)
  238. {
  239. unicodeConverterMap = new MapStrToUnicodeConverter;
  240. return true;
  241. }
  242. MODULE_EXIT()
  243. {
  244. delete unicodeConverterMap;
  245. }
  246. RTLUnicodeConverter * queryRTLUnicodeConverter(char const * codepage)
  247. {
  248. CriticalBlock b(ucmCrit);
  249. RTLUnicodeConverter * conv = unicodeConverterMap->getValue(codepage);
  250. if(!conv)
  251. {
  252. unicodeConverterMap->setValue(codepage, codepage);
  253. conv = unicodeConverterMap->getValue(codepage);
  254. }
  255. return conv;
  256. }
  257. // normalization
  258. bool unicodeNeedsNormalize(unsigned inlen, UChar * in, UErrorCode * err)
  259. {
  260. return !unorm_isNormalized(in, inlen, UNORM_NFC, err);
  261. }
  262. bool vunicodeNeedsNormalize(UChar * in, UErrorCode * err)
  263. {
  264. return !unorm_isNormalized(in, -1, UNORM_NFC, err);
  265. }
  266. void unicodeReplaceNormalized(unsigned inlen, UChar * in, UErrorCode * err)
  267. {
  268. UChar * buff = (UChar *)malloc(inlen*2);
  269. unsigned len = unorm_normalize(in, inlen, UNORM_NFC, 0, buff, inlen, err);
  270. while(len<inlen) buff[len++] = 0x0020;
  271. memcpy(in, buff, inlen);
  272. free(buff);
  273. }
  274. void vunicodeReplaceNormalized(unsigned inlen, UChar * in, UErrorCode * err)
  275. {
  276. UChar * buff = (UChar *)malloc(inlen*2);
  277. unsigned len = unorm_normalize(in, -1, UNORM_NFC, 0, buff, inlen-1, err);
  278. buff[len] = 0x0000;
  279. memcpy(in, buff, inlen);
  280. free(buff);
  281. }
  282. void unicodeGetNormalized(unsigned & outlen, UChar * & out, unsigned inlen, UChar * in, UErrorCode * err)
  283. {
  284. outlen = unorm_normalize(in, inlen, UNORM_NFC, 0, 0, 0, err);
  285. out = (UChar *)malloc(outlen*2);
  286. unorm_normalize(in, inlen, UNORM_NFC, 0, out, outlen, err);
  287. }
  288. void vunicodeGetNormalized(UChar * & out, unsigned inlen, UChar * in, UErrorCode * err)
  289. {
  290. unsigned outlen = unorm_normalize(in, inlen, UNORM_NFC, 0, 0, 0, err);
  291. out = (UChar *)malloc((outlen+1)*2);
  292. unorm_normalize(in, inlen, UNORM_NFC, 0, out, outlen, err);
  293. out[outlen] = 0x0000;
  294. }
  295. void unicodeEnsureIsNormalized(unsigned len, UChar * str)
  296. {
  297. UErrorCode err = U_ZERO_ERROR;
  298. if(unicodeNeedsNormalize(len, str, &err))
  299. unicodeReplaceNormalized(len, str, &err);
  300. }
  301. void vunicodeEnsureIsNormalized(unsigned len, UChar * str)
  302. {
  303. UErrorCode err = U_ZERO_ERROR;
  304. if(vunicodeNeedsNormalize(str, &err))
  305. vunicodeReplaceNormalized(len, str, &err);
  306. }
  307. void unicodeEnsureIsNormalizedX(unsigned & len, UChar * & str)
  308. {
  309. UErrorCode err = U_ZERO_ERROR;
  310. if(unicodeNeedsNormalize(len, str, &err))
  311. {
  312. unsigned inlen = len;
  313. UChar * in = str;
  314. unicodeGetNormalized(len, str, inlen, in, &err);
  315. free(in);
  316. }
  317. }
  318. void vunicodeEnsureIsNormalizedX(unsigned inlen, UChar * & str)
  319. {
  320. UErrorCode err = U_ZERO_ERROR;
  321. if(unicodeNeedsNormalize(inlen, str, &err))
  322. {
  323. UChar * in = str;
  324. vunicodeGetNormalized(str, inlen, in, &err);
  325. free(in);
  326. }
  327. }
  328. void unicodeNormalizedCopy(UChar * out, UChar * in, unsigned len)
  329. {
  330. UErrorCode err = U_ZERO_ERROR;
  331. if(unicodeNeedsNormalize(len, in, &err))
  332. unorm_normalize(in, len, UNORM_NFC, 0, out, len, &err);
  333. else
  334. memcpy(out, in, len);
  335. }
  336. void normalizeUnicodeString(UnicodeString const & in, UnicodeString & out)
  337. {
  338. UErrorCode err = U_ZERO_ERROR;
  339. Normalizer::compose(in, false, 0, out, err);
  340. assertex(U_SUCCESS(err));
  341. }
  342. // padding
  343. void multimemset(char * out, unsigned outlen, char const * in, unsigned inlen)
  344. {
  345. unsigned outpos = 0;
  346. unsigned inpos = 0;
  347. while(outpos < outlen)
  348. {
  349. out[outpos++] = in[inpos++];
  350. if(inpos == inlen)
  351. inpos = 0;
  352. }
  353. }
  354. typedef MapStringTo<MemoryAttr, size32_t> MemoryAttrMapping;
  355. MemoryAttrMapping *unicodeBlankCache;
  356. CriticalSection ubcCrit;
  357. MODULE_INIT(INIT_PRIORITY_STANDARD)
  358. {
  359. unicodeBlankCache = new MemoryAttrMapping;
  360. return true;
  361. }
  362. MODULE_EXIT()
  363. {
  364. delete unicodeBlankCache;
  365. }
  366. UChar unicodeSpace = 0x0020;
  367. void codepageBlankFill(char const * codepage, char * out, unsigned len)
  368. {
  369. CriticalBlock b(ubcCrit);
  370. MemoryAttr * cached = unicodeBlankCache->getValue(codepage);
  371. if(cached)
  372. {
  373. char const * blank = (char const *)cached->get();
  374. size32_t blanklen = cached->length();
  375. if(blanklen==1)
  376. memset(out, *blank, len);
  377. else
  378. multimemset(out, len, blank, blanklen);
  379. }
  380. else
  381. {
  382. unsigned blanklen;
  383. char * blank;
  384. rtlUnicodeToCodepageX(blanklen, blank, 1, &unicodeSpace, codepage);
  385. unicodeBlankCache->setValue(codepage, blanklen);
  386. unicodeBlankCache->getValue(codepage)->set(blanklen, blank);
  387. if(blanklen==1)
  388. memset(out, *blank, len);
  389. else
  390. multimemset(out, len, blank, blanklen);
  391. free(blank);
  392. }
  393. }
  394. //---------------------------------------------------------------------------
  395. // floating point functions
  396. static const double smallPowers[16] = {
  397. 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
  398. 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15 };
  399. static double powerOfTen(int x)
  400. {
  401. if (x < 0)
  402. return 1 / powerOfTen(-x);
  403. double value = smallPowers[x&15];
  404. double scale = 1e16;
  405. x >>= 4;
  406. while (x)
  407. {
  408. if (x & 1)
  409. value *= scale;
  410. scale *= scale;
  411. x >>= 1;
  412. }
  413. return value;
  414. };
  415. static double kk = (1.0 / ((unsigned __int64)1<<53));
  416. __int64 rtlRound(double x)
  417. {
  418. //a fudge to make numbers that are inexact after a division round up "correctly".
  419. //coded rather oddly as microsoft's optimizer has a habit of throwing it away otherwise...
  420. volatile double tt = x * kk;
  421. x += tt;
  422. if (x >= 0.0)
  423. return (__int64)(x + 0.5);
  424. return -(__int64)(-x + 0.5);
  425. }
  426. double rtlRoundTo(const double x, int places)
  427. {
  428. if (x < 0)
  429. return -rtlRoundTo(-x, places);
  430. volatile double tt = x * kk;
  431. double x0 = x + tt;
  432. if (places >= 0)
  433. {
  434. double scale = powerOfTen(places);
  435. return floor(x * scale + 0.5) / scale;
  436. }
  437. else
  438. {
  439. double scale = powerOfTen(-places);
  440. return floor(x / scale + 0.5) * scale;
  441. }
  442. }
  443. __int64 rtlRoundDown(double x)
  444. {
  445. if (x >= 0.0)
  446. return (__int64)floor(x);
  447. return (__int64)ceil(x);
  448. }
  449. __int64 rtlRoundUp(double x)
  450. {
  451. if (x >= 0.0)
  452. return (__int64)ceil(x);
  453. return (__int64)floor(x);
  454. }
  455. //=============================================================================
  456. // Numeric conversion functions... - fixed length target
  457. #define intToStringNBody() \
  458. unsigned len = numtostr(temp, val); \
  459. if (len > l) \
  460. memset(t,'*',l); \
  461. else \
  462. { \
  463. memcpy(t,temp,len); \
  464. memset(t+len, ' ', l-len); \
  465. }
  466. void rtlUInt4ToStr(size32_t l, char * t, unsigned val)
  467. {
  468. char temp[20];
  469. intToStringNBody();
  470. }
  471. void rtlUInt8ToStr(size32_t l, char * t, unsigned __int64 val)
  472. {
  473. char temp[40];
  474. intToStringNBody();
  475. }
  476. void rtlInt4ToStr(size32_t l, char * t, int val)
  477. {
  478. char temp[20];
  479. intToStringNBody();
  480. }
  481. void rtlInt8ToStr(size32_t l, char * t, __int64 val)
  482. {
  483. char temp[40];
  484. intToStringNBody();
  485. }
  486. //=============================================================================
  487. // Numeric conversion functions... - unknown length target
  488. #define intToUnknownStringBody() \
  489. unsigned len = numtostr(temp, val); \
  490. char * result = (char *)malloc(len); \
  491. memcpy(result, temp, len); \
  492. l = len; \
  493. t = result;
  494. void rtlUInt4ToStrX(size32_t & l, char * & t, unsigned val)
  495. {
  496. char temp[20];
  497. intToUnknownStringBody();
  498. }
  499. void rtlUInt8ToStrX(size32_t & l, char * & t, unsigned __int64 val)
  500. {
  501. char temp[40];
  502. intToUnknownStringBody();
  503. }
  504. void rtlInt4ToStrX(size32_t & l, char * & t, int val)
  505. {
  506. char temp[20];
  507. intToUnknownStringBody();
  508. }
  509. void rtlInt8ToStrX(size32_t & l, char * & t, __int64 val)
  510. {
  511. char temp[40];
  512. intToUnknownStringBody();
  513. }
  514. //=============================================================================
  515. // Numeric conversion functions... - fixed length ebcdic target
// ILKA - converting numeric to ebcdic still goes via an intermediate ascii string; for more
// efficiency a function numtoebcdicstr should be implemented
  518. #define intToEbcdicStringNBody() \
  519. unsigned len = numtostr(astr, val); \
  520. rtlStrToEStr(sizeof(estr),estr,len,astr); \
  521. if (len > l) \
  522. memset(t,0x2A,l); \
  523. else \
  524. { \
  525. memcpy(t,estr,len); \
  526. memset(t+len, '@', l-len); \
  527. }
  528. void rtl_l42en(size32_t l, char * t, unsigned val)
  529. {
  530. char astr[20];
  531. char estr[20];
  532. intToEbcdicStringNBody();
  533. }
  534. void rtl_l82en(size32_t l, char * t, unsigned __int64 val)
  535. {
  536. char astr[40];
  537. char estr[40];
  538. intToEbcdicStringNBody();
  539. }
  540. void rtl_ls42en(size32_t l, char * t, int val)
  541. {
  542. char astr[20];
  543. char estr[20];
  544. intToEbcdicStringNBody();
  545. }
  546. void rtl_ls82en(size32_t l, char * t, __int64 val)
  547. {
  548. char astr[40];
  549. char estr[40];
  550. intToEbcdicStringNBody();
  551. }
  552. //=============================================================================
  553. // Numeric conversion functions... - unknown length ebcdic target
  554. #define intToUnknownEbcdicStringBody() \
  555. unsigned alen = numtostr(astr, val); \
  556. rtlStrToEStrX(elen,estr,alen,astr); \
  557. char * result = (char *)malloc(elen); \
  558. memcpy(result, estr, elen); \
  559. l = elen; \
  560. t = result;
  561. #if defined _MSC_VER
  562. #pragma warning(push)
  563. #pragma warning(disable:4700)
  564. #endif
  565. void rtl_l42ex(size32_t & l, char * & t, unsigned val)
  566. {
  567. char astr[20];
  568. char * estr;
  569. unsigned elen;
  570. intToUnknownEbcdicStringBody();
  571. }
  572. void rtl_l82ex(size32_t & l, char * & t, unsigned __int64 val)
  573. {
  574. char astr[40];
  575. char * estr;
  576. unsigned elen;
  577. intToUnknownEbcdicStringBody();
  578. }
  579. void rtl_ls42ex(size32_t & l, char * & t, int val)
  580. {
  581. char astr[20];
  582. char * estr;
  583. unsigned elen;
  584. intToUnknownEbcdicStringBody();
  585. }
  586. void rtl_ls82ex(size32_t & l, char * & t, __int64 val)
  587. {
  588. char astr[40];
  589. char * estr;
  590. unsigned elen;
  591. intToUnknownEbcdicStringBody();
  592. }
  593. #ifdef _MSC_VER
  594. #pragma warning(pop)
  595. #endif
  596. //=============================================================================
  597. // Numeric conversion functions... - fixed length variable target
  598. #define intToVarStringNBody() \
  599. unsigned len = numtostr(temp, val) + 1; \
  600. if (len > l) \
  601. { \
  602. memset(t,'*',l); \
  603. t[l-1]=0; \
  604. } \
  605. else \
  606. memcpy(t,temp,len);
  607. void rtlUInt4ToVStr(size32_t l, char * t, unsigned val)
  608. {
  609. char temp[20];
  610. intToVarStringNBody();
  611. }
  612. void rtlUInt8ToVStr(size32_t l, char * t, unsigned __int64 val)
  613. {
  614. char temp[40];
  615. intToVarStringNBody();
  616. }
  617. void rtlInt4ToVStr(size32_t l, char * t, int val)
  618. {
  619. char temp[20];
  620. intToVarStringNBody();
  621. }
  622. void rtlInt8ToVStr(size32_t l, char * t, __int64 val)
  623. {
  624. char temp[40];
  625. intToVarStringNBody();
  626. }
  627. //=============================================================================
  628. // Numeric conversion functions... - unknown length variable target
  629. #define intToVarStringXBody() \
  630. unsigned len = numtostr(temp, val); \
  631. temp[len] = 0; \
  632. return strdup(temp);
  633. char * rtlUInt4ToVStrX(unsigned val)
  634. {
  635. char temp[20];
  636. intToVarStringXBody();
  637. }
  638. char * rtlUInt8ToVStrX(unsigned __int64 val)
  639. {
  640. char temp[40];
  641. intToVarStringXBody();
  642. }
  643. char * rtlInt4ToVStrX(int val)
  644. {
  645. char temp[20];
  646. intToVarStringXBody();
  647. }
  648. char * rtlInt8ToVStrX(__int64 val)
  649. {
  650. char temp[40];
  651. intToVarStringXBody();
  652. }
  653. //---------------------------------------------------------------------------
  654. double rtlStrToReal(size32_t l, const char * t)
  655. {
  656. char * temp = (char *)alloca(l+1);
  657. memcpy(temp, t, l);
  658. temp[l] = 0;
  659. return rtlVStrToReal(temp);
  660. }
  661. double rtlEStrToReal(size32_t l, const char * t)
  662. {
  663. char * astr = (char*)alloca(l);
  664. rtlEStrToStr(l,astr,l,t);
  665. char * temp = (char *)alloca(l+1);
  666. memcpy(temp, astr, l);
  667. temp[l] = 0;
  668. return rtlVStrToReal(temp);
  669. }
  670. double rtlVStrToReal(const char * t)
  671. {
  672. char * end;
  673. return strtod(t, &end);
  674. }
  675. double rtl_ex2f(const char * t)
  676. {
  677. unsigned len = strlen(t);
  678. char * astr = (char*)alloca(len+1);
  679. rtlEStrToStr(len,astr,len,t);
  680. astr[len] = 0;
  681. return rtlVStrToReal(astr);
  682. }
  683. double rtlUnicodeToReal(size32_t l, UChar const * t)
  684. {
  685. unsigned bufflen;
  686. char * buff;
  687. rtlUnicodeToCodepageX(bufflen, buff, l, t, "ascii");
  688. double ret = rtlStrToReal(bufflen, buff);
  689. rtlFree(buff);
  690. return ret;
  691. }
  692. //---------------------------------------------------------------------------
  693. void rtlRealToStr(size32_t l, char * t, double val)
  694. {
  695. StringBuffer temp;
  696. temp.append(val);
  697. unsigned len = temp.length();
  698. if (len > l)
  699. memset(t,'*',l);
  700. else
  701. {
  702. memcpy(t,temp.str(),len);
  703. memset(t+len, ' ', l-len);
  704. }
  705. }
  706. void rtlRealToStr(size32_t l, char * t, float val)
  707. {
  708. StringBuffer temp;
  709. temp.append(val);
  710. unsigned len = temp.length();
  711. if (len > l)
  712. memset(t,'*',l);
  713. else
  714. {
  715. memcpy(t,temp.str(),len);
  716. memset(t+len, ' ', l-len);
  717. }
  718. }
  719. void rtlRealToStrX(size32_t & l, char * & t, double val)
  720. {
  721. StringBuffer temp;
  722. temp.append(val);
  723. unsigned len = temp.length();
  724. char * result = (char *)malloc(len);
  725. memcpy(result,temp.str(),len);
  726. l = len;
  727. t = result;
  728. }
  729. void rtlRealToStrX(size32_t & l, char * & t, float val)
  730. {
  731. StringBuffer temp;
  732. temp.append(val);
  733. unsigned len = temp.length();
  734. char * result = (char *)malloc(len);
  735. memcpy(result,temp.str(),len);
  736. l = len;
  737. t = result;
  738. }
  739. void rtlRealToVStr(size32_t l, char * t, double val)
  740. {
  741. StringBuffer temp;
  742. temp.append(val);
  743. unsigned len = temp.length()+1;
  744. if (len > l)
  745. {
  746. memset(t,'*',l);
  747. t[l-1]=0;
  748. }
  749. else
  750. {
  751. memcpy(t,temp.str(),len);
  752. }
  753. }
  754. void rtlRealToVStr(size32_t l, char * t, float val)
  755. {
  756. StringBuffer temp;
  757. temp.append(val);
  758. unsigned len = temp.length()+1;
  759. if (len > l)
  760. {
  761. memset(t,'*',l);
  762. t[l-1]=0;
  763. }
  764. else
  765. {
  766. memcpy(t,temp.str(),len);
  767. }
  768. }
  769. char * rtlRealToVStrX(double val)
  770. {
  771. StringBuffer temp;
  772. temp.append(val);
  773. return strdup(temp);
  774. }
  775. char * rtlRealToVStrX(float val)
  776. {
  777. StringBuffer temp;
  778. temp.append(val);
  779. return strdup(temp);
  780. }
  781. //---------------------------------------------------------------------------
  782. #define SkipSpaces(l, t) \
  783. while (l) \
  784. { \
  785. char c = *t; \
  786. switch (c) \
  787. { \
  788. case ' ': \
  789. case '\t': \
  790. case '-': \
  791. case '+': \
  792. break; \
  793. default: \
  794. goto done; \
  795. } \
  796. l--; \
  797. t++; \
  798. } \
  799. done:
  800. #define SkipSignSpaces(l, t, negate) \
  801. while (l) \
  802. { \
  803. char c = *t; \
  804. switch (c) \
  805. { \
  806. case '-': \
  807. negate = true; \
  808. break; \
  809. case ' ': \
  810. case '\t': \
  811. case '+': \
  812. break; \
  813. default: \
  814. goto done; \
  815. } \
  816. l--; \
  817. t++; \
  818. } \
  819. done:
  820. unsigned rtlStrToUInt4(size32_t l, const char * t)
  821. {
  822. SkipSpaces(l, t);
  823. unsigned v = 0;
  824. while (l--)
  825. {
  826. char c = *t++;
  827. if ((c >= '0') && (c <= '9'))
  828. v = v * 10 + (c-'0');
  829. else
  830. break;
  831. }
  832. return v;
  833. }
  834. unsigned __int64 rtlStrToUInt8(size32_t l, const char * t)
  835. {
  836. SkipSpaces(l, t);
  837. unsigned __int64 v = 0;
  838. while (l--)
  839. {
  840. char c = *t++;
  841. if ((c >= '0') && (c <= '9'))
  842. v = v * 10 + (c-'0');
  843. else
  844. break;
  845. }
  846. return v;
  847. }
  848. int rtlStrToInt4(size32_t l, const char * t)
  849. {
  850. bool negate = false;
  851. SkipSignSpaces(l, t, negate);
  852. int v = 0;
  853. while (l--)
  854. {
  855. char c = *t++;
  856. if ((c >= '0') && (c <= '9'))
  857. v = v * 10 + (c-'0');
  858. else
  859. break;
  860. }
  861. return negate ? -v : v;
  862. }
  863. __int64 rtlStrToInt8(size32_t l, const char * t)
  864. {
  865. bool negate = false;
  866. SkipSignSpaces(l, t, negate);
  867. __int64 v = 0;
  868. while (l--)
  869. {
  870. char c = *t++;
  871. if ((c >= '0') && (c <= '9'))
  872. v = v * 10 + (c-'0');
  873. else
  874. break;
  875. }
  876. return negate ? -v : v;
  877. }
  878. __int64 rtlUnicodeToInt8(size32_t l, UChar const * t)
  879. {
  880. unsigned bufflen;
  881. char * buff;
  882. rtlUnicodeToCodepageX(bufflen, buff, l, t, "ascii");
  883. __int64 ret = rtlStrToInt8(bufflen, buff);
  884. rtlFree(buff);
  885. return ret;
  886. }
  887. bool rtlStrToBool(size32_t l, const char * t)
  888. {
  889. while (l--)
  890. {
  891. char c = *t++;
  892. if (c != ' ')
  893. return true;
  894. }
  895. return false;
  896. }
  897. bool rtlUnicodeToBool(size32_t l, UChar const * t)
  898. {
  899. while(l--)
  900. if(*t++ != 0x20) return true;
  901. return false;
  902. }
  903. // return true for "on", "true" or any non-zero constant, else false;
  904. bool rtlCsvStrToBool(size32_t l, const char * t)
  905. {
  906. return clipStrToBool(l, t);
  907. }
  908. //---------------------------------------------------------------------------
  909. unsigned rtlEStrToUInt4(size32_t l, const char * t)
  910. {
  911. char * astr = (char*)alloca(l);
  912. rtlEStrToStr(l,astr,l,t);
  913. return rtlStrToUInt4(l,astr);
  914. }
  915. unsigned __int64 rtlEStrToUInt8(size32_t l, const char * t)
  916. {
  917. char * astr = (char*)alloca(l);
  918. rtlEStrToStr(l,astr,l,t);
  919. return rtlStrToUInt8(l,astr);
  920. }
  921. int rtlEStrToInt4(size32_t l, const char * t)
  922. {
  923. char * astr = (char*)alloca(l);
  924. rtlEStrToStr(l,astr,l,t);
  925. return rtlStrToInt4(l,astr);
  926. }
  927. __int64 rtlEStrToInt8(size32_t l, const char * t)
  928. {
  929. char * astr = (char*)alloca(l);
  930. rtlEStrToStr(l,astr,l,t);
  931. return rtlStrToInt8(l,astr);
  932. }
  933. bool rtl_en2b(size32_t l, const char * t)
  934. {
  935. char * astr = (char*)alloca(l);
  936. rtlEStrToStr(l,astr,l,t);
  937. return rtlStrToBool(l,astr);
  938. }
  939. //---------------------------------------------------------------------------
  940. unsigned rtlVStrToUInt4(const char * t)
  941. {
  942. return rtlStrToUInt4(strlen(t), t);
  943. }
  944. unsigned __int64 rtlVStrToUInt8(const char * t)
  945. {
  946. return rtlStrToUInt8(strlen(t), t);
  947. }
  948. int rtlVStrToInt4(const char * t)
  949. {
  950. return rtlStrToInt4(strlen(t), t);
  951. }
  952. __int64 rtlVStrToInt8(const char * t)
  953. {
  954. return rtlStrToInt8(strlen(t), t);
  955. }
  956. bool rtlVStrToBool(const char * t)
  957. {
  958. char c;
  959. while ((c = *t++) != 0)
  960. {
  961. //MORE: Allow spaces if we change the semantics.
  962. return true;
  963. }
  964. return false;
  965. }
  966. //---------------------------------------------------------------------------
  967. void holeIntFormat(size32_t maxlen, char * target, __int64 value, unsigned width, unsigned flags)
  968. {
  969. StringBuffer result;
  970. if (flags & 1)
  971. result.appendf("%0*"I64F"d", width, value);
  972. else
  973. result.appendf("%*"I64F"d", width, value);
  974. size32_t written = result.length();
  975. if (written > maxlen)
  976. memset(target, '*', maxlen);
  977. else
  978. {
  979. memset(target+written, ' ', maxlen-written);
  980. memcpy(target, result.str(), written);
  981. }
  982. }
  983. void holeRealFormat(size32_t maxlen, char * target, double value, unsigned width, unsigned places)
  984. {
  985. if ((int) width < 0)
  986. return;
  987. char temp[500];
  988. if (width > sizeof(temp))
  989. {
  990. unsigned delta = width - sizeof(temp);
  991. memset(target, ' ', delta);
  992. target += delta;
  993. width = sizeof(temp);
  994. }
  995. if (places >= width) places = width-1;
  996. unsigned written = sprintf(temp, "%*.*f", width, places, value);
  997. if (written > width)
  998. {
  999. memset(target, '*', width);
  1000. if (places)
  1001. target[width-places-1] = '.';
  1002. }
  1003. else
  1004. memcpy(target, temp, width);
  1005. }
  1006. //=============================================================================
  1007. // Conversion functions...
  1008. void rtlIntFormat(unsigned & len, char * & target, __int64 value, unsigned width, unsigned flags)
  1009. {
  1010. if ((int) width <= 0)
  1011. {
  1012. len = 0;
  1013. target = NULL;
  1014. return;
  1015. }
  1016. len = width;
  1017. target = (char *)malloc(width);
  1018. holeIntFormat(width, target, value, width, flags);
  1019. }
  1020. void rtlRealFormat(unsigned & len, char * & target, double value, unsigned width, unsigned places)
  1021. {
  1022. if ((int) width < 0)
  1023. {
  1024. len = 0;
  1025. target = NULL;
  1026. return;
  1027. }
  1028. len = width;
  1029. target = (char *)malloc(width);
  1030. holeRealFormat(width, target, value, width, places);
  1031. }
  1032. //=============================================================================
  1033. // String functions...
  1034. bool rtlDataToBool(unsigned len, const void * _src)
  1035. {
  1036. const char * src = (const char *)_src;
  1037. while (len--)
  1038. if (*src++)
  1039. return true;
  1040. return false;
  1041. }
  1042. void rtlBoolToData(unsigned tlen, void * tgt, bool src)
  1043. {
  1044. memset(tgt, 0, tlen);
  1045. if (src)
  1046. ((char *)tgt)[tlen-1] = 1;
  1047. }
  1048. void rtlBoolToStr(unsigned tlen, void * tgt, bool src)
  1049. {
  1050. memset(tgt, ' ', tlen);
  1051. if (src)
  1052. ((char *)tgt)[tlen-1] = '1';
  1053. }
  1054. void rtlBoolToVStr(char * tgt, bool src)
  1055. {
  1056. if (src)
  1057. *tgt++ = '1';
  1058. *tgt = 0;
  1059. }
  1060. void rtlBoolToStrX(unsigned & tlen, char * & tgt, bool src)
  1061. {
  1062. if (src)
  1063. {
  1064. char * ret = (char *)malloc(1);
  1065. ret[0] = '1';
  1066. tlen = 1;
  1067. tgt = ret;
  1068. }
  1069. else
  1070. {
  1071. tlen = 0;
  1072. tgt = NULL;
  1073. }
  1074. }
  1075. char * rtlBoolToVStrX(bool src)
  1076. {
  1077. if (src)
  1078. return strdup("1");
  1079. else
  1080. return strdup("");
  1081. }
  1082. //-----------------------------------------------------------------------------
  1083. // String copying functions....
  // Copy DATA into a fixed-width DATA target: truncate to tlen, zero-fill any remainder.
  void rtlDataToData(unsigned tlen, void * tgt, unsigned slen, const void * src)
  {
      const unsigned copyLen = (slen > tlen) ? tlen : slen;
      memcpy(tgt, src, copyLen);
      const unsigned padLen = tlen - copyLen;
      if (padLen != 0)
          memset((char *)tgt + copyLen, 0, padLen);
  }
  // Copy a STRING into a fixed-width DATA target: truncate to tlen, zero-fill any remainder.
  void rtlStrToData(unsigned tlen, void * tgt, unsigned slen, const void * src)
  {
      char * out = (char *)tgt;
      const unsigned kept = (slen > tlen) ? tlen : slen;
      memcpy(out, src, kept);
      if (kept < tlen)
          memset(out + kept, 0, tlen - kept);
  }
  // Copy a STRING into a fixed-width STRING target: truncate to tlen, space-fill any remainder.
  void rtlStrToStr(unsigned tlen, void * tgt, unsigned slen, const void * src)
  {
      char * out = (char *)tgt;
      const unsigned kept = (slen > tlen) ? tlen : slen;
      memcpy(out, src, kept);
      if (kept < tlen)
          memset(out + kept, ' ', tlen - kept);
  }
  // Copy a STRING into a NUL-terminated target of capacity tlen (terminator included).
  // The source is clipped to tlen-1 characters, except when tlen is 0, where the
  // code deliberately skips the clip and copies all slen characters.
  void rtlStrToVStr(unsigned tlen, void * tgt, unsigned slen, const void * src)
  {
      char * out = (char *)tgt;
      if (tlen != 0 && slen >= tlen)
          slen = tlen-1;
      memcpy(out, src, slen);
      out[slen] = 0;
  }
  1115. void rtlStr2EStr(unsigned tlen, char * tgt, unsigned slen, const char * src)
  1116. {
  1117. rtlStrToEStr(tlen,tgt,slen,src);
  1118. }
  1119. void rtlEStr2Data(unsigned tlen, void * tgt, unsigned slen, const char * src)
  1120. {
  1121. if (slen > tlen)
  1122. slen = tlen;
  1123. rtlEStrToStr(slen,(char *)tgt,slen,src);
  1124. if (tlen > slen)
  1125. memset((char *)tgt+slen, 0, tlen-slen);
  1126. }
  1127. void rtlEStr2Str(unsigned tlen, void * tgt, unsigned slen, const char * src)
  1128. {
  1129. rtlEStrToStr(tlen,(char *)tgt,slen,src);
  1130. }
  1131. void rtlEStrToVStr(unsigned tlen, void * tgt, unsigned slen, const char * src)
  1132. {
  1133. if (slen >= tlen)
  1134. slen = tlen-1;
  1135. rtlEStrToStr(slen,(char *)tgt,slen,src);
  1136. *((char *)tgt+slen)=0;
  1137. }
  // Copy into a fixed-width target: truncate to tlen and pad any remainder with '@' bytes.
  void rtlEStrToEStr(unsigned tlen, void * tgt, unsigned slen, const void * src)
  {
      char * out = (char *)tgt;
      const unsigned kept = (slen > tlen) ? tlen : slen;
      memcpy(out, src, kept);
      if (kept < tlen)
          memset(out + kept, '@', tlen - kept);
  }
  1146. void rtlVStrToData(unsigned tlen, void * tgt, const char * src)
  1147. {
  1148. rtlStrToData(tlen, tgt, strlen(src), src);
  1149. }
  1150. void rtlVStrToStr(unsigned tlen, void * tgt, const char * src)
  1151. {
  1152. rtlStrToStr(tlen, tgt, strlen(src), src);
  1153. }
  1154. void rtlVStr2EStr(unsigned tlen, char * tgt, const char * src)
  1155. {
  1156. rtlStr2EStr(tlen, tgt, strlen(src), src);
  1157. }
  1158. void rtlVStrToVStr(unsigned tlen, void * tgt, const char * src)
  1159. {
  1160. rtlStrToVStr(tlen, tgt, strlen(src), src);
  1161. }
  // Return a malloc'd, NUL-terminated copy of the _len_tgt bytes at tgt wrapped in single quotes.
  // MORE! embedded quotes are copied as-is, not escaped.
  char *rtlCreateQuotedString(unsigned _len_tgt,char * tgt)
  {
      const char quote = '\'';
      char * quoted = (char *)malloc(_len_tgt + 3);
      char * cur = quoted;
      *cur++ = quote;
      memcpy(cur, tgt, _len_tgt);
      cur += _len_tgt;
      *cur++ = quote;
      *cur = 0;
      return quoted;
  }
  1172. //-----------------------------------------------------------------------------
  1173. //List of strings with length of -1 to mark the end...
  // Concatenate a variadic list of (unsigned length, char * text) pairs into a new malloc'd buffer.
  // The list is ended by a length of (unsigned)-1. The result is not NUL-terminated.
  void rtlConcat(unsigned & tlen, char * * tgt, ...)
  {
      const unsigned endMarker = (unsigned)-1;
      va_list args;

      // Pass 1: add up the segment lengths.
      unsigned total = 0;
      va_start(args, tgt);
      while (true)
      {
          const unsigned segLen = va_arg(args, unsigned);
          if (segLen == endMarker)
              break;
          (void)va_arg(args, char *);     // step over the text pointer
          total += segLen;
      }
      va_end(args);

      // Pass 2: copy each segment into place.
      char * result = (char *)malloc(total);
      unsigned offset = 0;
      va_start(args, tgt);
      while (true)
      {
          const unsigned segLen = va_arg(args, unsigned);
          if (segLen == endMarker)
              break;
          const char * seg = va_arg(args, char *);
          memcpy(result + offset, seg, segLen);
          offset += segLen;
      }
      va_end(args);

      tlen = total;
      *tgt = result;
  }
  // Concatenate a variadic list of (unsigned length, char * text) pairs into a new malloc'd,
  // NUL-terminated string. The list is ended by a length of (unsigned)-1.
  void rtlConcatVStr(char * * tgt, ...)
  {
      const unsigned endMarker = (unsigned)-1;
      va_list args;

      // Pass 1: add up the segment lengths.
      unsigned total = 0;
      va_start(args, tgt);
      while (true)
      {
          const unsigned segLen = va_arg(args, unsigned);
          if (segLen == endMarker)
              break;
          (void)va_arg(args, char *);     // step over the text pointer
          total += segLen;
      }
      va_end(args);

      // Pass 2: copy each segment, then terminate.
      char * result = (char *)malloc(total+1);
      char * out = result;
      va_start(args, tgt);
      while (true)
      {
          const unsigned segLen = va_arg(args, unsigned);
          if (segLen == endMarker)
              break;
          const char * seg = va_arg(args, char *);
          memcpy(out, seg, segLen);
          out += segLen;
      }
      va_end(args);

      *out = 0;
      *tgt = result;
  }
  1234. void rtlConcatUnicode(unsigned & tlen, UChar * * tgt, ...)
  1235. {
  1236. va_list args;
  1237. unsigned totalLength = 0;
  1238. va_start(args, tgt);
  1239. for(;;)
  1240. {
  1241. unsigned len = va_arg(args, unsigned);
  1242. if(len+1==0)
  1243. break;
  1244. UChar * str = va_arg(args, UChar *);
  1245. totalLength += len;
  1246. }
  1247. va_end(args);
  1248. UChar * buffer = (UChar *)malloc(totalLength*2); //I *believe* this is a valid upper limit, as an NFC concatenation can only be shorter than the sum of its parts
  1249. unsigned idx = 0;
  1250. UErrorCode err = U_ZERO_ERROR;
  1251. va_start(args, tgt);
  1252. for(;;)
  1253. {
  1254. unsigned len = va_arg(args, unsigned);
  1255. if(len+1==0)
  1256. break;
  1257. UChar * str = va_arg(args, UChar *);
  1258. if (len)
  1259. idx = unorm_concatenate(buffer, idx, str, len, buffer, totalLength, UNORM_NFC, 0, &err);
  1260. }
  1261. va_end(args);
  1262. *tgt = buffer;
  1263. tlen = idx;
  1264. }
  1265. void rtlConcatVUnicode(UChar * * tgt, ...)
  1266. {
  1267. va_list args;
  1268. unsigned totalLength = 0;
  1269. va_start(args, tgt);
  1270. for(;;)
  1271. {
  1272. unsigned len = va_arg(args, unsigned);
  1273. if(len+1==0)
  1274. break;
  1275. UChar * str = va_arg(args, UChar *);
  1276. totalLength += len;
  1277. }
  1278. va_end(args);
  1279. UChar * buffer = (UChar *)malloc((totalLength+1)*2); //I *believe* this is a valid upper limit, as an NFC concatenation can only be shorter than the sum of its parts
  1280. unsigned idx = 0;
  1281. UErrorCode err = U_ZERO_ERROR;
  1282. va_start(args, tgt);
  1283. for(;;)
  1284. {
  1285. unsigned len = va_arg(args, unsigned);
  1286. if(len+1==0)
  1287. break;
  1288. UChar * str = va_arg(args, UChar *);
  1289. if (len)
  1290. idx = unorm_concatenate(buffer, idx, str, len, buffer, totalLength, UNORM_NFC, 0, &err);
  1291. }
  1292. va_end(args);
  1293. buffer[idx++] = 0x0000;
  1294. *tgt = buffer;
  1295. }
  1296. //List of strings with length of -1 to mark the end...
  // Concatenate (unsigned length, const char * text) pairs into a fixed tlen-byte target,
  // truncating at tlen and padding any remainder with `fill`.
  // The list is ended by a length of (unsigned)-1; reading stops early once the target is full.
  void rtlConcatStrF(unsigned tlen, void * _tgt, int fill, ...)
  {
      char * out = (char *)_tgt;
      unsigned used = 0;
      va_list args;
      va_start(args, fill);
      for (; used != tlen; )
      {
          const unsigned segLen = va_arg(args, unsigned);
          if (segLen == (unsigned)-1)
              break;
          const char * seg = va_arg(args, const char *);
          unsigned take = segLen;
          if (segLen + used > tlen)
              take = tlen - used;
          memcpy(out + used, seg, take);
          used += take;
      }
      va_end(args);
      if (used < tlen)
          memset(out + used, fill, tlen - used);
  }
  // Concatenate (unsigned length, const char * text) pairs into a target holding tlen characters
  // plus a terminator: truncates at tlen, then zeroes everything from the end of the text
  // through tgt[tlen]. The list is ended by a length of (unsigned)-1.
  void rtlConcatVStrF(unsigned tlen, char * tgt, ...)
  {
      unsigned used = 0;
      va_list args;
      va_start(args, tgt);
      for (; used != tlen; )
      {
          const unsigned segLen = va_arg(args, unsigned);
          if (segLen == (unsigned)-1)
              break;
          const char * seg = va_arg(args, const char *);
          unsigned take = segLen;
          if (segLen + used > tlen)
              take = tlen - used;
          memcpy(tgt + used, seg, take);
          used += take;
      }
      va_end(args);
      memset(tgt + used, 0, (tlen+1) - used);
  }
  1335. void rtlConcatUnicodeF(unsigned tlen, UChar * tgt, ...)
  1336. {
  1337. va_list args;
  1338. unsigned idx = 0;
  1339. UErrorCode err = U_ZERO_ERROR;
  1340. va_start(args, tgt);
  1341. for(;;)
  1342. {
  1343. unsigned len = va_arg(args, unsigned);
  1344. if(len+1==0)
  1345. break;
  1346. UChar * str = va_arg(args, UChar *);
  1347. if (len)
  1348. idx = unorm_concatenate(tgt, idx, str, len, tgt, tlen, UNORM_NFC, 0, &err);
  1349. }
  1350. va_end(args);
  1351. while (idx < tlen)
  1352. tgt[idx++] = ' ';
  1353. }
  1354. void rtlConcatVUnicodeF(unsigned tlen, UChar * tgt, ...)
  1355. {
  1356. va_list args;
  1357. unsigned idx = 0;
  1358. UErrorCode err = U_ZERO_ERROR;
  1359. va_start(args, tgt);
  1360. for(;;)
  1361. {
  1362. unsigned len = va_arg(args, unsigned);
  1363. if(len+1==0)
  1364. break;
  1365. UChar * str = va_arg(args, UChar *);
  1366. if (len)
  1367. idx = unorm_concatenate(tgt, idx, str, len, tgt, tlen, UNORM_NFC, 0, &err);
  1368. }
  1369. va_end(args);
  1370. while (idx < tlen)
  1371. tgt[idx++] = 0;
  1372. tgt[tlen] = 0;
  1373. }
  1374. //------------------------------------------------------------------------------------------------
  1375. // The following concat functions are all deprecated in favour of the variable number of argument
  1376. // versions
  // Append up to slen characters of src at tgt[idx], limited to the space left before tlen.
  // Returns the index just past the appended text. No padding or terminator is written.
  unsigned rtlConcatStrToStr(unsigned tlen, char * tgt, unsigned idx, unsigned slen, const char * src)
  {
      const unsigned room = tlen-idx;
      const unsigned take = (room > slen) ? slen : room;
      memcpy(tgt+idx, src, take);
      return idx+take;
  }
  // Append the NUL-terminated src at tgt[idx], stopping at the terminator or when idx reaches tlen.
  // Returns the index just past the appended text. The terminator itself is not copied.
  unsigned rtlConcatVStrToStr(unsigned tlen, char * tgt, unsigned idx, const char * src)
  {
      for (; idx != tlen && *src != 0; ++src)
          tgt[idx++] = *src;
      return idx;
  }
  1396. void rtlConcatStrToVStr(unsigned tlen, void * _tgt, unsigned slen, const void * src)
  1397. {
  1398. char * tgt = (char *)_tgt;
  1399. unsigned tend = strlen(tgt);
  1400. rtlStrToVStr(tlen-tend, tgt+tend, slen, src);
  1401. }
  1402. void rtlConcatVStrToVStr(unsigned tlen, void * _tgt, const char * src)
  1403. {
  1404. char * tgt = (char *)_tgt;
  1405. unsigned tend = strlen(tgt);
  1406. rtlVStrToVStr(tlen-tend, tgt+tend, src);
  1407. }
  1408. unsigned rtlConcatUnicodeToUnicode(unsigned tlen, UChar * tgt, unsigned idx, unsigned slen, UChar const * src)
  1409. {
  1410. UErrorCode err = U_ZERO_ERROR;
  1411. return unorm_concatenate(tgt, idx, src, slen, tgt, tlen, UNORM_NFC, 0, &err);
  1412. }
  1413. unsigned rtlConcatVUnicodeToUnicode(unsigned tlen, UChar * tgt, unsigned idx, UChar const * src)
  1414. {
  1415. return rtlConcatUnicodeToUnicode(tlen, tgt, idx, rtlUnicodeStrlen(src), src);
  1416. }
  // Fill tgt[idx..tlen) with '@' bytes; does nothing when idx is at or past tlen.
  void rtlESpaceFill(unsigned tlen, char * tgt, unsigned idx)
  {
      if (idx >= tlen)
          return;
      memset(tgt+idx, '@', tlen-idx);
  }
  // Fill tgt[idx..tlen) with spaces; does nothing when idx is at or past tlen.
  void rtlSpaceFill(unsigned tlen, char * tgt, unsigned idx)
  {
      if (idx >= tlen)
          return;
      memset(tgt+idx, ' ', tlen-idx);
  }
  // Fill tgt[idx..tlen) with zero bytes; does nothing when idx is at or past tlen.
  void rtlZeroFill(unsigned tlen, char * tgt, unsigned idx)
  {
      if (idx >= tlen)
          return;
      memset(tgt+idx, 0, tlen-idx);
  }
  // Write a NUL at tgt[idx], pulling idx back to the last slot (tlen-1) when it is out of range.
  // A zero-capacity target is left untouched; previously tlen-1 wrapped around and the
  // write landed far outside the buffer.
  void rtlNullTerminate(unsigned tlen, char * tgt, unsigned idx)
  {
      if (tlen == 0)
          return;
      if (idx >= tlen)
          idx = tlen-1;
      tgt[idx] = 0;
  }
  1438. void rtlUnicodeSpaceFill(unsigned tlen, UChar * tgt, unsigned idx)
  1439. {
  1440. while(idx<tlen) tgt[idx++] = 0x0020;
  1441. }
  1442. void rtlUnicodeNullTerminate(unsigned tlen, UChar * tgt, unsigned idx)
  1443. {
  1444. if (idx >= tlen)
  1445. idx = tlen-1;
  1446. tgt[idx] = 0x0000;
  1447. }
  1448. void rtlUnicodeStrcpy(UChar * tgt, UChar const * src)
  1449. {
  1450. memcpy(tgt, src, rtlUnicodeStrlen(src)*2+2);
  1451. }
  // Grow the heap buffer tgt (currently tlen bytes) with realloc and append slen bytes of src.
  // tgt and tlen are updated to the new buffer and length.
  void rtlConcatExtend(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  {
      const unsigned oldLen = tlen;
      const unsigned newLen = oldLen + slen;
      tgt = (char *)realloc(tgt, newLen);
      memcpy(tgt + oldLen, src, slen);
      tlen = newLen;
  }
  1459. //-----------------------------------------------------------------------------
  // Turn a 1-based start position into a 0-based offset clamped to [0, slen].
  // Values that become negative when viewed as int map to 0.
  inline void normalizeFrom(unsigned & from, unsigned slen)
  {
      const unsigned zeroBased = from - 1;
      if ((int)zeroBased < 0)
          from = 0;
      else
          from = (zeroBased > slen) ? slen : zeroBased;
  }
  // Turn a 1-based [from, to] range into a 0-based half-open [from, to):
  // from is decremented and floored at 0, and to is raised to from when it lies below it
  // (both compared as int).
  inline void normalizeFromTo(unsigned & from, unsigned & to)
  {
      const unsigned zeroBased = from - 1;
      from = ((int)zeroBased < 0) ? 0 : zeroBased;
      if ((int)to < (int)from)
          to = from;
  }
  // Clip a 0-based [from, to) range to a source of slen units.
  // from is only examined when to had to be clipped.
  inline void clipFromTo(unsigned & from, unsigned & to, unsigned slen)
  {
      if (to <= slen)
          return;
      to = slen;
      if (from > slen)
          from = slen;
  }
  1483. //NB: From and to are 1 based: Now fills to ensure the correct length.
  1484. void * doSubStrFT(unsigned & tlen, unsigned slen, const void * src, unsigned from, unsigned to, byte fillChar)
  1485. {
  1486. normalizeFromTo(from, to);
  1487. unsigned len = to - from;
  1488. clipFromTo(from, to, slen);
  1489. unsigned copylen = to - from;
  1490. char * buffer = (char *)malloc(len);
  1491. memcpy(buffer, (byte *)src+from, copylen);
  1492. if (copylen < len)
  1493. memset(buffer+copylen, fillChar, len-copylen);
  1494. tlen = len;
  1495. return buffer;
  1496. }
  1497. void rtlSubStrFX(unsigned & tlen, char * & tgt, unsigned slen, const char * src, unsigned from)
  1498. {
  1499. normalizeFrom(from, slen);
  1500. tlen = slen-from;
  1501. tgt = (char *) malloc(tlen);
  1502. memcpy(tgt, src+from, tlen);
  1503. }
  1504. void rtlSubStrFTX(unsigned & tlen, char * & tgt, unsigned slen, const char * src, unsigned from, unsigned to)
  1505. {
  1506. tgt = (char *)doSubStrFT(tlen, slen, src, from, to, ' ');
  1507. }
  1508. void rtlSubStrFT(unsigned tlen, char * tgt, unsigned slen, const char * src, unsigned from, unsigned to)
  1509. {
  1510. unsigned char fillChar = ' '; // More, should be passed as a parameter
  1511. normalizeFromTo(from, to);
  1512. clipFromTo(from, to, slen);
  1513. unsigned copylen = to - from;
  1514. if (copylen > tlen)
  1515. copylen = tlen;
  1516. memcpy(tgt, (const char *)src+from, copylen);
  1517. if (copylen < tlen)
  1518. memset(tgt+copylen, fillChar, tlen-copylen);
  1519. }
  1520. void rtlSubDataFT(unsigned tlen, void * tgt, unsigned slen, const void * src, unsigned from, unsigned to)
  1521. {
  1522. normalizeFromTo(from, to);
  1523. clipFromTo(from, to, slen);
  1524. unsigned copylen = to - from;
  1525. if (copylen > tlen)
  1526. copylen = tlen;
  1527. memcpy(tgt, (char *)src+from, copylen);
  1528. if (copylen < tlen)
  1529. memset((byte*)tgt+copylen, 0, tlen-copylen);
  1530. }
  1531. void rtlSubDataFTX(unsigned & tlen, void * & tgt, unsigned slen, const void * src, unsigned from, unsigned to)
  1532. {
  1533. tgt = doSubStrFT(tlen, slen, src, from, to, 0);
  1534. }
  1535. void rtlSubDataFX(unsigned & tlen, void * & tgt, unsigned slen, const void * src, unsigned from)
  1536. {
  1537. normalizeFrom(from, slen);
  1538. tlen = slen-from;
  1539. tgt = (char *) malloc(tlen);
  1540. memcpy(tgt, (const byte *)src+from, tlen);
  1541. }
  1542. void rtlUnicodeSubStrFTX(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src, unsigned from, unsigned to)
  1543. {
  1544. normalizeFromTo(from, to);
  1545. tlen = to - from;
  1546. clipFromTo(from, to, slen);
  1547. tgt = (UChar *)malloc(tlen*2);
  1548. unsigned copylen = to - from;
  1549. memcpy(tgt, src+from, copylen*2);
  1550. while(copylen<tlen)
  1551. tgt[copylen++] = 0x0020;
  1552. }
  1553. void rtlUnicodeSubStrFX(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src, unsigned from)
  1554. {
  1555. normalizeFrom(from, slen);
  1556. tlen = slen - from;
  1557. tgt = (UChar *)malloc(tlen*2);
  1558. memcpy(tgt, src+from, tlen*2);
  1559. }
  1560. void rtlSubQStrFTX(unsigned & tlen, char * & tgt, unsigned slen, char const * src, unsigned from, unsigned to)
  1561. {
  1562. normalizeFromTo(from, to);
  1563. tlen = to - from;
  1564. clipFromTo(from, to, slen);
  1565. tgt = (char *)malloc(rtlQStrSize(tlen));
  1566. copyQStrRange(tlen, tgt, src, from, to);
  1567. }
  1568. void rtlSubQStrFX(unsigned & tlen, char * & tgt, unsigned slen, char const * src, unsigned from)
  1569. {
  1570. normalizeFrom(from, slen);
  1571. tlen = slen - from;
  1572. tgt = (char *)malloc(rtlQStrSize(tlen));
  1573. copyQStrRange(tlen, tgt, src, from, slen);
  1574. }
  1575. void rtlSubQStrFT(unsigned tlen, char * tgt, unsigned slen, const char * src, unsigned from, unsigned to)
  1576. {
  1577. normalizeFromTo(from, to);
  1578. clipFromTo(from, to, slen);
  1579. copyQStrRange(tlen, tgt, src, from ,to);
  1580. }
  1581. //-----------------------------------------------------------------------------
  1582. unsigned rtlTrimStrLen(size32_t l, const char * t)
  1583. {
  1584. while (l)
  1585. {
  1586. if (t[l-1] != ' ')
  1587. break;
  1588. l--;
  1589. }
  1590. return l;
  1591. }
  1592. unsigned rtlTrimDataLen(size32_t l, const void * _t)
  1593. {
  1594. const char * t = (const char *)_t;
  1595. while (l)
  1596. {
  1597. if (t[l-1] != 0)
  1598. break;
  1599. l--;
  1600. }
  1601. return l;
  1602. }
  1603. unsigned rtlTrimUnicodeStrLen(size32_t l, UChar const * t)
  1604. {
  1605. if (!l)
  1606. return 0;
  1607. UCharCharacterIterator iter(t, l);
  1608. for(iter.last32(); iter.hasPrevious(); iter.previous32())
  1609. if(!u_isspace(iter.current32()))
  1610. break;
  1611. if(u_isspace(iter.current32())) return iter.getIndex(); // required as the reverse iteration above doesn't hit the first character
  1612. return iter.getIndex() + 1;
  1613. }
  1614. inline size32_t rtlQuickTrimUnicode(size32_t len, UChar const * str)
  1615. {
  1616. while (len && u_isspace(str[len-1]))
  1617. len--;
  1618. return len;
  1619. }
  // Length of the NUL-terminated string t once trailing spaces are dropped.
  // Single forward pass that remembers the position just after the last non-space.
  unsigned rtlTrimVStrLen(const char * t)
  {
      const char * keepEnd = t;
      for (const char * cur = t; *cur != 0; ++cur)
      {
          if (*cur != ' ')
              keepEnd = cur + 1;
      }
      return (keepEnd - t);
  }
  1632. unsigned rtlTrimVUnicodeStrLen(UChar const * t)
  1633. {
  1634. return rtlTrimUnicodeStrLen(rtlUnicodeStrlen(t), t);
  1635. }
  1636. inline unsigned rtlLeftTrimStrStart(size32_t slen, const char * src)
  1637. {
  1638. unsigned i = 0;
  1639. while(i < slen && src[i] == ' ')
  1640. i++;
  1641. return i;
  1642. }
  1643. inline unsigned rtlLeftTrimUnicodeStrStart(size32_t slen, UChar const * src)
  1644. {
  1645. UCharCharacterIterator iter(src, slen);
  1646. for(iter.first32(); iter.hasNext(); iter.next32())
  1647. if(!u_isspace(iter.current32()))
  1648. break;
  1649. return iter.getIndex();
  1650. }
  // Offset of the first non-space character in the NUL-terminated src
  // (the offset of the terminator when it is all spaces).
  inline unsigned rtlLeftTrimVStrStart(const char * src)
  {
      unsigned skipped;
      for (skipped = 0; src[skipped] == ' '; skipped++)
      {
      }
      return skipped;
  }
  1658. inline void rtlTrimUtf8Len(unsigned & trimLen, size32_t & trimSize, size32_t len, const char * t)
  1659. {
  1660. const byte * start = (const byte *)t;
  1661. const byte * cur = start;
  1662. unsigned trimLength = 0;
  1663. const byte * trimEnd = cur;
  1664. for (unsigned i=0; i < len; i++)
  1665. {
  1666. unsigned next = readUtf8Character(UTF8_MAXSIZE, cur);
  1667. if (!u_isspace(next))
  1668. {
  1669. trimLength = i+1;
  1670. trimEnd = cur;
  1671. }
  1672. }
  1673. trimLen = trimLength;
  1674. trimSize = trimEnd-start;
  1675. }
  1676. inline void rtlTrimUtf8Start(unsigned & trimLen, size32_t & trimSize, size32_t len, const char * t)
  1677. {
  1678. const byte * start = (const byte *)t;
  1679. const byte * cur = start;
  1680. for (unsigned i=0; i < len; i++)
  1681. {
  1682. const byte * prev = cur;
  1683. unsigned next = readUtf8Character(UTF8_MAXSIZE, cur);
  1684. if (!u_isspace(next))
  1685. {
  1686. trimLen = i;
  1687. trimSize = prev-start;
  1688. return;
  1689. }
  1690. }
  1691. trimLen = len;
  1692. trimSize = cur-start;
  1693. }
  // Return a malloc'd, NUL-terminated copy of the first len bytes of src.
  inline char * rtlDupSubString(const char * src, unsigned len)
  {
      char * copy = (char *)malloc(len + 1);
      copy[len] = 0;
      memcpy(copy, src, len);
      return copy;
  }
  1701. inline UChar * rtlDupSubUnicode(UChar const * src, unsigned len)
  1702. {
  1703. UChar * buffer = (UChar *)malloc((len + 1) * 2);
  1704. memcpy(buffer, src, len*2);
  1705. buffer[len] = 0x00;
  1706. return buffer;
  1707. }
  1708. inline void rtlCopySubStringV(size32_t tlen, char * tgt, unsigned slen, const char * src)
  1709. {
  1710. if (slen >= tlen)
  1711. slen = tlen-1;
  1712. memcpy(tgt, src, slen);
  1713. tgt[slen] = 0;
  1714. }
  1715. //not yet used, but would be needed for assignment to string rather than vstring
  1716. inline void rtlCopySubString(size32_t tlen, char * tgt, unsigned slen, const char * src, char fill)
  1717. {
  1718. if (slen > tlen)
  1719. slen = tlen;
  1720. memcpy(tgt, src, slen);
  1721. memset(tgt + slen, fill, tlen-slen);
  1722. }
  1723. unsigned rtlTrimUtf8StrLen(size32_t len, const char * t)
  1724. {
  1725. const byte * cur = (const byte *)t;
  1726. unsigned trimLength = 0;
  1727. for (unsigned i=0; i < len; i++)
  1728. {
  1729. unsigned next = readUtf8Character(UTF8_MAXSIZE, cur);
  1730. if (!u_isspace(next))
  1731. trimLength = i+1;
  1732. }
  1733. return trimLength;
  1734. }
  1735. //-----------------------------------------------------------------------------
  1736. // Functions to trim off right side blank spaces
  1737. void rtlTrimRight(size32_t & tlen, char * & tgt, unsigned slen, const char * src)
  1738. {
  1739. tlen = rtlTrimStrLen(slen, src);
  1740. tgt = rtlDupSubString(src, tlen);
  1741. }
  1742. void rtlTrimUnicodeRight(size32_t & tlen, UChar * & tgt, unsigned slen, UChar const * src)
  1743. {
  1744. tlen = rtlTrimUnicodeStrLen(slen, src);
  1745. tgt = rtlDupSubUnicode(src, tlen);
  1746. }
  1747. void rtlTrimVRight(size32_t & tlen, char * & tgt, const char * src)
  1748. {
  1749. tlen = rtlTrimVStrLen(src);
  1750. tgt = rtlDupSubString(src, tlen);
  1751. }
  1752. void rtlTrimVUnicodeRight(size32_t & tlen, UChar * & tgt, UChar const * src)
  1753. {
  1754. rtlTrimUnicodeRight(tlen, tgt, rtlUnicodeStrlen(src), src);
  1755. }
  1756. void rtlTrimUtf8Right(unsigned &tlen, char * &tgt, unsigned slen, char const * src)
  1757. {
  1758. unsigned trimLength;
  1759. size32_t trimSize;
  1760. rtlTrimUtf8Len(trimLength, trimSize, slen, src);
  1761. tlen = trimLength;
  1762. tgt = rtlDupSubString(src, trimSize);
  1763. }
  1764. void rtlAssignTrimRightV(size32_t tlen, char * tgt, unsigned slen, const char * src)
  1765. {
  1766. unsigned len = rtlTrimStrLen(slen, src);
  1767. rtlCopySubStringV(tlen, tgt, len, src);
  1768. }
  1769. void rtlAssignTrimVRightV(size32_t tlen, char * tgt, const char * src)
  1770. {
  1771. unsigned len = rtlTrimVStrLen(src);
  1772. rtlCopySubStringV(tlen, tgt, len, src);
  1773. }
  1774. //-------------------------------------------------------------------------------
  1775. // Functions to trim off left side blank spaces
  1776. void rtlTrimLeft(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  1777. {
  1778. unsigned start = rtlLeftTrimStrStart(slen, src);
  1779. unsigned len = slen - start;
  1780. tlen = len;
  1781. tgt = rtlDupSubString(src + start, len);
  1782. }
  1783. void rtlTrimUnicodeLeft(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src)
  1784. {
  1785. unsigned start = rtlLeftTrimUnicodeStrStart(slen, src);
  1786. unsigned len = slen - start;
  1787. tlen = len;
  1788. tgt = rtlDupSubUnicode(src + start, len);
  1789. }
  1790. void rtlTrimVLeft(unsigned & tlen, char * & tgt, const char * src)
  1791. {
  1792. unsigned start = rtlLeftTrimVStrStart(src);
  1793. unsigned len = strlen(src+start);
  1794. tlen = len;
  1795. tgt = rtlDupSubString(src + start, len);
  1796. }
  1797. void rtlTrimVUnicodeLeft(unsigned & tlen, UChar * & tgt, UChar const * src)
  1798. {
  1799. rtlTrimUnicodeLeft(tlen, tgt, rtlUnicodeStrlen(src), src);
  1800. }
  1801. ECLRTL_API void rtlTrimUtf8Left(unsigned &tlen, char * &tgt, unsigned slen, const char * src)
  1802. {
  1803. unsigned trimLength;
  1804. size32_t trimSize;
  1805. rtlTrimUtf8Start(trimLength, trimSize, slen, src);
  1806. unsigned len = slen-trimLength;
  1807. const char * start = src+trimSize;
  1808. tlen = len;
  1809. tgt = rtlDupSubString(start, rtlUtf8Size(len, start));
  1810. }
  1811. void rtlAssignTrimLeftV(size32_t tlen, char * tgt, unsigned slen, const char * src)
  1812. {
  1813. unsigned start = rtlLeftTrimStrStart(slen, src);
  1814. unsigned len = slen - start;
  1815. rtlCopySubStringV(tlen, tgt, len, src+start);
  1816. }
  1817. void rtlAssignTrimVLeftV(size32_t tlen, char * tgt, const char * src)
  1818. {
  1819. unsigned start = rtlLeftTrimVStrStart(src);
  1820. unsigned len = strlen(src+start);
  1821. rtlCopySubStringV(tlen, tgt, len, src+start);
  1822. }
  1823. //--------------------------------------------------------------------------------
  1824. // Functions to trim off blank spaces of both sides
  1825. void rtlTrimBoth(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  1826. {
  1827. unsigned len = rtlTrimStrLen(slen, src);
  1828. unsigned start = len ? rtlLeftTrimStrStart(slen, src) : 0;
  1829. len -= start;
  1830. tlen = len;
  1831. tgt = rtlDupSubString(src + start, len);
  1832. }
  1833. void rtlTrimUnicodeBoth(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src)
  1834. {
  1835. unsigned len = rtlTrimUnicodeStrLen(slen, src);
  1836. unsigned start = len ? rtlLeftTrimUnicodeStrStart(slen, src) : 0;
  1837. len -= start;
  1838. tlen = len;
  1839. tgt = rtlDupSubUnicode(src + start, len);
  1840. }
  1841. void rtlTrimVBoth(unsigned & tlen, char * & tgt, const char * src)
  1842. {
  1843. unsigned len = rtlTrimVStrLen(src);
  1844. unsigned start = len ? rtlLeftTrimVStrStart(src) : 0;
  1845. len -= start;
  1846. tlen = len;
  1847. tgt = rtlDupSubString(src + start, len);
  1848. }
  1849. void rtlTrimVUnicodeBoth(unsigned & tlen, UChar * & tgt, UChar const * src)
  1850. {
  1851. rtlTrimUnicodeBoth(tlen, tgt, rtlUnicodeStrlen(src), src);
  1852. }
  1853. ECLRTL_API void rtlTrimUtf8Both(unsigned &tlen, char * &tgt, unsigned slen, const char * src)
  1854. {
  1855. unsigned lTrimLength;
  1856. size32_t lTrimSize;
  1857. rtlTrimUtf8Start(lTrimLength, lTrimSize, slen, src);
  1858. rtlTrimUtf8Right(tlen, tgt, slen-lTrimLength, src+lTrimSize);
  1859. }
  1860. void rtlAssignTrimBothV(size32_t tlen, char * tgt, unsigned slen, const char * src)
  1861. {
  1862. unsigned len = rtlTrimStrLen(slen, src);
  1863. unsigned start = len ? rtlLeftTrimStrStart(slen, src) : 0;
  1864. len -= start;
  1865. rtlCopySubStringV(tlen, tgt, len, src+start);
  1866. }
  1867. void rtlAssignTrimVBothV(size32_t tlen, char * tgt, const char * src)
  1868. {
  1869. unsigned len = rtlTrimVStrLen(src);
  1870. unsigned start = len ? rtlLeftTrimVStrStart(src) : 0;
  1871. len -= start;
  1872. rtlCopySubStringV(tlen, tgt, len, src+start);
  1873. }
  1874. //-----------------------------------------------------------------------------
  1875. // Functions used to trim off all blank spaces in a string.
  1876. unsigned rtlTrimStrLenNonBlank(size32_t l, const char * t)
  1877. {
  1878. unsigned len = 0;
  1879. while (l)
  1880. {
  1881. l--;
  1882. if (t[l] != ' ')
  1883. len++;
  1884. }
  1885. return len;
  1886. }
  // Count the characters in the NUL-terminated string t that are not spaces.
  unsigned rtlTrimVStrLenNonBlank(const char * t)
  {
      unsigned count = 0;
      for (const char * cur = t; *cur != 0; ++cur)
      {
          if (*cur != ' ')
              count++;
      }
      return count;
  }
  1898. void rtlTrimAll(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  1899. {
  1900. tlen = rtlTrimStrLenNonBlank(slen, src);
  1901. char * buffer = (char *)malloc(tlen + 1);
  1902. int ind = 0;
  1903. for(unsigned i = 0; i < slen; i++) {
  1904. if(src[i] != ' ') {
  1905. buffer[ind] = src[i];
  1906. ind++;
  1907. }
  1908. }
  1909. buffer[tlen] = 0;
  1910. tgt = buffer;
  1911. }
  1912. void rtlTrimUnicodeAll(unsigned & tlen, UChar * & tgt, unsigned slen, const UChar * src)
  1913. {
  1914. UnicodeString rawStr;
  1915. UCharCharacterIterator iter(src, slen);
  1916. for(iter.first32(); iter.hasNext(); iter.next32())
  1917. if(!u_isspace(iter.current32()))
  1918. rawStr.append(iter.current32());
  1919. UnicodeString tgtStr;
  1920. normalizeUnicodeString(rawStr, tgtStr); // normalized in case crazy string like [combining accent] [space] [vowel]
  1921. tlen = tgtStr.length();
  1922. tgt = (UChar *)malloc((tlen+1)*2);
  1923. tgtStr.extract(0, tlen, tgt);
  1924. tgt[tlen] = 0x0000;
  1925. }
  1926. void rtlTrimVAll(unsigned & tlen, char * & tgt, const char * src)
  1927. {
  1928. tlen = rtlTrimVStrLenNonBlank(src);
  1929. char * buffer = (char *)malloc(tlen + 1);
  1930. int ind = 0;
  1931. int i = 0;
  1932. while(src[i] != 0) {
  1933. if(src[i] != ' ') {
  1934. buffer[ind] = src[i];
  1935. ind++;
  1936. }
  1937. i++;
  1938. }
  1939. buffer[tlen] = 0;
  1940. tgt = buffer;
  1941. }
  1942. void rtlTrimVUnicodeAll(unsigned & tlen, UChar * & tgt, const UChar * src)
  1943. {
  1944. rtlTrimUnicodeAll(tlen, tgt, rtlUnicodeStrlen(src), src);
  1945. }
  1946. ECLRTL_API void rtlTrimUtf8All(unsigned &tlen, char * &tgt, unsigned slen, const char * src)
  1947. {
  1948. //Go via unicode because of possibility of combining accents etc.
  1949. rtlDataAttr temp1(slen*sizeof(UChar));
  1950. rtlUtf8ToUnicode(slen, temp1.getustr(), slen, src);
  1951. unsigned trimLen;
  1952. rtlDataAttr trimText;
  1953. rtlTrimUnicodeAll(trimLen, trimText.refustr(), slen, temp1.getustr());
  1954. rtlUnicodeToUtf8X(tlen, tgt, trimLen, trimText.getustr());
  1955. }
  // Copy src with every space removed into a NUL-terminated target of capacity tlen
  // (terminator included), stopping once the target is full.
  // A zero-capacity target is left untouched; previously the terminator was still
  // written to tgt[0].
  void rtlAssignTrimAllV(unsigned tlen, char * tgt, unsigned slen, const char * src)
  {
      if (tlen == 0)
          return;
      unsigned to = 0;
      for (unsigned from = 0; (from < slen)&&(to+1 < tlen); from++)
      {
          if (src[from] != ' ')
              tgt[to++] = src[from];
      }
      tgt[to] = 0;
  }
  // Copy a NUL-terminated src with every space removed into a NUL-terminated target of
  // capacity tlen (terminator included), stopping once the target is full.
  // A zero-capacity target is left untouched; previously the terminator was still
  // written to tgt[0].
  void rtlAssignTrimVAllV(unsigned tlen, char * tgt, const char * src)
  {
      if (tlen == 0)
          return;
      unsigned to = 0;
      for (;(*src && (to+1 < tlen));src++)
      {
          if (*src != ' ')
              tgt[to++] = *src;
      }
      tgt[to] = 0;
  }
  1976. //-----------------------------------------------------------------------------
  1977. ECLRTL_API void rtlUnicodeToVAscii(unsigned outlen, char * out, unsigned inlen, UChar const * in)
  1978. {
  1979. rtlUnicodeToVCodepage(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  1980. }
  1981. ECLRTL_API void rtlData2VUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
  1982. {
  1983. rtlCodepageToVUnicode(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  1984. }
  1985. ECLRTL_API void rtlStrToVUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
  1986. {
  1987. rtlCodepageToVUnicode(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  1988. }
  1989. ECLRTL_API void rtlData2Unicode(unsigned outlen, UChar * out, unsigned inlen, void const * in)
  1990. {
  1991. rtlCodepageToUnicode(outlen, out, inlen, (const char *)in, ASCII_LIKE_CODEPAGE);
  1992. }
  1993. ECLRTL_API void rtlAssignTrimUnicodeLeftV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
  1994. {
  1995. unsigned len;
  1996. UChar * str;
  1997. rtlTrimUnicodeLeft(len, str, slen, src);
  1998. if (len >= tlen)
  1999. len = tlen-1;
  2000. memcpy(tgt, str, len*2);
  2001. tgt[len] = 0;
  2002. rtlFree(str);
  2003. }
  2004. ECLRTL_API void rtlAssignTrimVUnicodeLeftV(size32_t tlen, UChar * tgt, const UChar * src)
  2005. {
  2006. unsigned len;
  2007. UChar * str;
  2008. rtlTrimVUnicodeLeft(len, str, src);
  2009. if (len >= tlen)
  2010. len = tlen-1;
  2011. memcpy(tgt, str, len*2);
  2012. tgt[len] = 0;
  2013. rtlFree(str);
  2014. }
  2015. ECLRTL_API void rtlAssignTrimUnicodeRightV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
  2016. {
  2017. unsigned len;
  2018. UChar * str;
  2019. rtlTrimUnicodeRight(len, str, slen, src);
  2020. if (len >= tlen)
  2021. len = tlen-1;
  2022. memcpy(tgt, str, len*2);
  2023. tgt[len] = 0;
  2024. rtlFree(str);
  2025. }
  2026. ECLRTL_API void rtlAssignTrimVUnicodeRightV(size32_t tlen, UChar * tgt, const UChar * src)
  2027. {
  2028. unsigned len;
  2029. UChar * str;
  2030. rtlTrimVUnicodeRight(len, str, src);
  2031. if (len >= tlen)
  2032. len = tlen-1;
  2033. memcpy(tgt, str, len*2);
  2034. tgt[len] = 0;
  2035. rtlFree(str);
  2036. }
  2037. ECLRTL_API void rtlAssignTrimUnicodeBothV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
  2038. {
  2039. unsigned len;
  2040. UChar * str;
  2041. rtlTrimUnicodeBoth(len, str, slen, src);
  2042. if (len >= tlen)
  2043. len = tlen-1;
  2044. memcpy(tgt, str, len*2);
  2045. tgt[len] = 0;
  2046. rtlFree(str);
  2047. }
  2048. ECLRTL_API void rtlAssignTrimVUnicodeBothV(size32_t tlen, UChar * tgt, const UChar * src)
  2049. {
  2050. unsigned len;
  2051. UChar * str;
  2052. rtlTrimVUnicodeBoth(len, str, src);
  2053. if (len >= tlen)
  2054. len = tlen-1;
  2055. memcpy(tgt, str, len*2);
  2056. tgt[len] = 0;
  2057. rtlFree(str);
  2058. }
  2059. ECLRTL_API void rtlAssignTrimUnicodeAllV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
  2060. {
  2061. unsigned len;
  2062. UChar * str;
  2063. rtlTrimUnicodeAll(len, str, slen, src);
  2064. if (len >= tlen)
  2065. len = tlen-1;
  2066. memcpy(tgt, str, len*2);
  2067. tgt[len] = 0;
  2068. rtlFree(str);
  2069. }
  2070. ECLRTL_API void rtlAssignTrimVUnicodeAllV(size32_t tlen, UChar * tgt, const UChar * src)
  2071. {
  2072. unsigned len;
  2073. UChar * str;
  2074. rtlTrimVUnicodeAll(len, str, src);
  2075. if (len >= tlen)
  2076. len = tlen-1;
  2077. memcpy(tgt, str, len*2);
  2078. tgt[len] = 0;
  2079. rtlFree(str);
  2080. }
  2081. //-----------------------------------------------------------------------------
  2082. int rtlCompareStrStr(unsigned l1, const char * p1, unsigned l2, const char * p2)
  2083. {
  2084. unsigned len = l1;
  2085. if (len > l2)
  2086. len = l2;
  2087. int diff = memcmp(p1, p2, len);
  2088. if (diff == 0)
  2089. {
  2090. if (len != l1)
  2091. {
  2092. for (;(diff == 0) && (len != l1);len++)
  2093. diff = ((unsigned char *)p1)[len] - ' ';
  2094. }
  2095. else if (len != l2)
  2096. {
  2097. for (;(diff == 0) && (len != l2);len++)
  2098. diff = ' ' - ((unsigned char *)p2)[len];
  2099. }
  2100. }
  2101. return diff;
  2102. }
  2103. int rtlCompareVStrVStr(const char * p1, const char * p2)
  2104. {
  2105. return rtlCompareStrStr(strlen(p1), p1, strlen(p2), p2);
  2106. }
  2107. int rtlCompareStrBlank(unsigned l1, const char * p1)
  2108. {
  2109. while (l1--)
  2110. {
  2111. int diff = (*(unsigned char *)(p1++)) - ' ';
  2112. if (diff)
  2113. return diff;
  2114. }
  2115. return 0;
  2116. }
  2117. int rtlCompareDataData(unsigned l1, const void * p1, unsigned l2, const void * p2)
  2118. {
  2119. unsigned len = l1;
  2120. if (len > l2)
  2121. len = l2;
  2122. int diff = memcmp(p1, p2, len);
  2123. if (diff == 0)
  2124. {
  2125. if (l1 > l2)
  2126. diff = +1;
  2127. else if (l1 < l2)
  2128. diff = -1;
  2129. }
  2130. return diff;
  2131. }
  2132. int rtlCompareEStrEStr(unsigned l1, const char * p1, unsigned l2, const char * p2)
  2133. {
  2134. unsigned len = l1;
  2135. if (len > l2)
  2136. len = l2;
  2137. int diff = memcmp(p1, p2, len);
  2138. if (diff == 0)
  2139. {
  2140. if (len != l1)
  2141. {
  2142. for (;(diff == 0) && (len != l1);len++)
  2143. diff = ((unsigned char *)p1)[len] - '@';
  2144. }
  2145. else if (len != l2)
  2146. {
  2147. for (;(diff == 0) && (len != l2);len++)
  2148. diff = '@' - ((unsigned char *)p2)[len];
  2149. }
  2150. }
  2151. return diff;
  2152. }
  2153. int rtlCompareUnicodeUnicode(unsigned l1, UChar const * p1, unsigned l2, UChar const * p2, char const * locale)
  2154. {
  2155. while(l1 && u_isUWhiteSpace(p1[l1-1])) l1--;
  2156. while(l2 && u_isUWhiteSpace(p2[l2-1])) l2--;
  2157. return ucol_strcoll(queryRTLLocale(locale)->queryCollator(), p1, l1, p2, l2);
  2158. }
  2159. int rtlCompareUnicodeUnicodeStrength(unsigned l1, UChar const * p1, unsigned l2, UChar const * p2, char const * locale, unsigned strength)
  2160. {
  2161. while(l1 && u_isUWhiteSpace(p1[l1-1])) l1--;
  2162. while(l2 && u_isUWhiteSpace(p2[l2-1])) l2--;
  2163. return ucol_strcoll(queryRTLLocale(locale)->queryCollator(strength), p1, l1, p2, l2);
  2164. }
  2165. int rtlCompareVUnicodeVUnicode(UChar const * p1, UChar const * p2, char const * locale)
  2166. {
  2167. return rtlCompareUnicodeUnicode(rtlUnicodeStrlen(p1), p1, rtlUnicodeStrlen(p2), p2, locale);
  2168. }
  2169. int rtlCompareVUnicodeVUnicodeStrength(UChar const * p1, UChar const * p2, char const * locale, unsigned strength)
  2170. {
  2171. return rtlCompareUnicodeUnicodeStrength(rtlUnicodeStrlen(p1), p1, rtlUnicodeStrlen(p2), p2, locale, strength);
  2172. }
  2173. void rtlKeyUnicodeX(unsigned & tlen, void * & tgt, unsigned slen, const UChar * src, const char * locale)
  2174. {
  2175. while(slen && u_isUWhiteSpace(src[slen-1])) slen--;
  2176. UCollator * coll = queryRTLLocale(locale)->queryCollator();
  2177. tlen = ucol_getSortKey(coll, src, slen, 0, 0);
  2178. tgt = malloc(tlen);
  2179. ucol_getSortKey(coll, src, slen, (unsigned char *)tgt, tlen);
  2180. }
  2181. void rtlKeyUnicodeStrengthX(unsigned & tlen, void * & tgt, unsigned slen, const UChar * src, const char * locale, unsigned strength)
  2182. {
  2183. while(slen && u_isUWhiteSpace(src[slen-1])) slen--;
  2184. UCollator * coll = queryRTLLocale(locale)->queryCollator(strength);
  2185. tlen = ucol_getSortKey(coll, src, slen, 0, 0);
  2186. tgt = malloc(tlen);
  2187. ucol_getSortKey(coll, src, slen, (unsigned char *)tgt, tlen);
  2188. }
  2189. ECLRTL_API int rtlPrefixDiffStr(unsigned l1, const char * p1, unsigned l2, const char * p2)
  2190. {
  2191. unsigned len = l1 < l2 ? l1 : l2;
  2192. const byte * str1 = (const byte *)p1;
  2193. const byte * str2 = (const byte *)p2;
  2194. for (unsigned i=0; i<len; i++)
  2195. {
  2196. byte c1 = str1[i];
  2197. byte c2 = str2[i];
  2198. if (c1 != c2)
  2199. {
  2200. if (c1 < c2)
  2201. return -(int)(i+1);
  2202. else
  2203. return (int)(i+1);
  2204. }
  2205. }
  2206. if (l1 != l2)
  2207. return (l1 < l2) ? -(int)(len+1) : (int)(len + 1);
  2208. return 0;
  2209. }
  2210. //MORE: I'm not sure this can really be implemented....
  2211. ECLRTL_API int rtlPrefixDiffUnicode(unsigned l1, const UChar * p1, unsigned l2, const UChar * p2, char const * locale)
  2212. {
  2213. while(l1 && u_isUWhiteSpace(p1[l1-1])) l1--;
  2214. while(l2 && u_isUWhiteSpace(p2[l2-1])) l2--;
  2215. unsigned len = l1 < l2 ? l1 : l2;
  2216. for (unsigned i=0; i<len; i++)
  2217. {
  2218. if (p1[i] != p2[i])
  2219. {
  2220. int c = ucol_strcoll(queryRTLLocale(locale)->queryCollator(), p1+i, l1-i, p2+i, l2-i);
  2221. if (c < 0)
  2222. return -(int)(i+1);
  2223. else if (c > 0)
  2224. return (int)(i+1);
  2225. else
  2226. return 0; //weird!
  2227. }
  2228. }
  2229. if (l1 != l2)
  2230. return (l1 < l2) ? -(int)(len+1) : (int)(len + 1);
  2231. return 0;
  2232. }
  2233. //-----------------------------------------------------------------------------
  2234. void rtlStringToLower(size32_t l, char * t)
  2235. {
  2236. for (;l--;t++)
  2237. *t = tolower(*t);
  2238. }
  2239. void rtlStringToUpper(size32_t l, char * t)
  2240. {
  2241. for (;l--;t++)
  2242. *t = toupper(*t);
  2243. }
  2244. void rtlUnicodeToLower(size32_t l, UChar * t, char const * locale)
  2245. {
  2246. UChar * buff = (UChar *)malloc(l*2);
  2247. UErrorCode err = U_ZERO_ERROR;
  2248. u_strToLower(buff, l, t, l, locale, &err);
  2249. unicodeNormalizedCopy(buff, t, l);
  2250. }
  2251. void rtlUnicodeToLowerX(size32_t & lenout, UChar * & out, size32_t l, const UChar * t, char const * locale)
  2252. {
  2253. out = (UChar *)malloc(l*2);
  2254. lenout = l;
  2255. UErrorCode err = U_ZERO_ERROR;
  2256. u_strToLower(out, l, t, l, locale, &err);
  2257. }
  2258. void rtlUnicodeToUpper(size32_t l, UChar * t, char const * locale)
  2259. {
  2260. UChar * buff = (UChar *)malloc(l*2);
  2261. UErrorCode err = U_ZERO_ERROR;
  2262. u_strToUpper(buff, l, t, l, locale, &err);
  2263. unicodeNormalizedCopy(buff, t, l);
  2264. }
  2265. //=============================================================================
  2266. // Miscellaneous helper functions...
  2267. //-----------------------------------------------------------------------------
  2268. int searchTableStringN(unsigned count, const char * * table, unsigned width, const char * search)
  2269. {
  2270. int left = 0;
  2271. int right = count;
  2272. do
  2273. {
  2274. int mid = (left + right) >> 1;
  2275. int cmp = memcmp(search, table[mid], width);
  2276. if (cmp < 0)
  2277. right = mid;
  2278. else if (cmp > 0)
  2279. left = mid+1;
  2280. else
  2281. return mid;
  2282. } while (left < right);
  2283. return -1;
  2284. }
  2285. int rtlSearchTableStringN(unsigned count, char * * table, unsigned width, const char * search)
  2286. {
  2287. int left = 0;
  2288. int right = count;
  2289. do
  2290. {
  2291. int mid = (left + right) >> 1;
  2292. //we could use rtlCompareStrStr, but both source and target strings should
  2293. //be the correct length, so no point.... (unless new weird collation sequences)
  2294. //we would also need to call a different function for data
  2295. int cmp = memcmp(search, table[mid], width);
  2296. if (cmp < 0)
  2297. right = mid;
  2298. else if (cmp > 0)
  2299. left = mid+1;
  2300. else
  2301. return mid;
  2302. } while (left < right);
  2303. return -1;
  2304. }
  2305. int rtlSearchTableVStringN(unsigned count, char * * table, const char * search)
  2306. {
  2307. int left = 0;
  2308. int right = count;
  2309. do
  2310. {
  2311. int mid = (left + right) >> 1;
  2312. int cmp = strcmp(search, table[mid]);
  2313. if (cmp < 0)
  2314. right = mid;
  2315. else if (cmp > 0)
  2316. left = mid+1;
  2317. else
  2318. return mid;
  2319. } while (left < right);
  2320. return -1;
  2321. }
  2322. int rtlNewSearchDataTable(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search)
  2323. {
  2324. int left = 0;
  2325. int right = count;
  2326. do
  2327. {
  2328. int mid = (left + right) >> 1;
  2329. int cmp = rtlCompareDataData( width, search, elemlen, table[mid]);
  2330. if (cmp < 0)
  2331. right = mid;
  2332. else if (cmp > 0)
  2333. left = mid+1;
  2334. else {
  2335. return mid;
  2336. }
  2337. } while (left < right);
  2338. return -1;
  2339. }
  2340. int rtlNewSearchEStringTable(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search)
  2341. {
  2342. int left = 0;
  2343. int right = count;
  2344. do
  2345. {
  2346. int mid = (left + right) >> 1;
  2347. int cmp = rtlCompareEStrEStr( width, search, elemlen, table[mid]);
  2348. if (cmp < 0)
  2349. right = mid;
  2350. else if (cmp > 0)
  2351. left = mid+1;
  2352. else {
  2353. return mid;
  2354. }
  2355. } while (left < right);
  2356. return -1;
  2357. }
  2358. int rtlNewSearchQStringTable(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search)
  2359. {
  2360. int left = 0;
  2361. int right = count;
  2362. do
  2363. {
  2364. int mid = (left + right) >> 1;
  2365. int cmp = rtlCompareQStrQStr( width, search, elemlen, table[mid]);
  2366. if (cmp < 0)
  2367. right = mid;
  2368. else if (cmp > 0)
  2369. left = mid+1;
  2370. else {
  2371. return mid;
  2372. }
  2373. } while (left < right);
  2374. return -1;
  2375. }
  2376. int rtlNewSearchStringTable(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search)
  2377. {
  2378. int left = 0;
  2379. int right = count;
  2380. do
  2381. {
  2382. int mid = (left + right) >> 1;
  2383. int cmp = rtlCompareStrStr( width, search, elemlen, table[mid]);
  2384. if (cmp < 0)
  2385. right = mid;
  2386. else if (cmp > 0)
  2387. left = mid+1;
  2388. else {
  2389. return mid;
  2390. }
  2391. } while (left < right);
  2392. return -1;
  2393. }
  2394. int rtlNewSearchUnicodeTable(unsigned count, unsigned elemlen, UChar * * table, unsigned width, const UChar * search, const char * locale)
  2395. {
  2396. UCollator * coll = queryRTLLocale(locale)->queryCollator();
  2397. int left = 0;
  2398. int right = count;
  2399. size32_t trimWidth = rtlQuickTrimUnicode(width, search);
  2400. do
  2401. {
  2402. int mid = (left + right) >> 1;
  2403. size32_t elemTrimWidth = rtlQuickTrimUnicode(elemlen, table[mid]);
  2404. UCollationResult cmp = ucol_strcoll(coll, search, trimWidth, table[mid], elemTrimWidth);
  2405. if (cmp == UCOL_LESS)
  2406. right = mid;
  2407. else if (cmp == UCOL_GREATER)
  2408. left = mid+1;
  2409. else
  2410. return mid;
  2411. } while (left < right);
  2412. return -1;
  2413. }
  2414. int rtlNewSearchVUnicodeTable(unsigned count, UChar * * table, const UChar * search, const char * locale)
  2415. {
  2416. UCollator * coll = queryRTLLocale(locale)->queryCollator();
  2417. int left = 0;
  2418. int right = count;
  2419. do
  2420. {
  2421. int mid = (left + right) >> 1;
  2422. UCollationResult cmp = ucol_strcoll(coll, search, rtlUnicodeStrlen(search), table[mid], rtlUnicodeStrlen(table[mid]));
  2423. if (cmp == UCOL_LESS)
  2424. right = mid;
  2425. else if (cmp == UCOL_GREATER)
  2426. left = mid+1;
  2427. else
  2428. return mid;
  2429. } while (left < right);
  2430. return -1;
  2431. }
  2432. //-----------------------------------------------------------------------------
  2433. template <class T>
  2434. int rtlSearchIntegerTable(unsigned count, T * table, T search)
  2435. {
  2436. int left = 0;
  2437. int right = count;
  2438. do
  2439. {
  2440. int mid = (left + right) >> 1;
  2441. T midValue = table[mid];
  2442. if (search < midValue)
  2443. right = mid;
  2444. else if (search > midValue)
  2445. left = mid+1;
  2446. else
  2447. return mid;
  2448. } while (left < right);
  2449. return -1;
  2450. }
  2451. int rtlSearchTableInteger8(unsigned count, __int64 * table, __int64 search)
  2452. {
  2453. return rtlSearchIntegerTable(count, table, search);
  2454. }
  2455. int rtlSearchTableUInteger8(unsigned count, unsigned __int64 * table, unsigned __int64 search)
  2456. {
  2457. return rtlSearchIntegerTable(count, table, search);
  2458. }
  2459. int rtlSearchTableInteger4(unsigned count, int * table, int search)
  2460. {
  2461. return rtlSearchIntegerTable(count, table, search);
  2462. }
  2463. int rtlSearchTableUInteger4(unsigned count, unsigned * table, unsigned search)
  2464. {
  2465. return rtlSearchIntegerTable(count, table, search);
  2466. }
  2467. //-----------------------------------------------------------------------------
  2468. unsigned rtlCrc32(unsigned len, const void * buffer, unsigned crc)
  2469. {
  2470. return crc32((const char *)buffer, len, crc);
  2471. }
  2472. //=============================================================================
  2473. // EBCDIC helper functions...
  // Byte translation table: 16 rows of 16 octal escapes, i.e. one output byte for each
  // possible input byte value (plus the string literal's terminator).
  // NOTE(review): not referenced anywhere in this part of the file; presumably an
  // EBCDIC -> ISO-8859-1 (CCSID 819) mapping given the name - confirm before relying on it.
  2474. static char ccsid819[] = "\
  2475. \000\001\002\003\234\011\206\177\227\215\216\013\014\015\016\017\
  2476. \020\021\022\023\235\205\010\207\030\031\222\217\034\035\036\037\
  2477. \200\201\202\203\204\012\027\033\210\211\212\213\214\005\006\007\
  2478. \220\221\026\223\224\225\226\004\230\231\232\233\024\025\236\032\
  2479. \040\240\342\344\340\341\343\345\347\361\242\056\074\050\053\174\
  2480. \046\351\352\353\350\355\356\357\354\337\041\044\052\051\073\254\
  2481. \055\057\302\304\300\301\303\305\307\321\246\054\045\137\076\077\
  2482. \370\311\312\313\310\315\316\317\314\140\072\043\100\047\075\042\
  2483. \330\141\142\143\144\145\146\147\150\151\253\273\360\375\376\261\
  2484. \260\152\153\154\155\156\157\160\161\162\252\272\346\270\306\244\
  2485. \265\176\163\164\165\166\167\170\171\172\241\277\320\335\336\256\
  2486. \136\243\245\267\251\247\266\274\275\276\133\135\257\250\264\327\
  2487. \173\101\102\103\104\105\106\107\110\111\255\364\366\362\363\365\
  2488. \175\112\113\114\115\116\117\120\121\122\271\373\374\371\372\377\
  2489. \134\367\123\124\125\126\127\130\131\132\262\324\326\322\323\325\
  2490. \060\061\062\063\064\065\066\067\070\071\263\333\334\331\332\237";
  // Byte translation table used by rtlEStrToStr: indexed by the input (EBCDIC string) byte
  // value, yields the byte stored in the output string. 16 rows of 16 octal escapes cover
  // every possible byte value.
  // NOTE(review): presumably the CCSID 1047 (EBCDIC Latin-1) -> ISO-8859-1 mapping - confirm.
  2491. static unsigned char ccsid1047[] = "\
  2492. \000\001\002\003\234\011\206\177\227\215\216\013\014\015\016\017\
  2493. \020\021\022\023\235\012\010\207\030\031\222\217\034\035\036\037\
  2494. \200\201\202\203\204\205\027\033\210\211\212\213\214\005\006\007\
  2495. \220\221\026\223\224\225\226\004\230\231\232\233\024\025\236\032\
  2496. \040\240\342\344\340\341\343\345\347\361\242\056\074\050\053\174\
  2497. \046\351\352\353\350\355\356\357\354\337\041\044\052\051\073\136\
  2498. \055\057\302\304\300\301\303\305\307\321\246\054\045\137\076\077\
  2499. \370\311\312\313\310\315\316\317\314\140\072\043\100\047\075\042\
  2500. \330\141\142\143\144\145\146\147\150\151\253\273\360\375\376\261\
  2501. \260\152\153\154\155\156\157\160\161\162\252\272\346\270\306\244\
  2502. \265\176\163\164\165\166\167\170\171\172\241\277\320\133\336\256\
  2503. \254\243\245\267\251\247\266\274\275\276\335\250\257\135\264\327\
  2504. \173\101\102\103\104\105\106\107\110\111\255\364\366\362\363\365\
  2505. \175\112\113\114\115\116\117\120\121\122\271\373\374\371\372\377\
  2506. \134\367\123\124\125\126\127\130\131\132\262\324\326\322\323\325\
  2507. \060\061\062\063\064\065\066\067\070\071\263\333\334\331\332\237";
  // Byte translation table used by rtlStrToEStr: indexed by the input string byte value,
  // yields the byte stored in the EBCDIC output; the entry for ' ' is also used as that
  // function's padding byte. 16 rows of 16 octal escapes cover every possible byte value.
  // NOTE(review): presumably the inverse of ccsid1047 - confirm.
  2508. static unsigned char ccsid1047_rev[] = "\
  2509. \000\001\002\003\067\055\056\057\026\005\025\013\014\015\016\017\
  2510. \020\021\022\023\074\075\062\046\030\031\077\047\034\035\036\037\
  2511. \100\132\177\173\133\154\120\175\115\135\134\116\153\140\113\141\
  2512. \360\361\362\363\364\365\366\367\370\371\172\136\114\176\156\157\
  2513. \174\301\302\303\304\305\306\307\310\311\321\322\323\324\325\326\
  2514. \327\330\331\342\343\344\345\346\347\350\351\255\340\275\137\155\
  2515. \171\201\202\203\204\205\206\207\210\211\221\222\223\224\225\226\
  2516. \227\230\231\242\243\244\245\246\247\250\251\300\117\320\241\007\
  2517. \040\041\042\043\044\045\006\027\050\051\052\053\054\011\012\033\
  2518. \060\061\032\063\064\065\066\010\070\071\072\073\004\024\076\377\
  2519. \101\252\112\261\237\262\152\265\273\264\232\212\260\312\257\274\
  2520. \220\217\352\372\276\240\266\263\235\332\233\213\267\270\271\253\
  2521. \144\145\142\146\143\147\236\150\164\161\162\163\170\165\166\167\
  2522. \254\151\355\356\353\357\354\277\200\375\376\373\374\272\256\131\
  2523. \104\105\102\106\103\107\234\110\124\121\122\123\130\125\126\127\
  2524. \214\111\315\316\313\317\314\341\160\335\336\333\334\215\216\337";
  2525. void rtlEStrToStr(unsigned outlen, char *out, unsigned inlen, const char *in)
  2526. {
  2527. unsigned char *codepage = ccsid1047;
  2528. unsigned i,j;
  2529. unsigned lim = inlen;
  2530. if (lim>outlen) lim = outlen;
  2531. for (i=0;i<lim;i++)
  2532. {
  2533. j = in[i] & 0x00ff;
  2534. out[i] = codepage[j];
  2535. }
  2536. for (;i<outlen; i++)
  2537. out[i] = ' ';
  2538. }
  2539. void rtlStrToEStr(unsigned outlen, char *out, unsigned inlen, const char *in)
  2540. {
  2541. unsigned char *codepage = ccsid1047_rev;
  2542. unsigned i,j;
  2543. unsigned lim = inlen;
  2544. if (lim>outlen) lim = outlen;
  2545. for (i=0;i<lim;i++)
  2546. {
  2547. j = in[i] & 0x00ff;
  2548. out[i] = codepage[j];
  2549. }
  2550. for (;i<outlen; i++)
  2551. out[i] = codepage[' '];
  2552. }
  2553. //---------------------------------------------------------------------------
  2554. void rtlCodepageToUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in, char const * codepage)
  2555. {
  2556. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2557. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2558. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2559. UErrorCode err = U_ZERO_ERROR;
  2560. unsigned len = ucnv_toUChars(conv, out, outlen, in, inlen, &err);
  2561. while(len<outlen) out[len++] = 0x0020;
  2562. unicodeEnsureIsNormalized(outlen, out);
  2563. }
  2564. void rtlCodepageToVUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in, char const * codepage)
  2565. {
  2566. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2567. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2568. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2569. UErrorCode err = U_ZERO_ERROR;
  2570. unsigned len = ucnv_toUChars(conv, out, outlen-1, in, inlen, &err);
  2571. if (len >= outlen) len = outlen-1;
  2572. out[len] = 0;
  2573. vunicodeEnsureIsNormalized(outlen, out);
  2574. }
  2575. void rtlVCodepageToUnicode(unsigned outlen, UChar * out, char const * in, char const * codepage)
  2576. {
  2577. rtlCodepageToUnicode(outlen, out, strlen(in), in, codepage);
  2578. }
  2579. void rtlVCodepageToVUnicode(unsigned outlen, UChar * out, char const * in, char const * codepage)
  2580. {
  2581. rtlCodepageToVUnicode(outlen, out, strlen(in), in, codepage);
  2582. }
  2583. void rtlCodepageToUnicodeUnescape(unsigned outlen, UChar * out, unsigned inlen, char const * in, char const * codepage)
  2584. {
  2585. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2586. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2587. UnicodeString raw(in, inlen, codepage);
  2588. UnicodeString unescaped = raw.unescape();
  2589. UnicodeString normalized;
  2590. normalizeUnicodeString(unescaped, normalized);
  2591. if((unsigned)normalized.length()>outlen)
  2592. normalized.truncate(outlen);
  2593. else if((unsigned)normalized.length()<outlen)
  2594. normalized.padTrailing(outlen);
  2595. normalized.extract(0, outlen, out);
  2596. }
  2597. void rtlUnicodeToCodepage(unsigned outlen, char * out, unsigned inlen, UChar const * in, char const * codepage)
  2598. {
  2599. //If the unicode contains a character which doesn't exist in the destination codepage,
  2600. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2601. //no telling how your terminal may display this (I've seen a divide sign and a right
  2602. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2603. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2604. UErrorCode err = U_ZERO_ERROR;
  2605. unsigned len = ucnv_fromUChars(conv, (char *)out, outlen, in, inlen, &err);
  2606. if(len<outlen)
  2607. codepageBlankFill(codepage, out+len, outlen-len);
  2608. }
  2609. void rtlUnicodeToData(unsigned outlen, void * out, unsigned inlen, UChar const * in)
  2610. {
  2611. //If the unicode contains a character which doesn't exist in the destination codepage,
  2612. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2613. //no telling how your terminal may display this (I've seen a divide sign and a right
  2614. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2615. UConverter * conv = queryRTLUnicodeConverter(ASCII_LIKE_CODEPAGE)->query();
  2616. UErrorCode err = U_ZERO_ERROR;
  2617. unsigned len = ucnv_fromUChars(conv, (char *)out, outlen, in, inlen, &err);
  2618. if(len<outlen)
  2619. memset((char *)out+len, 0, outlen-len);
  2620. }
  2621. void rtlUnicodeToVCodepage(unsigned outlen, char * out, unsigned inlen, UChar const * in, char const * codepage)
  2622. {
  2623. //If the unicode contains a character which doesn't exist in the destination codepage,
  2624. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2625. //no telling how your terminal may display this (I've seen a divide sign and a right
  2626. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2627. UConverter * conv = queryRTLUnicodeConverter(ASCII_LIKE_CODEPAGE)->query();
  2628. UErrorCode err = U_ZERO_ERROR;
  2629. unsigned len = ucnv_fromUChars(conv, (char *)out, outlen-1, in, inlen, &err);
  2630. if (len >= outlen) len = outlen-1;
  2631. out[len] = 0;
  2632. }
  2633. void rtlVUnicodeToCodepage(unsigned outlen, char * out, UChar const * in, char const * codepage)
  2634. {
  2635. rtlUnicodeToCodepage(outlen, out, rtlUnicodeStrlen(in), in, codepage);
  2636. }
  2637. void rtlVUnicodeToData(unsigned outlen, void * out, UChar const * in)
  2638. {
  2639. rtlUnicodeToData(outlen, out, rtlUnicodeStrlen(in), in);
  2640. }
  2641. void rtlVUnicodeToVCodepage(unsigned outlen, char * out, UChar const * in, char const * codepage)
  2642. {
  2643. rtlUnicodeToVCodepage(outlen, out, rtlUnicodeStrlen(in), in, codepage);
  2644. }
  2645. void rtlCodepageToUnicodeX(unsigned & outlen, UChar * & out, unsigned inlen, char const * in, char const * codepage)
  2646. {
  2647. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2648. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2649. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2650. UErrorCode err = U_ZERO_ERROR;
  2651. outlen = ucnv_toUChars(conv, 0, 0, in, inlen, &err);
  2652. if(err==U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
  2653. out = (UChar *)malloc(outlen*2);
  2654. ucnv_toUChars(conv, out, outlen, in, inlen, &err);
  2655. }
  2656. UChar * rtlCodepageToVUnicodeX(unsigned inlen, char const * in, char const * codepage)
  2657. {
  2658. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2659. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2660. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2661. UErrorCode err = U_ZERO_ERROR;
  2662. unsigned outlen = ucnv_toUChars(conv, 0, 0, in, inlen, &err);
  2663. if(err == U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
  2664. UChar * out = (UChar *)malloc((outlen+1)*2);
  2665. ucnv_toUChars(conv, out, outlen, in, inlen, &err);
  2666. out[outlen] = 0x0000;
  2667. vunicodeEnsureIsNormalizedX(outlen, out);
  2668. return out;
  2669. }
  2670. void rtlVCodepageToUnicodeX(unsigned & outlen, UChar * & out, char const * in, char const * codepage)
  2671. {
  2672. rtlCodepageToUnicodeX(outlen, out, strlen(in), in, codepage);
  2673. }
  2674. UChar * rtlVCodepageToVUnicodeX(char const * in, char const * codepage)
  2675. {
  2676. return rtlCodepageToVUnicodeX(strlen(in), in, codepage);
  2677. }
  2678. void rtlCodepageToUnicodeXUnescape(unsigned & outlen, UChar * & out, unsigned inlen, char const * in, char const * codepage)
  2679. {
  2680. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2681. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2682. UnicodeString raw(in, inlen, codepage);
  2683. UnicodeString unescaped = raw.unescape();
  2684. UnicodeString normalized;
  2685. normalizeUnicodeString(unescaped, normalized);
  2686. outlen = normalized.length();
  2687. out = (UChar *)malloc(outlen*2);
  2688. normalized.extract(0, outlen, out);
  2689. }
  2690. void rtlUnicodeToCodepageX(unsigned & outlen, char * & out, unsigned inlen, UChar const * in, char const * codepage)
  2691. {
  2692. //If the unicode contains a character which doesn't exist in the destination codepage,
  2693. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2694. //no telling how your terminal may display this (I've seen a divide sign and a right
  2695. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2696. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2697. UErrorCode err = U_ZERO_ERROR;
  2698. outlen = ucnv_fromUChars(conv, 0, 0, in, inlen, &err);
  2699. if(err == U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
  2700. out = (char *)malloc(outlen);
  2701. ucnv_fromUChars(conv, out, outlen, in, inlen, &err);
  2702. }
  2703. void rtlUnicodeToDataX(unsigned & outlen, void * & out, unsigned inlen, UChar const * in)
  2704. {
  2705. rtlUnicodeToCodepageX(outlen, (char * &)out, inlen, in, ASCII_LIKE_CODEPAGE);
  2706. }
  2707. char * rtlUnicodeToVCodepageX(unsigned inlen, UChar const * in, char const * codepage)
  2708. {
  2709. //If the unicode contains a character which doesn't exist in the destination codepage,
  2710. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2711. //no telling how your terminal may display this (I've seen a divide sign and a right
  2712. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2713. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2714. UErrorCode err = U_ZERO_ERROR;
  2715. unsigned outlen = ucnv_fromUChars(conv, 0, 0, in, inlen, &err);
  2716. if(err == U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
  2717. char * out = (char *)malloc(outlen+1);
  2718. ucnv_fromUChars(conv, out, outlen, in, inlen, &err);
  2719. out[outlen] = 0x00;
  2720. return out;
  2721. }
  2722. void rtlVUnicodeToCodepageX(unsigned & outlen, char * & out, UChar const * in, char const * codepage)
  2723. {
  2724. rtlUnicodeToCodepageX(outlen, out, rtlUnicodeStrlen(in), in, codepage);
  2725. }
  2726. char * rtlVUnicodeToVCodepageX(UChar const * in, char const * codepage)
  2727. {
  2728. return rtlUnicodeToVCodepageX(rtlUnicodeStrlen(in), in, codepage);
  2729. }
  2730. void rtlStrToUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
  2731. {
  2732. rtlCodepageToUnicode(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2733. }
  2734. void rtlUnicodeToStr(unsigned outlen, char * out, unsigned inlen, UChar const * in)
  2735. {
  2736. rtlUnicodeToCodepage(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2737. }
  2738. void rtlStrToUnicodeX(unsigned & outlen, UChar * & out, unsigned inlen, char const * in)
  2739. {
  2740. rtlCodepageToUnicodeX(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2741. }
  2742. void rtlUnicodeToStrX(unsigned & outlen, char * & out, unsigned inlen, UChar const * in)
  2743. {
  2744. rtlUnicodeToCodepageX(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2745. }
  2746. void rtlUnicodeToEscapedStrX(unsigned & outlen, char * & out, unsigned inlen, UChar const * in)
  2747. {
  2748. StringBuffer outbuff;
  2749. escapeUnicode(inlen, in, outbuff);
  2750. outlen = outbuff.length();
  2751. out = (char *)malloc(outlen);
  2752. memcpy(out, outbuff.str(), outlen);
  2753. }
  2754. void rtlUnicodeToQuotedUTF8X(unsigned & outlen, char * & out, unsigned inlen, UChar const * in)
  2755. {
  2756. UnicodeString unicode(in, inlen);
  2757. unicode.findAndReplace("'", "\\'");
  2758. //pre-flight length - may be more efficient to guess length and only re-extract if guess no good, but what to guess?
  2759. outlen = unicode.extract(0, unicode.length(), 0, 0, UTF8_CODEPAGE);
  2760. out = (char *)malloc(outlen);
  2761. unicode.extract(0, unicode.length(), out, outlen, UTF8_CODEPAGE);
  2762. }
  2763. bool rtlCodepageToCodepage(unsigned outlen, char * out, unsigned inlen, char const * in, char const * outcodepage, char const * incodepage)
  2764. {
  2765. UConverter * inconv = queryRTLUnicodeConverter(incodepage)->query();
  2766. UConverter * outconv = queryRTLUnicodeConverter(outcodepage)->query();
  2767. UErrorCode err = U_ZERO_ERROR;
  2768. char * target = out;
  2769. ucnv_convertEx(outconv, inconv, &target, out+outlen, &in, in+inlen, NULL, NULL, NULL, NULL, TRUE, TRUE, &err);
  2770. unsigned len = target - out;
  2771. if(len < outlen)
  2772. codepageBlankFill(outcodepage, target, outlen-len);
  2773. return U_SUCCESS(err);
  2774. }
  2775. bool rtlCodepageToCodepageX(unsigned & outlen, char * & out, unsigned maxoutlen, unsigned inlen, char const * in, char const * outcodepage, char const * incodepage)
  2776. {
  2777. UConverter * inconv = queryRTLUnicodeConverter(incodepage)->query();
  2778. UConverter * outconv = queryRTLUnicodeConverter(outcodepage)->query();
  2779. UErrorCode err = U_ZERO_ERROR;
  2780. //GH->PG is there a better way of coding this with out temporary buffer?
  2781. char * tempBuffer = (char *)malloc(maxoutlen);
  2782. char * target = tempBuffer;
  2783. ucnv_convertEx(outconv, inconv, &target, tempBuffer+maxoutlen, &in, in+inlen, NULL, NULL, NULL, NULL, TRUE, TRUE, &err);
  2784. unsigned len = target - tempBuffer;
  2785. outlen = len;
  2786. if (len == maxoutlen)
  2787. out = tempBuffer;
  2788. else
  2789. {
  2790. out = (char *)realloc(tempBuffer, len);
  2791. if (!out)
  2792. out = tempBuffer;
  2793. }
  2794. return U_SUCCESS(err);
  2795. }
  2796. int rtlSingleUtf8ToCodepage(char * out, unsigned inlen, char const * in, char const * outcodepage)
  2797. {
  2798. if(!U8_IS_LEAD(*in))
  2799. return -1;
  2800. uint8_t trailbytes = U8_COUNT_TRAIL_BYTES(*in);
  2801. if(inlen < (unsigned)(trailbytes+1))
  2802. return -1;
  2803. if(!rtlCodepageToCodepage(1, out, trailbytes+1, in, outcodepage, UTF8_CODEPAGE))
  2804. return -1;
  2805. return static_cast<int>(trailbytes); //cast okay as is certainly 0--3
  2806. }
  2807. //---------------------------------------------------------------------------
  2808. void rtlStrToDataX(unsigned & tlen, void * & tgt, unsigned slen, const void * src)
  2809. {
  2810. void * data = malloc(slen);
  2811. memcpy(data, src, slen);
  2812. tgt = data;
  2813. tlen = slen;
  2814. }
  2815. void rtlStrToStrX(unsigned & tlen, char * & tgt, unsigned slen, const void * src)
  2816. {
  2817. char * data = (char *)malloc(slen);
  2818. memcpy(data, src, slen);
  2819. tgt = data;
  2820. tlen = slen;
  2821. }
  2822. char * rtlStrToVStrX(unsigned slen, const void * src)
  2823. {
  2824. char * data = (char *)malloc(slen+1);
  2825. memcpy(data, src, slen);
  2826. data[slen] = 0;
  2827. return data;
  2828. }
  2829. char * rtlEStrToVStrX(unsigned slen, const char * src)
  2830. {
  2831. char * astr = (char*)alloca(slen);
  2832. rtlEStrToStr(slen,astr,slen,src);
  2833. return rtlStrToVStrX(slen, astr);
  2834. }
  2835. void rtlEStrToStrX(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  2836. {
  2837. char * data = (char *)malloc(slen);
  2838. rtlEStrToStr(slen, data, slen, src);
  2839. tgt = data;
  2840. tlen = slen;
  2841. }
  2842. void rtlStrToEStrX(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  2843. {
  2844. char * data = (char *)malloc(slen);
  2845. rtlStrToEStr(slen, data, slen, src);
  2846. tgt = data;
  2847. tlen = slen;
  2848. }
  2849. //---------------------------------------------------------------------------
  2850. // See http://www.isthe.com/chongo/tech/comp/fnv/index.html
  2851. #define FNV1_64_INIT I64C(0xcbf29ce484222325)
  2852. #define FNV_64_PRIME I64C(0x100000001b3U)
  2853. hash64_t rtlHash64Data(size32_t len, const void *buf, hash64_t hval)
  2854. {
  2855. const unsigned char *bp = (const unsigned char *)buf; /* start of buffer */
  2856. const unsigned char *be = bp + len; /* beyond end of buffer */
  2857. while (bp < be)
  2858. {
  2859. hval *= FNV_64_PRIME;
  2860. hval ^= *bp++;
  2861. }
  2862. return hval;
  2863. }
  2864. hash64_t rtlHash64VStr(const char *str, hash64_t hval)
  2865. {
  2866. const unsigned char *s = (const unsigned char *)str;
  2867. unsigned char c;
  2868. while ((c = *s++) != 0)
  2869. {
  2870. hval *= FNV_64_PRIME;
  2871. hval ^= c;
  2872. }
  2873. return hval;
  2874. }
  2875. hash64_t rtlHash64Unicode(unsigned length, UChar const * k, hash64_t initval)
  2876. {
  2877. return rtlHash64Data(length*2, k, initval);
  2878. }
  2879. hash64_t rtlHash64VUnicode(UChar const * k, hash64_t initval)
  2880. {
  2881. return rtlHash64Data(rtlUnicodeStrlen(k)*2, k, initval);
  2882. }
  2883. //---------------------------------------------------------------------------
  2884. // See http://www.isthe.com/chongo/tech/comp/fnv/index.html
  2885. #define FNV1_32_INIT 0x811C9DC5
  2886. #define FNV_32_PRIME 0x1000193
  2887. unsigned rtlHash32Data(size32_t len, const void *buf, unsigned hval)
  2888. {
  2889. const unsigned char *bp = (const unsigned char *)buf; /* start of buffer */
  2890. const unsigned char *be = bp + len; /* beyond end of buffer */
  2891. while (bp < be)
  2892. {
  2893. hval *= FNV_32_PRIME;
  2894. hval ^= *bp++;
  2895. }
  2896. return hval;
  2897. }
  2898. unsigned rtlHash32VStr(const char *str, unsigned hval)
  2899. {
  2900. const unsigned char *s = (const unsigned char *)str;
  2901. unsigned char c;
  2902. while ((c = *s++) != 0)
  2903. {
  2904. hval *= FNV_32_PRIME;
  2905. hval ^= c;
  2906. }
  2907. return hval;
  2908. }
  2909. unsigned rtlHash32Unicode(unsigned length, UChar const * k, unsigned initval)
  2910. {
  2911. return rtlHash32Data(length*2, k, initval);
  2912. }
  2913. unsigned rtlHash32VUnicode(UChar const * k, unsigned initval)
  2914. {
  2915. return rtlHash32Data(rtlUnicodeStrlen(k)*2, k, initval);
  2916. }
  2917. //---------------------------------------------------------------------------
  2918. // Hash Helper functions
  2919. #define mix(a,b,c) \
  2920. { \
  2921. a -= b; a -= c; a ^= (c>>13); \
  2922. b -= c; b -= a; b ^= (a<<8); \
  2923. c -= a; c -= b; c ^= (b>>13); \
  2924. a -= b; a -= c; a ^= (c>>12); \
  2925. b -= c; b -= a; b ^= (a<<16); \
  2926. c -= a; c -= b; c ^= (b>>5); \
  2927. a -= b; a -= c; a ^= (c>>3); \
  2928. b -= c; b -= a; b ^= (a<<10); \
  2929. c -= a; c -= b; c ^= (b>>15); \
  2930. }
  2931. #define GETBYTE0(n) ((unsigned)k[n])
  2932. #define GETBYTE1(n) ((unsigned)k[n+1]<<8)
  2933. #define GETBYTE2(n) ((unsigned)k[n+2]<<16)
  2934. #define GETBYTE3(n) ((unsigned)k[n+3]<<24)
  2935. #define GETWORD(k,n) (GETBYTE0(n)+GETBYTE1(n)+GETBYTE2(n)+GETBYTE3(n))
  2936. // the above looks inefficient but the compiler optimizes well
  2937. // this hash looks slow but is about twice as quick as using our CRC table
  2938. // and gives gives better results
  2939. // (see paper at http://burtleburtle.net/bob/hash/evahash.html for more info)
  2940. unsigned rtlHashData( unsigned length, const void *_k, unsigned initval)
  2941. {
  2942. const unsigned char * k = (const unsigned char *)_k;
  2943. register unsigned a,b,c,len;
  2944. /* Set up the internal state */
  2945. len = length;
  2946. a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
  2947. c = initval; /* the previous hash value */
  2948. /*---------------------------------------- handle most of the key */
  2949. while (len >= 12)
  2950. {
  2951. a += GETWORD(k,0);
  2952. b += GETWORD(k,4);
  2953. c += GETWORD(k,8);
  2954. mix(a,b,c);
  2955. k += 12; len -= 12;
  2956. }
  2957. /*------------------------------------- handle the last 11 bytes */
  2958. c += length;
  2959. switch(len) /* all the case statements fall through */
  2960. {
  2961. case 11: c+=GETBYTE3(7);
  2962. case 10: c+=GETBYTE2(7);
  2963. case 9 : c+=GETBYTE1(7);
  2964. /* the first byte of c is reserved for the length */
  2965. case 8 : b+=GETBYTE3(4);
  2966. case 7 : b+=GETBYTE2(4);
  2967. case 6 : b+=GETBYTE1(4);
  2968. case 5 : b+=GETBYTE0(4);
  2969. case 4 : a+=GETBYTE3(0);
  2970. case 3 : a+=GETBYTE2(0);
  2971. case 2 : a+=GETBYTE1(0);
  2972. case 1 : a+=GETBYTE0(0);
  2973. /* case 0: nothing left to add */
  2974. }
  2975. mix(a,b,c);
  2976. /*-------------------------------------------- report the result */
  2977. return c;
  2978. }
  2979. unsigned rtlHashString( unsigned length, const char *_k, unsigned initval)
  2980. {
  2981. return rtlHashData(rtlTrimStrLen(length, _k), _k, initval);
  2982. }
  2983. unsigned rtlHashUnicode(unsigned length, UChar const * k, unsigned initval)
  2984. {
  2985. //Would make more sense to trim here.
  2986. return rtlHashData(length*2, k, initval);
  2987. }
  2988. unsigned rtlHashVStr(const char * k, unsigned initval)
  2989. {
  2990. return rtlHashData(rtlTrimVStrLen(k), k, initval);
  2991. }
  2992. unsigned rtlHashVUnicode(UChar const * k, unsigned initval)
  2993. {
  2994. return rtlHashData(rtlTrimVUnicodeStrLen(k)*2, k, initval);
  2995. }
  2996. #define GETWORDNC(k,n) ((GETBYTE0(n)+GETBYTE1(n)+GETBYTE2(n)+GETBYTE3(n))&0xdfdfdfdf)
  2997. unsigned rtlHashDataNC( unsigned length, const void * _k, unsigned initval)
  2998. {
  2999. const unsigned char * k = (const unsigned char *)_k;
  3000. register unsigned a,b,c,len;
  3001. /* Set up the internal state */
  3002. len = length;
  3003. a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
  3004. c = initval; /* the previous hash value */
  3005. /*---------------------------------------- handle most of the key */
  3006. while (len >= 12)
  3007. {
  3008. a += GETWORDNC(k,0);
  3009. b += GETWORDNC(k,4);
  3010. c += GETWORDNC(k,8);
  3011. mix(a,b,c);
  3012. k += 12; len -= 12;
  3013. }
  3014. /*------------------------------------- handle the last 11 bytes */
  3015. c += length;
  3016. switch(len) /* all the case statements fall through */
  3017. {
  3018. case 11: c+=GETBYTE3(7)&0xdf;
  3019. case 10: c+=GETBYTE2(7)&0xdf;
  3020. case 9 : c+=GETBYTE1(7)&0xdf;
  3021. /* the first byte of c is reserved for the length */
  3022. case 8 : b+=GETBYTE3(4)&0xdf;
  3023. case 7 : b+=GETBYTE2(4)&0xdf;
  3024. case 6 : b+=GETBYTE1(4)&0xdf;
  3025. case 5 : b+=GETBYTE0(4)&0xdf;
  3026. case 4 : a+=GETBYTE3(0)&0xdf;
  3027. case 3 : a+=GETBYTE2(0)&0xdf;
  3028. case 2 : a+=GETBYTE1(0)&0xdf;
  3029. case 1 : a+=GETBYTE0(0)&0xdf;
  3030. /* case 0: nothing left to add */
  3031. }
  3032. mix(a,b,c);
  3033. /*-------------------------------------------- report the result */
  3034. return c;
  3035. }
  3036. unsigned rtlHashVStrNC(const char * k, unsigned initval)
  3037. {
  3038. return rtlHashDataNC(strlen(k), k, initval);
  3039. }
  3040. //---------------------------------------------------------------------------
  3041. unsigned rtlCrcData( unsigned length, const void *_k, unsigned initval)
  3042. {
  3043. return crc32((const char *)_k, length, initval);
  3044. }
  3045. unsigned rtlCrcUnicode(unsigned length, UChar const * k, unsigned initval)
  3046. {
  3047. return crc32((char const *)k, length*2, initval);
  3048. }
  3049. unsigned rtlCrcVStr( const char * k, unsigned initval)
  3050. {
  3051. return crc32(k, strlen(k), initval);
  3052. }
  3053. unsigned rtlCrcVUnicode(UChar const * k, unsigned initval)
  3054. {
  3055. return crc32((char const *)k, rtlUnicodeStrlen(k)*2, initval);
  3056. }
  3057. //---------------------------------------------------------------------------
  3058. // MD5 processing:
  3059. void rtlHashMd5Init(size32_t sizestate, void * _state)
  3060. {
  3061. assertex(sizestate >= sizeof(md5_state_s));
  3062. md5_state_s * state = (md5_state_s *)_state;
  3063. md5_init(state);
  3064. }
  3065. void rtlHashMd5Data(size32_t len, const void *buf, size32_t sizestate, void * _state)
  3066. {
  3067. md5_state_s * state = (md5_state_s * )_state;
  3068. md5_append(state, (const md5_byte_t *)buf, len);
  3069. }
  3070. void rtlHashMd5Finish(void * out, size32_t sizestate, void * _state)
  3071. {
  3072. typedef md5_byte_t digest_t[16];
  3073. md5_state_s * state = (md5_state_s *)_state;
  3074. md5_finish(state, *(digest_t*)out);
  3075. }
  3076. //---------------------------------------------------------------------------
  3077. unsigned rtlRandom()
  3078. {
  3079. CriticalBlock block(random_Sect);
  3080. return random_->next();
  3081. }
  3082. void rtlSeedRandom(unsigned value)
  3083. {
  3084. CriticalBlock block(random_Sect);
  3085. random_->seed(value);
  3086. }
  3087. // These are all useful functions for testing - not really designed for other people to use them...
  3088. ECLRTL_API unsigned rtlTick()
  3089. {
  3090. return msTick();
  3091. }
  3092. ECLRTL_API bool rtlGPF()
  3093. {
  3094. char * x = 0;
  3095. *x = 0;
  3096. return false;
  3097. }
  3098. ECLRTL_API unsigned rtlSleep(unsigned delay)
  3099. {
  3100. MilliSleep(delay);
  3101. return 0;
  3102. }
  3103. ECLRTL_API unsigned rtlDisplay(unsigned len, const char * src)
  3104. {
  3105. LOG(MCprogress, unknownJob, "%.*s", len, src);
  3106. return 0;
  3107. }
  3108. void rtlEcho(unsigned len, const char * src)
  3109. {
  3110. printf("%.*s\n", len, src);
  3111. }
  3112. ECLRTL_API unsigned __int64 rtlNano()
  3113. {
  3114. return cycle_to_nanosec(get_cycles_now());
  3115. }
  3116. ECLRTL_API void rtlTestGetPrimes(unsigned & num, void * & data)
  3117. {
  3118. unsigned numPrimes = 6;
  3119. unsigned size = sizeof(unsigned) * numPrimes;
  3120. unsigned * primes = (unsigned *)malloc(size);
  3121. primes[0] = 1;
  3122. primes[1] = 2;
  3123. primes[2] = 3;
  3124. primes[3] = 5;
  3125. primes[4] = 7;
  3126. primes[5] = 11;
  3127. num = numPrimes;
  3128. data = primes;
  3129. }
  3130. ECLRTL_API void rtlTestFibList(bool & outAll, size32_t & outSize, void * & outData, bool inAll, size32_t inSize, const void * inData)
  3131. {
  3132. const unsigned * inList = (const unsigned *)inData;
  3133. unsigned * outList = (unsigned *)malloc(inSize);
  3134. unsigned * curOut = outList;
  3135. unsigned count = inSize / sizeof(*inList);
  3136. unsigned prev = 0;
  3137. for (unsigned i=0; i < count; i++)
  3138. {
  3139. unsigned next = *inList++;
  3140. *curOut++ = next + prev;
  3141. prev = next;
  3142. }
  3143. outAll = inAll;
  3144. outSize = inSize;
  3145. outData = outList;
  3146. }
  3147. unsigned rtlDelayReturn(unsigned value, unsigned sleepTime)
  3148. {
  3149. MilliSleep(sleepTime);
  3150. return value;
  3151. }
  3152. //---------------------------------------------------------------------------
  3153. class CRtlFailException : public CInterface, public IUserException
  3154. {
  3155. public:
  3156. CRtlFailException(int _code, char const * _msg) : code(_code) { msg = strdup(_msg); }
  3157. ~CRtlFailException() { free(msg); }
  3158. IMPLEMENT_IINTERFACE;
  3159. virtual int errorCode() const { return code; }
  3160. virtual StringBuffer & errorMessage(StringBuffer & buff) const { return buff.append(msg); }
  3161. virtual MessageAudience errorAudience() const { return MSGAUD_user; }
  3162. private:
  3163. int code;
  3164. char * msg;
  3165. };
  3166. void rtlFail(int code, const char *msg)
  3167. {
  3168. throw dynamic_cast<IUserException *>(new CRtlFailException(code, msg));
  3169. }
  3170. void rtlSysFail(int code, const char *msg)
  3171. {
  3172. throw MakeStringException(MSGAUD_user, code, "%s", msg);
  3173. }
  3174. void rtlReportRowOverflow(unsigned size, unsigned max)
  3175. {
  3176. throw MakeStringException(MSGAUD_user, 1000, "Row size %u exceeds the maximum size specified(%u)", size, max);
  3177. }
  3178. void rtlReportFieldOverflow(unsigned size, unsigned max, const char * name)
  3179. {
  3180. if (!name)
  3181. rtlReportRowOverflow(size, max);
  3182. else
  3183. throw MakeStringException(MSGAUD_user, 1000, "Assignment to field '%s' causes row overflow. Size %u exceeds the maximum size specified(%u)", name, size, max);
  3184. }
  3185. void rtlCheckRowOverflow(unsigned size, unsigned max)
  3186. {
  3187. if (size > max)
  3188. rtlReportRowOverflow(size, max);
  3189. }
  3190. void rtlCheckFieldOverflow(unsigned size, unsigned max, const char * field)
  3191. {
  3192. if (size > max)
  3193. rtlReportFieldOverflow(size, max, field);
  3194. }
  3195. void rtlFailUnexpected()
  3196. {
  3197. throw MakeStringException(MSGAUD_user, -1, "Unexpected code execution");
  3198. }
  3199. void rtlFailOnAssert()
  3200. {
  3201. throw MakeStringException(MSGAUD_user, -1, "Abort execution");
  3202. }
  3203. //---------------------------------------------------------------------------
  3204. void deserializeRaw(unsigned recordSize, void *record, MemoryBuffer &in)
  3205. {
  3206. in.read(recordSize, record);
  3207. }
  3208. void deserializeDataX(size32_t & len, void * & data, MemoryBuffer &in)
  3209. {
  3210. free(data);
  3211. in.read(sizeof(len), &len);
  3212. data = malloc(len);
  3213. in.read(len, data);
  3214. }
  3215. void deserializeStringX(size32_t & len, char * & data, MemoryBuffer &in)
  3216. {
  3217. free(data);
  3218. in.read(sizeof(len), &len);
  3219. data = (char *)malloc(len);
  3220. in.read(len, data);
  3221. }
  3222. char * deserializeCStringX(MemoryBuffer &in)
  3223. {
  3224. unsigned len;
  3225. in.read(sizeof(len), &len);
  3226. char * data = (char *)malloc(len+1);
  3227. in.read(len, data);
  3228. data[len] = 0;
  3229. return data;
  3230. }
  3231. void deserializeUnicodeX(size32_t & len, UChar * & data, MemoryBuffer &in)
  3232. {
  3233. free(data);
  3234. in.read(sizeof(len), &len);
  3235. data = (UChar *)malloc(len*sizeof(UChar));
  3236. in.read(len*sizeof(UChar), data);
  3237. }
  3238. void deserializeUtf8X(size32_t & len, char * & data, MemoryBuffer &in)
  3239. {
  3240. free(data);
  3241. in.read(sizeof(len), &len);
  3242. unsigned size = rtlUtf8Size(len, in.readDirect(0));
  3243. data = (char *)malloc(size);
  3244. in.read(size, data);
  3245. }
  3246. UChar * deserializeVUnicodeX(MemoryBuffer &in)
  3247. {
  3248. unsigned len;
  3249. in.read(sizeof(len), &len);
  3250. UChar * data = (UChar *)malloc((len+1)*sizeof(UChar));
  3251. in.read(len*sizeof(UChar), data);
  3252. data[len] = 0;
  3253. return data;
  3254. }
  3255. void deserializeSet(bool & isAll, size32_t & len, void * & data, MemoryBuffer &in)
  3256. {
  3257. free(data);
  3258. in.read(isAll);
  3259. in.read(sizeof(len), &len);
  3260. data = malloc(len);
  3261. in.read(len, data);
  3262. }
  3263. void serializeRaw(unsigned recordSize, const void *record, MemoryBuffer &out)
  3264. {
  3265. out.append(recordSize, record);
  3266. }
  3267. void serializeDataX(size32_t len, const void * data, MemoryBuffer &out)
  3268. {
  3269. out.append(len).append(len, data);
  3270. }
  3271. void serializeStringX(size32_t len, const char * data, MemoryBuffer &out)
  3272. {
  3273. out.append(len).append(len, data);
  3274. }
  3275. void serializeCStringX(const char * data, MemoryBuffer &out)
  3276. {
  3277. unsigned len = strlen(data);
  3278. out.append(len).append(len, data);
  3279. }
  3280. void serializeUnicodeX(size32_t len, const UChar * data, MemoryBuffer &out)
  3281. {
  3282. out.append(len).append(len*sizeof(UChar), data);
  3283. }
  3284. void serializeUtf8X(size32_t len, const char * data, MemoryBuffer &out)
  3285. {
  3286. out.append(len).append(rtlUtf8Size(len, data), data);
  3287. }
  3288. void serializeSet(bool isAll, size32_t len, const void * data, MemoryBuffer &out)
  3289. {
  3290. out.append(isAll).append(len).append(len, data);
  3291. }
  3292. //---------------------------------------------------------------------------
  3293. ECLRTL_API void serializeFixedString(unsigned len, const char *field, MemoryBuffer &out)
  3294. {
  3295. out.append(len, field);
  3296. }
  3297. ECLRTL_API void serializeLPString(unsigned len, const char *field, MemoryBuffer &out)
  3298. {
  3299. out.append(len);
  3300. out.append(len, field);
  3301. }
  3302. ECLRTL_API void serializeVarString(const char *field, MemoryBuffer &out)
  3303. {
  3304. out.append(field);
  3305. }
  3306. ECLRTL_API void serializeBool(bool field, MemoryBuffer &out)
  3307. {
  3308. out.append(field);
  3309. }
  3310. ECLRTL_API void serializeFixedData(unsigned len, const void *field, MemoryBuffer &out)
  3311. {
  3312. out.append(len, field);
  3313. }
  3314. ECLRTL_API void serializeLPData(unsigned len, const void *field, MemoryBuffer &out)
  3315. {
  3316. out.append(len);
  3317. out.append(len, field);
  3318. }
  3319. ECLRTL_API void serializeInt1(signed char field, MemoryBuffer &out)
  3320. {
  3321. // MORE - why did overloading pick the int method for this???
  3322. // out.append(field);
  3323. out.appendEndian(sizeof(field), &field);
  3324. }
  3325. ECLRTL_API void serializeInt2(signed short field, MemoryBuffer &out)
  3326. {
  3327. out.appendEndian(sizeof(field), &field);
  3328. }
  3329. ECLRTL_API void serializeInt3(signed int field, MemoryBuffer &out)
  3330. {
  3331. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3332. out.appendEndian(3, &field);
  3333. #else
  3334. out.appendEndian(3, ((char *) &field) + 1);
  3335. #endif
  3336. }
  3337. ECLRTL_API void serializeInt4(signed int field, MemoryBuffer &out)
  3338. {
  3339. out.appendEndian(sizeof(field), &field);
  3340. }
  3341. ECLRTL_API void serializeInt5(signed __int64 field, MemoryBuffer &out)
  3342. {
  3343. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3344. out.appendEndian(5, &field);
  3345. #else
  3346. out.appendEndian(5, ((char *) &field) + 3);
  3347. #endif
  3348. }
  3349. ECLRTL_API void serializeInt6(signed __int64 field, MemoryBuffer &out)
  3350. {
  3351. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3352. out.appendEndian(6, &field);
  3353. #else
  3354. out.appendEndian(6, ((char *) &field) + 2);
  3355. #endif
  3356. }
  3357. ECLRTL_API void serializeInt7(signed __int64 field, MemoryBuffer &out)
  3358. {
  3359. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3360. out.appendEndian(7, &field);
  3361. #else
  3362. out.appendEndian(7, ((char *) &field) + 1);
  3363. #endif
  3364. }
  3365. ECLRTL_API void serializeInt8(signed __int64 field, MemoryBuffer &out)
  3366. {
  3367. out.appendEndian(sizeof(field), &field);
  3368. }
  3369. ECLRTL_API void serializeUInt1(unsigned char field, MemoryBuffer &out)
  3370. {
  3371. out.appendEndian(sizeof(field), &field);
  3372. }
  3373. ECLRTL_API void serializeUInt2(unsigned short field, MemoryBuffer &out)
  3374. {
  3375. out.appendEndian(sizeof(field), &field);
  3376. }
  3377. ECLRTL_API void serializeUInt3(unsigned int field, MemoryBuffer &out)
  3378. {
  3379. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3380. out.appendEndian(3, &field);
  3381. #else
  3382. out.appendEndian(3, ((char *) &field) + 1);
  3383. #endif
  3384. }
  3385. ECLRTL_API void serializeUInt4(unsigned int field, MemoryBuffer &out)
  3386. {
  3387. out.appendEndian(sizeof(field), &field);
  3388. }
  3389. ECLRTL_API void serializeUInt5(unsigned __int64 field, MemoryBuffer &out)
  3390. {
  3391. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3392. out.appendEndian(5, &field);
  3393. #else
  3394. out.appendEndian(5, ((char *) &field) + 3);
  3395. #endif
  3396. }
  3397. ECLRTL_API void serializeUInt6(unsigned __int64 field, MemoryBuffer &out)
  3398. {
  3399. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3400. out.appendEndian(6, &field);
  3401. #else
  3402. out.appendEndian(6, ((char *) &field) + 2);
  3403. #endif
  3404. }
  3405. ECLRTL_API void serializeUInt7(unsigned __int64 field, MemoryBuffer &out)
  3406. {
  3407. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3408. out.appendEndian(7, &field);
  3409. #else
  3410. out.appendEndian(7, ((char *) &field) + 1);
  3411. #endif
  3412. }
  3413. ECLRTL_API void serializeUInt8(unsigned __int64 field, MemoryBuffer &out)
  3414. {
  3415. out.appendEndian(sizeof(field), &field);
  3416. }
  3417. ECLRTL_API void serializeReal4(float field, MemoryBuffer &out)
  3418. {
  3419. out.appendEndian(sizeof(field), &field);
  3420. }
  3421. ECLRTL_API void serializeReal8(double field, MemoryBuffer &out)
  3422. {
  3423. out.append(sizeof(field), &field);
  3424. }
  3425. //These maths functions can all have out of range arguments....
  3426. //---------------------------------------------------------------------------
  3427. ECLRTL_API double rtlLog10(double x)
  3428. {
  3429. if (x <= 0) return 0;
  3430. return log10(x);
  3431. }
  3432. ECLRTL_API double rtlLog(double x)
  3433. {
  3434. if (x <= 0) return 0;
  3435. return log(x);
  3436. }
  3437. ECLRTL_API double rtlSqrt(double x)
  3438. {
  3439. if (x < 0) return 0;
  3440. return sqrt(x);
  3441. }
  3442. ECLRTL_API double rtlACos(double x)
  3443. {
  3444. if (fabs(x) > 1) return 0;
  3445. return acos(x);
  3446. }
  3447. ECLRTL_API double rtlASin(double x)
  3448. {
  3449. if (fabs(x) > 1) return 0;
  3450. return asin(x);
  3451. }
  3452. //---------------------------------------------------------------------------
  3453. ECLRTL_API bool rtlIsValidReal(unsigned size, const void * data)
  3454. {
  3455. byte * bytes = (byte *)data;
  3456. //Valid unless it is a Nan, represented by exponent all 1's and non-zero mantissa (ignore the sign).
  3457. if (size == 4)
  3458. {
  3459. //sign(1) exponent(8) mantissa(23)
  3460. if (((bytes[3] & 0x7f) == 0x7f) && ((bytes[2] & 0x80) == 0x80))
  3461. {
  3462. if ((bytes[2] & 0x7f) != 0 || bytes[1] || bytes[0])
  3463. return false;
  3464. }
  3465. }
  3466. else if (size == 8)
  3467. {
  3468. //sign(1) exponent(11) mantissa(52)
  3469. if (((bytes[7] & 0x7f) == 0x7f) && ((bytes[6] & 0xF0) == 0xF0))
  3470. {
  3471. if ((bytes[6] & 0xF) || bytes[5] || bytes[4] || bytes[3] || bytes[2] || bytes[1] || bytes[0])
  3472. return false;
  3473. }
  3474. }
  3475. else
  3476. {
  3477. //sign(1) exponent(15) mantissa(64)
  3478. assertex(size==10);
  3479. if (((bytes[9] & 0x7f) == 0x7f) && (bytes[8] == 0xFF))
  3480. {
  3481. if (bytes[7] || bytes[6] || bytes[5] || bytes[4] || bytes[3] || bytes[2] || bytes[1] || bytes[0])
  3482. return false;
  3483. }
  3484. }
  3485. return true;
  3486. }
  3487. void rtlUnicodeToUnicode(size32_t outlen, UChar * out, size32_t inlen, UChar const *in)
  3488. {
  3489. if(inlen>outlen) inlen = outlen;
  3490. memcpy(out, in, inlen*2);
  3491. while(inlen<outlen)
  3492. out[inlen++] = 0x0020;
  3493. }
  3494. void rtlUnicodeToVUnicode(size32_t outlen, UChar * out, size32_t inlen, UChar const *in)
  3495. {
  3496. if((inlen>=outlen) && (outlen != 0)) inlen = outlen-1;
  3497. memcpy(out, in, inlen*2);
  3498. out[inlen] = 0x0000;
  3499. }
  3500. void rtlVUnicodeToUnicode(size32_t outlen, UChar * out, UChar const *in)
  3501. {
  3502. rtlUnicodeToUnicode(outlen, out, rtlUnicodeStrlen(in), in);
  3503. }
  3504. void rtlVUnicodeToVUnicode(size32_t outlen, UChar * out, UChar const *in)
  3505. {
  3506. rtlUnicodeToVUnicode(outlen, out, rtlUnicodeStrlen(in), in);
  3507. }
  3508. void rtlUnicodeToUnicodeX(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src)
  3509. {
  3510. tgt = (UChar *)malloc(slen*2);
  3511. memcpy(tgt, src, slen*2);
  3512. tlen = slen;
  3513. }
  3514. UChar * rtlUnicodeToVUnicodeX(unsigned slen, UChar const * src)
  3515. {
  3516. UChar * data = (UChar *)malloc((slen+1)*2);
  3517. memcpy(data, src, slen*2);
  3518. data[slen] = 0x0000;
  3519. return data;
  3520. }
  3521. void rtlVUnicodeToUnicodeX(unsigned & tlen, UChar * & tgt, UChar const * src)
  3522. {
  3523. rtlUnicodeToUnicodeX(tlen, tgt, rtlUnicodeStrlen(src), src);
  3524. }
  3525. UChar * rtlVUnicodeToVUnicodeX(UChar const * src)
  3526. {
  3527. return rtlUnicodeToVUnicodeX(rtlUnicodeStrlen(src), src);
  3528. }
  3529. void rtlDecPushUnicode(size32_t len, UChar const * data)
  3530. {
  3531. char * buff = 0;
  3532. unsigned bufflen = 0;
  3533. rtlUnicodeToStrX(bufflen, buff, len, data);
  3534. DecPushString(bufflen, buff);
  3535. rtlFree(buff);
  3536. }
  3537. unsigned rtlUnicodeStrlen(UChar const * str)
  3538. {
  3539. return u_strlen(str);
  3540. }
  3541. //---------------------------------------------------------------------------
  3542. unsigned rtlUtf8Size(const void * data)
  3543. {
  3544. return readUtf8Size(data);
  3545. }
  3546. unsigned rtlUtf8Size(unsigned len, const void * _data)
  3547. {
  3548. const byte * data = (const byte *)_data;
  3549. size32_t offset = 0;
  3550. for (unsigned i=0; i< len; i++)
  3551. offset += readUtf8Size(data+offset);
  3552. return offset;
  3553. }
  3554. unsigned rtlUtf8Length(unsigned size, const void * _data)
  3555. {
  3556. const byte * data = (const byte *)_data;
  3557. size32_t length = 0;
  3558. for (unsigned offset=0; offset < size; offset += readUtf8Size(data+offset))
  3559. length++;
  3560. return length;
  3561. }
  3562. unsigned rtlUtf8Char(const void * data)
  3563. {
  3564. return readUtf8Char(data);
  3565. }
  3566. void rtlUtf8ToData(size32_t outlen, void * out, size32_t inlen, const char *in)
  3567. {
  3568. unsigned insize = rtlUtf8Size(inlen, in);
  3569. rtlCodepageToCodepage(outlen, (char *)out, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  3570. }
  3571. void rtlUtf8ToDataX(size32_t & outlen, void * & out, size32_t inlen, const char *in)
  3572. {
  3573. unsigned insize = rtlUtf8Size(inlen, in);
  3574. char * cout;
  3575. rtlCodepageToCodepageX(outlen, cout, inlen, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  3576. out = cout;
  3577. }
  3578. void rtlUtf8ToStr(size32_t outlen, char * out, size32_t inlen, const char *in)
  3579. {
  3580. unsigned insize = rtlUtf8Size(inlen, in);
  3581. rtlCodepageToCodepage(outlen, (char *)out, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  3582. }
  3583. void rtlUtf8ToStrX(size32_t & outlen, char * & out, size32_t inlen, const char *in)
  3584. {
  3585. unsigned insize = rtlUtf8Size(inlen, in);
  3586. rtlCodepageToCodepageX(outlen, out, inlen, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  3587. }
  3588. char * rtlUtf8ToVStr(size32_t inlen, const char *in)
  3589. {
  3590. unsigned utfSize = rtlUtf8Size(inlen, in);
  3591. char *ret = (char *) rtlMalloc(inlen+1);
  3592. rtlCodepageToCodepage(inlen, ret, utfSize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  3593. ret[inlen] = 0;
  3594. return ret;
  3595. }
  3596. void rtlDataToUtf8(size32_t outlen, char * out, size32_t inlen, const void *in)
  3597. {
  3598. rtlCodepageToCodepage(outlen*UTF8_MAXSIZE, (char *)out, inlen, (const char *)in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
  3599. }
  3600. void rtlDataToUtf8X(size32_t & outlen, char * & out, size32_t inlen, const void *in)
  3601. {
  3602. unsigned outsize;
  3603. rtlCodepageToCodepageX(outsize, out, inlen*UTF8_MAXSIZE, inlen, (const char *)in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
  3604. outlen = rtlUtf8Length(outsize, out);
  3605. }
  3606. void rtlStrToUtf8(size32_t outlen, char * out, size32_t inlen, const char *in)
  3607. {
  3608. rtlCodepageToCodepage(outlen*UTF8_MAXSIZE, (char *)out, inlen, in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
  3609. }
  3610. void rtlStrToUtf8X(size32_t & outlen, char * & out, size32_t inlen, const char *in)
  3611. {
  3612. unsigned outsize;
  3613. rtlCodepageToCodepageX(outsize, out, inlen*UTF8_MAXSIZE, inlen, in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
  3614. outlen = rtlUtf8Length(outsize, out);
  3615. }
  3616. void rtlUtf8ToUtf8(size32_t outlen, char * out, size32_t inlen, const char *in)
  3617. {
  3618. //Packs as many characaters as it can into the target, but don't include any half characters
  3619. size32_t offset = 0;
  3620. size32_t outsize = outlen*UTF8_MAXSIZE;
  3621. for (unsigned i=0; i< inlen; i++)
  3622. {
  3623. unsigned nextSize = readUtf8Size(in+offset);
  3624. if (offset + nextSize > outsize)
  3625. break;
  3626. offset += nextSize;
  3627. }
  3628. memcpy(out, in, offset);
  3629. if (offset != outsize)
  3630. memset(out+offset, ' ', outsize-offset);
  3631. }
  3632. void rtlUtf8ToUtf8X(size32_t & outlen, char * & out, size32_t inlen, const char *in)
  3633. {
  3634. unsigned insize = rtlUtf8Size(inlen, in);
  3635. char * buffer = (char *)malloc(insize);
  3636. memcpy(buffer, in, insize);
  3637. outlen = inlen;
  3638. out = buffer;
  3639. }
  3640. static int rtlCompareUtf8Utf8ViaUnicode(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale)
  3641. {
  3642. rtlDataAttr uleft(llen*sizeof(UChar));
  3643. rtlDataAttr uright(rlen*sizeof(UChar));
  3644. rtlUtf8ToUnicode(llen, uleft.getustr(), llen, left);
  3645. rtlUtf8ToUnicode(rlen, uright.getustr(), rlen, right);
  3646. return rtlCompareUnicodeUnicode(llen, uleft.getustr(), rlen, uright.getustr(), locale);
  3647. }
  3648. int rtlCompareUtf8Utf8(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale)
  3649. {
  3650. //MORE: Do a simple comparison as long as there are no non->0x80 characters around
  3651. // fall back to a full unicode comparison if we hit one - or in the next character to allow for accents etc.
  3652. const byte * bleft = (const byte *)left;
  3653. const byte * bright = (const byte *)right;
  3654. unsigned len = llen > rlen ? rlen : llen;
  3655. for (unsigned i = 0; i < len; i++)
  3656. {
  3657. byte nextLeft = bleft[i];
  3658. byte nextRight = bright[i];
  3659. if (nextLeft >= 0x80 || nextRight >= 0x80)
  3660. return rtlCompareUtf8Utf8ViaUnicode(llen-i, left+i, rlen-i, right+i, locale);
  3661. if ((i+1 != len) && ((bleft[i+1] >= 0x80) || bright[i+1] >= 0x80))
  3662. return rtlCompareUtf8Utf8ViaUnicode(llen-i, left+i, rlen-i, right+i, locale);
  3663. if (nextLeft != nextRight)
  3664. return nextLeft - nextRight;
  3665. }
  3666. int diff = 0;
  3667. if (len != llen)
  3668. {
  3669. for (;(diff == 0) && (len != llen);len++)
  3670. diff = bleft[len] - ' ';
  3671. }
  3672. else if (len != rlen)
  3673. {
  3674. for (;(diff == 0) && (len != rlen);len++)
  3675. diff = ' ' - bright[len];
  3676. }
  3677. return diff;
  3678. }
  3679. int rtlCompareUtf8Utf8Strength(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale, unsigned strength)
  3680. {
  3681. //GH->PG Any better way of doing this? We could possible decide it was a binary comparison instead I guess.
  3682. rtlDataAttr uleft(llen*sizeof(UChar));
  3683. rtlDataAttr uright(rlen*sizeof(UChar));
  3684. rtlUtf8ToUnicode(llen, uleft.getustr(), llen, left);
  3685. rtlUtf8ToUnicode(rlen, uright.getustr(), rlen, right);
  3686. return rtlCompareUnicodeUnicodeStrength(llen, uleft.getustr(), rlen, uright.getustr(), locale, strength);
  3687. }
  3688. void rtlDecPushUtf8(size32_t len, const void * data)
  3689. {
  3690. DecPushString(len, (const char *)data); // good enough for the moment
  3691. }
  3692. bool rtlUtf8ToBool(size32_t inlen, const char * in)
  3693. {
  3694. return rtlStrToBool(inlen, in);
  3695. }
  3696. __int64 rtlUtf8ToInt(size32_t inlen, const char * in)
  3697. {
  3698. return rtlStrToInt8(inlen, in); // good enough for the moment
  3699. }
  3700. double rtlUtf8ToReal(size32_t inlen, const char * in)
  3701. {
  3702. return rtlStrToReal(inlen, in); // good enough for the moment
  3703. }
  3704. void rtlCodepageToUtf8(unsigned outlen, char * out, unsigned inlen, char const * in, char const * codepage)
  3705. {
  3706. rtlCodepageToCodepage(outlen*UTF8_MAXSIZE, (char *)out, inlen, in, UTF8_CODEPAGE, codepage);
  3707. }
  3708. void rtlCodepageToUtf8X(unsigned & outlen, char * & out, unsigned inlen, char const * in, char const * codepage)
  3709. {
  3710. unsigned outsize;
  3711. rtlCodepageToCodepageX(outsize, out, inlen*UTF8_MAXSIZE, inlen, in, UTF8_CODEPAGE, codepage);
  3712. outlen = rtlUtf8Length(outsize, out);
  3713. }
  3714. void rtlUtf8ToCodepage(unsigned outlen, char * out, unsigned inlen, char const * in, char const * codepage)
  3715. {
  3716. unsigned insize = rtlUtf8Size(inlen, in);
  3717. rtlCodepageToCodepage(outlen, (char *)out, insize, in, codepage, UTF8_CODEPAGE);
  3718. }
  3719. void rtlUtf8ToCodepageX(unsigned & outlen, char * & out, unsigned inlen, char const * in, char const * codepage)
  3720. {
  3721. unsigned insize = rtlUtf8Size(inlen, in);
  3722. rtlCodepageToCodepageX(outlen, out, inlen, insize, in, codepage, UTF8_CODEPAGE);
  3723. }
  3724. void rtlUnicodeToUtf8X(unsigned & outlen, char * & out, unsigned inlen, const UChar * in)
  3725. {
  3726. unsigned outsize;
  3727. rtlUnicodeToCodepageX(outsize, out, inlen, in, UTF8_CODEPAGE);
  3728. outlen = rtlUtf8Length(outsize, out);
  3729. }
  3730. void rtlUnicodeToUtf8(unsigned outlen, char * out, unsigned inlen, const UChar * in)
  3731. {
  3732. rtlUnicodeToCodepage(outlen*UTF8_MAXSIZE, out, inlen, in, UTF8_CODEPAGE);
  3733. }
  3734. void rtlUtf8ToUnicodeX(unsigned & outlen, UChar * & out, unsigned inlen, char const * in)
  3735. {
  3736. rtlCodepageToUnicodeX(outlen, out, rtlUtf8Size(inlen, in), in, UTF8_CODEPAGE);
  3737. }
  3738. void rtlUtf8ToUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
  3739. {
  3740. rtlCodepageToUnicode(outlen, out, rtlUtf8Size(inlen, in), in, UTF8_CODEPAGE);
  3741. }
  3742. ECLRTL_API void rtlUtf8SubStrFT(unsigned tlen, char * tgt, unsigned slen, char const * src, unsigned from, unsigned to)
  3743. {
  3744. normalizeFromTo(from, to);
  3745. clipFromTo(from, to, slen);
  3746. unsigned copylen = to - from;
  3747. unsigned startOffset = rtlUtf8Size(from, src);
  3748. rtlUtf8ToUtf8(tlen, tgt, copylen, src+startOffset);
  3749. }
  3750. ECLRTL_API void rtlUtf8SubStrFTX(unsigned & tlen, char * & tgt, unsigned slen, char const * src, unsigned from, unsigned to)
  3751. {
  3752. normalizeFromTo(from, to);
  3753. unsigned len = to - from;
  3754. clipFromTo(from, to, slen);
  3755. unsigned copylen = to - from;
  3756. unsigned fillSize = len - copylen;
  3757. unsigned startOffset = rtlUtf8Size(from, src);
  3758. unsigned copySize = rtlUtf8Size(copylen, src+startOffset);
  3759. char * buffer = (char *)malloc(copySize + fillSize);
  3760. memcpy(buffer, (byte *)src+startOffset, copySize);
  3761. if (fillSize)
  3762. memset(buffer+copySize, ' ', fillSize);
  3763. tlen = len;
  3764. tgt = buffer;
  3765. }
  3766. ECLRTL_API void rtlUtf8SubStrFX(unsigned & tlen, char * & tgt, unsigned slen, char const * src, unsigned from)
  3767. {
  3768. normalizeFromTo(from, slen);
  3769. unsigned len = slen - from;
  3770. unsigned startOffset = rtlUtf8Size(from, src);
  3771. unsigned copySize = rtlUtf8Size(len, src+startOffset);
  3772. char * buffer = (char *)malloc(copySize);
  3773. memcpy(buffer, (byte *)src+startOffset, copySize);
  3774. tlen = len;
  3775. tgt = buffer;
  3776. }
  3777. ECLRTL_API void rtlUtf8ToLower(size32_t l, char * t, char const * locale)
  3778. {
  3779. //Convert to lower case, but only go via unicode routines if we have to...
  3780. for (unsigned i=0; i< l; i++)
  3781. {
  3782. byte next = *t;
  3783. if (next >= 0x80)
  3784. {
  3785. //yuk, go via unicode to do the convertion.
  3786. unsigned len = l-i;
  3787. unsigned size = rtlUtf8Size(len, t+i);
  3788. rtlDataAttr unicode(len*sizeof(UChar));
  3789. rtlCodepageToUnicode(len, unicode.getustr(), size, t+i, UTF8_CODEPAGE);
  3790. rtlUnicodeToLower(len, unicode.getustr(), locale);
  3791. rtlUnicodeToCodepage(size, t+i, len, unicode.getustr(), UTF8_CODEPAGE);
  3792. return;
  3793. }
  3794. *t++ = tolower(next);
  3795. }
  3796. }
  3797. ECLRTL_API void rtlConcatUtf8(unsigned & tlen, char * * tgt, ...)
  3798. {
  3799. //Going to have to go via unicode because of normalization. However, it might be worth optimizing the case where no special characters are present
  3800. va_list args;
  3801. unsigned totalLength = 0;
  3802. unsigned maxLength = 0;
  3803. va_start(args, tgt);
  3804. for(;;)
  3805. {
  3806. unsigned len = va_arg(args, unsigned);
  3807. if(len+1==0)
  3808. break;
  3809. const char * str = va_arg(args, const char *);
  3810. totalLength += len;
  3811. if (len > maxLength)
  3812. maxLength = len;
  3813. }
  3814. va_end(args);
  3815. rtlDataAttr next(maxLength*sizeof(UChar));
  3816. rtlDataAttr result(totalLength*sizeof(UChar));
  3817. unsigned idx = 0;
  3818. UErrorCode err = U_ZERO_ERROR;
  3819. va_start(args, tgt);
  3820. for(;;)
  3821. {
  3822. unsigned len = va_arg(args, unsigned);
  3823. if(len+1==0)
  3824. break;
  3825. if (len)
  3826. {
  3827. const char * str = va_arg(args, const char *);
  3828. rtlUtf8ToUnicode(len, next.getustr(), len, str);
  3829. idx = unorm_concatenate(result.getustr(), idx, next.getustr(), len, result.getustr(), totalLength, UNORM_NFC, 0, &err);
  3830. }
  3831. }
  3832. va_end(args);
  3833. rtlUnicodeToUtf8X(tlen, *tgt, idx, result.getustr());
  3834. }
  3835. ECLRTL_API unsigned rtlConcatUtf8ToUtf8(unsigned tlen, char * tgt, unsigned offset, unsigned slen, const char * src)
  3836. {
  3837. //NB: Inconsistently with the other varieties, idx is a byte offset, not a character position to make the code more efficient.....
  3838. //normalization is done in the space filling routine at the end
  3839. unsigned ssize = rtlUtf8Size(slen, src);
  3840. assertex(tlen * UTF8_MAXSIZE >= offset+ssize);
  3841. memcpy(tgt+offset, src, ssize);
  3842. return offset + ssize;
  3843. }
  3844. ECLRTL_API void rtlUtf8SpaceFill(unsigned tlen, char * tgt, unsigned offset)
  3845. {
  3846. const byte * src = (const byte *)tgt;
  3847. for (unsigned i=0; i<offset; i++)
  3848. {
  3849. if (src[i] >= 0x80)
  3850. {
  3851. unsigned idx = rtlUtf8Length(offset, tgt);
  3852. rtlDataAttr unicode(idx*sizeof(UChar));
  3853. rtlUtf8ToUnicode(idx, unicode.getustr(), idx, tgt);
  3854. unicodeEnsureIsNormalized(idx, unicode.getustr());
  3855. rtlUnicodeToUtf8(tlen, tgt, idx, unicode.getustr());
  3856. return;
  3857. }
  3858. }
  3859. //no special characters=>easy route.
  3860. memset(tgt+offset, ' ', tlen*UTF8_MAXSIZE-offset);
  3861. }
  3862. ECLRTL_API unsigned rtlHash32Utf8(unsigned length, const char * k, unsigned initval)
  3863. {
  3864. return rtlHash32Data(rtlUtf8Size(length, k), k, initval);
  3865. }
  3866. ECLRTL_API unsigned rtlHashUtf8(unsigned length, const char * k, unsigned initval)
  3867. {
  3868. return rtlHashData(rtlUtf8Size(length, k), k, initval);
  3869. }
  3870. ECLRTL_API hash64_t rtlHash64Utf8(unsigned length, const char * k, hash64_t initval)
  3871. {
  3872. return rtlHash64Data(rtlUtf8Size(length, k), k, initval);
  3873. }
  3874. unsigned rtlCrcUtf8(unsigned length, const char * k, unsigned initval)
  3875. {
  3876. return rtlCrcData(rtlUtf8Size(length, k), k, initval);
  3877. }
  3878. int rtlNewSearchUtf8Table(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search, const char * locale)
  3879. {
  3880. //MORE: Hopelessly inefficient.... Should rethink - possibly introducing a class for doing string searching, and the Utf8 variety pre-converting the
  3881. //search strings into unicode.
  3882. int left = 0;
  3883. int right = count;
  3884. do
  3885. {
  3886. int mid = (left + right) >> 1;
  3887. int cmp = rtlCompareUtf8Utf8(width, search, elemlen, table[mid], locale);
  3888. if (cmp < 0)
  3889. right = mid;
  3890. else if (cmp > 0)
  3891. left = mid+1;
  3892. else
  3893. return mid;
  3894. } while (left < right);
  3895. return -1;
  3896. }
  3897. //---------------------------------------------------------------------------
  3898. #ifdef _USE_BOOST_REGEX
  3899. class CStrRegExprFindInstance : implements IStrRegExprFindInstance
  3900. {
  3901. private:
  3902. bool matched;
  3903. const boost::regex * regEx;
  3904. boost::cmatch subs;
  3905. char * sample; //only required if findstr/findvstr will be called
  3906. public:
  3907. CStrRegExprFindInstance(const boost::regex * _regEx, const char * _str, size32_t _from, size32_t _len, bool _keep)
  3908. : regEx(_regEx)
  3909. {
  3910. matched = false;
  3911. sample = NULL;
  3912. try
  3913. {
  3914. if (_keep)
  3915. {
  3916. sample = (char *)malloc(_len + 1); //required for findstr
  3917. memcpy(sample, _str + _from, _len);
  3918. sample[_len] = (char)NULL;
  3919. matched = boost::regex_search(sample, subs, *regEx);
  3920. }
  3921. else
  3922. {
  3923. matched = boost::regex_search(_str + _from, _str + _len, subs, *regEx);
  3924. }
  3925. }
  3926. catch (const std::runtime_error & e)
  3927. {
  3928. throw MakeStringException(0, "Error in regex search: %s (regex: %s)", e.what(), regEx->str().c_str());
  3929. }
  3930. }
  3931. ~CStrRegExprFindInstance() //CAVEAT non-virtual destructor !
  3932. {
  3933. free(sample);
  3934. }
  3935. //IStrRegExprFindInstance
  3936. bool found() const { return matched; }
  3937. void getMatchX(unsigned & outlen, char * & out, unsigned n = 0) const
  3938. {
  3939. if (matched && (n < subs.size()))
  3940. {
  3941. outlen = subs[n].second - subs[n].first;
  3942. out = (char *)malloc(outlen);
  3943. memcpy(out, subs[n].first, outlen);
  3944. }
  3945. else
  3946. {
  3947. outlen = 0;
  3948. out = NULL;
  3949. }
  3950. }
  3951. char const * findvstr(unsigned outlen, char * out, unsigned n = 0)
  3952. {
  3953. if (matched && (n < subs.size()))
  3954. {
  3955. unsigned sublen = subs[n].second - subs[n].first;
  3956. if (sublen >= outlen)
  3957. sublen = outlen - 1;
  3958. memcpy(out, subs[n].first, sublen);
  3959. out[sublen] = 0;
  3960. }
  3961. else
  3962. {
  3963. out[0] = 0;
  3964. }
  3965. return out;
  3966. }
  3967. };
  3968. //---------------------------------------------------------------------------
  3969. class CCompiledStrRegExpr : implements ICompiledStrRegExpr
  3970. {
  3971. private:
  3972. boost::regex regEx;
  3973. public:
  3974. CCompiledStrRegExpr(const char * _regExp, bool _isCaseSensitive = false)
  3975. {
  3976. try
  3977. {
  3978. if (_isCaseSensitive)
  3979. regEx.assign(_regExp, boost::regbase::perl);
  3980. else
  3981. regEx.assign(_regExp, boost::regbase::perl | boost::regbase::icase);
  3982. }
  3983. catch(const boost::bad_expression & e)
  3984. {
  3985. StringBuffer msg;
  3986. msg.append("Bad regular expression: ").append(e.what()).append(": ").append(_regExp);
  3987. rtlFail(0, msg.str()); //throws
  3988. }
  3989. }
  3990. //ICompiledStrRegExpr
  3991. void replace(size32_t & outlen, char * & out, size32_t slen, char const * str, size32_t rlen, char const * replace) const
  3992. {
  3993. std::string src(str, str + slen);
  3994. std::string fmt(replace, replace + rlen);
  3995. std::string tgt;
  3996. try
  3997. {
  3998. // tgt = boost::regex_merge(src, cre->regEx, fmt, boost::format_perl); //Algorithm regex_merge has been renamed regex_replace, existing code will continue to compile, but new code should use regex_replace instead.
  3999. tgt = boost::regex_replace(src, regEx, fmt, boost::format_perl);
  4000. }
  4001. catch(const std::runtime_error & e)
  4002. {
  4003. throw MakeStringException(0, "Error in regex replace: %s (regex: %s)", e.what(), regEx.str().c_str());
  4004. }
  4005. outlen = tgt.length();
  4006. out = (char *)malloc(outlen);
  4007. memcpy(out, tgt.data(), outlen);
  4008. }
  4009. IStrRegExprFindInstance * find(const char * str, size32_t from, size32_t len, bool needToKeepSearchString) const
  4010. {
  4011. CStrRegExprFindInstance * findInst = new CStrRegExprFindInstance(&regEx, str, from, len, needToKeepSearchString);
  4012. return findInst;
  4013. }
  4014. };
  4015. //---------------------------------------------------------------------------
  4016. ECLRTL_API ICompiledStrRegExpr * rtlCreateCompiledStrRegExpr(const char * regExpr, bool isCaseSensitive)
  4017. {
  4018. CCompiledStrRegExpr * expr = new CCompiledStrRegExpr(regExpr, isCaseSensitive);
  4019. return expr;
  4020. }
  4021. ECLRTL_API void rtlDestroyCompiledStrRegExpr(ICompiledStrRegExpr * compiledExpr)
  4022. {
  4023. if (compiledExpr)
  4024. delete (CCompiledStrRegExpr*)compiledExpr;
  4025. }
  4026. ECLRTL_API void rtlDestroyStrRegExprFindInstance(IStrRegExprFindInstance * findInst)
  4027. {
  4028. if (findInst)
  4029. delete (CStrRegExprFindInstance*)findInst;
  4030. }
  4031. //---------------------------------------------------------------------------
  4032. // RegEx Compiler for unicode strings
  4033. class CUStrRegExprFindInstance : implements IUStrRegExprFindInstance
  4034. {
  4035. private:
  4036. bool matched;
  4037. RegexMatcher * matcher;
  4038. UnicodeString sample;
  4039. unsigned matchedSize;
  4040. public:
  4041. CUStrRegExprFindInstance(RegexMatcher * _matcher, const UChar * _str, size32_t _from, size32_t _len)
  4042. : matcher(_matcher)
  4043. {
  4044. matched = false;
  4045. sample.setTo(_str + _from, _len);
  4046. matcher->reset(sample);
  4047. matched = matcher->find();
  4048. if (matched)
  4049. matchedSize = (unsigned)matcher->groupCount() + 1;
  4050. }
  4051. //IUStrRegExprFindInstance
  4052. bool found() const { return matched; }
  4053. void getMatchX(unsigned & outlen, UChar * & out, unsigned n = 0) const
  4054. {
  4055. if(matched && (n < matchedSize))
  4056. {
  4057. assertex(matcher);
  4058. UErrorCode uerr = U_ZERO_ERROR;
  4059. int32_t start = n ? matcher->start(n, uerr) : matcher->start(uerr);
  4060. int32_t end = n ? matcher->end(n, uerr) : matcher->end(uerr);
  4061. outlen = end - start;
  4062. out = (UChar *)malloc(outlen*2);
  4063. sample.extract(start, outlen, out);
  4064. }
  4065. else
  4066. {
  4067. outlen = 0;
  4068. out = NULL;
  4069. }
  4070. }
  4071. UChar const * findvstr(unsigned outlen, UChar * out, unsigned n = 0)
  4072. {
  4073. if(matched && (n < matchedSize))
  4074. {
  4075. assertex(matcher);
  4076. UErrorCode uerr = U_ZERO_ERROR;
  4077. int32_t start = n ? matcher->start(n, uerr) : matcher->start(uerr);
  4078. int32_t end = n ? matcher->end(n, uerr) : matcher->end(uerr);
  4079. unsigned sublen = end - start;
  4080. if(sublen >= outlen)
  4081. sublen = outlen - 1;
  4082. sample.extract(start, sublen, out);
  4083. out[sublen] = 0;
  4084. }
  4085. else
  4086. {
  4087. out[0] = 0;
  4088. }
  4089. return out;
  4090. }
  4091. };
  4092. //---------------------------------------------------------------------------
  4093. class CCompiledUStrRegExpr : implements ICompiledUStrRegExpr
  4094. {
  4095. private:
  4096. RegexPattern * pattern;
  4097. RegexMatcher * matcher;
  4098. public:
  4099. CCompiledUStrRegExpr(const UChar * _UregExp, bool _isCaseSensitive = false)
  4100. {
  4101. UErrorCode uerr = U_ZERO_ERROR;
  4102. UParseError uperr;
  4103. if (_isCaseSensitive)
  4104. pattern = RegexPattern::compile(_UregExp, uperr, uerr);
  4105. else
  4106. pattern = RegexPattern::compile(_UregExp, UREGEX_CASE_INSENSITIVE, uperr, uerr);
  4107. matcher = pattern->matcher(uerr);
  4108. if (U_FAILURE(uerr))
  4109. {
  4110. char * expAscii;
  4111. unsigned expAsciiLen;
  4112. rtlUnicodeToEscapedStrX(expAsciiLen, expAscii, rtlUnicodeStrlen(_UregExp), _UregExp);
  4113. StringBuffer msg;
  4114. msg.append("Bad regular expression: ").append(u_errorName(uerr)).append(": ").append(expAsciiLen, expAscii);
  4115. rtlFree(expAscii);
  4116. delete matcher;
  4117. delete pattern;
  4118. matcher = 0;
  4119. pattern = 0;
  4120. rtlFail(0, msg.str()); //throws
  4121. }
  4122. }
  4123. ~CCompiledUStrRegExpr()
  4124. {
  4125. if (matcher)
  4126. delete matcher;
  4127. if (pattern)
  4128. delete pattern;
  4129. }
  4130. void replace(size32_t & outlen, UChar * & out, size32_t slen, const UChar * str, size32_t rlen, UChar const * replace) const
  4131. {
  4132. UnicodeString const src(str, slen);
  4133. UErrorCode err = U_ZERO_ERROR;
  4134. RegexMatcher * replacer = pattern->matcher(src, err);
  4135. UnicodeString const fmt(replace, rlen);
  4136. UnicodeString const tgt = replacer->replaceAll(fmt, err);
  4137. outlen = tgt.length();
  4138. out = (UChar *)malloc(outlen*2);
  4139. tgt.extract(0, outlen, out);
  4140. delete replacer;
  4141. }
  4142. IUStrRegExprFindInstance * find(const UChar * str, size32_t from, size32_t len) const
  4143. {
  4144. CUStrRegExprFindInstance * findInst = new CUStrRegExprFindInstance(matcher, str, from, len);
  4145. return findInst;
  4146. }
  4147. };
  4148. //---------------------------------------------------------------------------
  4149. ECLRTL_API ICompiledUStrRegExpr * rtlCreateCompiledUStrRegExpr(const UChar * regExpr, bool isCaseSensitive)
  4150. {
  4151. CCompiledUStrRegExpr * expr = new CCompiledUStrRegExpr(regExpr, isCaseSensitive);
  4152. return expr;
  4153. }
  4154. ECLRTL_API void rtlDestroyCompiledUStrRegExpr(ICompiledUStrRegExpr * compiledExpr)
  4155. {
  4156. if (compiledExpr)
  4157. delete (CCompiledUStrRegExpr*)compiledExpr;
  4158. }
  4159. ECLRTL_API void rtlDestroyUStrRegExprFindInstance(IUStrRegExprFindInstance * findInst)
  4160. {
  4161. if (findInst)
  4162. delete (CUStrRegExprFindInstance*)findInst;
  4163. }
  4164. #else // _USE_BOOST_REGEX not set
  4165. ECLRTL_API ICompiledStrRegExpr * rtlCreateCompiledStrRegExpr(const char * regExpr, bool isCaseSensitive)
  4166. {
  4167. UNIMPLEMENTED_X("Boost regex disabled");
  4168. }
  4169. ECLRTL_API void rtlDestroyCompiledStrRegExpr(ICompiledStrRegExpr * compiledExpr)
  4170. {
  4171. }
  4172. ECLRTL_API void rtlDestroyStrRegExprFindInstance(IStrRegExprFindInstance * findInst)
  4173. {
  4174. }
  4175. ECLRTL_API ICompiledUStrRegExpr * rtlCreateCompiledUStrRegExpr(const UChar * regExpr, bool isCaseSensitive)
  4176. {
  4177. UNIMPLEMENTED_X("Boost regex disabled");
  4178. }
  4179. ECLRTL_API void rtlDestroyCompiledUStrRegExpr(ICompiledUStrRegExpr * compiledExpr)
  4180. {
  4181. }
  4182. ECLRTL_API void rtlDestroyUStrRegExprFindInstance(IUStrRegExprFindInstance * findInst)
  4183. {
  4184. }
  4185. #endif
  4186. //---------------------------------------------------------------------------
  4187. ECLRTL_API int rtlQueryLocalFailCode(IException * e)
  4188. {
  4189. return e->errorCode();
  4190. }
  4191. ECLRTL_API void rtlGetLocalFailMessage(size32_t & len, char * & text, IException * e, const char * tag)
  4192. {
  4193. rtlExceptionExtract(len, text, e, tag);
  4194. }
  4195. ECLRTL_API void rtlFreeException(IException * e)
  4196. {
  4197. e->Release();
  4198. }
  4199. //---------------------------------------------------------------------------
  4200. //Generally any calls to this function have also checked that the length(trim(str)) <= fieldLen, so exceptions should only occur if compareLen > fieldLen
  4201. //However, function can now also handle the exception case.
  4202. ECLRTL_API void rtlCreateRange(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str, byte fill, byte pad)
  4203. {
  4204. //
  4205. if (compareLen > fieldLen)
  4206. {
  4207. if ((int)compareLen >= 0)
  4208. {
  4209. //x[1..m] = y, m is larger than fieldLen, so truncate to fieldLen
  4210. compareLen = fieldLen;
  4211. }
  4212. else
  4213. compareLen = 0; // probably m[1..-1] or something silly
  4214. }
  4215. if (len > compareLen)
  4216. {
  4217. while ((len > compareLen) && (str[len-1] == pad))
  4218. len--;
  4219. //so change the search range to FF,FF,FF .. 00.00.00 which will then never match.
  4220. if (len > compareLen)
  4221. {
  4222. compareLen = 0;
  4223. fill = (fill == 0) ? 255 : 0;
  4224. }
  4225. }
  4226. outlen = fieldLen;
  4227. out = (char *)malloc(fieldLen);
  4228. if (len >= compareLen)
  4229. memcpy(out, str, compareLen);
  4230. else
  4231. {
  4232. memcpy(out, str, len);
  4233. memset(out+len, pad, compareLen-len);
  4234. }
  4235. memset(out + compareLen, fill, fieldLen-compareLen);
  4236. }
  4237. ECLRTL_API void rtlCreateStrRangeLow(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
  4238. {
  4239. rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 0, ' ');
  4240. }
  4241. ECLRTL_API void rtlCreateStrRangeHigh(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
  4242. {
  4243. rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 255, ' ');
  4244. }
  4245. ECLRTL_API void rtlCreateDataRangeLow(size32_t & outlen, void * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const void * str)
  4246. {
  4247. rtlCreateRange(outlen, *(char * *)&out, fieldLen, compareLen, len, (const char *)str, 0, 0);
  4248. }
  4249. ECLRTL_API void rtlCreateDataRangeHigh(size32_t & outlen, void * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const void * str)
  4250. {
  4251. rtlCreateRange(outlen, *(char * *)&out, fieldLen, compareLen, len, (const char *)str, 255, 0);
  4252. }
  4253. ECLRTL_API void rtlCreateRangeLow(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
  4254. {
  4255. rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 0, ' ');
  4256. }
  4257. ECLRTL_API void rtlCreateRangeHigh(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
  4258. {
  4259. rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 255, ' ');
  4260. }
  4261. ECLRTL_API void rtlCreateUnicodeRange(size32_t & outlen, UChar * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const UChar * str, byte fill)
  4262. {
  4263. //Same as function above!
  4264. if (compareLen > fieldLen)
  4265. {
  4266. if ((int)compareLen >= 0)
  4267. {
  4268. //x[1..m] = y, m is larger than fieldLen, so truncate to fieldLen
  4269. compareLen = fieldLen;
  4270. }
  4271. else
  4272. compareLen = 0; // probably m[1..-1] or something silly
  4273. }
  4274. if (len > compareLen)
  4275. {
  4276. while ((len > compareLen) && (str[len-1] == ' '))
  4277. len--;
  4278. //so change the search range to FF,FF,FF .. 00.00.00 which will then never match.
  4279. if (len > compareLen)
  4280. {
  4281. compareLen = 0;
  4282. fill = (fill == 0) ? 255 : 0;
  4283. }
  4284. }
  4285. outlen = fieldLen;
  4286. out = (UChar *)malloc(fieldLen*sizeof(UChar));
  4287. if (len >= compareLen)
  4288. memcpy(out, str, compareLen*sizeof(UChar));
  4289. else
  4290. {
  4291. memcpy(out, str, len * sizeof(UChar));
  4292. while (len != compareLen)
  4293. out[len++] = ' ';
  4294. }
  4295. memset(out + compareLen, fill, (fieldLen-compareLen) * sizeof(UChar));
  4296. }
  4297. ECLRTL_API void rtlCreateUnicodeRangeLow(size32_t & outlen, UChar * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const UChar * str)
  4298. {
  4299. rtlCreateUnicodeRange(outlen, out, fieldLen, compareLen, len, str, 0x00);
  4300. }
  4301. ECLRTL_API void rtlCreateUnicodeRangeHigh(size32_t & outlen, UChar * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const UChar * str)
  4302. {
  4303. rtlCreateUnicodeRange(outlen, out, fieldLen, compareLen, len, str, 0xFF);
  4304. }
  4305. //---------------------------------------------------------------------------
  4306. ECLRTL_API unsigned rtlCountRows(size32_t len, const void * data, IRecordSize * rs)
  4307. {
  4308. if (rs->isFixedSize())
  4309. return len / rs->getFixedSize();
  4310. unsigned count = 0;
  4311. while (len)
  4312. {
  4313. size32_t thisLen = rs->getRecordSize(data);
  4314. data = (byte *)data + thisLen;
  4315. if (thisLen > len)
  4316. throw MakeStringException(0, "Invalid raw data");
  4317. len -= thisLen;
  4318. count++;
  4319. }
  4320. return count;
  4321. }
  4322. //---------------------------------------------------------------------------
  4323. ECLRTL_API size32_t rtlCountToSize(unsigned count, const void * data, IRecordSize * rs)
  4324. {
  4325. if (rs->isFixedSize())
  4326. return count * rs->getFixedSize();
  4327. unsigned size = 0;
  4328. for (unsigned i=0;i<count;i++)
  4329. {
  4330. size32_t thisLen = rs->getRecordSize(data);
  4331. data = (byte *)data + thisLen;
  4332. size += thisLen;
  4333. }
  4334. return size;
  4335. }
  4336. //---------------------------------------------------------------------------
  4337. class rtlCodepageConverter
  4338. {
  4339. public:
  4340. rtlCodepageConverter(char const * sourceName, char const * targetName, bool & failed) : uerr(U_ZERO_ERROR)
  4341. {
  4342. srccnv = ucnv_open(sourceName, &uerr);
  4343. tgtcnv = ucnv_open(targetName, &uerr);
  4344. tgtMaxRatio = ucnv_getMaxCharSize(tgtcnv);
  4345. failed = U_FAILURE(uerr);
  4346. }
  4347. ~rtlCodepageConverter()
  4348. {
  4349. ucnv_close(srccnv);
  4350. ucnv_close(tgtcnv);
  4351. }
  4352. void convertX(unsigned & targetLength, char * & target, unsigned sourceLength, char const * source, bool & failed, bool preflight)
  4353. {
  4354. //convert from source to utf-16: try to avoid preflighting by guessing upper bound
  4355. //unicode length in UChars equal source length in chars if single byte encoding, and be less for multibyte
  4356. UChar * ubuff = (UChar *)malloc(sourceLength*2);
  4357. int32_t ulen = ucnv_toUChars(srccnv, ubuff, sourceLength, source, sourceLength, &uerr);
  4358. if(ulen > (int32_t)sourceLength)
  4359. {
  4360. //okay, so our guess was wrong, and we have to reallocate
  4361. free(ubuff);
  4362. ubuff = (UChar *)malloc(ulen*2);
  4363. ucnv_toUChars(srccnv, ubuff, ulen, source, sourceLength, &uerr);
  4364. }
  4365. if(preflight)
  4366. {
  4367. //convert from utf-16 to target: preflight to get buffer of exactly the right size
  4368. UErrorCode uerr2 = uerr; //preflight has to use copy of error code, as it is considered an 'error'
  4369. int32_t tlen = ucnv_fromUChars(tgtcnv, 0, 0, ubuff, ulen, &uerr2);
  4370. target = (char *)malloc(tlen);
  4371. targetLength = ucnv_fromUChars(tgtcnv, target, tlen, ubuff, ulen, &uerr);
  4372. }
  4373. else
  4374. {
  4375. //convert from utf-16 to target: avoid preflighting by allocating buffer of maximum size
  4376. target = (char *)malloc(ulen*tgtMaxRatio);
  4377. targetLength = ucnv_fromUChars(tgtcnv, target, ulen*tgtMaxRatio, ubuff, ulen, &uerr);
  4378. }
  4379. free(ubuff);
  4380. failed = U_FAILURE(uerr);
  4381. }
  4382. unsigned convert(unsigned targetLength, char * target, unsigned sourceLength, char const * source, bool & failed)
  4383. {
  4384. char * tgtStart = target;
  4385. ucnv_convertEx(tgtcnv, srccnv, &target, target+targetLength, &source, source+sourceLength, 0, 0, 0, 0, true, true, &uerr);
  4386. int32_t ret = target-tgtStart;
  4387. failed = U_FAILURE(uerr);
  4388. return ret;
  4389. }
  4390. private:
  4391. UErrorCode uerr;
  4392. UConverter * srccnv;
  4393. UConverter * tgtcnv;
  4394. int8_t tgtMaxRatio;
  4395. };
  4396. void * rtlOpenCodepageConverter(char const * sourceName, char const * targetName, bool & failed)
  4397. {
  4398. return new rtlCodepageConverter(sourceName, targetName, failed);
  4399. }
  4400. void rtlCloseCodepageConverter(void * converter)
  4401. {
  4402. delete ((rtlCodepageConverter *)converter);
  4403. }
  4404. void rtlCodepageConvertX(void * converter, unsigned & targetLength, char * & target, unsigned sourceLength, char const * source, bool & failed, bool preflight)
  4405. {
  4406. ((rtlCodepageConverter *)converter)->convertX(targetLength, target, sourceLength, source, failed, preflight);
  4407. }
  4408. unsigned rtlCodepageConvert(void * converter, unsigned targetLength, char * target, unsigned sourceLength, char const * source, bool & failed)
  4409. {
  4410. return ((rtlCodepageConverter *)converter)->convert(targetLength, target, sourceLength, source, failed);
  4411. }
  4412. //---------------------------------------------------------------------------
  4413. void appendUChar(MemoryBuffer & buff, char x)
  4414. {
  4415. UChar c = x;
  4416. buff.append(sizeof(c), &c);
  4417. }
  4418. void appendUChar(MemoryBuffer & buff, UChar c)
  4419. {
  4420. buff.append(sizeof(c), &c);
  4421. }
  4422. void appendUStr(MemoryBuffer & x, const char * text)
  4423. {
  4424. while (*text)
  4425. {
  4426. UChar c = *text++;
  4427. x.append(sizeof(c), &c);
  4428. }
  4429. }
  4430. ECLRTL_API void xmlDecodeStrX(size32_t & outLen, char * & out, size32_t inLen, const char * in)
  4431. {
  4432. StringBuffer temp;
  4433. decodeXML(in, temp, inLen);
  4434. outLen = temp.length();
  4435. out = temp.detach();
  4436. }
  4437. bool hasPrefix(const UChar * ustr, const char * str, unsigned len)
  4438. {
  4439. while (len--)
  4440. {
  4441. if (*ustr++ != *str++)
  4442. return false;
  4443. }
  4444. return true;
  4445. }
  4446. ECLRTL_API void xmlDecodeUStrX(size32_t & outLen, UChar * & out, size32_t inLen, const UChar * in)
  4447. {
  4448. const UChar * cur = in;
  4449. const UChar * end = in+inLen;
  4450. MemoryBuffer ret;
  4451. while (cur<end)
  4452. {
  4453. switch(*cur)
  4454. {
  4455. case '&':
  4456. if(hasPrefix(cur+1, "amp;", 4))
  4457. {
  4458. cur += 4;
  4459. appendUChar(ret, '&');
  4460. }
  4461. else if(hasPrefix(cur+1, "lt;", 3))
  4462. {
  4463. cur += 3;
  4464. appendUChar(ret, '<');
  4465. }
  4466. else if(hasPrefix(cur+1, "gt;", 3))
  4467. {
  4468. cur += 3;
  4469. appendUChar(ret, '>');
  4470. }
  4471. else if(hasPrefix(cur+1, "quot;", 5))
  4472. {
  4473. cur += 5;
  4474. appendUChar(ret, '"');
  4475. }
  4476. else if(hasPrefix(cur+1, "apos;", 5))
  4477. {
  4478. cur += 5;
  4479. appendUChar(ret, '\'');
  4480. }
  4481. else
  4482. {
  4483. cur++;
  4484. if (*cur == '#')
  4485. {
  4486. cur++;
  4487. unsigned base = 10;
  4488. if (*cur == 'x' || *cur == 'X') // strictly not sure about X.
  4489. {
  4490. base = 16;
  4491. cur++;
  4492. }
  4493. UChar value = 0;
  4494. while (cur < end)
  4495. {
  4496. unsigned digit;
  4497. UChar next = *cur;
  4498. if ((next >= '0') && (next <= '9'))
  4499. digit = next-'0';
  4500. else if ((next >= 'A') && (next <= 'F'))
  4501. digit = next-'A'+10;
  4502. else if ((next >= 'a') && (next <= 'f'))
  4503. digit = next-'a'+10;
  4504. else
  4505. break;
  4506. if (digit >= base)
  4507. break;
  4508. value = value * base + digit;
  4509. cur++;
  4510. }
  4511. appendUChar(ret, value);
  4512. //if (cur == end) || (*cur != ';') throw Error;
  4513. }
  4514. else
  4515. appendUChar(ret, *cur); // error... / unexpanded entity
  4516. }
  4517. //assertex(cur<end);
  4518. break;
  4519. default:
  4520. appendUChar(ret, *cur);
  4521. break;
  4522. }
  4523. cur++;
  4524. }
  4525. outLen = ret.length()/2;
  4526. out = (UChar *)ret.detach();
  4527. }
  4528. ECLRTL_API void xmlEncodeStrX(size32_t & outLen, char * & out, size32_t inLen, const char * in, unsigned flags)
  4529. {
  4530. StringBuffer temp;
  4531. encodeXML(in, temp, flags, inLen, false);
  4532. outLen = temp.length();
  4533. out = temp.detach();
  4534. }
  4535. ECLRTL_API void xmlEncodeUStrX(size32_t & outLen, UChar * & out, size32_t inLen, const UChar * in, unsigned flags)
  4536. {
  4537. const UChar * cur = in;
  4538. MemoryBuffer ret;
  4539. ret.ensureCapacity(inLen*2);
  4540. while (inLen)
  4541. {
  4542. UChar next = *cur;
  4543. switch(*cur)
  4544. {
  4545. case '&':
  4546. appendUStr(ret, "&amp;");
  4547. break;
  4548. case '<':
  4549. appendUStr(ret, "&lt;");
  4550. break;
  4551. case '>':
  4552. appendUStr(ret, "&gt;");
  4553. break;
  4554. case '\"':
  4555. appendUStr(ret, "&quot;");
  4556. break;
  4557. case '\'':
  4558. appendUStr(ret, "&apos;");
  4559. break;
  4560. case ' ':
  4561. appendUStr(ret, flags & ENCODE_SPACES?"&#32;":" ");
  4562. break;
  4563. case '\n':
  4564. appendUStr(ret, flags & ENCODE_NEWLINES?"&#10;":"\n");
  4565. break;
  4566. case '\r':
  4567. appendUStr(ret, flags & ENCODE_NEWLINES?"&#13;":"\r");
  4568. break;
  4569. case '\t':
  4570. appendUStr(ret, flags & ENCODE_SPACES?"&#9;":"\t");
  4571. break;
  4572. default:
  4573. appendUChar(ret, next);
  4574. break;
  4575. }
  4576. inLen--;
  4577. cur++;
  4578. }
  4579. outLen = ret.length()/2;
  4580. out = (UChar *)ret.detach();
  4581. }
  4582. //---------------------------------------------------------------------------
  4583. #define STRUCTURED_EXCEPTION_TAG "Error"
  4584. inline bool isStructuredMessage(const char * text, const char * tag)
  4585. {
  4586. if (!text || text[0] != '<')
  4587. return false;
  4588. if (!tag)
  4589. return true;
  4590. size32_t lenTag = strlen(tag);
  4591. if (memcmp(text+1,tag,lenTag) != 0)
  4592. return false;
  4593. if (text[lenTag+1] != '>')
  4594. return false;
  4595. return true;
  4596. }
  4597. inline bool isStructuredError(const char * text) { return isStructuredMessage(text, STRUCTURED_EXCEPTION_TAG); }
  4598. void rtlExtractTag(size32_t & outLen, char * & out, const char * text, const char * tag, const char * rootTag)
  4599. {
  4600. if (!tag || !isStructuredMessage(text, rootTag))
  4601. {
  4602. if (!tag || strcmp(tag, "text")==0)
  4603. rtlStrToStrX(outLen, out, strlen(text), text);
  4604. else
  4605. {
  4606. outLen = 0;
  4607. out = NULL;
  4608. }
  4609. }
  4610. else
  4611. {
  4612. StringBuffer startTag, endTag;
  4613. startTag.append("<").append(tag).append(">");
  4614. endTag.append("</").append(tag).append(">");
  4615. const char * start = strstr(text, startTag.str());
  4616. const char * end = strstr(text, endTag.str());
  4617. if (start && end)
  4618. {
  4619. start += startTag.length();
  4620. xmlDecodeStrX(outLen, out, end-start, start);
  4621. }
  4622. else
  4623. {
  4624. outLen = 0;
  4625. out = NULL;
  4626. }
  4627. }
  4628. }
  4629. void rtlExceptionExtract(size32_t & outLen, char * & out, const char * text, const char * tag)
  4630. {
  4631. if (!tag) tag = "text";
  4632. rtlExtractTag(outLen, out, text, tag, STRUCTURED_EXCEPTION_TAG);
  4633. }
  4634. void rtlExceptionExtract(size32_t & outLen, char * & out, IException * e, const char * tag)
  4635. {
  4636. StringBuffer text;
  4637. e->errorMessage(text);
  4638. rtlExceptionExtract(outLen, out, text.str(), tag);
  4639. }
  4640. void rtlAddExceptionTag(StringBuffer & errorText, const char * tag, const char * value)
  4641. {
  4642. if (!isStructuredError(errorText.str()))
  4643. {
  4644. StringBuffer temp;
  4645. temp.append("<" STRUCTURED_EXCEPTION_TAG "><text>");
  4646. encodeXML(errorText.str(), temp, ENCODE_WHITESPACE, errorText.length(), false);
  4647. temp.append("</text></" STRUCTURED_EXCEPTION_TAG ">");
  4648. errorText.swapWith(temp);
  4649. }
  4650. StringBuffer temp;
  4651. temp.append("<").append(tag).append(">");
  4652. encodeXML(value, temp, ENCODE_WHITESPACE, (unsigned)-1, false);
  4653. temp.append("</").append(tag).append(">");
  4654. unsigned len = errorText.length();
  4655. unsigned pos = len - strlen(STRUCTURED_EXCEPTION_TAG) - 3;
  4656. errorText.insert(pos, temp);
  4657. }
  4658. //---------------------------------------------------------------------------
  4659. void rtlRowBuilder::forceAvailable(size32_t size)
  4660. {
  4661. const size32_t chunkSize = 64;
  4662. maxsize = (size + chunkSize-1) & ~(chunkSize-1);
  4663. ptr = realloc(ptr, maxsize);
  4664. }
  4665. //---------------------------------------------------------------------------
  4666. inline unsigned numExtraBytesFromValue(unsigned __int64 first)
  4667. {
  4668. if (first >= I64C(0x10000000))
  4669. if (first >= I64C(0x40000000000))
  4670. if (first >= I64C(0x2000000000000))
  4671. if (first >= I64C(0x100000000000000))
  4672. return 8;
  4673. else
  4674. return 7;
  4675. else
  4676. return 6;
  4677. else
  4678. if (first >= I64C(0x800000000))
  4679. return 5;
  4680. else
  4681. return 4;
  4682. else
  4683. if (first >= 0x4000)
  4684. if (first >= 0x200000)
  4685. return 3;
  4686. else
  4687. return 2;
  4688. else
  4689. if (first >= 0x80)
  4690. return 1;
  4691. else
  4692. return 0;
  4693. }
//A packed byte format, based on the unicode packing of utf-8.
  4695. //The number of top bits set in the leading byte indicates how many extra
  4696. //bytes follow (0..8). It gives the same compression as using a top bit to
  4697. //indicate continuation, but seems to be quicker (and requires less look ahead).
  4698. /*
  4699. byte numExtraBytesFromFirstTable[256] =
  4700. {
  4701. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  4702. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  4703. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  4704. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  4705. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  4706. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  4707. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  4708. 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8
  4709. };
  4710. inline unsigned numExtraBytesFromFirst(byte first)
  4711. {
  4712. return numExtraBytesFromFirstTable(first);
  4713. }
  4714. */
  4715. //NB: This seems to be faster than using the table lookup above. Probably affects the data cache less
  4716. inline unsigned numExtraBytesFromFirst(byte first)
  4717. {
  4718. if (first >= 0xF0)
  4719. if (first >= 0xFC)
  4720. if (first >= 0xFE)
  4721. if (first >= 0xFF)
  4722. return 8;
  4723. else
  4724. return 7;
  4725. else
  4726. return 6;
  4727. else
  4728. if (first >= 0xF8)
  4729. return 5;
  4730. else
  4731. return 4;
  4732. else
  4733. if (first >= 0xC0)
  4734. if (first >= 0xE0)
  4735. return 3;
  4736. else
  4737. return 2;
  4738. else
  4739. if (first >= 0x80)
  4740. return 1;
  4741. else
  4742. return 0;
  4743. }
  4744. static byte leadingValueMask[9] = { 0x7f, 0x3f, 0x1f, 0x0f, 0x07, 0x03, 0x01, 0x00, 0x00 };
  4745. static byte leadingLengthMask[9] = { 0x00, 0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF };
  4746. //maximum number of bytes for a packed value is size+1 bytes for size <=8 and last byte being fully used.
  4747. unsigned __int64 rtlGetPackedUnsigned(const void * _ptr)
  4748. {
  4749. const byte * ptr = (const byte *)_ptr;
  4750. byte first = *ptr++;
  4751. unsigned numExtra = numExtraBytesFromFirst(first);
  4752. unsigned __int64 value = first & leadingValueMask[numExtra];
  4753. //Loop unrolling has a negligable effect
  4754. while (numExtra--)
  4755. value = (value << 8) | *ptr++;
  4756. return value;
  4757. }
  4758. void rtlSetPackedUnsigned(void * _ptr, unsigned __int64 value)
  4759. {
  4760. byte * ptr = (byte *)_ptr;
  4761. unsigned numExtra = numExtraBytesFromValue(value);
  4762. byte firstMask = leadingLengthMask[numExtra];
  4763. while (numExtra)
  4764. {
  4765. ptr[numExtra--] = (byte)value;
  4766. value >>= 8;
  4767. }
  4768. ptr[0] = (byte)value | firstMask;
  4769. }
  4770. size32_t rtlGetPackedSize(const void * ptr)
  4771. {
  4772. return numExtraBytesFromFirst(*(byte*)ptr)+1;
  4773. }
  4774. size32_t rtlGetPackedSizeFromFirst(byte first)
  4775. {
  4776. return numExtraBytesFromFirst(first)+1;
  4777. }
  4778. //Store signed by moving the sign to the bottom bit, and inverting if negative.
  4779. //so small positive and negative numbers are stored compactly.
  4780. __int64 rtlGetPackedSigned(const void * ptr)
  4781. {
  4782. unsigned __int64 value = rtlGetPackedUnsigned(ptr);
  4783. unsigned __int64 shifted = (value >> 1);
  4784. return (__int64)((value & 1) ? ~shifted : shifted);
  4785. }
  4786. void rtlSetPackedSigned(void * ptr, __int64 value)
  4787. {
  4788. unsigned __int64 storeValue;
  4789. if (value < 0)
  4790. storeValue = (~value << 1) | 1;
  4791. else
  4792. storeValue = value << 1;
  4793. rtlSetPackedUnsigned(ptr, storeValue);
  4794. }
  4795. IAtom * rtlCreateFieldNameAtom(const char * name)
  4796. {
  4797. return createAtom(name);
  4798. }
  4799. //---------------------------------------------------------------------------
  4800. void RtlCInterface::Link() const { atomic_inc(&xxcount); }
  4801. bool RtlCInterface::Release(void) const
  4802. {
  4803. if (atomic_dec_and_test(&xxcount))
  4804. {
  4805. delete this;
  4806. return true;
  4807. }
  4808. return false;
  4809. }
  4810. //---------------------------------------------------------------------------
  4811. #if 0
  4812. void PrintExtract(StringBuffer & s, const char * tag)
  4813. {
  4814. size32_t outLen;
  4815. char * out = NULL;
  4816. rtlExceptionExtract(outLen, out, s.str(), tag);
  4817. PrintLog("%s = %.*s", tag, outLen, out);
  4818. rtlFree(out);
  4819. }
  4820. void testStructuredExceptions()
  4821. {
  4822. StringBuffer s;
  4823. s.append("This<is>some text");
  4824. PrintExtract(s, NULL);
  4825. PrintExtract(s, "text");
  4826. PrintExtract(s, "is");
  4827. rtlAddExceptionTag(s, "location", "192.168.12.1");
  4828. PrintExtract(s, NULL);
  4829. PrintExtract(s, "text");
  4830. PrintExtract(s, "is");
  4831. PrintExtract(s, "location");
  4832. rtlAddExceptionTag(s, "author", "gavin");
  4833. PrintExtract(s, NULL);
  4834. PrintExtract(s, "text");
  4835. PrintExtract(s, "is");
  4836. PrintExtract(s, "location");
  4837. PrintExtract(s, "author");
  4838. PrintLog("%s", s.str());
  4839. }
  4840. static void testPackedUnsigned()
  4841. {
  4842. unsigned __int64 values[] = { 0, 1, 2, 10, 127, 128, 16383, 16384, 32767, 32768, 0xffffff, 0x7fffffff, 0xffffffff,
  4843. I64C(0xffffffffffffff), I64C(0x100000000000000), I64C(0x7fffffffffffffff), I64C(0xffffffffffffffff) };
  4844. unsigned numBytes[] = { 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 5, 5, 8, 9, 9, 9 };
  4845. unsigned numValues = _elements_in(values);
  4846. byte temp[9];
  4847. for (unsigned i = 0; i < numValues; i++)
  4848. {
  4849. rtlSetPackedUnsigned(temp, values[i]);
  4850. assertex(rtlGetPackedSize(temp) == numBytes[i]);
  4851. assertex(rtlGetPackedUnsigned(temp) == values[i]);
  4852. }
  4853. for (unsigned j= 0; j < 2000000; j++)
  4854. {
  4855. unsigned __int64 value = I64C(1) << (rtlRandom() & 63);
  4856. // unsigned value = rtlRandom();
  4857. rtlSetPackedUnsigned(temp, value);
  4858. assertex(rtlGetPackedSize(temp) == numExtraBytesFromValue(value)+1);
  4859. assertex(rtlGetPackedUnsigned(temp) == value);
  4860. }
  4861. for (unsigned k= 0; k < 63; k++)
  4862. {
  4863. unsigned __int64 value1 = I64C(1) << k;
  4864. rtlSetPackedUnsigned(temp, value1);
  4865. assertex(rtlGetPackedSize(temp) == numExtraBytesFromValue(value1)+1);
  4866. assertex(rtlGetPackedUnsigned(temp) == value1);
  4867. unsigned __int64 value2 = value1-1;
  4868. rtlSetPackedUnsigned(temp, value2);
  4869. assertex(rtlGetPackedSize(temp) == numExtraBytesFromValue(value2)+1);
  4870. assertex(rtlGetPackedUnsigned(temp) == value2);
  4871. }
  4872. }
  4873. static void testPackedSigned()
  4874. {
  4875. __int64 values[] = { 0, 1, -2, 10, 63, 64, -64, -65, 8191, 8192, 0x3fffffff,
  4876. I64C(0x7fffffffffffff), I64C(0x80000000000000), I64C(0x7fffffffffffffff), I64C(0x8000000000000000) };
  4877. unsigned numBytes[] = { 1, 1, 1, 1, 1, 2, 1, 2, 2, 3, 5,
  4878. 8, 9, 9, 9 };
  4879. unsigned numValues = _elements_in(values);
  4880. byte temp[9];
  4881. for (unsigned i = 0; i < numValues; i++)
  4882. {
  4883. rtlSetPackedSigned(temp, values[i]);
  4884. assertex(rtlGetPackedSize(temp) == numBytes[i]);
  4885. assertex(rtlGetPackedSigned(temp) == values[i]);
  4886. }
  4887. }
  4888. #endif
  4889. void ensureRtlLoaded()
  4890. {
  4891. }
  4892. #ifdef _USE_CPPUNIT
  4893. #include <cppunit/extensions/HelperMacros.h>
  4894. #define ASSERT(a) { if (!(a)) CPPUNIT_ASSERT(a); }
  4895. class EclRtlTests : public CppUnit::TestFixture
  4896. {
  4897. CPPUNIT_TEST_SUITE( EclRtlTests );
  4898. CPPUNIT_TEST(RegexTest);
  4899. CPPUNIT_TEST(MultiRegexTest);
  4900. CPPUNIT_TEST_SUITE_END();
  4901. protected:
  4902. void RegexTest()
  4903. {
  4904. rtlCompiledStrRegex r;
  4905. size32_t outlen;
  4906. char * out = NULL;
  4907. r.setPattern("([A-Z]+)[ ]?'(S) ", true);
  4908. r->replace(outlen, out, 7, "ABC'S ", 5, "$1$2 ");
  4909. ASSERT(outlen==6);
  4910. ASSERT(out != NULL);
  4911. ASSERT(memcmp(out, "ABCS ", outlen)==0);
  4912. rtlFree(out);
  4913. }
  4914. void MultiRegexTest()
  4915. {
  4916. class RegexTestThread : public Thread
  4917. {
  4918. virtual int run()
  4919. {
  4920. for (int i = 0; i < 100000; i++)
  4921. {
  4922. rtlCompiledStrRegex r;
  4923. size32_t outlen;
  4924. char * out = NULL;
  4925. r.setPattern("([A-Z]+)[ ]?'(S) ", true);
  4926. r->replace(outlen, out, 7, "ABC'S ", 5, "$1$2 ");
  4927. ASSERT(outlen==6);
  4928. ASSERT(out != NULL);
  4929. ASSERT(memcmp(out, "ABCS ", outlen)==0);
  4930. rtlFree(out);
  4931. }
  4932. return 0;
  4933. }
  4934. };
  4935. RegexTestThread t1;
  4936. RegexTestThread t2;
  4937. RegexTestThread t3;
  4938. t1.start();
  4939. t2.start();
  4940. t3.start();
  4941. t1.join();
  4942. t2.join();
  4943. t3.join();
  4944. }
  4945. };
  4946. CPPUNIT_TEST_SUITE_REGISTRATION( EclRtlTests );
  4947. CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( EclRtlTests, "EclRtlTests" );
  4948. #endif