eclrtl.cpp 189 KB


  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "limits.h"
  14. #include "platform.h"
  15. #include <math.h>
  16. #include <stdio.h>
  17. #include "jexcept.hpp"
  18. #include "jmisc.hpp"
  19. #include "jutil.hpp"
  20. #include "jlib.hpp"
  21. #include "jptree.hpp"
  22. #include "junicode.hpp"
  23. #include "eclrtl.hpp"
  24. #include "rtlbcd.hpp"
  25. #include "eclhelper.hpp"
  26. #include "eclrtl_imp.hpp"
  27. #ifdef _USE_ICU
  28. #include "unicode/uchar.h"
  29. #include "unicode/ucol.h"
  30. #include "unicode/ustring.h"
  31. #include "unicode/ucnv.h"
  32. #include "unicode/uclean.h"
  33. #include "unicode/schriter.h"
  34. #include "unicode/regex.h"
  35. #include "unicode/normlzr.h"
  36. #include "unicode/locid.h"
  37. #endif
  38. #include "jlog.hpp"
  39. #include "jmd5.hpp"
  40. #include "rtlqstr.ipp"
  41. #include "roxiemem.hpp"
  42. #define UTF8_CODEPAGE "UTF-8"
  43. #define UTF8_MAXSIZE 4
  44. IRandomNumberGenerator * random_;
  45. static CriticalSection random_Sect;
  46. MODULE_INIT(INIT_PRIORITY_ECLRTL_ECLRTL)
  47. {
  48. random_ = createRandomNumberGenerator();
  49. random_->seed((unsigned)get_cycles_now());
  50. return true;
  51. }
  52. MODULE_EXIT()
  53. {
  54. random_->Release();
  55. }
  56. #ifndef _USE_ICU
  57. static inline bool u_isspace(UChar next) { return isspace((byte)next); }
  58. #endif
  59. //=============================================================================
  60. // Miscellaneous string functions...
  61. ECLRTL_API void * rtlMalloc(size32_t size)
  62. {
  63. if (!size)
  64. return NULL;
  65. void * retVal = malloc(size);
  66. if (!retVal)
  67. {
  68. PrintStackReport();
  69. rtlThrowOutOfMemory(0, "Memory allocation error!");
  70. }
  71. return retVal;
  72. }
  73. ECLRTL_API void * rtlCalloc(size32_t num, size32_t size)
  74. {
  75. if (!num || !size)
  76. return NULL;
  77. void *retVal = calloc(num, size);
  78. if (!retVal)
  79. {
  80. PrintStackReport();
  81. rtlThrowOutOfMemory(0, "Memory allocation error!");
  82. }
  83. return retVal;
  84. }
  85. void rtlFree(void *ptr)
  86. {
  87. free(ptr);
  88. }
  89. ECLRTL_API void * rtlRealloc(void * _ptr, size32_t size)
  90. {
  91. void * retVal = realloc(_ptr, size);
  92. if( (0 < size) && (NULL == retVal))
  93. {
  94. PrintStackReport();
  95. rtlThrowOutOfMemory(0, "Memory reallocation error!");
  96. }
  97. return retVal;
  98. }
  99. //=============================================================================
  100. ECLRTL_API void rtlReleaseRow(const void * row)
  101. {
  102. ReleaseRoxieRow(row);
  103. }
  104. ECLRTL_API void rtlReleaseRowset(unsigned count, const byte * * rowset)
  105. {
  106. ReleaseRoxieRowset(count, rowset);
  107. }
  108. ECLRTL_API void * rtlLinkRow(const void * row)
  109. {
  110. LinkRoxieRow(row);
  111. return const_cast<void *>(row);
  112. }
  113. ECLRTL_API const byte * * rtlLinkRowset(const byte * * rowset)
  114. {
  115. LinkRoxieRowset(rowset);
  116. return rowset;
  117. }
  118. //=============================================================================
  119. // Unicode helper classes and functions
  120. // escape
  121. bool rtlGetNormalizedUnicodeLocaleName(unsigned len, char const * in, char * out)
  122. {
  123. bool isPrimary = true;
  124. bool ok = true;
  125. unsigned i;
  126. for(i=0; i<len; i++)
  127. if(in[i] == '_')
  128. {
  129. out[i] = '_';
  130. isPrimary = false;
  131. }
  132. else if(isalpha(in[i]))
  133. {
  134. out[i] = (isPrimary ? tolower(in[i]) : toupper(in[i]));
  135. }
  136. else
  137. {
  138. out[i] = 0;
  139. ok = false;
  140. }
  141. return ok;
  142. }
  143. #ifdef _USE_ICU
  144. using icu::UnicodeString;
  145. using icu::UCharCharacterIterator;
  146. static bool stripIgnorableCharacters(size32_t & lenResult, UChar * & result, size32_t length, const UChar * in)
  147. {
  148. unsigned numStripped = 0;
  149. unsigned lastGood = 0;
  150. for (unsigned i=0; i < length; i++)
  151. {
  152. UChar32 c = in[i];
  153. unsigned stripSize = 0;
  154. if (U16_IS_SURROGATE(c))
  155. {
  156. U16_GET(in, 0, i, length, c);
  157. if (u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
  158. stripSize = 2;
  159. else
  160. i++; // skip the surrogate
  161. }
  162. else
  163. {
  164. if (u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
  165. stripSize = 1;
  166. }
  167. if (stripSize != 0)
  168. {
  169. if (numStripped == 0)
  170. result = (UChar *)rtlMalloc((length-stripSize)*sizeof(UChar));
  171. //Copy and non ignorable characters skipped up to this point. (Note result+x is scaled by UChar)
  172. memcpy(result + lastGood - numStripped, in+lastGood, (i-lastGood) * sizeof(UChar));
  173. lastGood = i+stripSize;
  174. numStripped += stripSize;
  175. i += (stripSize-1);
  176. }
  177. }
  178. if (numStripped == 0)
  179. return false;
  180. lenResult = length-numStripped;
  181. memcpy(result + lastGood - numStripped, in+lastGood, (length-lastGood) * sizeof(UChar));
  182. return true;
  183. }
  184. void escapeUnicode(unsigned inlen, UChar const * in, StringBuffer & out)
  185. {
  186. icu::UCharCharacterIterator iter(in, inlen);
  187. for(iter.first32(); iter.hasNext(); iter.next32())
  188. {
  189. UChar32 c = iter.current32();
  190. if(c < 0x80)
  191. out.append((char) c);
  192. else if (c < 0x10000)
  193. out.appendf("\\u%04X", c);
  194. else
  195. out.appendf("\\U%08X", c);
  196. }
  197. }
  198. // locales and collators
  199. static unsigned const unicodeStrengthLimit = 5;
  200. static UCollationStrength unicodeStrength[unicodeStrengthLimit] =
  201. {
  202. UCOL_PRIMARY,
  203. UCOL_SECONDARY,
  204. UCOL_TERTIARY,
  205. UCOL_QUATERNARY,
  206. UCOL_IDENTICAL
  207. };
  208. class RTLLocale : public CInterface
  209. {
  210. public:
  211. RTLLocale(char const * _locale) : locale(_locale)
  212. {
  213. for(unsigned i=0; i<unicodeStrengthLimit; i++)
  214. colls[i] = NULL;
  215. UErrorCode err = U_ZERO_ERROR;
  216. colls[2] = ucol_open(locale.get(), &err);
  217. assertex(U_SUCCESS(err));
  218. }
  219. ~RTLLocale()
  220. {
  221. for(unsigned i=0; i<unicodeStrengthLimit; i++)
  222. if(colls[i]) ucol_close(colls[i]);
  223. }
  224. UCollator * queryCollator() const { return colls[2]; }
  225. UCollator * queryCollator(unsigned strength) const
  226. {
  227. if(strength == 0) strength = 1;
  228. if(strength > unicodeStrengthLimit) strength = unicodeStrengthLimit;
  229. if(!colls[strength-1])
  230. {
  231. UErrorCode err = U_ZERO_ERROR;
  232. const_cast<UCollator * *>(colls)[strength-1] = ucol_open(locale.get(), &err);
  233. assertex(U_SUCCESS(err));
  234. ucol_setStrength(colls[strength-1], unicodeStrength[strength-1]);
  235. }
  236. return colls[strength-1];
  237. }
  238. private:
  239. StringAttr locale;
  240. UCollator * colls[unicodeStrengthLimit];
  241. };
  242. typedef MapStringTo<RTLLocale, char const *> MapStrToLocale;
  243. MapStrToLocale *localeMap;
  244. CriticalSection localeCrit;
  245. MODULE_INIT(INIT_PRIORITY_STANDARD)
  246. {
  247. localeMap = new MapStrToLocale;
  248. return true;
  249. }
  250. MODULE_EXIT()
  251. {
  252. delete localeMap;
  253. u_cleanup();
  254. }
  255. RTLLocale * queryRTLLocale(char const * locale)
  256. {
  257. if (!locale) locale = "";
  258. CriticalBlock b(localeCrit);
  259. RTLLocale * loc = localeMap->getValue(locale);
  260. if(!loc)
  261. {
  262. unsigned ll = strlen(locale);
  263. StringBuffer lnorm;
  264. rtlGetNormalizedUnicodeLocaleName(ll, locale, lnorm.reserve(ll));
  265. localeMap->setValue(locale, lnorm.str());
  266. loc = localeMap->getValue(locale);
  267. }
  268. return loc;
  269. }
  270. // converters
  271. class RTLUnicodeConverter : public CInterface
  272. {
  273. public:
  274. RTLUnicodeConverter(char const * codepage)
  275. {
  276. UErrorCode err = U_ZERO_ERROR;
  277. conv = ucnv_open(codepage, &err);
  278. if (!U_SUCCESS(err))
  279. {
  280. StringBuffer msg;
  281. msg.append("Unrecognised codepage '").append(codepage).append("'");
  282. rtlFail(0, msg.str());
  283. }
  284. }
  285. ~RTLUnicodeConverter()
  286. {
  287. ucnv_close(conv);
  288. }
  289. UConverter * query() const { return conv; }
  290. private:
  291. UConverter * conv;
  292. };
  293. typedef MapStringTo<RTLUnicodeConverter, char const *> MapStrToUnicodeConverter;
  294. static __thread MapStrToUnicodeConverter *unicodeConverterMap = NULL;
  295. static __thread ThreadTermFunc prevThreadTerminator = NULL;
  296. static void clearUnicodeConverterMap()
  297. {
  298. delete unicodeConverterMap;
  299. unicodeConverterMap = NULL; // Important to clear, as this is called when threadpool threads end...
  300. if (prevThreadTerminator)
  301. {
  302. (*prevThreadTerminator)();
  303. prevThreadTerminator = NULL;
  304. }
  305. }
  306. RTLUnicodeConverter * queryRTLUnicodeConverter(char const * codepage)
  307. {
  308. if (!unicodeConverterMap) // NB: one per thread, so no contention
  309. {
  310. unicodeConverterMap = new MapStrToUnicodeConverter;
  311. // Use thread terminator hook to clear them up on thread exit.
  312. // NB: May need to revisit if not on a jlib Thread.
  313. prevThreadTerminator = addThreadTermFunc(clearUnicodeConverterMap);
  314. }
  315. RTLUnicodeConverter * conv = unicodeConverterMap->getValue(codepage);
  316. if(!conv)
  317. {
  318. unicodeConverterMap->setValue(codepage, codepage);
  319. conv = unicodeConverterMap->getValue(codepage);
  320. }
  321. return conv;
  322. }
  323. // normalization
  324. bool unicodeNeedsNormalize(unsigned inlen, UChar * in, UErrorCode * err)
  325. {
  326. return !unorm_isNormalized(in, inlen, UNORM_NFC, err);
  327. }
  328. bool vunicodeNeedsNormalize(UChar * in, UErrorCode * err)
  329. {
  330. return !unorm_isNormalized(in, -1, UNORM_NFC, err);
  331. }
  332. void unicodeReplaceNormalized(unsigned inlen, UChar * in, UErrorCode * err)
  333. {
  334. UChar * buff = (UChar *)rtlMalloc(inlen*sizeof(UChar));
  335. unsigned len = unorm_normalize(in, inlen, UNORM_NFC, 0, buff, inlen, err);
  336. while(len<inlen) buff[len++] = 0x0020;
  337. memcpy(in, buff, inlen * sizeof(UChar));
  338. free(buff);
  339. }
  340. void vunicodeReplaceNormalized(unsigned inlen, UChar * in, UErrorCode * err)
  341. {
  342. UChar * buff = (UChar *)rtlMalloc(inlen*sizeof(UChar));
  343. unsigned len = unorm_normalize(in, -1, UNORM_NFC, 0, buff, inlen-1, err);
  344. buff[len] = 0x0000;
  345. memcpy(in, buff, inlen * sizeof(UChar));
  346. free(buff);
  347. }
  348. void unicodeGetNormalized(unsigned & outlen, UChar * & out, unsigned inlen, UChar * in, UErrorCode * err)
  349. {
  350. outlen = unorm_normalize(in, inlen, UNORM_NFC, 0, 0, 0, err);
  351. out = (UChar *)rtlMalloc(outlen*2);
  352. unorm_normalize(in, inlen, UNORM_NFC, 0, out, outlen, err);
  353. }
  354. void vunicodeGetNormalized(UChar * & out, unsigned inlen, UChar * in, UErrorCode * err)
  355. {
  356. unsigned outlen = unorm_normalize(in, inlen, UNORM_NFC, 0, 0, 0, err);
  357. out = (UChar *)rtlMalloc((outlen+1)*2);
  358. unorm_normalize(in, inlen, UNORM_NFC, 0, out, outlen, err);
  359. out[outlen] = 0x0000;
  360. }
  361. void unicodeEnsureIsNormalized(unsigned len, UChar * str)
  362. {
  363. UErrorCode err = U_ZERO_ERROR;
  364. if(unicodeNeedsNormalize(len, str, &err))
  365. unicodeReplaceNormalized(len, str, &err);
  366. }
  367. void vunicodeEnsureIsNormalized(unsigned len, UChar * str)
  368. {
  369. UErrorCode err = U_ZERO_ERROR;
  370. if(vunicodeNeedsNormalize(str, &err))
  371. vunicodeReplaceNormalized(len, str, &err);
  372. }
  373. void unicodeEnsureIsNormalizedX(unsigned & len, UChar * & str)
  374. {
  375. UErrorCode err = U_ZERO_ERROR;
  376. if(unicodeNeedsNormalize(len, str, &err))
  377. {
  378. unsigned inlen = len;
  379. UChar * in = str;
  380. unicodeGetNormalized(len, str, inlen, in, &err);
  381. free(in);
  382. }
  383. }
  384. void vunicodeEnsureIsNormalizedX(unsigned inlen, UChar * & str)
  385. {
  386. UErrorCode err = U_ZERO_ERROR;
  387. if(unicodeNeedsNormalize(inlen, str, &err))
  388. {
  389. UChar * in = str;
  390. vunicodeGetNormalized(str, inlen, in, &err);
  391. free(in);
  392. }
  393. }
  394. void unicodeNormalizedCopy(UChar * out, UChar * in, unsigned len)
  395. {
  396. UErrorCode err = U_ZERO_ERROR;
  397. if(unicodeNeedsNormalize(len, in, &err))
  398. unorm_normalize(in, len, UNORM_NFC, 0, out, len, &err);
  399. else
  400. memcpy(out, in, len);
  401. }
  402. void normalizeUnicodeString(UnicodeString const & in, UnicodeString & out)
  403. {
  404. UErrorCode err = U_ZERO_ERROR;
  405. icu::Normalizer::compose(in, false, 0, out, err);
  406. assertex(U_SUCCESS(err));
  407. }
  408. #endif
  409. // padding
  410. static void multimemset(char * out, size_t outlen, char const * in, size_t inlen)
  411. {
  412. size_t outpos = 0;
  413. size_t inpos = 0;
  414. while(outpos < outlen)
  415. {
  416. out[outpos++] = in[inpos++];
  417. if(inpos == inlen)
  418. inpos = 0;
  419. }
  420. }
  421. typedef MapStringTo<MemoryAttr, size32_t> MemoryAttrMapping;
  422. MemoryAttrMapping *unicodeBlankCache;
  423. CriticalSection ubcCrit;
  424. MODULE_INIT(INIT_PRIORITY_STANDARD)
  425. {
  426. unicodeBlankCache = new MemoryAttrMapping;
  427. return true;
  428. }
  429. MODULE_EXIT()
  430. {
  431. delete unicodeBlankCache;
  432. }
  433. UChar unicodeSpace = 0x0020;
  434. void codepageBlankFill(char const * codepage, char * out, size_t len)
  435. {
  436. CriticalBlock b(ubcCrit);
  437. MemoryAttr * cached = unicodeBlankCache->getValue(codepage);
  438. if(cached)
  439. {
  440. char const * blank = (char const *)cached->get();
  441. size_t blanklen = cached->length();
  442. if(blanklen==1)
  443. memset(out, *blank, len);
  444. else
  445. multimemset(out, len, blank, blanklen);
  446. }
  447. else
  448. {
  449. unsigned blanklen;
  450. char * blank;
  451. rtlUnicodeToCodepageX(blanklen, blank, 1, &unicodeSpace, codepage);
  452. unicodeBlankCache->setValue(codepage, blanklen);
  453. unicodeBlankCache->getValue(codepage)->set(blanklen, blank);
  454. if(blanklen==1)
  455. memset(out, *blank, len);
  456. else
  457. multimemset(out, len, blank, blanklen);
  458. free(blank);
  459. }
  460. }
  461. //---------------------------------------------------------------------------
  462. // floating point functions
  463. static const double smallPowers[16] = {
  464. 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
  465. 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15 };
  466. static double powerOfTen(int x)
  467. {
  468. if (x < 0)
  469. return 1 / powerOfTen(-x);
  470. double value = smallPowers[x&15];
  471. double scale = 1e16;
  472. x >>= 4;
  473. while (x)
  474. {
  475. if (x & 1)
  476. value *= scale;
  477. scale *= scale;
  478. x >>= 1;
  479. }
  480. return value;
  481. };
  482. static double kk = (1.0 / ((unsigned __int64)1<<53));
  483. __int64 rtlRound(double x)
  484. {
  485. //a fudge to make numbers that are inexact after a division round up "correctly".
  486. //coded rather oddly as microsoft's optimizer has a habit of throwing it away otherwise...
  487. volatile double tt = x * kk;
  488. x += tt;
  489. if (x >= 0.0)
  490. return (__int64)(x + 0.5);
  491. return -(__int64)(-x + 0.5);
  492. }
  493. double rtlRoundTo(const double x, int places)
  494. {
  495. if (x < 0)
  496. return -rtlRoundTo(-x, places);
  497. // See HPCC-15557 and HPCC-21878 regarding the following two lines.
  498. // volatile double tt = x * kk;
  499. // double x0 = x + tt;
  500. if (places >= 0)
  501. {
  502. double scale = powerOfTen(places);
  503. return floor(x * scale + 0.5) / scale;
  504. }
  505. else
  506. {
  507. double scale = powerOfTen(-places);
  508. return floor(x / scale + 0.5) * scale;
  509. }
  510. }
  511. __int64 rtlRoundDown(double x)
  512. {
  513. if (x >= 0.0)
  514. return (__int64)floor(x);
  515. return (__int64)ceil(x);
  516. }
  517. __int64 rtlRoundUp(double x)
  518. {
  519. if (x >= 0.0)
  520. return (__int64)ceil(x);
  521. return (__int64)floor(x);
  522. }
  523. //=============================================================================
  524. // Numeric conversion functions... - fixed length target
  525. #define intToStringNBody() \
  526. unsigned len = numtostr(temp, val); \
  527. if (len > l) \
  528. memset(t,'*',l); \
  529. else \
  530. { \
  531. memcpy(t,temp,len); \
  532. memset(t+len, ' ', l-len); \
  533. }
  534. void rtlUInt4ToStr(size32_t l, char * t, unsigned val)
  535. {
  536. char temp[20];
  537. intToStringNBody();
  538. }
  539. void rtlUInt8ToStr(size32_t l, char * t, unsigned __int64 val)
  540. {
  541. char temp[40];
  542. intToStringNBody();
  543. }
  544. void rtlInt4ToStr(size32_t l, char * t, int val)
  545. {
  546. char temp[20];
  547. intToStringNBody();
  548. }
  549. void rtlInt8ToStr(size32_t l, char * t, __int64 val)
  550. {
  551. char temp[40];
  552. intToStringNBody();
  553. }
  554. //=============================================================================
  555. // Numeric conversion functions... - unknown length target
  556. #define intToUnknownStringBody() \
  557. unsigned len = numtostr(temp, val); \
  558. char * result = (char *)rtlMalloc(len); \
  559. memcpy(result, temp, len); \
  560. l = len; \
  561. t = result;
  562. void rtlUInt4ToStrX(size32_t & l, char * & t, unsigned val)
  563. {
  564. char temp[20];
  565. intToUnknownStringBody();
  566. }
  567. void rtlUInt8ToStrX(size32_t & l, char * & t, unsigned __int64 val)
  568. {
  569. char temp[40];
  570. intToUnknownStringBody();
  571. }
  572. void rtlInt4ToStrX(size32_t & l, char * & t, int val)
  573. {
  574. char temp[20];
  575. intToUnknownStringBody();
  576. }
  577. void rtlInt8ToStrX(size32_t & l, char * & t, __int64 val)
  578. {
  579. char temp[40];
  580. intToUnknownStringBody();
  581. }
  582. //=============================================================================
  583. // Numeric conversion functions... - fixed length ebcdic target
  584. // ILKA - converting ebcdic to numeric still uses string in between, for more efficiency
  585. // a function numtoebcdicstr should be implemented
  586. #define intToEbcdicStringNBody() \
  587. unsigned len = numtostr(astr, val); \
  588. rtlStrToEStr(sizeof(estr),estr,len,astr); \
  589. if (len > l) \
  590. memset(t,0x2A,l); \
  591. else \
  592. { \
  593. memcpy(t,estr,len); \
  594. memset(t+len, '@', l-len); \
  595. }
  596. void rtl_l42en(size32_t l, char * t, unsigned val)
  597. {
  598. char astr[20];
  599. char estr[20];
  600. intToEbcdicStringNBody();
  601. }
  602. void rtl_l82en(size32_t l, char * t, unsigned __int64 val)
  603. {
  604. char astr[40];
  605. char estr[40];
  606. intToEbcdicStringNBody();
  607. }
  608. void rtl_ls42en(size32_t l, char * t, int val)
  609. {
  610. char astr[20];
  611. char estr[20];
  612. intToEbcdicStringNBody();
  613. }
  614. void rtl_ls82en(size32_t l, char * t, __int64 val)
  615. {
  616. char astr[40];
  617. char estr[40];
  618. intToEbcdicStringNBody();
  619. }
  620. //=============================================================================
  621. // Numeric conversion functions... - unknown length ebcdic target
  622. #if defined _MSC_VER
  623. #pragma warning(push)
  624. #pragma warning(disable:4700)
  625. #endif
  626. void rtl_l42ex(size32_t & l, char * & t, unsigned val)
  627. {
  628. char astr[20];
  629. unsigned alen = numtostr(astr, val);
  630. rtlStrToEStrX(l,t,alen,astr);
  631. }
  632. void rtl_l82ex(size32_t & l, char * & t, unsigned __int64 val)
  633. {
  634. char astr[40];
  635. unsigned alen = numtostr(astr, val);
  636. rtlStrToEStrX(l,t,alen,astr);
  637. }
  638. void rtl_ls42ex(size32_t & l, char * & t, int val)
  639. {
  640. char astr[20];
  641. unsigned alen = numtostr(astr, val);
  642. rtlStrToEStrX(l,t,alen,astr);
  643. }
  644. void rtl_ls82ex(size32_t & l, char * & t, __int64 val)
  645. {
  646. char astr[40];
  647. unsigned alen = numtostr(astr, val);
  648. rtlStrToEStrX(l,t,alen,astr);
  649. }
  650. #ifdef _MSC_VER
  651. #pragma warning(pop)
  652. #endif
  653. //=============================================================================
  654. // Numeric conversion functions... - fixed length variable target
  655. #define intToVarStringNBody() \
  656. unsigned len = numtostr(temp, val) + 1; \
  657. if (len > l) \
  658. { \
  659. memset(t,'*',l); \
  660. t[l-1]=0; \
  661. } \
  662. else \
  663. memcpy(t,temp,len);
  664. void rtlUInt4ToVStr(size32_t l, char * t, unsigned val)
  665. {
  666. char temp[20];
  667. intToVarStringNBody();
  668. }
  669. void rtlUInt8ToVStr(size32_t l, char * t, unsigned __int64 val)
  670. {
  671. char temp[40];
  672. intToVarStringNBody();
  673. }
  674. void rtlInt4ToVStr(size32_t l, char * t, int val)
  675. {
  676. char temp[20];
  677. intToVarStringNBody();
  678. }
  679. void rtlInt8ToVStr(size32_t l, char * t, __int64 val)
  680. {
  681. char temp[40];
  682. intToVarStringNBody();
  683. }
  684. //=============================================================================
  685. // Numeric conversion functions... - unknown length variable target
  686. #define intToVarStringXBody() \
  687. unsigned len = numtostr(temp, val); \
  688. temp[len] = 0; \
  689. return strdup(temp);
  690. char * rtlUInt4ToVStrX(unsigned val)
  691. {
  692. char temp[20];
  693. intToVarStringXBody();
  694. }
  695. char * rtlUInt8ToVStrX(unsigned __int64 val)
  696. {
  697. char temp[40];
  698. intToVarStringXBody();
  699. }
  700. char * rtlInt4ToVStrX(int val)
  701. {
  702. char temp[20];
  703. intToVarStringXBody();
  704. }
  705. char * rtlInt8ToVStrX(__int64 val)
  706. {
  707. char temp[40];
  708. intToVarStringXBody();
  709. }
  710. //---------------------------------------------------------------------------
  711. static const unsigned largeAllocaThreshold = 1024*10;
  712. #define CONDSTACKALLOC(MA, SZ) ((SZ>largeAllocaThreshold) ? MA.allocate(SZ) : alloca(SZ))
  713. double rtlStrToReal(size32_t l, const char * t)
  714. {
  715. MemoryAttr heapMem;
  716. char * temp = (char *)CONDSTACKALLOC(heapMem, l+1);
  717. memcpy(temp, t, l);
  718. temp[l] = 0;
  719. return rtlVStrToReal(temp);
  720. }
  721. double rtlEStrToReal(size32_t l, const char * t)
  722. {
  723. MemoryAttr heapMem;
  724. char * temp = (char *)CONDSTACKALLOC(heapMem, l+1);
  725. rtlEStrToStr(l,temp,l,t);
  726. temp[l] = 0;
  727. return rtlVStrToReal(temp);
  728. }
  729. double rtlVStrToReal(const char * t)
  730. {
  731. char * end;
  732. return strtod(t, &end);
  733. }
  734. double rtl_ex2f(const char * t)
  735. {
  736. return rtlEStrToReal(strlen(t), t);
  737. }
  738. double rtlUnicodeToReal(size32_t l, UChar const * t)
  739. {
  740. unsigned bufflen;
  741. char * buff;
  742. rtlUnicodeToCodepageX(bufflen, buff, l, t, "ascii");
  743. double ret = rtlStrToReal(bufflen, buff);
  744. rtlFree(buff);
  745. return ret;
  746. }
  747. //---------------------------------------------------------------------------
  748. static void truncFixedReal(size32_t l, char * t, StringBuffer & temp)
  749. {
  750. const char * str = temp.str();
  751. unsigned len = temp.length();
  752. if (len > l)
  753. {
  754. //If we don't lose significant digits left of the decimal point then truncate the string.
  755. const char * dot = strchr(str, '.');
  756. if (dot && ((size_t)(dot - str) <= l))
  757. len = l;
  758. }
  759. if (len > l)
  760. memset(t,'*',l);
  761. else
  762. {
  763. memcpy(t,temp.str(),len);
  764. memset(t+len, ' ', l-len);
  765. }
  766. }
  767. void rtlRealToStr(size32_t l, char * t, double val)
  768. {
  769. StringBuffer temp;
  770. temp.append(val);
  771. //This could either truncate or round when converting a real to a string
  772. //Rounding is more user friendly, but then (string3)(string)1.99 != (string3)1.99 which is
  773. //rather count intuitive. (That is still true if the value is out of range.)
  774. truncFixedReal(l, t, temp);
  775. }
  776. void rtlRealToStr(size32_t l, char * t, float val)
  777. {
  778. StringBuffer temp;
  779. temp.append(val);
  780. //See comment above
  781. truncFixedReal(l, t, temp);
  782. }
  783. void rtlRealToStrX(size32_t & l, char * & t, double val)
  784. {
  785. StringBuffer temp;
  786. temp.append(val);
  787. unsigned len = temp.length();
  788. char * result = (char *)rtlMalloc(len);
  789. memcpy(result,temp.str(),len);
  790. l = len;
  791. t = result;
  792. }
  793. void rtlRealToStrX(size32_t & l, char * & t, float val)
  794. {
  795. StringBuffer temp;
  796. temp.append(val);
  797. unsigned len = temp.length();
  798. char * result = (char *)rtlMalloc(len);
  799. memcpy(result,temp.str(),len);
  800. l = len;
  801. t = result;
  802. }
  803. void rtlRealToVStr(size32_t l, char * t, double val)
  804. {
  805. StringBuffer temp;
  806. temp.append(val);
  807. unsigned len = temp.length()+1;
  808. if (len > l)
  809. {
  810. memset(t,'*',l);
  811. t[l-1]=0;
  812. }
  813. else
  814. {
  815. memcpy(t,temp.str(),len);
  816. }
  817. }
  818. void rtlRealToVStr(size32_t l, char * t, float val)
  819. {
  820. StringBuffer temp;
  821. temp.append(val);
  822. unsigned len = temp.length()+1;
  823. if (len > l)
  824. {
  825. memset(t,'*',l);
  826. t[l-1]=0;
  827. }
  828. else
  829. {
  830. memcpy(t,temp.str(),len);
  831. }
  832. }
  833. char * rtlRealToVStrX(double val)
  834. {
  835. StringBuffer temp;
  836. temp.append(val);
  837. return strdup(temp);
  838. }
  839. char * rtlRealToVStrX(float val)
  840. {
  841. StringBuffer temp;
  842. temp.append(val);
  843. return strdup(temp);
  844. }
  845. //---------------------------------------------------------------------------
  846. #define SkipSpaces(l, t) \
  847. while (l) \
  848. { \
  849. char c = *t; \
  850. switch (c) \
  851. { \
  852. case ' ': \
  853. case '\t': \
  854. case '-': \
  855. case '+': \
  856. break; \
  857. default: \
  858. goto done; \
  859. } \
  860. l--; \
  861. t++; \
  862. } \
  863. done:
  864. #define SkipSignSpaces(l, t, negate) \
  865. while (l) \
  866. { \
  867. char c = *t; \
  868. switch (c) \
  869. { \
  870. case '-': \
  871. negate = true; \
  872. break; \
  873. case ' ': \
  874. case '\t': \
  875. case '+': \
  876. break; \
  877. default: \
  878. goto done; \
  879. } \
  880. l--; \
  881. t++; \
  882. } \
  883. done:
  884. unsigned rtlStrToUInt4(size32_t l, const char * t)
  885. {
  886. SkipSpaces(l, t);
  887. unsigned v = 0;
  888. while (l--)
  889. {
  890. char c = *t++;
  891. if ((c >= '0') && (c <= '9'))
  892. v = v * 10 + (c-'0');
  893. else
  894. break;
  895. }
  896. return v;
  897. }
  898. unsigned __int64 rtlStrToUInt8(size32_t l, const char * t)
  899. {
  900. SkipSpaces(l, t);
  901. unsigned __int64 v = 0;
  902. while (l--)
  903. {
  904. char c = *t++;
  905. if ((c >= '0') && (c <= '9'))
  906. v = v * 10 + (c-'0');
  907. else
  908. break;
  909. }
  910. return v;
  911. }
  912. int rtlStrToInt4(size32_t l, const char * t)
  913. {
  914. bool negate = false;
  915. SkipSignSpaces(l, t, negate);
  916. int v = 0;
  917. while (l--)
  918. {
  919. char c = *t++;
  920. if ((c >= '0') && (c <= '9'))
  921. v = v * 10 + (c-'0');
  922. else
  923. break;
  924. }
  925. return negate ? -v : v;
  926. }
  927. __int64 rtlStrToInt8(size32_t l, const char * t)
  928. {
  929. bool negate = false;
  930. SkipSignSpaces(l, t, negate);
  931. __int64 v = 0;
  932. while (l--)
  933. {
  934. char c = *t++;
  935. if ((c >= '0') && (c <= '9'))
  936. v = v * 10 + (c-'0');
  937. else
  938. break;
  939. }
  940. return negate ? -v : v;
  941. }
  942. __int64 rtlUnicodeToInt8(size32_t l, UChar const * t)
  943. {
  944. unsigned bufflen;
  945. char * buff;
  946. rtlUnicodeToCodepageX(bufflen, buff, l, t, "ascii");
  947. __int64 ret = rtlStrToInt8(bufflen, buff);
  948. rtlFree(buff);
  949. return ret;
  950. }
  951. bool rtlStrToBool(size32_t l, const char * t)
  952. {
  953. while (l--)
  954. {
  955. char c = *t++;
  956. if (c != ' ')
  957. return true;
  958. }
  959. return false;
  960. }
  961. bool rtlUnicodeToBool(size32_t l, UChar const * t)
  962. {
  963. while(l--)
  964. if(*t++ != 0x20) return true;
  965. return false;
  966. }
  967. // return true for "on", "true" or any non-zero constant, else false;
  968. bool rtlCsvStrToBool(size32_t l, const char * t)
  969. {
  970. return clipStrToBool(l, t);
  971. }
  972. //---------------------------------------------------------------------------
  973. unsigned rtlEStrToUInt4(size32_t l, const char * t)
  974. {
  975. MemoryAttr heapMem;
  976. char * astr = (char *)CONDSTACKALLOC(heapMem, l);
  977. rtlEStrToStr(l,astr,l,t);
  978. return rtlStrToUInt4(l,astr);
  979. }
  980. unsigned __int64 rtlEStrToUInt8(size32_t l, const char * t)
  981. {
  982. MemoryAttr heapMem;
  983. char * astr = (char *)CONDSTACKALLOC(heapMem, l);
  984. rtlEStrToStr(l,astr,l,t);
  985. return rtlStrToUInt8(l,astr);
  986. }
  987. int rtlEStrToInt4(size32_t l, const char * t)
  988. {
  989. MemoryAttr heapMem;
  990. char * astr = (char *)CONDSTACKALLOC(heapMem, l);
  991. rtlEStrToStr(l,astr,l,t);
  992. return rtlStrToInt4(l,astr);
  993. }
  994. __int64 rtlEStrToInt8(size32_t l, const char * t)
  995. {
  996. MemoryAttr heapMem;
  997. char * astr = (char *)CONDSTACKALLOC(heapMem, l);
  998. rtlEStrToStr(l,astr,l,t);
  999. return rtlStrToInt8(l,astr);
  1000. }
  1001. bool rtl_en2b(size32_t l, const char * t)
  1002. {
  1003. MemoryAttr heapMem;
  1004. char * astr = (char *)CONDSTACKALLOC(heapMem, l);
  1005. rtlEStrToStr(l,astr,l,t);
  1006. return rtlStrToBool(l,astr);
  1007. }
  1008. //---------------------------------------------------------------------------
  1009. unsigned rtlVStrToUInt4(const char * t)
  1010. {
  1011. return rtlStrToUInt4(strlen(t), t);
  1012. }
  1013. unsigned __int64 rtlVStrToUInt8(const char * t)
  1014. {
  1015. return rtlStrToUInt8(strlen(t), t);
  1016. }
  1017. int rtlVStrToInt4(const char * t)
  1018. {
  1019. return rtlStrToInt4(strlen(t), t);
  1020. }
  1021. __int64 rtlVStrToInt8(const char * t)
  1022. {
  1023. return rtlStrToInt8(strlen(t), t);
  1024. }
  1025. bool rtlVStrToBool(const char * t)
  1026. {
  1027. char c;
  1028. while ((c = *t++) != 0)
  1029. {
  1030. //MORE: Allow spaces if we change the semantics.
  1031. return true;
  1032. }
  1033. return false;
  1034. }
  1035. //---------------------------------------------------------------------------
  1036. void holeIntFormat(size32_t maxlen, char * target, __int64 value, unsigned width, unsigned flags)
  1037. {
  1038. StringBuffer result;
  1039. if (flags & 1)
  1040. result.appendf("%0*" I64F "d", width, value);
  1041. else
  1042. result.appendf("%*" I64F "d", width, value);
  1043. size32_t written = result.length();
  1044. if (written > maxlen)
  1045. memset(target, '*', maxlen);
  1046. else
  1047. {
  1048. memset(target+written, ' ', maxlen-written);
  1049. memcpy(target, result.str(), written);
  1050. }
  1051. }
  1052. void holeRealFormat(size32_t maxlen, char * target, double value, unsigned width, unsigned places)
  1053. {
  1054. if ((int) width <= 0)
  1055. return;
  1056. const unsigned tempSize = 500;
  1057. char temp[tempSize*2+2]; // Space for leading digits/0, '-' and \0 terminator
  1058. //Ensure that we output at most 2*tempSize characters.
  1059. unsigned formatWidth = width < tempSize ? width : tempSize;
  1060. if (places >= formatWidth)
  1061. places = formatWidth-1;
  1062. unsigned written = sprintf(temp, "%*.*f", formatWidth, places, value);
  1063. const char * src = temp;
  1064. if (written > width)
  1065. {
  1066. //Strip a leading 0 for very small numbers.
  1067. if (*src == '0')
  1068. {
  1069. written--;
  1070. src++;
  1071. }
  1072. }
  1073. if (written > width)
  1074. {
  1075. memset(target, '*', width);
  1076. if (places)
  1077. target[width-places-1] = '.';
  1078. }
  1079. else
  1080. {
  1081. unsigned delta = width - written;
  1082. if (delta)
  1083. memset(target, ' ', delta);
  1084. memcpy(target+delta, src, written);
  1085. }
  1086. }
  1087. //=============================================================================
  1088. // Conversion functions...
  1089. void rtlIntFormat(unsigned & len, char * & target, __int64 value, unsigned width, unsigned flags)
  1090. {
  1091. if ((int) width <= 0)
  1092. {
  1093. len = 0;
  1094. target = NULL;
  1095. return;
  1096. }
  1097. len = width;
  1098. target = (char *)rtlMalloc(width);
  1099. holeIntFormat(width, target, value, width, flags);
  1100. }
  1101. void rtlRealFormat(unsigned & len, char * & target, double value, unsigned width, unsigned places)
  1102. {
  1103. if ((int) width < 0)
  1104. {
  1105. len = 0;
  1106. target = NULL;
  1107. return;
  1108. }
  1109. len = width;
  1110. target = (char *)rtlMalloc(width);
  1111. holeRealFormat(width, target, value, width, places);
  1112. }
  1113. //=============================================================================
  1114. // String functions...
  // Returns true when any of the len bytes at _src is non-zero.
  bool rtlDataToBool(unsigned len, const void * _src)
  {
      const char * bytes = (const char *)_src;
      for (unsigned i = 0; i < len; i++)
      {
          if (bytes[i] != 0)
              return true;
      }
      return false;
  }
  // Writes a boolean into a tlen-byte data field: all zero bytes for false, and
  // zero bytes with the final byte set to 1 for true.
  // A zero-length target is left untouched (previously the true case wrote to
  // tgt[tlen-1] with tlen == 0, i.e. outside the buffer).
  void rtlBoolToData(unsigned tlen, void * tgt, bool src)
  {
      if (tlen == 0)
          return;
      memset(tgt, 0, tlen);
      if (src)
          ((char *)tgt)[tlen-1] = 1;
  }
  // Writes a boolean into a tlen-character string field: all blanks for false,
  // and blanks with the final character set to '1' for true.
  // A zero-length target is left untouched (previously the true case wrote to
  // tgt[tlen-1] with tlen == 0, i.e. outside the buffer).
  void rtlBoolToStr(unsigned tlen, void * tgt, bool src)
  {
      if (tlen == 0)
          return;
      memset(tgt, ' ', tlen);
      if (src)
          ((char *)tgt)[tlen-1] = '1';
  }
  // Writes a boolean as a null-terminated string: "1" for true, "" for false.
  void rtlBoolToVStr(char * tgt, bool src)
  {
      char * out = tgt;
      if (src)
      {
          out[0] = '1';
          out++;
      }
      out[0] = 0;
  }
  1141. void rtlBoolToStrX(unsigned & tlen, char * & tgt, bool src)
  1142. {
  1143. if (src)
  1144. {
  1145. char * ret = (char *)rtlMalloc(1);
  1146. ret[0] = '1';
  1147. tlen = 1;
  1148. tgt = ret;
  1149. }
  1150. else
  1151. {
  1152. tlen = 0;
  1153. tgt = NULL;
  1154. }
  1155. }
  // Returns a strdup'd null-terminated string for a boolean: "1" for true and
  // "" for false.
  char * rtlBoolToVStrX(bool src)
  {
      if (!src)
          return strdup("");
      return strdup("1");
  }
  1163. //-----------------------------------------------------------------------------
  1164. // String copying functions....
  // Copies up to tlen bytes from src into tgt; if src is shorter than the
  // target, the remainder of the target is zero filled.
  void rtlDataToData(unsigned tlen, void * tgt, unsigned slen, const void * src)
  {
      const unsigned copied = (slen > tlen) ? tlen : slen;
      memcpy(tgt, src, copied);
      if (copied < tlen)
          memset((char *)tgt+copied, 0, tlen-copied);
  }
  // Copies up to tlen characters from src into the data field tgt; any unused
  // tail of the target is zero filled.
  void rtlStrToData(unsigned tlen, void * tgt, unsigned slen, const void * src)
  {
      const unsigned copied = (slen > tlen) ? tlen : slen;
      memcpy(tgt, src, copied);
      if (copied < tlen)
          memset((char *)tgt+copied, 0, tlen-copied);
  }
  // Copies up to tlen characters from src into the fixed-length string tgt; any
  // unused tail of the target is blank filled.
  void rtlStrToStr(unsigned tlen, void * tgt, unsigned slen, const void * src)
  {
      const unsigned copied = (slen > tlen) ? tlen : slen;
      memcpy(tgt, src, copied);
      if (copied < tlen)
          memset((char *)tgt+copied, ' ', tlen-copied);
  }
  // Copies src into tgt as a null-terminated string. When tlen is non-zero the
  // copy is truncated so that text plus terminator fit in tlen bytes; a tlen of
  // 0 applies no truncation.
  void rtlStrToVStr(unsigned tlen, void * tgt, unsigned slen, const void * src)
  {
      unsigned copied = slen;
      if ((tlen != 0) && (copied >= tlen))
          copied = tlen-1;

      char * out = (char *)tgt;
      memcpy(out, src, copied);
      out[copied] = 0;
  }
  1196. void rtlStr2EStr(unsigned tlen, char * tgt, unsigned slen, const char * src)
  1197. {
  1198. rtlStrToEStr(tlen,tgt,slen,src);
  1199. }
  1200. void rtlEStr2Data(unsigned tlen, void * tgt, unsigned slen, const char * src)
  1201. {
  1202. if (slen > tlen)
  1203. slen = tlen;
  1204. rtlEStrToStr(slen,(char *)tgt,slen,src);
  1205. if (tlen > slen)
  1206. memset((char *)tgt+slen, 0, tlen-slen);
  1207. }
  1208. void rtlEStr2Str(unsigned tlen, void * tgt, unsigned slen, const char * src)
  1209. {
  1210. rtlEStrToStr(tlen,(char *)tgt,slen,src);
  1211. }
  1212. void rtlEStrToVStr(unsigned tlen, void * tgt, unsigned slen, const char * src)
  1213. {
  1214. if (slen >= tlen)
  1215. slen = tlen-1;
  1216. rtlEStrToStr(slen,(char *)tgt,slen,src);
  1217. *((char *)tgt+slen)=0;
  1218. }
  // Copies up to tlen bytes of an E-string into tgt and pads any remaining
  // space with '@' (0x40 - presumably the EBCDIC blank; confirm).
  void rtlEStrToEStr(unsigned tlen, void * tgt, unsigned slen, const void * src)
  {
      const unsigned copied = (slen > tlen) ? tlen : slen;
      memcpy(tgt, src, copied);
      if (copied < tlen)
          memset((char *)tgt+copied, '@', tlen-copied);
  }
  1227. void rtlVStrToData(unsigned tlen, void * tgt, const char * src)
  1228. {
  1229. rtlStrToData(tlen, tgt, strlen(src), src);
  1230. }
  1231. void rtlVStrToStr(unsigned tlen, void * tgt, const char * src)
  1232. {
  1233. rtlStrToStr(tlen, tgt, strlen(src), src);
  1234. }
  1235. void rtlVStr2EStr(unsigned tlen, char * tgt, const char * src)
  1236. {
  1237. rtlStr2EStr(tlen, tgt, strlen(src), src);
  1238. }
  1239. void rtlVStrToVStr(unsigned tlen, void * tgt, const char * src)
  1240. {
  1241. rtlStrToVStr(tlen, tgt, strlen(src), src);
  1242. }
  1243. char *rtlCreateQuotedString(unsigned _len_tgt,char * tgt)
  1244. {
  1245. // Add ' at start and end. MORE! also needs to handle embedded quotes
  1246. char * result = (char *)rtlMalloc(_len_tgt + 3);
  1247. result[0] = '\'';
  1248. memcpy(result+1, tgt, _len_tgt);
  1249. result[_len_tgt+1] = '\'';
  1250. result[_len_tgt+2] = 0;
  1251. return result;
  1252. }
  1253. //-----------------------------------------------------------------------------
  1254. //List of strings with length of -1 to mark the end...
  1255. void rtlConcat(unsigned & tlen, char * * tgt, ...)
  1256. {
  1257. va_list args;
  1258. unsigned totalLength = 0;
  1259. va_start(args, tgt);
  1260. for (;;)
  1261. {
  1262. unsigned len = va_arg(args, unsigned);
  1263. if (len+1==0)
  1264. break;
  1265. va_arg(args, char *); // Skip the string
  1266. totalLength += len;
  1267. }
  1268. va_end(args);
  1269. char * buffer = (char *)rtlMalloc(totalLength);
  1270. char * cur = buffer;
  1271. va_start(args, tgt);
  1272. for (;;)
  1273. {
  1274. unsigned len = va_arg(args, unsigned);
  1275. if (len+1==0)
  1276. break;
  1277. char * str = va_arg(args, char *);
  1278. memcpy(cur, str, len);
  1279. cur += len;
  1280. }
  1281. va_end(args);
  1282. tlen = totalLength;
  1283. *tgt = buffer;
  1284. }
  1285. void rtlConcatVStr(char * * tgt, ...)
  1286. {
  1287. va_list args;
  1288. unsigned totalLength = 0;
  1289. va_start(args, tgt);
  1290. for (;;)
  1291. {
  1292. unsigned len = va_arg(args, unsigned);
  1293. if (len+1==0)
  1294. break;
  1295. va_arg(args, char *); // Skip the string
  1296. totalLength += len;
  1297. }
  1298. va_end(args);
  1299. char * buffer = (char *)rtlMalloc(totalLength+1);
  1300. char * cur = buffer;
  1301. va_start(args, tgt);
  1302. for (;;)
  1303. {
  1304. unsigned len = va_arg(args, unsigned);
  1305. if (len+1==0)
  1306. break;
  1307. char * str = va_arg(args, char *);
  1308. memcpy(cur, str, len);
  1309. cur += len;
  1310. }
  1311. va_end(args);
  1312. cur[0] = 0;
  1313. *tgt = buffer;
  1314. }
  1315. #ifdef _USE_ICU
  1316. void rtlConcatUnicode(unsigned & tlen, UChar * * tgt, ...)
  1317. {
  1318. va_list args;
  1319. unsigned totalLength = 0;
  1320. va_start(args, tgt);
  1321. for(;;)
  1322. {
  1323. unsigned len = va_arg(args, unsigned);
  1324. if(len+1==0)
  1325. break;
  1326. va_arg(args, UChar *); // Skip the string
  1327. totalLength += len;
  1328. }
  1329. va_end(args);
  1330. UChar * buffer = (UChar *)rtlMalloc(totalLength*2); //I *believe* this is a valid upper limit, as an NFC concatenation can only be shorter than the sum of its parts
  1331. unsigned idx = 0;
  1332. UErrorCode err = U_ZERO_ERROR;
  1333. va_start(args, tgt);
  1334. for(;;)
  1335. {
  1336. unsigned len = va_arg(args, unsigned);
  1337. if(len+1==0)
  1338. break;
  1339. UChar * str = va_arg(args, UChar *);
  1340. if (len)
  1341. idx = unorm_concatenate(buffer, idx, str, len, buffer, totalLength, UNORM_NFC, 0, &err);
  1342. }
  1343. va_end(args);
  1344. *tgt = buffer;
  1345. tlen = idx;
  1346. }
  1347. void rtlConcatVUnicode(UChar * * tgt, ...)
  1348. {
  1349. va_list args;
  1350. unsigned totalLength = 0;
  1351. va_start(args, tgt);
  1352. for(;;)
  1353. {
  1354. unsigned len = va_arg(args, unsigned);
  1355. if(len+1==0)
  1356. break;
  1357. va_arg(args, UChar *); // Skip the string
  1358. totalLength += len;
  1359. }
  1360. va_end(args);
  1361. UChar * buffer = (UChar *)rtlMalloc((totalLength+1)*2); //I *believe* this is a valid upper limit, as an NFC concatenation can only be shorter than the sum of its parts
  1362. unsigned idx = 0;
  1363. UErrorCode err = U_ZERO_ERROR;
  1364. va_start(args, tgt);
  1365. for(;;)
  1366. {
  1367. unsigned len = va_arg(args, unsigned);
  1368. if(len+1==0)
  1369. break;
  1370. UChar * str = va_arg(args, UChar *);
  1371. if (len)
  1372. idx = unorm_concatenate(buffer, idx, str, len, buffer, totalLength, UNORM_NFC, 0, &err);
  1373. }
  1374. va_end(args);
  1375. buffer[idx++] = 0x0000;
  1376. *tgt = buffer;
  1377. }
  1378. #else
  1379. void rtlConcatUnicode(unsigned & tlen, UChar * * tgt, ...)
  1380. {
  1381. rtlThrowNoUnicode();
  1382. }
  1383. void rtlConcatVUnicode(UChar * * tgt, ...)
  1384. {
  1385. rtlThrowNoUnicode();
  1386. }
  1387. #endif
  1388. //List of strings with length of -1 to mark the end...
  // Concatenates a variadic list of (unsigned length, const char * text) pairs,
  // terminated by a length of (unsigned)-1, into the fixed-size target of tlen
  // bytes. Text that does not fit is truncated; unused space is set to fill.
  // Argument reading stops as soon as the target is full.
  void rtlConcatStrF(unsigned tlen, void * _tgt, int fill, ...)
  {
      char * out = (char *)_tgt;
      unsigned used = 0;

      va_list args;
      va_start(args, fill);
      while (used != tlen)
      {
          const unsigned len = va_arg(args, unsigned);
          if (len == (unsigned)-1)
              break;
          const char * piece = va_arg(args, const char *);
          unsigned take = len;
          if (len + used > tlen)
              take = tlen - used;
          memcpy(out+used, piece, take);
          used += take;
      }
      va_end(args);

      if (used < tlen)
          memset(out+used, fill, tlen-used);
  }
  // Concatenates a variadic list of (unsigned length, const char * text) pairs,
  // terminated by a length of (unsigned)-1, into tgt. At most tlen characters
  // are copied, and everything from the end of the text up to and including
  // tgt[tlen] is zeroed, so tgt must hold tlen+1 bytes.
  void rtlConcatVStrF(unsigned tlen, char * tgt, ...)
  {
      unsigned used = 0;

      va_list args;
      va_start(args, tgt);
      while (used != tlen)
      {
          const unsigned len = va_arg(args, unsigned);
          if (len == (unsigned)-1)
              break;
          const char * piece = va_arg(args, const char *);
          unsigned take = len;
          if (len + used > tlen)
              take = tlen - used;
          memcpy(tgt+used, piece, take);
          used += take;
      }
      va_end(args);

      memset(tgt+used, 0, (tlen+1)-used);
  }
  1427. #ifdef _USE_ICU
  1428. void rtlConcatUnicodeF(unsigned tlen, UChar * tgt, ...)
  1429. {
  1430. va_list args;
  1431. unsigned idx = 0;
  1432. UErrorCode err = U_ZERO_ERROR;
  1433. va_start(args, tgt);
  1434. for(;;)
  1435. {
  1436. unsigned len = va_arg(args, unsigned);
  1437. if(len+1==0)
  1438. break;
  1439. UChar * str = va_arg(args, UChar *);
  1440. if (len)
  1441. idx = unorm_concatenate(tgt, idx, str, len, tgt, tlen, UNORM_NFC, 0, &err);
  1442. }
  1443. va_end(args);
  1444. while (idx < tlen)
  1445. tgt[idx++] = ' ';
  1446. }
  1447. void rtlConcatVUnicodeF(unsigned tlen, UChar * tgt, ...)
  1448. {
  1449. va_list args;
  1450. unsigned idx = 0;
  1451. UErrorCode err = U_ZERO_ERROR;
  1452. va_start(args, tgt);
  1453. for(;;)
  1454. {
  1455. unsigned len = va_arg(args, unsigned);
  1456. if(len+1==0)
  1457. break;
  1458. UChar * str = va_arg(args, UChar *);
  1459. if (len)
  1460. idx = unorm_concatenate(tgt, idx, str, len, tgt, tlen, UNORM_NFC, 0, &err);
  1461. }
  1462. va_end(args);
  1463. while (idx < tlen)
  1464. tgt[idx++] = 0;
  1465. tgt[tlen] = 0;
  1466. }
  1467. #endif
  1468. //------------------------------------------------------------------------------------------------
  1469. // The following concat functions are all deprecated in favour of the variable number of argument
  1470. // versions
  // Appends up to slen characters of src to tgt starting at offset idx, never
  // writing past tlen. Returns the offset just after the copied text.
  unsigned rtlConcatStrToStr(unsigned tlen, char * tgt, unsigned idx, unsigned slen, const char * src)
  {
      const unsigned room = tlen-idx;
      const unsigned copied = (room > slen) ? slen : room;
      memcpy(tgt+idx, src, copied);
      return idx+copied;
  }
  // Appends the null-terminated string src to tgt starting at offset idx,
  // stopping at the terminator or when offset tlen is reached. Returns the
  // offset just after the copied text.
  unsigned rtlConcatVStrToStr(unsigned tlen, char * tgt, unsigned idx, const char * src)
  {
      unsigned pos = idx;
      for (const char * cur = src; (pos != tlen) && (*cur != 0); cur++)
          tgt[pos++] = *cur;
      return pos;
  }
  1490. void rtlConcatStrToVStr(unsigned tlen, void * _tgt, unsigned slen, const void * src)
  1491. {
  1492. char * tgt = (char *)_tgt;
  1493. unsigned tend = strlen(tgt);
  1494. rtlStrToVStr(tlen-tend, tgt+tend, slen, src);
  1495. }
  1496. void rtlConcatVStrToVStr(unsigned tlen, void * _tgt, const char * src)
  1497. {
  1498. char * tgt = (char *)_tgt;
  1499. unsigned tend = strlen(tgt);
  1500. rtlVStrToVStr(tlen-tend, tgt+tend, src);
  1501. }
  1502. #ifdef _USE_ICU
  1503. unsigned rtlConcatUnicodeToUnicode(unsigned tlen, UChar * tgt, unsigned idx, unsigned slen, UChar const * src)
  1504. {
  1505. UErrorCode err = U_ZERO_ERROR;
  1506. return unorm_concatenate(tgt, idx, src, slen, tgt, tlen, UNORM_NFC, 0, &err);
  1507. }
  1508. unsigned rtlConcatVUnicodeToUnicode(unsigned tlen, UChar * tgt, unsigned idx, UChar const * src)
  1509. {
  1510. return rtlConcatUnicodeToUnicode(tlen, tgt, idx, rtlUnicodeStrlen(src), src);
  1511. }
  1512. #endif
  // Fills tgt[idx..tlen) with '@' (0x40 - presumably the EBCDIC blank; confirm).
  // Does nothing when idx is at or beyond tlen.
  void rtlESpaceFill(unsigned tlen, char * tgt, unsigned idx)
  {
      if (idx >= tlen)
          return;
      memset(tgt+idx, '@', tlen-idx);
  }
  // Fills tgt[idx..tlen) with blanks. Does nothing when idx is at or beyond tlen.
  void rtlSpaceFill(unsigned tlen, char * tgt, unsigned idx)
  {
      if (idx >= tlen)
          return;
      memset(tgt+idx, ' ', tlen-idx);
  }
  // Fills tgt[idx..tlen) with zero bytes. Does nothing when idx is at or beyond
  // tlen.
  void rtlZeroFill(unsigned tlen, char * tgt, unsigned idx)
  {
      if (idx >= tlen)
          return;
      memset(tgt+idx, 0, tlen-idx);
  }
  // Writes a null terminator at tgt[idx], clamping idx to the last element of
  // the tlen-byte buffer.
  // A zero-length buffer is left untouched: previously tlen-1 wrapped round and
  // the terminator was written far outside the buffer.
  void rtlNullTerminate(unsigned tlen, char * tgt, unsigned idx)
  {
      if (tlen == 0)
          return;
      if (idx >= tlen)
          idx = tlen-1;
      tgt[idx] = 0;
  }
  1534. void rtlUnicodeSpaceFill(unsigned tlen, UChar * tgt, unsigned idx)
  1535. {
  1536. while(idx<tlen) tgt[idx++] = 0x0020;
  1537. }
  1538. void rtlUnicodeNullTerminate(unsigned tlen, UChar * tgt, unsigned idx)
  1539. {
  1540. if (idx >= tlen)
  1541. idx = tlen-1;
  1542. tgt[idx] = 0x0000;
  1543. }
  1544. void rtlUnicodeStrcpy(UChar * tgt, UChar const * src)
  1545. {
  1546. memcpy(tgt, src, rtlUnicodeStrlen(src)*2+2);
  1547. }
  1548. void rtlConcatExtend(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  1549. {
  1550. unsigned len = tlen + slen;
  1551. tgt = (char *)rtlRealloc(tgt, len);
  1552. memcpy(tgt+tlen, src, slen);
  1553. tlen = len;
  1554. }
  1555. void rtlConcatUnicodeExtend(size32_t & tlen, UChar * & tgt, size32_t slen, const UChar * src)
  1556. {
  1557. unsigned len = tlen + slen;
  1558. tgt = (UChar *)rtlRealloc(tgt, len * sizeof(UChar));
  1559. memcpy(tgt+tlen, src, slen * sizeof(UChar));
  1560. tlen = len;
  1561. }
  1562. //-----------------------------------------------------------------------------
  // Converts a 1-based start position into a 0-based offset clamped to
  // [0, slen]. Values whose 0-based form is negative as an int become 0.
  inline void normalizeFrom(unsigned & from, unsigned slen)
  {
      const unsigned zeroBased = from - 1;
      if ((int)zeroBased < 0)
          from = 0;
      else
          from = (zeroBased > slen) ? slen : zeroBased;
  }
  // Converts a 1-based inclusive range [from, to] into a 0-based half-open range:
  // from is decremented (and floored at 0), and to is raised to from if it would
  // otherwise lie before it (compared as ints).
  inline void normalizeFromTo(unsigned & from, unsigned & to)
  {
      from -= 1;
      if ((int)from < 0)
          from = 0;
      if ((int)to < (int)from)
          to = from;
  }
  // Clips a 0-based half-open range to the source length: when to exceeds slen
  // it is pulled back to slen, and from is pulled back too if it also exceeds it.
  inline void clipFromTo(unsigned & from, unsigned & to, unsigned slen)
  {
      if (to <= slen)
          return;
      to = slen;
      if (from > slen)
          from = slen;
  }
  1586. //NB: From and to are 1 based: Now fills to ensure the correct length.
  1587. void * doSubStrFT(unsigned & tlen, unsigned slen, const void * src, unsigned from, unsigned to, byte fillChar)
  1588. {
  1589. normalizeFromTo(from, to);
  1590. unsigned len = to - from;
  1591. clipFromTo(from, to, slen);
  1592. unsigned copylen = to - from;
  1593. char * buffer = (char *)rtlMalloc(len);
  1594. memcpy(buffer, (byte *)src+from, copylen);
  1595. if (copylen < len)
  1596. memset(buffer+copylen, fillChar, len-copylen);
  1597. tlen = len;
  1598. return buffer;
  1599. }
  1600. void rtlSubStrFX(unsigned & tlen, char * & tgt, unsigned slen, const char * src, unsigned from)
  1601. {
  1602. normalizeFrom(from, slen);
  1603. tlen = slen-from;
  1604. tgt = (char *) rtlMalloc(tlen);
  1605. memcpy(tgt, src+from, tlen);
  1606. }
  1607. void rtlSubStrFTX(unsigned & tlen, char * & tgt, unsigned slen, const char * src, unsigned from, unsigned to)
  1608. {
  1609. tgt = (char *)doSubStrFT(tlen, slen, src, from, to, ' ');
  1610. }
  1611. void rtlSubStrFT(unsigned tlen, char * tgt, unsigned slen, const char * src, unsigned from, unsigned to)
  1612. {
  1613. unsigned char fillChar = ' '; // More, should be passed as a parameter
  1614. normalizeFromTo(from, to);
  1615. clipFromTo(from, to, slen);
  1616. unsigned copylen = to - from;
  1617. if (copylen > tlen)
  1618. copylen = tlen;
  1619. memcpy(tgt, (const char *)src+from, copylen);
  1620. if (copylen < tlen)
  1621. memset(tgt+copylen, fillChar, tlen-copylen);
  1622. }
  1623. void rtlSubDataFT(unsigned tlen, void * tgt, unsigned slen, const void * src, unsigned from, unsigned to)
  1624. {
  1625. normalizeFromTo(from, to);
  1626. clipFromTo(from, to, slen);
  1627. unsigned copylen = to - from;
  1628. if (copylen > tlen)
  1629. copylen = tlen;
  1630. memcpy(tgt, (char *)src+from, copylen);
  1631. if (copylen < tlen)
  1632. memset((byte*)tgt+copylen, 0, tlen-copylen);
  1633. }
  1634. void rtlSubDataFTX(unsigned & tlen, void * & tgt, unsigned slen, const void * src, unsigned from, unsigned to)
  1635. {
  1636. tgt = doSubStrFT(tlen, slen, src, from, to, 0);
  1637. }
  1638. void rtlSubDataFX(unsigned & tlen, void * & tgt, unsigned slen, const void * src, unsigned from)
  1639. {
  1640. normalizeFrom(from, slen);
  1641. tlen = slen-from;
  1642. tgt = (char *) rtlMalloc(tlen);
  1643. memcpy(tgt, (const byte *)src+from, tlen);
  1644. }
  1645. void rtlUnicodeSubStrFTX(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src, unsigned from, unsigned to)
  1646. {
  1647. normalizeFromTo(from, to);
  1648. tlen = to - from;
  1649. clipFromTo(from, to, slen);
  1650. tgt = (UChar *)rtlMalloc(tlen*2);
  1651. unsigned copylen = to - from;
  1652. memcpy(tgt, src+from, copylen*2);
  1653. while(copylen<tlen)
  1654. tgt[copylen++] = 0x0020;
  1655. }
  1656. void rtlUnicodeSubStrFX(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src, unsigned from)
  1657. {
  1658. normalizeFrom(from, slen);
  1659. tlen = slen - from;
  1660. tgt = (UChar *)rtlMalloc(tlen*2);
  1661. memcpy(tgt, src+from, tlen*2);
  1662. }
  1663. void rtlSubQStrFTX(unsigned & tlen, char * & tgt, unsigned slen, char const * src, unsigned from, unsigned to)
  1664. {
  1665. normalizeFromTo(from, to);
  1666. tlen = to - from;
  1667. clipFromTo(from, to, slen);
  1668. tgt = (char *)rtlMalloc(rtlQStrSize(tlen));
  1669. copyQStrRange(tlen, tgt, src, from, to);
  1670. }
  1671. void rtlSubQStrFX(unsigned & tlen, char * & tgt, unsigned slen, char const * src, unsigned from)
  1672. {
  1673. normalizeFrom(from, slen);
  1674. tlen = slen - from;
  1675. tgt = (char *)rtlMalloc(rtlQStrSize(tlen));
  1676. copyQStrRange(tlen, tgt, src, from, slen);
  1677. }
  1678. void rtlSubQStrFT(unsigned tlen, char * tgt, unsigned slen, const char * src, unsigned from, unsigned to)
  1679. {
  1680. normalizeFromTo(from, to);
  1681. clipFromTo(from, to, slen);
  1682. copyQStrRange(tlen, tgt, src, from ,to);
  1683. }
  1684. //-----------------------------------------------------------------------------
  1685. unsigned rtlTrimStrLen(size32_t l, const char * t)
  1686. {
  1687. while (l)
  1688. {
  1689. if (t[l-1] != ' ')
  1690. break;
  1691. l--;
  1692. }
  1693. return l;
  1694. }
  1695. unsigned rtlTrimDataLen(size32_t l, const void * _t)
  1696. {
  1697. const char * t = (const char *)_t;
  1698. while (l)
  1699. {
  1700. if (t[l-1] != 0)
  1701. break;
  1702. l--;
  1703. }
  1704. return l;
  1705. }
  1706. inline size32_t rtlQuickTrimUnicode(size32_t len, UChar const * str)
  1707. {
  1708. while (len && (str[len-1] == ' '))
  1709. len--;
  1710. return len;
  1711. }
  1712. unsigned rtlTrimUnicodeStrLen(size32_t l, UChar const * t)
  1713. {
  1714. #ifdef _USE_ICU
  1715. if (!l)
  1716. return 0;
  1717. UCharCharacterIterator iter(t, l);
  1718. for(iter.last32(); iter.hasPrevious(); iter.previous32())
  1719. if(iter.current32() != ' ')
  1720. break;
  1721. if(iter.current32() == ' ') return iter.getIndex(); // required as the reverse iteration above doesn't hit the first character
  1722. return iter.getIndex() + 1;
  1723. #else
  1724. return rtlQuickTrimUnicode(l, t);
  1725. #endif
  1726. }
  1727. unsigned rtlTrimUnicodeStrLenWS(size32_t l, UChar const * t)
  1728. {
  1729. #ifdef _USE_ICU
  1730. if (!l)
  1731. return 0;
  1732. UCharCharacterIterator iter(t, l);
  1733. for(iter.last32(); iter.hasPrevious(); iter.previous32())
  1734. if(!u_isspace(iter.current32()))
  1735. break;
  1736. if(u_isspace(iter.current32())) return iter.getIndex(); // required as the reverse iteration above doesn't hit the first character
  1737. return iter.getIndex() + 1;
  1738. #else
  1739. return rtlQuickTrimUnicode(l, t);
  1740. #endif
  1741. }
  // Returns the length of a null-terminated string once trailing blanks are
  // ignored, found in a single forward scan.
  unsigned rtlTrimVStrLen(const char * t)
  {
      const char * keepEnd = t;
      for (const char * cur = t; *cur != 0; cur++)
      {
          if (*cur != ' ')
              keepEnd = cur + 1; // one past the last non-blank seen so far
      }
      return (keepEnd - t);
  }
  1754. unsigned rtlTrimVUnicodeStrLen(UChar const * t)
  1755. {
  1756. return rtlTrimUnicodeStrLen(rtlUnicodeStrlen(t), t);
  1757. }
  1758. inline unsigned rtlLeftTrimStrStart(size32_t slen, const char * src)
  1759. {
  1760. unsigned i = 0;
  1761. while(i < slen && src[i] == ' ')
  1762. i++;
  1763. return i;
  1764. }
  1765. inline unsigned rtlLeftTrimUnicodeStrStart(size32_t slen, UChar const * src)
  1766. {
  1767. #ifdef _USE_ICU
  1768. UCharCharacterIterator iter(src, slen);
  1769. for(iter.first32(); iter.hasNext(); iter.next32())
  1770. if(iter.current32() != ' ')
  1771. break;
  1772. return iter.getIndex();
  1773. #else
  1774. return slen;
  1775. #endif
  1776. }
  1777. inline unsigned rtlLeftTrimUnicodeStrStartWS(size32_t slen, UChar const * src)
  1778. {
  1779. #ifdef _USE_ICU
  1780. UCharCharacterIterator iter(src, slen);
  1781. for(iter.first32(); iter.hasNext(); iter.next32())
  1782. if(!u_isspace(iter.current32()))
  1783. break;
  1784. return iter.getIndex();
  1785. #else
  1786. return slen;
  1787. #endif
  1788. }
  // Returns the offset of the first non-blank character in a null-terminated
  // string (the offset of the terminator if the string is all blanks).
  inline unsigned rtlLeftTrimVStrStart(const char * src)
  {
      const char * cur = src;
      unsigned start = 0;
      while (*cur == ' ')
      {
          cur++;
          start++;
      }
      return start;
  }
  1796. inline void rtlTrimUtf8Len(unsigned & trimLen, size32_t & trimSize, size32_t len, const char * t)
  1797. {
  1798. const byte * start = (const byte *)t;
  1799. const byte * cur = start;
  1800. unsigned trimLength = 0;
  1801. const byte * trimEnd = cur;
  1802. for (unsigned i=0; i < len; i++)
  1803. {
  1804. unsigned next = readUtf8Character(UTF8_MAXSIZE, cur);
  1805. if (next != ' ')
  1806. {
  1807. trimLength = i+1;
  1808. trimEnd = cur;
  1809. }
  1810. }
  1811. trimLen = trimLength;
  1812. trimSize = trimEnd-start;
  1813. }
  1814. inline void rtlTrimUtf8Start(unsigned & trimLen, size32_t & trimSize, size32_t len, const char * t)
  1815. {
  1816. const byte * start = (const byte *)t;
  1817. const byte * cur = start;
  1818. for (unsigned i=0; i < len; i++)
  1819. {
  1820. const byte * prev = cur;
  1821. unsigned next = readUtf8Character(UTF8_MAXSIZE, cur);
  1822. if (next != ' ')
  1823. {
  1824. trimLen = i;
  1825. trimSize = prev-start;
  1826. return;
  1827. }
  1828. }
  1829. trimLen = len;
  1830. trimSize = cur-start;
  1831. }
  1832. inline char * rtlDupSubString(const char * src, unsigned len)
  1833. {
  1834. char * buffer = (char *)rtlMalloc(len + 1);
  1835. memcpy(buffer, src, len);
  1836. buffer[len] = 0;
  1837. return buffer;
  1838. }
  1839. inline UChar * rtlDupSubUnicode(UChar const * src, unsigned len)
  1840. {
  1841. UChar * buffer = (UChar *)rtlMalloc((len + 1) * 2);
  1842. memcpy(buffer, src, len*2);
  1843. buffer[len] = 0x00;
  1844. return buffer;
  1845. }
  1846. inline void rtlCopySubStringV(size32_t tlen, char * tgt, unsigned slen, const char * src)
  1847. {
  1848. if (slen >= tlen)
  1849. slen = tlen-1;
  1850. memcpy(tgt, src, slen);
  1851. tgt[slen] = 0;
  1852. }
  1853. //not yet used, but would be needed for assignment to string rather than vstring
  1854. inline void rtlCopySubString(size32_t tlen, char * tgt, unsigned slen, const char * src, char fill)
  1855. {
  1856. if (slen > tlen)
  1857. slen = tlen;
  1858. memcpy(tgt, src, slen);
  1859. memset(tgt + slen, fill, tlen-slen);
  1860. }
  1861. unsigned rtlTrimUtf8StrLen(size32_t len, const char * t)
  1862. {
  1863. unsigned trimLength = 0;
  1864. const byte * cur = (const byte *)t;
  1865. for (unsigned i=0; i < len; i++)
  1866. {
  1867. unsigned next = readUtf8Character(UTF8_MAXSIZE, cur);
  1868. if (next != ' ')
  1869. trimLength = i+1;
  1870. }
  1871. return trimLength;
  1872. }
  1873. //-----------------------------------------------------------------------------
  // Functions to trim off right side blank spaces
  1875. void rtlTrimRight(size32_t & tlen, char * & tgt, unsigned slen, const char * src)
  1876. {
  1877. tlen = rtlTrimStrLen(slen, src);
  1878. tgt = rtlDupSubString(src, tlen);
  1879. }
  1880. void rtlTrimUnicodeRight(size32_t & tlen, UChar * & tgt, unsigned slen, UChar const * src)
  1881. {
  1882. tlen = rtlTrimUnicodeStrLen(slen, src);
  1883. tgt = rtlDupSubUnicode(src, tlen);
  1884. }
  1885. void rtlTrimVRight(size32_t & tlen, char * & tgt, const char * src)
  1886. {
  1887. tlen = rtlTrimVStrLen(src);
  1888. tgt = rtlDupSubString(src, tlen);
  1889. }
  1890. void rtlTrimVUnicodeRight(size32_t & tlen, UChar * & tgt, UChar const * src)
  1891. {
  1892. rtlTrimUnicodeRight(tlen, tgt, rtlUnicodeStrlen(src), src);
  1893. }
  1894. void rtlTrimUtf8Right(unsigned &tlen, char * &tgt, unsigned slen, char const * src)
  1895. {
  1896. unsigned trimLength;
  1897. size32_t trimSize;
  1898. rtlTrimUtf8Len(trimLength, trimSize, slen, src);
  1899. tlen = trimLength;
  1900. tgt = rtlDupSubString(src, trimSize);
  1901. }
  1902. void rtlAssignTrimRightV(size32_t tlen, char * tgt, unsigned slen, const char * src)
  1903. {
  1904. unsigned len = rtlTrimStrLen(slen, src);
  1905. rtlCopySubStringV(tlen, tgt, len, src);
  1906. }
  1907. void rtlAssignTrimVRightV(size32_t tlen, char * tgt, const char * src)
  1908. {
  1909. unsigned len = rtlTrimVStrLen(src);
  1910. rtlCopySubStringV(tlen, tgt, len, src);
  1911. }
  1912. //-------------------------------------------------------------------------------
  1913. // Functions to trim off left side blank spaces
  1914. void rtlTrimLeft(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  1915. {
  1916. unsigned start = rtlLeftTrimStrStart(slen, src);
  1917. unsigned len = slen - start;
  1918. tlen = len;
  1919. tgt = rtlDupSubString(src + start, len);
  1920. }
  1921. void rtlTrimUnicodeLeft(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src)
  1922. {
  1923. unsigned start = rtlLeftTrimUnicodeStrStart(slen, src);
  1924. unsigned len = slen - start;
  1925. tlen = len;
  1926. tgt = rtlDupSubUnicode(src + start, len);
  1927. }
  1928. void rtlTrimVLeft(unsigned & tlen, char * & tgt, const char * src)
  1929. {
  1930. unsigned start = rtlLeftTrimVStrStart(src);
  1931. unsigned len = strlen(src+start);
  1932. tlen = len;
  1933. tgt = rtlDupSubString(src + start, len);
  1934. }
  1935. void rtlTrimVUnicodeLeft(unsigned & tlen, UChar * & tgt, UChar const * src)
  1936. {
  1937. rtlTrimUnicodeLeft(tlen, tgt, rtlUnicodeStrlen(src), src);
  1938. }
  1939. ECLRTL_API void rtlTrimUtf8Left(unsigned &tlen, char * &tgt, unsigned slen, const char * src)
  1940. {
  1941. unsigned trimLength;
  1942. size32_t trimSize;
  1943. rtlTrimUtf8Start(trimLength, trimSize, slen, src);
  1944. unsigned len = slen-trimLength;
  1945. const char * start = src+trimSize;
  1946. tlen = len;
  1947. tgt = rtlDupSubString(start, rtlUtf8Size(len, start));
  1948. }
  1949. void rtlAssignTrimLeftV(size32_t tlen, char * tgt, unsigned slen, const char * src)
  1950. {
  1951. unsigned start = rtlLeftTrimStrStart(slen, src);
  1952. unsigned len = slen - start;
  1953. rtlCopySubStringV(tlen, tgt, len, src+start);
  1954. }
  1955. void rtlAssignTrimVLeftV(size32_t tlen, char * tgt, const char * src)
  1956. {
  1957. unsigned start = rtlLeftTrimVStrStart(src);
  1958. unsigned len = strlen(src+start);
  1959. rtlCopySubStringV(tlen, tgt, len, src+start);
  1960. }
  1961. //--------------------------------------------------------------------------------
  1962. // Functions to trim off blank spaces of both sides
  1963. void rtlTrimBoth(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  1964. {
  1965. unsigned len = rtlTrimStrLen(slen, src);
  1966. unsigned start = len ? rtlLeftTrimStrStart(slen, src) : 0;
  1967. len -= start;
  1968. tlen = len;
  1969. tgt = rtlDupSubString(src + start, len);
  1970. }
  1971. void rtlTrimUnicodeBoth(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src)
  1972. {
  1973. unsigned len = rtlTrimUnicodeStrLen(slen, src);
  1974. unsigned start = len ? rtlLeftTrimUnicodeStrStart(slen, src) : 0;
  1975. len -= start;
  1976. tlen = len;
  1977. tgt = rtlDupSubUnicode(src + start, len);
  1978. }
  1979. void rtlTrimVBoth(unsigned & tlen, char * & tgt, const char * src)
  1980. {
  1981. unsigned len = rtlTrimVStrLen(src);
  1982. unsigned start = len ? rtlLeftTrimVStrStart(src) : 0;
  1983. len -= start;
  1984. tlen = len;
  1985. tgt = rtlDupSubString(src + start, len);
  1986. }
  1987. void rtlTrimVUnicodeBoth(unsigned & tlen, UChar * & tgt, UChar const * src)
  1988. {
  1989. rtlTrimUnicodeBoth(tlen, tgt, rtlUnicodeStrlen(src), src);
  1990. }
  1991. ECLRTL_API void rtlTrimUtf8Both(unsigned &tlen, char * &tgt, unsigned slen, const char * src)
  1992. {
  1993. unsigned lTrimLength;
  1994. size32_t lTrimSize;
  1995. rtlTrimUtf8Start(lTrimLength, lTrimSize, slen, src);
  1996. rtlTrimUtf8Right(tlen, tgt, slen-lTrimLength, src+lTrimSize);
  1997. }
  1998. void rtlAssignTrimBothV(size32_t tlen, char * tgt, unsigned slen, const char * src)
  1999. {
  2000. unsigned len = rtlTrimStrLen(slen, src);
  2001. unsigned start = len ? rtlLeftTrimStrStart(slen, src) : 0;
  2002. len -= start;
  2003. rtlCopySubStringV(tlen, tgt, len, src+start);
  2004. }
  2005. void rtlAssignTrimVBothV(size32_t tlen, char * tgt, const char * src)
  2006. {
  2007. unsigned len = rtlTrimVStrLen(src);
  2008. unsigned start = len ? rtlLeftTrimVStrStart(src) : 0;
  2009. len -= start;
  2010. rtlCopySubStringV(tlen, tgt, len, src+start);
  2011. }
  2012. //-----------------------------------------------------------------------------
  2013. // Functions used to trim off all blank spaces in a string.
  2014. unsigned rtlTrimStrLenNonWhitespace(size32_t l, const char * t)
  2015. {
  2016. unsigned len = 0;
  2017. while (l)
  2018. {
  2019. l--;
  2020. if (!isspace(t[l]))
  2021. len++;
  2022. }
  2023. return len;
  2024. }
  2025. unsigned rtlTrimStrLenNonBlank(size32_t l, const char * t)
  2026. {
  2027. unsigned len = 0;
  2028. while (l)
  2029. {
  2030. l--;
  2031. if (t[l] != ' ')
  2032. len++;
  2033. }
  2034. return len;
  2035. }
  2036. unsigned rtlTrimVStrLenNonBlank(const char * t)
  2037. {
  2038. unsigned len = 0;
  2039. unsigned char c;
  2040. while ((c = *t++) != 0)
  2041. {
  2042. if (c != ' ')
  2043. len++;
  2044. }
  2045. return len;
  2046. }
  2047. void rtlTrimAll(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  2048. {
  2049. tlen = rtlTrimStrLenNonBlank(slen, src);
  2050. char * buffer = (char *)rtlMalloc(tlen);
  2051. int ind = 0;
  2052. for(unsigned i = 0; i < slen; i++) {
  2053. if(src[i] != ' ') {
  2054. buffer[ind] = src[i];
  2055. ind++;
  2056. }
  2057. }
  2058. tgt = buffer;
  2059. }
  2060. void rtlTrimWS(unsigned & tlen, char * & tgt, unsigned slen, const char * src, bool left, bool all, bool right)
  2061. {
  2062. if (all)
  2063. {
  2064. tlen = rtlTrimStrLenNonWhitespace(slen, src);
  2065. char * buffer = (char *)rtlMalloc(tlen);
  2066. int ind = 0;
  2067. for(unsigned i = 0; i < slen; i++) {
  2068. if (!isspace(src[i]))
  2069. {
  2070. buffer[ind] = src[i];
  2071. ind++;
  2072. }
  2073. }
  2074. tgt = buffer;
  2075. }
  2076. else
  2077. {
  2078. unsigned start = 0;
  2079. while (right && slen)
  2080. {
  2081. if (!isspace(src[slen-1]))
  2082. break;
  2083. slen--;
  2084. }
  2085. while (left && start < slen && isspace(src[start]))
  2086. start++;
  2087. tlen = slen - start;
  2088. tgt = rtlDupSubString(src + start, tlen);
  2089. }
  2090. }
  2091. void rtlTrimUnicodeAll(unsigned & tlen, UChar * & tgt, unsigned slen, const UChar * src)
  2092. {
  2093. #ifdef _USE_ICU
  2094. UnicodeString rawStr;
  2095. UCharCharacterIterator iter(src, slen);
  2096. for(iter.first32(); iter.hasNext(); iter.next32())
  2097. if(iter.current32() != ' ')
  2098. rawStr.append(iter.current32());
  2099. UnicodeString tgtStr;
  2100. normalizeUnicodeString(rawStr, tgtStr); // normalized in case crazy string like [combining accent] [space] [vowel]
  2101. tlen = tgtStr.length();
  2102. tgt = (UChar *)rtlMalloc(tlen*2);
  2103. tgtStr.extract(0, tlen, tgt);
  2104. #else
  2105. rtlThrowNoUnicode();
  2106. #endif
  2107. }
  2108. void rtlTrimUnicodeWS(unsigned & tlen, UChar * & tgt, unsigned slen, const UChar * src, bool left, bool all, bool right)
  2109. {
  2110. #ifdef _USE_ICU
  2111. if (all)
  2112. {
  2113. UnicodeString rawStr;
  2114. UCharCharacterIterator iter(src, slen);
  2115. for(iter.first32(); iter.hasNext(); iter.next32())
  2116. if(!u_isspace(iter.current32()))
  2117. rawStr.append(iter.current32());
  2118. UnicodeString tgtStr;
  2119. normalizeUnicodeString(rawStr, tgtStr); // normalized in case crazy string like [combining accent] [space] [vowel]
  2120. tlen = tgtStr.length();
  2121. tgt = (UChar *)rtlMalloc(tlen*2);
  2122. tgtStr.extract(0, tlen, tgt);
  2123. }
  2124. else
  2125. {
  2126. if (right)
  2127. slen = rtlTrimUnicodeStrLenWS(slen, src);
  2128. unsigned start = (left && slen) ? rtlLeftTrimUnicodeStrStartWS(slen, src) : 0;
  2129. tlen = slen - start;
  2130. tgt = rtlDupSubUnicode(src + start, slen);
  2131. }
  2132. #else
  2133. rtlThrowNoUnicode();
  2134. #endif
  2135. }
  2136. void rtlTrimVAll(unsigned & tlen, char * & tgt, const char * src)
  2137. {
  2138. tlen = rtlTrimVStrLenNonBlank(src);
  2139. char * buffer = (char *)rtlMalloc(tlen);
  2140. int ind = 0;
  2141. int i = 0;
  2142. while(src[i] != 0) {
  2143. if(src[i] != ' ') {
  2144. buffer[ind] = src[i];
  2145. ind++;
  2146. }
  2147. i++;
  2148. }
  2149. tgt = buffer;
  2150. }
  2151. void rtlTrimVWS(unsigned & tlen, char * & tgt, const char * src, bool left, bool all, bool right)
  2152. {
  2153. rtlTrimWS(tlen, tgt, strlen(src), src, left, all, right);
  2154. }
  2155. void rtlTrimVUnicodeAll(unsigned & tlen, UChar * & tgt, const UChar * src)
  2156. {
  2157. rtlTrimUnicodeAll(tlen, tgt, rtlUnicodeStrlen(src), src);
  2158. }
  2159. void rtlTrimVUnicodeWS(unsigned & tlen, UChar * & tgt, const UChar * src, bool left, bool all, bool right)
  2160. {
  2161. rtlTrimUnicodeWS(tlen, tgt, rtlUnicodeStrlen(src), src, left, all, right);
  2162. }
  2163. ECLRTL_API void rtlTrimUtf8All(unsigned &tlen, char * &tgt, unsigned slen, const char * src)
  2164. {
  2165. //Go via unicode because of possibility of combining accents etc.
  2166. rtlDataAttr temp1;
  2167. unsigned temp1len;
  2168. rtlUtf8ToUnicodeX(temp1len, temp1.refustr(), slen, src);
  2169. unsigned trimLen;
  2170. rtlDataAttr trimText;
  2171. rtlTrimUnicodeAll(trimLen, trimText.refustr(), temp1len, temp1.getustr());
  2172. rtlUnicodeToUtf8X(tlen, tgt, trimLen, trimText.getustr());
  2173. }
  2174. void rtlTrimUtf8WS(unsigned & tlen, char * & tgt, unsigned slen, const char * src, bool left, bool all, bool right)
  2175. {
  2176. if (all)
  2177. {
  2178. //Go via unicode because of possibility of combining accents etc.
  2179. rtlDataAttr temp1;
  2180. unsigned temp1len;
  2181. rtlUtf8ToUnicodeX(temp1len, temp1.refustr(), slen, src);
  2182. unsigned trimLen;
  2183. rtlDataAttr trimText;
  2184. rtlTrimUnicodeWS(trimLen, trimText.refustr(), temp1len, temp1.getustr(), left, all, right);
  2185. rtlUnicodeToUtf8X(tlen, tgt, trimLen, trimText.getustr());
  2186. }
  2187. else
  2188. {
  2189. const byte * start = (const byte *) src;
  2190. const byte * cur = start;
  2191. while (left && slen && u_isspace(readUtf8Character(UTF8_MAXSIZE, cur)))
  2192. {
  2193. slen--;
  2194. start = cur;
  2195. }
  2196. if (right)
  2197. {
  2198. cur = start;
  2199. unsigned rtrimLength = 0;
  2200. const byte * trimEnd = cur;
  2201. for (unsigned i=0; i < slen; i++)
  2202. {
  2203. unsigned next = readUtf8Character(UTF8_MAXSIZE, cur);
  2204. if (!u_isspace(next))
  2205. {
  2206. rtrimLength = i+1;
  2207. trimEnd = cur;
  2208. }
  2209. }
  2210. tlen = rtrimLength;
  2211. tgt = rtlDupSubString((const char *) start, trimEnd-start);
  2212. }
  2213. else
  2214. {
  2215. tlen = slen;
  2216. tgt = rtlDupSubString((const char *) start, rtlUtf8Size(slen, start));
  2217. }
  2218. }
  2219. }
  2220. void rtlAssignTrimAllV(unsigned tlen, char * tgt, unsigned slen, const char * src)
  2221. {
  2222. unsigned to = 0;
  2223. for (unsigned from = 0; (from < slen)&&(to+1 < tlen); from++)
  2224. {
  2225. if (src[from] != ' ')
  2226. tgt[to++] = src[from];
  2227. }
  2228. tgt[to] = 0;
  2229. }
  2230. void rtlAssignTrimVAllV(unsigned tlen, char * tgt, const char * src)
  2231. {
  2232. unsigned to = 0;
  2233. for (;(*src && (to+1 < tlen));src++)
  2234. {
  2235. if (*src != ' ')
  2236. tgt[to++] = *src;
  2237. }
  2238. tgt[to] = 0;
  2239. }
  2240. //-----------------------------------------------------------------------------
  2241. ECLRTL_API void rtlUnicodeToVAscii(unsigned outlen, char * out, unsigned inlen, UChar const * in)
  2242. {
  2243. rtlUnicodeToVCodepage(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2244. }
  2245. ECLRTL_API void rtlData2VUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
  2246. {
  2247. rtlCodepageToVUnicode(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2248. }
  2249. ECLRTL_API void rtlStrToVUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
  2250. {
  2251. rtlCodepageToVUnicode(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2252. }
  2253. ECLRTL_API void rtlData2Unicode(unsigned outlen, UChar * out, unsigned inlen, void const * in)
  2254. {
  2255. rtlCodepageToUnicode(outlen, out, inlen, (const char *)in, ASCII_LIKE_CODEPAGE);
  2256. }
  2257. ECLRTL_API void rtlAssignTrimUnicodeLeftV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
  2258. {
  2259. unsigned len;
  2260. UChar * str;
  2261. rtlTrimUnicodeLeft(len, str, slen, src);
  2262. if (len >= tlen)
  2263. len = tlen-1;
  2264. memcpy(tgt, str, len*2);
  2265. tgt[len] = 0;
  2266. rtlFree(str);
  2267. }
  2268. ECLRTL_API void rtlAssignTrimVUnicodeLeftV(size32_t tlen, UChar * tgt, const UChar * src)
  2269. {
  2270. unsigned len;
  2271. UChar * str;
  2272. rtlTrimVUnicodeLeft(len, str, src);
  2273. if (len >= tlen)
  2274. len = tlen-1;
  2275. memcpy(tgt, str, len*2);
  2276. tgt[len] = 0;
  2277. rtlFree(str);
  2278. }
  2279. ECLRTL_API void rtlAssignTrimUnicodeRightV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
  2280. {
  2281. unsigned len;
  2282. UChar * str;
  2283. rtlTrimUnicodeRight(len, str, slen, src);
  2284. if (len >= tlen)
  2285. len = tlen-1;
  2286. memcpy(tgt, str, len*2);
  2287. tgt[len] = 0;
  2288. rtlFree(str);
  2289. }
  2290. ECLRTL_API void rtlAssignTrimVUnicodeRightV(size32_t tlen, UChar * tgt, const UChar * src)
  2291. {
  2292. unsigned len;
  2293. UChar * str;
  2294. rtlTrimVUnicodeRight(len, str, src);
  2295. if (len >= tlen)
  2296. len = tlen-1;
  2297. memcpy(tgt, str, len*2);
  2298. tgt[len] = 0;
  2299. rtlFree(str);
  2300. }
  2301. ECLRTL_API void rtlAssignTrimUnicodeBothV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
  2302. {
  2303. unsigned len;
  2304. UChar * str;
  2305. rtlTrimUnicodeBoth(len, str, slen, src);
  2306. if (len >= tlen)
  2307. len = tlen-1;
  2308. memcpy(tgt, str, len*2);
  2309. tgt[len] = 0;
  2310. rtlFree(str);
  2311. }
  2312. ECLRTL_API void rtlAssignTrimVUnicodeBothV(size32_t tlen, UChar * tgt, const UChar * src)
  2313. {
  2314. unsigned len;
  2315. UChar * str;
  2316. rtlTrimVUnicodeBoth(len, str, src);
  2317. if (len >= tlen)
  2318. len = tlen-1;
  2319. memcpy(tgt, str, len*2);
  2320. tgt[len] = 0;
  2321. rtlFree(str);
  2322. }
  2323. ECLRTL_API void rtlAssignTrimUnicodeAllV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
  2324. {
  2325. unsigned len;
  2326. UChar * str;
  2327. rtlTrimUnicodeAll(len, str, slen, src);
  2328. if (len >= tlen)
  2329. len = tlen-1;
  2330. memcpy(tgt, str, len*2);
  2331. tgt[len] = 0;
  2332. rtlFree(str);
  2333. }
  2334. ECLRTL_API void rtlAssignTrimVUnicodeAllV(size32_t tlen, UChar * tgt, const UChar * src)
  2335. {
  2336. unsigned len;
  2337. UChar * str;
  2338. rtlTrimVUnicodeAll(len, str, src);
  2339. if (len >= tlen)
  2340. len = tlen-1;
  2341. memcpy(tgt, str, len*2);
  2342. tgt[len] = 0;
  2343. rtlFree(str);
  2344. }
  2345. //-----------------------------------------------------------------------------
  2346. int rtlCompareStrStr(unsigned l1, const char * p1, unsigned l2, const char * p2)
  2347. {
  2348. unsigned len = l1;
  2349. if (len > l2)
  2350. len = l2;
  2351. int diff = memcmp(p1, p2, len);
  2352. if (diff == 0)
  2353. {
  2354. if (len != l1)
  2355. {
  2356. for (;(diff == 0) && (len != l1);len++)
  2357. diff = ((unsigned char *)p1)[len] - ' ';
  2358. }
  2359. else if (len != l2)
  2360. {
  2361. for (;(diff == 0) && (len != l2);len++)
  2362. diff = ' ' - ((unsigned char *)p2)[len];
  2363. }
  2364. }
  2365. return diff;
  2366. }
  2367. int rtlCompareVStrVStr(const char * p1, const char * p2)
  2368. {
  2369. return rtlCompareStrStr(strlen(p1), p1, strlen(p2), p2);
  2370. }
  2371. int rtlCompareStrBlank(unsigned l1, const char * p1)
  2372. {
  2373. while (l1--)
  2374. {
  2375. int diff = (*(unsigned char *)(p1++)) - ' ';
  2376. if (diff)
  2377. return diff;
  2378. }
  2379. return 0;
  2380. }
  2381. int rtlCompareDataData(unsigned l1, const void * p1, unsigned l2, const void * p2)
  2382. {
  2383. unsigned len = l1;
  2384. if (len > l2)
  2385. len = l2;
  2386. int diff = memcmp(p1, p2, len);
  2387. if (diff == 0)
  2388. {
  2389. if (l1 > l2)
  2390. diff = +1;
  2391. else if (l1 < l2)
  2392. diff = -1;
  2393. }
  2394. return diff;
  2395. }
  2396. int rtlCompareEStrEStr(unsigned l1, const char * p1, unsigned l2, const char * p2)
  2397. {
  2398. unsigned len = l1;
  2399. if (len > l2)
  2400. len = l2;
  2401. int diff = memcmp(p1, p2, len);
  2402. if (diff == 0)
  2403. {
  2404. if (len != l1)
  2405. {
  2406. for (;(diff == 0) && (len != l1);len++)
  2407. diff = ((unsigned char *)p1)[len] - '@';
  2408. }
  2409. else if (len != l2)
  2410. {
  2411. for (;(diff == 0) && (len != l2);len++)
  2412. diff = '@' - ((unsigned char *)p2)[len];
  2413. }
  2414. }
  2415. return diff;
  2416. }
// A single zero UChar, used as a stand-in wherever a null unicode pointer is supplied
const static UChar nullUStr = 0;
  2418. #ifdef _USE_ICU
  2419. int rtlCompareUnicodeUnicode(unsigned l1, UChar const * p1, unsigned l2, UChar const * p2, char const * locale)
  2420. {
  2421. while(l1 && (p1[l1-1] == ' ')) l1--;
  2422. while(l2 && (p2[l2-1] == ' ')) l2--;
  2423. if (!p1) p1 = &nullUStr;
  2424. if (!p2) p2 = &nullUStr;
  2425. return ucol_strcoll(queryRTLLocale(locale)->queryCollator(), p1, l1, p2, l2);
  2426. }
  2427. int rtlCompareUnicodeUnicodeStrength(unsigned l1, UChar const * p1, unsigned l2, UChar const * p2, char const * locale, unsigned strength)
  2428. {
  2429. while(l1 && (p1[l1-1] == ' ')) l1--;
  2430. while(l2 && (p2[l2-1] == ' ')) l2--;
  2431. if (!p1) p1 = &nullUStr;
  2432. if (!p2) p2 = &nullUStr;
  2433. return ucol_strcoll(queryRTLLocale(locale)->queryCollator(strength), p1, l1, p2, l2);
  2434. }
  2435. #else
  2436. int rtlCompareUnicodeUnicode(unsigned l1, UChar const * p1, unsigned l2, UChar const * p2, char const * locale) { rtlThrowNoUnicode(); }
  2437. int rtlCompareUnicodeUnicodeStrength(unsigned l1, UChar const * p1, unsigned l2, UChar const * p2, char const * locale, unsigned strength) { rtlThrowNoUnicode(); }
  2438. #endif
  2439. int rtlCompareVUnicodeVUnicode(UChar const * p1, UChar const * p2, char const * locale)
  2440. {
  2441. return rtlCompareUnicodeUnicode(rtlUnicodeStrlen(p1), p1, rtlUnicodeStrlen(p2), p2, locale);
  2442. }
  2443. int rtlCompareVUnicodeVUnicodeStrength(UChar const * p1, UChar const * p2, char const * locale, unsigned strength)
  2444. {
  2445. return rtlCompareUnicodeUnicodeStrength(rtlUnicodeStrlen(p1), p1, rtlUnicodeStrlen(p2), p2, locale, strength);
  2446. }
  2447. #ifdef _USE_ICU
  2448. void rtlKeyUnicodeX(unsigned & tlen, void * & tgt, unsigned slen, const UChar * src, const char * locale)
  2449. {
  2450. while(slen && u_isUWhiteSpace(src[slen-1])) slen--;
  2451. UCollator * coll = queryRTLLocale(locale)->queryCollator();
  2452. tlen = ucol_getSortKey(coll, src, slen, 0, 0);
  2453. tgt = rtlMalloc(tlen);
  2454. ucol_getSortKey(coll, src, slen, (unsigned char *)tgt, tlen);
  2455. }
  2456. void rtlKeyUnicodeStrengthX(unsigned & tlen, void * & tgt, unsigned slen, const UChar * src, const char * locale, unsigned strength)
  2457. {
  2458. while(slen && u_isUWhiteSpace(src[slen-1])) slen--;
  2459. UCollator * coll = queryRTLLocale(locale)->queryCollator(strength);
  2460. tlen = ucol_getSortKey(coll, src, slen, 0, 0);
  2461. tgt = rtlMalloc(tlen);
  2462. ucol_getSortKey(coll, src, slen, (unsigned char *)tgt, tlen);
  2463. }
  2464. #else
  2465. void rtlKeyUnicodeX(unsigned & tlen, void * & tgt, unsigned slen, const UChar * src, const char * locale)
  2466. {
  2467. rtlThrowNoUnicode();
  2468. }
  2469. void rtlKeyUnicodeStrengthX(unsigned & tlen, void * & tgt, unsigned slen, const UChar * src, const char * locale, unsigned strength)
  2470. {
  2471. rtlThrowNoUnicode();
  2472. }
  2473. #endif
  2474. ECLRTL_API int rtlPrefixDiffStrEx(unsigned l1, const char * p1, unsigned l2, const char * p2, unsigned origin)
  2475. {
  2476. unsigned len = l1 < l2 ? l1 : l2;
  2477. const byte * str1 = (const byte *)p1;
  2478. const byte * str2 = (const byte *)p2;
  2479. for (unsigned i=0; i<len; i++)
  2480. {
  2481. byte c1 = str1[i];
  2482. byte c2 = str2[i];
  2483. if (c1 != c2)
  2484. {
  2485. if (c1 < c2)
  2486. return -(int)(i+origin+1);
  2487. else
  2488. return (int)(i+origin+1);
  2489. }
  2490. }
  2491. if (l1 != l2)
  2492. return (l1 < l2) ? -(int)(len+origin+1) : (int)(len+origin+1);
  2493. return 0;
  2494. }
  2495. ECLRTL_API int rtlPrefixDiffStr(unsigned l1, const char * p1, unsigned l2, const char * p2)
  2496. {
  2497. return rtlPrefixDiffStrEx(l1, p1, l2, p2, 0);
  2498. }
  2499. //MORE: I'm not sure this can really be implemented....
  2500. ECLRTL_API int rtlPrefixDiffUnicodeEx(unsigned l1, const UChar * p1, unsigned l2, const UChar * p2, char const * locale, unsigned origin)
  2501. {
  2502. #ifdef _USE_ICU
  2503. while(l1 && u_isUWhiteSpace(p1[l1-1])) l1--;
  2504. while(l2 && u_isUWhiteSpace(p2[l2-1])) l2--;
  2505. unsigned len = l1 < l2 ? l1 : l2;
  2506. for (unsigned i=0; i<len; i++)
  2507. {
  2508. if (p1[i] != p2[i])
  2509. {
  2510. int c = ucol_strcoll(queryRTLLocale(locale)->queryCollator(), p1+i, l1-i, p2+i, l2-i);
  2511. if (c < 0)
  2512. return -(int)(i+origin+1);
  2513. else if (c > 0)
  2514. return (int)(i+origin+1);
  2515. }
  2516. }
  2517. if (l1 != l2)
  2518. return (l1 < l2) ? -(int)(len+origin+1) : (int)(len+origin+1);
  2519. #else
  2520. rtlThrowNoUnicode();
  2521. #endif
  2522. return 0;
  2523. }
  2524. ECLRTL_API int rtlPrefixDiffUnicode(unsigned l1, const UChar * p1, unsigned l2, const UChar * p2, char const * locale)
  2525. {
  2526. return rtlPrefixDiffUnicodeEx(l1, p1, l2, p2, locale, 0);
  2527. }
  2528. //-----------------------------------------------------------------------------
  2529. void rtlStringToLower(size32_t l, char * t)
  2530. {
  2531. for (;l--;t++)
  2532. *t = tolower(*t);
  2533. }
  2534. void rtlStringToUpper(size32_t l, char * t)
  2535. {
  2536. for (;l--;t++)
  2537. *t = toupper(*t);
  2538. }
  2539. #ifdef _USE_ICU
  2540. void rtlUnicodeToLower(size32_t l, UChar * t, char const * locale)
  2541. {
  2542. UChar * buff = (UChar *)rtlMalloc(l*2);
  2543. UErrorCode err = U_ZERO_ERROR;
  2544. u_strToLower(buff, l, t, l, locale, &err);
  2545. unicodeNormalizedCopy(buff, t, l);
  2546. }
  2547. void rtlUnicodeToLowerX(size32_t & lenout, UChar * & out, size32_t l, const UChar * t, char const * locale)
  2548. {
  2549. out = (UChar *)rtlMalloc(l*2);
  2550. lenout = l;
  2551. UErrorCode err = U_ZERO_ERROR;
  2552. u_strToLower(out, l, t, l, locale, &err);
  2553. }
  2554. void rtlUnicodeToUpper(size32_t l, UChar * t, char const * locale)
  2555. {
  2556. UChar * buff = (UChar *)rtlMalloc(l*2);
  2557. UErrorCode err = U_ZERO_ERROR;
  2558. u_strToUpper(buff, l, t, l, locale, &err);
  2559. unicodeNormalizedCopy(buff, t, l);
  2560. }
  2561. #else
  2562. void rtlUnicodeToLower(size32_t l, UChar * t, char const * locale) { rtlThrowNoUnicode(); }
  2563. void rtlUnicodeToLowerX(size32_t & lenout, UChar * & out, size32_t l, const UChar * t, char const * locale) { rtlThrowNoUnicode(); }
  2564. void rtlUnicodeToUpper(size32_t l, UChar * t, char const * locale) { rtlThrowNoUnicode(); }
  2565. #endif
  2566. //=============================================================================
  2567. // Miscellaneous helper functions...
  2568. //-----------------------------------------------------------------------------
  2569. int searchTableStringN(unsigned count, const char * * table, unsigned width, const char * search)
  2570. {
  2571. int left = 0;
  2572. int right = count;
  2573. do
  2574. {
  2575. int mid = (left + right) >> 1;
  2576. int cmp = memcmp(search, table[mid], width);
  2577. if (cmp < 0)
  2578. right = mid;
  2579. else if (cmp > 0)
  2580. left = mid+1;
  2581. else
  2582. return mid;
  2583. } while (left < right);
  2584. return -1;
  2585. }
  2586. int rtlSearchTableStringN(unsigned count, const char * * table, unsigned width, const char * search)
  2587. {
  2588. int left = 0;
  2589. int right = count;
  2590. do
  2591. {
  2592. int mid = (left + right) >> 1;
  2593. //we could use rtlCompareStrStr, but both source and target strings should
  2594. //be the correct length, so no point.... (unless new weird collation sequences)
  2595. //we would also need to call a different function for data
  2596. int cmp = memcmp(search, table[mid], width);
  2597. if (cmp < 0)
  2598. right = mid;
  2599. else if (cmp > 0)
  2600. left = mid+1;
  2601. else
  2602. return mid;
  2603. } while (left < right);
  2604. return -1;
  2605. }
  2606. int rtlSearchTableVStringN(unsigned count, const char * * table, const char * search)
  2607. {
  2608. int left = 0;
  2609. int right = count;
  2610. do
  2611. {
  2612. int mid = (left + right) >> 1;
  2613. int cmp = strcmp(search, table[mid]);
  2614. if (cmp < 0)
  2615. right = mid;
  2616. else if (cmp > 0)
  2617. left = mid+1;
  2618. else
  2619. return mid;
  2620. } while (left < right);
  2621. return -1;
  2622. }
  2623. int rtlNewSearchDataTable(unsigned count, unsigned elemlen, const char * * table, unsigned width, const char * search)
  2624. {
  2625. int left = 0;
  2626. int right = count;
  2627. do
  2628. {
  2629. int mid = (left + right) >> 1;
  2630. int cmp = rtlCompareDataData( width, search, elemlen, table[mid]);
  2631. if (cmp < 0)
  2632. right = mid;
  2633. else if (cmp > 0)
  2634. left = mid+1;
  2635. else {
  2636. return mid;
  2637. }
  2638. } while (left < right);
  2639. return -1;
  2640. }
  2641. int rtlNewSearchEStringTable(unsigned count, unsigned elemlen, const char * * table, unsigned width, const char * search)
  2642. {
  2643. int left = 0;
  2644. int right = count;
  2645. do
  2646. {
  2647. int mid = (left + right) >> 1;
  2648. int cmp = rtlCompareEStrEStr( width, search, elemlen, table[mid]);
  2649. if (cmp < 0)
  2650. right = mid;
  2651. else if (cmp > 0)
  2652. left = mid+1;
  2653. else {
  2654. return mid;
  2655. }
  2656. } while (left < right);
  2657. return -1;
  2658. }
  2659. int rtlNewSearchQStringTable(unsigned count, unsigned elemlen, const char * * table, unsigned width, const char * search)
  2660. {
  2661. int left = 0;
  2662. int right = count;
  2663. do
  2664. {
  2665. int mid = (left + right) >> 1;
  2666. int cmp = rtlCompareQStrQStr( width, search, elemlen, table[mid]);
  2667. if (cmp < 0)
  2668. right = mid;
  2669. else if (cmp > 0)
  2670. left = mid+1;
  2671. else {
  2672. return mid;
  2673. }
  2674. } while (left < right);
  2675. return -1;
  2676. }
  2677. int rtlNewSearchStringTable(unsigned count, unsigned elemlen, const char * * table, unsigned width, const char * search)
  2678. {
  2679. int left = 0;
  2680. int right = count;
  2681. do
  2682. {
  2683. int mid = (left + right) >> 1;
  2684. int cmp = rtlCompareStrStr( width, search, elemlen, table[mid]);
  2685. if (cmp < 0)
  2686. right = mid;
  2687. else if (cmp > 0)
  2688. left = mid+1;
  2689. else {
  2690. return mid;
  2691. }
  2692. } while (left < right);
  2693. return -1;
  2694. }
  2695. #ifdef _USE_ICU
  2696. int rtlNewSearchUnicodeTable(unsigned count, unsigned elemlen, const UChar * * table, unsigned width, const UChar * search, const char * locale)
  2697. {
  2698. dbgassertex(search != nullptr || width == 0);
  2699. UCollator * coll = queryRTLLocale(locale)->queryCollator();
  2700. int left = 0;
  2701. int right = count;
  2702. if (!search) search = &nullUStr;
  2703. size32_t trimWidth = rtlQuickTrimUnicode(width, search);
  2704. do
  2705. {
  2706. int mid = (left + right) >> 1;
  2707. size32_t elemTrimWidth = rtlQuickTrimUnicode(elemlen, table[mid]);
  2708. UCollationResult cmp = ucol_strcoll(coll, search, trimWidth, table[mid], elemTrimWidth);
  2709. if (cmp == UCOL_LESS)
  2710. right = mid;
  2711. else if (cmp == UCOL_GREATER)
  2712. left = mid+1;
  2713. else
  2714. return mid;
  2715. } while (left < right);
  2716. return -1;
  2717. }
  2718. int rtlNewSearchVUnicodeTable(unsigned count, const UChar * * table, const UChar * search, const char * locale)
  2719. {
  2720. UCollator * coll = queryRTLLocale(locale)->queryCollator();
  2721. int left = 0;
  2722. int right = count;
  2723. do
  2724. {
  2725. int mid = (left + right) >> 1;
  2726. UCollationResult cmp = ucol_strcoll(coll, search, rtlUnicodeStrlen(search), table[mid], rtlUnicodeStrlen(table[mid]));
  2727. if (cmp == UCOL_LESS)
  2728. right = mid;
  2729. else if (cmp == UCOL_GREATER)
  2730. left = mid+1;
  2731. else
  2732. return mid;
  2733. } while (left < right);
  2734. return -1;
  2735. }
  2736. #endif
  2737. //-----------------------------------------------------------------------------
  2738. template <class T>
  2739. int rtlSearchIntegerTable(unsigned count, const T * table, T search)
  2740. {
  2741. int left = 0;
  2742. int right = count;
  2743. do
  2744. {
  2745. int mid = (left + right) >> 1;
  2746. T midValue = table[mid];
  2747. if (search < midValue)
  2748. right = mid;
  2749. else if (search > midValue)
  2750. left = mid+1;
  2751. else
  2752. return mid;
  2753. } while (left < right);
  2754. return -1;
  2755. }
  2756. int rtlSearchTableInteger8(unsigned count, const __int64 * table, __int64 search)
  2757. {
  2758. return rtlSearchIntegerTable(count, table, search);
  2759. }
  2760. int rtlSearchTableUInteger8(unsigned count, const unsigned __int64 * table, unsigned __int64 search)
  2761. {
  2762. return rtlSearchIntegerTable(count, table, search);
  2763. }
  2764. int rtlSearchTableInteger4(unsigned count, const int * table, int search)
  2765. {
  2766. return rtlSearchIntegerTable(count, table, search);
  2767. }
  2768. int rtlSearchTableUInteger4(unsigned count, const unsigned * table, unsigned search)
  2769. {
  2770. return rtlSearchIntegerTable(count, table, search);
  2771. }
  2772. //-----------------------------------------------------------------------------
  2773. unsigned rtlCrc32(unsigned len, const void * buffer, unsigned crc)
  2774. {
  2775. return crc32((const char *)buffer, len, crc);
  2776. }
  2777. //=============================================================================
  2778. // EBCDIC helper functions...
// Byte translation table used by rtlEStrToStr(): the input byte value is the index
// and the entry is the output byte. 256 entries written as octal escapes, 16 per row.
static unsigned char ccsid1047[] = "\
\000\001\002\003\234\011\206\177\227\215\216\013\014\015\016\017\
\020\021\022\023\235\012\010\207\030\031\222\217\034\035\036\037\
\200\201\202\203\204\205\027\033\210\211\212\213\214\005\006\007\
\220\221\026\223\224\225\226\004\230\231\232\233\024\025\236\032\
\040\240\342\344\340\341\343\345\347\361\242\056\074\050\053\174\
\046\351\352\353\350\355\356\357\354\337\041\044\052\051\073\136\
\055\057\302\304\300\301\303\305\307\321\246\054\045\137\076\077\
\370\311\312\313\310\315\316\317\314\140\072\043\100\047\075\042\
\330\141\142\143\144\145\146\147\150\151\253\273\360\375\376\261\
\260\152\153\154\155\156\157\160\161\162\252\272\346\270\306\244\
\265\176\163\164\165\166\167\170\171\172\241\277\320\133\336\256\
\254\243\245\267\251\247\266\274\275\276\335\250\257\135\264\327\
\173\101\102\103\104\105\106\107\110\111\255\364\366\362\363\365\
\175\112\113\114\115\116\117\120\121\122\271\373\374\371\372\377\
\134\367\123\124\125\126\127\130\131\132\262\324\326\322\323\325\
\060\061\062\063\064\065\066\067\070\071\263\333\334\331\332\237";
// Byte translation table used by rtlStrToEStr(): the input byte value is the index
// and the entry is the output byte. 256 entries written as octal escapes, 16 per row.
static unsigned char ccsid1047_rev[] = "\
\000\001\002\003\067\055\056\057\026\005\025\013\014\015\016\017\
\020\021\022\023\074\075\062\046\030\031\077\047\034\035\036\037\
\100\132\177\173\133\154\120\175\115\135\134\116\153\140\113\141\
\360\361\362\363\364\365\366\367\370\371\172\136\114\176\156\157\
\174\301\302\303\304\305\306\307\310\311\321\322\323\324\325\326\
\327\330\331\342\343\344\345\346\347\350\351\255\340\275\137\155\
\171\201\202\203\204\205\206\207\210\211\221\222\223\224\225\226\
\227\230\231\242\243\244\245\246\247\250\251\300\117\320\241\007\
\040\041\042\043\044\045\006\027\050\051\052\053\054\011\012\033\
\060\061\032\063\064\065\066\010\070\071\072\073\004\024\076\377\
\101\252\112\261\237\262\152\265\273\264\232\212\260\312\257\274\
\220\217\352\372\276\240\266\263\235\332\233\213\267\270\271\253\
\144\145\142\146\143\147\236\150\164\161\162\163\170\165\166\167\
\254\151\355\356\353\357\354\277\200\375\376\373\374\272\256\131\
\104\105\102\106\103\107\234\110\124\121\122\123\130\125\126\127\
\214\111\315\316\313\317\314\341\160\335\336\333\334\215\216\337";
  2813. void rtlEStrToStr(unsigned outlen, char *out, unsigned inlen, const char *in)
  2814. {
  2815. unsigned char *codepage = ccsid1047;
  2816. unsigned i,j;
  2817. unsigned lim = inlen;
  2818. if (lim>outlen) lim = outlen;
  2819. for (i=0;i<lim;i++)
  2820. {
  2821. j = in[i] & 0x00ff;
  2822. out[i] = codepage[j];
  2823. }
  2824. for (;i<outlen; i++)
  2825. out[i] = ' ';
  2826. }
  2827. void rtlStrToEStr(unsigned outlen, char *out, unsigned inlen, const char *in)
  2828. {
  2829. unsigned char *codepage = ccsid1047_rev;
  2830. unsigned i,j;
  2831. unsigned lim = inlen;
  2832. if (lim>outlen) lim = outlen;
  2833. for (i=0;i<lim;i++)
  2834. {
  2835. j = in[i] & 0x00ff;
  2836. out[i] = codepage[j];
  2837. }
  2838. for (;i<outlen; i++)
  2839. out[i] = codepage[(unsigned char) ' '];
  2840. }
  2841. //---------------------------------------------------------------------------
  2842. #ifdef _USE_ICU
  2843. void rtlCodepageToUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in, char const * codepage)
  2844. {
  2845. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2846. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2847. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2848. UErrorCode err = U_ZERO_ERROR;
  2849. unsigned len = ucnv_toUChars(conv, out, outlen, in, inlen, &err);
  2850. while(len<outlen) out[len++] = 0x0020;
  2851. unicodeEnsureIsNormalized(outlen, out);
  2852. }
  2853. void rtlCodepageToVUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in, char const * codepage)
  2854. {
  2855. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2856. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2857. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2858. UErrorCode err = U_ZERO_ERROR;
  2859. unsigned len = ucnv_toUChars(conv, out, outlen-1, in, inlen, &err);
  2860. if (len >= outlen) len = outlen-1;
  2861. out[len] = 0;
  2862. vunicodeEnsureIsNormalized(outlen, out);
  2863. }
  2864. void rtlCodepageToUnicodeUnescape(unsigned outlen, UChar * out, unsigned inlen, char const * in, char const * codepage)
  2865. {
  2866. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2867. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2868. UnicodeString raw(in, inlen, codepage);
  2869. UnicodeString unescaped = raw.unescape();
  2870. UnicodeString normalized;
  2871. normalizeUnicodeString(unescaped, normalized);
  2872. if((unsigned)normalized.length()>outlen)
  2873. normalized.truncate(outlen);
  2874. else if((unsigned)normalized.length()<outlen)
  2875. normalized.padTrailing(outlen);
  2876. normalized.extract(0, outlen, out);
  2877. }
  2878. void rtlUnicodeToCodepage(unsigned outlen, char * out, unsigned inlen, UChar const * in, char const * codepage)
  2879. {
  2880. //If the unicode contains a character which doesn't exist in the destination codepage,
  2881. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2882. //no telling how your terminal may display this (I've seen a divide sign and a right
  2883. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2884. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2885. UErrorCode err = U_ZERO_ERROR;
  2886. unsigned len = ucnv_fromUChars(conv, (char *)out, outlen, in, inlen, &err);
  2887. if(len<outlen)
  2888. codepageBlankFill(codepage, out+len, outlen-len);
  2889. }
  2890. void rtlUnicodeToData(unsigned outlen, void * out, unsigned inlen, UChar const * in)
  2891. {
  2892. //If the unicode contains a character which doesn't exist in the destination codepage,
  2893. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2894. //no telling how your terminal may display this (I've seen a divide sign and a right
  2895. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2896. UConverter * conv = queryRTLUnicodeConverter(ASCII_LIKE_CODEPAGE)->query();
  2897. UErrorCode err = U_ZERO_ERROR;
  2898. unsigned len = ucnv_fromUChars(conv, (char *)out, outlen, in, inlen, &err);
  2899. if(len<outlen)
  2900. memset((char *)out+len, 0, outlen-len);
  2901. }
  2902. void rtlUnicodeToVCodepage(unsigned outlen, char * out, unsigned inlen, UChar const * in, char const * codepage)
  2903. {
  2904. //If the unicode contains a character which doesn't exist in the destination codepage,
  2905. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2906. //no telling how your terminal may display this (I've seen a divide sign and a right
  2907. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2908. UConverter * conv = queryRTLUnicodeConverter(ASCII_LIKE_CODEPAGE)->query();
  2909. UErrorCode err = U_ZERO_ERROR;
  2910. unsigned len = ucnv_fromUChars(conv, (char *)out, outlen-1, in, inlen, &err);
  2911. if (len >= outlen) len = outlen-1;
  2912. out[len] = 0;
  2913. }
  2914. void rtlCodepageToUnicodeX(unsigned & outlen, UChar * & out, unsigned inlen, char const * in, char const * codepage)
  2915. {
  2916. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2917. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2918. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2919. UErrorCode err = U_ZERO_ERROR;
  2920. outlen = ucnv_toUChars(conv, 0, 0, in, inlen, &err);
  2921. if(err==U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
  2922. out = (UChar *)rtlMalloc(outlen*2);
  2923. ucnv_toUChars(conv, out, outlen, in, inlen, &err);
  2924. }
  2925. UChar * rtlCodepageToVUnicodeX(unsigned inlen, char const * in, char const * codepage)
  2926. {
  2927. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2928. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2929. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2930. UErrorCode err = U_ZERO_ERROR;
  2931. unsigned outlen = ucnv_toUChars(conv, 0, 0, in, inlen, &err);
  2932. if(err == U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
  2933. UChar * out = (UChar *)rtlMalloc((outlen+1)*2);
  2934. ucnv_toUChars(conv, out, outlen, in, inlen, &err);
  2935. out[outlen] = 0x0000;
  2936. vunicodeEnsureIsNormalizedX(outlen, out);
  2937. return out;
  2938. }
  2939. #else
  2940. void rtlCodepageToUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in, char const * codepage)
  2941. {
  2942. if (inlen > outlen)
  2943. inlen = outlen;
  2944. unsigned i = 0;
  2945. for (; i < inlen; i++)
  2946. out[i] = in[i];
  2947. while (i < outlen)
  2948. out[i++] = 0x0020;
  2949. }
  2950. void rtlCodepageToVUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in, char const * codepage)
  2951. {
  2952. rtlThrowNoUnicode();
  2953. }
  2954. void rtlCodepageToUnicodeUnescape(unsigned outlen, UChar * out, unsigned inlen, char const * in, char const * codepage)
  2955. {
  2956. rtlCodepageToUnicode(outlen, out, inlen, in, codepage);
  2957. }
  2958. void rtlUnicodeToCodepage(unsigned outlen, char * out, unsigned inlen, UChar const * in, char const * codepage)
  2959. {
  2960. if (inlen > outlen)
  2961. inlen = outlen;
  2962. unsigned i = 0;
  2963. for (; i < inlen; i++)
  2964. out[i] = (char)in[i];
  2965. while (i < outlen)
  2966. out[i++] = ' ';
  2967. }
  2968. void rtlUnicodeToData(unsigned outlen, void * out, unsigned inlen, UChar const * in)
  2969. {
  2970. rtlUnicodeToCodepage(outlen, (char *)out, inlen, in, nullptr);
  2971. }
  2972. void rtlUnicodeToVCodepage(unsigned outlen, char * out, unsigned inlen, UChar const * in, char const * codepage)
  2973. {
  2974. rtlThrowNoUnicode();
  2975. }
  2976. void rtlCodepageToUnicodeX(unsigned & outlen, UChar * & out, unsigned inlen, char const * in, char const * codepage)
  2977. {
  2978. outlen = inlen;
  2979. out = (UChar *)rtlMalloc(outlen*2);
  2980. rtlCodepageToUnicode(outlen, out, inlen, in, codepage);
  2981. }
  2982. UChar * rtlCodepageToVUnicodeX(unsigned inlen, char const * in, char const * codepage)
  2983. {
  2984. rtlThrowNoUnicode();
  2985. }
  2986. #endif
  2987. void rtlVCodepageToUnicode(unsigned outlen, UChar * out, char const * in, char const * codepage)
  2988. {
  2989. rtlCodepageToUnicode(outlen, out, strlen(in), in, codepage);
  2990. }
  2991. void rtlVCodepageToVUnicode(unsigned outlen, UChar * out, char const * in, char const * codepage)
  2992. {
  2993. rtlCodepageToVUnicode(outlen, out, strlen(in), in, codepage);
  2994. }
  2995. void rtlVUnicodeToCodepage(unsigned outlen, char * out, UChar const * in, char const * codepage)
  2996. {
  2997. rtlUnicodeToCodepage(outlen, out, rtlUnicodeStrlen(in), in, codepage);
  2998. }
  2999. void rtlVUnicodeToData(unsigned outlen, void * out, UChar const * in)
  3000. {
  3001. rtlUnicodeToData(outlen, out, rtlUnicodeStrlen(in), in);
  3002. }
  3003. void rtlVUnicodeToDataX(unsigned& outlen, void * &out, UChar const * in)
  3004. {
  3005. rtlUnicodeToDataX(outlen, out, rtlUnicodeStrlen(in), in);
  3006. }
  3007. void rtlVUnicodeToVCodepage(unsigned outlen, char * out, UChar const * in, char const * codepage)
  3008. {
  3009. rtlUnicodeToVCodepage(outlen, out, rtlUnicodeStrlen(in), in, codepage);
  3010. }
  3011. void rtlVCodepageToUnicodeX(unsigned & outlen, UChar * & out, char const * in, char const * codepage)
  3012. {
  3013. rtlCodepageToUnicodeX(outlen, out, strlen(in), in, codepage);
  3014. }
  3015. UChar * rtlVCodepageToVUnicodeX(char const * in, char const * codepage)
  3016. {
  3017. return rtlCodepageToVUnicodeX(strlen(in), in, codepage);
  3018. }
  3019. #ifdef _USE_ICU
  3020. void rtlCodepageToUnicodeXUnescape(unsigned & outlen, UChar * & out, unsigned inlen, char const * in, char const * codepage)
  3021. {
  3022. //If the input contains a character which doesn't exist in its claimed codepage, this will
  3023. //generate U+FFFD (substitution character). This most likely won't be displayed.
  3024. UnicodeString raw(in, inlen, codepage);
  3025. UnicodeString unescaped = raw.unescape();
  3026. UnicodeString normalized;
  3027. normalizeUnicodeString(unescaped, normalized);
  3028. outlen = normalized.length();
  3029. out = (UChar *)rtlMalloc(outlen*2);
  3030. normalized.extract(0, outlen, out);
  3031. }
  3032. void rtlCodepageToUtf8XUnescape(unsigned & outlen, char * & out, unsigned inlen, char const * in, char const * codepage)
  3033. {
  3034. //If the input contains a character which doesn't exist in its claimed codepage, this will
  3035. //generate U+FFFD (substitution character). This most likely won't be displayed.
  3036. UnicodeString raw(in, inlen, codepage);
  3037. UnicodeString unescaped = raw.unescape();
  3038. UnicodeString normalized;
  3039. normalizeUnicodeString(unescaped, normalized);
  3040. UConverter * utf8Conv = queryRTLUnicodeConverter(UTF8_CODEPAGE)->query();
  3041. UErrorCode err = U_ZERO_ERROR;
  3042. size32_t outsize = normalized.extract(NULL, 0, utf8Conv, err);
  3043. err = U_ZERO_ERROR;
  3044. out = (char *)rtlMalloc(outsize);
  3045. outsize = normalized.extract(out, outsize, utf8Conv, err);
  3046. outlen = rtlUtf8Length(outsize, out);
  3047. }
  3048. void rtlUnicodeToCodepageX(unsigned & outlen, char * & out, unsigned inlen, UChar const * in, char const * codepage)
  3049. {
  3050. //If the unicode contains a character which doesn't exist in the destination codepage,
  3051. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  3052. //no telling how your terminal may display this (I've seen a divide sign and a right
  3053. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  3054. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  3055. UErrorCode err = U_ZERO_ERROR;
  3056. outlen = ucnv_fromUChars(conv, 0, 0, in, inlen, &err);
  3057. if(err == U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
  3058. out = (char *)rtlMalloc(outlen);
  3059. ucnv_fromUChars(conv, out, outlen, in, inlen, &err);
  3060. }
  3061. char * rtlUnicodeToVCodepageX(unsigned inlen, UChar const * in, char const * codepage)
  3062. {
  3063. //If the unicode contains a character which doesn't exist in the destination codepage,
  3064. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  3065. //no telling how your terminal may display this (I've seen a divide sign and a right
  3066. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  3067. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  3068. UErrorCode err = U_ZERO_ERROR;
  3069. unsigned outlen = ucnv_fromUChars(conv, 0, 0, in, inlen, &err);
  3070. if(err == U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
  3071. char * out = (char *)rtlMalloc(outlen+1);
  3072. ucnv_fromUChars(conv, out, outlen, in, inlen, &err);
  3073. out[outlen] = 0x00;
  3074. return out;
  3075. }
  3076. #else
  3077. void rtlCodepageToUnicodeXUnescape(unsigned & outlen, UChar * & out, unsigned inlen, char const * in, char const * codepage)
  3078. {
  3079. rtlCodepageToUnicodeX(outlen, out, inlen, in, codepage);
  3080. }
  3081. void rtlCodepageToUtf8XUnescape(unsigned & outlen, char * & out, unsigned inlen, char const * in, char const * codepage)
  3082. {
  3083. rtlCodepageToUtf8X(outlen, out, inlen, in, codepage);
  3084. }
  3085. void rtlUnicodeToCodepageX(unsigned & outlen, char * & out, unsigned inlen, UChar const * in, char const * codepage)
  3086. {
  3087. outlen = inlen;
  3088. out = (char *)rtlMalloc(outlen);
  3089. rtlUnicodeToCodepage(outlen, out, inlen, in, codepage);
  3090. }
  3091. char * rtlUnicodeToVCodepageX(unsigned inlen, UChar const * in, char const * codepage)
  3092. {
  3093. rtlThrowNoUnicode();
  3094. }
  3095. #endif
  3096. void rtlUnicodeToDataX(unsigned & outlen, void * & out, unsigned inlen, UChar const * in)
  3097. {
  3098. rtlUnicodeToCodepageX(outlen, (char * &)out, inlen, in, ASCII_LIKE_CODEPAGE);
  3099. }
  3100. void rtlVUnicodeToCodepageX(unsigned & outlen, char * & out, UChar const * in, char const * codepage)
  3101. {
  3102. rtlUnicodeToCodepageX(outlen, out, rtlUnicodeStrlen(in), in, codepage);
  3103. }
  3104. char * rtlVUnicodeToVCodepageX(UChar const * in, char const * codepage)
  3105. {
  3106. return rtlUnicodeToVCodepageX(rtlUnicodeStrlen(in), in, codepage);
  3107. }
  3108. void rtlStrToUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
  3109. {
  3110. rtlCodepageToUnicode(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  3111. }
  3112. void rtlUnicodeToStr(unsigned outlen, char * out, unsigned inlen, UChar const * in)
  3113. {
  3114. rtlUnicodeToCodepage(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  3115. }
  3116. void rtlStrToUnicodeX(unsigned & outlen, UChar * & out, unsigned inlen, char const * in)
  3117. {
  3118. rtlCodepageToUnicodeX(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  3119. }
  3120. void rtlUnicodeToStrX(unsigned & outlen, char * & out, unsigned inlen, UChar const * in)
  3121. {
  3122. rtlUnicodeToCodepageX(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  3123. }
  3124. #ifdef _USE_ICU
  3125. void rtlUnicodeToEscapedStrX(unsigned & outlen, char * & out, unsigned inlen, UChar const * in)
  3126. {
  3127. StringBuffer outbuff;
  3128. escapeUnicode(inlen, in, outbuff);
  3129. outlen = outbuff.length();
  3130. out = (char *)rtlMalloc(outlen);
  3131. memcpy(out, outbuff.str(), outlen);
  3132. }
  3133. bool rtlCodepageToCodepage(unsigned outlen, char * out, unsigned inlen, char const * in, char const * outcodepage, char const * incodepage)
  3134. {
  3135. UConverter * inconv = queryRTLUnicodeConverter(incodepage)->query();
  3136. UConverter * outconv = queryRTLUnicodeConverter(outcodepage)->query();
  3137. UErrorCode err = U_ZERO_ERROR;
  3138. char * target = out;
  3139. ucnv_convertEx(outconv, inconv, &target, out+outlen, &in, in+inlen, NULL, NULL, NULL, NULL, TRUE, TRUE, &err);
  3140. unsigned len = target - out;
  3141. if(len < outlen)
  3142. codepageBlankFill(outcodepage, target, outlen-len);
  3143. return U_SUCCESS(err) != FALSE;
  3144. }
  3145. bool rtlCodepageToCodepageX(unsigned & outlen, char * & out, unsigned maxoutlen, unsigned inlen, char const * in, char const * outcodepage, char const * incodepage)
  3146. {
  3147. UConverter * inconv = queryRTLUnicodeConverter(incodepage)->query();
  3148. UConverter * outconv = queryRTLUnicodeConverter(outcodepage)->query();
  3149. UErrorCode err = U_ZERO_ERROR;
  3150. //GH->PG is there a better way of coding this with out temporary buffer?
  3151. char * tempBuffer = (char *)rtlMalloc(maxoutlen);
  3152. char * target = tempBuffer;
  3153. ucnv_convertEx(outconv, inconv, &target, tempBuffer+maxoutlen, &in, in+inlen, NULL, NULL, NULL, NULL, TRUE, TRUE, &err);
  3154. unsigned len = target - tempBuffer;
  3155. outlen = len;
  3156. if (len == maxoutlen)
  3157. out = tempBuffer;
  3158. else
  3159. {
  3160. out = (char *)rtlRealloc(tempBuffer, len);
  3161. if (!out)
  3162. out = tempBuffer;
  3163. }
  3164. return U_SUCCESS(err) != FALSE;
  3165. }
  3166. int rtlSingleUtf8ToCodepage(char * out, unsigned inlen, char const * in, char const * outcodepage)
  3167. {
  3168. const byte head = *in; // Macros require unsigned argument on some versions of ICU
  3169. if(!U8_IS_LEAD(head))
  3170. return -1;
  3171. uint8_t trailbytes = U8_COUNT_TRAIL_BYTES(head);
  3172. if(inlen < (unsigned)(trailbytes+1))
  3173. return -1;
  3174. if(!rtlCodepageToCodepage(1, out, trailbytes+1, in, outcodepage, UTF8_CODEPAGE))
  3175. return -1;
  3176. return static_cast<int>(trailbytes); //cast okay as is certainly 0--3
  3177. }
  3178. bool rtlCodepageToCodepage(StringBuffer & out, unsigned maxoutlen, unsigned inlen, char const * in, char const * outcodepage, char const * incodepage)
  3179. {
  3180. UConverter * inconv = queryRTLUnicodeConverter(incodepage)->query();
  3181. UConverter * outconv = queryRTLUnicodeConverter(outcodepage)->query();
  3182. UErrorCode err = U_ZERO_ERROR;
  3183. //GH->PG is there a better way of coding this with out temporary buffer?
  3184. size32_t oldLength = out.length();
  3185. char * tempBuffer = out.reserve(maxoutlen);
  3186. char * target = tempBuffer;
  3187. ucnv_convertEx(outconv, inconv, &target, tempBuffer+maxoutlen, &in, in+inlen, NULL, NULL, NULL, NULL, TRUE, TRUE, &err);
  3188. unsigned len = target - tempBuffer;
  3189. out.setLength(oldLength + len);
  3190. return U_SUCCESS(err) != FALSE;
  3191. }
  3192. #else
  3193. void rtlUnicodeToEscapedStrX(unsigned & outlen, char * & out, unsigned inlen, UChar const * in)
  3194. {
  3195. return rtlUnicodeToStrX(outlen, out, inlen, in);
  3196. }
  //Version used when ICU is not available: the codepages are ignored and the bytes are copied
  //unchanged, truncated to outlen if necessary and otherwise padded with spaces.
  //Always returns true.
  bool rtlCodepageToCodepage(unsigned outlen, char * out, unsigned inlen, char const * in, char const * outcodepage, char const * incodepage)
  {
      const unsigned copyLen = (inlen > outlen) ? outlen : inlen;
      memcpy(out, in, copyLen);

      const unsigned padLen = outlen - copyLen;
      if (padLen != 0)
          memset(out+copyLen, ' ', padLen);
      return true;
  }
  3206. bool rtlCodepageToCodepageX(unsigned & outlen, char * & out, unsigned maxoutlen, unsigned inlen, char const * in, char const * outcodepage, char const * incodepage)
  3207. {
  3208. if (inlen > maxoutlen)
  3209. inlen = maxoutlen;
  3210. outlen = inlen;
  3211. out = (char *)rtlMalloc(inlen);
  3212. return rtlCodepageToCodepage(outlen, out, inlen, in, outcodepage, incodepage);
  3213. }
  3214. int rtlSingleUtf8ToCodepage(char * out, unsigned inlen, char const * in, char const * outcodepage)
  3215. {
  3216. rtlThrowNoUnicode();
  3217. }
  3218. bool rtlCodepageToCodepage(StringBuffer & out, unsigned maxoutlen, unsigned inlen, char const * in, char const * outcodepage, char const * incodepage)
  3219. {
  3220. out.append(inlen, in);
  3221. return true;
  3222. }
  3223. #endif
  3224. //---------------------------------------------------------------------------
  3225. void rtlStrToDataX(unsigned & tlen, void * & tgt, unsigned slen, const void * src)
  3226. {
  3227. void * data = rtlMalloc(slen);
  3228. memcpy(data, src, slen);
  3229. tgt = data;
  3230. tlen = slen;
  3231. }
  3232. void rtlStrToStrX(unsigned & tlen, char * & tgt, unsigned slen, const void * src)
  3233. {
  3234. char * data = (char *)rtlMalloc(slen);
  3235. memcpy(data, src, slen);
  3236. tgt = data;
  3237. tlen = slen;
  3238. }
  3239. char * rtlStrToVStrX(unsigned slen, const void * src)
  3240. {
  3241. char * data = (char *)rtlMalloc(slen+1);
  3242. memcpy(data, src, slen);
  3243. data[slen] = 0;
  3244. return data;
  3245. }
  3246. char * rtlEStrToVStrX(unsigned slen, const char * src)
  3247. {
  3248. MemoryAttr heapMem;
  3249. char * astr = (char *)CONDSTACKALLOC(heapMem, slen);
  3250. rtlEStrToStr(slen,astr,slen,src);
  3251. return rtlStrToVStrX(slen, astr);
  3252. }
  3253. void rtlEStrToStrX(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  3254. {
  3255. char * data = (char *)rtlMalloc(slen);
  3256. rtlEStrToStr(slen, data, slen, src);
  3257. tgt = data;
  3258. tlen = slen;
  3259. }
  3260. void rtlStrToEStrX(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  3261. {
  3262. char * data = (char *)rtlMalloc(slen);
  3263. rtlStrToEStr(slen, data, slen, src);
  3264. tgt = data;
  3265. tlen = slen;
  3266. }
  3267. //---------------------------------------------------------------------------
  3268. // See http://www.isthe.com/chongo/tech/comp/fnv/index.html
  3269. #define FNV1_64_INIT HASH64_INIT
  3270. #define FNV_64_PRIME I64C(0x100000001b3U)
  3271. #define APPLY_FNV64(hval, next) { hval *= FNV_64_PRIME; hval ^= next; }
  3272. hash64_t rtlHash64Data(size32_t len, const void *buf, hash64_t hval)
  3273. {
  3274. const unsigned char *bp = (const unsigned char *)buf; /* start of buffer */
  3275. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3276. //This possibly breaks the aliasing rules for c++, but I can't see it causing any problems
  3277. while (len >= sizeof(unsigned))
  3278. {
  3279. unsigned next = *(const unsigned *)bp;
  3280. bp += sizeof(unsigned);
  3281. for (unsigned i=0; i < sizeof(unsigned); i++)
  3282. {
  3283. APPLY_FNV64(hval, (byte)next);
  3284. next >>= 8;
  3285. }
  3286. len -= sizeof(unsigned);
  3287. }
  3288. #endif
  3289. const unsigned char *be = bp + len; /* beyond end of buffer */
  3290. while (bp < be)
  3291. {
  3292. APPLY_FNV64(hval, *bp++);
  3293. }
  3294. return hval;
  3295. }
  3296. hash64_t rtlHash64VStr(const char *str, hash64_t hval)
  3297. {
  3298. const unsigned char *s = (const unsigned char *)str;
  3299. unsigned char c;
  3300. while ((c = *s++) != 0)
  3301. {
  3302. APPLY_FNV64(hval, c);
  3303. }
  3304. return hval;
  3305. }
  3306. hash64_t rtlHash64Unicode(unsigned length, UChar const * k, hash64_t hval)
  3307. {
  3308. #ifdef _USE_ICU
  3309. unsigned trimLength = rtlTrimUnicodeStrLen(length, k);
  3310. for (unsigned i=0; i < trimLength; i++)
  3311. {
  3312. //Handle surrogate pairs correctly, but still hash the utf16 representation
  3313. const byte * cur = reinterpret_cast<const byte *>(&k[i]);
  3314. UChar32 c = k[i];
  3315. if (U16_IS_SURROGATE(c))
  3316. {
  3317. U16_GET(k, 0, i, length, c);
  3318. if (!u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
  3319. {
  3320. APPLY_FNV64(hval, cur[0]);
  3321. APPLY_FNV64(hval, cur[1]);
  3322. APPLY_FNV64(hval, cur[2]);
  3323. APPLY_FNV64(hval, cur[3]);
  3324. }
  3325. //Skip the surrogate pair
  3326. i++;
  3327. }
  3328. else
  3329. {
  3330. if (!u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
  3331. {
  3332. APPLY_FNV64(hval, cur[0]);
  3333. APPLY_FNV64(hval, cur[1]);
  3334. }
  3335. }
  3336. }
  3337. #else
  3338. rtlThrowNoUnicode();
  3339. #endif
  3340. return hval;
  3341. }
  3342. hash64_t rtlHash64VUnicode(UChar const * k, hash64_t initval)
  3343. {
  3344. return rtlHash64Unicode(rtlUnicodeStrlen(k), k, initval);
  3345. }
  3346. //---------------------------------------------------------------------------
  3347. // See http://www.isthe.com/chongo/tech/comp/fnv/index.html
  3348. #define FNV1_32_INIT HASH32_INIT
  3349. #define FNV_32_PRIME 0x1000193
  3350. #define APPLY_FNV32(hval, next) { hval *= FNV_32_PRIME; hval ^= next; }
  3351. unsigned rtlHash32Data(size32_t len, const void *buf, unsigned hval)
  3352. {
  3353. const unsigned char *bp = (const unsigned char *)buf; /* start of buffer */
  3354. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3355. //This possibly breaks the aliasing rules for c++, but I can't see it causing any problems
  3356. while (len >= sizeof(unsigned))
  3357. {
  3358. unsigned next = *(const unsigned *)bp;
  3359. bp += sizeof(unsigned);
  3360. for (unsigned i=0; i < sizeof(unsigned); i++)
  3361. {
  3362. APPLY_FNV32(hval, (byte)next);
  3363. next >>= 8;
  3364. }
  3365. len -= sizeof(unsigned);
  3366. }
  3367. #endif
  3368. const unsigned char *be = bp + len; /* beyond end of buffer */
  3369. while (bp < be)
  3370. {
  3371. APPLY_FNV32(hval, *bp++);
  3372. }
  3373. return hval;
  3374. }
  3375. unsigned rtlHash32VStr(const char *str, unsigned hval)
  3376. {
  3377. const unsigned char *s = (const unsigned char *)str;
  3378. unsigned char c;
  3379. while ((c = *s++) != 0)
  3380. {
  3381. APPLY_FNV32(hval, c);
  3382. }
  3383. return hval;
  3384. }
  3385. unsigned rtlHash32Unicode(unsigned length, UChar const * k, unsigned hval)
  3386. {
  3387. #ifdef _USE_ICU
  3388. unsigned trimLength = rtlTrimUnicodeStrLen(length, k);
  3389. for (unsigned i=0; i < trimLength; i++)
  3390. {
  3391. //Handle surrogate pairs correctly, but still hash the utf16 representation
  3392. const byte * cur = reinterpret_cast<const byte *>(&k[i]);
  3393. UChar32 c = k[i];
  3394. if (U16_IS_SURROGATE(c))
  3395. {
  3396. U16_GET(k, 0, i, length, c);
  3397. if (!u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
  3398. {
  3399. APPLY_FNV32(hval, cur[0]);
  3400. APPLY_FNV32(hval, cur[1]);
  3401. APPLY_FNV32(hval, cur[2]);
  3402. APPLY_FNV32(hval, cur[3]);
  3403. }
  3404. //Skip the surrogate pair
  3405. i++;
  3406. }
  3407. else
  3408. {
  3409. if (!u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
  3410. {
  3411. APPLY_FNV32(hval, cur[0]);
  3412. APPLY_FNV32(hval, cur[1]);
  3413. }
  3414. }
  3415. }
  3416. #else
  3417. rtlThrowNoUnicode();
  3418. #endif
  3419. return hval;
  3420. }
  3421. unsigned rtlHash32VUnicode(UChar const * k, unsigned initval)
  3422. {
  3423. return rtlHash32Unicode(rtlUnicodeStrlen(k), k, initval);
  3424. }
  3425. //---------------------------------------------------------------------------
  // Hash Helper functions
  #define mix(a,b,c) \
  { \
  a -= b; a -= c; a ^= (c>>13); \
  b -= c; b -= a; b ^= (a<<8); \
  c -= a; c -= b; c ^= (b>>13); \
  a -= b; a -= c; a ^= (c>>12); \
  b -= c; b -= a; b ^= (a<<16); \
  c -= a; c -= b; c ^= (b>>5); \
  a -= b; a -= c; a ^= (c>>3); \
  b -= c; b -= a; b ^= (a<<10); \
  c -= a; c -= b; c ^= (b>>15); \
  }
  // NB: the GETBYTE macros read from a pointer that must be called k at the point of use
  #define GETBYTE0(n) ((unsigned)k[n])
  #define GETBYTE1(n) ((unsigned)k[n+1]<<8)
  #define GETBYTE2(n) ((unsigned)k[n+2]<<16)
  #define GETBYTE3(n) ((unsigned)k[n+3]<<24)
  #define GETWORD(k,n) (GETBYTE0(n)+GETBYTE1(n)+GETBYTE2(n)+GETBYTE3(n))
  // the above looks inefficient but the compiler optimizes well
  // this hash looks slow but is about twice as quick as using our CRC table
  // and gives better results
  // (see paper at http://burtleburtle.net/bob/hash/evahash.html for more info)

  //Calculate a 32bit hash of a block of data.
  //  length  - number of bytes to hash
  //  _k      - the data to hash
  //  initval - the previous hash value (or an arbitrary seed)
  //The data is consumed 12 bytes at a time, the total length is added in, and then the
  //remaining 0-11 bytes are folded in before a final mix.
  unsigned rtlHashData( unsigned length, const void *_k, unsigned initval)
  {
      const unsigned char * k = static_cast<const unsigned char *>(_k);
      unsigned a = 0x9e3779b9; /* the golden ratio; an arbitrary value */
      unsigned b = 0x9e3779b9;
      unsigned c = initval; /* the previous hash value */

      /*---------------------------------------- handle most of the key */
      unsigned remaining = length;
      for (; remaining >= 12; remaining -= 12)
      {
          a += GETWORD(k,0);
          b += GETWORD(k,4);
          c += GETWORD(k,8);
          mix(a,b,c);
          k += 12;
      }

      /*------------------------------------- handle the last 11 bytes */
      c += length;
      switch (remaining) /* all the case statements fall through */
      {
      case 11: c+=GETBYTE3(7); // Fall through...
      case 10: c+=GETBYTE2(7); // Fall through...
      case 9 : c+=GETBYTE1(7); // Fall through...
          /* the first byte of c is reserved for the length */
      case 8 : b+=GETBYTE3(4); // Fall through...
      case 7 : b+=GETBYTE2(4); // Fall through...
      case 6 : b+=GETBYTE1(4); // Fall through...
      case 5 : b+=GETBYTE0(4); // Fall through...
      case 4 : a+=GETBYTE3(0); // Fall through...
      case 3 : a+=GETBYTE2(0); // Fall through...
      case 2 : a+=GETBYTE1(0); // Fall through...
      case 1 : a+=GETBYTE0(0); // Fall through...
          /* case 0: nothing left to add */
      }
      mix(a,b,c);

      /*-------------------------------------------- report the result */
      return c;
  }
  3487. unsigned rtlHashString( unsigned length, const char *_k, unsigned initval)
  3488. {
  3489. return rtlHashData(rtlTrimStrLen(length, _k), _k, initval);
  3490. }
  3491. unsigned rtlHashUnicode(unsigned length, UChar const * k, unsigned initval)
  3492. {
  3493. #ifdef _USE_ICU
  3494. unsigned trimLength = rtlTrimUnicodeStrLen(length, k);
  3495. //Because of the implementation of HASH we need to strip ignoreable code points instead of skipping them
  3496. size32_t tempLength;
  3497. rtlDataAttr temp;
  3498. if (stripIgnorableCharacters(tempLength, temp.refustr(), trimLength, k))
  3499. return rtlHashData(tempLength*2, temp.getustr(), initval);
  3500. return rtlHashData(trimLength*sizeof(UChar), k, initval);
  3501. #else
  3502. rtlThrowNoUnicode();
  3503. #endif
  3504. }
  3505. unsigned rtlHashVStr(const char * k, unsigned initval)
  3506. {
  3507. return rtlHashData(rtlTrimVStrLen(k), k, initval);
  3508. }
  3509. unsigned rtlHashVUnicode(UChar const * k, unsigned initval)
  3510. {
  3511. return rtlHashUnicode(rtlTrimVUnicodeStrLen(k), k, initval);
  3512. }
  3513. #define GETWORDNC(k,n) ((GETBYTE0(n)+GETBYTE1(n)+GETBYTE2(n)+GETBYTE3(n))&0xdfdfdfdf)
  3514. unsigned rtlHashDataNC( unsigned length, const void * _k, unsigned initval)
  3515. {
  3516. const unsigned char * k = (const unsigned char *)_k;
  3517. unsigned a,b,c,len;
  3518. /* Set up the internal state */
  3519. len = length;
  3520. a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
  3521. c = initval; /* the previous hash value */
  3522. /*---------------------------------------- handle most of the key */
  3523. while (len >= 12)
  3524. {
  3525. a += GETWORDNC(k,0);
  3526. b += GETWORDNC(k,4);
  3527. c += GETWORDNC(k,8);
  3528. mix(a,b,c);
  3529. k += 12; len -= 12;
  3530. }
  3531. /*------------------------------------- handle the last 11 bytes */
  3532. c += length;
  3533. switch(len) /* all the case statements fall through */
  3534. {
  3535. case 11: c+=GETBYTE3(7)&0xdf; // Fall through...
  3536. case 10: c+=GETBYTE2(7)&0xdf; // Fall through...
  3537. case 9 : c+=GETBYTE1(7)&0xdf; // Fall through...
  3538. /* the first byte of c is reserved for the length */
  3539. case 8 : b+=GETBYTE3(4)&0xdf; // Fall through...
  3540. case 7 : b+=GETBYTE2(4)&0xdf; // Fall through...
  3541. case 6 : b+=GETBYTE1(4)&0xdf; // Fall through...
  3542. case 5 : b+=GETBYTE0(4)&0xdf; // Fall through...
  3543. case 4 : a+=GETBYTE3(0)&0xdf; // Fall through...
  3544. case 3 : a+=GETBYTE2(0)&0xdf; // Fall through...
  3545. case 2 : a+=GETBYTE1(0)&0xdf; // Fall through...
  3546. case 1 : a+=GETBYTE0(0)&0xdf; // Fall through...
  3547. /* case 0: nothing left to add */
  3548. }
  3549. mix(a,b,c);
  3550. /*-------------------------------------------- report the result */
  3551. return c;
  3552. }
  3553. unsigned rtlHashVStrNC(const char * k, unsigned initval)
  3554. {
  3555. return rtlHashDataNC(strlen(k), k, initval);
  3556. }
  3557. //---------------------------------------------------------------------------
  3558. unsigned rtlCrcData( unsigned length, const void *_k, unsigned initval)
  3559. {
  3560. return crc32((const char *)_k, length, initval);
  3561. }
  3562. unsigned rtlCrcUnicode(unsigned length, UChar const * k, unsigned initval)
  3563. {
  3564. return crc32((char const *)k, length*2, initval);
  3565. }
  3566. unsigned rtlCrcVStr( const char * k, unsigned initval)
  3567. {
  3568. return crc32(k, strlen(k), initval);
  3569. }
  3570. unsigned rtlCrcVUnicode(UChar const * k, unsigned initval)
  3571. {
  3572. return rtlCrcUnicode(rtlUnicodeStrlen(k), k, initval);
  3573. }
  3574. //---------------------------------------------------------------------------
  3575. // MD5 processing:
  3576. void rtlHashMd5Init(size32_t sizestate, void * _state)
  3577. {
  3578. assertex(sizestate >= sizeof(md5_state_s));
  3579. md5_state_s * state = (md5_state_s *)_state;
  3580. md5_init(state);
  3581. }
  3582. void rtlHashMd5Data(size32_t len, const void *buf, size32_t sizestate, void * _state)
  3583. {
  3584. md5_state_s * state = (md5_state_s * )_state;
  3585. md5_append(state, (const md5_byte_t *)buf, len);
  3586. }
  3587. void rtlHashMd5Finish(void * out, size32_t sizestate, void * _state)
  3588. {
  3589. typedef md5_byte_t digest_t[16];
  3590. md5_state_s * state = (md5_state_s *)_state;
  3591. md5_finish(state, *(digest_t*)out);
  3592. }
  3593. //---------------------------------------------------------------------------
  3594. unsigned rtlRandom()
  3595. {
  3596. CriticalBlock block(random_Sect);
  3597. return random_->next();
  3598. }
  3599. void rtlSeedRandom(unsigned value)
  3600. {
  3601. CriticalBlock block(random_Sect);
  3602. random_->seed(value);
  3603. }
  3604. // These are all useful functions for testing - not really designed for other people to use them...
  3605. ECLRTL_API unsigned rtlTick()
  3606. {
  3607. return msTick();
  3608. }
  3609. ECLRTL_API bool rtlGPF()
  3610. {
  3611. char * x = 0;
  3612. *x = 0;
  3613. return false;
  3614. }
  3615. ECLRTL_API unsigned rtlSleep(unsigned delay)
  3616. {
  3617. MilliSleep(delay);
  3618. return 0;
  3619. }
  3620. ECLRTL_API unsigned rtlDisplay(unsigned len, const char * src)
  3621. {
  3622. LOG(MCprogress, unknownJob, "%.*s", len, src);
  3623. return 0;
  3624. }
  3625. void rtlEcho(unsigned len, const char * src)
  3626. {
  3627. printf("%.*s\n", len, src);
  3628. }
  3629. ECLRTL_API unsigned __int64 rtlNano()
  3630. {
  3631. return cycle_to_nanosec(get_cycles_now());
  3632. }
  3633. ECLRTL_API void rtlTestGetPrimes(unsigned & num, void * & data)
  3634. {
  3635. unsigned numPrimes = 6;
  3636. unsigned size = sizeof(unsigned) * numPrimes;
  3637. unsigned * primes = (unsigned *)rtlMalloc(size);
  3638. primes[0] = 1;
  3639. primes[1] = 2;
  3640. primes[2] = 3;
  3641. primes[3] = 5;
  3642. primes[4] = 7;
  3643. primes[5] = 11;
  3644. num = numPrimes;
  3645. data = primes;
  3646. }
  3647. ECLRTL_API void rtlTestFibList(bool & outAll, size32_t & outSize, void * & outData, bool inAll, size32_t inSize, const void * inData)
  3648. {
  3649. const unsigned * inList = (const unsigned *)inData;
  3650. unsigned * outList = (unsigned *)rtlMalloc(inSize);
  3651. unsigned * curOut = outList;
  3652. unsigned count = inSize / sizeof(*inList);
  3653. unsigned prev = 0;
  3654. for (unsigned i=0; i < count; i++)
  3655. {
  3656. unsigned next = *inList++;
  3657. *curOut++ = next + prev;
  3658. prev = next;
  3659. }
  3660. outAll = inAll;
  3661. outSize = inSize;
  3662. outData = outList;
  3663. }
  3664. unsigned rtlDelayReturn(unsigned value, unsigned sleepTime)
  3665. {
  3666. MilliSleep(sleepTime);
  3667. return value;
  3668. }
  3669. //---------------------------------------------------------------------------
  3670. // strptime and related functions
  3671. inline bool readValue(unsigned & value, size32_t & _offset, size32_t lenStr, const char * str, unsigned max, bool spaceIsZero = false)
  3672. {
  3673. unsigned total = 0;
  3674. unsigned offset = _offset;
  3675. if (lenStr - offset < max)
  3676. max = lenStr - offset;
  3677. unsigned i=0;
  3678. for (; i < max; i++)
  3679. {
  3680. char next = str[offset+i];
  3681. if (next >= '0' && next <= '9')
  3682. total = total * 10 + (next - '0');
  3683. else if (next == ' ' && spaceIsZero)
  3684. total = total * 10;
  3685. else
  3686. break;
  3687. }
  3688. if (i == 0)
  3689. return false;
  3690. value = total;
  3691. _offset = offset+i;
  3692. return true;
  3693. }
  3694. const char * const monthNames[12] = { "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December" };
  3695. inline bool matchString(unsigned & value, size32_t & strOffset, size32_t lenStr, const byte * str, unsigned num, const char * const * strings, unsigned minMatch)
  3696. {
  3697. unsigned startOffset = strOffset;
  3698. for (unsigned i =0; i < num; i++)
  3699. {
  3700. const char * cur = strings[i];
  3701. unsigned offset = startOffset;
  3702. while (offset < lenStr)
  3703. {
  3704. byte next = *cur++;
  3705. if (!next || toupper(next) != toupper(str[offset]))
  3706. break;
  3707. offset++;
  3708. }
  3709. if (offset - startOffset >= minMatch)
  3710. {
  3711. value = i;
  3712. strOffset = offset;
  3713. return true;
  3714. }
  3715. }
  3716. return false;
  3717. }
  3718. //This implements a subset of the specifiers allowed for strptime
  3719. //Another difference is it works on a string with a separate length
  3720. ECLRTL_API const char * simple_strptime(size32_t lenStr, const char * str, const char * format, struct tm * tm)
  3721. {
  3722. const char * curFormat = format;
  3723. size32_t offset = 0;
  3724. const byte * src = (const byte *)str;
  3725. unsigned value;
  3726. byte next;
  3727. while ((next = *curFormat++) != '\0')
  3728. {
  3729. if (next == '%')
  3730. {
  3731. switch (*curFormat++)
  3732. {
  3733. // Recursive cases
  3734. case 'F':
  3735. {
  3736. const char* newPtr = simple_strptime(lenStr-offset, str+offset, "%Y-%m-%d", tm);
  3737. if (!newPtr)
  3738. return NULL;
  3739. offset = newPtr - str;
  3740. }
  3741. break;
  3742. case 'D':
  3743. {
  3744. const char* newPtr = simple_strptime(lenStr-offset, str+offset, "%m/%d/%y", tm);
  3745. if (!newPtr)
  3746. return NULL;
  3747. offset = newPtr - str;
  3748. }
  3749. break;
  3750. case 'R':
  3751. {
  3752. const char* newPtr = simple_strptime(lenStr-offset, str+offset, "%H:%M", tm);
  3753. if (!newPtr)
  3754. return NULL;
  3755. offset = newPtr - str;
  3756. }
  3757. break;
  3758. case 'T':
  3759. {
  3760. const char* newPtr = simple_strptime(lenStr-offset, str+offset, "%H:%M:%S", tm);
  3761. if (!newPtr)
  3762. return NULL;
  3763. offset = newPtr - str;
  3764. }
  3765. break;
  3766. // Non-recursive cases
  3767. case 't':
  3768. while ((offset < lenStr) && isspace(src[offset]))
  3769. offset++;
  3770. break;
  3771. case 'Y':
  3772. if (!readValue(value, offset, lenStr, str, 4))
  3773. return NULL;
  3774. tm->tm_year = value-1900;
  3775. break;
  3776. case 'y':
  3777. if (!readValue(value, offset, lenStr, str, 2))
  3778. return NULL;
  3779. tm->tm_year = value > 68 ? value : value + 100;
  3780. break;
  3781. case 'm':
  3782. if (!readValue(value, offset, lenStr, str, 2) || (value < 1) || (value > 12))
  3783. return NULL;
  3784. tm->tm_mon = value-1;
  3785. break;
  3786. case 'd':
  3787. if (!readValue(value, offset, lenStr, str, 2) || (value < 1) || (value > 31))
  3788. return NULL;
  3789. tm->tm_mday = value;
  3790. break;
  3791. case 'e':
  3792. if (!readValue(value, offset, lenStr, str, 2, true) || (value < 1) || (value > 31))
  3793. return NULL;
  3794. tm->tm_mday = value;
  3795. break;
  3796. case 'b':
  3797. case 'B':
  3798. case 'h':
  3799. if (!matchString(value, offset, lenStr, src, sizeof(monthNames)/sizeof(*monthNames), monthNames, 3))
  3800. return NULL;
  3801. tm->tm_mon = value;
  3802. break;
  3803. case 'H':
  3804. if (!readValue(value, offset, lenStr, str, 2)|| (value > 24))
  3805. return NULL;
  3806. tm->tm_hour = value;
  3807. break;
  3808. case 'k':
  3809. if (!readValue(value, offset, lenStr, str, 2, true)|| (value > 24))
  3810. return NULL;
  3811. tm->tm_hour = value;
  3812. break;
  3813. case 'M':
  3814. if (!readValue(value, offset, lenStr, str, 2)|| (value > 59))
  3815. return NULL;
  3816. tm->tm_min = value;
  3817. break;
  3818. case 'S':
  3819. if (!readValue(value, offset, lenStr, str, 2)|| (value > 59))
  3820. return NULL;
  3821. tm->tm_sec = value;
  3822. break;
  3823. default:
  3824. return NULL;
  3825. }
  3826. }
  3827. else
  3828. {
  3829. if (isspace(next))
  3830. {
  3831. while ((offset < lenStr) && isspace(src[offset]))
  3832. offset++;
  3833. }
  3834. else
  3835. {
  3836. if ((offset >= lenStr) || (src[offset++] != next))
  3837. return NULL;
  3838. }
  3839. }
  3840. }
  3841. return str+offset;
  3842. }
  3843. //---------------------------------------------------------------------------
  3844. class DECL_EXCEPTION CRtlFailException : public IUserException, public CInterface
  3845. {
  3846. public:
  3847. CRtlFailException(int _code, char const * _msg) : code(_code) { msg = strdup(_msg); }
  3848. ~CRtlFailException() { free(msg); }
  3849. IMPLEMENT_IINTERFACE;
  3850. virtual int errorCode() const { return code; }
  3851. virtual StringBuffer & errorMessage(StringBuffer & buff) const { return buff.append(msg); }
  3852. virtual MessageAudience errorAudience() const { return MSGAUD_user; }
  3853. private:
  3854. int code;
  3855. char * msg;
  3856. };
  3857. void rtlFail(int code, const char *msg)
  3858. {
  3859. throw dynamic_cast<IUserException *>(new CRtlFailException(code, msg));
  3860. }
  3861. void rtlSysFail(int code, const char *msg)
  3862. {
  3863. throw MakeStringException(MSGAUD_user, code, "%s", msg);
  3864. }
  3865. void rtlThrowOutOfMemory(int code, const char *msg)
  3866. {
  3867. throw static_cast<IUserException *>(new CRtlFailException(code, msg));
  3868. }
  3869. void rtlReportRowOverflow(unsigned size, unsigned max)
  3870. {
  3871. throw MakeStringException(MSGAUD_user, 1000, "Row size %u exceeds the maximum size specified(%u)", size, max);
  3872. }
  3873. void rtlThrowNoUnicode()
  3874. {
  3875. throw MakeStringException(99, "System was built without Unicode support");
  3876. }
  3877. void rtlReportFieldOverflow(unsigned size, unsigned max, const char * name)
  3878. {
  3879. if (!name)
  3880. rtlReportRowOverflow(size, max);
  3881. else
  3882. throw MakeStringException(MSGAUD_user, 1000, "Assignment to field '%s' causes row overflow. Size %u exceeds the maximum size specified(%u)", name, size, max);
  3883. }
  3884. void rtlCheckRowOverflow(unsigned size, unsigned max)
  3885. {
  3886. if (size > max)
  3887. rtlReportRowOverflow(size, max);
  3888. }
  3889. void rtlCheckFieldOverflow(unsigned size, unsigned max, const char * field)
  3890. {
  3891. if (size > max)
  3892. rtlReportFieldOverflow(size, max, field);
  3893. }
  3894. void rtlFailUnexpected()
  3895. {
  3896. throw MakeStringException(MSGAUD_user, -1, "Unexpected code execution");
  3897. }
  3898. void rtlFailOnAssert()
  3899. {
  3900. throw MakeStringException(MSGAUD_user, -1, "Abort execution");
  3901. }
  3902. void rtlFailDivideByZero()
  3903. {
  3904. throw MakeStringException(MSGAUD_user, -1, "Division by zero");
  3905. }
  3906. //---------------------------------------------------------------------------
  3907. void deserializeRaw(unsigned recordSize, void *record, MemoryBuffer &in)
  3908. {
  3909. in.read(recordSize, record);
  3910. }
  3911. void deserializeDataX(size32_t & len, void * & data, MemoryBuffer &in)
  3912. {
  3913. free(data);
  3914. in.read(sizeof(len), &len);
  3915. data = rtlMalloc(len);
  3916. in.read(len, data);
  3917. }
  3918. void deserializeStringX(size32_t & len, char * & data, MemoryBuffer &in)
  3919. {
  3920. free(data);
  3921. in.read(sizeof(len), &len);
  3922. data = (char *)rtlMalloc(len);
  3923. in.read(len, data);
  3924. }
  3925. char * deserializeCStringX(MemoryBuffer &in)
  3926. {
  3927. unsigned len;
  3928. in.read(sizeof(len), &len);
  3929. char * data = (char *)rtlMalloc(len+1);
  3930. in.read(len, data);
  3931. data[len] = 0;
  3932. return data;
  3933. }
  3934. void deserializeUnicodeX(size32_t & len, UChar * & data, MemoryBuffer &in)
  3935. {
  3936. free(data);
  3937. in.read(sizeof(len), &len);
  3938. data = (UChar *)rtlMalloc(len*sizeof(UChar));
  3939. in.read(len*sizeof(UChar), data);
  3940. }
  3941. void deserializeUtf8X(size32_t & len, char * & data, MemoryBuffer &in)
  3942. {
  3943. free(data);
  3944. in.read(sizeof(len), &len);
  3945. unsigned size = rtlUtf8Size(len, in.readDirect(0));
  3946. data = (char *)rtlMalloc(size);
  3947. in.read(size, data);
  3948. }
  3949. UChar * deserializeVUnicodeX(MemoryBuffer &in)
  3950. {
  3951. unsigned len;
  3952. in.read(sizeof(len), &len);
  3953. UChar * data = (UChar *)rtlMalloc((len+1)*sizeof(UChar));
  3954. in.read(len*sizeof(UChar), data);
  3955. data[len] = 0;
  3956. return data;
  3957. }
  3958. void deserializeSet(bool & isAll, size32_t & len, void * & data, MemoryBuffer &in)
  3959. {
  3960. free(data);
  3961. in.read(isAll);
  3962. in.read(sizeof(len), &len);
  3963. data = rtlMalloc(len);
  3964. in.read(len, data);
  3965. }
  3966. void serializeRaw(unsigned recordSize, const void *record, MemoryBuffer &out)
  3967. {
  3968. out.append(recordSize, record);
  3969. }
  3970. void serializeDataX(size32_t len, const void * data, MemoryBuffer &out)
  3971. {
  3972. out.append(len).append(len, data);
  3973. }
  3974. void serializeStringX(size32_t len, const char * data, MemoryBuffer &out)
  3975. {
  3976. out.append(len).append(len, data);
  3977. }
  3978. void serializeCStringX(const char * data, MemoryBuffer &out)
  3979. {
  3980. unsigned len = strlen(data);
  3981. out.append(len).append(len, data);
  3982. }
  3983. void serializeUnicodeX(size32_t len, const UChar * data, MemoryBuffer &out)
  3984. {
  3985. out.append(len).append(len*sizeof(UChar), data);
  3986. }
  3987. void serializeUtf8X(size32_t len, const char * data, MemoryBuffer &out)
  3988. {
  3989. out.append(len).append(rtlUtf8Size(len, data), data);
  3990. }
  3991. void serializeSet(bool isAll, size32_t len, const void * data, MemoryBuffer &out)
  3992. {
  3993. out.append(isAll).append(len).append(len, data);
  3994. }
  3995. //---------------------------------------------------------------------------
  3996. ECLRTL_API void serializeFixedString(unsigned len, const char *field, MemoryBuffer &out)
  3997. {
  3998. out.append(len, field);
  3999. }
  4000. ECLRTL_API void serializeLPString(unsigned len, const char *field, MemoryBuffer &out)
  4001. {
  4002. out.append(len);
  4003. out.append(len, field);
  4004. }
  4005. ECLRTL_API void serializeVarString(const char *field, MemoryBuffer &out)
  4006. {
  4007. out.append(field);
  4008. }
  4009. ECLRTL_API void serializeBool(bool field, MemoryBuffer &out)
  4010. {
  4011. out.append(field);
  4012. }
  4013. ECLRTL_API void serializeFixedData(unsigned len, const void *field, MemoryBuffer &out)
  4014. {
  4015. out.append(len, field);
  4016. }
  4017. ECLRTL_API void serializeLPData(unsigned len, const void *field, MemoryBuffer &out)
  4018. {
  4019. out.append(len);
  4020. out.append(len, field);
  4021. }
  4022. ECLRTL_API void serializeInt1(signed char field, MemoryBuffer &out)
  4023. {
  4024. // MORE - why did overloading pick the int method for this???
  4025. // out.append(field);
  4026. out.appendEndian(sizeof(field), &field);
  4027. }
  4028. ECLRTL_API void serializeInt2(signed short field, MemoryBuffer &out)
  4029. {
  4030. out.appendEndian(sizeof(field), &field);
  4031. }
  4032. ECLRTL_API void serializeInt3(signed int field, MemoryBuffer &out)
  4033. {
  4034. #if __BYTE_ORDER == __LITTLE_ENDIAN
  4035. out.appendEndian(3, &field);
  4036. #else
  4037. out.appendEndian(3, ((char *) &field) + 1);
  4038. #endif
  4039. }
  4040. ECLRTL_API void serializeInt4(signed int field, MemoryBuffer &out)
  4041. {
  4042. out.appendEndian(sizeof(field), &field);
  4043. }
  4044. ECLRTL_API void serializeInt5(signed __int64 field, MemoryBuffer &out)
  4045. {
  4046. #if __BYTE_ORDER == __LITTLE_ENDIAN
  4047. out.appendEndian(5, &field);
  4048. #else
  4049. out.appendEndian(5, ((char *) &field) + 3);
  4050. #endif
  4051. }
  4052. ECLRTL_API void serializeInt6(signed __int64 field, MemoryBuffer &out)
  4053. {
  4054. #if __BYTE_ORDER == __LITTLE_ENDIAN
  4055. out.appendEndian(6, &field);
  4056. #else
  4057. out.appendEndian(6, ((char *) &field) + 2);
  4058. #endif
  4059. }
  4060. ECLRTL_API void serializeInt7(signed __int64 field, MemoryBuffer &out)
  4061. {
  4062. #if __BYTE_ORDER == __LITTLE_ENDIAN
  4063. out.appendEndian(7, &field);
  4064. #else
  4065. out.appendEndian(7, ((char *) &field) + 1);
  4066. #endif
  4067. }
  4068. ECLRTL_API void serializeInt8(signed __int64 field, MemoryBuffer &out)
  4069. {
  4070. out.appendEndian(sizeof(field), &field);
  4071. }
  4072. ECLRTL_API void serializeUInt1(unsigned char field, MemoryBuffer &out)
  4073. {
  4074. out.appendEndian(sizeof(field), &field);
  4075. }
  4076. ECLRTL_API void serializeUInt2(unsigned short field, MemoryBuffer &out)
  4077. {
  4078. out.appendEndian(sizeof(field), &field);
  4079. }
  4080. ECLRTL_API void serializeUInt3(unsigned int field, MemoryBuffer &out)
  4081. {
  4082. #if __BYTE_ORDER == __LITTLE_ENDIAN
  4083. out.appendEndian(3, &field);
  4084. #else
  4085. out.appendEndian(3, ((char *) &field) + 1);
  4086. #endif
  4087. }
  4088. ECLRTL_API void serializeUInt4(unsigned int field, MemoryBuffer &out)
  4089. {
  4090. out.appendEndian(sizeof(field), &field);
  4091. }
  4092. ECLRTL_API void serializeUInt5(unsigned __int64 field, MemoryBuffer &out)
  4093. {
  4094. #if __BYTE_ORDER == __LITTLE_ENDIAN
  4095. out.appendEndian(5, &field);
  4096. #else
  4097. out.appendEndian(5, ((char *) &field) + 3);
  4098. #endif
  4099. }
  4100. ECLRTL_API void serializeUInt6(unsigned __int64 field, MemoryBuffer &out)
  4101. {
  4102. #if __BYTE_ORDER == __LITTLE_ENDIAN
  4103. out.appendEndian(6, &field);
  4104. #else
  4105. out.appendEndian(6, ((char *) &field) + 2);
  4106. #endif
  4107. }
  4108. ECLRTL_API void serializeUInt7(unsigned __int64 field, MemoryBuffer &out)
  4109. {
  4110. #if __BYTE_ORDER == __LITTLE_ENDIAN
  4111. out.appendEndian(7, &field);
  4112. #else
  4113. out.appendEndian(7, ((char *) &field) + 1);
  4114. #endif
  4115. }
  4116. ECLRTL_API void serializeUInt8(unsigned __int64 field, MemoryBuffer &out)
  4117. {
  4118. out.appendEndian(sizeof(field), &field);
  4119. }
  4120. ECLRTL_API void serializeReal4(float field, MemoryBuffer &out)
  4121. {
  4122. out.appendEndian(sizeof(field), &field);
  4123. }
  4124. ECLRTL_API void serializeReal8(double field, MemoryBuffer &out)
  4125. {
  4126. out.append(sizeof(field), &field);
  4127. }
  4128. //These maths functions can all have out of range arguments....
  4129. //---------------------------------------------------------------------------
  4130. static double rtlInvalidArgument(DBZaction dbz, const char *source, double arg)
  4131. {
  4132. switch ((DBZaction) dbz)
  4133. {
  4134. case DBZfail:
  4135. throw MakeStringException(MSGAUD_user, -1, "Invalid argument to %s: %f", source, arg);
  4136. case DBZnan:
  4137. return rtlCreateRealNull();
  4138. }
  4139. return 0;
  4140. }
  4141. static double rtlInvalidLog(DBZaction dbz, const char *source, double arg)
  4142. {
  4143. switch ((DBZaction) dbz)
  4144. {
  4145. case DBZfail:
  4146. throw MakeStringException(MSGAUD_user, -1, "Invalid argument to %s: %f", source, arg);
  4147. case DBZnan:
  4148. if (arg)
  4149. return rtlCreateRealNull();
  4150. else
  4151. return -INFINITY;
  4152. }
  4153. return 0;
  4154. }
  4155. ECLRTL_API double rtlLog10(double x, byte dbz)
  4156. {
  4157. if (x <= 0)
  4158. return rtlInvalidLog((DBZaction) dbz, "LOG10", x);
  4159. return log10(x);
  4160. }
  4161. ECLRTL_API double rtlLog(double x, byte dbz)
  4162. {
  4163. if (x <= 0)
  4164. return rtlInvalidLog((DBZaction) dbz, "LOG10", x);
  4165. return log(x);
  4166. }
  4167. ECLRTL_API double rtlSqrt(double x, byte dbz)
  4168. {
  4169. if (x < 0)
  4170. return rtlInvalidArgument((DBZaction) dbz, "SQRT", x);
  4171. return sqrt(x);
  4172. }
  4173. ECLRTL_API double rtlACos(double x, byte dbz)
  4174. {
  4175. if (fabs(x) > 1)
  4176. return rtlInvalidArgument((DBZaction) dbz, "ACOS", x);
  4177. return acos(x);
  4178. }
  4179. ECLRTL_API double rtlASin(double x, byte dbz)
  4180. {
  4181. if (fabs(x) > 1)
  4182. return rtlInvalidArgument((DBZaction) dbz, "ASIN", x);
  4183. return asin(x);
  4184. }
  4185. ECLRTL_API double rtlFMod(double numer, double denom, byte dbz)
  4186. {
  4187. if (!denom)
  4188. return rtlInvalidArgument((DBZaction) dbz, "FMOD", denom);
  4189. return fmod(numer, denom);
  4190. }
  4191. ECLRTL_API bool rtlFMatch(double a, double b, double epsilon)
  4192. {
  4193. if (isnan(a) || isnan(b))
  4194. return false;
  4195. return fabs(a-b) <= epsilon;
  4196. }
  4197. //---------------------------------------------------------------------------
  4198. ECLRTL_API bool rtlIsValidReal(unsigned size, const void * data)
  4199. {
  4200. byte * bytes = (byte *)data;
  4201. //Valid unless it is a Nan, represented by exponent all 1's and non-zero mantissa (ignore the sign).
  4202. if (size == 4)
  4203. {
  4204. //sign(1) exponent(8) mantissa(23)
  4205. if (((bytes[3] & 0x7f) == 0x7f) && ((bytes[2] & 0x80) == 0x80))
  4206. {
  4207. if ((bytes[2] & 0x7f) != 0 || bytes[1] || bytes[0])
  4208. return false;
  4209. }
  4210. }
  4211. else if (size == 8)
  4212. {
  4213. //sign(1) exponent(11) mantissa(52)
  4214. if (((bytes[7] & 0x7f) == 0x7f) && ((bytes[6] & 0xF0) == 0xF0))
  4215. {
  4216. if ((bytes[6] & 0xF) || bytes[5] || bytes[4] || bytes[3] || bytes[2] || bytes[1] || bytes[0])
  4217. return false;
  4218. }
  4219. }
  4220. else
  4221. {
  4222. //sign(1) exponent(15) mantissa(64)
  4223. assertex(size==10);
  4224. if (((bytes[9] & 0x7f) == 0x7f) && (bytes[8] == 0xFF))
  4225. {
  4226. if (bytes[7] || bytes[6] || bytes[5] || bytes[4] || bytes[3] || bytes[2] || bytes[1] || bytes[0])
  4227. return false;
  4228. }
  4229. }
  4230. return true;
  4231. }
  4232. double rtlCreateRealNull()
  4233. {
  4234. union
  4235. {
  4236. byte data[8];
  4237. double r;
  4238. } u;
  4239. //Use a non-signaling NaN
  4240. memcpy(u.data, "\x01\x00\x00\x00\x00\x00\xF0\x7f", 8);
  4241. return u.r;
  4242. }
  4243. double rtlCreateRealInf()
  4244. {
  4245. return INFINITY;
  4246. }
  4247. bool rtlIsInfinite(double value)
  4248. {
  4249. return isinf(value);
  4250. }
  4251. bool rtlIsNaN(double value)
  4252. {
  4253. return isnan(value);
  4254. }
  4255. bool rtlIsFinite(double value)
  4256. {
  4257. return isfinite(value);
  4258. }
  4259. unsigned rtlUtf8Size(const void * data)
  4260. {
  4261. return readUtf8Size(data);
  4262. }
  4263. unsigned rtlUtf8Size(unsigned len, const void * _data)
  4264. {
  4265. const byte * data = (const byte *)_data;
  4266. size32_t offset = 0;
  4267. for (unsigned i=0; i< len; i++)
  4268. offset += readUtf8Size(data+offset);
  4269. return offset;
  4270. }
  4271. unsigned rtlUtf8Length(unsigned size, const void * _data)
  4272. {
  4273. const byte * data = (const byte *)_data;
  4274. size32_t length = 0;
  4275. for (unsigned offset=0; offset < size; offset += readUtf8Size(data+offset))
  4276. length++;
  4277. return length;
  4278. }
  4279. unsigned rtlUtf8Char(const void * data)
  4280. {
  4281. return readUtf8Char(data);
  4282. }
  4283. void rtlUnicodeToUnicode(size32_t outlen, UChar * out, size32_t inlen, UChar const *in)
  4284. {
  4285. if(inlen>outlen) inlen = outlen;
  4286. memcpy(out, in, inlen*2);
  4287. while(inlen<outlen)
  4288. out[inlen++] = 0x0020;
  4289. }
  4290. void rtlUnicodeToVUnicode(size32_t outlen, UChar * out, size32_t inlen, UChar const *in)
  4291. {
  4292. if((inlen>=outlen) && (outlen != 0)) inlen = outlen-1;
  4293. memcpy(out, in, inlen*2);
  4294. out[inlen] = 0x0000;
  4295. }
  4296. void rtlVUnicodeToUnicode(size32_t outlen, UChar * out, UChar const *in)
  4297. {
  4298. rtlUnicodeToUnicode(outlen, out, rtlUnicodeStrlen(in), in);
  4299. }
  4300. void rtlVUnicodeToVUnicode(size32_t outlen, UChar * out, UChar const *in)
  4301. {
  4302. rtlUnicodeToVUnicode(outlen, out, rtlUnicodeStrlen(in), in);
  4303. }
  4304. void rtlUnicodeToUnicodeX(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src)
  4305. {
  4306. tgt = (UChar *)rtlMalloc(slen*2);
  4307. memcpy(tgt, src, slen*2);
  4308. tlen = slen;
  4309. }
  4310. UChar * rtlUnicodeToVUnicodeX(unsigned slen, UChar const * src)
  4311. {
  4312. UChar * data = (UChar *)rtlMalloc((slen+1)*2);
  4313. memcpy(data, src, slen*2);
  4314. data[slen] = 0x0000;
  4315. return data;
  4316. }
  4317. void rtlVUnicodeToUnicodeX(unsigned & tlen, UChar * & tgt, UChar const * src)
  4318. {
  4319. rtlUnicodeToUnicodeX(tlen, tgt, rtlUnicodeStrlen(src), src);
  4320. }
  4321. UChar * rtlVUnicodeToVUnicodeX(UChar const * src)
  4322. {
  4323. return rtlUnicodeToVUnicodeX(rtlUnicodeStrlen(src), src);
  4324. }
  4325. void rtlDecPushUnicode(size32_t len, UChar const * data)
  4326. {
  4327. char * buff = 0;
  4328. unsigned bufflen = 0;
  4329. rtlUnicodeToStrX(bufflen, buff, len, data);
  4330. DecPushString(bufflen, buff);
  4331. rtlFree(buff);
  4332. }
  4333. void rtlUtf8ToUtf8(size32_t outlen, char * out, size32_t inlen, const char *in)
  4334. {
  4335. //Packs as many characaters as it can into the target, but don't include any half characters
  4336. size32_t offset = 0;
  4337. size32_t outsize = outlen*UTF8_MAXSIZE;
  4338. for (unsigned i=0; i< inlen; i++)
  4339. {
  4340. unsigned nextSize = readUtf8Size(in+offset);
  4341. if (offset + nextSize > outsize)
  4342. break;
  4343. offset += nextSize;
  4344. }
  4345. memcpy(out, in, offset);
  4346. if (offset != outsize)
  4347. memset(out+offset, ' ', outsize-offset);
  4348. }
  4349. void rtlUtf8ToUtf8X(size32_t & outlen, char * & out, size32_t inlen, const char *in)
  4350. {
  4351. unsigned insize = rtlUtf8Size(inlen, in);
  4352. char * buffer = (char *)rtlMalloc(insize);
  4353. memcpy(buffer, in, insize);
  4354. outlen = inlen;
  4355. out = buffer;
  4356. }
  4357. void rtlStringToUtf8(StringBuffer & out, unsigned inlen, char const * in)
  4358. {
  4359. rtlCodepageToCodepage(out, inlen*UTF8_MAXSIZE, inlen, in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
  4360. }
  4361. #ifdef _USE_ICU
  4362. unsigned rtlUnicodeStrlen(UChar const * str)
  4363. {
  4364. return u_strlen(str);
  4365. }
  4366. #else
  4367. unsigned rtlUnicodeStrlen(UChar const * str)
  4368. {
  4369. unsigned len = 0;
  4370. while (*str++)
  4371. len++;
  4372. return len;
  4373. }
  4374. #endif
  4375. //---------------------------------------------------------------------------
  4376. void rtlUtf8ToData(size32_t outlen, void * out, size32_t inlen, const char *in)
  4377. {
  4378. unsigned insize = rtlUtf8Size(inlen, in);
  4379. if (insize >= outlen)
  4380. rtlCodepageToCodepage(outlen, (char *)out, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  4381. else
  4382. {
  4383. rtlCodepageToCodepage(insize, (char *)out, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  4384. memset((char*)out + insize, 0, outlen-insize);
  4385. }
  4386. }
  4387. void rtlUtf8ToDataX(size32_t & outlen, void * & out, size32_t inlen, const char *in)
  4388. {
  4389. unsigned insize = rtlUtf8Size(inlen, in);
  4390. char * cout;
  4391. rtlCodepageToCodepageX(outlen, cout, inlen, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  4392. out = cout;
  4393. }
  4394. void rtlUtf8ToStr(size32_t outlen, char * out, size32_t inlen, const char *in)
  4395. {
  4396. unsigned insize = rtlUtf8Size(inlen, in);
  4397. rtlCodepageToCodepage(outlen, (char *)out, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  4398. }
  4399. void rtlUtf8ToStrX(size32_t & outlen, char * & out, size32_t inlen, const char *in)
  4400. {
  4401. unsigned insize = rtlUtf8Size(inlen, in);
  4402. rtlCodepageToCodepageX(outlen, out, inlen, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  4403. }
  4404. char * rtlUtf8ToVStr(size32_t inlen, const char *in)
  4405. {
  4406. unsigned utfSize = rtlUtf8Size(inlen, in);
  4407. char *ret = (char *) rtlMalloc(inlen+1);
  4408. rtlCodepageToCodepage(inlen, ret, utfSize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  4409. ret[inlen] = 0;
  4410. return ret;
  4411. }
  4412. void rtlDataToUtf8(size32_t outlen, char * out, size32_t inlen, const void *in)
  4413. {
  4414. rtlCodepageToCodepage(outlen*UTF8_MAXSIZE, (char *)out, inlen, (const char *)in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
  4415. }
  4416. void rtlDataToUtf8X(size32_t & outlen, char * & out, size32_t inlen, const void *in)
  4417. {
  4418. unsigned outsize;
  4419. rtlCodepageToCodepageX(outsize, out, inlen*UTF8_MAXSIZE, inlen, (const char *)in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
  4420. outlen = rtlUtf8Length(outsize, out);
  4421. }
  4422. void rtlStrToUtf8(size32_t outlen, char * out, size32_t inlen, const char *in)
  4423. {
  4424. rtlCodepageToCodepage(outlen*UTF8_MAXSIZE, (char *)out, inlen, in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
  4425. }
  4426. void rtlStrToUtf8X(size32_t & outlen, char * & out, size32_t inlen, const char *in)
  4427. {
  4428. unsigned outsize;
  4429. rtlCodepageToCodepageX(outsize, out, inlen*UTF8_MAXSIZE, inlen, in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
  4430. outlen = rtlUtf8Length(outsize, out);
  4431. }
  4432. #if U_ICU_VERSION_MAJOR_NUM<50
  4433. static int rtlCompareUtf8Utf8ViaUnicode(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale)
  4434. {
  4435. rtlDataAttr uleft(llen*sizeof(UChar));
  4436. rtlDataAttr uright(rlen*sizeof(UChar));
  4437. rtlUtf8ToUnicode(llen, uleft.getustr(), llen, left);
  4438. rtlUtf8ToUnicode(rlen, uright.getustr(), rlen, right);
  4439. return rtlCompareUnicodeUnicode(llen, uleft.getustr(), rlen, uright.getustr(), locale);
  4440. }
  4441. #endif
  4442. #ifdef _USE_ICU
  4443. int rtlCompareUtf8Utf8(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale)
  4444. {
  4445. #if U_ICU_VERSION_MAJOR_NUM>=50
  4446. size_t lSize = rtlUtf8Size(llen, left);
  4447. while (lSize && (left[lSize-1] == ' '))
  4448. lSize--;
  4449. size_t rSize = rtlUtf8Size(rlen, right);
  4450. while (rSize && (right[rSize-1] == ' '))
  4451. rSize--;
  4452. UCollator * collator = queryRTLLocale(locale)->queryCollator();
  4453. UErrorCode status = U_ZERO_ERROR; // Not documented, but this needs to be cleared otherwise the function can fail
  4454. return ucol_strcollUTF8(collator, left, lSize, right, rSize, &status);
  4455. #else
  4456. return rtlCompareUtf8Utf8ViaUnicode(llen, left, rlen, right, locale);
  4457. #endif
  4458. }
  4459. int rtlCompareUtf8Utf8Strength(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale, unsigned strength)
  4460. {
  4461. //GH->PG Any better way of doing this? We could possible decide it was a binary comparison instead I guess.
  4462. rtlDataAttr uleft(llen*sizeof(UChar));
  4463. rtlDataAttr uright(rlen*sizeof(UChar));
  4464. rtlUtf8ToUnicode(llen, uleft.getustr(), llen, left);
  4465. rtlUtf8ToUnicode(rlen, uright.getustr(), rlen, right);
  4466. return rtlCompareUnicodeUnicodeStrength(llen, uleft.getustr(), rlen, uright.getustr(), locale, strength);
  4467. }
  4468. #else
  4469. int rtlCompareUtf8Utf8(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale)
  4470. {
  4471. return rtlCompareStrStr(rtlUtf8Size(llen, left), left, rtlUtf8Size(rlen, right), right);
  4472. }
  4473. int rtlCompareUtf8Utf8Strength(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale, unsigned strength)
  4474. {
  4475. return rtlCompareUtf8Utf8(llen, left, rlen, right, locale);
  4476. }
  4477. #endif
  4478. void rtlDecPushUtf8(size32_t len, const char * data)
  4479. {
  4480. DecPushString(len, (const char *)data); // good enough for the moment
  4481. }
  4482. bool rtlUtf8ToBool(size32_t inlen, const char * in)
  4483. {
  4484. //NOTE: Theoretically this should pass insize rather than inlen, but the called function will stop when it gets
  4485. //to a non-ascii charcter, which means it will never behave incorrectly if insize != inlen
  4486. return rtlStrToBool(inlen, in);
  4487. }
  4488. __int64 rtlUtf8ToInt(size32_t inlen, const char * in)
  4489. {
  4490. //NOTE: Theoretically this should pass insize rather than inlen, but the called function will stop when it gets
  4491. //to a non-ascii charcter, which means it will never behave incorrectly if insize != inlen
  4492. return rtlStrToInt8(inlen, in); // good enough for the moment
  4493. }
  4494. double rtlUtf8ToReal(size32_t inlen, const char * in)
  4495. {
  4496. //NOTE: Theoretically this should pass insize rather than inlen, but the called function will stop when it gets
  4497. //to a non-ascii charcter, which means it will never behave incorrectly if insize != inlen
  4498. return rtlStrToReal(inlen, in); // good enough for the moment
  4499. }
  4500. void rtlCodepageToUtf8(unsigned outlen, char * out, unsigned inlen, char const * in, char const * codepage)
  4501. {
  4502. rtlCodepageToCodepage(outlen*UTF8_MAXSIZE, (char *)out, inlen, in, UTF8_CODEPAGE, codepage);
  4503. }
  4504. void rtlCodepageToUtf8X(unsigned & outlen, char * & out, unsigned inlen, char const * in, char const * codepage)
  4505. {
  4506. unsigned outsize;
  4507. rtlCodepageToCodepageX(outsize, out, inlen*UTF8_MAXSIZE, inlen, in, UTF8_CODEPAGE, codepage);
  4508. outlen = rtlUtf8Length(outsize, out);
  4509. }
  4510. void rtlUtf8ToCodepage(unsigned outlen, char * out, unsigned inlen, char const * in, char const * codepage)
  4511. {
  4512. unsigned insize = rtlUtf8Size(inlen, in);
  4513. rtlCodepageToCodepage(outlen, (char *)out, insize, in, codepage, UTF8_CODEPAGE);
  4514. }
  4515. void rtlUtf8ToCodepageX(unsigned & outlen, char * & out, unsigned inlen, char const * in, char const * codepage)
  4516. {
  4517. unsigned insize = rtlUtf8Size(inlen, in);
  4518. rtlCodepageToCodepageX(outlen, out, inlen, insize, in, codepage, UTF8_CODEPAGE);
  4519. }
  4520. void rtlUnicodeToUtf8X(unsigned & outlen, char * & out, unsigned inlen, const UChar * in)
  4521. {
  4522. unsigned outsize;
  4523. rtlUnicodeToCodepageX(outsize, out, inlen, in, UTF8_CODEPAGE);
  4524. outlen = rtlUtf8Length(outsize, out);
  4525. }
  4526. void rtlUnicodeToUtf8(unsigned outlen, char * out, unsigned inlen, const UChar * in)
  4527. {
  4528. rtlUnicodeToCodepage(outlen*UTF8_MAXSIZE, out, inlen, in, UTF8_CODEPAGE);
  4529. }
  4530. void rtlUtf8ToUnicodeX(unsigned & outlen, UChar * & out, unsigned inlen, char const * in)
  4531. {
  4532. rtlCodepageToUnicodeX(outlen, out, rtlUtf8Size(inlen, in), in, UTF8_CODEPAGE);
  4533. }
  4534. void rtlUtf8ToUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
  4535. {
  4536. rtlCodepageToUnicode(outlen, out, rtlUtf8Size(inlen, in), in, UTF8_CODEPAGE);
  4537. }
  4538. ECLRTL_API void rtlUtf8SubStrFT(unsigned tlen, char * tgt, unsigned slen, char const * src, unsigned from, unsigned to)
  4539. {
  4540. normalizeFromTo(from, to);
  4541. clipFromTo(from, to, slen);
  4542. unsigned copylen = to - from;
  4543. unsigned startOffset = rtlUtf8Size(from, src);
  4544. rtlUtf8ToUtf8(tlen, tgt, copylen, src+startOffset);
  4545. }
  4546. ECLRTL_API void rtlUtf8SubStrFTX(unsigned & tlen, char * & tgt, unsigned slen, char const * src, unsigned from, unsigned to)
  4547. {
  4548. normalizeFromTo(from, to);
  4549. unsigned len = to - from;
  4550. clipFromTo(from, to, slen);
  4551. unsigned copylen = to - from;
  4552. unsigned fillSize = len - copylen;
  4553. unsigned startOffset = rtlUtf8Size(from, src);
  4554. unsigned copySize = rtlUtf8Size(copylen, src+startOffset);
  4555. char * buffer = (char *)rtlMalloc(copySize + fillSize);
  4556. memcpy(buffer, (byte *)src+startOffset, copySize);
  4557. if (fillSize)
  4558. memset(buffer+copySize, ' ', fillSize);
  4559. tlen = len;
  4560. tgt = buffer;
  4561. }
  4562. ECLRTL_API void rtlUtf8SubStrFX(unsigned & tlen, char * & tgt, unsigned slen, char const * src, unsigned from)
  4563. {
  4564. normalizeFromTo(from, slen);
  4565. unsigned len = slen - from;
  4566. unsigned startOffset = rtlUtf8Size(from, src);
  4567. unsigned copySize = rtlUtf8Size(len, src+startOffset);
  4568. char * buffer = (char *)rtlMalloc(copySize);
  4569. memcpy(buffer, (byte *)src+startOffset, copySize);
  4570. tlen = len;
  4571. tgt = buffer;
  4572. }
  4573. ECLRTL_API void rtlUtf8ToLower(size32_t l, char * t, char const * locale)
  4574. {
  4575. //Convert to lower case, but only go via unicode routines if we have to...
  4576. for (unsigned i=0; i< l; i++)
  4577. {
  4578. byte next = *t;
  4579. if (next >= 0x80)
  4580. {
  4581. //yuk, go via unicode to do the convertion.
  4582. unsigned len = l-i;
  4583. unsigned size = rtlUtf8Size(len, t+i);
  4584. rtlDataAttr unicode(len*sizeof(UChar));
  4585. rtlCodepageToUnicode(len, unicode.getustr(), size, t+i, UTF8_CODEPAGE);
  4586. rtlUnicodeToLower(len, unicode.getustr(), locale);
  4587. rtlUnicodeToCodepage(size, t+i, len, unicode.getustr(), UTF8_CODEPAGE);
  4588. return;
  4589. }
  4590. *t++ = tolower(next);
  4591. }
  4592. }
  4593. #ifdef _USE_ICU
  4594. ECLRTL_API void rtlConcatUtf8(unsigned & tlen, char * * tgt, ...)
  4595. {
  4596. //Going to have to go via unicode because of normalization. However, it might be worth optimizing the case where no special characters are present
  4597. va_list args;
  4598. unsigned totalLength = 0;
  4599. unsigned maxLength = 0;
  4600. va_start(args, tgt);
  4601. for(;;)
  4602. {
  4603. unsigned len = va_arg(args, unsigned);
  4604. if(len+1==0)
  4605. break;
  4606. va_arg(args, const char *); // Skip the string
  4607. totalLength += len;
  4608. if (len > maxLength)
  4609. maxLength = len;
  4610. }
  4611. va_end(args);
  4612. rtlDataAttr next(maxLength*sizeof(UChar));
  4613. rtlDataAttr result(totalLength*sizeof(UChar));
  4614. unsigned idx = 0;
  4615. UErrorCode err = U_ZERO_ERROR;
  4616. va_start(args, tgt);
  4617. for(;;)
  4618. {
  4619. unsigned len = va_arg(args, unsigned);
  4620. if(len+1==0)
  4621. break;
  4622. const char * str = va_arg(args, const char *);
  4623. if (len)
  4624. {
  4625. rtlUtf8ToUnicode(len, next.getustr(), len, str);
  4626. idx = unorm_concatenate(result.getustr(), idx, next.getustr(), len, result.getustr(), totalLength, UNORM_NFC, 0, &err);
  4627. }
  4628. }
  4629. va_end(args);
  4630. rtlUnicodeToUtf8X(tlen, *tgt, idx, result.getustr());
  4631. }
  4632. ECLRTL_API unsigned rtlConcatUtf8ToUtf8(unsigned tlen, char * tgt, unsigned offset, unsigned slen, const char * src)
  4633. {
  4634. //NB: Inconsistently with the other varieties, idx is a byte offset, not a character position to make the code more efficient.....
  4635. //normalization is done in the space filling routine at the end
  4636. unsigned ssize = rtlUtf8Size(slen, src);
  4637. assertex(tlen * UTF8_MAXSIZE >= offset+ssize);
  4638. memcpy(tgt+offset, src, ssize);
  4639. return offset + ssize;
  4640. }
  4641. ECLRTL_API void rtlUtf8SpaceFill(unsigned tlen, char * tgt, unsigned offset)
  4642. {
  4643. const byte * src = (const byte *)tgt;
  4644. for (unsigned i=0; i<offset; i++)
  4645. {
  4646. if (src[i] >= 0x80)
  4647. {
  4648. unsigned idx = rtlUtf8Length(offset, tgt);
  4649. rtlDataAttr unicode(idx*sizeof(UChar));
  4650. rtlUtf8ToUnicode(idx, unicode.getustr(), idx, tgt);
  4651. unicodeEnsureIsNormalized(idx, unicode.getustr());
  4652. rtlUnicodeToUtf8(tlen, tgt, idx, unicode.getustr());
  4653. return;
  4654. }
  4655. }
  4656. //no special characters=>easy route.
  4657. memset(tgt+offset, ' ', tlen*UTF8_MAXSIZE-offset);
  4658. }
  4659. #else
  4660. ECLRTL_API void rtlConcatUtf8(unsigned & tlen, char * * tgt, ...) { rtlThrowNoUnicode(); }
  4661. #endif
  4662. ECLRTL_API unsigned rtlHash32Utf8(unsigned length, const char * k, unsigned initval)
  4663. {
  4664. //These need to hash the same way as a UNICODE string would => convert to UNICODE
  4665. //It would be hard to optimize to hash the string without performing the conversion.
  4666. size32_t tempLength;
  4667. rtlDataAttr temp;
  4668. rtlUtf8ToUnicodeX(tempLength, temp.refustr(), length, k);
  4669. return rtlHash32Unicode(tempLength, temp.getustr(), initval);
  4670. }
  4671. ECLRTL_API unsigned rtlHashUtf8(unsigned length, const char * k, unsigned initval)
  4672. {
  4673. //These need to hash the same way as a UNICODE string would => convert to UNICODE
  4674. size32_t tempLength;
  4675. rtlDataAttr temp;
  4676. rtlUtf8ToUnicodeX(tempLength, temp.refustr(), length, k);
  4677. return rtlHashUnicode(tempLength, temp.getustr(), initval);
  4678. }
  4679. ECLRTL_API hash64_t rtlHash64Utf8(unsigned length, const char * k, hash64_t initval)
  4680. {
  4681. //These need to hash the same way as a UNICODE string would => convert to UNICODE
  4682. size32_t tempLength;
  4683. rtlDataAttr temp;
  4684. rtlUtf8ToUnicodeX(tempLength, temp.refustr(), length, k);
  4685. return rtlHash64Unicode(tempLength, temp.getustr(), initval);
  4686. }
  4687. unsigned rtlCrcUtf8(unsigned length, const char * k, unsigned initval)
  4688. {
  4689. return rtlCrcData(rtlUtf8Size(length, k), k, initval);
  4690. }
  4691. int rtlNewSearchUtf8Table(unsigned count, unsigned elemlen, const char * * table, unsigned width, const char * search, const char * locale)
  4692. {
  4693. //MORE: Hopelessly inefficient.... Should rethink - possibly introducing a class for doing string searching, and the Utf8 variety pre-converting the
  4694. //search strings into unicode.
  4695. int left = 0;
  4696. int right = count;
  4697. do
  4698. {
  4699. int mid = (left + right) >> 1;
  4700. int cmp = rtlCompareUtf8Utf8(width, search, elemlen, table[mid], locale);
  4701. if (cmp < 0)
  4702. right = mid;
  4703. else if (cmp > 0)
  4704. left = mid+1;
  4705. else
  4706. return mid;
  4707. } while (left < right);
  4708. return -1;
  4709. }
  4710. //---------------------------------------------------------------------------
  4711. ECLRTL_API int rtlQueryLocalFailCode(IException * e)
  4712. {
  4713. return e->errorCode();
  4714. }
  4715. ECLRTL_API void rtlGetLocalFailMessage(size32_t & len, char * & text, IException * e, const char * tag)
  4716. {
  4717. rtlExceptionExtract(len, text, e, tag);
  4718. }
  4719. ECLRTL_API void rtlFreeException(IException * e)
  4720. {
  4721. e->Release();
  4722. }
  4723. //---------------------------------------------------------------------------
  4724. //Generally any calls to this function have also checked that the length(trim(str)) <= fieldLen, so exceptions should only occur if compareLen > fieldLen
  4725. //However, function can now also handle the exception case.
  4726. ECLRTL_API void rtlCreateRange(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str, byte fill, byte pad)
  4727. {
  4728. //
  4729. if (compareLen > fieldLen)
  4730. {
  4731. if ((int)compareLen >= 0)
  4732. {
  4733. //x[1..m] = y, m is larger than fieldLen, so truncate to fieldLen
  4734. compareLen = fieldLen;
  4735. }
  4736. else
  4737. compareLen = 0; // probably m[1..-1] or something silly
  4738. }
  4739. if (len > compareLen)
  4740. {
  4741. while ((len > compareLen) && (str[len-1] == pad))
  4742. len--;
  4743. //so change the search range to FF,FF,FF .. 00.00.00 which will then never match.
  4744. if (len > compareLen)
  4745. {
  4746. compareLen = 0;
  4747. fill = (fill == 0) ? 255 : 0;
  4748. }
  4749. }
  4750. outlen = fieldLen;
  4751. out = (char *)rtlMalloc(fieldLen);
  4752. if (len >= compareLen)
  4753. memcpy(out, str, compareLen);
  4754. else
  4755. {
  4756. memcpy(out, str, len);
  4757. memset(out+len, pad, compareLen-len);
  4758. }
  4759. memset(out + compareLen, fill, fieldLen-compareLen);
  4760. }
  4761. ECLRTL_API void rtlCreateStrRangeLow(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
  4762. {
  4763. rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 0, ' ');
  4764. }
  4765. ECLRTL_API void rtlCreateStrRangeHigh(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
  4766. {
  4767. rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 255, ' ');
  4768. }
  4769. ECLRTL_API void rtlCreateDataRangeLow(size32_t & outlen, void * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const void * str)
  4770. {
  4771. rtlCreateRange(outlen, *(char * *)&out, fieldLen, compareLen, len, (const char *)str, 0, 0);
  4772. }
  4773. ECLRTL_API void rtlCreateDataRangeHigh(size32_t & outlen, void * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const void * str)
  4774. {
  4775. rtlCreateRange(outlen, *(char * *)&out, fieldLen, compareLen, len, (const char *)str, 255, 0);
  4776. }
  4777. ECLRTL_API void rtlCreateRangeLow(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
  4778. {
  4779. rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 0, ' ');
  4780. }
  4781. ECLRTL_API void rtlCreateRangeHigh(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
  4782. {
  4783. rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 255, ' ');
  4784. }
  4785. ECLRTL_API void rtlCreateUnicodeRange(size32_t & outlen, UChar * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const UChar * str, byte fill)
  4786. {
  4787. //Same as function above!
  4788. if (compareLen > fieldLen)
  4789. {
  4790. if ((int)compareLen >= 0)
  4791. {
  4792. //x[1..m] = y, m is larger than fieldLen, so truncate to fieldLen
  4793. compareLen = fieldLen;
  4794. }
  4795. else
  4796. compareLen = 0; // probably m[1..-1] or something silly
  4797. }
  4798. if (len > compareLen)
  4799. {
  4800. while ((len > compareLen) && (str[len-1] == ' '))
  4801. len--;
  4802. //so change the search range to FF,FF,FF .. 00.00.00 which will then never match.
  4803. if (len > compareLen)
  4804. {
  4805. compareLen = 0;
  4806. fill = (fill == 0) ? 255 : 0;
  4807. }
  4808. }
  4809. outlen = fieldLen;
  4810. out = (UChar *)rtlMalloc(fieldLen*sizeof(UChar));
  4811. if (len >= compareLen)
  4812. memcpy(out, str, compareLen*sizeof(UChar));
  4813. else
  4814. {
  4815. memcpy(out, str, len * sizeof(UChar));
  4816. while (len != compareLen)
  4817. out[len++] = ' ';
  4818. }
  4819. memset(out + compareLen, fill, (fieldLen-compareLen) * sizeof(UChar));
  4820. }
  4821. ECLRTL_API void rtlCreateUnicodeRangeLow(size32_t & outlen, UChar * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const UChar * str)
  4822. {
  4823. rtlCreateUnicodeRange(outlen, out, fieldLen, compareLen, len, str, 0x00);
  4824. }
  4825. ECLRTL_API void rtlCreateUnicodeRangeHigh(size32_t & outlen, UChar * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const UChar * str)
  4826. {
  4827. rtlCreateUnicodeRange(outlen, out, fieldLen, compareLen, len, str, 0xFF);
  4828. }
  4829. //---------------------------------------------------------------------------
  4830. ECLRTL_API unsigned rtlCountRows(size32_t len, const void * data, IRecordSize * rs)
  4831. {
  4832. if (rs->isFixedSize())
  4833. return len / rs->getFixedSize();
  4834. unsigned count = 0;
  4835. while (len)
  4836. {
  4837. size32_t thisLen = rs->getRecordSize(data);
  4838. data = (byte *)data + thisLen;
  4839. if (thisLen > len)
  4840. throw MakeStringException(0, "Invalid raw data");
  4841. len -= thisLen;
  4842. count++;
  4843. }
  4844. return count;
  4845. }
  4846. //---------------------------------------------------------------------------
  4847. ECLRTL_API size32_t rtlCountToSize(unsigned count, const void * data, IRecordSize * rs)
  4848. {
  4849. if (rs->isFixedSize())
  4850. return count * rs->getFixedSize();
  4851. unsigned size = 0;
  4852. for (unsigned i=0;i<count;i++)
  4853. {
  4854. size32_t thisLen = rs->getRecordSize(data);
  4855. data = (byte *)data + thisLen;
  4856. size += thisLen;
  4857. }
  4858. return size;
  4859. }
  4860. //---------------------------------------------------------------------------
  4861. #ifdef _USE_ICU
  4862. class rtlCodepageConverter
  4863. {
  4864. public:
  4865. rtlCodepageConverter(char const * sourceName, char const * targetName, bool & failed) : uerr(U_ZERO_ERROR)
  4866. {
  4867. srccnv = ucnv_open(sourceName, &uerr);
  4868. tgtcnv = ucnv_open(targetName, &uerr);
  4869. tgtMaxRatio = ucnv_getMaxCharSize(tgtcnv);
  4870. failed = U_FAILURE(uerr) != FALSE;
  4871. }
  4872. ~rtlCodepageConverter()
  4873. {
  4874. ucnv_close(srccnv);
  4875. ucnv_close(tgtcnv);
  4876. }
  4877. void convertX(unsigned & targetLength, char * & target, unsigned sourceLength, char const * source, bool & failed, bool preflight)
  4878. {
  4879. //convert from source to utf-16: try to avoid preflighting by guessing upper bound
  4880. //unicode length in UChars equal source length in chars if single byte encoding, and be less for multibyte
  4881. UChar * ubuff = (UChar *)rtlMalloc(sourceLength*2);
  4882. int32_t ulen = ucnv_toUChars(srccnv, ubuff, sourceLength, source, sourceLength, &uerr);
  4883. if(ulen > (int32_t)sourceLength)
  4884. {
  4885. //okay, so our guess was wrong, and we have to reallocate
  4886. free(ubuff);
  4887. ubuff = (UChar *)rtlMalloc(ulen*2);
  4888. ucnv_toUChars(srccnv, ubuff, ulen, source, sourceLength, &uerr);
  4889. }
  4890. if(preflight)
  4891. {
  4892. //convert from utf-16 to target: preflight to get buffer of exactly the right size
  4893. UErrorCode uerr2 = uerr; //preflight has to use copy of error code, as it is considered an 'error'
  4894. int32_t tlen = ucnv_fromUChars(tgtcnv, 0, 0, ubuff, ulen, &uerr2);
  4895. target = (char *)rtlMalloc(tlen);
  4896. targetLength = ucnv_fromUChars(tgtcnv, target, tlen, ubuff, ulen, &uerr);
  4897. }
  4898. else
  4899. {
  4900. //convert from utf-16 to target: avoid preflighting by allocating buffer of maximum size
  4901. target = (char *)rtlMalloc(ulen*tgtMaxRatio);
  4902. targetLength = ucnv_fromUChars(tgtcnv, target, ulen*tgtMaxRatio, ubuff, ulen, &uerr);
  4903. }
  4904. free(ubuff);
  4905. failed = U_FAILURE(uerr) != FALSE;
  4906. }
  4907. unsigned convert(unsigned targetLength, char * target, unsigned sourceLength, char const * source, bool & failed)
  4908. {
  4909. char * tgtStart = target;
  4910. ucnv_convertEx(tgtcnv, srccnv, &target, target+targetLength, &source, source+sourceLength, 0, 0, 0, 0, true, true, &uerr);
  4911. int32_t ret = target-tgtStart;
  4912. failed = U_FAILURE(uerr) != FALSE;
  4913. return ret;
  4914. }
  4915. private:
  4916. UErrorCode uerr;
  4917. UConverter * srccnv;
  4918. UConverter * tgtcnv;
  4919. int8_t tgtMaxRatio;
  4920. };
  4921. void * rtlOpenCodepageConverter(char const * sourceName, char const * targetName, bool & failed)
  4922. {
  4923. return new rtlCodepageConverter(sourceName, targetName, failed);
  4924. }
  4925. void rtlCloseCodepageConverter(void * converter)
  4926. {
  4927. delete ((rtlCodepageConverter *)converter);
  4928. }
  4929. void rtlCodepageConvertX(void * converter, unsigned & targetLength, char * & target, unsigned sourceLength, char const * source, bool & failed, bool preflight)
  4930. {
  4931. ((rtlCodepageConverter *)converter)->convertX(targetLength, target, sourceLength, source, failed, preflight);
  4932. }
  4933. unsigned rtlCodepageConvert(void * converter, unsigned targetLength, char * target, unsigned sourceLength, char const * source, bool & failed)
  4934. {
  4935. return ((rtlCodepageConverter *)converter)->convert(targetLength, target, sourceLength, source, failed);
  4936. }
  4937. #else
  4938. void * rtlOpenCodepageConverter(char const * sourceName, char const * targetName, bool & failed)
  4939. {
  4940. rtlThrowNoUnicode();
  4941. }
  //Variant compiled when ICU is not available: there is nothing to release.
  void rtlCloseCodepageConverter(void * converter)
  {
      (void)converter;
  }
  //Variant compiled when ICU is not available: no conversion is performed.
  //The outputs are given well defined values (empty result, failed set) rather than being left untouched.
  void rtlCodepageConvertX(void * converter, unsigned & targetLength, char * & target, unsigned sourceLength, char const * source, bool & failed, bool preflight)
  {
      targetLength = 0;
      target = NULL;
      failed = true;
  }
  //Variant compiled when ICU is not available: no conversion is performed.
  //Nothing is written to target, so 0 is returned and failed is set.
  unsigned rtlCodepageConvert(void * converter, unsigned targetLength, char * target, unsigned sourceLength, char const * source, bool & failed)
  {
      failed = true;
      return 0;
  }
  4952. #endif
  4953. //---------------------------------------------------------------------------
  4954. void appendUChar(MemoryBuffer & buff, char x)
  4955. {
  4956. UChar c = x;
  4957. buff.append(sizeof(c), &c);
  4958. }
  4959. void appendUChar(MemoryBuffer & buff, UChar c)
  4960. {
  4961. buff.append(sizeof(c), &c);
  4962. }
  4963. void appendUStr(MemoryBuffer & x, const char * text)
  4964. {
  4965. while (*text)
  4966. {
  4967. UChar c = *text++;
  4968. x.append(sizeof(c), &c);
  4969. }
  4970. }
  4971. ECLRTL_API void xmlDecodeStrX(size32_t & outLen, char * & out, size32_t inLen, const char * in)
  4972. {
  4973. StringBuffer input(inLen, in);
  4974. StringBuffer temp;
  4975. decodeXML(input, temp, NULL, NULL, false);
  4976. outLen = temp.length();
  4977. out = temp.detach();
  4978. }
  4979. bool hasPrefix(const UChar * ustr, const UChar * end, const char * str, unsigned len)
  4980. {
  4981. if ((unsigned)(end - ustr) < len)
  4982. return false;
  4983. while (len--)
  4984. {
  4985. if (*ustr++ != *str++)
  4986. return false;
  4987. }
  4988. return true;
  4989. }
  4990. ECLRTL_API void xmlDecodeUStrX(size32_t & outLen, UChar * & out, size32_t inLen, const UChar * in)
  4991. {
  4992. const UChar * cur = in;
  4993. const UChar * end = in+inLen;
  4994. MemoryBuffer ret;
  4995. while (cur<end)
  4996. {
  4997. switch(*cur)
  4998. {
  4999. case '&':
  5000. if(hasPrefix(cur+1, end, "amp;", 4))
  5001. {
  5002. cur += 4;
  5003. appendUChar(ret, '&');
  5004. }
  5005. else if(hasPrefix(cur+1, end, "lt;", 3))
  5006. {
  5007. cur += 3;
  5008. appendUChar(ret, '<');
  5009. }
  5010. else if(hasPrefix(cur+1, end, "gt;", 3))
  5011. {
  5012. cur += 3;
  5013. appendUChar(ret, '>');
  5014. }
  5015. else if(hasPrefix(cur+1, end, "quot;", 5))
  5016. {
  5017. cur += 5;
  5018. appendUChar(ret, '"');
  5019. }
  5020. else if(hasPrefix(cur+1, end, "apos;", 5))
  5021. {
  5022. cur += 5;
  5023. appendUChar(ret, '\'');
  5024. }
  5025. else if(hasPrefix(cur+1, end, "nbsp;", 5))
  5026. {
  5027. cur += 5;
  5028. appendUChar(ret, (UChar) 0xa0);
  5029. }
  5030. else if(hasPrefix(cur+1, end, "#", 1))
  5031. {
  5032. const UChar * saveCur = cur;
  5033. bool error = true; // until we have seen a digit...
  5034. cur += 2;
  5035. unsigned base = 10;
  5036. if (*cur == 'x')
  5037. {
  5038. base = 16;
  5039. cur++;
  5040. }
  5041. UChar value = 0;
  5042. while (cur < end)
  5043. {
  5044. unsigned digit;
  5045. UChar next = *cur;
  5046. if ((next >= '0') && (next <= '9'))
  5047. digit = next-'0';
  5048. else if ((next >= 'A') && (next <= 'F'))
  5049. digit = next-'A'+10;
  5050. else if ((next >= 'a') && (next <= 'f'))
  5051. digit = next-'a'+10;
  5052. else if (next==';')
  5053. break;
  5054. else
  5055. digit = base;
  5056. if (digit >= base)
  5057. {
  5058. error = true;
  5059. break;
  5060. }
  5061. error = false;
  5062. value = value * base + digit;
  5063. cur++;
  5064. }
  5065. if (error)
  5066. {
  5067. appendUChar(ret, '&');
  5068. cur = saveCur;
  5069. }
  5070. else
  5071. appendUChar(ret, value);
  5072. }
  5073. else
  5074. appendUChar(ret, *cur);
  5075. break;
  5076. default:
  5077. appendUChar(ret, *cur);
  5078. break;
  5079. }
  5080. cur++;
  5081. }
  5082. outLen = ret.length()/2;
  5083. out = (UChar *)ret.detach();
  5084. }
  5085. ECLRTL_API void xmlEncodeStrX(size32_t & outLen, char * & out, size32_t inLen, const char * in, unsigned flags)
  5086. {
  5087. StringBuffer temp;
  5088. encodeXML(in, temp, flags, inLen, false);
  5089. outLen = temp.length();
  5090. out = temp.detach();
  5091. }
  5092. ECLRTL_API void xmlEncodeUStrX(size32_t & outLen, UChar * & out, size32_t inLen, const UChar * in, unsigned flags)
  5093. {
  5094. const UChar * cur = in;
  5095. MemoryBuffer ret;
  5096. ret.ensureCapacity(inLen*2);
  5097. while (inLen)
  5098. {
  5099. UChar next = *cur;
  5100. switch(*cur)
  5101. {
  5102. case '&':
  5103. appendUStr(ret, "&amp;");
  5104. break;
  5105. case '<':
  5106. appendUStr(ret, "&lt;");
  5107. break;
  5108. case '>':
  5109. appendUStr(ret, "&gt;");
  5110. break;
  5111. case '\"':
  5112. appendUStr(ret, "&quot;");
  5113. break;
  5114. case '\'':
  5115. appendUStr(ret, "&apos;");
  5116. break;
  5117. case ' ':
  5118. appendUStr(ret, flags & ENCODE_SPACES?"&#32;":" ");
  5119. break;
  5120. case '\n':
  5121. appendUStr(ret, flags & ENCODE_NEWLINES?"&#10;":"\n");
  5122. break;
  5123. case '\r':
  5124. appendUStr(ret, flags & ENCODE_NEWLINES?"&#13;":"\r");
  5125. break;
  5126. case '\t':
  5127. appendUStr(ret, flags & ENCODE_SPACES?"&#9;":"\t");
  5128. break;
  5129. default:
  5130. appendUChar(ret, next);
  5131. break;
  5132. }
  5133. inLen--;
  5134. cur++;
  5135. }
  5136. outLen = ret.length()/2;
  5137. out = (UChar *)ret.detach();
  5138. }
  5139. //---------------------------------------------------------------------------
  5140. #define STRUCTURED_EXCEPTION_TAG "Error"
  //Check whether text looks like a structured (xml) message.
  //  text - null terminated message text (may be NULL)
  //  tag  - expected root tag, or NULL to accept any text starting with '<'
  //Returns true if text starts with "<tag>" (or just '<' when tag is NULL).
  inline bool isStructuredMessage(const char * text, const char * tag)
  {
      if (!text || text[0] != '<')
          return false;
      if (!tag)
          return true;
      size_t lenTag = strlen(tag);
      //strncmp stops at the terminator, so text that is shorter than the tag is never read beyond its end.
      if (strncmp(text+1,tag,lenTag) != 0)
          return false;
      if (text[lenTag+1] != '>')
          return false;
      return true;
  }
  5154. inline bool isStructuredError(const char * text) { return isStructuredMessage(text, STRUCTURED_EXCEPTION_TAG); }
  5155. void rtlExtractTag(size32_t & outLen, char * & out, const char * text, const char * tag, const char * rootTag)
  5156. {
  5157. if (!tag || !isStructuredMessage(text, rootTag))
  5158. {
  5159. if (text && (!tag || strcmp(tag, "text")==0))
  5160. rtlStrToStrX(outLen, out, strlen(text), text);
  5161. else
  5162. {
  5163. outLen = 0;
  5164. out = NULL;
  5165. }
  5166. }
  5167. else
  5168. {
  5169. StringBuffer startTag, endTag;
  5170. startTag.append("<").append(tag).append(">");
  5171. endTag.append("</").append(tag).append(">");
  5172. const char * start = strstr(text, startTag.str());
  5173. const char * end = strstr(text, endTag.str());
  5174. if (start && end)
  5175. {
  5176. start += startTag.length();
  5177. xmlDecodeStrX(outLen, out, end-start, start);
  5178. }
  5179. else
  5180. {
  5181. outLen = 0;
  5182. out = NULL;
  5183. }
  5184. }
  5185. }
  5186. void rtlExceptionExtract(size32_t & outLen, char * & out, const char * text, const char * tag)
  5187. {
  5188. if (!tag) tag = "text";
  5189. rtlExtractTag(outLen, out, text, tag, STRUCTURED_EXCEPTION_TAG);
  5190. }
  5191. void rtlExceptionExtract(size32_t & outLen, char * & out, IException * e, const char * tag)
  5192. {
  5193. StringBuffer text;
  5194. e->errorMessage(text);
  5195. rtlExceptionExtract(outLen, out, text.str(), tag);
  5196. }
  5197. void rtlAddExceptionTag(StringBuffer & errorText, const char * tag, const char * value)
  5198. {
  5199. if (!isStructuredError(errorText.str()))
  5200. {
  5201. StringBuffer temp;
  5202. temp.append("<" STRUCTURED_EXCEPTION_TAG "><text>");
  5203. encodeXML(errorText.str(), temp, ENCODE_WHITESPACE, errorText.length(), false);
  5204. temp.append("</text></" STRUCTURED_EXCEPTION_TAG ">");
  5205. errorText.swapWith(temp);
  5206. }
  5207. StringBuffer temp;
  5208. temp.append("<").append(tag).append(">");
  5209. encodeXML(value, temp, ENCODE_WHITESPACE, (unsigned)-1, false);
  5210. temp.append("</").append(tag).append(">");
  5211. unsigned len = errorText.length();
  5212. unsigned pos = len - strlen(STRUCTURED_EXCEPTION_TAG) - 3;
  5213. errorText.insert(pos, temp);
  5214. }
  5215. //---------------------------------------------------------------------------
  5216. void rtlSubstituteEmbeddedScript(size32_t &__lenResult, char * &__result, size32_t scriptChars, const char *script, size32_t outFieldsChars, const char *outFields, size32_t searchChars, const char *search)
  5217. {
  5218. StringBuffer result;
  5219. ::replaceString(result, rtlUtf8Size(scriptChars, script), script, rtlUtf8Size(searchChars, search), search, rtlUtf8Size(outFieldsChars, outFields), outFields);
  5220. __lenResult = result.lengthUtf8();
  5221. __result = result.detach();
  5222. }
  5223. void rtlSubstituteActivityContext(StringBuffer &result, const IThorActivityContext *ctx, size32_t scriptChars, const char *script)
  5224. {
  5225. result.append(rtlUtf8Size(scriptChars, script), script);
  5226. if (ctx)
  5227. {
  5228. char buf[20];
  5229. result.replaceStringNoCase("__activity__.isLocal", ctx->isLocal() ? "TRUE" : "FALSE");
  5230. result.replaceStringNoCase("__activity__.numSlaves", itoa(ctx->numSlaves(), buf, 10));
  5231. result.replaceStringNoCase("__activity__.numStrands", itoa(ctx->numStrands(), buf, 10));
  5232. result.replaceStringNoCase("__activity__.slave", itoa(ctx->querySlave(), buf, 10));
  5233. result.replaceStringNoCase("__activity__.strand", itoa(ctx->queryStrand(), buf, 10));
  5234. }
  5235. }
  5236. //---------------------------------------------------------------------------
  5237. void rtlRowBuilder::forceAvailable(size32_t size)
  5238. {
  5239. const size32_t chunkSize = 64;
  5240. maxsize = (size + chunkSize-1) & ~(chunkSize-1);
  5241. ptr = rtlRealloc(ptr, maxsize);
  5242. }
  5243. //---------------------------------------------------------------------------
  5244. inline unsigned numExtraBytesFromValue(unsigned __int64 first)
  5245. {
  5246. if (first >= I64C(0x10000000))
  5247. if (first >= I64C(0x40000000000))
  5248. if (first >= I64C(0x2000000000000))
  5249. if (first >= I64C(0x100000000000000))
  5250. return 8;
  5251. else
  5252. return 7;
  5253. else
  5254. return 6;
  5255. else
  5256. if (first >= I64C(0x800000000))
  5257. return 5;
  5258. else
  5259. return 4;
  5260. else
  5261. if (first >= 0x4000)
  5262. if (first >= 0x200000)
  5263. return 3;
  5264. else
  5265. return 2;
  5266. else
  5267. if (first >= 0x80)
  5268. return 1;
  5269. else
  5270. return 0;
  5271. }
  //A packed byte format, based on the unicode packing of utf-8.
  5273. //The number of top bits set in the leading byte indicates how many extra
  5274. //bytes follow (0..8). It gives the same compression as using a top bit to
  5275. //indicate continuation, but seems to be quicker (and requires less look ahead).
  5276. /*
  5277. byte numExtraBytesFromFirstTable[256] =
  5278. {
  5279. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  5280. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  5281. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  5282. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  5283. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  5284. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  5285. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  5286. 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8
  5287. };
  5288. inline unsigned numExtraBytesFromFirst(byte first)
  5289. {
  5290. return numExtraBytesFromFirstTable(first);
  5291. }
  5292. */
  5293. //NB: This seems to be faster than using the table lookup above. Probably affects the data cache less
  5294. inline unsigned numExtraBytesFromFirst(byte first)
  5295. {
  5296. if (first >= 0xF0)
  5297. if (first >= 0xFC)
  5298. if (first >= 0xFE)
  5299. if (first >= 0xFF)
  5300. return 8;
  5301. else
  5302. return 7;
  5303. else
  5304. return 6;
  5305. else
  5306. if (first >= 0xF8)
  5307. return 5;
  5308. else
  5309. return 4;
  5310. else
  5311. if (first >= 0xC0)
  5312. if (first >= 0xE0)
  5313. return 3;
  5314. else
  5315. return 2;
  5316. else
  5317. if (first >= 0x80)
  5318. return 1;
  5319. else
  5320. return 0;
  5321. }
  5322. const static byte leadingValueMask[9] = { 0x7f, 0x3f, 0x1f, 0x0f, 0x07, 0x03, 0x01, 0x00, 0x00 };
  5323. const static byte leadingLengthMask[9] = { 0x00, 0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF };
  5324. //maximum number of bytes for a packed value is size+1 bytes for size <=8 and last byte being fully used.
  5325. unsigned __int64 rtlGetPackedUnsigned(const void * _ptr)
  5326. {
  5327. const byte * ptr = (const byte *)_ptr;
  5328. byte first = *ptr++;
  5329. unsigned numExtra = numExtraBytesFromFirst(first);
  5330. unsigned __int64 value = first & leadingValueMask[numExtra];
  5331. //Loop unrolling has a negligable effect
  5332. while (numExtra--)
  5333. value = (value << 8) | *ptr++;
  5334. return value;
  5335. }
  5336. void rtlSetPackedUnsigned(void * _ptr, unsigned __int64 value)
  5337. {
  5338. byte * ptr = (byte *)_ptr;
  5339. unsigned numExtra = numExtraBytesFromValue(value);
  5340. byte firstMask = leadingLengthMask[numExtra];
  5341. while (numExtra)
  5342. {
  5343. ptr[numExtra--] = (byte)value;
  5344. value >>= 8;
  5345. }
  5346. ptr[0] = (byte)value | firstMask;
  5347. }
  5348. size32_t rtlGetPackedSize(const void * ptr)
  5349. {
  5350. return numExtraBytesFromFirst(*(byte*)ptr)+1;
  5351. }
  5352. size32_t rtlGetPackedSizeFromFirst(byte first)
  5353. {
  5354. return numExtraBytesFromFirst(first)+1;
  5355. }
  5356. //Store signed by moving the sign to the bottom bit, and inverting if negative.
  5357. //so small positive and negative numbers are stored compactly.
  5358. __int64 rtlGetPackedSigned(const void * ptr)
  5359. {
  5360. unsigned __int64 value = rtlGetPackedUnsigned(ptr);
  5361. unsigned __int64 shifted = (value >> 1);
  5362. return (__int64)((value & 1) ? ~shifted : shifted);
  5363. }
  5364. void rtlSetPackedSigned(void * ptr, __int64 value)
  5365. {
  5366. unsigned __int64 storeValue;
  5367. if (value < 0)
  5368. storeValue = (~value << 1) | 1;
  5369. else
  5370. storeValue = value << 1;
  5371. rtlSetPackedUnsigned(ptr, storeValue);
  5372. }
  5373. IAtom * rtlCreateFieldNameAtom(const char * name)
  5374. {
  5375. return createAtom(name);
  5376. }
  5377. void rtlBase64Encode(size32_t & tlen, char * & tgt, size32_t slen, const void * src)
  5378. {
  5379. tlen = 0;
  5380. tgt = NULL;
  5381. if (slen)
  5382. {
  5383. StringBuffer out;
  5384. JBASE64_Encode(src, slen, out);
  5385. tlen = out.length();
  5386. if (tlen)
  5387. {
  5388. char * data = (char *) rtlMalloc(tlen);
  5389. out.getChars(0, tlen, data);
  5390. tgt = data;
  5391. }
  5392. }
  5393. }
  5394. void rtlBase64Decode(size32_t & tlen, void * & tgt, size32_t slen, const char * src)
  5395. {
  5396. tlen = 0;
  5397. if (slen)
  5398. {
  5399. StringBuffer out;
  5400. if (JBASE64_Decode(slen, src, out))
  5401. tlen = out.length();
  5402. if (tlen)
  5403. {
  5404. char * data = (char *) rtlMalloc(tlen);
  5405. out.getChars(0, tlen, data);
  5406. tgt = (void *) data;
  5407. }
  5408. }
  5409. }
  5410. //---------------------------------------------------------------------------
  5411. void RtlCInterface::Link() const { atomic_inc(&xxcount); }
  5412. bool RtlCInterface::Release(void) const
  5413. {
  5414. if (atomic_dec_and_test(&xxcount))
  5415. {
  5416. delete this;
  5417. return true;
  5418. }
  5419. return false;
  5420. }
  5421. //---------------------------------------------------------------------------
  5422. class RtlRowStream : implements IRowStream, public RtlCInterface
  5423. {
  5424. public:
  5425. RtlRowStream(size32_t _count, const byte * * _rowset) : count(_count), rowset(_rowset)
  5426. {
  5427. rtlLinkRowset(rowset);
  5428. cur = 0;
  5429. }
  5430. ~RtlRowStream()
  5431. {
  5432. rtlReleaseRowset(count, rowset);
  5433. }
  5434. RTLIMPLEMENT_IINTERFACE
  5435. virtual const void *nextRow()
  5436. {
  5437. if (cur >= count)
  5438. return NULL;
  5439. const byte * ret = rowset[cur];
  5440. cur++;
  5441. rtlLinkRow(ret);
  5442. return ret;
  5443. }
  5444. virtual void stop()
  5445. {
  5446. cur = count;
  5447. }
  5448. protected:
  5449. size32_t cur;
  5450. size32_t count;
  5451. const byte * * rowset;
  5452. };
  5453. ECLRTL_API IRowStream * createRowStream(size32_t count, const byte * * rowset)
  5454. {
  5455. return new RtlRowStream(count, rowset);
  5456. }
//The following are provided for compatibility with 6.x so that the dll can be loaded.
//They are defined at the end of the file so that no code can accidentally call them.
  5459. __declspec(noreturn) void throwIncompatible() __attribute__((noreturn));
  5460. void throwIncompatible() { rtlFail(0, "Attempt to execute incompatible query version"); }
  5461. ECLRTL_API int rtlSearchTableStringN(unsigned count, char * * table, unsigned width, const char * search) { throwIncompatible(); }
  5462. ECLRTL_API int rtlSearchTableVStringN(unsigned count, char * * table, const char * search) { throwIncompatible(); }
  5463. ECLRTL_API int rtlNewSearchDataTable(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search) { throwIncompatible(); }
  5464. ECLRTL_API int rtlNewSearchEStringTable(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search) { throwIncompatible(); }
  5465. ECLRTL_API int rtlNewSearchQStringTable(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search) { throwIncompatible(); }
  5466. ECLRTL_API int rtlNewSearchStringTable(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search) { throwIncompatible(); }
  5467. ECLRTL_API int rtlNewSearchUnicodeTable(unsigned count, unsigned elemlen, UChar * * table, unsigned width, const UChar * search, const char * locale) { throwIncompatible(); }
  5468. ECLRTL_API int rtlNewSearchVUnicodeTable(unsigned count, UChar * * table, const UChar * search, const char * locale) { throwIncompatible(); }
  5469. ECLRTL_API int rtlNewSearchUtf8Table(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search, const char * locale) { throwIncompatible(); }
  5470. ECLRTL_API int rtlSearchTableInteger8(unsigned count, __int64 * table, __int64 search) { throwIncompatible(); }
  5471. ECLRTL_API int rtlSearchTableUInteger8(unsigned count, unsigned __int64 * table, unsigned __int64 search) { throwIncompatible(); }
  5472. ECLRTL_API int rtlSearchTableInteger4(unsigned count, int * table, int search) { throwIncompatible(); }
  5473. ECLRTL_API int rtlSearchTableUInteger4(unsigned count, unsigned * table, unsigned search) { throwIncompatible(); }
  5474. ECLRTL_API void rtlReleaseRowset(unsigned count, byte * * rowset) { throwIncompatible(); }
  5475. ECLRTL_API byte * * rtlLinkRowset(byte * * rowset) { throwIncompatible(); }
  5476. ECLRTL_API void deserializeRowsetX(size32_t & count, byte * * & data, IEngineRowAllocator * _rowAllocator, IOutputRowDeserializer * deserializer, MemoryBuffer &in) { throwIncompatible(); }
  5477. ECLRTL_API void deserializeGroupedRowsetX(size32_t & count, byte * * & data, IEngineRowAllocator * _rowAllocator, IOutputRowDeserializer * deserializer, MemoryBuffer &in) { throwIncompatible(); }
  5478. ECLRTL_API void deserializeDictionaryX(size32_t & count, byte * * & rowset, IEngineRowAllocator * _rowAllocator, IOutputRowDeserializer * deserializer, MemoryBuffer &in) { throwIncompatible(); }
  5479. ECLRTL_API void serializeRowsetX(size32_t count, byte * * data, IOutputRowSerializer * serializer, MemoryBuffer &out) { throwIncompatible(); }
  5480. ECLRTL_API void serializeGroupedRowsetX(size32_t count, byte * * data, IOutputRowSerializer * serializer, MemoryBuffer &out) { throwIncompatible(); }
  5481. ECLRTL_API void serializeDictionaryX(size32_t count, byte * * rows, IOutputRowSerializer * serializer, MemoryBuffer & buffer) { throwIncompatible(); }
  5482. ECLRTL_API double rtlLog(double x) { throwIncompatible(); }
  5483. ECLRTL_API double rtlLog10(double x) { throwIncompatible(); }
  5484. ECLRTL_API double rtlSqrt(double x) { throwIncompatible(); }
  5485. ECLRTL_API double rtlACos(double x) { throwIncompatible(); }
  5486. ECLRTL_API double rtlASin(double x) { throwIncompatible(); }
  5487. ECLRTL_API IRowStream * createRowStream(size32_t count, byte * * rowset) { throwIncompatible(); }
  5488. // from rtlkey.hpp version 6.x
  5489. interface IKeySegmentMonitor;
  5490. interface IOverrideableKeySegmentMonitor;
  5491. interface IKeySegmentFormatTranslator;
  5492. interface IKeySegmentOffsetTranslator;
  5493. ECLRTL_API IKeySegmentMonitor *createEmptyKeySegmentMonitor(bool optional, unsigned _offset, unsigned _size) { throwIncompatible(); }
  5494. ECLRTL_API IKeySegmentMonitor *createWildKeySegmentMonitor(unsigned _offset, unsigned _size) { throwIncompatible(); }
  5495. ECLRTL_API IKeySegmentMonitor *createDummyKeySegmentMonitor(unsigned _offset, unsigned _size, bool isSigned, bool isLittleEndian) { throwIncompatible(); }
  5496. ECLRTL_API IKeySegmentMonitor *createSingleKeySegmentMonitor(bool optional, unsigned _offset, unsigned _size, const void * value) { throwIncompatible(); }
  5497. ECLRTL_API IOverrideableKeySegmentMonitor *createOverrideableKeySegmentMonitor(IKeySegmentMonitor *base) { throwIncompatible(); }
  5498. ECLRTL_API IKeySegmentMonitor *createSingleBigSignedKeySegmentMonitor(bool optional, unsigned offset, unsigned size, const void * value) { throwIncompatible(); }
  5499. ECLRTL_API IKeySegmentMonitor *createSingleLittleSignedKeySegmentMonitor(bool optional, unsigned offset, unsigned size, const void * value) { throwIncompatible(); }
  5500. ECLRTL_API IKeySegmentMonitor *createSingleLittleKeySegmentMonitor(bool optional, unsigned offset, unsigned size, const void * value) { throwIncompatible(); }
  5501. ECLRTL_API IKeySegmentMonitor *createVarOffsetKeySegmentMonitor(IKeySegmentMonitor * base, unsigned offset, IKeySegmentOffsetTranslator * translator) { throwIncompatible(); }
  5502. ECLRTL_API IKeySegmentMonitor *createTranslatedKeySegmentMonitor(IKeySegmentMonitor * base, unsigned offset, IKeySegmentFormatTranslator * translator) { throwIncompatible(); }
  5503. //from rtlds_imp.hpp version 6.x
  5504. ECLRTL_API unsigned __int64 rtlDictionaryCount(size32_t tableSize, byte **table) { throwIncompatible(); }
  5505. ECLRTL_API bool rtlDictionaryExists(size32_t tableSize, byte **table) { throwIncompatible(); }
  5506. ECLRTL_API byte *rtlDictionaryLookup(IHThorHashLookupInfo &hashInfo, size32_t tableSize, byte **table, const byte *source, byte *defaultRow) { throwIncompatible(); }
  5507. ECLRTL_API byte *rtlDictionaryLookupString(size32_t tableSize, byte **table, size32_t len, const char *source, byte *defaultRow) { throwIncompatible(); }
  5508. ECLRTL_API byte *rtlDictionaryLookupStringN(size32_t tableSize, byte **table, size32_t N, size32_t len, const char *source, byte *defaultRow) { throwIncompatible(); }
  5509. ECLRTL_API byte *rtlDictionaryLookupSigned(size32_t tableSize, byte **table, __int64 source, byte *defaultRow) { throwIncompatible(); }
  5510. ECLRTL_API byte *rtlDictionaryLookupUnsigned(size32_t tableSize, byte **table, __uint64 source, byte *defaultRow) { throwIncompatible(); }
  5511. ECLRTL_API byte *rtlDictionaryLookupSignedN(size32_t tableSize, byte **table, size32_t size, __int64 source, byte *defaultRow) { throwIncompatible(); }
  5512. ECLRTL_API byte *rtlDictionaryLookupUnsignedN(size32_t tableSize, byte **table, size32_t size, __uint64 source, byte *defaultRow) { throwIncompatible(); }
  5513. ECLRTL_API bool rtlDictionaryLookupExists(IHThorHashLookupInfo &hashInfo, size32_t tableSize, byte **table, const byte *source) { throwIncompatible(); }
  5514. ECLRTL_API bool rtlDictionaryLookupExistsString(size32_t tableSize, byte **table, size32_t len, const char *source) { throwIncompatible(); }
  5515. ECLRTL_API bool rtlDictionaryLookupExistsStringN(size32_t tableSize, byte **table, size32_t N, size32_t len, const char *source) { throwIncompatible(); }
  5516. ECLRTL_API bool rtlDictionaryLookupExistsSigned(size32_t tableSize, byte **table, __int64 source) { throwIncompatible(); }
  5517. ECLRTL_API bool rtlDictionaryLookupExistsUnsigned(size32_t tableSize, byte **table, __uint64 source) { throwIncompatible(); }
  5518. ECLRTL_API bool rtlDictionaryLookupExistsSignedN(size32_t tableSize, byte **table, size32_t size, __uint64 source) { throwIncompatible(); }
  5519. ECLRTL_API bool rtlDictionaryLookupExistsUnsignedN(size32_t tableSize, byte **table, size32_t size, __uint64 source) { throwIncompatible(); }
  5520. ECLRTL_API void appendRowsToRowset(size32_t & targetCount, byte * * & targetRowset, IEngineRowAllocator * rowAllocator, size32_t count, byte * * rows) { throwIncompatible(); }
  5521. ECLRTL_API void rtlDeserializeChildRowset(size32_t & count, byte * * & rowset, IEngineRowAllocator * _rowAllocator, IOutputRowDeserializer * deserializer, IRowDeserializerSource & in) { throwIncompatible(); }
  5522. ECLRTL_API void rtlDeserializeChildGroupRowset(size32_t & count, byte * * & rowset, IEngineRowAllocator * _rowAllocator, IOutputRowDeserializer * deserializer, IRowDeserializerSource & in) { throwIncompatible(); }
  5523. ECLRTL_API void rtlSerializeChildRowset(IRowSerializerTarget & out, IOutputRowSerializer * serializer, size32_t count, byte * * rows) { throwIncompatible(); }
  5524. ECLRTL_API void rtlSerializeChildGroupRowset(IRowSerializerTarget & out, IOutputRowSerializer * serializer, size32_t count, byte * * rows) { throwIncompatible(); }
  5525. ECLRTL_API void rtlDataset2RowsetX(size32_t & count, byte * * & rowset, IEngineRowAllocator * _rowAllocator, IOutputRowDeserializer * deserializer, size32_t lenSrc, const void * src, bool isGrouped) { throwIncompatible(); }
  5526. ECLRTL_API void rtlRowset2DatasetX(unsigned & tlen, void * & tgt, IOutputRowSerializer * serializer, size32_t count, byte * * rows, bool isGrouped) { throwIncompatible(); }
  5527. ECLRTL_API void rtlDataset2RowsetX(size32_t & count, byte * * & rowset, IEngineRowAllocator * _rowAllocator, IOutputRowDeserializer * deserializer, size32_t lenSrc, const void * src) { throwIncompatible(); }
  5528. ECLRTL_API void rtlGroupedDataset2RowsetX(size32_t & count, byte * * & rowset, IEngineRowAllocator * _rowAllocator, IOutputRowDeserializer * deserializer, size32_t lenSrc, const void * src) { throwIncompatible(); }
  5529. ECLRTL_API void rtlRowset2DatasetX(unsigned & tlen, void * & tgt, IOutputRowSerializer * serializer, size32_t count, byte * * rows) { throwIncompatible(); }
  5530. ECLRTL_API void rtlGroupedRowset2DatasetX(unsigned & tlen, void * & tgt, IOutputRowSerializer * serializer, size32_t count, byte * * rows) { throwIncompatible(); }
  5531. ECLRTL_API void rtlDeserializeDictionary(size32_t & count, byte * * & rowset, IEngineRowAllocator * rowAllocator, IOutputRowDeserializer * deserializer, size32_t lenSrc, const void * src) { throwIncompatible(); }
  5532. ECLRTL_API void rtlDeserializeDictionaryFromDataset(size32_t & count, byte * * & rowset, IEngineRowAllocator * rowAllocator, IOutputRowDeserializer * deserializer, IHThorHashLookupInfo & hashInfo, size32_t lenSrc, const void * src) { throwIncompatible(); }
  5533. ECLRTL_API void rtlSerializeDictionary(unsigned & tlen, void * & tgt, IOutputRowSerializer * serializer, size32_t count, byte * * rows) { throwIncompatible(); }
  5534. ECLRTL_API void rtlSerializeDictionaryToDataset(unsigned & tlen, void * & tgt, IOutputRowSerializer * serializer, size32_t count, byte * * rows) { throwIncompatible(); }
  5535. ECLRTL_API void rtlSerializeDictionary(IRowSerializerTarget & out, IOutputRowSerializer * serializer, size32_t count, byte * * rows) { throwIncompatible(); }
  5536. ECLRTL_API void rtlSerializeDictionaryToDataset(IRowSerializerTarget & out, IOutputRowSerializer * serializer, size32_t count, byte * * rows) { throwIncompatible(); }
  5537. ECLRTL_API void rtlDeserializeChildDictionary(size32_t & count, byte * * & rowset, IEngineRowAllocator * _rowAllocator, IOutputRowDeserializer * deserializer, IRowDeserializerSource & in) { throwIncompatible(); }
  5538. ECLRTL_API void rtlDeserializeChildDictionaryFromDataset(size32_t & count, byte * * & rowset, IEngineRowAllocator * rowAllocator, IOutputRowDeserializer * deserializer, IHThorHashLookupInfo & hashInfo, IRowDeserializerSource & in) { throwIncompatible(); }
  5539. ECLRTL_API void rtlSerializeChildDictionary(IRowSerializerTarget & out, IOutputRowSerializer * serializer, size32_t count, byte * * rows) { throwIncompatible(); }
  5540. ECLRTL_API void rtlSerializeChildDictionaryToDataset(IRowSerializerTarget & out, IOutputRowSerializer * serializer, size32_t count, byte * * rows) { throwIncompatible(); }
  5541. #if 0
  5542. void PrintExtract(StringBuffer & s, const char * tag)
  5543. {
  5544. size32_t outLen;
  5545. char * out = NULL;
  5546. rtlExceptionExtract(outLen, out, s.str(), tag);
  5547. DBGLOG("%s = %.*s", tag, outLen, out);
  5548. rtlFree(out);
  5549. }
  5550. void testStructuredExceptions()
  5551. {
  5552. StringBuffer s;
  5553. s.append("This<is>some text");
  5554. PrintExtract(s, NULL);
  5555. PrintExtract(s, "text");
  5556. PrintExtract(s, "is");
  5557. rtlAddExceptionTag(s, "location", "192.168.12.1");
  5558. PrintExtract(s, NULL);
  5559. PrintExtract(s, "text");
  5560. PrintExtract(s, "is");
  5561. PrintExtract(s, "location");
  5562. rtlAddExceptionTag(s, "author", "gavin");
  5563. PrintExtract(s, NULL);
  5564. PrintExtract(s, "text");
  5565. PrintExtract(s, "is");
  5566. PrintExtract(s, "location");
  5567. PrintExtract(s, "author");
  5568. DBGLOG("%s", s.str());
  5569. }
  5570. static void testPackedUnsigned()
  5571. {
  5572. unsigned __int64 values[] = { 0, 1, 2, 10, 127, 128, 16383, 16384, 32767, 32768, 0xffffff, 0x7fffffff, 0xffffffff,
  5573. I64C(0xffffffffffffff), I64C(0x100000000000000), I64C(0x7fffffffffffffff), I64C(0xffffffffffffffff) };
  5574. unsigned numBytes[] = { 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 5, 5, 8, 9, 9, 9 };
  5575. unsigned numValues = _elements_in(values);
  5576. byte temp[9];
  5577. for (unsigned i = 0; i < numValues; i++)
  5578. {
  5579. rtlSetPackedUnsigned(temp, values[i]);
  5580. assertex(rtlGetPackedSize(temp) == numBytes[i]);
  5581. assertex(rtlGetPackedUnsigned(temp) == values[i]);
  5582. }
  5583. for (unsigned j= 0; j < 2000000; j++)
  5584. {
  5585. unsigned __int64 value = I64C(1) << (rtlRandom() & 63);
  5586. // unsigned value = rtlRandom();
  5587. rtlSetPackedUnsigned(temp, value);
  5588. assertex(rtlGetPackedSize(temp) == numExtraBytesFromValue(value)+1);
  5589. assertex(rtlGetPackedUnsigned(temp) == value);
  5590. }
  5591. for (unsigned k= 0; k < 63; k++)
  5592. {
  5593. unsigned __int64 value1 = I64C(1) << k;
  5594. rtlSetPackedUnsigned(temp, value1);
  5595. assertex(rtlGetPackedSize(temp) == numExtraBytesFromValue(value1)+1);
  5596. assertex(rtlGetPackedUnsigned(temp) == value1);
  5597. unsigned __int64 value2 = value1-1;
  5598. rtlSetPackedUnsigned(temp, value2);
  5599. assertex(rtlGetPackedSize(temp) == numExtraBytesFromValue(value2)+1);
  5600. assertex(rtlGetPackedUnsigned(temp) == value2);
  5601. }
  5602. }
  5603. static void testPackedSigned()
  5604. {
  5605. __int64 values[] = { 0, 1, -2, 10, 63, 64, -64, -65, 8191, 8192, 0x3fffffff,
  5606. I64C(0x7fffffffffffff), I64C(0x80000000000000), I64C(0x7fffffffffffffff), I64C(0x8000000000000000) };
  5607. unsigned numBytes[] = { 1, 1, 1, 1, 1, 2, 1, 2, 2, 3, 5,
  5608. 8, 9, 9, 9 };
  5609. unsigned numValues = _elements_in(values);
  5610. byte temp[9];
  5611. for (unsigned i = 0; i < numValues; i++)
  5612. {
  5613. rtlSetPackedSigned(temp, values[i]);
  5614. assertex(rtlGetPackedSize(temp) == numBytes[i]);
  5615. assertex(rtlGetPackedSigned(temp) == values[i]);
  5616. }
  5617. }
  5618. #endif
  5619. void ensureRtlLoaded()
  5620. {
  5621. }
  5622. #ifdef _USE_CPPUNIT
  5623. #include "unittests.hpp"
  5624. class EclRtlTests : public CppUnit::TestFixture
  5625. {
  5626. CPPUNIT_TEST_SUITE( EclRtlTests );
  5627. CPPUNIT_TEST(RegexTest);
  5628. CPPUNIT_TEST(MultiRegexTest);
  5629. CPPUNIT_TEST_SUITE_END();
  5630. protected:
  5631. void RegexTest()
  5632. {
  5633. rtlCompiledStrRegex r;
  5634. size32_t outlen;
  5635. char * out = NULL;
  5636. r.setPattern("([A-Z]+)[ ]?'(S) ", true);
  5637. r->replace(outlen, out, 7, "ABC'S ", 5, "$1$2 ");
  5638. ASSERT(outlen==6);
  5639. ASSERT(out != NULL);
  5640. ASSERT(memcmp(out, "ABCS ", outlen)==0);
  5641. rtlFree(out);
  5642. }
  5643. void MultiRegexTest()
  5644. {
  5645. class RegexTestThread : public Thread
  5646. {
  5647. virtual int run()
  5648. {
  5649. for (int i = 0; i < 100000; i++)
  5650. {
  5651. rtlCompiledStrRegex r;
  5652. size32_t outlen;
  5653. char * out = NULL;
  5654. r.setPattern("([A-Z]+)[ ]?'(S) ", true);
  5655. r->replace(outlen, out, 7, "ABC'S ", 5, "$1$2 ");
  5656. ASSERT(outlen==6);
  5657. ASSERT(out != NULL);
  5658. ASSERT(memcmp(out, "ABCS ", outlen)==0);
  5659. rtlFree(out);
  5660. }
  5661. return 0;
  5662. }
  5663. };
  5664. RegexTestThread t1;
  5665. RegexTestThread t2;
  5666. RegexTestThread t3;
  5667. t1.start();
  5668. t2.start();
  5669. t3.start();
  5670. t1.join();
  5671. t2.join();
  5672. t3.join();
  5673. }
  5674. };
  5675. CPPUNIT_TEST_SUITE_REGISTRATION( EclRtlTests );
  5676. CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( EclRtlTests, "EclRtlTests" );
  5677. #endif