eclrtl.cpp 168 KB


  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "limits.h"
  14. #ifdef _USE_BOOST_REGEX
  15. #include "boost/regex.hpp" // must precede platform.h ; n.b. this uses a #pragma comment(lib, ...) to link the appropriate .lib in MSVC
  16. #endif
  17. #include "platform.h"
  18. #include <math.h>
  19. #include <stdio.h>
  20. #include "jexcept.hpp"
  21. #include "jmisc.hpp"
  22. #include "jutil.hpp"
  23. #include "jlib.hpp"
  24. #include "jptree.hpp"
  25. #include "junicode.hpp"
  26. #include "eclrtl.hpp"
  27. #include "rtlbcd.hpp"
  28. #include "eclrtl_imp.hpp"
  29. #include "unicode/uchar.h"
  30. #include "unicode/ucol.h"
  31. #include "unicode/ustring.h"
  32. #include "unicode/ucnv.h"
  33. #include "unicode/schriter.h"
  34. #include "unicode/regex.h"
  35. #include "unicode/normlzr.h"
  36. #include "unicode/locid.h"
  37. #include "jlog.hpp"
  38. #include "jmd5.hpp"
  39. #include "rtlqstr.ipp"
  40. #include "roxiemem.hpp"
  41. #define UTF8_CODEPAGE "UTF-8"
  42. #define UTF8_MAXSIZE 4
// Module-wide random number generator; created in MODULE_INIT and released in MODULE_EXIT.
IRandomNumberGenerator * random_;
// Critical section declared next to random_ (its users are outside this part of the file).
static CriticalSection random_Sect;
MODULE_INIT(INIT_PRIORITY_ECLRTL_ECLRTL)
{
    random_ = createRandomNumberGenerator();
    // Seed from the current cycle counter.
    random_->seed((unsigned)get_cycles_now());
    return true;
}
MODULE_EXIT()
{
    random_->Release();
}
  55. //=============================================================================
  56. // Miscellaneous string functions...
  57. ECLRTL_API void * rtlMalloc(size32_t size)
  58. {
  59. if (!size)
  60. return NULL;
  61. void * retVal = malloc(size);
  62. if (!retVal)
  63. {
  64. PrintStackReport();
  65. rtlThrowOutOfMemory(0, "Memory allocation error!");
  66. }
  67. return retVal;
  68. }
// Release a block with free(); the counterpart of rtlMalloc()/rtlRealloc().
void rtlFree(void *ptr)
{
    free(ptr);
}
  73. ECLRTL_API void * rtlRealloc(void * _ptr, size32_t size)
  74. {
  75. void * retVal = realloc(_ptr, size);
  76. if( (0 < size) && (NULL == retVal))
  77. {
  78. PrintStackReport();
  79. rtlThrowOutOfMemory(0, "Memory reallocation error!");
  80. }
  81. return retVal;
  82. }
  83. //=============================================================================
// Release one row; forwards to ReleaseRoxieRow().
ECLRTL_API void rtlReleaseRow(const void * row)
{
    ReleaseRoxieRow(row);
}
// Release a rowset of count rows; forwards to ReleaseRoxieRowset().
ECLRTL_API void rtlReleaseRowset(unsigned count, byte * * rowset)
{
    ReleaseRoxieRowset(count, rowset);
}
// Link a row via LinkRoxieRow() and return the same pointer with const removed.
ECLRTL_API void * rtlLinkRow(const void * row)
{
    LinkRoxieRow(row);
    return const_cast<void *>(row);
}
// Link a rowset via LinkRoxieRowset() and return the same pointer.
ECLRTL_API byte * * rtlLinkRowset(byte * * rowset)
{
    LinkRoxieRowset(rowset);
    return rowset;
}
  102. //=============================================================================
  103. // Unicode helper classes and functions
  104. // escape
// Remove every code point that has the UCHAR_DEFAULT_IGNORABLE_CODE_POINT property.
//
// length/in   : input UTF-16 text (length in UChars).
// lenResult   : set to the stripped length, only when something was removed.
// result      : set to a buffer allocated with rtlMalloc, only when something was removed.
// Returns true if at least one code point was stripped; otherwise false and
// lenResult/result are left untouched.
static bool stripIgnorableCharacters(size32_t & lenResult, UChar * & result, size32_t length, const UChar * in)
{
    unsigned numStripped = 0;   // UChars removed so far
    unsigned lastGood = 0;      // index in 'in' of the first UChar not yet copied to result
    for (unsigned i=0; i < length; i++)
    {
        UChar32 c = in[i];
        unsigned stripSize = 0; // number of UChars to drop at position i (0 = keep)
        if (U16_IS_SURROGATE(c))
        {
            // Combine the surrogate pair into a single code point before testing it.
            // NOTE(review): an unpaired surrogate is still treated as occupying two
            // UChars below - confirm whether malformed input can reach this function.
            U16_GET(in, 0, i, length, c);
            if (u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
                stripSize = 2;
            else
                i++; // skip the surrogate
        }
        else
        {
            if (u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
                stripSize = 1;
        }
        if (stripSize != 0)
        {
            // The output buffer is only allocated once the first ignorable character is found.
            if (numStripped == 0)
                result = (UChar *)rtlMalloc((length-stripSize)*sizeof(UChar));
            //Copy any non-ignorable characters skipped up to this point. (Note result+x is scaled by UChar)
            memcpy(result + lastGood - numStripped, in+lastGood, (i-lastGood) * sizeof(UChar));
            lastGood = i+stripSize;
            numStripped += stripSize;
            // Step over the rest of the stripped code point (the loop increment adds the final 1).
            i += (stripSize-1);
        }
    }
    if (numStripped == 0)
        return false;
    lenResult = length-numStripped;
    // Copy whatever follows the last stripped character.
    memcpy(result + lastGood - numStripped, in+lastGood, (length-lastGood) * sizeof(UChar));
    return true;
}
  143. void escapeUnicode(unsigned inlen, UChar const * in, StringBuffer & out)
  144. {
  145. UCharCharacterIterator iter(in, inlen);
  146. for(iter.first32(); iter.hasNext(); iter.next32())
  147. {
  148. UChar32 c = iter.current32();
  149. if(c < 0x80)
  150. out.append((char) c);
  151. else if (c < 0x10000)
  152. out.appendf("\\u%04X", c);
  153. else
  154. out.appendf("\\U%08X", c);
  155. }
  156. }
  157. // locales and collators
// Number of collation strengths supported, and the ICU strength for each.
// RTLLocale::queryCollator(strength) indexes this table with strength-1.
static unsigned const unicodeStrengthLimit = 5;
static UCollationStrength unicodeStrength[unicodeStrengthLimit] =
{
    UCOL_PRIMARY,
    UCOL_SECONDARY,
    UCOL_TERTIARY,
    UCOL_QUATERNARY,
    UCOL_IDENTICAL
};
  167. class RTLLocale : public CInterface
  168. {
  169. public:
  170. RTLLocale(char const * _locale) : locale(_locale)
  171. {
  172. for(unsigned i=0; i<unicodeStrengthLimit; i++)
  173. colls[i] = NULL;
  174. UErrorCode err = U_ZERO_ERROR;
  175. colls[2] = ucol_open(locale.get(), &err);
  176. assertex(U_SUCCESS(err));
  177. }
  178. ~RTLLocale()
  179. {
  180. for(unsigned i=0; i<unicodeStrengthLimit; i++)
  181. if(colls[i]) ucol_close(colls[i]);
  182. }
  183. UCollator * queryCollator() const { return colls[2]; }
  184. UCollator * queryCollator(unsigned strength) const
  185. {
  186. if(strength == 0) strength = 1;
  187. if(strength > unicodeStrengthLimit) strength = unicodeStrengthLimit;
  188. if(!colls[strength-1])
  189. {
  190. UErrorCode err = U_ZERO_ERROR;
  191. const_cast<UCollator * *>(colls)[strength-1] = ucol_open(locale.get(), &err);
  192. assertex(U_SUCCESS(err));
  193. ucol_setStrength(colls[strength-1], unicodeStrength[strength-1]);
  194. }
  195. return colls[strength-1];
  196. }
  197. private:
  198. StringAttr locale;
  199. UCollator * colls[unicodeStrengthLimit];
  200. };
// Map from locale name to RTLLocale, created at module init and deleted at module exit.
// queryRTLLocale() takes localeCrit while it looks up or adds entries.
typedef MapStringTo<RTLLocale, char const *> MapStrToLocale;
MapStrToLocale *localeMap;
CriticalSection localeCrit;
MODULE_INIT(INIT_PRIORITY_STANDARD)
{
    localeMap = new MapStrToLocale;
    return true;
}
MODULE_EXIT()
{
    delete localeMap;
}
  213. bool rtlGetNormalizedUnicodeLocaleName(unsigned len, char const * in, char * out)
  214. {
  215. bool isPrimary = true;
  216. bool ok = true;
  217. unsigned i;
  218. for(i=0; i<len; i++)
  219. if(in[i] == '_')
  220. {
  221. out[i] = '_';
  222. isPrimary = false;
  223. }
  224. else if(isalpha(in[i]))
  225. {
  226. out[i] = (isPrimary ? tolower(in[i]) : toupper(in[i]));
  227. }
  228. else
  229. {
  230. out[i] = 0;
  231. ok = false;
  232. }
  233. return ok;
  234. }
  235. RTLLocale * queryRTLLocale(char const * locale)
  236. {
  237. if (!locale) locale = "";
  238. CriticalBlock b(localeCrit);
  239. RTLLocale * loc = localeMap->getValue(locale);
  240. if(!loc)
  241. {
  242. unsigned ll = strlen(locale);
  243. StringBuffer lnorm;
  244. rtlGetNormalizedUnicodeLocaleName(ll, locale, lnorm.reserve(ll));
  245. localeMap->setValue(locale, lnorm.str());
  246. loc = localeMap->getValue(locale);
  247. }
  248. return loc;
  249. }
  250. // converters
  251. class RTLUnicodeConverter : public CInterface
  252. {
  253. public:
  254. RTLUnicodeConverter(char const * codepage)
  255. {
  256. UErrorCode err = U_ZERO_ERROR;
  257. conv = ucnv_open(codepage, &err);
  258. if (!U_SUCCESS(err))
  259. {
  260. StringBuffer msg;
  261. msg.append("Unrecognised codepage '").append(codepage).append("'");
  262. rtlFail(0, msg.str());
  263. }
  264. }
  265. ~RTLUnicodeConverter()
  266. {
  267. ucnv_close(conv);
  268. }
  269. UConverter * query() const { return conv; }
  270. private:
  271. UConverter * conv;
  272. };
// Per-thread cache of converters keyed by codepage name, created on first use
// by queryRTLUnicodeConverter() and destroyed by clearUnicodeConverterMap().
typedef MapStringTo<RTLUnicodeConverter, char const *> MapStrToUnicodeConverter;
static __thread MapStrToUnicodeConverter *unicodeConverterMap = NULL;
// Value returned by addThreadTermFunc() when the clean-up hook was registered;
// it is called from clearUnicodeConverterMap().
static __thread ThreadTermFunc prevThreadTerminator = NULL;
// NOTE(review): not referenced by the functions visible here - confirm it is still needed.
CriticalSection ucmCrit;
  277. static void clearUnicodeConverterMap()
  278. {
  279. delete unicodeConverterMap;
  280. unicodeConverterMap = NULL; // Important to clear, as this is called when threadpool threads end...
  281. if (prevThreadTerminator)
  282. {
  283. (*prevThreadTerminator)();
  284. prevThreadTerminator = NULL;
  285. }
  286. }
  287. RTLUnicodeConverter * queryRTLUnicodeConverter(char const * codepage)
  288. {
  289. if (!unicodeConverterMap) // NB: one per thread, so no contention
  290. {
  291. unicodeConverterMap = new MapStrToUnicodeConverter;
  292. // Use thread terminator hook to clear them up on thread exit.
  293. // NB: May need to revisit if not on a jlib Thread.
  294. prevThreadTerminator = addThreadTermFunc(clearUnicodeConverterMap);
  295. }
  296. RTLUnicodeConverter * conv = unicodeConverterMap->getValue(codepage);
  297. if(!conv)
  298. {
  299. unicodeConverterMap->setValue(codepage, codepage);
  300. conv = unicodeConverterMap->getValue(codepage);
  301. }
  302. return conv;
  303. }
  304. // normalization
// True if the inlen-UChar string is not already in NFC form (per unorm_isNormalized).
bool unicodeNeedsNormalize(unsigned inlen, UChar * in, UErrorCode * err)
{
    return !unorm_isNormalized(in, inlen, UNORM_NFC, err);
}
// As above for a null-terminated string (length passed to ICU as -1).
bool vunicodeNeedsNormalize(UChar * in, UErrorCode * err)
{
    return !unorm_isNormalized(in, -1, UNORM_NFC, err);
}
  313. void unicodeReplaceNormalized(unsigned inlen, UChar * in, UErrorCode * err)
  314. {
  315. UChar * buff = (UChar *)rtlMalloc(inlen*2);
  316. unsigned len = unorm_normalize(in, inlen, UNORM_NFC, 0, buff, inlen, err);
  317. while(len<inlen) buff[len++] = 0x0020;
  318. memcpy(in, buff, inlen);
  319. free(buff);
  320. }
  321. void vunicodeReplaceNormalized(unsigned inlen, UChar * in, UErrorCode * err)
  322. {
  323. UChar * buff = (UChar *)rtlMalloc(inlen*2);
  324. unsigned len = unorm_normalize(in, -1, UNORM_NFC, 0, buff, inlen-1, err);
  325. buff[len] = 0x0000;
  326. memcpy(in, buff, inlen);
  327. free(buff);
  328. }
  329. void unicodeGetNormalized(unsigned & outlen, UChar * & out, unsigned inlen, UChar * in, UErrorCode * err)
  330. {
  331. outlen = unorm_normalize(in, inlen, UNORM_NFC, 0, 0, 0, err);
  332. out = (UChar *)rtlMalloc(outlen*2);
  333. unorm_normalize(in, inlen, UNORM_NFC, 0, out, outlen, err);
  334. }
  335. void vunicodeGetNormalized(UChar * & out, unsigned inlen, UChar * in, UErrorCode * err)
  336. {
  337. unsigned outlen = unorm_normalize(in, inlen, UNORM_NFC, 0, 0, 0, err);
  338. out = (UChar *)rtlMalloc((outlen+1)*2);
  339. unorm_normalize(in, inlen, UNORM_NFC, 0, out, outlen, err);
  340. out[outlen] = 0x0000;
  341. }
  342. void unicodeEnsureIsNormalized(unsigned len, UChar * str)
  343. {
  344. UErrorCode err = U_ZERO_ERROR;
  345. if(unicodeNeedsNormalize(len, str, &err))
  346. unicodeReplaceNormalized(len, str, &err);
  347. }
  348. void vunicodeEnsureIsNormalized(unsigned len, UChar * str)
  349. {
  350. UErrorCode err = U_ZERO_ERROR;
  351. if(vunicodeNeedsNormalize(str, &err))
  352. vunicodeReplaceNormalized(len, str, &err);
  353. }
  354. void unicodeEnsureIsNormalizedX(unsigned & len, UChar * & str)
  355. {
  356. UErrorCode err = U_ZERO_ERROR;
  357. if(unicodeNeedsNormalize(len, str, &err))
  358. {
  359. unsigned inlen = len;
  360. UChar * in = str;
  361. unicodeGetNormalized(len, str, inlen, in, &err);
  362. free(in);
  363. }
  364. }
  365. void vunicodeEnsureIsNormalizedX(unsigned inlen, UChar * & str)
  366. {
  367. UErrorCode err = U_ZERO_ERROR;
  368. if(unicodeNeedsNormalize(inlen, str, &err))
  369. {
  370. UChar * in = str;
  371. vunicodeGetNormalized(str, inlen, in, &err);
  372. free(in);
  373. }
  374. }
  375. void unicodeNormalizedCopy(UChar * out, UChar * in, unsigned len)
  376. {
  377. UErrorCode err = U_ZERO_ERROR;
  378. if(unicodeNeedsNormalize(len, in, &err))
  379. unorm_normalize(in, len, UNORM_NFC, 0, out, len, &err);
  380. else
  381. memcpy(out, in, len);
  382. }
// Compose 'in' into 'out' using Normalizer::compose (compat flag false, options 0).
// Asserts that ICU reported success.
void normalizeUnicodeString(UnicodeString const & in, UnicodeString & out)
{
    UErrorCode err = U_ZERO_ERROR;
    Normalizer::compose(in, false, 0, out, err);
    assertex(U_SUCCESS(err));
}
  389. // padding
  390. static void multimemset(char * out, size_t outlen, char const * in, size_t inlen)
  391. {
  392. size_t outpos = 0;
  393. size_t inpos = 0;
  394. while(outpos < outlen)
  395. {
  396. out[outpos++] = in[inpos++];
  397. if(inpos == inlen)
  398. inpos = 0;
  399. }
  400. }
// Cache, keyed by codepage name, of the bytes that encode a space in that
// codepage. Created at module init, deleted at module exit, and accessed under
// ubcCrit by codepageBlankFill().
typedef MapStringTo<MemoryAttr, size32_t> MemoryAttrMapping;
MemoryAttrMapping *unicodeBlankCache;
CriticalSection ubcCrit;
MODULE_INIT(INIT_PRIORITY_STANDARD)
{
    unicodeBlankCache = new MemoryAttrMapping;
    return true;
}
MODULE_EXIT()
{
    delete unicodeBlankCache;
}
// U+0020; the character codepageBlankFill() converts to obtain the blank bytes.
UChar unicodeSpace = 0x0020;
  414. void codepageBlankFill(char const * codepage, char * out, size_t len)
  415. {
  416. CriticalBlock b(ubcCrit);
  417. MemoryAttr * cached = unicodeBlankCache->getValue(codepage);
  418. if(cached)
  419. {
  420. char const * blank = (char const *)cached->get();
  421. size_t blanklen = cached->length();
  422. if(blanklen==1)
  423. memset(out, *blank, len);
  424. else
  425. multimemset(out, len, blank, blanklen);
  426. }
  427. else
  428. {
  429. unsigned blanklen;
  430. char * blank;
  431. rtlUnicodeToCodepageX(blanklen, blank, 1, &unicodeSpace, codepage);
  432. unicodeBlankCache->setValue(codepage, blanklen);
  433. unicodeBlankCache->getValue(codepage)->set(blanklen, blank);
  434. if(blanklen==1)
  435. memset(out, *blank, len);
  436. else
  437. multimemset(out, len, blank, blanklen);
  438. free(blank);
  439. }
  440. }
  441. //---------------------------------------------------------------------------
  442. // floating point functions
  443. static const double smallPowers[16] = {
  444. 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
  445. 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15 };
  446. static double powerOfTen(int x)
  447. {
  448. if (x < 0)
  449. return 1 / powerOfTen(-x);
  450. double value = smallPowers[x&15];
  451. double scale = 1e16;
  452. x >>= 4;
  453. while (x)
  454. {
  455. if (x & 1)
  456. value *= scale;
  457. scale *= scale;
  458. x >>= 1;
  459. }
  460. return value;
  461. };
  462. static double kk = (1.0 / ((unsigned __int64)1<<53));
  463. __int64 rtlRound(double x)
  464. {
  465. //a fudge to make numbers that are inexact after a division round up "correctly".
  466. //coded rather oddly as microsoft's optimizer has a habit of throwing it away otherwise...
  467. volatile double tt = x * kk;
  468. x += tt;
  469. if (x >= 0.0)
  470. return (__int64)(x + 0.5);
  471. return -(__int64)(-x + 0.5);
  472. }
  473. double rtlRoundTo(const double x, int places)
  474. {
  475. if (x < 0)
  476. return -rtlRoundTo(-x, places);
  477. volatile double tt = x * kk;
  478. double x0 = x + tt;
  479. if (places >= 0)
  480. {
  481. double scale = powerOfTen(places);
  482. return floor(x * scale + 0.5) / scale;
  483. }
  484. else
  485. {
  486. double scale = powerOfTen(-places);
  487. return floor(x / scale + 0.5) * scale;
  488. }
  489. }
  490. __int64 rtlRoundDown(double x)
  491. {
  492. if (x >= 0.0)
  493. return (__int64)floor(x);
  494. return (__int64)ceil(x);
  495. }
  496. __int64 rtlRoundUp(double x)
  497. {
  498. if (x >= 0.0)
  499. return (__int64)ceil(x);
  500. return (__int64)floor(x);
  501. }
  502. //=============================================================================
  503. // Numeric conversion functions... - fixed length target
// Shared body for the fixed-length integer-to-string functions below.
// Expects the enclosing function to provide: val (the number), temp (scratch
// buffer), l (target length) and t (target, not null terminated).
// numtostr() is taken to write the text into temp and return its length.
// If the text does not fit the target is filled with '*'; otherwise it is
// left-justified and padded with spaces.
#define intToStringNBody() \
    unsigned len = numtostr(temp, val); \
    if (len > l) \
        memset(t,'*',l); \
    else \
    { \
        memcpy(t,temp,len); \
        memset(t+len, ' ', l-len); \
    }
// unsigned 32-bit value -> fixed-length string
void rtlUInt4ToStr(size32_t l, char * t, unsigned val)
{
    char temp[20];
    intToStringNBody();
}
// unsigned 64-bit value -> fixed-length string
void rtlUInt8ToStr(size32_t l, char * t, unsigned __int64 val)
{
    char temp[40];
    intToStringNBody();
}
// signed 32-bit value -> fixed-length string
void rtlInt4ToStr(size32_t l, char * t, int val)
{
    char temp[20];
    intToStringNBody();
}
// signed 64-bit value -> fixed-length string
void rtlInt8ToStr(size32_t l, char * t, __int64 val)
{
    char temp[40];
    intToStringNBody();
}
  533. //=============================================================================
  534. // Numeric conversion functions... - unknown length target
// Shared body for the integer-to-string functions that allocate their result.
// Expects the enclosing function to provide: val, temp (scratch buffer) and the
// reference parameters l and t. The text is copied into a buffer obtained from
// rtlMalloc (no terminator); l receives its length and t the buffer.
#define intToUnknownStringBody() \
    unsigned len = numtostr(temp, val); \
    char * result = (char *)rtlMalloc(len); \
    memcpy(result, temp, len); \
    l = len; \
    t = result;
// unsigned 32-bit value -> newly allocated string
void rtlUInt4ToStrX(size32_t & l, char * & t, unsigned val)
{
    char temp[20];
    intToUnknownStringBody();
}
// unsigned 64-bit value -> newly allocated string
void rtlUInt8ToStrX(size32_t & l, char * & t, unsigned __int64 val)
{
    char temp[40];
    intToUnknownStringBody();
}
// signed 32-bit value -> newly allocated string
void rtlInt4ToStrX(size32_t & l, char * & t, int val)
{
    char temp[20];
    intToUnknownStringBody();
}
// signed 64-bit value -> newly allocated string
void rtlInt8ToStrX(size32_t & l, char * & t, __int64 val)
{
    char temp[40];
    intToUnknownStringBody();
}
  561. //=============================================================================
  562. // Numeric conversion functions... - fixed length ebcdic target
// ILKA - converting a number to ebcdic still goes via an intermediate ascii string;
// for more efficiency a function numtoebcdicstr should be implemented.
//
// Shared body for the fixed-length integer-to-EBCDIC functions below.
// Expects the enclosing function to provide: val, astr (ascii scratch buffer),
// estr (ebcdic scratch buffer), l (target length) and t (target).
// The number is formatted into astr, converted into estr with rtlStrToEStr, then
// copied to the target and padded with '@' (byte 0x40 when this source is compiled
// as ASCII - presumably chosen because that is the EBCDIC space).
// NOTE(review): the overflow fill byte 0x2A is '*' in ASCII; if an EBCDIC asterisk
// was intended it would normally be 0x5C - confirm before changing.
#define intToEbcdicStringNBody() \
    unsigned len = numtostr(astr, val); \
    rtlStrToEStr(sizeof(estr),estr,len,astr); \
    if (len > l) \
        memset(t,0x2A,l); \
    else \
    { \
        memcpy(t,estr,len); \
        memset(t+len, '@', l-len); \
    }
// unsigned 32-bit value -> fixed-length EBCDIC string
void rtl_l42en(size32_t l, char * t, unsigned val)
{
    char astr[20];
    char estr[20];
    intToEbcdicStringNBody();
}
// unsigned 64-bit value -> fixed-length EBCDIC string
void rtl_l82en(size32_t l, char * t, unsigned __int64 val)
{
    char astr[40];
    char estr[40];
    intToEbcdicStringNBody();
}
// signed 32-bit value -> fixed-length EBCDIC string
void rtl_ls42en(size32_t l, char * t, int val)
{
    char astr[20];
    char estr[20];
    intToEbcdicStringNBody();
}
// signed 64-bit value -> fixed-length EBCDIC string
void rtl_ls82en(size32_t l, char * t, __int64 val)
{
    char astr[40];
    char estr[40];
    intToEbcdicStringNBody();
}
  599. //=============================================================================
  600. // Numeric conversion functions... - unknown length ebcdic target
// Each function below formats the number as ascii text and passes it to
// rtlStrToEStrX, which fills in l and t.
// MSVC warning 4700 (use of an uninitialized local variable) is disabled for
// this group of functions.
#if defined _MSC_VER
#pragma warning(push)
#pragma warning(disable:4700)
#endif
// unsigned 32-bit value -> EBCDIC string via rtlStrToEStrX
void rtl_l42ex(size32_t & l, char * & t, unsigned val)
{
    char astr[20];
    unsigned alen = numtostr(astr, val);
    rtlStrToEStrX(l,t,alen,astr);
}
// unsigned 64-bit value -> EBCDIC string via rtlStrToEStrX
void rtl_l82ex(size32_t & l, char * & t, unsigned __int64 val)
{
    char astr[40];
    unsigned alen = numtostr(astr, val);
    rtlStrToEStrX(l,t,alen,astr);
}
// signed 32-bit value -> EBCDIC string via rtlStrToEStrX
void rtl_ls42ex(size32_t & l, char * & t, int val)
{
    char astr[20];
    unsigned alen = numtostr(astr, val);
    rtlStrToEStrX(l,t,alen,astr);
}
// signed 64-bit value -> EBCDIC string via rtlStrToEStrX
void rtl_ls82ex(size32_t & l, char * & t, __int64 val)
{
    char astr[40];
    unsigned alen = numtostr(astr, val);
    rtlStrToEStrX(l,t,alen,astr);
}
#ifdef _MSC_VER
#pragma warning(pop)
#endif
  632. //=============================================================================
  633. // Numeric conversion functions... - fixed length variable target
  634. #define intToVarStringNBody() \
  635. unsigned len = numtostr(temp, val) + 1; \
  636. if (len > l) \
  637. { \
  638. memset(t,'*',l); \
  639. t[l-1]=0; \
  640. } \
  641. else \
  642. memcpy(t,temp,len);
  643. void rtlUInt4ToVStr(size32_t l, char * t, unsigned val)
  644. {
  645. char temp[20];
  646. intToVarStringNBody();
  647. }
  648. void rtlUInt8ToVStr(size32_t l, char * t, unsigned __int64 val)
  649. {
  650. char temp[40];
  651. intToVarStringNBody();
  652. }
  653. void rtlInt4ToVStr(size32_t l, char * t, int val)
  654. {
  655. char temp[20];
  656. intToVarStringNBody();
  657. }
  658. void rtlInt8ToVStr(size32_t l, char * t, __int64 val)
  659. {
  660. char temp[40];
  661. intToVarStringNBody();
  662. }
  663. //=============================================================================
  664. // Numeric conversion functions... - unknown length variable target
// Shared body for the integer-to-allocated-varstring functions below.
// Expects the enclosing function to provide val and temp (scratch buffer).
// The text is explicitly null terminated and a strdup() copy is returned.
#define intToVarStringXBody() \
    unsigned len = numtostr(temp, val); \
    temp[len] = 0; \
    return strdup(temp);
// unsigned 32-bit value -> strdup'd null-terminated string
char * rtlUInt4ToVStrX(unsigned val)
{
    char temp[20];
    intToVarStringXBody();
}
// unsigned 64-bit value -> strdup'd null-terminated string
char * rtlUInt8ToVStrX(unsigned __int64 val)
{
    char temp[40];
    intToVarStringXBody();
}
// signed 32-bit value -> strdup'd null-terminated string
char * rtlInt4ToVStrX(int val)
{
    char temp[20];
    intToVarStringXBody();
}
// signed 64-bit value -> strdup'd null-terminated string
char * rtlInt8ToVStrX(__int64 val)
{
    char temp[40];
    intToVarStringXBody();
}
  689. //---------------------------------------------------------------------------
  690. double rtlStrToReal(size32_t l, const char * t)
  691. {
  692. char * temp = (char *)alloca(l+1);
  693. memcpy(temp, t, l);
  694. temp[l] = 0;
  695. return rtlVStrToReal(temp);
  696. }
  697. double rtlEStrToReal(size32_t l, const char * t)
  698. {
  699. char * astr = (char*)alloca(l);
  700. rtlEStrToStr(l,astr,l,t);
  701. char * temp = (char *)alloca(l+1);
  702. memcpy(temp, astr, l);
  703. temp[l] = 0;
  704. return rtlVStrToReal(temp);
  705. }
  706. double rtlVStrToReal(const char * t)
  707. {
  708. char * end;
  709. return strtod(t, &end);
  710. }
  711. double rtl_ex2f(const char * t)
  712. {
  713. unsigned len = strlen(t);
  714. char * astr = (char*)alloca(len+1);
  715. rtlEStrToStr(len,astr,len,t);
  716. astr[len] = 0;
  717. return rtlVStrToReal(astr);
  718. }
  719. double rtlUnicodeToReal(size32_t l, UChar const * t)
  720. {
  721. unsigned bufflen;
  722. char * buff;
  723. rtlUnicodeToCodepageX(bufflen, buff, l, t, "ascii");
  724. double ret = rtlStrToReal(bufflen, buff);
  725. rtlFree(buff);
  726. return ret;
  727. }
  728. //---------------------------------------------------------------------------
  729. static void truncFixedReal(size32_t l, char * t, StringBuffer & temp)
  730. {
  731. const char * str = temp.str();
  732. unsigned len = temp.length();
  733. if (len > l)
  734. {
  735. //If we don't lose significant digits left of the decimal point then truncate the string.
  736. const char * dot = strchr(str, '.');
  737. if (dot && ((size_t)(dot - str) <= l))
  738. len = l;
  739. }
  740. if (len > l)
  741. memset(t,'*',l);
  742. else
  743. {
  744. memcpy(t,temp.str(),len);
  745. memset(t+len, ' ', l-len);
  746. }
  747. }
// Like truncFixedReal(), but rounds the retained digits instead of simply cutting.
// The text in temp is modified in place while rounding.
// Text that fits is copied and space padded; if the result cannot be represented
// in l characters the target is filled with '*'.
// NOTE(review): not called from any function visible in this part of the file.
static void roundFixedReal(size32_t l, char * t, StringBuffer & temp)
{
    const char * str = temp.str();
    unsigned len = temp.length();
    if (len > l)
    {
        //If we don't lose significant digits left of the decimal point then truncate the string.
        const char * dot = strchr(str, '.');
        if (dot && ((size_t)(dot - str) <= l))
        {
            len = l;
            //Unfortunately we now need to potentially round the number which could even lead to
            //an extra digit, and failure to fit.  Is there a simpler way of handling this?
            // The first dropped character decides the rounding; when the cut falls
            // exactly on the '.', the digit after the '.' is used instead.
            bool decimalIsNext = ((dot - str) == l);
            char next = decimalIsNext ? dot[1] : str[len];
            bool rounding = (next >= '5');
            unsigned cur = len;
            // Propagate the carry leftwards through the retained digits.
            while ((cur > 0) && rounding)
            {
                next = str[cur-1];
                if (next == '-')
                    break;      // reached the sign - the carry goes in front of the digits
                if (next != '.')
                {
                    if (next != '9')
                    {
                        temp.setCharAt(cur-1, next+1);
                        rounding = false;
                        break;
                    }
                    else
                        temp.setCharAt(cur-1, '0');
                }
                cur--;
            }
            if (rounding)
            {
                //Ugly, but it is an exceptional case.
                // The carry ran off the front: insert a leading '1' if a decimal point
                // is retained, otherwise the integer part no longer fits.
                if (!decimalIsNext)
                    temp.insert(cur, '1');
                else
                    len++;  // overflow
            }
        }
    }
    if (len > l)
        memset(t,'*',l);
    else
    {
        // temp.str() is fetched again because temp may have been modified above.
        memcpy(t,temp.str(),len);
        memset(t+len, ' ', l-len);
    }
}
// Format a double into the fixed-length target t[0..l) using truncFixedReal().
void rtlRealToStr(size32_t l, char * t, double val)
{
    StringBuffer temp;
    temp.append(val);
    //This could either truncate or round when converting a real to a string
    //Rounding is more user friendly, but then (string3)(string)1.99 != (string3)1.99 which is
    //rather counter-intuitive.  (That is still true if the value is out of range.)
    truncFixedReal(l, t, temp);
}
// Format a float into the fixed-length target t[0..l) using truncFixedReal().
void rtlRealToStr(size32_t l, char * t, float val)
{
    StringBuffer temp;
    temp.append(val);
    //See comment above
    truncFixedReal(l, t, temp);
}
  817. void rtlRealToStrX(size32_t & l, char * & t, double val)
  818. {
  819. StringBuffer temp;
  820. temp.append(val);
  821. unsigned len = temp.length();
  822. char * result = (char *)rtlMalloc(len);
  823. memcpy(result,temp.str(),len);
  824. l = len;
  825. t = result;
  826. }
  827. void rtlRealToStrX(size32_t & l, char * & t, float val)
  828. {
  829. StringBuffer temp;
  830. temp.append(val);
  831. unsigned len = temp.length();
  832. char * result = (char *)rtlMalloc(len);
  833. memcpy(result,temp.str(),len);
  834. l = len;
  835. t = result;
  836. }
  837. void rtlRealToVStr(size32_t l, char * t, double val)
  838. {
  839. StringBuffer temp;
  840. temp.append(val);
  841. unsigned len = temp.length()+1;
  842. if (len > l)
  843. {
  844. memset(t,'*',l);
  845. t[l-1]=0;
  846. }
  847. else
  848. {
  849. memcpy(t,temp.str(),len);
  850. }
  851. }
  852. void rtlRealToVStr(size32_t l, char * t, float val)
  853. {
  854. StringBuffer temp;
  855. temp.append(val);
  856. unsigned len = temp.length()+1;
  857. if (len > l)
  858. {
  859. memset(t,'*',l);
  860. t[l-1]=0;
  861. }
  862. else
  863. {
  864. memcpy(t,temp.str(),len);
  865. }
  866. }
// Format a double and return a strdup() copy of the text.
char * rtlRealToVStrX(double val)
{
    StringBuffer temp;
    temp.append(val);
    return strdup(temp);
}
// Format a float and return a strdup() copy of the text.
char * rtlRealToVStrX(float val)
{
    StringBuffer temp;
    temp.append(val);
    return strdup(temp);
}
  879. //---------------------------------------------------------------------------
// Advance t (and decrement l) past any leading spaces, tabs, '-' and '+'.
// Signs are skipped and ignored. The macro defines the label 'done', so it can
// be used at most once per function.
#define SkipSpaces(l, t) \
    while (l) \
    { \
        char c = *t; \
        switch (c) \
        { \
        case ' ': \
        case '\t': \
        case '-': \
        case '+': \
            break; \
        default: \
            goto done; \
        } \
        l--; \
        t++; \
    } \
done:
// As SkipSpaces, but sets negate to true if any '-' is seen in the skipped prefix.
// negate is never reset to false, so repeated '-' characters do not cancel out.
#define SkipSignSpaces(l, t, negate) \
    while (l) \
    { \
        char c = *t; \
        switch (c) \
        { \
        case '-': \
            negate = true; \
            break; \
        case ' ': \
        case '\t': \
        case '+': \
            break; \
        default: \
            goto done; \
        } \
        l--; \
        t++; \
    } \
done:
  918. unsigned rtlStrToUInt4(size32_t l, const char * t)
  919. {
  920. SkipSpaces(l, t);
  921. unsigned v = 0;
  922. while (l--)
  923. {
  924. char c = *t++;
  925. if ((c >= '0') && (c <= '9'))
  926. v = v * 10 + (c-'0');
  927. else
  928. break;
  929. }
  930. return v;
  931. }
  932. unsigned __int64 rtlStrToUInt8(size32_t l, const char * t)
  933. {
  934. SkipSpaces(l, t);
  935. unsigned __int64 v = 0;
  936. while (l--)
  937. {
  938. char c = *t++;
  939. if ((c >= '0') && (c <= '9'))
  940. v = v * 10 + (c-'0');
  941. else
  942. break;
  943. }
  944. return v;
  945. }
  946. int rtlStrToInt4(size32_t l, const char * t)
  947. {
  948. bool negate = false;
  949. SkipSignSpaces(l, t, negate);
  950. int v = 0;
  951. while (l--)
  952. {
  953. char c = *t++;
  954. if ((c >= '0') && (c <= '9'))
  955. v = v * 10 + (c-'0');
  956. else
  957. break;
  958. }
  959. return negate ? -v : v;
  960. }
  961. __int64 rtlStrToInt8(size32_t l, const char * t)
  962. {
  963. bool negate = false;
  964. SkipSignSpaces(l, t, negate);
  965. __int64 v = 0;
  966. while (l--)
  967. {
  968. char c = *t++;
  969. if ((c >= '0') && (c <= '9'))
  970. v = v * 10 + (c-'0');
  971. else
  972. break;
  973. }
  974. return negate ? -v : v;
  975. }
  976. __int64 rtlUnicodeToInt8(size32_t l, UChar const * t)
  977. {
  978. unsigned bufflen;
  979. char * buff;
  980. rtlUnicodeToCodepageX(bufflen, buff, l, t, "ascii");
  981. __int64 ret = rtlStrToInt8(bufflen, buff);
  982. rtlFree(buff);
  983. return ret;
  984. }
  985. bool rtlStrToBool(size32_t l, const char * t)
  986. {
  987. while (l--)
  988. {
  989. char c = *t++;
  990. if (c != ' ')
  991. return true;
  992. }
  993. return false;
  994. }
  995. bool rtlUnicodeToBool(size32_t l, UChar const * t)
  996. {
  997. while(l--)
  998. if(*t++ != 0x20) return true;
  999. return false;
  1000. }
// return true for "on", "true" or any non-zero constant, else false;
// (the decision itself is delegated to clipStrToBool)
bool rtlCsvStrToBool(size32_t l, const char * t)
{
    return clipStrToBool(l, t);
}
  1006. //---------------------------------------------------------------------------
// EBCDIC variants of the fixed-length string parsers: each converts the input
// with rtlEStrToStr() into a stack buffer of the same length and then calls the
// corresponding rtlStrTo* function.

// EBCDIC string -> unsigned 32-bit value
unsigned rtlEStrToUInt4(size32_t l, const char * t)
{
    char * astr = (char*)alloca(l);
    rtlEStrToStr(l,astr,l,t);
    return rtlStrToUInt4(l,astr);
}
// EBCDIC string -> unsigned 64-bit value
unsigned __int64 rtlEStrToUInt8(size32_t l, const char * t)
{
    char * astr = (char*)alloca(l);
    rtlEStrToStr(l,astr,l,t);
    return rtlStrToUInt8(l,astr);
}
// EBCDIC string -> signed 32-bit value
int rtlEStrToInt4(size32_t l, const char * t)
{
    char * astr = (char*)alloca(l);
    rtlEStrToStr(l,astr,l,t);
    return rtlStrToInt4(l,astr);
}
// EBCDIC string -> signed 64-bit value
__int64 rtlEStrToInt8(size32_t l, const char * t)
{
    char * astr = (char*)alloca(l);
    rtlEStrToStr(l,astr,l,t);
    return rtlStrToInt8(l,astr);
}
// EBCDIC string -> bool (see rtlStrToBool)
bool rtl_en2b(size32_t l, const char * t)
{
    char * astr = (char*)alloca(l);
    rtlEStrToStr(l,astr,l,t);
    return rtlStrToBool(l,astr);
}
  1037. //---------------------------------------------------------------------------
// Null-terminated variants of the integer parsers: each measures the string
// with strlen() and calls the corresponding fixed-length function.

// null-terminated string -> unsigned 32-bit value
unsigned rtlVStrToUInt4(const char * t)
{
    return rtlStrToUInt4(strlen(t), t);
}
// null-terminated string -> unsigned 64-bit value
unsigned __int64 rtlVStrToUInt8(const char * t)
{
    return rtlStrToUInt8(strlen(t), t);
}
// null-terminated string -> signed 32-bit value
int rtlVStrToInt4(const char * t)
{
    return rtlStrToInt4(strlen(t), t);
}
// null-terminated string -> signed 64-bit value
__int64 rtlVStrToInt8(const char * t)
{
    return rtlStrToInt8(strlen(t), t);
}
  // Returns true when the null-terminated string t contains at least one
  // character (of any value, spaces included); false when it is empty.
  bool rtlVStrToBool(const char * t)
  {
      //MORE: Allow spaces if we change the semantics.
      return *t != 0;
  }
  1064. //---------------------------------------------------------------------------
  1065. void holeIntFormat(size32_t maxlen, char * target, __int64 value, unsigned width, unsigned flags)
  1066. {
  1067. StringBuffer result;
  1068. if (flags & 1)
  1069. result.appendf("%0*" I64F "d", width, value);
  1070. else
  1071. result.appendf("%*" I64F "d", width, value);
  1072. size32_t written = result.length();
  1073. if (written > maxlen)
  1074. memset(target, '*', maxlen);
  1075. else
  1076. {
  1077. memset(target+written, ' ', maxlen-written);
  1078. memcpy(target, result.str(), written);
  1079. }
  1080. }
  1081. void holeRealFormat(size32_t maxlen, char * target, double value, unsigned width, unsigned places)
  1082. {
  1083. if ((int) width <= 0)
  1084. return;
  1085. const unsigned tempSize = 500;
  1086. char temp[tempSize*2+2]; // Space for leading digits/0, '-' and \0 terminator
  1087. //Ensure that we output at most 2*tempSize characters.
  1088. unsigned formatWidth = width < tempSize ? width : tempSize;
  1089. if (places >= formatWidth)
  1090. places = formatWidth-1;
  1091. unsigned written = sprintf(temp, "%*.*f", formatWidth, places, value);
  1092. const char * src = temp;
  1093. if (written > width)
  1094. {
  1095. //Strip a leading 0 for very small numbers.
  1096. if (*src == '0')
  1097. {
  1098. written--;
  1099. src++;
  1100. }
  1101. }
  1102. if (written > width)
  1103. {
  1104. memset(target, '*', width);
  1105. if (places)
  1106. target[width-places-1] = '.';
  1107. }
  1108. else
  1109. {
  1110. unsigned delta = width - written;
  1111. if (delta)
  1112. memset(target, ' ', delta);
  1113. memcpy(target+delta, src, written);
  1114. }
  1115. }
  1116. //=============================================================================
  1117. // Conversion functions...
  1118. void rtlIntFormat(unsigned & len, char * & target, __int64 value, unsigned width, unsigned flags)
  1119. {
  1120. if ((int) width <= 0)
  1121. {
  1122. len = 0;
  1123. target = NULL;
  1124. return;
  1125. }
  1126. len = width;
  1127. target = (char *)rtlMalloc(width);
  1128. holeIntFormat(width, target, value, width, flags);
  1129. }
  1130. void rtlRealFormat(unsigned & len, char * & target, double value, unsigned width, unsigned places)
  1131. {
  1132. if ((int) width < 0)
  1133. {
  1134. len = 0;
  1135. target = NULL;
  1136. return;
  1137. }
  1138. len = width;
  1139. target = (char *)rtlMalloc(width);
  1140. holeRealFormat(width, target, value, width, places);
  1141. }
  1142. //=============================================================================
  1143. // String functions...
  // Returns true if any of the len bytes at _src is non-zero.
  bool rtlDataToBool(unsigned len, const void * _src)
  {
      const char * bytes = static_cast<const char *>(_src);
      for (unsigned i = 0; i < len; ++i)
      {
          if (bytes[i] != 0)
              return true;
      }
      return false;
  }
  // Encodes a boolean into a tlen-byte data field: all bytes zero, with the last
  // byte set to 1 when src is true.
  // A zero-length target is left untouched (previously this wrote to index
  // tlen-1, i.e. far outside the buffer).
  void rtlBoolToData(unsigned tlen, void * tgt, bool src)
  {
      if (tlen == 0)
          return;
      memset(tgt, 0, tlen);
      if (src)
          ((char *)tgt)[tlen-1] = 1;
  }
  // Encodes a boolean into a tlen-character string field: all spaces, with the
  // last character set to '1' when src is true.
  // A zero-length target is left untouched (previously this wrote to index
  // tlen-1, i.e. far outside the buffer).
  void rtlBoolToStr(unsigned tlen, void * tgt, bool src)
  {
      if (tlen == 0)
          return;
      memset(tgt, ' ', tlen);
      if (src)
          ((char *)tgt)[tlen-1] = '1';
  }
  // Writes "1" (true) or "" (false) as a null-terminated string into tgt.
  // tgt must have room for two characters.
  void rtlBoolToVStr(char * tgt, bool src)
  {
      if (src)
      {
          tgt[0] = '1';
          tgt[1] = 0;
      }
      else
          tgt[0] = 0;
  }
  1170. void rtlBoolToStrX(unsigned & tlen, char * & tgt, bool src)
  1171. {
  1172. if (src)
  1173. {
  1174. char * ret = (char *)rtlMalloc(1);
  1175. ret[0] = '1';
  1176. tlen = 1;
  1177. tgt = ret;
  1178. }
  1179. else
  1180. {
  1181. tlen = 0;
  1182. tgt = NULL;
  1183. }
  1184. }
  // Returns a strdup'd null-terminated string: "1" for true, "" for false.
  char * rtlBoolToVStrX(bool src)
  {
      const char * text = src ? "1" : "";
      return strdup(text);
  }
  1192. //-----------------------------------------------------------------------------
  1193. // String copying functions....
  // Copies src into the fixed-size target: at most tlen bytes are copied and any
  // remaining target bytes are zero filled.
  void rtlDataToData(unsigned tlen, void * tgt, unsigned slen, const void * src)
  {
      const unsigned copyLen = (slen > tlen) ? tlen : slen;
      memcpy(tgt, src, copyLen);
      if (copyLen < tlen)
          memset(static_cast<char *>(tgt) + copyLen, 0, tlen - copyLen);
  }
  // Copies src into the fixed-size target: at most tlen bytes are copied and any
  // remaining target bytes are zero filled.
  void rtlStrToData(unsigned tlen, void * tgt, unsigned slen, const void * src)
  {
      const unsigned copyLen = (slen > tlen) ? tlen : slen;
      memcpy(tgt, src, copyLen);
      if (copyLen < tlen)
          memset(static_cast<char *>(tgt) + copyLen, 0, tlen - copyLen);
  }
  // Copies src into the fixed-size target: at most tlen characters are copied
  // and any remaining target characters are space filled.
  void rtlStrToStr(unsigned tlen, void * tgt, unsigned slen, const void * src)
  {
      const unsigned copyLen = (slen > tlen) ? tlen : slen;
      memcpy(tgt, src, copyLen);
      if (copyLen < tlen)
          memset(static_cast<char *>(tgt) + copyLen, ' ', tlen - copyLen);
  }
  // Copies src into tgt as a null-terminated string. When tlen is non-zero the
  // copy is truncated to tlen-1 characters so the terminator fits; a tlen of 0
  // applies no limit at all (slen characters plus the terminator are written).
  void rtlStrToVStr(unsigned tlen, void * tgt, unsigned slen, const void * src)
  {
      char * out = static_cast<char *>(tgt);
      unsigned copyLen = slen;
      if ((tlen != 0) && (copyLen >= tlen))
          copyLen = tlen - 1;
      memcpy(out, src, copyLen);
      out[copyLen] = 0;
  }
  1225. void rtlStr2EStr(unsigned tlen, char * tgt, unsigned slen, const char * src)
  1226. {
  1227. rtlStrToEStr(tlen,tgt,slen,src);
  1228. }
  1229. void rtlEStr2Data(unsigned tlen, void * tgt, unsigned slen, const char * src)
  1230. {
  1231. if (slen > tlen)
  1232. slen = tlen;
  1233. rtlEStrToStr(slen,(char *)tgt,slen,src);
  1234. if (tlen > slen)
  1235. memset((char *)tgt+slen, 0, tlen-slen);
  1236. }
  1237. void rtlEStr2Str(unsigned tlen, void * tgt, unsigned slen, const char * src)
  1238. {
  1239. rtlEStrToStr(tlen,(char *)tgt,slen,src);
  1240. }
  1241. void rtlEStrToVStr(unsigned tlen, void * tgt, unsigned slen, const char * src)
  1242. {
  1243. if (slen >= tlen)
  1244. slen = tlen-1;
  1245. rtlEStrToStr(slen,(char *)tgt,slen,src);
  1246. *((char *)tgt+slen)=0;
  1247. }
  // Copies src into the fixed-size target: at most tlen bytes are copied and any
  // remaining target bytes are filled with '@'.
  void rtlEStrToEStr(unsigned tlen, void * tgt, unsigned slen, const void * src)
  {
      const unsigned copyLen = (slen > tlen) ? tlen : slen;
      memcpy(tgt, src, copyLen);
      if (copyLen < tlen)
          memset(static_cast<char *>(tgt) + copyLen, '@', tlen - copyLen);
  }
  1256. void rtlVStrToData(unsigned tlen, void * tgt, const char * src)
  1257. {
  1258. rtlStrToData(tlen, tgt, strlen(src), src);
  1259. }
  1260. void rtlVStrToStr(unsigned tlen, void * tgt, const char * src)
  1261. {
  1262. rtlStrToStr(tlen, tgt, strlen(src), src);
  1263. }
  1264. void rtlVStr2EStr(unsigned tlen, char * tgt, const char * src)
  1265. {
  1266. rtlStr2EStr(tlen, tgt, strlen(src), src);
  1267. }
  1268. void rtlVStrToVStr(unsigned tlen, void * tgt, const char * src)
  1269. {
  1270. rtlStrToVStr(tlen, tgt, strlen(src), src);
  1271. }
  1272. char *rtlCreateQuotedString(unsigned _len_tgt,char * tgt)
  1273. {
  1274. // Add ' at start and end. MORE! also needs to handle embedded quotes
  1275. char * result = (char *)rtlMalloc(_len_tgt + 3);
  1276. result[0] = '\'';
  1277. memcpy(result+1, tgt, _len_tgt);
  1278. result[_len_tgt+1] = '\'';
  1279. result[_len_tgt+2] = 0;
  1280. return result;
  1281. }
  1282. //-----------------------------------------------------------------------------
  1283. //List of strings with length of -1 to mark the end...
  1284. void rtlConcat(unsigned & tlen, char * * tgt, ...)
  1285. {
  1286. va_list args;
  1287. unsigned totalLength = 0;
  1288. va_start(args, tgt);
  1289. for (;;)
  1290. {
  1291. unsigned len = va_arg(args, unsigned);
  1292. if (len+1==0)
  1293. break;
  1294. char * str = va_arg(args, char *);
  1295. totalLength += len;
  1296. }
  1297. va_end(args);
  1298. char * buffer = (char *)rtlMalloc(totalLength);
  1299. char * cur = buffer;
  1300. va_start(args, tgt);
  1301. for (;;)
  1302. {
  1303. unsigned len = va_arg(args, unsigned);
  1304. if (len+1==0)
  1305. break;
  1306. char * str = va_arg(args, char *);
  1307. memcpy(cur, str, len);
  1308. cur += len;
  1309. }
  1310. va_end(args);
  1311. tlen = totalLength;
  1312. *tgt = buffer;
  1313. }
  1314. void rtlConcatVStr(char * * tgt, ...)
  1315. {
  1316. va_list args;
  1317. unsigned totalLength = 0;
  1318. va_start(args, tgt);
  1319. for (;;)
  1320. {
  1321. unsigned len = va_arg(args, unsigned);
  1322. if (len+1==0)
  1323. break;
  1324. char * str = va_arg(args, char *);
  1325. totalLength += len;
  1326. }
  1327. va_end(args);
  1328. char * buffer = (char *)rtlMalloc(totalLength+1);
  1329. char * cur = buffer;
  1330. va_start(args, tgt);
  1331. for (;;)
  1332. {
  1333. unsigned len = va_arg(args, unsigned);
  1334. if (len+1==0)
  1335. break;
  1336. char * str = va_arg(args, char *);
  1337. memcpy(cur, str, len);
  1338. cur += len;
  1339. }
  1340. va_end(args);
  1341. cur[0] = 0;
  1342. *tgt = buffer;
  1343. }
  1344. void rtlConcatUnicode(unsigned & tlen, UChar * * tgt, ...)
  1345. {
  1346. va_list args;
  1347. unsigned totalLength = 0;
  1348. va_start(args, tgt);
  1349. for(;;)
  1350. {
  1351. unsigned len = va_arg(args, unsigned);
  1352. if(len+1==0)
  1353. break;
  1354. UChar * str = va_arg(args, UChar *);
  1355. totalLength += len;
  1356. }
  1357. va_end(args);
  1358. UChar * buffer = (UChar *)rtlMalloc(totalLength*2); //I *believe* this is a valid upper limit, as an NFC concatenation can only be shorter than the sum of its parts
  1359. unsigned idx = 0;
  1360. UErrorCode err = U_ZERO_ERROR;
  1361. va_start(args, tgt);
  1362. for(;;)
  1363. {
  1364. unsigned len = va_arg(args, unsigned);
  1365. if(len+1==0)
  1366. break;
  1367. UChar * str = va_arg(args, UChar *);
  1368. if (len)
  1369. idx = unorm_concatenate(buffer, idx, str, len, buffer, totalLength, UNORM_NFC, 0, &err);
  1370. }
  1371. va_end(args);
  1372. *tgt = buffer;
  1373. tlen = idx;
  1374. }
  1375. void rtlConcatVUnicode(UChar * * tgt, ...)
  1376. {
  1377. va_list args;
  1378. unsigned totalLength = 0;
  1379. va_start(args, tgt);
  1380. for(;;)
  1381. {
  1382. unsigned len = va_arg(args, unsigned);
  1383. if(len+1==0)
  1384. break;
  1385. UChar * str = va_arg(args, UChar *);
  1386. totalLength += len;
  1387. }
  1388. va_end(args);
  1389. UChar * buffer = (UChar *)rtlMalloc((totalLength+1)*2); //I *believe* this is a valid upper limit, as an NFC concatenation can only be shorter than the sum of its parts
  1390. unsigned idx = 0;
  1391. UErrorCode err = U_ZERO_ERROR;
  1392. va_start(args, tgt);
  1393. for(;;)
  1394. {
  1395. unsigned len = va_arg(args, unsigned);
  1396. if(len+1==0)
  1397. break;
  1398. UChar * str = va_arg(args, UChar *);
  1399. if (len)
  1400. idx = unorm_concatenate(buffer, idx, str, len, buffer, totalLength, UNORM_NFC, 0, &err);
  1401. }
  1402. va_end(args);
  1403. buffer[idx++] = 0x0000;
  1404. *tgt = buffer;
  1405. }
  // Concatenates a variable list of (unsigned length, const char * data) pairs
  // into the fixed tlen-byte target, truncating once it is full and padding any
  // unused tail with the fill character.
  // The list is terminated by a length of -1; reading stops early as soon as the
  // target is full.
  void rtlConcatStrF(unsigned tlen, void * _tgt, int fill, ...)
  {
      const unsigned endMarker = (unsigned)-1;
      char * out = static_cast<char *>(_tgt);
      unsigned used = 0;

      va_list args;
      va_start(args, fill);
      while (used != tlen)
      {
          const unsigned len = va_arg(args, unsigned);
          if (len == endMarker)
              break;
          const char * piece = va_arg(args, const char *);

          // Clip the piece to the space that is left.
          unsigned take = len;
          if (len + used > tlen)
              take = tlen - used;
          memcpy(out + used, piece, take);
          used += take;
      }
      va_end(args);

      if (used < tlen)
          memset(out + used, fill, tlen - used);
  }
  // Concatenates a variable list of (unsigned length, const char * data) pairs
  // into tgt, keeping at most tlen characters, then zero fills from the end of
  // the text through index tlen inclusive (so tgt needs tlen+1 bytes).
  // The list is terminated by a length of -1; reading stops early as soon as
  // tlen characters have been stored.
  void rtlConcatVStrF(unsigned tlen, char * tgt, ...)
  {
      const unsigned endMarker = (unsigned)-1;
      unsigned used = 0;

      va_list args;
      va_start(args, tgt);
      while (used != tlen)
      {
          const unsigned len = va_arg(args, unsigned);
          if (len == endMarker)
              break;
          const char * piece = va_arg(args, const char *);

          // Clip the piece to the space that is left.
          unsigned take = len;
          if (len + used > tlen)
              take = tlen - used;
          memcpy(tgt + used, piece, take);
          used += take;
      }
      va_end(args);

      memset(tgt + used, 0, (tlen + 1) - used);
  }
  1445. void rtlConcatUnicodeF(unsigned tlen, UChar * tgt, ...)
  1446. {
  1447. va_list args;
  1448. unsigned idx = 0;
  1449. UErrorCode err = U_ZERO_ERROR;
  1450. va_start(args, tgt);
  1451. for(;;)
  1452. {
  1453. unsigned len = va_arg(args, unsigned);
  1454. if(len+1==0)
  1455. break;
  1456. UChar * str = va_arg(args, UChar *);
  1457. if (len)
  1458. idx = unorm_concatenate(tgt, idx, str, len, tgt, tlen, UNORM_NFC, 0, &err);
  1459. }
  1460. va_end(args);
  1461. while (idx < tlen)
  1462. tgt[idx++] = ' ';
  1463. }
  1464. void rtlConcatVUnicodeF(unsigned tlen, UChar * tgt, ...)
  1465. {
  1466. va_list args;
  1467. unsigned idx = 0;
  1468. UErrorCode err = U_ZERO_ERROR;
  1469. va_start(args, tgt);
  1470. for(;;)
  1471. {
  1472. unsigned len = va_arg(args, unsigned);
  1473. if(len+1==0)
  1474. break;
  1475. UChar * str = va_arg(args, UChar *);
  1476. if (len)
  1477. idx = unorm_concatenate(tgt, idx, str, len, tgt, tlen, UNORM_NFC, 0, &err);
  1478. }
  1479. va_end(args);
  1480. while (idx < tlen)
  1481. tgt[idx++] = 0;
  1482. tgt[tlen] = 0;
  1483. }
  1484. //------------------------------------------------------------------------------------------------
  1485. // The following concat functions are all deprecated in favour of the variable number of argument
  1486. // versions
  // Appends up to slen characters of src at offset idx of the tlen-byte target,
  // clipping to the space remaining, and returns the new end offset.
  unsigned rtlConcatStrToStr(unsigned tlen, char * tgt, unsigned idx, unsigned slen, const char * src)
  {
      const unsigned room = tlen - idx;
      const unsigned count = (room > slen) ? slen : room;
      memcpy(tgt + idx, src, count);
      return idx + count;
  }
  // Appends the null-terminated string src at offset idx of the tlen-byte
  // target, stopping at the terminator or when the target is full, and returns
  // the new end offset. No terminator is written.
  unsigned rtlConcatVStrToStr(unsigned tlen, char * tgt, unsigned idx, const char * src)
  {
      for (; idx != tlen; ++idx, ++src)
      {
          const char ch = *src;
          if (ch == 0)
              break;
          tgt[idx] = ch;
      }
      return idx;
  }
  1506. void rtlConcatStrToVStr(unsigned tlen, void * _tgt, unsigned slen, const void * src)
  1507. {
  1508. char * tgt = (char *)_tgt;
  1509. unsigned tend = strlen(tgt);
  1510. rtlStrToVStr(tlen-tend, tgt+tend, slen, src);
  1511. }
  1512. void rtlConcatVStrToVStr(unsigned tlen, void * _tgt, const char * src)
  1513. {
  1514. char * tgt = (char *)_tgt;
  1515. unsigned tend = strlen(tgt);
  1516. rtlVStrToVStr(tlen-tend, tgt+tend, src);
  1517. }
  1518. unsigned rtlConcatUnicodeToUnicode(unsigned tlen, UChar * tgt, unsigned idx, unsigned slen, UChar const * src)
  1519. {
  1520. UErrorCode err = U_ZERO_ERROR;
  1521. return unorm_concatenate(tgt, idx, src, slen, tgt, tlen, UNORM_NFC, 0, &err);
  1522. }
  1523. unsigned rtlConcatVUnicodeToUnicode(unsigned tlen, UChar * tgt, unsigned idx, UChar const * src)
  1524. {
  1525. return rtlConcatUnicodeToUnicode(tlen, tgt, idx, rtlUnicodeStrlen(src), src);
  1526. }
  // Fills tgt[idx .. tlen) with '@'; does nothing when idx is at or past tlen.
  void rtlESpaceFill(unsigned tlen, char * tgt, unsigned idx)
  {
      if (idx >= tlen)
          return;
      memset(tgt + idx, '@', tlen - idx);
  }
  // Fills tgt[idx .. tlen) with spaces; does nothing when idx is at or past tlen.
  void rtlSpaceFill(unsigned tlen, char * tgt, unsigned idx)
  {
      if (idx >= tlen)
          return;
      memset(tgt + idx, ' ', tlen - idx);
  }
  // Fills tgt[idx .. tlen) with zero bytes; does nothing when idx is at or past tlen.
  void rtlZeroFill(unsigned tlen, char * tgt, unsigned idx)
  {
      if (idx >= tlen)
          return;
      memset(tgt + idx, 0, tlen - idx);
  }
  // Writes a terminator at tgt[idx], or at the last slot (tlen-1) when idx is at
  // or past tlen.
  void rtlNullTerminate(unsigned tlen, char * tgt, unsigned idx)
  {
      const unsigned at = (idx >= tlen) ? tlen - 1 : idx;
      tgt[at] = 0;
  }
  1548. void rtlUnicodeSpaceFill(unsigned tlen, UChar * tgt, unsigned idx)
  1549. {
  1550. while(idx<tlen) tgt[idx++] = 0x0020;
  1551. }
  1552. void rtlUnicodeNullTerminate(unsigned tlen, UChar * tgt, unsigned idx)
  1553. {
  1554. if (idx >= tlen)
  1555. idx = tlen-1;
  1556. tgt[idx] = 0x0000;
  1557. }
  1558. void rtlUnicodeStrcpy(UChar * tgt, UChar const * src)
  1559. {
  1560. memcpy(tgt, src, rtlUnicodeStrlen(src)*2+2);
  1561. }
  1562. void rtlConcatExtend(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  1563. {
  1564. unsigned len = tlen + slen;
  1565. tgt = (char *)rtlRealloc(tgt, len);
  1566. memcpy(tgt+tlen, src, slen);
  1567. tlen = len;
  1568. }
  1569. void rtlConcatUnicodeExtend(size32_t & tlen, UChar * & tgt, size32_t slen, const UChar * src)
  1570. {
  1571. unsigned len = tlen + slen;
  1572. tgt = (UChar *)rtlRealloc(tgt, len * sizeof(UChar));
  1573. memcpy(tgt+tlen, src, slen * sizeof(UChar));
  1574. tlen = len;
  1575. }
  1576. //-----------------------------------------------------------------------------
  // Converts a 1-based start position into a 0-based offset clamped to [0, slen].
  // Positions that are zero or negative (as signed values) map to 0.
  inline void normalizeFrom(unsigned & from, unsigned slen)
  {
      const unsigned zeroBased = from - 1;
      if ((int)zeroBased < 0)
          from = 0;
      else
          from = (zeroBased > slen) ? slen : zeroBased;
  }
  // Converts a 1-based inclusive [from, to] range into a 0-based half-open one:
  // from is decremented (floored at 0) and to is raised to from when it would
  // otherwise fall before it (compared as signed values).
  inline void normalizeFromTo(unsigned & from, unsigned & to)
  {
      const unsigned zeroBased = from - 1;
      from = ((int)zeroBased < 0) ? 0 : zeroBased;
      if ((int)to < (int)from)
          to = from;
  }
  // Clips a 0-based half-open range to the source length. from is only examined
  // when to had to be clipped.
  inline void clipFromTo(unsigned & from, unsigned & to, unsigned slen)
  {
      if (to <= slen)
          return;
      to = slen;
      if (from > slen)
          from = slen;
  }
  1600. //NB: From and to are 1 based: Now fills to ensure the correct length.
  1601. void * doSubStrFT(unsigned & tlen, unsigned slen, const void * src, unsigned from, unsigned to, byte fillChar)
  1602. {
  1603. normalizeFromTo(from, to);
  1604. unsigned len = to - from;
  1605. clipFromTo(from, to, slen);
  1606. unsigned copylen = to - from;
  1607. char * buffer = (char *)rtlMalloc(len);
  1608. memcpy(buffer, (byte *)src+from, copylen);
  1609. if (copylen < len)
  1610. memset(buffer+copylen, fillChar, len-copylen);
  1611. tlen = len;
  1612. return buffer;
  1613. }
  1614. void rtlSubStrFX(unsigned & tlen, char * & tgt, unsigned slen, const char * src, unsigned from)
  1615. {
  1616. normalizeFrom(from, slen);
  1617. tlen = slen-from;
  1618. tgt = (char *) rtlMalloc(tlen);
  1619. memcpy(tgt, src+from, tlen);
  1620. }
  1621. void rtlSubStrFTX(unsigned & tlen, char * & tgt, unsigned slen, const char * src, unsigned from, unsigned to)
  1622. {
  1623. tgt = (char *)doSubStrFT(tlen, slen, src, from, to, ' ');
  1624. }
  1625. void rtlSubStrFT(unsigned tlen, char * tgt, unsigned slen, const char * src, unsigned from, unsigned to)
  1626. {
  1627. unsigned char fillChar = ' '; // More, should be passed as a parameter
  1628. normalizeFromTo(from, to);
  1629. clipFromTo(from, to, slen);
  1630. unsigned copylen = to - from;
  1631. if (copylen > tlen)
  1632. copylen = tlen;
  1633. memcpy(tgt, (const char *)src+from, copylen);
  1634. if (copylen < tlen)
  1635. memset(tgt+copylen, fillChar, tlen-copylen);
  1636. }
  1637. void rtlSubDataFT(unsigned tlen, void * tgt, unsigned slen, const void * src, unsigned from, unsigned to)
  1638. {
  1639. normalizeFromTo(from, to);
  1640. clipFromTo(from, to, slen);
  1641. unsigned copylen = to - from;
  1642. if (copylen > tlen)
  1643. copylen = tlen;
  1644. memcpy(tgt, (char *)src+from, copylen);
  1645. if (copylen < tlen)
  1646. memset((byte*)tgt+copylen, 0, tlen-copylen);
  1647. }
  1648. void rtlSubDataFTX(unsigned & tlen, void * & tgt, unsigned slen, const void * src, unsigned from, unsigned to)
  1649. {
  1650. tgt = doSubStrFT(tlen, slen, src, from, to, 0);
  1651. }
  1652. void rtlSubDataFX(unsigned & tlen, void * & tgt, unsigned slen, const void * src, unsigned from)
  1653. {
  1654. normalizeFrom(from, slen);
  1655. tlen = slen-from;
  1656. tgt = (char *) rtlMalloc(tlen);
  1657. memcpy(tgt, (const byte *)src+from, tlen);
  1658. }
  1659. void rtlUnicodeSubStrFTX(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src, unsigned from, unsigned to)
  1660. {
  1661. normalizeFromTo(from, to);
  1662. tlen = to - from;
  1663. clipFromTo(from, to, slen);
  1664. tgt = (UChar *)rtlMalloc(tlen*2);
  1665. unsigned copylen = to - from;
  1666. memcpy(tgt, src+from, copylen*2);
  1667. while(copylen<tlen)
  1668. tgt[copylen++] = 0x0020;
  1669. }
  1670. void rtlUnicodeSubStrFX(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src, unsigned from)
  1671. {
  1672. normalizeFrom(from, slen);
  1673. tlen = slen - from;
  1674. tgt = (UChar *)rtlMalloc(tlen*2);
  1675. memcpy(tgt, src+from, tlen*2);
  1676. }
  1677. void rtlSubQStrFTX(unsigned & tlen, char * & tgt, unsigned slen, char const * src, unsigned from, unsigned to)
  1678. {
  1679. normalizeFromTo(from, to);
  1680. tlen = to - from;
  1681. clipFromTo(from, to, slen);
  1682. tgt = (char *)rtlMalloc(rtlQStrSize(tlen));
  1683. copyQStrRange(tlen, tgt, src, from, to);
  1684. }
  1685. void rtlSubQStrFX(unsigned & tlen, char * & tgt, unsigned slen, char const * src, unsigned from)
  1686. {
  1687. normalizeFrom(from, slen);
  1688. tlen = slen - from;
  1689. tgt = (char *)rtlMalloc(rtlQStrSize(tlen));
  1690. copyQStrRange(tlen, tgt, src, from, slen);
  1691. }
  1692. void rtlSubQStrFT(unsigned tlen, char * tgt, unsigned slen, const char * src, unsigned from, unsigned to)
  1693. {
  1694. normalizeFromTo(from, to);
  1695. clipFromTo(from, to, slen);
  1696. copyQStrRange(tlen, tgt, src, from ,to);
  1697. }
  1698. //-----------------------------------------------------------------------------
  1699. unsigned rtlTrimStrLen(size32_t l, const char * t)
  1700. {
  1701. while (l)
  1702. {
  1703. if (t[l-1] != ' ')
  1704. break;
  1705. l--;
  1706. }
  1707. return l;
  1708. }
  1709. unsigned rtlTrimDataLen(size32_t l, const void * _t)
  1710. {
  1711. const char * t = (const char *)_t;
  1712. while (l)
  1713. {
  1714. if (t[l-1] != 0)
  1715. break;
  1716. l--;
  1717. }
  1718. return l;
  1719. }
  1720. unsigned rtlTrimUnicodeStrLen(size32_t l, UChar const * t)
  1721. {
  1722. if (!l)
  1723. return 0;
  1724. UCharCharacterIterator iter(t, l);
  1725. for(iter.last32(); iter.hasPrevious(); iter.previous32())
  1726. if(!u_isspace(iter.current32()))
  1727. break;
  1728. if(u_isspace(iter.current32())) return iter.getIndex(); // required as the reverse iteration above doesn't hit the first character
  1729. return iter.getIndex() + 1;
  1730. }
  1731. inline size32_t rtlQuickTrimUnicode(size32_t len, UChar const * str)
  1732. {
  1733. while (len && u_isspace(str[len-1]))
  1734. len--;
  1735. return len;
  1736. }
  // Returns the length of the null-terminated string t once trailing spaces
  // (' ' only) are ignored. The string is scanned once from the front.
  unsigned rtlTrimVStrLen(const char * t)
  {
      const char * const start = t;
      const char * trimmedEnd = start;
      for (const char * cur = start; *cur != 0; ++cur)
      {
          if (*cur != ' ')
              trimmedEnd = cur + 1;   // one past the latest non-space character
      }
      return (trimmedEnd - start);
  }
  1749. unsigned rtlTrimVUnicodeStrLen(UChar const * t)
  1750. {
  1751. return rtlTrimUnicodeStrLen(rtlUnicodeStrlen(t), t);
  1752. }
  1753. inline unsigned rtlLeftTrimStrStart(size32_t slen, const char * src)
  1754. {
  1755. unsigned i = 0;
  1756. while(i < slen && src[i] == ' ')
  1757. i++;
  1758. return i;
  1759. }
  1760. inline unsigned rtlLeftTrimUnicodeStrStart(size32_t slen, UChar const * src)
  1761. {
  1762. UCharCharacterIterator iter(src, slen);
  1763. for(iter.first32(); iter.hasNext(); iter.next32())
  1764. if(!u_isspace(iter.current32()))
  1765. break;
  1766. return iter.getIndex();
  1767. }
  // Returns the number of leading spaces in the null-terminated string src.
  inline unsigned rtlLeftTrimVStrStart(const char * src)
  {
      unsigned skipped = 0;
      for (const char * cur = src; *cur == ' '; ++cur)
          ++skipped;
      return skipped;
  }
  1775. inline void rtlTrimUtf8Len(unsigned & trimLen, size32_t & trimSize, size32_t len, const char * t)
  1776. {
  1777. const byte * start = (const byte *)t;
  1778. const byte * cur = start;
  1779. unsigned trimLength = 0;
  1780. const byte * trimEnd = cur;
  1781. for (unsigned i=0; i < len; i++)
  1782. {
  1783. unsigned next = readUtf8Character(UTF8_MAXSIZE, cur);
  1784. if (!u_isspace(next))
  1785. {
  1786. trimLength = i+1;
  1787. trimEnd = cur;
  1788. }
  1789. }
  1790. trimLen = trimLength;
  1791. trimSize = trimEnd-start;
  1792. }
  1793. inline void rtlTrimUtf8Start(unsigned & trimLen, size32_t & trimSize, size32_t len, const char * t)
  1794. {
  1795. const byte * start = (const byte *)t;
  1796. const byte * cur = start;
  1797. for (unsigned i=0; i < len; i++)
  1798. {
  1799. const byte * prev = cur;
  1800. unsigned next = readUtf8Character(UTF8_MAXSIZE, cur);
  1801. if (!u_isspace(next))
  1802. {
  1803. trimLen = i;
  1804. trimSize = prev-start;
  1805. return;
  1806. }
  1807. }
  1808. trimLen = len;
  1809. trimSize = cur-start;
  1810. }
  1811. inline char * rtlDupSubString(const char * src, unsigned len)
  1812. {
  1813. char * buffer = (char *)rtlMalloc(len + 1);
  1814. memcpy(buffer, src, len);
  1815. buffer[len] = 0;
  1816. return buffer;
  1817. }
  1818. inline UChar * rtlDupSubUnicode(UChar const * src, unsigned len)
  1819. {
  1820. UChar * buffer = (UChar *)rtlMalloc((len + 1) * 2);
  1821. memcpy(buffer, src, len*2);
  1822. buffer[len] = 0x00;
  1823. return buffer;
  1824. }
  1825. inline void rtlCopySubStringV(size32_t tlen, char * tgt, unsigned slen, const char * src)
  1826. {
  1827. if (slen >= tlen)
  1828. slen = tlen-1;
  1829. memcpy(tgt, src, slen);
  1830. tgt[slen] = 0;
  1831. }
  1832. //not yet used, but would be needed for assignment to string rather than vstring
  1833. inline void rtlCopySubString(size32_t tlen, char * tgt, unsigned slen, const char * src, char fill)
  1834. {
  1835. if (slen > tlen)
  1836. slen = tlen;
  1837. memcpy(tgt, src, slen);
  1838. memset(tgt + slen, fill, tlen-slen);
  1839. }
  1840. unsigned rtlTrimUtf8StrLen(size32_t len, const char * t)
  1841. {
  1842. const byte * cur = (const byte *)t;
  1843. unsigned trimLength = 0;
  1844. for (unsigned i=0; i < len; i++)
  1845. {
  1846. unsigned next = readUtf8Character(UTF8_MAXSIZE, cur);
  1847. if (!u_isspace(next))
  1848. trimLength = i+1;
  1849. }
  1850. return trimLength;
  1851. }
  1852. //-----------------------------------------------------------------------------
  1853. // Functions to trim off right side blank spaces
  1854. void rtlTrimRight(size32_t & tlen, char * & tgt, unsigned slen, const char * src)
  1855. {
  1856. tlen = rtlTrimStrLen(slen, src);
  1857. tgt = rtlDupSubString(src, tlen);
  1858. }
  1859. void rtlTrimUnicodeRight(size32_t & tlen, UChar * & tgt, unsigned slen, UChar const * src)
  1860. {
  1861. tlen = rtlTrimUnicodeStrLen(slen, src);
  1862. tgt = rtlDupSubUnicode(src, tlen);
  1863. }
  1864. void rtlTrimVRight(size32_t & tlen, char * & tgt, const char * src)
  1865. {
  1866. tlen = rtlTrimVStrLen(src);
  1867. tgt = rtlDupSubString(src, tlen);
  1868. }
  1869. void rtlTrimVUnicodeRight(size32_t & tlen, UChar * & tgt, UChar const * src)
  1870. {
  1871. rtlTrimUnicodeRight(tlen, tgt, rtlUnicodeStrlen(src), src);
  1872. }
  1873. void rtlTrimUtf8Right(unsigned &tlen, char * &tgt, unsigned slen, char const * src)
  1874. {
  1875. unsigned trimLength;
  1876. size32_t trimSize;
  1877. rtlTrimUtf8Len(trimLength, trimSize, slen, src);
  1878. tlen = trimLength;
  1879. tgt = rtlDupSubString(src, trimSize);
  1880. }
  1881. void rtlAssignTrimRightV(size32_t tlen, char * tgt, unsigned slen, const char * src)
  1882. {
  1883. unsigned len = rtlTrimStrLen(slen, src);
  1884. rtlCopySubStringV(tlen, tgt, len, src);
  1885. }
  1886. void rtlAssignTrimVRightV(size32_t tlen, char * tgt, const char * src)
  1887. {
  1888. unsigned len = rtlTrimVStrLen(src);
  1889. rtlCopySubStringV(tlen, tgt, len, src);
  1890. }
  1891. //-------------------------------------------------------------------------------
  1892. // Functions to trim off left side blank spaces
  1893. void rtlTrimLeft(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  1894. {
  1895. unsigned start = rtlLeftTrimStrStart(slen, src);
  1896. unsigned len = slen - start;
  1897. tlen = len;
  1898. tgt = rtlDupSubString(src + start, len);
  1899. }
  1900. void rtlTrimUnicodeLeft(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src)
  1901. {
  1902. unsigned start = rtlLeftTrimUnicodeStrStart(slen, src);
  1903. unsigned len = slen - start;
  1904. tlen = len;
  1905. tgt = rtlDupSubUnicode(src + start, len);
  1906. }
  1907. void rtlTrimVLeft(unsigned & tlen, char * & tgt, const char * src)
  1908. {
  1909. unsigned start = rtlLeftTrimVStrStart(src);
  1910. unsigned len = strlen(src+start);
  1911. tlen = len;
  1912. tgt = rtlDupSubString(src + start, len);
  1913. }
  1914. void rtlTrimVUnicodeLeft(unsigned & tlen, UChar * & tgt, UChar const * src)
  1915. {
  1916. rtlTrimUnicodeLeft(tlen, tgt, rtlUnicodeStrlen(src), src);
  1917. }
  1918. ECLRTL_API void rtlTrimUtf8Left(unsigned &tlen, char * &tgt, unsigned slen, const char * src)
  1919. {
  1920. unsigned trimLength;
  1921. size32_t trimSize;
  1922. rtlTrimUtf8Start(trimLength, trimSize, slen, src);
  1923. unsigned len = slen-trimLength;
  1924. const char * start = src+trimSize;
  1925. tlen = len;
  1926. tgt = rtlDupSubString(start, rtlUtf8Size(len, start));
  1927. }
  1928. void rtlAssignTrimLeftV(size32_t tlen, char * tgt, unsigned slen, const char * src)
  1929. {
  1930. unsigned start = rtlLeftTrimStrStart(slen, src);
  1931. unsigned len = slen - start;
  1932. rtlCopySubStringV(tlen, tgt, len, src+start);
  1933. }
  1934. void rtlAssignTrimVLeftV(size32_t tlen, char * tgt, const char * src)
  1935. {
  1936. unsigned start = rtlLeftTrimVStrStart(src);
  1937. unsigned len = strlen(src+start);
  1938. rtlCopySubStringV(tlen, tgt, len, src+start);
  1939. }
  1940. //--------------------------------------------------------------------------------
  1941. // Functions to trim off blank spaces of both sides
  1942. void rtlTrimBoth(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  1943. {
  1944. unsigned len = rtlTrimStrLen(slen, src);
  1945. unsigned start = len ? rtlLeftTrimStrStart(slen, src) : 0;
  1946. len -= start;
  1947. tlen = len;
  1948. tgt = rtlDupSubString(src + start, len);
  1949. }
  1950. void rtlTrimUnicodeBoth(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src)
  1951. {
  1952. unsigned len = rtlTrimUnicodeStrLen(slen, src);
  1953. unsigned start = len ? rtlLeftTrimUnicodeStrStart(slen, src) : 0;
  1954. len -= start;
  1955. tlen = len;
  1956. tgt = rtlDupSubUnicode(src + start, len);
  1957. }
  1958. void rtlTrimVBoth(unsigned & tlen, char * & tgt, const char * src)
  1959. {
  1960. unsigned len = rtlTrimVStrLen(src);
  1961. unsigned start = len ? rtlLeftTrimVStrStart(src) : 0;
  1962. len -= start;
  1963. tlen = len;
  1964. tgt = rtlDupSubString(src + start, len);
  1965. }
  1966. void rtlTrimVUnicodeBoth(unsigned & tlen, UChar * & tgt, UChar const * src)
  1967. {
  1968. rtlTrimUnicodeBoth(tlen, tgt, rtlUnicodeStrlen(src), src);
  1969. }
  1970. ECLRTL_API void rtlTrimUtf8Both(unsigned &tlen, char * &tgt, unsigned slen, const char * src)
  1971. {
  1972. unsigned lTrimLength;
  1973. size32_t lTrimSize;
  1974. rtlTrimUtf8Start(lTrimLength, lTrimSize, slen, src);
  1975. rtlTrimUtf8Right(tlen, tgt, slen-lTrimLength, src+lTrimSize);
  1976. }
  1977. void rtlAssignTrimBothV(size32_t tlen, char * tgt, unsigned slen, const char * src)
  1978. {
  1979. unsigned len = rtlTrimStrLen(slen, src);
  1980. unsigned start = len ? rtlLeftTrimStrStart(slen, src) : 0;
  1981. len -= start;
  1982. rtlCopySubStringV(tlen, tgt, len, src+start);
  1983. }
  1984. void rtlAssignTrimVBothV(size32_t tlen, char * tgt, const char * src)
  1985. {
  1986. unsigned len = rtlTrimVStrLen(src);
  1987. unsigned start = len ? rtlLeftTrimVStrStart(src) : 0;
  1988. len -= start;
  1989. rtlCopySubStringV(tlen, tgt, len, src+start);
  1990. }
  1991. //-----------------------------------------------------------------------------
  1992. // Functions used to trim off all blank spaces in a string.
  1993. unsigned rtlTrimStrLenNonBlank(size32_t l, const char * t)
  1994. {
  1995. unsigned len = 0;
  1996. while (l)
  1997. {
  1998. l--;
  1999. if (t[l] != ' ')
  2000. len++;
  2001. }
  2002. return len;
  2003. }
  2004. unsigned rtlTrimVStrLenNonBlank(const char * t)
  2005. {
  2006. unsigned len = 0;
  2007. unsigned char c;
  2008. while ((c = *t++) != 0)
  2009. {
  2010. if (c != ' ')
  2011. len++;
  2012. }
  2013. return len;
  2014. }
  2015. void rtlTrimAll(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  2016. {
  2017. tlen = rtlTrimStrLenNonBlank(slen, src);
  2018. char * buffer = (char *)rtlMalloc(tlen + 1);
  2019. int ind = 0;
  2020. for(unsigned i = 0; i < slen; i++) {
  2021. if(src[i] != ' ') {
  2022. buffer[ind] = src[i];
  2023. ind++;
  2024. }
  2025. }
  2026. buffer[tlen] = 0;
  2027. tgt = buffer;
  2028. }
  2029. void rtlTrimUnicodeAll(unsigned & tlen, UChar * & tgt, unsigned slen, const UChar * src)
  2030. {
  2031. UnicodeString rawStr;
  2032. UCharCharacterIterator iter(src, slen);
  2033. for(iter.first32(); iter.hasNext(); iter.next32())
  2034. if(!u_isspace(iter.current32()))
  2035. rawStr.append(iter.current32());
  2036. UnicodeString tgtStr;
  2037. normalizeUnicodeString(rawStr, tgtStr); // normalized in case crazy string like [combining accent] [space] [vowel]
  2038. tlen = tgtStr.length();
  2039. tgt = (UChar *)rtlMalloc((tlen+1)*2);
  2040. tgtStr.extract(0, tlen, tgt);
  2041. tgt[tlen] = 0x0000;
  2042. }
  2043. void rtlTrimVAll(unsigned & tlen, char * & tgt, const char * src)
  2044. {
  2045. tlen = rtlTrimVStrLenNonBlank(src);
  2046. char * buffer = (char *)rtlMalloc(tlen + 1);
  2047. int ind = 0;
  2048. int i = 0;
  2049. while(src[i] != 0) {
  2050. if(src[i] != ' ') {
  2051. buffer[ind] = src[i];
  2052. ind++;
  2053. }
  2054. i++;
  2055. }
  2056. buffer[tlen] = 0;
  2057. tgt = buffer;
  2058. }
  2059. void rtlTrimVUnicodeAll(unsigned & tlen, UChar * & tgt, const UChar * src)
  2060. {
  2061. rtlTrimUnicodeAll(tlen, tgt, rtlUnicodeStrlen(src), src);
  2062. }
  2063. ECLRTL_API void rtlTrimUtf8All(unsigned &tlen, char * &tgt, unsigned slen, const char * src)
  2064. {
  2065. //Go via unicode because of possibility of combining accents etc.
  2066. rtlDataAttr temp1(slen*sizeof(UChar));
  2067. rtlUtf8ToUnicode(slen, temp1.getustr(), slen, src);
  2068. unsigned trimLen;
  2069. rtlDataAttr trimText;
  2070. rtlTrimUnicodeAll(trimLen, trimText.refustr(), slen, temp1.getustr());
  2071. rtlUnicodeToUtf8X(tlen, tgt, trimLen, trimText.getustr());
  2072. }
  2073. void rtlAssignTrimAllV(unsigned tlen, char * tgt, unsigned slen, const char * src)
  2074. {
  2075. unsigned to = 0;
  2076. for (unsigned from = 0; (from < slen)&&(to+1 < tlen); from++)
  2077. {
  2078. if (src[from] != ' ')
  2079. tgt[to++] = src[from];
  2080. }
  2081. tgt[to] = 0;
  2082. }
  2083. void rtlAssignTrimVAllV(unsigned tlen, char * tgt, const char * src)
  2084. {
  2085. unsigned to = 0;
  2086. for (;(*src && (to+1 < tlen));src++)
  2087. {
  2088. if (*src != ' ')
  2089. tgt[to++] = *src;
  2090. }
  2091. tgt[to] = 0;
  2092. }
  2093. //-----------------------------------------------------------------------------
  2094. ECLRTL_API void rtlUnicodeToVAscii(unsigned outlen, char * out, unsigned inlen, UChar const * in)
  2095. {
  2096. rtlUnicodeToVCodepage(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2097. }
  2098. ECLRTL_API void rtlData2VUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
  2099. {
  2100. rtlCodepageToVUnicode(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2101. }
  2102. ECLRTL_API void rtlStrToVUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
  2103. {
  2104. rtlCodepageToVUnicode(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2105. }
  2106. ECLRTL_API void rtlData2Unicode(unsigned outlen, UChar * out, unsigned inlen, void const * in)
  2107. {
  2108. rtlCodepageToUnicode(outlen, out, inlen, (const char *)in, ASCII_LIKE_CODEPAGE);
  2109. }
  2110. ECLRTL_API void rtlAssignTrimUnicodeLeftV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
  2111. {
  2112. unsigned len;
  2113. UChar * str;
  2114. rtlTrimUnicodeLeft(len, str, slen, src);
  2115. if (len >= tlen)
  2116. len = tlen-1;
  2117. memcpy(tgt, str, len*2);
  2118. tgt[len] = 0;
  2119. rtlFree(str);
  2120. }
  2121. ECLRTL_API void rtlAssignTrimVUnicodeLeftV(size32_t tlen, UChar * tgt, const UChar * src)
  2122. {
  2123. unsigned len;
  2124. UChar * str;
  2125. rtlTrimVUnicodeLeft(len, str, src);
  2126. if (len >= tlen)
  2127. len = tlen-1;
  2128. memcpy(tgt, str, len*2);
  2129. tgt[len] = 0;
  2130. rtlFree(str);
  2131. }
  2132. ECLRTL_API void rtlAssignTrimUnicodeRightV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
  2133. {
  2134. unsigned len;
  2135. UChar * str;
  2136. rtlTrimUnicodeRight(len, str, slen, src);
  2137. if (len >= tlen)
  2138. len = tlen-1;
  2139. memcpy(tgt, str, len*2);
  2140. tgt[len] = 0;
  2141. rtlFree(str);
  2142. }
  2143. ECLRTL_API void rtlAssignTrimVUnicodeRightV(size32_t tlen, UChar * tgt, const UChar * src)
  2144. {
  2145. unsigned len;
  2146. UChar * str;
  2147. rtlTrimVUnicodeRight(len, str, src);
  2148. if (len >= tlen)
  2149. len = tlen-1;
  2150. memcpy(tgt, str, len*2);
  2151. tgt[len] = 0;
  2152. rtlFree(str);
  2153. }
  2154. ECLRTL_API void rtlAssignTrimUnicodeBothV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
  2155. {
  2156. unsigned len;
  2157. UChar * str;
  2158. rtlTrimUnicodeBoth(len, str, slen, src);
  2159. if (len >= tlen)
  2160. len = tlen-1;
  2161. memcpy(tgt, str, len*2);
  2162. tgt[len] = 0;
  2163. rtlFree(str);
  2164. }
  2165. ECLRTL_API void rtlAssignTrimVUnicodeBothV(size32_t tlen, UChar * tgt, const UChar * src)
  2166. {
  2167. unsigned len;
  2168. UChar * str;
  2169. rtlTrimVUnicodeBoth(len, str, src);
  2170. if (len >= tlen)
  2171. len = tlen-1;
  2172. memcpy(tgt, str, len*2);
  2173. tgt[len] = 0;
  2174. rtlFree(str);
  2175. }
  2176. ECLRTL_API void rtlAssignTrimUnicodeAllV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
  2177. {
  2178. unsigned len;
  2179. UChar * str;
  2180. rtlTrimUnicodeAll(len, str, slen, src);
  2181. if (len >= tlen)
  2182. len = tlen-1;
  2183. memcpy(tgt, str, len*2);
  2184. tgt[len] = 0;
  2185. rtlFree(str);
  2186. }
  2187. ECLRTL_API void rtlAssignTrimVUnicodeAllV(size32_t tlen, UChar * tgt, const UChar * src)
  2188. {
  2189. unsigned len;
  2190. UChar * str;
  2191. rtlTrimVUnicodeAll(len, str, src);
  2192. if (len >= tlen)
  2193. len = tlen-1;
  2194. memcpy(tgt, str, len*2);
  2195. tgt[len] = 0;
  2196. rtlFree(str);
  2197. }
  2198. //-----------------------------------------------------------------------------
  2199. int rtlCompareStrStr(unsigned l1, const char * p1, unsigned l2, const char * p2)
  2200. {
  2201. unsigned len = l1;
  2202. if (len > l2)
  2203. len = l2;
  2204. int diff = memcmp(p1, p2, len);
  2205. if (diff == 0)
  2206. {
  2207. if (len != l1)
  2208. {
  2209. for (;(diff == 0) && (len != l1);len++)
  2210. diff = ((unsigned char *)p1)[len] - ' ';
  2211. }
  2212. else if (len != l2)
  2213. {
  2214. for (;(diff == 0) && (len != l2);len++)
  2215. diff = ' ' - ((unsigned char *)p2)[len];
  2216. }
  2217. }
  2218. return diff;
  2219. }
  2220. int rtlCompareVStrVStr(const char * p1, const char * p2)
  2221. {
  2222. return rtlCompareStrStr(strlen(p1), p1, strlen(p2), p2);
  2223. }
  2224. int rtlCompareStrBlank(unsigned l1, const char * p1)
  2225. {
  2226. while (l1--)
  2227. {
  2228. int diff = (*(unsigned char *)(p1++)) - ' ';
  2229. if (diff)
  2230. return diff;
  2231. }
  2232. return 0;
  2233. }
  2234. int rtlCompareDataData(unsigned l1, const void * p1, unsigned l2, const void * p2)
  2235. {
  2236. unsigned len = l1;
  2237. if (len > l2)
  2238. len = l2;
  2239. int diff = memcmp(p1, p2, len);
  2240. if (diff == 0)
  2241. {
  2242. if (l1 > l2)
  2243. diff = +1;
  2244. else if (l1 < l2)
  2245. diff = -1;
  2246. }
  2247. return diff;
  2248. }
  2249. int rtlCompareEStrEStr(unsigned l1, const char * p1, unsigned l2, const char * p2)
  2250. {
  2251. unsigned len = l1;
  2252. if (len > l2)
  2253. len = l2;
  2254. int diff = memcmp(p1, p2, len);
  2255. if (diff == 0)
  2256. {
  2257. if (len != l1)
  2258. {
  2259. for (;(diff == 0) && (len != l1);len++)
  2260. diff = ((unsigned char *)p1)[len] - '@';
  2261. }
  2262. else if (len != l2)
  2263. {
  2264. for (;(diff == 0) && (len != l2);len++)
  2265. diff = '@' - ((unsigned char *)p2)[len];
  2266. }
  2267. }
  2268. return diff;
  2269. }
// Zero-valued UChar whose address is substituted for NULL string pointers before they
// are handed to ucol_strcoll by the unicode comparison/search functions in this file.
  2270. const static UChar nullUStr = 0;
  2271. int rtlCompareUnicodeUnicode(unsigned l1, UChar const * p1, unsigned l2, UChar const * p2, char const * locale)
  2272. {
  2273. while(l1 && u_isUWhiteSpace(p1[l1-1])) l1--;
  2274. while(l2 && u_isUWhiteSpace(p2[l2-1])) l2--;
  2275. if (!p1) p1 = &nullUStr;
  2276. if (!p2) p2 = &nullUStr;
  2277. return ucol_strcoll(queryRTLLocale(locale)->queryCollator(), p1, l1, p2, l2);
  2278. }
  2279. int rtlCompareUnicodeUnicodeStrength(unsigned l1, UChar const * p1, unsigned l2, UChar const * p2, char const * locale, unsigned strength)
  2280. {
  2281. while(l1 && u_isUWhiteSpace(p1[l1-1])) l1--;
  2282. while(l2 && u_isUWhiteSpace(p2[l2-1])) l2--;
  2283. if (!p1) p1 = &nullUStr;
  2284. if (!p2) p2 = &nullUStr;
  2285. return ucol_strcoll(queryRTLLocale(locale)->queryCollator(strength), p1, l1, p2, l2);
  2286. }
  2287. int rtlCompareVUnicodeVUnicode(UChar const * p1, UChar const * p2, char const * locale)
  2288. {
  2289. return rtlCompareUnicodeUnicode(rtlUnicodeStrlen(p1), p1, rtlUnicodeStrlen(p2), p2, locale);
  2290. }
  2291. int rtlCompareVUnicodeVUnicodeStrength(UChar const * p1, UChar const * p2, char const * locale, unsigned strength)
  2292. {
  2293. return rtlCompareUnicodeUnicodeStrength(rtlUnicodeStrlen(p1), p1, rtlUnicodeStrlen(p2), p2, locale, strength);
  2294. }
  2295. void rtlKeyUnicodeX(unsigned & tlen, void * & tgt, unsigned slen, const UChar * src, const char * locale)
  2296. {
  2297. while(slen && u_isUWhiteSpace(src[slen-1])) slen--;
  2298. UCollator * coll = queryRTLLocale(locale)->queryCollator();
  2299. tlen = ucol_getSortKey(coll, src, slen, 0, 0);
  2300. tgt = rtlMalloc(tlen);
  2301. ucol_getSortKey(coll, src, slen, (unsigned char *)tgt, tlen);
  2302. }
  2303. void rtlKeyUnicodeStrengthX(unsigned & tlen, void * & tgt, unsigned slen, const UChar * src, const char * locale, unsigned strength)
  2304. {
  2305. while(slen && u_isUWhiteSpace(src[slen-1])) slen--;
  2306. UCollator * coll = queryRTLLocale(locale)->queryCollator(strength);
  2307. tlen = ucol_getSortKey(coll, src, slen, 0, 0);
  2308. tgt = rtlMalloc(tlen);
  2309. ucol_getSortKey(coll, src, slen, (unsigned char *)tgt, tlen);
  2310. }
  2311. ECLRTL_API int rtlPrefixDiffStrEx(unsigned l1, const char * p1, unsigned l2, const char * p2, unsigned origin)
  2312. {
  2313. unsigned len = l1 < l2 ? l1 : l2;
  2314. const byte * str1 = (const byte *)p1;
  2315. const byte * str2 = (const byte *)p2;
  2316. for (unsigned i=0; i<len; i++)
  2317. {
  2318. byte c1 = str1[i];
  2319. byte c2 = str2[i];
  2320. if (c1 != c2)
  2321. {
  2322. if (c1 < c2)
  2323. return -(int)(i+origin+1);
  2324. else
  2325. return (int)(i+origin+1);
  2326. }
  2327. }
  2328. if (l1 != l2)
  2329. return (l1 < l2) ? -(int)(len+origin+1) : (int)(len+origin+1);
  2330. return 0;
  2331. }
  2332. ECLRTL_API int rtlPrefixDiffStr(unsigned l1, const char * p1, unsigned l2, const char * p2)
  2333. {
  2334. return rtlPrefixDiffStrEx(l1, p1, l2, p2, 0);
  2335. }
  2336. //MORE: I'm not sure this can really be implemented....
  2337. ECLRTL_API int rtlPrefixDiffUnicodeEx(unsigned l1, const UChar * p1, unsigned l2, const UChar * p2, char const * locale, unsigned origin)
  2338. {
  2339. while(l1 && u_isUWhiteSpace(p1[l1-1])) l1--;
  2340. while(l2 && u_isUWhiteSpace(p2[l2-1])) l2--;
  2341. unsigned len = l1 < l2 ? l1 : l2;
  2342. for (unsigned i=0; i<len; i++)
  2343. {
  2344. if (p1[i] != p2[i])
  2345. {
  2346. int c = ucol_strcoll(queryRTLLocale(locale)->queryCollator(), p1+i, l1-i, p2+i, l2-i);
  2347. if (c < 0)
  2348. return -(int)(i+origin+1);
  2349. else if (c > 0)
  2350. return (int)(i+origin+1);
  2351. }
  2352. }
  2353. if (l1 != l2)
  2354. return (l1 < l2) ? -(int)(len+origin+1) : (int)(len+origin+1);
  2355. return 0;
  2356. }
  2357. ECLRTL_API int rtlPrefixDiffUnicode(unsigned l1, const UChar * p1, unsigned l2, const UChar * p2, char const * locale)
  2358. {
  2359. return rtlPrefixDiffUnicodeEx(l1, p1, l2, p2, locale, 0);
  2360. }
  2361. //-----------------------------------------------------------------------------
  2362. void rtlStringToLower(size32_t l, char * t)
  2363. {
  2364. for (;l--;t++)
  2365. *t = tolower(*t);
  2366. }
  2367. void rtlStringToUpper(size32_t l, char * t)
  2368. {
  2369. for (;l--;t++)
  2370. *t = toupper(*t);
  2371. }
  2372. void rtlUnicodeToLower(size32_t l, UChar * t, char const * locale)
  2373. {
  2374. UChar * buff = (UChar *)rtlMalloc(l*2);
  2375. UErrorCode err = U_ZERO_ERROR;
  2376. u_strToLower(buff, l, t, l, locale, &err);
  2377. unicodeNormalizedCopy(buff, t, l);
  2378. }
  2379. void rtlUnicodeToLowerX(size32_t & lenout, UChar * & out, size32_t l, const UChar * t, char const * locale)
  2380. {
  2381. out = (UChar *)rtlMalloc(l*2);
  2382. lenout = l;
  2383. UErrorCode err = U_ZERO_ERROR;
  2384. u_strToLower(out, l, t, l, locale, &err);
  2385. }
  2386. void rtlUnicodeToUpper(size32_t l, UChar * t, char const * locale)
  2387. {
  2388. UChar * buff = (UChar *)rtlMalloc(l*2);
  2389. UErrorCode err = U_ZERO_ERROR;
  2390. u_strToUpper(buff, l, t, l, locale, &err);
  2391. unicodeNormalizedCopy(buff, t, l);
  2392. }
  2393. //=============================================================================
  2394. // Miscellaneous helper functions...
  2395. //-----------------------------------------------------------------------------
  2396. int searchTableStringN(unsigned count, const char * * table, unsigned width, const char * search)
  2397. {
  2398. int left = 0;
  2399. int right = count;
  2400. do
  2401. {
  2402. int mid = (left + right) >> 1;
  2403. int cmp = memcmp(search, table[mid], width);
  2404. if (cmp < 0)
  2405. right = mid;
  2406. else if (cmp > 0)
  2407. left = mid+1;
  2408. else
  2409. return mid;
  2410. } while (left < right);
  2411. return -1;
  2412. }
  2413. int rtlSearchTableStringN(unsigned count, char * * table, unsigned width, const char * search)
  2414. {
  2415. int left = 0;
  2416. int right = count;
  2417. do
  2418. {
  2419. int mid = (left + right) >> 1;
  2420. //we could use rtlCompareStrStr, but both source and target strings should
  2421. //be the correct length, so no point.... (unless new weird collation sequences)
  2422. //we would also need to call a different function for data
  2423. int cmp = memcmp(search, table[mid], width);
  2424. if (cmp < 0)
  2425. right = mid;
  2426. else if (cmp > 0)
  2427. left = mid+1;
  2428. else
  2429. return mid;
  2430. } while (left < right);
  2431. return -1;
  2432. }
  2433. int rtlSearchTableVStringN(unsigned count, char * * table, const char * search)
  2434. {
  2435. int left = 0;
  2436. int right = count;
  2437. do
  2438. {
  2439. int mid = (left + right) >> 1;
  2440. int cmp = strcmp(search, table[mid]);
  2441. if (cmp < 0)
  2442. right = mid;
  2443. else if (cmp > 0)
  2444. left = mid+1;
  2445. else
  2446. return mid;
  2447. } while (left < right);
  2448. return -1;
  2449. }
  2450. int rtlNewSearchDataTable(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search)
  2451. {
  2452. int left = 0;
  2453. int right = count;
  2454. do
  2455. {
  2456. int mid = (left + right) >> 1;
  2457. int cmp = rtlCompareDataData( width, search, elemlen, table[mid]);
  2458. if (cmp < 0)
  2459. right = mid;
  2460. else if (cmp > 0)
  2461. left = mid+1;
  2462. else {
  2463. return mid;
  2464. }
  2465. } while (left < right);
  2466. return -1;
  2467. }
  2468. int rtlNewSearchEStringTable(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search)
  2469. {
  2470. int left = 0;
  2471. int right = count;
  2472. do
  2473. {
  2474. int mid = (left + right) >> 1;
  2475. int cmp = rtlCompareEStrEStr( width, search, elemlen, table[mid]);
  2476. if (cmp < 0)
  2477. right = mid;
  2478. else if (cmp > 0)
  2479. left = mid+1;
  2480. else {
  2481. return mid;
  2482. }
  2483. } while (left < right);
  2484. return -1;
  2485. }
  2486. int rtlNewSearchQStringTable(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search)
  2487. {
  2488. int left = 0;
  2489. int right = count;
  2490. do
  2491. {
  2492. int mid = (left + right) >> 1;
  2493. int cmp = rtlCompareQStrQStr( width, search, elemlen, table[mid]);
  2494. if (cmp < 0)
  2495. right = mid;
  2496. else if (cmp > 0)
  2497. left = mid+1;
  2498. else {
  2499. return mid;
  2500. }
  2501. } while (left < right);
  2502. return -1;
  2503. }
  2504. int rtlNewSearchStringTable(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search)
  2505. {
  2506. int left = 0;
  2507. int right = count;
  2508. do
  2509. {
  2510. int mid = (left + right) >> 1;
  2511. int cmp = rtlCompareStrStr( width, search, elemlen, table[mid]);
  2512. if (cmp < 0)
  2513. right = mid;
  2514. else if (cmp > 0)
  2515. left = mid+1;
  2516. else {
  2517. return mid;
  2518. }
  2519. } while (left < right);
  2520. return -1;
  2521. }
  2522. int rtlNewSearchUnicodeTable(unsigned count, unsigned elemlen, UChar * * table, unsigned width, const UChar * search, const char * locale)
  2523. {
  2524. UCollator * coll = queryRTLLocale(locale)->queryCollator();
  2525. int left = 0;
  2526. int right = count;
  2527. if (!search) search = &nullUStr;
  2528. size32_t trimWidth = rtlQuickTrimUnicode(width, search);
  2529. do
  2530. {
  2531. int mid = (left + right) >> 1;
  2532. size32_t elemTrimWidth = rtlQuickTrimUnicode(elemlen, table[mid]);
  2533. UCollationResult cmp = ucol_strcoll(coll, search, trimWidth, table[mid], elemTrimWidth);
  2534. if (cmp == UCOL_LESS)
  2535. right = mid;
  2536. else if (cmp == UCOL_GREATER)
  2537. left = mid+1;
  2538. else
  2539. return mid;
  2540. } while (left < right);
  2541. return -1;
  2542. }
  2543. int rtlNewSearchVUnicodeTable(unsigned count, UChar * * table, const UChar * search, const char * locale)
  2544. {
  2545. UCollator * coll = queryRTLLocale(locale)->queryCollator();
  2546. int left = 0;
  2547. int right = count;
  2548. do
  2549. {
  2550. int mid = (left + right) >> 1;
  2551. UCollationResult cmp = ucol_strcoll(coll, search, rtlUnicodeStrlen(search), table[mid], rtlUnicodeStrlen(table[mid]));
  2552. if (cmp == UCOL_LESS)
  2553. right = mid;
  2554. else if (cmp == UCOL_GREATER)
  2555. left = mid+1;
  2556. else
  2557. return mid;
  2558. } while (left < right);
  2559. return -1;
  2560. }
  2561. //-----------------------------------------------------------------------------
  2562. template <class T>
  2563. int rtlSearchIntegerTable(unsigned count, T * table, T search)
  2564. {
  2565. int left = 0;
  2566. int right = count;
  2567. do
  2568. {
  2569. int mid = (left + right) >> 1;
  2570. T midValue = table[mid];
  2571. if (search < midValue)
  2572. right = mid;
  2573. else if (search > midValue)
  2574. left = mid+1;
  2575. else
  2576. return mid;
  2577. } while (left < right);
  2578. return -1;
  2579. }
  2580. int rtlSearchTableInteger8(unsigned count, __int64 * table, __int64 search)
  2581. {
  2582. return rtlSearchIntegerTable(count, table, search);
  2583. }
  2584. int rtlSearchTableUInteger8(unsigned count, unsigned __int64 * table, unsigned __int64 search)
  2585. {
  2586. return rtlSearchIntegerTable(count, table, search);
  2587. }
  2588. int rtlSearchTableInteger4(unsigned count, int * table, int search)
  2589. {
  2590. return rtlSearchIntegerTable(count, table, search);
  2591. }
  2592. int rtlSearchTableUInteger4(unsigned count, unsigned * table, unsigned search)
  2593. {
  2594. return rtlSearchIntegerTable(count, table, search);
  2595. }
  2596. //-----------------------------------------------------------------------------
  2597. unsigned rtlCrc32(unsigned len, const void * buffer, unsigned crc)
  2598. {
  2599. return crc32((const char *)buffer, len, crc);
  2600. }
  2601. //=============================================================================
  2602. // EBCDIC helper functions...
// Byte translation table: 16 rows of 16 octal escapes (256 entries, plus the string
// literal's terminating NUL).
// NOTE(review): presumably indexed by an EBCDIC byte to give the CCSID 819 equivalent,
// going by the name and the section heading; it is not referenced in the nearby code
// and, unlike the tables below, is declared as plain char - confirm it is still needed.
  2603. static char ccsid819[] = "\
  2604. \000\001\002\003\234\011\206\177\227\215\216\013\014\015\016\017\
  2605. \020\021\022\023\235\205\010\207\030\031\222\217\034\035\036\037\
  2606. \200\201\202\203\204\012\027\033\210\211\212\213\214\005\006\007\
  2607. \220\221\026\223\224\225\226\004\230\231\232\233\024\025\236\032\
  2608. \040\240\342\344\340\341\343\345\347\361\242\056\074\050\053\174\
  2609. \046\351\352\353\350\355\356\357\354\337\041\044\052\051\073\254\
  2610. \055\057\302\304\300\301\303\305\307\321\246\054\045\137\076\077\
  2611. \370\311\312\313\310\315\316\317\314\140\072\043\100\047\075\042\
  2612. \330\141\142\143\144\145\146\147\150\151\253\273\360\375\376\261\
  2613. \260\152\153\154\155\156\157\160\161\162\252\272\346\270\306\244\
  2614. \265\176\163\164\165\166\167\170\171\172\241\277\320\335\336\256\
  2615. \136\243\245\267\251\247\266\274\275\276\133\135\257\250\264\327\
  2616. \173\101\102\103\104\105\106\107\110\111\255\364\366\362\363\365\
  2617. \175\112\113\114\115\116\117\120\121\122\271\373\374\371\372\377\
  2618. \134\367\123\124\125\126\127\130\131\132\262\324\326\322\323\325\
  2619. \060\061\062\063\064\065\066\067\070\071\263\333\334\331\332\237";
// Byte translation table (256 entries, plus the string literal's terminating NUL) used
// by rtlEStrToStr: each input byte is replaced by ccsid1047[byte].
// For example entry 0x40 is 0x20 (space).
  2620. static unsigned char ccsid1047[] = "\
  2621. \000\001\002\003\234\011\206\177\227\215\216\013\014\015\016\017\
  2622. \020\021\022\023\235\012\010\207\030\031\222\217\034\035\036\037\
  2623. \200\201\202\203\204\205\027\033\210\211\212\213\214\005\006\007\
  2624. \220\221\026\223\224\225\226\004\230\231\232\233\024\025\236\032\
  2625. \040\240\342\344\340\341\343\345\347\361\242\056\074\050\053\174\
  2626. \046\351\352\353\350\355\356\357\354\337\041\044\052\051\073\136\
  2627. \055\057\302\304\300\301\303\305\307\321\246\054\045\137\076\077\
  2628. \370\311\312\313\310\315\316\317\314\140\072\043\100\047\075\042\
  2629. \330\141\142\143\144\145\146\147\150\151\253\273\360\375\376\261\
  2630. \260\152\153\154\155\156\157\160\161\162\252\272\346\270\306\244\
  2631. \265\176\163\164\165\166\167\170\171\172\241\277\320\133\336\256\
  2632. \254\243\245\267\251\247\266\274\275\276\335\250\257\135\264\327\
  2633. \173\101\102\103\104\105\106\107\110\111\255\364\366\362\363\365\
  2634. \175\112\113\114\115\116\117\120\121\122\271\373\374\371\372\377\
  2635. \134\367\123\124\125\126\127\130\131\132\262\324\326\322\323\325\
  2636. \060\061\062\063\064\065\066\067\070\071\263\333\334\331\332\237";
// Byte translation table (256 entries, plus the string literal's terminating NUL) used
// by rtlStrToEStr: each input byte is replaced by ccsid1047_rev[byte].
// Presumably the inverse of ccsid1047 (e.g. entry 0x20 is 0x40, while ccsid1047[0x40]
// is 0x20) - not every entry has been cross-checked.
  2637. static unsigned char ccsid1047_rev[] = "\
  2638. \000\001\002\003\067\055\056\057\026\005\025\013\014\015\016\017\
  2639. \020\021\022\023\074\075\062\046\030\031\077\047\034\035\036\037\
  2640. \100\132\177\173\133\154\120\175\115\135\134\116\153\140\113\141\
  2641. \360\361\362\363\364\365\366\367\370\371\172\136\114\176\156\157\
  2642. \174\301\302\303\304\305\306\307\310\311\321\322\323\324\325\326\
  2643. \327\330\331\342\343\344\345\346\347\350\351\255\340\275\137\155\
  2644. \171\201\202\203\204\205\206\207\210\211\221\222\223\224\225\226\
  2645. \227\230\231\242\243\244\245\246\247\250\251\300\117\320\241\007\
  2646. \040\041\042\043\044\045\006\027\050\051\052\053\054\011\012\033\
  2647. \060\061\032\063\064\065\066\010\070\071\072\073\004\024\076\377\
  2648. \101\252\112\261\237\262\152\265\273\264\232\212\260\312\257\274\
  2649. \220\217\352\372\276\240\266\263\235\332\233\213\267\270\271\253\
  2650. \144\145\142\146\143\147\236\150\164\161\162\163\170\165\166\167\
  2651. \254\151\355\356\353\357\354\277\200\375\376\373\374\272\256\131\
  2652. \104\105\102\106\103\107\234\110\124\121\122\123\130\125\126\127\
  2653. \214\111\315\316\313\317\314\341\160\335\336\333\334\215\216\337";
  2654. void rtlEStrToStr(unsigned outlen, char *out, unsigned inlen, const char *in)
  2655. {
  2656. unsigned char *codepage = ccsid1047;
  2657. unsigned i,j;
  2658. unsigned lim = inlen;
  2659. if (lim>outlen) lim = outlen;
  2660. for (i=0;i<lim;i++)
  2661. {
  2662. j = in[i] & 0x00ff;
  2663. out[i] = codepage[j];
  2664. }
  2665. for (;i<outlen; i++)
  2666. out[i] = ' ';
  2667. }
  2668. void rtlStrToEStr(unsigned outlen, char *out, unsigned inlen, const char *in)
  2669. {
  2670. unsigned char *codepage = ccsid1047_rev;
  2671. unsigned i,j;
  2672. unsigned lim = inlen;
  2673. if (lim>outlen) lim = outlen;
  2674. for (i=0;i<lim;i++)
  2675. {
  2676. j = in[i] & 0x00ff;
  2677. out[i] = codepage[j];
  2678. }
  2679. for (;i<outlen; i++)
  2680. out[i] = codepage[' '];
  2681. }
  2682. //---------------------------------------------------------------------------
  2683. void rtlCodepageToUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in, char const * codepage)
  2684. {
  2685. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2686. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2687. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2688. UErrorCode err = U_ZERO_ERROR;
  2689. unsigned len = ucnv_toUChars(conv, out, outlen, in, inlen, &err);
  2690. while(len<outlen) out[len++] = 0x0020;
  2691. unicodeEnsureIsNormalized(outlen, out);
  2692. }
  2693. void rtlCodepageToVUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in, char const * codepage)
  2694. {
  2695. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2696. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2697. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2698. UErrorCode err = U_ZERO_ERROR;
  2699. unsigned len = ucnv_toUChars(conv, out, outlen-1, in, inlen, &err);
  2700. if (len >= outlen) len = outlen-1;
  2701. out[len] = 0;
  2702. vunicodeEnsureIsNormalized(outlen, out);
  2703. }
  2704. void rtlVCodepageToUnicode(unsigned outlen, UChar * out, char const * in, char const * codepage)
  2705. {
  2706. rtlCodepageToUnicode(outlen, out, strlen(in), in, codepage);
  2707. }
  2708. void rtlVCodepageToVUnicode(unsigned outlen, UChar * out, char const * in, char const * codepage)
  2709. {
  2710. rtlCodepageToVUnicode(outlen, out, strlen(in), in, codepage);
  2711. }
  2712. void rtlCodepageToUnicodeUnescape(unsigned outlen, UChar * out, unsigned inlen, char const * in, char const * codepage)
  2713. {
  2714. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2715. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2716. UnicodeString raw(in, inlen, codepage);
  2717. UnicodeString unescaped = raw.unescape();
  2718. UnicodeString normalized;
  2719. normalizeUnicodeString(unescaped, normalized);
  2720. if((unsigned)normalized.length()>outlen)
  2721. normalized.truncate(outlen);
  2722. else if((unsigned)normalized.length()<outlen)
  2723. normalized.padTrailing(outlen);
  2724. normalized.extract(0, outlen, out);
  2725. }
  2726. void rtlUnicodeToCodepage(unsigned outlen, char * out, unsigned inlen, UChar const * in, char const * codepage)
  2727. {
  2728. //If the unicode contains a character which doesn't exist in the destination codepage,
  2729. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2730. //no telling how your terminal may display this (I've seen a divide sign and a right
  2731. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2732. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2733. UErrorCode err = U_ZERO_ERROR;
  2734. unsigned len = ucnv_fromUChars(conv, (char *)out, outlen, in, inlen, &err);
  2735. if(len<outlen)
  2736. codepageBlankFill(codepage, out+len, outlen-len);
  2737. }
  2738. void rtlUnicodeToData(unsigned outlen, void * out, unsigned inlen, UChar const * in)
  2739. {
  2740. //If the unicode contains a character which doesn't exist in the destination codepage,
  2741. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2742. //no telling how your terminal may display this (I've seen a divide sign and a right
  2743. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2744. UConverter * conv = queryRTLUnicodeConverter(ASCII_LIKE_CODEPAGE)->query();
  2745. UErrorCode err = U_ZERO_ERROR;
  2746. unsigned len = ucnv_fromUChars(conv, (char *)out, outlen, in, inlen, &err);
  2747. if(len<outlen)
  2748. memset((char *)out+len, 0, outlen-len);
  2749. }
  2750. void rtlUnicodeToVCodepage(unsigned outlen, char * out, unsigned inlen, UChar const * in, char const * codepage)
  2751. {
  2752. //If the unicode contains a character which doesn't exist in the destination codepage,
  2753. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2754. //no telling how your terminal may display this (I've seen a divide sign and a right
  2755. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2756. UConverter * conv = queryRTLUnicodeConverter(ASCII_LIKE_CODEPAGE)->query();
  2757. UErrorCode err = U_ZERO_ERROR;
  2758. unsigned len = ucnv_fromUChars(conv, (char *)out, outlen-1, in, inlen, &err);
  2759. if (len >= outlen) len = outlen-1;
  2760. out[len] = 0;
  2761. }
  2762. void rtlVUnicodeToCodepage(unsigned outlen, char * out, UChar const * in, char const * codepage)
  2763. {
  2764. rtlUnicodeToCodepage(outlen, out, rtlUnicodeStrlen(in), in, codepage);
  2765. }
  2766. void rtlVUnicodeToData(unsigned outlen, void * out, UChar const * in)
  2767. {
  2768. rtlUnicodeToData(outlen, out, rtlUnicodeStrlen(in), in);
  2769. }
  2770. void rtlVUnicodeToVCodepage(unsigned outlen, char * out, UChar const * in, char const * codepage)
  2771. {
  2772. rtlUnicodeToVCodepage(outlen, out, rtlUnicodeStrlen(in), in, codepage);
  2773. }
  2774. void rtlCodepageToUnicodeX(unsigned & outlen, UChar * & out, unsigned inlen, char const * in, char const * codepage)
  2775. {
  2776. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2777. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2778. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2779. UErrorCode err = U_ZERO_ERROR;
  2780. outlen = ucnv_toUChars(conv, 0, 0, in, inlen, &err);
  2781. if(err==U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
  2782. out = (UChar *)rtlMalloc(outlen*2);
  2783. ucnv_toUChars(conv, out, outlen, in, inlen, &err);
  2784. }
  2785. UChar * rtlCodepageToVUnicodeX(unsigned inlen, char const * in, char const * codepage)
  2786. {
  2787. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2788. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2789. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2790. UErrorCode err = U_ZERO_ERROR;
  2791. unsigned outlen = ucnv_toUChars(conv, 0, 0, in, inlen, &err);
  2792. if(err == U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
  2793. UChar * out = (UChar *)rtlMalloc((outlen+1)*2);
  2794. ucnv_toUChars(conv, out, outlen, in, inlen, &err);
  2795. out[outlen] = 0x0000;
  2796. vunicodeEnsureIsNormalizedX(outlen, out);
  2797. return out;
  2798. }
  2799. void rtlVCodepageToUnicodeX(unsigned & outlen, UChar * & out, char const * in, char const * codepage)
  2800. {
  2801. rtlCodepageToUnicodeX(outlen, out, strlen(in), in, codepage);
  2802. }
  2803. UChar * rtlVCodepageToVUnicodeX(char const * in, char const * codepage)
  2804. {
  2805. return rtlCodepageToVUnicodeX(strlen(in), in, codepage);
  2806. }
  2807. void rtlCodepageToUnicodeXUnescape(unsigned & outlen, UChar * & out, unsigned inlen, char const * in, char const * codepage)
  2808. {
  2809. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2810. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2811. UnicodeString raw(in, inlen, codepage);
  2812. UnicodeString unescaped = raw.unescape();
  2813. UnicodeString normalized;
  2814. normalizeUnicodeString(unescaped, normalized);
  2815. outlen = normalized.length();
  2816. out = (UChar *)rtlMalloc(outlen*2);
  2817. normalized.extract(0, outlen, out);
  2818. }
  2819. void rtlCodepageToUtf8XUnescape(unsigned & outlen, char * & out, unsigned inlen, char const * in, char const * codepage)
  2820. {
  2821. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2822. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2823. UnicodeString raw(in, inlen, codepage);
  2824. UnicodeString unescaped = raw.unescape();
  2825. UnicodeString normalized;
  2826. normalizeUnicodeString(unescaped, normalized);
  2827. UConverter * utf8Conv = queryRTLUnicodeConverter(UTF8_CODEPAGE)->query();
  2828. UErrorCode err = U_ZERO_ERROR;
  2829. size32_t outsize = normalized.extract(NULL, 0, utf8Conv, err);
  2830. err = U_ZERO_ERROR;
  2831. out = (char *)rtlMalloc(outsize);
  2832. outsize = normalized.extract(out, outsize, utf8Conv, err);
  2833. outlen = rtlUtf8Length(outsize, out);
  2834. }
  2835. void rtlUnicodeToCodepageX(unsigned & outlen, char * & out, unsigned inlen, UChar const * in, char const * codepage)
  2836. {
  2837. //If the unicode contains a character which doesn't exist in the destination codepage,
  2838. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2839. //no telling how your terminal may display this (I've seen a divide sign and a right
  2840. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2841. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2842. UErrorCode err = U_ZERO_ERROR;
  2843. outlen = ucnv_fromUChars(conv, 0, 0, in, inlen, &err);
  2844. if(err == U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
  2845. out = (char *)rtlMalloc(outlen);
  2846. ucnv_fromUChars(conv, out, outlen, in, inlen, &err);
  2847. }
  2848. void rtlUnicodeToDataX(unsigned & outlen, void * & out, unsigned inlen, UChar const * in)
  2849. {
  2850. rtlUnicodeToCodepageX(outlen, (char * &)out, inlen, in, ASCII_LIKE_CODEPAGE);
  2851. }
  2852. char * rtlUnicodeToVCodepageX(unsigned inlen, UChar const * in, char const * codepage)
  2853. {
  2854. //If the unicode contains a character which doesn't exist in the destination codepage,
  2855. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2856. //no telling how your terminal may display this (I've seen a divide sign and a right
  2857. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2858. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2859. UErrorCode err = U_ZERO_ERROR;
  2860. unsigned outlen = ucnv_fromUChars(conv, 0, 0, in, inlen, &err);
  2861. if(err == U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
  2862. char * out = (char *)rtlMalloc(outlen+1);
  2863. ucnv_fromUChars(conv, out, outlen, in, inlen, &err);
  2864. out[outlen] = 0x00;
  2865. return out;
  2866. }
  2867. void rtlVUnicodeToCodepageX(unsigned & outlen, char * & out, UChar const * in, char const * codepage)
  2868. {
  2869. rtlUnicodeToCodepageX(outlen, out, rtlUnicodeStrlen(in), in, codepage);
  2870. }
  2871. char * rtlVUnicodeToVCodepageX(UChar const * in, char const * codepage)
  2872. {
  2873. return rtlUnicodeToVCodepageX(rtlUnicodeStrlen(in), in, codepage);
  2874. }
  2875. void rtlStrToUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
  2876. {
  2877. rtlCodepageToUnicode(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2878. }
  2879. void rtlUnicodeToStr(unsigned outlen, char * out, unsigned inlen, UChar const * in)
  2880. {
  2881. rtlUnicodeToCodepage(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2882. }
  2883. void rtlStrToUnicodeX(unsigned & outlen, UChar * & out, unsigned inlen, char const * in)
  2884. {
  2885. rtlCodepageToUnicodeX(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2886. }
  2887. void rtlUnicodeToStrX(unsigned & outlen, char * & out, unsigned inlen, UChar const * in)
  2888. {
  2889. rtlUnicodeToCodepageX(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2890. }
  2891. void rtlUnicodeToEscapedStrX(unsigned & outlen, char * & out, unsigned inlen, UChar const * in)
  2892. {
  2893. StringBuffer outbuff;
  2894. escapeUnicode(inlen, in, outbuff);
  2895. outlen = outbuff.length();
  2896. out = (char *)rtlMalloc(outlen);
  2897. memcpy(out, outbuff.str(), outlen);
  2898. }
  2899. bool rtlCodepageToCodepage(unsigned outlen, char * out, unsigned inlen, char const * in, char const * outcodepage, char const * incodepage)
  2900. {
  2901. UConverter * inconv = queryRTLUnicodeConverter(incodepage)->query();
  2902. UConverter * outconv = queryRTLUnicodeConverter(outcodepage)->query();
  2903. UErrorCode err = U_ZERO_ERROR;
  2904. char * target = out;
  2905. ucnv_convertEx(outconv, inconv, &target, out+outlen, &in, in+inlen, NULL, NULL, NULL, NULL, TRUE, TRUE, &err);
  2906. unsigned len = target - out;
  2907. if(len < outlen)
  2908. codepageBlankFill(outcodepage, target, outlen-len);
  2909. return U_SUCCESS(err);
  2910. }
  2911. bool rtlCodepageToCodepageX(unsigned & outlen, char * & out, unsigned maxoutlen, unsigned inlen, char const * in, char const * outcodepage, char const * incodepage)
  2912. {
  2913. UConverter * inconv = queryRTLUnicodeConverter(incodepage)->query();
  2914. UConverter * outconv = queryRTLUnicodeConverter(outcodepage)->query();
  2915. UErrorCode err = U_ZERO_ERROR;
  2916. //GH->PG is there a better way of coding this with out temporary buffer?
  2917. char * tempBuffer = (char *)rtlMalloc(maxoutlen);
  2918. char * target = tempBuffer;
  2919. ucnv_convertEx(outconv, inconv, &target, tempBuffer+maxoutlen, &in, in+inlen, NULL, NULL, NULL, NULL, TRUE, TRUE, &err);
  2920. unsigned len = target - tempBuffer;
  2921. outlen = len;
  2922. if (len == maxoutlen)
  2923. out = tempBuffer;
  2924. else
  2925. {
  2926. out = (char *)rtlRealloc(tempBuffer, len);
  2927. if (!out)
  2928. out = tempBuffer;
  2929. }
  2930. return U_SUCCESS(err);
  2931. }
  2932. int rtlSingleUtf8ToCodepage(char * out, unsigned inlen, char const * in, char const * outcodepage)
  2933. {
  2934. const byte head = *in; // Macros require unsigned argument on some versions of ICU
  2935. if(!U8_IS_LEAD(head))
  2936. return -1;
  2937. uint8_t trailbytes = U8_COUNT_TRAIL_BYTES(head);
  2938. if(inlen < (unsigned)(trailbytes+1))
  2939. return -1;
  2940. if(!rtlCodepageToCodepage(1, out, trailbytes+1, in, outcodepage, UTF8_CODEPAGE))
  2941. return -1;
  2942. return static_cast<int>(trailbytes); //cast okay as is certainly 0--3
  2943. }
  2944. //---------------------------------------------------------------------------
  2945. void rtlStrToDataX(unsigned & tlen, void * & tgt, unsigned slen, const void * src)
  2946. {
  2947. void * data = rtlMalloc(slen);
  2948. memcpy(data, src, slen);
  2949. tgt = data;
  2950. tlen = slen;
  2951. }
  2952. void rtlStrToStrX(unsigned & tlen, char * & tgt, unsigned slen, const void * src)
  2953. {
  2954. char * data = (char *)rtlMalloc(slen);
  2955. memcpy(data, src, slen);
  2956. tgt = data;
  2957. tlen = slen;
  2958. }
  2959. char * rtlStrToVStrX(unsigned slen, const void * src)
  2960. {
  2961. char * data = (char *)rtlMalloc(slen+1);
  2962. memcpy(data, src, slen);
  2963. data[slen] = 0;
  2964. return data;
  2965. }
  2966. char * rtlEStrToVStrX(unsigned slen, const char * src)
  2967. {
  2968. char * astr = (char*)alloca(slen);
  2969. rtlEStrToStr(slen,astr,slen,src);
  2970. return rtlStrToVStrX(slen, astr);
  2971. }
  2972. void rtlEStrToStrX(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  2973. {
  2974. char * data = (char *)rtlMalloc(slen);
  2975. rtlEStrToStr(slen, data, slen, src);
  2976. tgt = data;
  2977. tlen = slen;
  2978. }
  2979. void rtlStrToEStrX(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  2980. {
  2981. char * data = (char *)rtlMalloc(slen);
  2982. rtlStrToEStr(slen, data, slen, src);
  2983. tgt = data;
  2984. tlen = slen;
  2985. }
  2986. //---------------------------------------------------------------------------
  2987. // See http://www.isthe.com/chongo/tech/comp/fnv/index.html
  2988. #define FNV1_64_INIT HASH64_INIT
  2989. #define FNV_64_PRIME I64C(0x100000001b3U)
  2990. #define APPLY_FNV64(hval, next) { hval *= FNV_64_PRIME; hval ^= next; }
  2991. hash64_t rtlHash64Data(size32_t len, const void *buf, hash64_t hval)
  2992. {
  2993. const unsigned char *bp = (const unsigned char *)buf; /* start of buffer */
  2994. #if __BYTE_ORDER == __LITTLE_ENDIAN
  2995. //This possibly breaks the aliasing rules for c++, but I can't see it causing any problems
  2996. while (len >= sizeof(unsigned))
  2997. {
  2998. unsigned next = *(const unsigned *)bp;
  2999. bp += sizeof(unsigned);
  3000. for (unsigned i=0; i < sizeof(unsigned); i++)
  3001. {
  3002. APPLY_FNV64(hval, (byte)next);
  3003. next >>= 8;
  3004. }
  3005. len -= sizeof(unsigned);
  3006. }
  3007. #endif
  3008. const unsigned char *be = bp + len; /* beyond end of buffer */
  3009. while (bp < be)
  3010. {
  3011. APPLY_FNV64(hval, *bp++);
  3012. }
  3013. return hval;
  3014. }
  3015. hash64_t rtlHash64VStr(const char *str, hash64_t hval)
  3016. {
  3017. const unsigned char *s = (const unsigned char *)str;
  3018. unsigned char c;
  3019. while ((c = *s++) != 0)
  3020. {
  3021. APPLY_FNV64(hval, c);
  3022. }
  3023. return hval;
  3024. }
  3025. hash64_t rtlHash64Unicode(unsigned length, UChar const * k, hash64_t hval)
  3026. {
  3027. unsigned trimLength = rtlTrimUnicodeStrLen(length, k);
  3028. for (unsigned i=0; i < trimLength; i++)
  3029. {
  3030. //Handle surrogate pairs correctly, but still hash the utf16 representation
  3031. const byte * cur = reinterpret_cast<const byte *>(&k[i]);
  3032. UChar32 c = k[i];
  3033. if (U16_IS_SURROGATE(c))
  3034. {
  3035. U16_GET(k, 0, i, length, c);
  3036. if (!u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
  3037. {
  3038. APPLY_FNV64(hval, cur[0]);
  3039. APPLY_FNV64(hval, cur[1]);
  3040. APPLY_FNV64(hval, cur[2]);
  3041. APPLY_FNV64(hval, cur[3]);
  3042. }
  3043. //Skip the surrogate pair
  3044. i++;
  3045. }
  3046. else
  3047. {
  3048. if (!u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
  3049. {
  3050. APPLY_FNV64(hval, cur[0]);
  3051. APPLY_FNV64(hval, cur[1]);
  3052. }
  3053. }
  3054. }
  3055. return hval;
  3056. }
  3057. hash64_t rtlHash64VUnicode(UChar const * k, hash64_t initval)
  3058. {
  3059. return rtlHash64Unicode(rtlUnicodeStrlen(k), k, initval);
  3060. }
  3061. //---------------------------------------------------------------------------
  3062. // See http://www.isthe.com/chongo/tech/comp/fnv/index.html
  3063. #define FNV1_32_INIT HASH32_INIT
  3064. #define FNV_32_PRIME 0x1000193
  3065. #define APPLY_FNV32(hval, next) { hval *= FNV_32_PRIME; hval ^= next; }
  3066. unsigned rtlHash32Data(size32_t len, const void *buf, unsigned hval)
  3067. {
  3068. const unsigned char *bp = (const unsigned char *)buf; /* start of buffer */
  3069. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3070. //This possibly breaks the aliasing rules for c++, but I can't see it causing any problems
  3071. while (len >= sizeof(unsigned))
  3072. {
  3073. unsigned next = *(const unsigned *)bp;
  3074. bp += sizeof(unsigned);
  3075. for (unsigned i=0; i < sizeof(unsigned); i++)
  3076. {
  3077. APPLY_FNV32(hval, (byte)next);
  3078. next >>= 8;
  3079. }
  3080. len -= sizeof(unsigned);
  3081. }
  3082. #endif
  3083. const unsigned char *be = bp + len; /* beyond end of buffer */
  3084. while (bp < be)
  3085. {
  3086. APPLY_FNV32(hval, *bp++);
  3087. }
  3088. return hval;
  3089. }
  3090. unsigned rtlHash32VStr(const char *str, unsigned hval)
  3091. {
  3092. const unsigned char *s = (const unsigned char *)str;
  3093. unsigned char c;
  3094. while ((c = *s++) != 0)
  3095. {
  3096. APPLY_FNV32(hval, c);
  3097. }
  3098. return hval;
  3099. }
  3100. unsigned rtlHash32Unicode(unsigned length, UChar const * k, unsigned hval)
  3101. {
  3102. unsigned trimLength = rtlTrimUnicodeStrLen(length, k);
  3103. for (unsigned i=0; i < trimLength; i++)
  3104. {
  3105. //Handle surrogate pairs correctly, but still hash the utf16 representation
  3106. const byte * cur = reinterpret_cast<const byte *>(&k[i]);
  3107. UChar32 c = k[i];
  3108. if (U16_IS_SURROGATE(c))
  3109. {
  3110. U16_GET(k, 0, i, length, c);
  3111. if (!u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
  3112. {
  3113. APPLY_FNV32(hval, cur[0]);
  3114. APPLY_FNV32(hval, cur[1]);
  3115. APPLY_FNV32(hval, cur[2]);
  3116. APPLY_FNV32(hval, cur[3]);
  3117. }
  3118. //Skip the surrogate pair
  3119. i++;
  3120. }
  3121. else
  3122. {
  3123. if (!u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
  3124. {
  3125. APPLY_FNV32(hval, cur[0]);
  3126. APPLY_FNV32(hval, cur[1]);
  3127. }
  3128. }
  3129. }
  3130. return hval;
  3131. }
  3132. unsigned rtlHash32VUnicode(UChar const * k, unsigned initval)
  3133. {
  3134. return rtlHash32Unicode(rtlUnicodeStrlen(k), k, initval);
  3135. }
  3136. //---------------------------------------------------------------------------
  3137. // Hash Helper functions
  3138. #define mix(a,b,c) \
  3139. { \
  3140. a -= b; a -= c; a ^= (c>>13); \
  3141. b -= c; b -= a; b ^= (a<<8); \
  3142. c -= a; c -= b; c ^= (b>>13); \
  3143. a -= b; a -= c; a ^= (c>>12); \
  3144. b -= c; b -= a; b ^= (a<<16); \
  3145. c -= a; c -= b; c ^= (b>>5); \
  3146. a -= b; a -= c; a ^= (c>>3); \
  3147. b -= c; b -= a; b ^= (a<<10); \
  3148. c -= a; c -= b; c ^= (b>>15); \
  3149. }
  3150. #define GETBYTE0(n) ((unsigned)k[n])
  3151. #define GETBYTE1(n) ((unsigned)k[n+1]<<8)
  3152. #define GETBYTE2(n) ((unsigned)k[n+2]<<16)
  3153. #define GETBYTE3(n) ((unsigned)k[n+3]<<24)
  3154. #define GETWORD(k,n) (GETBYTE0(n)+GETBYTE1(n)+GETBYTE2(n)+GETBYTE3(n))
  3155. // the above looks inefficient but the compiler optimizes well
  3156. // this hash looks slow but is about twice as quick as using our CRC table
  3157. // and gives gives better results
  3158. // (see paper at http://burtleburtle.net/bob/hash/evahash.html for more info)
  3159. unsigned rtlHashData( unsigned length, const void *_k, unsigned initval)
  3160. {
  3161. const unsigned char * k = (const unsigned char *)_k;
  3162. register unsigned a,b,c,len;
  3163. /* Set up the internal state */
  3164. len = length;
  3165. a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
  3166. c = initval; /* the previous hash value */
  3167. /*---------------------------------------- handle most of the key */
  3168. while (len >= 12)
  3169. {
  3170. a += GETWORD(k,0);
  3171. b += GETWORD(k,4);
  3172. c += GETWORD(k,8);
  3173. mix(a,b,c);
  3174. k += 12; len -= 12;
  3175. }
  3176. /*------------------------------------- handle the last 11 bytes */
  3177. c += length;
  3178. switch(len) /* all the case statements fall through */
  3179. {
  3180. case 11: c+=GETBYTE3(7);
  3181. case 10: c+=GETBYTE2(7);
  3182. case 9 : c+=GETBYTE1(7);
  3183. /* the first byte of c is reserved for the length */
  3184. case 8 : b+=GETBYTE3(4);
  3185. case 7 : b+=GETBYTE2(4);
  3186. case 6 : b+=GETBYTE1(4);
  3187. case 5 : b+=GETBYTE0(4);
  3188. case 4 : a+=GETBYTE3(0);
  3189. case 3 : a+=GETBYTE2(0);
  3190. case 2 : a+=GETBYTE1(0);
  3191. case 1 : a+=GETBYTE0(0);
  3192. /* case 0: nothing left to add */
  3193. }
  3194. mix(a,b,c);
  3195. /*-------------------------------------------- report the result */
  3196. return c;
  3197. }
  3198. unsigned rtlHashString( unsigned length, const char *_k, unsigned initval)
  3199. {
  3200. return rtlHashData(rtlTrimStrLen(length, _k), _k, initval);
  3201. }
  3202. unsigned rtlHashUnicode(unsigned length, UChar const * k, unsigned initval)
  3203. {
  3204. unsigned trimLength = rtlTrimUnicodeStrLen(length, k);
  3205. //Because of the implementation of HASH we need to strip ignoreable code points instead of skipping them
  3206. size32_t tempLength;
  3207. rtlDataAttr temp;
  3208. if (stripIgnorableCharacters(tempLength, temp.refustr(), trimLength, k))
  3209. return rtlHashData(tempLength*2, temp.getustr(), initval);
  3210. return rtlHashData(trimLength*sizeof(UChar), k, initval);
  3211. }
  3212. unsigned rtlHashVStr(const char * k, unsigned initval)
  3213. {
  3214. return rtlHashData(rtlTrimVStrLen(k), k, initval);
  3215. }
  3216. unsigned rtlHashVUnicode(UChar const * k, unsigned initval)
  3217. {
  3218. return rtlHashUnicode(rtlTrimVUnicodeStrLen(k), k, initval);
  3219. }
  3220. #define GETWORDNC(k,n) ((GETBYTE0(n)+GETBYTE1(n)+GETBYTE2(n)+GETBYTE3(n))&0xdfdfdfdf)
  3221. unsigned rtlHashDataNC( unsigned length, const void * _k, unsigned initval)
  3222. {
  3223. const unsigned char * k = (const unsigned char *)_k;
  3224. register unsigned a,b,c,len;
  3225. /* Set up the internal state */
  3226. len = length;
  3227. a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
  3228. c = initval; /* the previous hash value */
  3229. /*---------------------------------------- handle most of the key */
  3230. while (len >= 12)
  3231. {
  3232. a += GETWORDNC(k,0);
  3233. b += GETWORDNC(k,4);
  3234. c += GETWORDNC(k,8);
  3235. mix(a,b,c);
  3236. k += 12; len -= 12;
  3237. }
  3238. /*------------------------------------- handle the last 11 bytes */
  3239. c += length;
  3240. switch(len) /* all the case statements fall through */
  3241. {
  3242. case 11: c+=GETBYTE3(7)&0xdf;
  3243. case 10: c+=GETBYTE2(7)&0xdf;
  3244. case 9 : c+=GETBYTE1(7)&0xdf;
  3245. /* the first byte of c is reserved for the length */
  3246. case 8 : b+=GETBYTE3(4)&0xdf;
  3247. case 7 : b+=GETBYTE2(4)&0xdf;
  3248. case 6 : b+=GETBYTE1(4)&0xdf;
  3249. case 5 : b+=GETBYTE0(4)&0xdf;
  3250. case 4 : a+=GETBYTE3(0)&0xdf;
  3251. case 3 : a+=GETBYTE2(0)&0xdf;
  3252. case 2 : a+=GETBYTE1(0)&0xdf;
  3253. case 1 : a+=GETBYTE0(0)&0xdf;
  3254. /* case 0: nothing left to add */
  3255. }
  3256. mix(a,b,c);
  3257. /*-------------------------------------------- report the result */
  3258. return c;
  3259. }
  3260. unsigned rtlHashVStrNC(const char * k, unsigned initval)
  3261. {
  3262. return rtlHashDataNC(strlen(k), k, initval);
  3263. }
  3264. //---------------------------------------------------------------------------
  3265. unsigned rtlCrcData( unsigned length, const void *_k, unsigned initval)
  3266. {
  3267. return crc32((const char *)_k, length, initval);
  3268. }
  3269. unsigned rtlCrcUnicode(unsigned length, UChar const * k, unsigned initval)
  3270. {
  3271. return crc32((char const *)k, length*2, initval);
  3272. }
  3273. unsigned rtlCrcVStr( const char * k, unsigned initval)
  3274. {
  3275. return crc32(k, strlen(k), initval);
  3276. }
  3277. unsigned rtlCrcVUnicode(UChar const * k, unsigned initval)
  3278. {
  3279. return rtlCrcUnicode(rtlUnicodeStrlen(k), k, initval);
  3280. }
  3281. //---------------------------------------------------------------------------
  3282. // MD5 processing:
  3283. void rtlHashMd5Init(size32_t sizestate, void * _state)
  3284. {
  3285. assertex(sizestate >= sizeof(md5_state_s));
  3286. md5_state_s * state = (md5_state_s *)_state;
  3287. md5_init(state);
  3288. }
  3289. void rtlHashMd5Data(size32_t len, const void *buf, size32_t sizestate, void * _state)
  3290. {
  3291. md5_state_s * state = (md5_state_s * )_state;
  3292. md5_append(state, (const md5_byte_t *)buf, len);
  3293. }
  3294. void rtlHashMd5Finish(void * out, size32_t sizestate, void * _state)
  3295. {
  3296. typedef md5_byte_t digest_t[16];
  3297. md5_state_s * state = (md5_state_s *)_state;
  3298. md5_finish(state, *(digest_t*)out);
  3299. }
  3300. //---------------------------------------------------------------------------
  3301. unsigned rtlRandom()
  3302. {
  3303. CriticalBlock block(random_Sect);
  3304. return random_->next();
  3305. }
  3306. void rtlSeedRandom(unsigned value)
  3307. {
  3308. CriticalBlock block(random_Sect);
  3309. random_->seed(value);
  3310. }
  3311. // These are all useful functions for testing - not really designed for other people to use them...
  3312. ECLRTL_API unsigned rtlTick()
  3313. {
  3314. return msTick();
  3315. }
  3316. ECLRTL_API bool rtlGPF()
  3317. {
  3318. char * x = 0;
  3319. *x = 0;
  3320. return false;
  3321. }
  3322. ECLRTL_API unsigned rtlSleep(unsigned delay)
  3323. {
  3324. MilliSleep(delay);
  3325. return 0;
  3326. }
  3327. ECLRTL_API unsigned rtlDisplay(unsigned len, const char * src)
  3328. {
  3329. LOG(MCprogress, unknownJob, "%.*s", len, src);
  3330. return 0;
  3331. }
  3332. void rtlEcho(unsigned len, const char * src)
  3333. {
  3334. printf("%.*s\n", len, src);
  3335. }
  3336. ECLRTL_API unsigned __int64 rtlNano()
  3337. {
  3338. return cycle_to_nanosec(get_cycles_now());
  3339. }
  3340. ECLRTL_API void rtlTestGetPrimes(unsigned & num, void * & data)
  3341. {
  3342. unsigned numPrimes = 6;
  3343. unsigned size = sizeof(unsigned) * numPrimes;
  3344. unsigned * primes = (unsigned *)rtlMalloc(size);
  3345. primes[0] = 1;
  3346. primes[1] = 2;
  3347. primes[2] = 3;
  3348. primes[3] = 5;
  3349. primes[4] = 7;
  3350. primes[5] = 11;
  3351. num = numPrimes;
  3352. data = primes;
  3353. }
  3354. ECLRTL_API void rtlTestFibList(bool & outAll, size32_t & outSize, void * & outData, bool inAll, size32_t inSize, const void * inData)
  3355. {
  3356. const unsigned * inList = (const unsigned *)inData;
  3357. unsigned * outList = (unsigned *)rtlMalloc(inSize);
  3358. unsigned * curOut = outList;
  3359. unsigned count = inSize / sizeof(*inList);
  3360. unsigned prev = 0;
  3361. for (unsigned i=0; i < count; i++)
  3362. {
  3363. unsigned next = *inList++;
  3364. *curOut++ = next + prev;
  3365. prev = next;
  3366. }
  3367. outAll = inAll;
  3368. outSize = inSize;
  3369. outData = outList;
  3370. }
  3371. unsigned rtlDelayReturn(unsigned value, unsigned sleepTime)
  3372. {
  3373. MilliSleep(sleepTime);
  3374. return value;
  3375. }
  3376. //---------------------------------------------------------------------------
  3377. class CRtlFailException : public CInterface, public IUserException
  3378. {
  3379. public:
  3380. CRtlFailException(int _code, char const * _msg) : code(_code) { msg = strdup(_msg); }
  3381. ~CRtlFailException() { free(msg); }
  3382. IMPLEMENT_IINTERFACE;
  3383. virtual int errorCode() const { return code; }
  3384. virtual StringBuffer & errorMessage(StringBuffer & buff) const { return buff.append(msg); }
  3385. virtual MessageAudience errorAudience() const { return MSGAUD_user; }
  3386. private:
  3387. int code;
  3388. char * msg;
  3389. };
  3390. void rtlFail(int code, const char *msg)
  3391. {
  3392. throw dynamic_cast<IUserException *>(new CRtlFailException(code, msg));
  3393. }
  3394. void rtlSysFail(int code, const char *msg)
  3395. {
  3396. throw MakeStringException(MSGAUD_user, code, "%s", msg);
  3397. }
  3398. void rtlThrowOutOfMemory(int code, const char *msg)
  3399. {
  3400. throw static_cast<IUserException *>(new CRtlFailException(code, msg));
  3401. }
  3402. void rtlReportRowOverflow(unsigned size, unsigned max)
  3403. {
  3404. throw MakeStringException(MSGAUD_user, 1000, "Row size %u exceeds the maximum size specified(%u)", size, max);
  3405. }
  3406. void rtlReportFieldOverflow(unsigned size, unsigned max, const char * name)
  3407. {
  3408. if (!name)
  3409. rtlReportRowOverflow(size, max);
  3410. else
  3411. throw MakeStringException(MSGAUD_user, 1000, "Assignment to field '%s' causes row overflow. Size %u exceeds the maximum size specified(%u)", name, size, max);
  3412. }
  3413. void rtlCheckRowOverflow(unsigned size, unsigned max)
  3414. {
  3415. if (size > max)
  3416. rtlReportRowOverflow(size, max);
  3417. }
  3418. void rtlCheckFieldOverflow(unsigned size, unsigned max, const char * field)
  3419. {
  3420. if (size > max)
  3421. rtlReportFieldOverflow(size, max, field);
  3422. }
  3423. void rtlFailUnexpected()
  3424. {
  3425. throw MakeStringException(MSGAUD_user, -1, "Unexpected code execution");
  3426. }
  3427. void rtlFailOnAssert()
  3428. {
  3429. throw MakeStringException(MSGAUD_user, -1, "Abort execution");
  3430. }
  3431. void rtlFailDivideByZero()
  3432. {
  3433. throw MakeStringException(MSGAUD_user, -1, "Division by zero");
  3434. }
  3435. //---------------------------------------------------------------------------
  3436. void deserializeRaw(unsigned recordSize, void *record, MemoryBuffer &in)
  3437. {
  3438. in.read(recordSize, record);
  3439. }
  3440. void deserializeDataX(size32_t & len, void * & data, MemoryBuffer &in)
  3441. {
  3442. free(data);
  3443. in.read(sizeof(len), &len);
  3444. data = rtlMalloc(len);
  3445. in.read(len, data);
  3446. }
  3447. void deserializeStringX(size32_t & len, char * & data, MemoryBuffer &in)
  3448. {
  3449. free(data);
  3450. in.read(sizeof(len), &len);
  3451. data = (char *)rtlMalloc(len);
  3452. in.read(len, data);
  3453. }
  3454. char * deserializeCStringX(MemoryBuffer &in)
  3455. {
  3456. unsigned len;
  3457. in.read(sizeof(len), &len);
  3458. char * data = (char *)rtlMalloc(len+1);
  3459. in.read(len, data);
  3460. data[len] = 0;
  3461. return data;
  3462. }
  3463. void deserializeUnicodeX(size32_t & len, UChar * & data, MemoryBuffer &in)
  3464. {
  3465. free(data);
  3466. in.read(sizeof(len), &len);
  3467. data = (UChar *)rtlMalloc(len*sizeof(UChar));
  3468. in.read(len*sizeof(UChar), data);
  3469. }
  3470. void deserializeUtf8X(size32_t & len, char * & data, MemoryBuffer &in)
  3471. {
  3472. free(data);
  3473. in.read(sizeof(len), &len);
  3474. unsigned size = rtlUtf8Size(len, in.readDirect(0));
  3475. data = (char *)rtlMalloc(size);
  3476. in.read(size, data);
  3477. }
  3478. UChar * deserializeVUnicodeX(MemoryBuffer &in)
  3479. {
  3480. unsigned len;
  3481. in.read(sizeof(len), &len);
  3482. UChar * data = (UChar *)rtlMalloc((len+1)*sizeof(UChar));
  3483. in.read(len*sizeof(UChar), data);
  3484. data[len] = 0;
  3485. return data;
  3486. }
  3487. void deserializeSet(bool & isAll, size32_t & len, void * & data, MemoryBuffer &in)
  3488. {
  3489. free(data);
  3490. in.read(isAll);
  3491. in.read(sizeof(len), &len);
  3492. data = rtlMalloc(len);
  3493. in.read(len, data);
  3494. }
  3495. void serializeRaw(unsigned recordSize, const void *record, MemoryBuffer &out)
  3496. {
  3497. out.append(recordSize, record);
  3498. }
  3499. void serializeDataX(size32_t len, const void * data, MemoryBuffer &out)
  3500. {
  3501. out.append(len).append(len, data);
  3502. }
  3503. void serializeStringX(size32_t len, const char * data, MemoryBuffer &out)
  3504. {
  3505. out.append(len).append(len, data);
  3506. }
  3507. void serializeCStringX(const char * data, MemoryBuffer &out)
  3508. {
  3509. unsigned len = strlen(data);
  3510. out.append(len).append(len, data);
  3511. }
  3512. void serializeUnicodeX(size32_t len, const UChar * data, MemoryBuffer &out)
  3513. {
  3514. out.append(len).append(len*sizeof(UChar), data);
  3515. }
  3516. void serializeUtf8X(size32_t len, const char * data, MemoryBuffer &out)
  3517. {
  3518. out.append(len).append(rtlUtf8Size(len, data), data);
  3519. }
  3520. void serializeSet(bool isAll, size32_t len, const void * data, MemoryBuffer &out)
  3521. {
  3522. out.append(isAll).append(len).append(len, data);
  3523. }
  3524. //---------------------------------------------------------------------------
  3525. ECLRTL_API void serializeFixedString(unsigned len, const char *field, MemoryBuffer &out)
  3526. {
  3527. out.append(len, field);
  3528. }
  3529. ECLRTL_API void serializeLPString(unsigned len, const char *field, MemoryBuffer &out)
  3530. {
  3531. out.append(len);
  3532. out.append(len, field);
  3533. }
  3534. ECLRTL_API void serializeVarString(const char *field, MemoryBuffer &out)
  3535. {
  3536. out.append(field);
  3537. }
  3538. ECLRTL_API void serializeBool(bool field, MemoryBuffer &out)
  3539. {
  3540. out.append(field);
  3541. }
  3542. ECLRTL_API void serializeFixedData(unsigned len, const void *field, MemoryBuffer &out)
  3543. {
  3544. out.append(len, field);
  3545. }
  3546. ECLRTL_API void serializeLPData(unsigned len, const void *field, MemoryBuffer &out)
  3547. {
  3548. out.append(len);
  3549. out.append(len, field);
  3550. }
  3551. ECLRTL_API void serializeInt1(signed char field, MemoryBuffer &out)
  3552. {
  3553. // MORE - why did overloading pick the int method for this???
  3554. // out.append(field);
  3555. out.appendEndian(sizeof(field), &field);
  3556. }
  3557. ECLRTL_API void serializeInt2(signed short field, MemoryBuffer &out)
  3558. {
  3559. out.appendEndian(sizeof(field), &field);
  3560. }
  3561. ECLRTL_API void serializeInt3(signed int field, MemoryBuffer &out)
  3562. {
  3563. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3564. out.appendEndian(3, &field);
  3565. #else
  3566. out.appendEndian(3, ((char *) &field) + 1);
  3567. #endif
  3568. }
  3569. ECLRTL_API void serializeInt4(signed int field, MemoryBuffer &out)
  3570. {
  3571. out.appendEndian(sizeof(field), &field);
  3572. }
  3573. ECLRTL_API void serializeInt5(signed __int64 field, MemoryBuffer &out)
  3574. {
  3575. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3576. out.appendEndian(5, &field);
  3577. #else
  3578. out.appendEndian(5, ((char *) &field) + 3);
  3579. #endif
  3580. }
  3581. ECLRTL_API void serializeInt6(signed __int64 field, MemoryBuffer &out)
  3582. {
  3583. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3584. out.appendEndian(6, &field);
  3585. #else
  3586. out.appendEndian(6, ((char *) &field) + 2);
  3587. #endif
  3588. }
  3589. ECLRTL_API void serializeInt7(signed __int64 field, MemoryBuffer &out)
  3590. {
  3591. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3592. out.appendEndian(7, &field);
  3593. #else
  3594. out.appendEndian(7, ((char *) &field) + 1);
  3595. #endif
  3596. }
  3597. ECLRTL_API void serializeInt8(signed __int64 field, MemoryBuffer &out)
  3598. {
  3599. out.appendEndian(sizeof(field), &field);
  3600. }
  3601. ECLRTL_API void serializeUInt1(unsigned char field, MemoryBuffer &out)
  3602. {
  3603. out.appendEndian(sizeof(field), &field);
  3604. }
  3605. ECLRTL_API void serializeUInt2(unsigned short field, MemoryBuffer &out)
  3606. {
  3607. out.appendEndian(sizeof(field), &field);
  3608. }
  3609. ECLRTL_API void serializeUInt3(unsigned int field, MemoryBuffer &out)
  3610. {
  3611. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3612. out.appendEndian(3, &field);
  3613. #else
  3614. out.appendEndian(3, ((char *) &field) + 1);
  3615. #endif
  3616. }
  3617. ECLRTL_API void serializeUInt4(unsigned int field, MemoryBuffer &out)
  3618. {
  3619. out.appendEndian(sizeof(field), &field);
  3620. }
  3621. ECLRTL_API void serializeUInt5(unsigned __int64 field, MemoryBuffer &out)
  3622. {
  3623. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3624. out.appendEndian(5, &field);
  3625. #else
  3626. out.appendEndian(5, ((char *) &field) + 3);
  3627. #endif
  3628. }
  3629. ECLRTL_API void serializeUInt6(unsigned __int64 field, MemoryBuffer &out)
  3630. {
  3631. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3632. out.appendEndian(6, &field);
  3633. #else
  3634. out.appendEndian(6, ((char *) &field) + 2);
  3635. #endif
  3636. }
  3637. ECLRTL_API void serializeUInt7(unsigned __int64 field, MemoryBuffer &out)
  3638. {
  3639. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3640. out.appendEndian(7, &field);
  3641. #else
  3642. out.appendEndian(7, ((char *) &field) + 1);
  3643. #endif
  3644. }
  3645. ECLRTL_API void serializeUInt8(unsigned __int64 field, MemoryBuffer &out)
  3646. {
  3647. out.appendEndian(sizeof(field), &field);
  3648. }
  3649. ECLRTL_API void serializeReal4(float field, MemoryBuffer &out)
  3650. {
  3651. out.appendEndian(sizeof(field), &field);
  3652. }
  3653. ECLRTL_API void serializeReal8(double field, MemoryBuffer &out)
  3654. {
  3655. out.append(sizeof(field), &field);
  3656. }
  3657. //These maths functions can all have out of range arguments....
  3658. //---------------------------------------------------------------------------
  // Base-10 logarithm that returns 0 instead of a domain error when x <= 0.
  // A NaN argument fails the x <= 0 test and is passed through to log10().
  ECLRTL_API double rtlLog10(double x)
  {
      return (x <= 0) ? 0 : log10(x);
  }
  // Natural logarithm that returns 0 instead of a domain error when x <= 0.
  // A NaN argument fails the x <= 0 test and is passed through to log().
  ECLRTL_API double rtlLog(double x)
  {
      return (x <= 0) ? 0 : log(x);
  }
  // Square root that returns 0 for negative input rather than a domain error.
  // Zero and NaN are passed through to sqrt().
  ECLRTL_API double rtlSqrt(double x)
  {
      return (x < 0) ? 0 : sqrt(x);
  }
  // Arc cosine that returns 0 when |x| > 1 rather than a domain error.
  // A NaN argument fails the range test and is passed through to acos().
  ECLRTL_API double rtlACos(double x)
  {
      return (fabs(x) > 1) ? 0 : acos(x);
  }
  // Arc sine that returns 0 when |x| > 1 rather than a domain error.
  // A NaN argument fails the range test and is passed through to asin().
  ECLRTL_API double rtlASin(double x)
  {
      return (fabs(x) > 1) ? 0 : asin(x);
  }
  3684. //---------------------------------------------------------------------------
  3685. ECLRTL_API bool rtlIsValidReal(unsigned size, const void * data)
  3686. {
  3687. byte * bytes = (byte *)data;
  3688. //Valid unless it is a Nan, represented by exponent all 1's and non-zero mantissa (ignore the sign).
  3689. if (size == 4)
  3690. {
  3691. //sign(1) exponent(8) mantissa(23)
  3692. if (((bytes[3] & 0x7f) == 0x7f) && ((bytes[2] & 0x80) == 0x80))
  3693. {
  3694. if ((bytes[2] & 0x7f) != 0 || bytes[1] || bytes[0])
  3695. return false;
  3696. }
  3697. }
  3698. else if (size == 8)
  3699. {
  3700. //sign(1) exponent(11) mantissa(52)
  3701. if (((bytes[7] & 0x7f) == 0x7f) && ((bytes[6] & 0xF0) == 0xF0))
  3702. {
  3703. if ((bytes[6] & 0xF) || bytes[5] || bytes[4] || bytes[3] || bytes[2] || bytes[1] || bytes[0])
  3704. return false;
  3705. }
  3706. }
  3707. else
  3708. {
  3709. //sign(1) exponent(15) mantissa(64)
  3710. assertex(size==10);
  3711. if (((bytes[9] & 0x7f) == 0x7f) && (bytes[8] == 0xFF))
  3712. {
  3713. if (bytes[7] || bytes[6] || bytes[5] || bytes[4] || bytes[3] || bytes[2] || bytes[1] || bytes[0])
  3714. return false;
  3715. }
  3716. }
  3717. return true;
  3718. }
  // Builds the double used to represent a "null" real: a fixed 8-byte pattern
  // whose last two bytes are F0 7F and whose first byte is 01, copied directly
  // into the object representation of a double.
  // NOTE(review): read as a little-endian IEEE-754 double this is a NaN whose top
  // mantissa bit is clear, which is normally the signaling form, although the
  // original comment described it as non-signaling - confirm the intent.
  double rtlCreateRealNull()
  {
      static const byte pattern[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x7f };
      double value;
      memcpy(&value, pattern, sizeof(pattern));
      return value;
  }
  3730. void rtlUnicodeToUnicode(size32_t outlen, UChar * out, size32_t inlen, UChar const *in)
  3731. {
  3732. if(inlen>outlen) inlen = outlen;
  3733. memcpy(out, in, inlen*2);
  3734. while(inlen<outlen)
  3735. out[inlen++] = 0x0020;
  3736. }
  3737. void rtlUnicodeToVUnicode(size32_t outlen, UChar * out, size32_t inlen, UChar const *in)
  3738. {
  3739. if((inlen>=outlen) && (outlen != 0)) inlen = outlen-1;
  3740. memcpy(out, in, inlen*2);
  3741. out[inlen] = 0x0000;
  3742. }
  3743. void rtlVUnicodeToUnicode(size32_t outlen, UChar * out, UChar const *in)
  3744. {
  3745. rtlUnicodeToUnicode(outlen, out, rtlUnicodeStrlen(in), in);
  3746. }
  3747. void rtlVUnicodeToVUnicode(size32_t outlen, UChar * out, UChar const *in)
  3748. {
  3749. rtlUnicodeToVUnicode(outlen, out, rtlUnicodeStrlen(in), in);
  3750. }
  3751. void rtlUnicodeToUnicodeX(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src)
  3752. {
  3753. tgt = (UChar *)rtlMalloc(slen*2);
  3754. memcpy(tgt, src, slen*2);
  3755. tlen = slen;
  3756. }
  3757. UChar * rtlUnicodeToVUnicodeX(unsigned slen, UChar const * src)
  3758. {
  3759. UChar * data = (UChar *)rtlMalloc((slen+1)*2);
  3760. memcpy(data, src, slen*2);
  3761. data[slen] = 0x0000;
  3762. return data;
  3763. }
  3764. void rtlVUnicodeToUnicodeX(unsigned & tlen, UChar * & tgt, UChar const * src)
  3765. {
  3766. rtlUnicodeToUnicodeX(tlen, tgt, rtlUnicodeStrlen(src), src);
  3767. }
  3768. UChar * rtlVUnicodeToVUnicodeX(UChar const * src)
  3769. {
  3770. return rtlUnicodeToVUnicodeX(rtlUnicodeStrlen(src), src);
  3771. }
  3772. void rtlDecPushUnicode(size32_t len, UChar const * data)
  3773. {
  3774. char * buff = 0;
  3775. unsigned bufflen = 0;
  3776. rtlUnicodeToStrX(bufflen, buff, len, data);
  3777. DecPushString(bufflen, buff);
  3778. rtlFree(buff);
  3779. }
  3780. unsigned rtlUnicodeStrlen(UChar const * str)
  3781. {
  3782. return u_strlen(str);
  3783. }
  3784. //---------------------------------------------------------------------------
  3785. unsigned rtlUtf8Size(const void * data)
  3786. {
  3787. return readUtf8Size(data);
  3788. }
  3789. unsigned rtlUtf8Size(unsigned len, const void * _data)
  3790. {
  3791. const byte * data = (const byte *)_data;
  3792. size32_t offset = 0;
  3793. for (unsigned i=0; i< len; i++)
  3794. offset += readUtf8Size(data+offset);
  3795. return offset;
  3796. }
  3797. unsigned rtlUtf8Length(unsigned size, const void * _data)
  3798. {
  3799. const byte * data = (const byte *)_data;
  3800. size32_t length = 0;
  3801. for (unsigned offset=0; offset < size; offset += readUtf8Size(data+offset))
  3802. length++;
  3803. return length;
  3804. }
  3805. unsigned rtlUtf8Char(const void * data)
  3806. {
  3807. return readUtf8Char(data);
  3808. }
  3809. void rtlUtf8ToData(size32_t outlen, void * out, size32_t inlen, const char *in)
  3810. {
  3811. unsigned insize = rtlUtf8Size(inlen, in);
  3812. rtlCodepageToCodepage(outlen, (char *)out, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  3813. }
  3814. void rtlUtf8ToDataX(size32_t & outlen, void * & out, size32_t inlen, const char *in)
  3815. {
  3816. unsigned insize = rtlUtf8Size(inlen, in);
  3817. char * cout;
  3818. rtlCodepageToCodepageX(outlen, cout, inlen, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  3819. out = cout;
  3820. }
  3821. void rtlUtf8ToStr(size32_t outlen, char * out, size32_t inlen, const char *in)
  3822. {
  3823. unsigned insize = rtlUtf8Size(inlen, in);
  3824. rtlCodepageToCodepage(outlen, (char *)out, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  3825. }
  3826. void rtlUtf8ToStrX(size32_t & outlen, char * & out, size32_t inlen, const char *in)
  3827. {
  3828. unsigned insize = rtlUtf8Size(inlen, in);
  3829. rtlCodepageToCodepageX(outlen, out, inlen, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  3830. }
  3831. char * rtlUtf8ToVStr(size32_t inlen, const char *in)
  3832. {
  3833. unsigned utfSize = rtlUtf8Size(inlen, in);
  3834. char *ret = (char *) rtlMalloc(inlen+1);
  3835. rtlCodepageToCodepage(inlen, ret, utfSize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  3836. ret[inlen] = 0;
  3837. return ret;
  3838. }
  3839. void rtlDataToUtf8(size32_t outlen, char * out, size32_t inlen, const void *in)
  3840. {
  3841. rtlCodepageToCodepage(outlen*UTF8_MAXSIZE, (char *)out, inlen, (const char *)in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
  3842. }
  3843. void rtlDataToUtf8X(size32_t & outlen, char * & out, size32_t inlen, const void *in)
  3844. {
  3845. unsigned outsize;
  3846. rtlCodepageToCodepageX(outsize, out, inlen*UTF8_MAXSIZE, inlen, (const char *)in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
  3847. outlen = rtlUtf8Length(outsize, out);
  3848. }
  3849. void rtlStrToUtf8(size32_t outlen, char * out, size32_t inlen, const char *in)
  3850. {
  3851. rtlCodepageToCodepage(outlen*UTF8_MAXSIZE, (char *)out, inlen, in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
  3852. }
  3853. void rtlStrToUtf8X(size32_t & outlen, char * & out, size32_t inlen, const char *in)
  3854. {
  3855. unsigned outsize;
  3856. rtlCodepageToCodepageX(outsize, out, inlen*UTF8_MAXSIZE, inlen, in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
  3857. outlen = rtlUtf8Length(outsize, out);
  3858. }
  3859. void rtlUtf8ToUtf8(size32_t outlen, char * out, size32_t inlen, const char *in)
  3860. {
  3861. //Packs as many characaters as it can into the target, but don't include any half characters
  3862. size32_t offset = 0;
  3863. size32_t outsize = outlen*UTF8_MAXSIZE;
  3864. for (unsigned i=0; i< inlen; i++)
  3865. {
  3866. unsigned nextSize = readUtf8Size(in+offset);
  3867. if (offset + nextSize > outsize)
  3868. break;
  3869. offset += nextSize;
  3870. }
  3871. memcpy(out, in, offset);
  3872. if (offset != outsize)
  3873. memset(out+offset, ' ', outsize-offset);
  3874. }
  3875. void rtlUtf8ToUtf8X(size32_t & outlen, char * & out, size32_t inlen, const char *in)
  3876. {
  3877. unsigned insize = rtlUtf8Size(inlen, in);
  3878. char * buffer = (char *)rtlMalloc(insize);
  3879. memcpy(buffer, in, insize);
  3880. outlen = inlen;
  3881. out = buffer;
  3882. }
  3883. static int rtlCompareUtf8Utf8ViaUnicode(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale)
  3884. {
  3885. rtlDataAttr uleft(llen*sizeof(UChar));
  3886. rtlDataAttr uright(rlen*sizeof(UChar));
  3887. rtlUtf8ToUnicode(llen, uleft.getustr(), llen, left);
  3888. rtlUtf8ToUnicode(rlen, uright.getustr(), rlen, right);
  3889. return rtlCompareUnicodeUnicode(llen, uleft.getustr(), rlen, uright.getustr(), locale);
  3890. }
  3891. int rtlCompareUtf8Utf8(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale)
  3892. {
  3893. //MORE: Do a simple comparison as long as there are no non->0x80 characters around
  3894. // fall back to a full unicode comparison if we hit one - or in the next character to allow for accents etc.
  3895. const byte * bleft = (const byte *)left;
  3896. const byte * bright = (const byte *)right;
  3897. unsigned len = llen > rlen ? rlen : llen;
  3898. for (unsigned i = 0; i < len; i++)
  3899. {
  3900. byte nextLeft = bleft[i];
  3901. byte nextRight = bright[i];
  3902. if (nextLeft >= 0x80 || nextRight >= 0x80)
  3903. return rtlCompareUtf8Utf8ViaUnicode(llen-i, left+i, rlen-i, right+i, locale);
  3904. if ((i+1 != len) && ((bleft[i+1] >= 0x80) || bright[i+1] >= 0x80))
  3905. return rtlCompareUtf8Utf8ViaUnicode(llen-i, left+i, rlen-i, right+i, locale);
  3906. if (nextLeft != nextRight)
  3907. return nextLeft - nextRight;
  3908. }
  3909. int diff = 0;
  3910. if (len != llen)
  3911. {
  3912. for (;(diff == 0) && (len != llen);len++)
  3913. diff = bleft[len] - ' ';
  3914. }
  3915. else if (len != rlen)
  3916. {
  3917. for (;(diff == 0) && (len != rlen);len++)
  3918. diff = ' ' - bright[len];
  3919. }
  3920. return diff;
  3921. }
  3922. int rtlCompareUtf8Utf8Strength(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale, unsigned strength)
  3923. {
  3924. //GH->PG Any better way of doing this? We could possible decide it was a binary comparison instead I guess.
  3925. rtlDataAttr uleft(llen*sizeof(UChar));
  3926. rtlDataAttr uright(rlen*sizeof(UChar));
  3927. rtlUtf8ToUnicode(llen, uleft.getustr(), llen, left);
  3928. rtlUtf8ToUnicode(rlen, uright.getustr(), rlen, right);
  3929. return rtlCompareUnicodeUnicodeStrength(llen, uleft.getustr(), rlen, uright.getustr(), locale, strength);
  3930. }
  3931. void rtlDecPushUtf8(size32_t len, const void * data)
  3932. {
  3933. DecPushString(len, (const char *)data); // good enough for the moment
  3934. }
  3935. bool rtlUtf8ToBool(size32_t inlen, const char * in)
  3936. {
  3937. return rtlStrToBool(inlen, in);
  3938. }
  3939. __int64 rtlUtf8ToInt(size32_t inlen, const char * in)
  3940. {
  3941. return rtlStrToInt8(inlen, in); // good enough for the moment
  3942. }
  3943. double rtlUtf8ToReal(size32_t inlen, const char * in)
  3944. {
  3945. return rtlStrToReal(inlen, in); // good enough for the moment
  3946. }
  3947. void rtlCodepageToUtf8(unsigned outlen, char * out, unsigned inlen, char const * in, char const * codepage)
  3948. {
  3949. rtlCodepageToCodepage(outlen*UTF8_MAXSIZE, (char *)out, inlen, in, UTF8_CODEPAGE, codepage);
  3950. }
  3951. void rtlCodepageToUtf8X(unsigned & outlen, char * & out, unsigned inlen, char const * in, char const * codepage)
  3952. {
  3953. unsigned outsize;
  3954. rtlCodepageToCodepageX(outsize, out, inlen*UTF8_MAXSIZE, inlen, in, UTF8_CODEPAGE, codepage);
  3955. outlen = rtlUtf8Length(outsize, out);
  3956. }
  3957. void rtlUtf8ToCodepage(unsigned outlen, char * out, unsigned inlen, char const * in, char const * codepage)
  3958. {
  3959. unsigned insize = rtlUtf8Size(inlen, in);
  3960. rtlCodepageToCodepage(outlen, (char *)out, insize, in, codepage, UTF8_CODEPAGE);
  3961. }
  3962. void rtlUtf8ToCodepageX(unsigned & outlen, char * & out, unsigned inlen, char const * in, char const * codepage)
  3963. {
  3964. unsigned insize = rtlUtf8Size(inlen, in);
  3965. rtlCodepageToCodepageX(outlen, out, inlen, insize, in, codepage, UTF8_CODEPAGE);
  3966. }
  3967. void rtlUnicodeToUtf8X(unsigned & outlen, char * & out, unsigned inlen, const UChar * in)
  3968. {
  3969. unsigned outsize;
  3970. rtlUnicodeToCodepageX(outsize, out, inlen, in, UTF8_CODEPAGE);
  3971. outlen = rtlUtf8Length(outsize, out);
  3972. }
  3973. void rtlUnicodeToUtf8(unsigned outlen, char * out, unsigned inlen, const UChar * in)
  3974. {
  3975. rtlUnicodeToCodepage(outlen*UTF8_MAXSIZE, out, inlen, in, UTF8_CODEPAGE);
  3976. }
  3977. void rtlUtf8ToUnicodeX(unsigned & outlen, UChar * & out, unsigned inlen, char const * in)
  3978. {
  3979. rtlCodepageToUnicodeX(outlen, out, rtlUtf8Size(inlen, in), in, UTF8_CODEPAGE);
  3980. }
  3981. void rtlUtf8ToUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
  3982. {
  3983. rtlCodepageToUnicode(outlen, out, rtlUtf8Size(inlen, in), in, UTF8_CODEPAGE);
  3984. }
  3985. ECLRTL_API void rtlUtf8SubStrFT(unsigned tlen, char * tgt, unsigned slen, char const * src, unsigned from, unsigned to)
  3986. {
  3987. normalizeFromTo(from, to);
  3988. clipFromTo(from, to, slen);
  3989. unsigned copylen = to - from;
  3990. unsigned startOffset = rtlUtf8Size(from, src);
  3991. rtlUtf8ToUtf8(tlen, tgt, copylen, src+startOffset);
  3992. }
  3993. ECLRTL_API void rtlUtf8SubStrFTX(unsigned & tlen, char * & tgt, unsigned slen, char const * src, unsigned from, unsigned to)
  3994. {
  3995. normalizeFromTo(from, to);
  3996. unsigned len = to - from;
  3997. clipFromTo(from, to, slen);
  3998. unsigned copylen = to - from;
  3999. unsigned fillSize = len - copylen;
  4000. unsigned startOffset = rtlUtf8Size(from, src);
  4001. unsigned copySize = rtlUtf8Size(copylen, src+startOffset);
  4002. char * buffer = (char *)rtlMalloc(copySize + fillSize);
  4003. memcpy(buffer, (byte *)src+startOffset, copySize);
  4004. if (fillSize)
  4005. memset(buffer+copySize, ' ', fillSize);
  4006. tlen = len;
  4007. tgt = buffer;
  4008. }
  4009. ECLRTL_API void rtlUtf8SubStrFX(unsigned & tlen, char * & tgt, unsigned slen, char const * src, unsigned from)
  4010. {
  4011. normalizeFromTo(from, slen);
  4012. unsigned len = slen - from;
  4013. unsigned startOffset = rtlUtf8Size(from, src);
  4014. unsigned copySize = rtlUtf8Size(len, src+startOffset);
  4015. char * buffer = (char *)rtlMalloc(copySize);
  4016. memcpy(buffer, (byte *)src+startOffset, copySize);
  4017. tlen = len;
  4018. tgt = buffer;
  4019. }
  4020. ECLRTL_API void rtlUtf8ToLower(size32_t l, char * t, char const * locale)
  4021. {
  4022. //Convert to lower case, but only go via unicode routines if we have to...
  4023. for (unsigned i=0; i< l; i++)
  4024. {
  4025. byte next = *t;
  4026. if (next >= 0x80)
  4027. {
  4028. //yuk, go via unicode to do the convertion.
  4029. unsigned len = l-i;
  4030. unsigned size = rtlUtf8Size(len, t+i);
  4031. rtlDataAttr unicode(len*sizeof(UChar));
  4032. rtlCodepageToUnicode(len, unicode.getustr(), size, t+i, UTF8_CODEPAGE);
  4033. rtlUnicodeToLower(len, unicode.getustr(), locale);
  4034. rtlUnicodeToCodepage(size, t+i, len, unicode.getustr(), UTF8_CODEPAGE);
  4035. return;
  4036. }
  4037. *t++ = tolower(next);
  4038. }
  4039. }
  4040. ECLRTL_API void rtlConcatUtf8(unsigned & tlen, char * * tgt, ...)
  4041. {
  4042. //Going to have to go via unicode because of normalization. However, it might be worth optimizing the case where no special characters are present
  4043. va_list args;
  4044. unsigned totalLength = 0;
  4045. unsigned maxLength = 0;
  4046. va_start(args, tgt);
  4047. for(;;)
  4048. {
  4049. unsigned len = va_arg(args, unsigned);
  4050. if(len+1==0)
  4051. break;
  4052. const char * str = va_arg(args, const char *);
  4053. totalLength += len;
  4054. if (len > maxLength)
  4055. maxLength = len;
  4056. }
  4057. va_end(args);
  4058. rtlDataAttr next(maxLength*sizeof(UChar));
  4059. rtlDataAttr result(totalLength*sizeof(UChar));
  4060. unsigned idx = 0;
  4061. UErrorCode err = U_ZERO_ERROR;
  4062. va_start(args, tgt);
  4063. for(;;)
  4064. {
  4065. unsigned len = va_arg(args, unsigned);
  4066. if(len+1==0)
  4067. break;
  4068. if (len)
  4069. {
  4070. const char * str = va_arg(args, const char *);
  4071. rtlUtf8ToUnicode(len, next.getustr(), len, str);
  4072. idx = unorm_concatenate(result.getustr(), idx, next.getustr(), len, result.getustr(), totalLength, UNORM_NFC, 0, &err);
  4073. }
  4074. }
  4075. va_end(args);
  4076. rtlUnicodeToUtf8X(tlen, *tgt, idx, result.getustr());
  4077. }
  4078. ECLRTL_API unsigned rtlConcatUtf8ToUtf8(unsigned tlen, char * tgt, unsigned offset, unsigned slen, const char * src)
  4079. {
  4080. //NB: Inconsistently with the other varieties, idx is a byte offset, not a character position to make the code more efficient.....
  4081. //normalization is done in the space filling routine at the end
  4082. unsigned ssize = rtlUtf8Size(slen, src);
  4083. assertex(tlen * UTF8_MAXSIZE >= offset+ssize);
  4084. memcpy(tgt+offset, src, ssize);
  4085. return offset + ssize;
  4086. }
  4087. ECLRTL_API void rtlUtf8SpaceFill(unsigned tlen, char * tgt, unsigned offset)
  4088. {
  4089. const byte * src = (const byte *)tgt;
  4090. for (unsigned i=0; i<offset; i++)
  4091. {
  4092. if (src[i] >= 0x80)
  4093. {
  4094. unsigned idx = rtlUtf8Length(offset, tgt);
  4095. rtlDataAttr unicode(idx*sizeof(UChar));
  4096. rtlUtf8ToUnicode(idx, unicode.getustr(), idx, tgt);
  4097. unicodeEnsureIsNormalized(idx, unicode.getustr());
  4098. rtlUnicodeToUtf8(tlen, tgt, idx, unicode.getustr());
  4099. return;
  4100. }
  4101. }
  4102. //no special characters=>easy route.
  4103. memset(tgt+offset, ' ', tlen*UTF8_MAXSIZE-offset);
  4104. }
  4105. ECLRTL_API unsigned rtlHash32Utf8(unsigned length, const char * k, unsigned initval)
  4106. {
  4107. //These need to hash the same way as a UNICODE string would => convert to UNICODE
  4108. //It would be hard to optimize to hash the string without performing the conversion.
  4109. size32_t tempLength;
  4110. rtlDataAttr temp;
  4111. rtlUtf8ToUnicodeX(tempLength, temp.refustr(), length, k);
  4112. return rtlHash32Unicode(tempLength, temp.getustr(), initval);
  4113. }
  4114. ECLRTL_API unsigned rtlHashUtf8(unsigned length, const char * k, unsigned initval)
  4115. {
  4116. //These need to hash the same way as a UNICODE string would => convert to UNICODE
  4117. size32_t tempLength;
  4118. rtlDataAttr temp;
  4119. rtlUtf8ToUnicodeX(tempLength, temp.refustr(), length, k);
  4120. return rtlHashUnicode(tempLength, temp.getustr(), initval);
  4121. }
  4122. ECLRTL_API hash64_t rtlHash64Utf8(unsigned length, const char * k, hash64_t initval)
  4123. {
  4124. //These need to hash the same way as a UNICODE string would => convert to UNICODE
  4125. size32_t tempLength;
  4126. rtlDataAttr temp;
  4127. rtlUtf8ToUnicodeX(tempLength, temp.refustr(), length, k);
  4128. return rtlHash64Unicode(tempLength, temp.getustr(), initval);
  4129. }
  4130. unsigned rtlCrcUtf8(unsigned length, const char * k, unsigned initval)
  4131. {
  4132. return rtlCrcData(rtlUtf8Size(length, k), k, initval);
  4133. }
  4134. int rtlNewSearchUtf8Table(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search, const char * locale)
  4135. {
  4136. //MORE: Hopelessly inefficient.... Should rethink - possibly introducing a class for doing string searching, and the Utf8 variety pre-converting the
  4137. //search strings into unicode.
  4138. int left = 0;
  4139. int right = count;
  4140. do
  4141. {
  4142. int mid = (left + right) >> 1;
  4143. int cmp = rtlCompareUtf8Utf8(width, search, elemlen, table[mid], locale);
  4144. if (cmp < 0)
  4145. right = mid;
  4146. else if (cmp > 0)
  4147. left = mid+1;
  4148. else
  4149. return mid;
  4150. } while (left < right);
  4151. return -1;
  4152. }
  4153. //---------------------------------------------------------------------------
  4154. #ifdef _USE_BOOST_REGEX
  4155. class CStrRegExprFindInstance : implements IStrRegExprFindInstance
  4156. {
  4157. private:
  4158. bool matched;
  4159. const boost::regex * regEx;
  4160. boost::cmatch subs;
  4161. char * sample; //only required if findstr/findvstr will be called
  4162. public:
  4163. CStrRegExprFindInstance(const boost::regex * _regEx, const char * _str, size32_t _from, size32_t _len, bool _keep)
  4164. : regEx(_regEx)
  4165. {
  4166. matched = false;
  4167. sample = NULL;
  4168. try
  4169. {
  4170. if (_keep)
  4171. {
  4172. sample = (char *)rtlMalloc(_len + 1); //required for findstr
  4173. memcpy(sample, _str + _from, _len);
  4174. sample[_len] = (char)NULL;
  4175. matched = boost::regex_search(sample, subs, *regEx);
  4176. }
  4177. else
  4178. {
  4179. matched = boost::regex_search(_str + _from, _str + _len, subs, *regEx);
  4180. }
  4181. }
  4182. catch (const std::runtime_error & e)
  4183. {
  4184. throw MakeStringException(0, "Error in regex search: %s (regex: %s)", e.what(), regEx->str().c_str());
  4185. }
  4186. }
  4187. ~CStrRegExprFindInstance() //CAVEAT non-virtual destructor !
  4188. {
  4189. free(sample);
  4190. }
  4191. //IStrRegExprFindInstance
  4192. bool found() const { return matched; }
  4193. void getMatchX(unsigned & outlen, char * & out, unsigned n = 0) const
  4194. {
  4195. if (matched && (n < subs.size()))
  4196. {
  4197. outlen = subs[n].second - subs[n].first;
  4198. out = (char *)rtlMalloc(outlen);
  4199. memcpy(out, subs[n].first, outlen);
  4200. }
  4201. else
  4202. {
  4203. outlen = 0;
  4204. out = NULL;
  4205. }
  4206. }
  4207. char const * findvstr(unsigned outlen, char * out, unsigned n = 0)
  4208. {
  4209. if (matched && (n < subs.size()))
  4210. {
  4211. unsigned sublen = subs[n].second - subs[n].first;
  4212. if (sublen >= outlen)
  4213. sublen = outlen - 1;
  4214. memcpy(out, subs[n].first, sublen);
  4215. out[sublen] = 0;
  4216. }
  4217. else
  4218. {
  4219. out[0] = 0;
  4220. }
  4221. return out;
  4222. }
  4223. };
  4224. //---------------------------------------------------------------------------
  4225. class CCompiledStrRegExpr : implements ICompiledStrRegExpr
  4226. {
  4227. private:
  4228. boost::regex regEx;
  4229. public:
  4230. CCompiledStrRegExpr(const char * _regExp, bool _isCaseSensitive = false)
  4231. {
  4232. try
  4233. {
  4234. if (_isCaseSensitive)
  4235. regEx.assign(_regExp, boost::regbase::perl);
  4236. else
  4237. regEx.assign(_regExp, boost::regbase::perl | boost::regbase::icase);
  4238. }
  4239. catch(const boost::bad_expression & e)
  4240. {
  4241. StringBuffer msg;
  4242. msg.append("Bad regular expression: ").append(e.what()).append(": ").append(_regExp);
  4243. rtlFail(0, msg.str()); //throws
  4244. }
  4245. }
  4246. //ICompiledStrRegExpr
  4247. void replace(size32_t & outlen, char * & out, size32_t slen, char const * str, size32_t rlen, char const * replace) const
  4248. {
  4249. std::string src(str, str + slen);
  4250. std::string fmt(replace, replace + rlen);
  4251. std::string tgt;
  4252. try
  4253. {
  4254. // tgt = boost::regex_merge(src, cre->regEx, fmt, boost::format_perl); //Algorithm regex_merge has been renamed regex_replace, existing code will continue to compile, but new code should use regex_replace instead.
  4255. tgt = boost::regex_replace(src, regEx, fmt, boost::format_perl);
  4256. }
  4257. catch(const std::runtime_error & e)
  4258. {
  4259. throw MakeStringException(0, "Error in regex replace: %s (regex: %s)", e.what(), regEx.str().c_str());
  4260. }
  4261. outlen = tgt.length();
  4262. out = (char *)rtlMalloc(outlen);
  4263. memcpy(out, tgt.data(), outlen);
  4264. }
  4265. IStrRegExprFindInstance * find(const char * str, size32_t from, size32_t len, bool needToKeepSearchString) const
  4266. {
  4267. CStrRegExprFindInstance * findInst = new CStrRegExprFindInstance(&regEx, str, from, len, needToKeepSearchString);
  4268. return findInst;
  4269. }
  4270. };
  4271. //---------------------------------------------------------------------------
  4272. ECLRTL_API ICompiledStrRegExpr * rtlCreateCompiledStrRegExpr(const char * regExpr, bool isCaseSensitive)
  4273. {
  4274. CCompiledStrRegExpr * expr = new CCompiledStrRegExpr(regExpr, isCaseSensitive);
  4275. return expr;
  4276. }
  4277. ECLRTL_API void rtlDestroyCompiledStrRegExpr(ICompiledStrRegExpr * compiledExpr)
  4278. {
  4279. if (compiledExpr)
  4280. delete (CCompiledStrRegExpr*)compiledExpr;
  4281. }
  4282. ECLRTL_API void rtlDestroyStrRegExprFindInstance(IStrRegExprFindInstance * findInst)
  4283. {
  4284. if (findInst)
  4285. delete (CStrRegExprFindInstance*)findInst;
  4286. }
  4287. //---------------------------------------------------------------------------
  4288. // RegEx Compiler for unicode strings
  4289. class CUStrRegExprFindInstance : implements IUStrRegExprFindInstance
  4290. {
  4291. private:
  4292. bool matched;
  4293. RegexMatcher * matcher;
  4294. UnicodeString sample;
  4295. unsigned matchedSize;
  4296. public:
  4297. CUStrRegExprFindInstance(RegexMatcher * _matcher, const UChar * _str, size32_t _from, size32_t _len)
  4298. : matcher(_matcher)
  4299. {
  4300. matched = false;
  4301. sample.setTo(_str + _from, _len);
  4302. matcher->reset(sample);
  4303. matched = matcher->find();
  4304. if (matched)
  4305. matchedSize = (unsigned)matcher->groupCount() + 1;
  4306. }
  4307. //IUStrRegExprFindInstance
  4308. bool found() const { return matched; }
  4309. void getMatchX(unsigned & outlen, UChar * & out, unsigned n = 0) const
  4310. {
  4311. if(matched && (n < matchedSize))
  4312. {
  4313. assertex(matcher);
  4314. UErrorCode uerr = U_ZERO_ERROR;
  4315. int32_t start = n ? matcher->start(n, uerr) : matcher->start(uerr);
  4316. int32_t end = n ? matcher->end(n, uerr) : matcher->end(uerr);
  4317. outlen = end - start;
  4318. out = (UChar *)rtlMalloc(outlen*2);
  4319. sample.extract(start, outlen, out);
  4320. }
  4321. else
  4322. {
  4323. outlen = 0;
  4324. out = NULL;
  4325. }
  4326. }
  4327. UChar const * findvstr(unsigned outlen, UChar * out, unsigned n = 0)
  4328. {
  4329. if(matched && (n < matchedSize))
  4330. {
  4331. assertex(matcher);
  4332. UErrorCode uerr = U_ZERO_ERROR;
  4333. int32_t start = n ? matcher->start(n, uerr) : matcher->start(uerr);
  4334. int32_t end = n ? matcher->end(n, uerr) : matcher->end(uerr);
  4335. unsigned sublen = end - start;
  4336. if(sublen >= outlen)
  4337. sublen = outlen - 1;
  4338. sample.extract(start, sublen, out);
  4339. out[sublen] = 0;
  4340. }
  4341. else
  4342. {
  4343. out[0] = 0;
  4344. }
  4345. return out;
  4346. }
  4347. };
  4348. //---------------------------------------------------------------------------
  4349. class CCompiledUStrRegExpr : implements ICompiledUStrRegExpr
  4350. {
  4351. private:
  4352. RegexPattern * pattern;
  4353. RegexMatcher * matcher;
  4354. public:
  4355. CCompiledUStrRegExpr(const UChar * _UregExp, bool _isCaseSensitive = false)
  4356. {
  4357. UErrorCode uerr = U_ZERO_ERROR;
  4358. UParseError uperr;
  4359. if (_isCaseSensitive)
  4360. pattern = RegexPattern::compile(_UregExp, uperr, uerr);
  4361. else
  4362. pattern = RegexPattern::compile(_UregExp, UREGEX_CASE_INSENSITIVE, uperr, uerr);
  4363. matcher = pattern->matcher(uerr);
  4364. if (U_FAILURE(uerr))
  4365. {
  4366. char * expAscii;
  4367. unsigned expAsciiLen;
  4368. rtlUnicodeToEscapedStrX(expAsciiLen, expAscii, rtlUnicodeStrlen(_UregExp), _UregExp);
  4369. StringBuffer msg;
  4370. msg.append("Bad regular expression: ").append(u_errorName(uerr)).append(": ").append(expAsciiLen, expAscii);
  4371. rtlFree(expAscii);
  4372. delete matcher;
  4373. delete pattern;
  4374. matcher = 0;
  4375. pattern = 0;
  4376. rtlFail(0, msg.str()); //throws
  4377. }
  4378. }
  4379. ~CCompiledUStrRegExpr()
  4380. {
  4381. if (matcher)
  4382. delete matcher;
  4383. if (pattern)
  4384. delete pattern;
  4385. }
  4386. void replace(size32_t & outlen, UChar * & out, size32_t slen, const UChar * str, size32_t rlen, UChar const * replace) const
  4387. {
  4388. UnicodeString const src(str, slen);
  4389. UErrorCode err = U_ZERO_ERROR;
  4390. RegexMatcher * replacer = pattern->matcher(src, err);
  4391. UnicodeString const fmt(replace, rlen);
  4392. UnicodeString const tgt = replacer->replaceAll(fmt, err);
  4393. outlen = tgt.length();
  4394. out = (UChar *)rtlMalloc(outlen*2);
  4395. tgt.extract(0, outlen, out);
  4396. delete replacer;
  4397. }
  4398. IUStrRegExprFindInstance * find(const UChar * str, size32_t from, size32_t len) const
  4399. {
  4400. CUStrRegExprFindInstance * findInst = new CUStrRegExprFindInstance(matcher, str, from, len);
  4401. return findInst;
  4402. }
  4403. };
  4404. //---------------------------------------------------------------------------
  4405. ECLRTL_API ICompiledUStrRegExpr * rtlCreateCompiledUStrRegExpr(const UChar * regExpr, bool isCaseSensitive)
  4406. {
  4407. CCompiledUStrRegExpr * expr = new CCompiledUStrRegExpr(regExpr, isCaseSensitive);
  4408. return expr;
  4409. }
  4410. ECLRTL_API void rtlDestroyCompiledUStrRegExpr(ICompiledUStrRegExpr * compiledExpr)
  4411. {
  4412. if (compiledExpr)
  4413. delete (CCompiledUStrRegExpr*)compiledExpr;
  4414. }
  4415. ECLRTL_API void rtlDestroyUStrRegExprFindInstance(IUStrRegExprFindInstance * findInst)
  4416. {
  4417. if (findInst)
  4418. delete (CUStrRegExprFindInstance*)findInst;
  4419. }
  4420. #else // _USE_BOOST_REGEX not set
  // Variant compiled when _USE_BOOST_REGEX is not defined: regular expressions
  // are unavailable, so the request is passed to UNIMPLEMENTED_X with the
  // message below.  Nothing is returned after the macro.
  ECLRTL_API ICompiledStrRegExpr * rtlCreateCompiledStrRegExpr(const char * regExpr, bool isCaseSensitive)
  {
  UNIMPLEMENTED_X("Boost regex disabled");
  }
  // Variant compiled when _USE_BOOST_REGEX is not defined: intentionally does
  // nothing.
  ECLRTL_API void rtlDestroyCompiledStrRegExpr(ICompiledStrRegExpr * compiledExpr)
  {
  }
  // Variant compiled when _USE_BOOST_REGEX is not defined: intentionally does
  // nothing.
  ECLRTL_API void rtlDestroyStrRegExprFindInstance(IStrRegExprFindInstance * findInst)
  {
  }
  // Variant compiled when _USE_BOOST_REGEX is not defined: unicode regular
  // expressions are unavailable too, so the request is passed to
  // UNIMPLEMENTED_X with the message below.  Nothing is returned after the macro.
  ECLRTL_API ICompiledUStrRegExpr * rtlCreateCompiledUStrRegExpr(const UChar * regExpr, bool isCaseSensitive)
  {
  UNIMPLEMENTED_X("Boost regex disabled");
  }
  // Variant compiled when _USE_BOOST_REGEX is not defined: intentionally does
  // nothing.
  ECLRTL_API void rtlDestroyCompiledUStrRegExpr(ICompiledUStrRegExpr * compiledExpr)
  {
  }
  // Variant compiled when _USE_BOOST_REGEX is not defined: intentionally does
  // nothing.
  ECLRTL_API void rtlDestroyUStrRegExprFindInstance(IUStrRegExprFindInstance * findInst)
  {
  }
  4441. #endif
  4442. //---------------------------------------------------------------------------
  4443. ECLRTL_API int rtlQueryLocalFailCode(IException * e)
  4444. {
  4445. return e->errorCode();
  4446. }
  4447. ECLRTL_API void rtlGetLocalFailMessage(size32_t & len, char * & text, IException * e, const char * tag)
  4448. {
  4449. rtlExceptionExtract(len, text, e, tag);
  4450. }
  4451. ECLRTL_API void rtlFreeException(IException * e)
  4452. {
  4453. e->Release();
  4454. }
  4455. //---------------------------------------------------------------------------
  4456. //Generally any calls to this function have also checked that the length(trim(str)) <= fieldLen, so exceptions should only occur if compareLen > fieldLen
  4457. //However, function can now also handle the exception case.
  4458. ECLRTL_API void rtlCreateRange(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str, byte fill, byte pad)
  4459. {
  4460. //
  4461. if (compareLen > fieldLen)
  4462. {
  4463. if ((int)compareLen >= 0)
  4464. {
  4465. //x[1..m] = y, m is larger than fieldLen, so truncate to fieldLen
  4466. compareLen = fieldLen;
  4467. }
  4468. else
  4469. compareLen = 0; // probably m[1..-1] or something silly
  4470. }
  4471. if (len > compareLen)
  4472. {
  4473. while ((len > compareLen) && (str[len-1] == pad))
  4474. len--;
  4475. //so change the search range to FF,FF,FF .. 00.00.00 which will then never match.
  4476. if (len > compareLen)
  4477. {
  4478. compareLen = 0;
  4479. fill = (fill == 0) ? 255 : 0;
  4480. }
  4481. }
  4482. outlen = fieldLen;
  4483. out = (char *)rtlMalloc(fieldLen);
  4484. if (len >= compareLen)
  4485. memcpy(out, str, compareLen);
  4486. else
  4487. {
  4488. memcpy(out, str, len);
  4489. memset(out+len, pad, compareLen-len);
  4490. }
  4491. memset(out + compareLen, fill, fieldLen-compareLen);
  4492. }
  4493. ECLRTL_API void rtlCreateStrRangeLow(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
  4494. {
  4495. rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 0, ' ');
  4496. }
  4497. ECLRTL_API void rtlCreateStrRangeHigh(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
  4498. {
  4499. rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 255, ' ');
  4500. }
  4501. ECLRTL_API void rtlCreateDataRangeLow(size32_t & outlen, void * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const void * str)
  4502. {
  4503. rtlCreateRange(outlen, *(char * *)&out, fieldLen, compareLen, len, (const char *)str, 0, 0);
  4504. }
  4505. ECLRTL_API void rtlCreateDataRangeHigh(size32_t & outlen, void * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const void * str)
  4506. {
  4507. rtlCreateRange(outlen, *(char * *)&out, fieldLen, compareLen, len, (const char *)str, 255, 0);
  4508. }
  4509. ECLRTL_API void rtlCreateRangeLow(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
  4510. {
  4511. rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 0, ' ');
  4512. }
  4513. ECLRTL_API void rtlCreateRangeHigh(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
  4514. {
  4515. rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 255, ' ');
  4516. }
  4517. ECLRTL_API void rtlCreateUnicodeRange(size32_t & outlen, UChar * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const UChar * str, byte fill)
  4518. {
  4519. //Same as function above!
  4520. if (compareLen > fieldLen)
  4521. {
  4522. if ((int)compareLen >= 0)
  4523. {
  4524. //x[1..m] = y, m is larger than fieldLen, so truncate to fieldLen
  4525. compareLen = fieldLen;
  4526. }
  4527. else
  4528. compareLen = 0; // probably m[1..-1] or something silly
  4529. }
  4530. if (len > compareLen)
  4531. {
  4532. while ((len > compareLen) && (str[len-1] == ' '))
  4533. len--;
  4534. //so change the search range to FF,FF,FF .. 00.00.00 which will then never match.
  4535. if (len > compareLen)
  4536. {
  4537. compareLen = 0;
  4538. fill = (fill == 0) ? 255 : 0;
  4539. }
  4540. }
  4541. outlen = fieldLen;
  4542. out = (UChar *)rtlMalloc(fieldLen*sizeof(UChar));
  4543. if (len >= compareLen)
  4544. memcpy(out, str, compareLen*sizeof(UChar));
  4545. else
  4546. {
  4547. memcpy(out, str, len * sizeof(UChar));
  4548. while (len != compareLen)
  4549. out[len++] = ' ';
  4550. }
  4551. memset(out + compareLen, fill, (fieldLen-compareLen) * sizeof(UChar));
  4552. }
  4553. ECLRTL_API void rtlCreateUnicodeRangeLow(size32_t & outlen, UChar * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const UChar * str)
  4554. {
  4555. rtlCreateUnicodeRange(outlen, out, fieldLen, compareLen, len, str, 0x00);
  4556. }
  4557. ECLRTL_API void rtlCreateUnicodeRangeHigh(size32_t & outlen, UChar * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const UChar * str)
  4558. {
  4559. rtlCreateUnicodeRange(outlen, out, fieldLen, compareLen, len, str, 0xFF);
  4560. }
  4561. //---------------------------------------------------------------------------
  4562. ECLRTL_API unsigned rtlCountRows(size32_t len, const void * data, IRecordSize * rs)
  4563. {
  4564. if (rs->isFixedSize())
  4565. return len / rs->getFixedSize();
  4566. unsigned count = 0;
  4567. while (len)
  4568. {
  4569. size32_t thisLen = rs->getRecordSize(data);
  4570. data = (byte *)data + thisLen;
  4571. if (thisLen > len)
  4572. throw MakeStringException(0, "Invalid raw data");
  4573. len -= thisLen;
  4574. count++;
  4575. }
  4576. return count;
  4577. }
  4578. //---------------------------------------------------------------------------
  4579. ECLRTL_API size32_t rtlCountToSize(unsigned count, const void * data, IRecordSize * rs)
  4580. {
  4581. if (rs->isFixedSize())
  4582. return count * rs->getFixedSize();
  4583. unsigned size = 0;
  4584. for (unsigned i=0;i<count;i++)
  4585. {
  4586. size32_t thisLen = rs->getRecordSize(data);
  4587. data = (byte *)data + thisLen;
  4588. size += thisLen;
  4589. }
  4590. return size;
  4591. }
  4592. //---------------------------------------------------------------------------
  4593. class rtlCodepageConverter
  4594. {
  4595. public:
  4596. rtlCodepageConverter(char const * sourceName, char const * targetName, bool & failed) : uerr(U_ZERO_ERROR)
  4597. {
  4598. srccnv = ucnv_open(sourceName, &uerr);
  4599. tgtcnv = ucnv_open(targetName, &uerr);
  4600. tgtMaxRatio = ucnv_getMaxCharSize(tgtcnv);
  4601. failed = U_FAILURE(uerr);
  4602. }
  4603. ~rtlCodepageConverter()
  4604. {
  4605. ucnv_close(srccnv);
  4606. ucnv_close(tgtcnv);
  4607. }
  4608. void convertX(unsigned & targetLength, char * & target, unsigned sourceLength, char const * source, bool & failed, bool preflight)
  4609. {
  4610. //convert from source to utf-16: try to avoid preflighting by guessing upper bound
  4611. //unicode length in UChars equal source length in chars if single byte encoding, and be less for multibyte
  4612. UChar * ubuff = (UChar *)rtlMalloc(sourceLength*2);
  4613. int32_t ulen = ucnv_toUChars(srccnv, ubuff, sourceLength, source, sourceLength, &uerr);
  4614. if(ulen > (int32_t)sourceLength)
  4615. {
  4616. //okay, so our guess was wrong, and we have to reallocate
  4617. free(ubuff);
  4618. ubuff = (UChar *)rtlMalloc(ulen*2);
  4619. ucnv_toUChars(srccnv, ubuff, ulen, source, sourceLength, &uerr);
  4620. }
  4621. if(preflight)
  4622. {
  4623. //convert from utf-16 to target: preflight to get buffer of exactly the right size
  4624. UErrorCode uerr2 = uerr; //preflight has to use copy of error code, as it is considered an 'error'
  4625. int32_t tlen = ucnv_fromUChars(tgtcnv, 0, 0, ubuff, ulen, &uerr2);
  4626. target = (char *)rtlMalloc(tlen);
  4627. targetLength = ucnv_fromUChars(tgtcnv, target, tlen, ubuff, ulen, &uerr);
  4628. }
  4629. else
  4630. {
  4631. //convert from utf-16 to target: avoid preflighting by allocating buffer of maximum size
  4632. target = (char *)rtlMalloc(ulen*tgtMaxRatio);
  4633. targetLength = ucnv_fromUChars(tgtcnv, target, ulen*tgtMaxRatio, ubuff, ulen, &uerr);
  4634. }
  4635. free(ubuff);
  4636. failed = U_FAILURE(uerr);
  4637. }
  4638. unsigned convert(unsigned targetLength, char * target, unsigned sourceLength, char const * source, bool & failed)
  4639. {
  4640. char * tgtStart = target;
  4641. ucnv_convertEx(tgtcnv, srccnv, &target, target+targetLength, &source, source+sourceLength, 0, 0, 0, 0, true, true, &uerr);
  4642. int32_t ret = target-tgtStart;
  4643. failed = U_FAILURE(uerr);
  4644. return ret;
  4645. }
  4646. private:
  4647. UErrorCode uerr;
  4648. UConverter * srccnv;
  4649. UConverter * tgtcnv;
  4650. int8_t tgtMaxRatio;
  4651. };
  4652. void * rtlOpenCodepageConverter(char const * sourceName, char const * targetName, bool & failed)
  4653. {
  4654. return new rtlCodepageConverter(sourceName, targetName, failed);
  4655. }
  4656. void rtlCloseCodepageConverter(void * converter)
  4657. {
  4658. delete ((rtlCodepageConverter *)converter);
  4659. }
  4660. void rtlCodepageConvertX(void * converter, unsigned & targetLength, char * & target, unsigned sourceLength, char const * source, bool & failed, bool preflight)
  4661. {
  4662. ((rtlCodepageConverter *)converter)->convertX(targetLength, target, sourceLength, source, failed, preflight);
  4663. }
  4664. unsigned rtlCodepageConvert(void * converter, unsigned targetLength, char * target, unsigned sourceLength, char const * source, bool & failed)
  4665. {
  4666. return ((rtlCodepageConverter *)converter)->convert(targetLength, target, sourceLength, source, failed);
  4667. }
  4668. //---------------------------------------------------------------------------
  4669. void appendUChar(MemoryBuffer & buff, char x)
  4670. {
  4671. UChar c = x;
  4672. buff.append(sizeof(c), &c);
  4673. }
  4674. void appendUChar(MemoryBuffer & buff, UChar c)
  4675. {
  4676. buff.append(sizeof(c), &c);
  4677. }
  4678. void appendUStr(MemoryBuffer & x, const char * text)
  4679. {
  4680. while (*text)
  4681. {
  4682. UChar c = *text++;
  4683. x.append(sizeof(c), &c);
  4684. }
  4685. }
  4686. ECLRTL_API void xmlDecodeStrX(size32_t & outLen, char * & out, size32_t inLen, const char * in)
  4687. {
  4688. StringBuffer input(inLen, in);
  4689. StringBuffer temp;
  4690. decodeXML(input, temp, NULL, NULL, false);
  4691. outLen = temp.length();
  4692. out = temp.detach();
  4693. }
  4694. bool hasPrefix(const UChar * ustr, const UChar * end, const char * str, unsigned len)
  4695. {
  4696. if ((unsigned)(end - ustr) < len)
  4697. return false;
  4698. while (len--)
  4699. {
  4700. if (*ustr++ != *str++)
  4701. return false;
  4702. }
  4703. return true;
  4704. }
  4705. ECLRTL_API void xmlDecodeUStrX(size32_t & outLen, UChar * & out, size32_t inLen, const UChar * in)
  4706. {
  4707. const UChar * cur = in;
  4708. const UChar * end = in+inLen;
  4709. MemoryBuffer ret;
  4710. while (cur<end)
  4711. {
  4712. switch(*cur)
  4713. {
  4714. case '&':
  4715. if(hasPrefix(cur+1, end, "amp;", 4))
  4716. {
  4717. cur += 4;
  4718. appendUChar(ret, '&');
  4719. }
  4720. else if(hasPrefix(cur+1, end, "lt;", 3))
  4721. {
  4722. cur += 3;
  4723. appendUChar(ret, '<');
  4724. }
  4725. else if(hasPrefix(cur+1, end, "gt;", 3))
  4726. {
  4727. cur += 3;
  4728. appendUChar(ret, '>');
  4729. }
  4730. else if(hasPrefix(cur+1, end, "quot;", 5))
  4731. {
  4732. cur += 5;
  4733. appendUChar(ret, '"');
  4734. }
  4735. else if(hasPrefix(cur+1, end, "apos;", 5))
  4736. {
  4737. cur += 5;
  4738. appendUChar(ret, '\'');
  4739. }
  4740. else if(hasPrefix(cur+1, end, "nbsp;", 5))
  4741. {
  4742. cur += 5;
  4743. appendUChar(ret, (UChar) 0xa0);
  4744. }
  4745. else if(hasPrefix(cur+1, end, "#", 1))
  4746. {
  4747. const UChar * saveCur = cur;
  4748. bool error = true; // until we have seen a digit...
  4749. cur += 2;
  4750. unsigned base = 10;
  4751. if (*cur == 'x')
  4752. {
  4753. base = 16;
  4754. cur++;
  4755. }
  4756. UChar value = 0;
  4757. while (cur < end)
  4758. {
  4759. unsigned digit;
  4760. UChar next = *cur;
  4761. if ((next >= '0') && (next <= '9'))
  4762. digit = next-'0';
  4763. else if ((next >= 'A') && (next <= 'F'))
  4764. digit = next-'A'+10;
  4765. else if ((next >= 'a') && (next <= 'f'))
  4766. digit = next-'a'+10;
  4767. else if (next==';')
  4768. break;
  4769. else
  4770. digit = base;
  4771. if (digit >= base)
  4772. {
  4773. error = true;
  4774. break;
  4775. }
  4776. error = false;
  4777. value = value * base + digit;
  4778. cur++;
  4779. }
  4780. if (error)
  4781. {
  4782. appendUChar(ret, '&');
  4783. cur = saveCur;
  4784. }
  4785. else
  4786. appendUChar(ret, value);
  4787. }
  4788. else
  4789. appendUChar(ret, *cur);
  4790. break;
  4791. default:
  4792. appendUChar(ret, *cur);
  4793. break;
  4794. }
  4795. cur++;
  4796. }
  4797. outLen = ret.length()/2;
  4798. out = (UChar *)ret.detach();
  4799. }
  4800. ECLRTL_API void xmlEncodeStrX(size32_t & outLen, char * & out, size32_t inLen, const char * in, unsigned flags)
  4801. {
  4802. StringBuffer temp;
  4803. encodeXML(in, temp, flags, inLen, false);
  4804. outLen = temp.length();
  4805. out = temp.detach();
  4806. }
  4807. ECLRTL_API void xmlEncodeUStrX(size32_t & outLen, UChar * & out, size32_t inLen, const UChar * in, unsigned flags)
  4808. {
  4809. const UChar * cur = in;
  4810. MemoryBuffer ret;
  4811. ret.ensureCapacity(inLen*2);
  4812. while (inLen)
  4813. {
  4814. UChar next = *cur;
  4815. switch(*cur)
  4816. {
  4817. case '&':
  4818. appendUStr(ret, "&amp;");
  4819. break;
  4820. case '<':
  4821. appendUStr(ret, "&lt;");
  4822. break;
  4823. case '>':
  4824. appendUStr(ret, "&gt;");
  4825. break;
  4826. case '\"':
  4827. appendUStr(ret, "&quot;");
  4828. break;
  4829. case '\'':
  4830. appendUStr(ret, "&apos;");
  4831. break;
  4832. case ' ':
  4833. appendUStr(ret, flags & ENCODE_SPACES?"&#32;":" ");
  4834. break;
  4835. case '\n':
  4836. appendUStr(ret, flags & ENCODE_NEWLINES?"&#10;":"\n");
  4837. break;
  4838. case '\r':
  4839. appendUStr(ret, flags & ENCODE_NEWLINES?"&#13;":"\r");
  4840. break;
  4841. case '\t':
  4842. appendUStr(ret, flags & ENCODE_SPACES?"&#9;":"\t");
  4843. break;
  4844. default:
  4845. appendUChar(ret, next);
  4846. break;
  4847. }
  4848. inLen--;
  4849. cur++;
  4850. }
  4851. outLen = ret.length()/2;
  4852. out = (UChar *)ret.detach();
  4853. }
  4854. //---------------------------------------------------------------------------
  4855. #define STRUCTURED_EXCEPTION_TAG "Error"
  //Returns true if text looks like a structured (xml) message.
  //text must be non-null and start with '<'.  If tag is supplied, text must start with exactly "<tag>";
  //if tag is NULL any text starting with '<' is accepted.
  inline bool isStructuredMessage(const char * text, const char * tag)
  {
      if (!text || text[0] != '<')
          return false;
      if (!tag)
          return true;
      const size_t lenTag = strlen(tag);
      //strncmp stops at the end of text, whereas memcmp could read beyond the terminator of a text shorter than the tag
      if (strncmp(text+1, tag, lenTag) != 0)
          return false;
      //Safe: the comparison above succeeded, so text contains at least lenTag characters after the '<'
      return text[lenTag+1] == '>';
  }
  4869. inline bool isStructuredError(const char * text) { return isStructuredMessage(text, STRUCTURED_EXCEPTION_TAG); }
  4870. void rtlExtractTag(size32_t & outLen, char * & out, const char * text, const char * tag, const char * rootTag)
  4871. {
  4872. if (!tag || !isStructuredMessage(text, rootTag))
  4873. {
  4874. if (text && (!tag || strcmp(tag, "text")==0))
  4875. rtlStrToStrX(outLen, out, strlen(text), text);
  4876. else
  4877. {
  4878. outLen = 0;
  4879. out = NULL;
  4880. }
  4881. }
  4882. else
  4883. {
  4884. StringBuffer startTag, endTag;
  4885. startTag.append("<").append(tag).append(">");
  4886. endTag.append("</").append(tag).append(">");
  4887. const char * start = strstr(text, startTag.str());
  4888. const char * end = strstr(text, endTag.str());
  4889. if (start && end)
  4890. {
  4891. start += startTag.length();
  4892. xmlDecodeStrX(outLen, out, end-start, start);
  4893. }
  4894. else
  4895. {
  4896. outLen = 0;
  4897. out = NULL;
  4898. }
  4899. }
  4900. }
  4901. void rtlExceptionExtract(size32_t & outLen, char * & out, const char * text, const char * tag)
  4902. {
  4903. if (!tag) tag = "text";
  4904. rtlExtractTag(outLen, out, text, tag, STRUCTURED_EXCEPTION_TAG);
  4905. }
  4906. void rtlExceptionExtract(size32_t & outLen, char * & out, IException * e, const char * tag)
  4907. {
  4908. StringBuffer text;
  4909. e->errorMessage(text);
  4910. rtlExceptionExtract(outLen, out, text.str(), tag);
  4911. }
  4912. void rtlAddExceptionTag(StringBuffer & errorText, const char * tag, const char * value)
  4913. {
  4914. if (!isStructuredError(errorText.str()))
  4915. {
  4916. StringBuffer temp;
  4917. temp.append("<" STRUCTURED_EXCEPTION_TAG "><text>");
  4918. encodeXML(errorText.str(), temp, ENCODE_WHITESPACE, errorText.length(), false);
  4919. temp.append("</text></" STRUCTURED_EXCEPTION_TAG ">");
  4920. errorText.swapWith(temp);
  4921. }
  4922. StringBuffer temp;
  4923. temp.append("<").append(tag).append(">");
  4924. encodeXML(value, temp, ENCODE_WHITESPACE, (unsigned)-1, false);
  4925. temp.append("</").append(tag).append(">");
  4926. unsigned len = errorText.length();
  4927. unsigned pos = len - strlen(STRUCTURED_EXCEPTION_TAG) - 3;
  4928. errorText.insert(pos, temp);
  4929. }
  4930. //---------------------------------------------------------------------------
  4931. void rtlRowBuilder::forceAvailable(size32_t size)
  4932. {
  4933. const size32_t chunkSize = 64;
  4934. maxsize = (size + chunkSize-1) & ~(chunkSize-1);
  4935. ptr = rtlRealloc(ptr, maxsize);
  4936. }
  4937. //---------------------------------------------------------------------------
  4938. inline unsigned numExtraBytesFromValue(unsigned __int64 first)
  4939. {
  4940. if (first >= I64C(0x10000000))
  4941. if (first >= I64C(0x40000000000))
  4942. if (first >= I64C(0x2000000000000))
  4943. if (first >= I64C(0x100000000000000))
  4944. return 8;
  4945. else
  4946. return 7;
  4947. else
  4948. return 6;
  4949. else
  4950. if (first >= I64C(0x800000000))
  4951. return 5;
  4952. else
  4953. return 4;
  4954. else
  4955. if (first >= 0x4000)
  4956. if (first >= 0x200000)
  4957. return 3;
  4958. else
  4959. return 2;
  4960. else
  4961. if (first >= 0x80)
  4962. return 1;
  4963. else
  4964. return 0;
  4965. }
  //A packed byte format, based on the unicode packing of utf-8.
  4967. //The number of top bits set in the leading byte indicates how many extra
  4968. //bytes follow (0..8). It gives the same compression as using a top bit to
  4969. //indicate continuation, but seems to be quicker (and requires less look ahead).
  4970. /*
  4971. byte numExtraBytesFromFirstTable[256] =
  4972. {
  4973. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  4974. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  4975. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  4976. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  4977. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  4978. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  4979. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  4980. 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8
  4981. };
  4982. inline unsigned numExtraBytesFromFirst(byte first)
  4983. {
  4984. return numExtraBytesFromFirstTable(first);
  4985. }
  4986. */
  4987. //NB: This seems to be faster than using the table lookup above. Probably affects the data cache less
  4988. inline unsigned numExtraBytesFromFirst(byte first)
  4989. {
  4990. if (first >= 0xF0)
  4991. if (first >= 0xFC)
  4992. if (first >= 0xFE)
  4993. if (first >= 0xFF)
  4994. return 8;
  4995. else
  4996. return 7;
  4997. else
  4998. return 6;
  4999. else
  5000. if (first >= 0xF8)
  5001. return 5;
  5002. else
  5003. return 4;
  5004. else
  5005. if (first >= 0xC0)
  5006. if (first >= 0xE0)
  5007. return 3;
  5008. else
  5009. return 2;
  5010. else
  5011. if (first >= 0x80)
  5012. return 1;
  5013. else
  5014. return 0;
  5015. }
  5016. const static byte leadingValueMask[9] = { 0x7f, 0x3f, 0x1f, 0x0f, 0x07, 0x03, 0x01, 0x00, 0x00 };
  5017. const static byte leadingLengthMask[9] = { 0x00, 0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF };
  5018. //maximum number of bytes for a packed value is size+1 bytes for size <=8 and last byte being fully used.
  5019. unsigned __int64 rtlGetPackedUnsigned(const void * _ptr)
  5020. {
  5021. const byte * ptr = (const byte *)_ptr;
  5022. byte first = *ptr++;
  5023. unsigned numExtra = numExtraBytesFromFirst(first);
  5024. unsigned __int64 value = first & leadingValueMask[numExtra];
  5025. //Loop unrolling has a negligable effect
  5026. while (numExtra--)
  5027. value = (value << 8) | *ptr++;
  5028. return value;
  5029. }
  5030. void rtlSetPackedUnsigned(void * _ptr, unsigned __int64 value)
  5031. {
  5032. byte * ptr = (byte *)_ptr;
  5033. unsigned numExtra = numExtraBytesFromValue(value);
  5034. byte firstMask = leadingLengthMask[numExtra];
  5035. while (numExtra)
  5036. {
  5037. ptr[numExtra--] = (byte)value;
  5038. value >>= 8;
  5039. }
  5040. ptr[0] = (byte)value | firstMask;
  5041. }
  5042. size32_t rtlGetPackedSize(const void * ptr)
  5043. {
  5044. return numExtraBytesFromFirst(*(byte*)ptr)+1;
  5045. }
  5046. size32_t rtlGetPackedSizeFromFirst(byte first)
  5047. {
  5048. return numExtraBytesFromFirst(first)+1;
  5049. }
  5050. //Store signed by moving the sign to the bottom bit, and inverting if negative.
  5051. //so small positive and negative numbers are stored compactly.
  5052. __int64 rtlGetPackedSigned(const void * ptr)
  5053. {
  5054. unsigned __int64 value = rtlGetPackedUnsigned(ptr);
  5055. unsigned __int64 shifted = (value >> 1);
  5056. return (__int64)((value & 1) ? ~shifted : shifted);
  5057. }
  5058. void rtlSetPackedSigned(void * ptr, __int64 value)
  5059. {
  5060. unsigned __int64 storeValue;
  5061. if (value < 0)
  5062. storeValue = (~value << 1) | 1;
  5063. else
  5064. storeValue = value << 1;
  5065. rtlSetPackedUnsigned(ptr, storeValue);
  5066. }
  5067. IAtom * rtlCreateFieldNameAtom(const char * name)
  5068. {
  5069. return createAtom(name);
  5070. }
  5071. void rtlBase64Encode(size32_t & tlen, char * & tgt, size32_t slen, const void * src)
  5072. {
  5073. tlen = 0;
  5074. tgt = NULL;
  5075. if (slen)
  5076. {
  5077. StringBuffer out;
  5078. JBASE64_Encode(src, slen, out);
  5079. tlen = out.length();
  5080. if (tlen)
  5081. {
  5082. char * data = (char *) rtlMalloc(tlen);
  5083. out.getChars(0, tlen, data);
  5084. tgt = data;
  5085. }
  5086. }
  5087. }
  5088. void rtlBase64Decode(size32_t & tlen, void * & tgt, size32_t slen, const char * src)
  5089. {
  5090. tlen = 0;
  5091. if (slen)
  5092. {
  5093. StringBuffer out;
  5094. if (JBASE64_Decode(slen, src, out))
  5095. tlen = out.length();
  5096. if (tlen)
  5097. {
  5098. char * data = (char *) rtlMalloc(tlen);
  5099. out.getChars(0, tlen, data);
  5100. tgt = (void *) data;
  5101. }
  5102. }
  5103. }
  5104. //---------------------------------------------------------------------------
  5105. void RtlCInterface::Link() const { atomic_inc(&xxcount); }
  5106. bool RtlCInterface::Release(void) const
  5107. {
  5108. if (atomic_dec_and_test(&xxcount))
  5109. {
  5110. delete this;
  5111. return true;
  5112. }
  5113. return false;
  5114. }
  5115. //---------------------------------------------------------------------------
  5116. class RtlRowStream : implements IRowStream, public RtlCInterface
  5117. {
  5118. public:
  5119. RtlRowStream(size32_t _count, byte * * _rowset) : count(_count), rowset(_rowset)
  5120. {
  5121. rtlLinkRowset(rowset);
  5122. cur = 0;
  5123. }
  5124. ~RtlRowStream()
  5125. {
  5126. rtlReleaseRowset(count, rowset);
  5127. }
  5128. RTLIMPLEMENT_IINTERFACE
  5129. virtual const void *nextRow()
  5130. {
  5131. if (cur >= count)
  5132. return NULL;
  5133. byte * ret = rowset[cur];
  5134. cur++;
  5135. rtlLinkRow(ret);
  5136. return ret;
  5137. }
  5138. virtual void stop()
  5139. {
  5140. cur = count;
  5141. }
  5142. protected:
  5143. size32_t cur;
  5144. size32_t count;
  5145. byte * * rowset;
  5146. };
  5147. ECLRTL_API IRowStream * createRowStream(size32_t count, byte * * rowset)
  5148. {
  5149. return new RtlRowStream(count, rowset);
  5150. }
  5151. #if 0
  5152. void PrintExtract(StringBuffer & s, const char * tag)
  5153. {
  5154. size32_t outLen;
  5155. char * out = NULL;
  5156. rtlExceptionExtract(outLen, out, s.str(), tag);
  5157. PrintLog("%s = %.*s", tag, outLen, out);
  5158. rtlFree(out);
  5159. }
  5160. void testStructuredExceptions()
  5161. {
  5162. StringBuffer s;
  5163. s.append("This<is>some text");
  5164. PrintExtract(s, NULL);
  5165. PrintExtract(s, "text");
  5166. PrintExtract(s, "is");
  5167. rtlAddExceptionTag(s, "location", "192.168.12.1");
  5168. PrintExtract(s, NULL);
  5169. PrintExtract(s, "text");
  5170. PrintExtract(s, "is");
  5171. PrintExtract(s, "location");
  5172. rtlAddExceptionTag(s, "author", "gavin");
  5173. PrintExtract(s, NULL);
  5174. PrintExtract(s, "text");
  5175. PrintExtract(s, "is");
  5176. PrintExtract(s, "location");
  5177. PrintExtract(s, "author");
  5178. PrintLog("%s", s.str());
  5179. }
  5180. static void testPackedUnsigned()
  5181. {
  5182. unsigned __int64 values[] = { 0, 1, 2, 10, 127, 128, 16383, 16384, 32767, 32768, 0xffffff, 0x7fffffff, 0xffffffff,
  5183. I64C(0xffffffffffffff), I64C(0x100000000000000), I64C(0x7fffffffffffffff), I64C(0xffffffffffffffff) };
  5184. unsigned numBytes[] = { 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 5, 5, 8, 9, 9, 9 };
  5185. unsigned numValues = _elements_in(values);
  5186. byte temp[9];
  5187. for (unsigned i = 0; i < numValues; i++)
  5188. {
  5189. rtlSetPackedUnsigned(temp, values[i]);
  5190. assertex(rtlGetPackedSize(temp) == numBytes[i]);
  5191. assertex(rtlGetPackedUnsigned(temp) == values[i]);
  5192. }
  5193. for (unsigned j= 0; j < 2000000; j++)
  5194. {
  5195. unsigned __int64 value = I64C(1) << (rtlRandom() & 63);
  5196. // unsigned value = rtlRandom();
  5197. rtlSetPackedUnsigned(temp, value);
  5198. assertex(rtlGetPackedSize(temp) == numExtraBytesFromValue(value)+1);
  5199. assertex(rtlGetPackedUnsigned(temp) == value);
  5200. }
  5201. for (unsigned k= 0; k < 63; k++)
  5202. {
  5203. unsigned __int64 value1 = I64C(1) << k;
  5204. rtlSetPackedUnsigned(temp, value1);
  5205. assertex(rtlGetPackedSize(temp) == numExtraBytesFromValue(value1)+1);
  5206. assertex(rtlGetPackedUnsigned(temp) == value1);
  5207. unsigned __int64 value2 = value1-1;
  5208. rtlSetPackedUnsigned(temp, value2);
  5209. assertex(rtlGetPackedSize(temp) == numExtraBytesFromValue(value2)+1);
  5210. assertex(rtlGetPackedUnsigned(temp) == value2);
  5211. }
  5212. }
  5213. static void testPackedSigned()
  5214. {
  5215. __int64 values[] = { 0, 1, -2, 10, 63, 64, -64, -65, 8191, 8192, 0x3fffffff,
  5216. I64C(0x7fffffffffffff), I64C(0x80000000000000), I64C(0x7fffffffffffffff), I64C(0x8000000000000000) };
  5217. unsigned numBytes[] = { 1, 1, 1, 1, 1, 2, 1, 2, 2, 3, 5,
  5218. 8, 9, 9, 9 };
  5219. unsigned numValues = _elements_in(values);
  5220. byte temp[9];
  5221. for (unsigned i = 0; i < numValues; i++)
  5222. {
  5223. rtlSetPackedSigned(temp, values[i]);
  5224. assertex(rtlGetPackedSize(temp) == numBytes[i]);
  5225. assertex(rtlGetPackedSigned(temp) == values[i]);
  5226. }
  5227. }
  5228. #endif
  //Does nothing.  NOTE(review): presumably exists so that calling it forces this library to be linked/loaded - confirm.
  void ensureRtlLoaded()
  {
      //Deliberately empty
  }
  5232. #ifdef _USE_CPPUNIT
  5233. #include "unittests.hpp"
  5234. class EclRtlTests : public CppUnit::TestFixture
  5235. {
  5236. CPPUNIT_TEST_SUITE( EclRtlTests );
  5237. CPPUNIT_TEST(RegexTest);
  5238. CPPUNIT_TEST(MultiRegexTest);
  5239. CPPUNIT_TEST_SUITE_END();
  5240. protected:
  5241. void RegexTest()
  5242. {
  5243. rtlCompiledStrRegex r;
  5244. size32_t outlen;
  5245. char * out = NULL;
  5246. r.setPattern("([A-Z]+)[ ]?'(S) ", true);
  5247. r->replace(outlen, out, 7, "ABC'S ", 5, "$1$2 ");
  5248. ASSERT(outlen==6);
  5249. ASSERT(out != NULL);
  5250. ASSERT(memcmp(out, "ABCS ", outlen)==0);
  5251. rtlFree(out);
  5252. }
  5253. void MultiRegexTest()
  5254. {
  5255. class RegexTestThread : public Thread
  5256. {
  5257. virtual int run()
  5258. {
  5259. for (int i = 0; i < 100000; i++)
  5260. {
  5261. rtlCompiledStrRegex r;
  5262. size32_t outlen;
  5263. char * out = NULL;
  5264. r.setPattern("([A-Z]+)[ ]?'(S) ", true);
  5265. r->replace(outlen, out, 7, "ABC'S ", 5, "$1$2 ");
  5266. ASSERT(outlen==6);
  5267. ASSERT(out != NULL);
  5268. ASSERT(memcmp(out, "ABCS ", outlen)==0);
  5269. rtlFree(out);
  5270. }
  5271. return 0;
  5272. }
  5273. };
  5274. RegexTestThread t1;
  5275. RegexTestThread t2;
  5276. RegexTestThread t3;
  5277. t1.start();
  5278. t2.start();
  5279. t3.start();
  5280. t1.join();
  5281. t2.join();
  5282. t3.join();
  5283. }
  5284. };
  5285. CPPUNIT_TEST_SUITE_REGISTRATION( EclRtlTests );
  5286. CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( EclRtlTests, "EclRtlTests" );
  5287. #endif