eclrtl.cpp 165 KB


  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "limits.h"
  14. #include "platform.h"
  15. #include <math.h>
  16. #include <stdio.h>
  17. #include "jexcept.hpp"
  18. #include "jmisc.hpp"
  19. #include "jutil.hpp"
  20. #include "jlib.hpp"
  21. #include "jptree.hpp"
  22. #include "junicode.hpp"
  23. #include "eclrtl.hpp"
  24. #include "rtlbcd.hpp"
  25. #include "eclrtl_imp.hpp"
  26. #ifdef _USE_ICU
  27. #include "unicode/uchar.h"
  28. #include "unicode/ucol.h"
  29. #include "unicode/ustring.h"
  30. #include "unicode/ucnv.h"
  31. #include "unicode/schriter.h"
  32. #include "unicode/regex.h"
  33. #include "unicode/normlzr.h"
  34. #include "unicode/locid.h"
  35. #endif
  36. #include "jlog.hpp"
  37. #include "jmd5.hpp"
  38. #include "rtlqstr.ipp"
  39. #include "roxiemem.hpp"
  40. #define UTF8_CODEPAGE "UTF-8"
  41. #define UTF8_MAXSIZE 4
  42. IRandomNumberGenerator * random_;
  43. static CriticalSection random_Sect;
  44. MODULE_INIT(INIT_PRIORITY_ECLRTL_ECLRTL)
  45. {
  46. random_ = createRandomNumberGenerator();
  47. random_->seed((unsigned)get_cycles_now());
  48. return true;
  49. }
  50. MODULE_EXIT()
  51. {
  52. random_->Release();
  53. }
  54. #ifndef _USE_ICU
  55. static inline bool u_isspace(UChar next) { return isspace((byte)next); }
  56. #endif
  57. //=============================================================================
  58. // Miscellaneous string functions...
  59. ECLRTL_API void * rtlMalloc(size32_t size)
  60. {
  61. if (!size)
  62. return NULL;
  63. void * retVal = malloc(size);
  64. if (!retVal)
  65. {
  66. PrintStackReport();
  67. rtlThrowOutOfMemory(0, "Memory allocation error!");
  68. }
  69. return retVal;
  70. }
  71. void rtlFree(void *ptr)
  72. {
  73. free(ptr);
  74. }
  75. ECLRTL_API void * rtlRealloc(void * _ptr, size32_t size)
  76. {
  77. void * retVal = realloc(_ptr, size);
  78. if( (0 < size) && (NULL == retVal))
  79. {
  80. PrintStackReport();
  81. rtlThrowOutOfMemory(0, "Memory reallocation error!");
  82. }
  83. return retVal;
  84. }
  85. //=============================================================================
  86. ECLRTL_API void rtlReleaseRow(const void * row)
  87. {
  88. ReleaseRoxieRow(row);
  89. }
  90. ECLRTL_API void rtlReleaseRowset(unsigned count, const byte * * rowset)
  91. {
  92. ReleaseRoxieRowset(count, rowset);
  93. }
  94. ECLRTL_API void * rtlLinkRow(const void * row)
  95. {
  96. LinkRoxieRow(row);
  97. return const_cast<void *>(row);
  98. }
  99. ECLRTL_API const byte * * rtlLinkRowset(const byte * * rowset)
  100. {
  101. LinkRoxieRowset(rowset);
  102. return rowset;
  103. }
  104. //=============================================================================
  105. // Unicode helper classes and functions
  106. // escape
  107. bool rtlGetNormalizedUnicodeLocaleName(unsigned len, char const * in, char * out)
  108. {
  109. bool isPrimary = true;
  110. bool ok = true;
  111. unsigned i;
  112. for(i=0; i<len; i++)
  113. if(in[i] == '_')
  114. {
  115. out[i] = '_';
  116. isPrimary = false;
  117. }
  118. else if(isalpha(in[i]))
  119. {
  120. out[i] = (isPrimary ? tolower(in[i]) : toupper(in[i]));
  121. }
  122. else
  123. {
  124. out[i] = 0;
  125. ok = false;
  126. }
  127. return ok;
  128. }
  129. #ifdef _USE_ICU
  130. static bool stripIgnorableCharacters(size32_t & lenResult, UChar * & result, size32_t length, const UChar * in)
  131. {
  132. unsigned numStripped = 0;
  133. unsigned lastGood = 0;
  134. for (unsigned i=0; i < length; i++)
  135. {
  136. UChar32 c = in[i];
  137. unsigned stripSize = 0;
  138. if (U16_IS_SURROGATE(c))
  139. {
  140. U16_GET(in, 0, i, length, c);
  141. if (u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
  142. stripSize = 2;
  143. else
  144. i++; // skip the surrogate
  145. }
  146. else
  147. {
  148. if (u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
  149. stripSize = 1;
  150. }
  151. if (stripSize != 0)
  152. {
  153. if (numStripped == 0)
  154. result = (UChar *)rtlMalloc((length-stripSize)*sizeof(UChar));
  155. //Copy and non ignorable characters skipped up to this point. (Note result+x is scaled by UChar)
  156. memcpy(result + lastGood - numStripped, in+lastGood, (i-lastGood) * sizeof(UChar));
  157. lastGood = i+stripSize;
  158. numStripped += stripSize;
  159. i += (stripSize-1);
  160. }
  161. }
  162. if (numStripped == 0)
  163. return false;
  164. lenResult = length-numStripped;
  165. memcpy(result + lastGood - numStripped, in+lastGood, (length-lastGood) * sizeof(UChar));
  166. return true;
  167. }
  168. void escapeUnicode(unsigned inlen, UChar const * in, StringBuffer & out)
  169. {
  170. UCharCharacterIterator iter(in, inlen);
  171. for(iter.first32(); iter.hasNext(); iter.next32())
  172. {
  173. UChar32 c = iter.current32();
  174. if(c < 0x80)
  175. out.append((char) c);
  176. else if (c < 0x10000)
  177. out.appendf("\\u%04X", c);
  178. else
  179. out.appendf("\\U%08X", c);
  180. }
  181. }
  182. // locales and collators
  183. static unsigned const unicodeStrengthLimit = 5;
  184. static UCollationStrength unicodeStrength[unicodeStrengthLimit] =
  185. {
  186. UCOL_PRIMARY,
  187. UCOL_SECONDARY,
  188. UCOL_TERTIARY,
  189. UCOL_QUATERNARY,
  190. UCOL_IDENTICAL
  191. };
  192. class RTLLocale : public CInterface
  193. {
  194. public:
  195. RTLLocale(char const * _locale) : locale(_locale)
  196. {
  197. for(unsigned i=0; i<unicodeStrengthLimit; i++)
  198. colls[i] = NULL;
  199. UErrorCode err = U_ZERO_ERROR;
  200. colls[2] = ucol_open(locale.get(), &err);
  201. assertex(U_SUCCESS(err));
  202. }
  203. ~RTLLocale()
  204. {
  205. for(unsigned i=0; i<unicodeStrengthLimit; i++)
  206. if(colls[i]) ucol_close(colls[i]);
  207. }
  208. UCollator * queryCollator() const { return colls[2]; }
  209. UCollator * queryCollator(unsigned strength) const
  210. {
  211. if(strength == 0) strength = 1;
  212. if(strength > unicodeStrengthLimit) strength = unicodeStrengthLimit;
  213. if(!colls[strength-1])
  214. {
  215. UErrorCode err = U_ZERO_ERROR;
  216. const_cast<UCollator * *>(colls)[strength-1] = ucol_open(locale.get(), &err);
  217. assertex(U_SUCCESS(err));
  218. ucol_setStrength(colls[strength-1], unicodeStrength[strength-1]);
  219. }
  220. return colls[strength-1];
  221. }
  222. private:
  223. StringAttr locale;
  224. UCollator * colls[unicodeStrengthLimit];
  225. };
  226. typedef MapStringTo<RTLLocale, char const *> MapStrToLocale;
  227. MapStrToLocale *localeMap;
  228. CriticalSection localeCrit;
  229. MODULE_INIT(INIT_PRIORITY_STANDARD)
  230. {
  231. localeMap = new MapStrToLocale;
  232. return true;
  233. }
  234. MODULE_EXIT()
  235. {
  236. delete localeMap;
  237. }
  238. RTLLocale * queryRTLLocale(char const * locale)
  239. {
  240. if (!locale) locale = "";
  241. CriticalBlock b(localeCrit);
  242. RTLLocale * loc = localeMap->getValue(locale);
  243. if(!loc)
  244. {
  245. unsigned ll = strlen(locale);
  246. StringBuffer lnorm;
  247. rtlGetNormalizedUnicodeLocaleName(ll, locale, lnorm.reserve(ll));
  248. localeMap->setValue(locale, lnorm.str());
  249. loc = localeMap->getValue(locale);
  250. }
  251. return loc;
  252. }
  253. // converters
  254. class RTLUnicodeConverter : public CInterface
  255. {
  256. public:
  257. RTLUnicodeConverter(char const * codepage)
  258. {
  259. UErrorCode err = U_ZERO_ERROR;
  260. conv = ucnv_open(codepage, &err);
  261. if (!U_SUCCESS(err))
  262. {
  263. StringBuffer msg;
  264. msg.append("Unrecognised codepage '").append(codepage).append("'");
  265. rtlFail(0, msg.str());
  266. }
  267. }
  268. ~RTLUnicodeConverter()
  269. {
  270. ucnv_close(conv);
  271. }
  272. UConverter * query() const { return conv; }
  273. private:
  274. UConverter * conv;
  275. };
  276. typedef MapStringTo<RTLUnicodeConverter, char const *> MapStrToUnicodeConverter;
  277. static __thread MapStrToUnicodeConverter *unicodeConverterMap = NULL;
  278. static __thread ThreadTermFunc prevThreadTerminator = NULL;
  279. static void clearUnicodeConverterMap()
  280. {
  281. delete unicodeConverterMap;
  282. unicodeConverterMap = NULL; // Important to clear, as this is called when threadpool threads end...
  283. if (prevThreadTerminator)
  284. {
  285. (*prevThreadTerminator)();
  286. prevThreadTerminator = NULL;
  287. }
  288. }
  289. RTLUnicodeConverter * queryRTLUnicodeConverter(char const * codepage)
  290. {
  291. if (!unicodeConverterMap) // NB: one per thread, so no contention
  292. {
  293. unicodeConverterMap = new MapStrToUnicodeConverter;
  294. // Use thread terminator hook to clear them up on thread exit.
  295. // NB: May need to revisit if not on a jlib Thread.
  296. prevThreadTerminator = addThreadTermFunc(clearUnicodeConverterMap);
  297. }
  298. RTLUnicodeConverter * conv = unicodeConverterMap->getValue(codepage);
  299. if(!conv)
  300. {
  301. unicodeConverterMap->setValue(codepage, codepage);
  302. conv = unicodeConverterMap->getValue(codepage);
  303. }
  304. return conv;
  305. }
  306. // normalization
  307. bool unicodeNeedsNormalize(unsigned inlen, UChar * in, UErrorCode * err)
  308. {
  309. return !unorm_isNormalized(in, inlen, UNORM_NFC, err);
  310. }
  311. bool vunicodeNeedsNormalize(UChar * in, UErrorCode * err)
  312. {
  313. return !unorm_isNormalized(in, -1, UNORM_NFC, err);
  314. }
  315. void unicodeReplaceNormalized(unsigned inlen, UChar * in, UErrorCode * err)
  316. {
  317. UChar * buff = (UChar *)rtlMalloc(inlen*2);
  318. unsigned len = unorm_normalize(in, inlen, UNORM_NFC, 0, buff, inlen, err);
  319. while(len<inlen) buff[len++] = 0x0020;
  320. memcpy(in, buff, inlen);
  321. free(buff);
  322. }
  323. void vunicodeReplaceNormalized(unsigned inlen, UChar * in, UErrorCode * err)
  324. {
  325. UChar * buff = (UChar *)rtlMalloc(inlen*2);
  326. unsigned len = unorm_normalize(in, -1, UNORM_NFC, 0, buff, inlen-1, err);
  327. buff[len] = 0x0000;
  328. memcpy(in, buff, inlen);
  329. free(buff);
  330. }
  331. void unicodeGetNormalized(unsigned & outlen, UChar * & out, unsigned inlen, UChar * in, UErrorCode * err)
  332. {
  333. outlen = unorm_normalize(in, inlen, UNORM_NFC, 0, 0, 0, err);
  334. out = (UChar *)rtlMalloc(outlen*2);
  335. unorm_normalize(in, inlen, UNORM_NFC, 0, out, outlen, err);
  336. }
  337. void vunicodeGetNormalized(UChar * & out, unsigned inlen, UChar * in, UErrorCode * err)
  338. {
  339. unsigned outlen = unorm_normalize(in, inlen, UNORM_NFC, 0, 0, 0, err);
  340. out = (UChar *)rtlMalloc((outlen+1)*2);
  341. unorm_normalize(in, inlen, UNORM_NFC, 0, out, outlen, err);
  342. out[outlen] = 0x0000;
  343. }
  344. void unicodeEnsureIsNormalized(unsigned len, UChar * str)
  345. {
  346. UErrorCode err = U_ZERO_ERROR;
  347. if(unicodeNeedsNormalize(len, str, &err))
  348. unicodeReplaceNormalized(len, str, &err);
  349. }
  350. void vunicodeEnsureIsNormalized(unsigned len, UChar * str)
  351. {
  352. UErrorCode err = U_ZERO_ERROR;
  353. if(vunicodeNeedsNormalize(str, &err))
  354. vunicodeReplaceNormalized(len, str, &err);
  355. }
  356. void unicodeEnsureIsNormalizedX(unsigned & len, UChar * & str)
  357. {
  358. UErrorCode err = U_ZERO_ERROR;
  359. if(unicodeNeedsNormalize(len, str, &err))
  360. {
  361. unsigned inlen = len;
  362. UChar * in = str;
  363. unicodeGetNormalized(len, str, inlen, in, &err);
  364. free(in);
  365. }
  366. }
  367. void vunicodeEnsureIsNormalizedX(unsigned inlen, UChar * & str)
  368. {
  369. UErrorCode err = U_ZERO_ERROR;
  370. if(unicodeNeedsNormalize(inlen, str, &err))
  371. {
  372. UChar * in = str;
  373. vunicodeGetNormalized(str, inlen, in, &err);
  374. free(in);
  375. }
  376. }
  377. void unicodeNormalizedCopy(UChar * out, UChar * in, unsigned len)
  378. {
  379. UErrorCode err = U_ZERO_ERROR;
  380. if(unicodeNeedsNormalize(len, in, &err))
  381. unorm_normalize(in, len, UNORM_NFC, 0, out, len, &err);
  382. else
  383. memcpy(out, in, len);
  384. }
  385. void normalizeUnicodeString(UnicodeString const & in, UnicodeString & out)
  386. {
  387. UErrorCode err = U_ZERO_ERROR;
  388. Normalizer::compose(in, false, 0, out, err);
  389. assertex(U_SUCCESS(err));
  390. }
  391. #endif
  392. // padding
  393. static void multimemset(char * out, size_t outlen, char const * in, size_t inlen)
  394. {
  395. size_t outpos = 0;
  396. size_t inpos = 0;
  397. while(outpos < outlen)
  398. {
  399. out[outpos++] = in[inpos++];
  400. if(inpos == inlen)
  401. inpos = 0;
  402. }
  403. }
  404. typedef MapStringTo<MemoryAttr, size32_t> MemoryAttrMapping;
  405. MemoryAttrMapping *unicodeBlankCache;
  406. CriticalSection ubcCrit;
  407. MODULE_INIT(INIT_PRIORITY_STANDARD)
  408. {
  409. unicodeBlankCache = new MemoryAttrMapping;
  410. return true;
  411. }
  412. MODULE_EXIT()
  413. {
  414. delete unicodeBlankCache;
  415. }
  416. UChar unicodeSpace = 0x0020;
  417. void codepageBlankFill(char const * codepage, char * out, size_t len)
  418. {
  419. CriticalBlock b(ubcCrit);
  420. MemoryAttr * cached = unicodeBlankCache->getValue(codepage);
  421. if(cached)
  422. {
  423. char const * blank = (char const *)cached->get();
  424. size_t blanklen = cached->length();
  425. if(blanklen==1)
  426. memset(out, *blank, len);
  427. else
  428. multimemset(out, len, blank, blanklen);
  429. }
  430. else
  431. {
  432. unsigned blanklen;
  433. char * blank;
  434. rtlUnicodeToCodepageX(blanklen, blank, 1, &unicodeSpace, codepage);
  435. unicodeBlankCache->setValue(codepage, blanklen);
  436. unicodeBlankCache->getValue(codepage)->set(blanklen, blank);
  437. if(blanklen==1)
  438. memset(out, *blank, len);
  439. else
  440. multimemset(out, len, blank, blanklen);
  441. free(blank);
  442. }
  443. }
  444. //---------------------------------------------------------------------------
  445. // floating point functions
  446. static const double smallPowers[16] = {
  447. 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
  448. 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15 };
  449. static double powerOfTen(int x)
  450. {
  451. if (x < 0)
  452. return 1 / powerOfTen(-x);
  453. double value = smallPowers[x&15];
  454. double scale = 1e16;
  455. x >>= 4;
  456. while (x)
  457. {
  458. if (x & 1)
  459. value *= scale;
  460. scale *= scale;
  461. x >>= 1;
  462. }
  463. return value;
  464. };
  465. static double kk = (1.0 / ((unsigned __int64)1<<53));
  466. __int64 rtlRound(double x)
  467. {
  468. //a fudge to make numbers that are inexact after a division round up "correctly".
  469. //coded rather oddly as microsoft's optimizer has a habit of throwing it away otherwise...
  470. volatile double tt = x * kk;
  471. x += tt;
  472. if (x >= 0.0)
  473. return (__int64)(x + 0.5);
  474. return -(__int64)(-x + 0.5);
  475. }
  476. double rtlRoundTo(const double x, int places)
  477. {
  478. if (x < 0)
  479. return -rtlRoundTo(-x, places);
  480. volatile double tt = x * kk;
  481. double x0 = x + tt;
  482. if (places >= 0)
  483. {
  484. double scale = powerOfTen(places);
  485. return floor(x * scale + 0.5) / scale;
  486. }
  487. else
  488. {
  489. double scale = powerOfTen(-places);
  490. return floor(x / scale + 0.5) * scale;
  491. }
  492. }
  493. __int64 rtlRoundDown(double x)
  494. {
  495. if (x >= 0.0)
  496. return (__int64)floor(x);
  497. return (__int64)ceil(x);
  498. }
  499. __int64 rtlRoundUp(double x)
  500. {
  501. if (x >= 0.0)
  502. return (__int64)ceil(x);
  503. return (__int64)floor(x);
  504. }
  505. //=============================================================================
  506. // Numeric conversion functions... - fixed length target
  507. #define intToStringNBody() \
  508. unsigned len = numtostr(temp, val); \
  509. if (len > l) \
  510. memset(t,'*',l); \
  511. else \
  512. { \
  513. memcpy(t,temp,len); \
  514. memset(t+len, ' ', l-len); \
  515. }
  516. void rtlUInt4ToStr(size32_t l, char * t, unsigned val)
  517. {
  518. char temp[20];
  519. intToStringNBody();
  520. }
  521. void rtlUInt8ToStr(size32_t l, char * t, unsigned __int64 val)
  522. {
  523. char temp[40];
  524. intToStringNBody();
  525. }
  526. void rtlInt4ToStr(size32_t l, char * t, int val)
  527. {
  528. char temp[20];
  529. intToStringNBody();
  530. }
  531. void rtlInt8ToStr(size32_t l, char * t, __int64 val)
  532. {
  533. char temp[40];
  534. intToStringNBody();
  535. }
  536. //=============================================================================
  537. // Numeric conversion functions... - unknown length target
  538. #define intToUnknownStringBody() \
  539. unsigned len = numtostr(temp, val); \
  540. char * result = (char *)rtlMalloc(len); \
  541. memcpy(result, temp, len); \
  542. l = len; \
  543. t = result;
  544. void rtlUInt4ToStrX(size32_t & l, char * & t, unsigned val)
  545. {
  546. char temp[20];
  547. intToUnknownStringBody();
  548. }
  549. void rtlUInt8ToStrX(size32_t & l, char * & t, unsigned __int64 val)
  550. {
  551. char temp[40];
  552. intToUnknownStringBody();
  553. }
  554. void rtlInt4ToStrX(size32_t & l, char * & t, int val)
  555. {
  556. char temp[20];
  557. intToUnknownStringBody();
  558. }
  559. void rtlInt8ToStrX(size32_t & l, char * & t, __int64 val)
  560. {
  561. char temp[40];
  562. intToUnknownStringBody();
  563. }
  564. //=============================================================================
  565. // Numeric conversion functions... - fixed length ebcdic target
  566. // ILKA - converting ebcdic to numeric still uses string in between, for more efficiency
  567. // a function numtoebcdicstr should be implemented
  568. #define intToEbcdicStringNBody() \
  569. unsigned len = numtostr(astr, val); \
  570. rtlStrToEStr(sizeof(estr),estr,len,astr); \
  571. if (len > l) \
  572. memset(t,0x2A,l); \
  573. else \
  574. { \
  575. memcpy(t,estr,len); \
  576. memset(t+len, '@', l-len); \
  577. }
  578. void rtl_l42en(size32_t l, char * t, unsigned val)
  579. {
  580. char astr[20];
  581. char estr[20];
  582. intToEbcdicStringNBody();
  583. }
  584. void rtl_l82en(size32_t l, char * t, unsigned __int64 val)
  585. {
  586. char astr[40];
  587. char estr[40];
  588. intToEbcdicStringNBody();
  589. }
  590. void rtl_ls42en(size32_t l, char * t, int val)
  591. {
  592. char astr[20];
  593. char estr[20];
  594. intToEbcdicStringNBody();
  595. }
  596. void rtl_ls82en(size32_t l, char * t, __int64 val)
  597. {
  598. char astr[40];
  599. char estr[40];
  600. intToEbcdicStringNBody();
  601. }
  602. //=============================================================================
  603. // Numeric conversion functions... - unknown length ebcdic target
  604. #if defined _MSC_VER
  605. #pragma warning(push)
  606. #pragma warning(disable:4700)
  607. #endif
  608. void rtl_l42ex(size32_t & l, char * & t, unsigned val)
  609. {
  610. char astr[20];
  611. unsigned alen = numtostr(astr, val);
  612. rtlStrToEStrX(l,t,alen,astr);
  613. }
  614. void rtl_l82ex(size32_t & l, char * & t, unsigned __int64 val)
  615. {
  616. char astr[40];
  617. unsigned alen = numtostr(astr, val);
  618. rtlStrToEStrX(l,t,alen,astr);
  619. }
  620. void rtl_ls42ex(size32_t & l, char * & t, int val)
  621. {
  622. char astr[20];
  623. unsigned alen = numtostr(astr, val);
  624. rtlStrToEStrX(l,t,alen,astr);
  625. }
  626. void rtl_ls82ex(size32_t & l, char * & t, __int64 val)
  627. {
  628. char astr[40];
  629. unsigned alen = numtostr(astr, val);
  630. rtlStrToEStrX(l,t,alen,astr);
  631. }
  632. #ifdef _MSC_VER
  633. #pragma warning(pop)
  634. #endif
  635. //=============================================================================
  636. // Numeric conversion functions... - fixed length variable target
  637. #define intToVarStringNBody() \
  638. unsigned len = numtostr(temp, val) + 1; \
  639. if (len > l) \
  640. { \
  641. memset(t,'*',l); \
  642. t[l-1]=0; \
  643. } \
  644. else \
  645. memcpy(t,temp,len);
  646. void rtlUInt4ToVStr(size32_t l, char * t, unsigned val)
  647. {
  648. char temp[20];
  649. intToVarStringNBody();
  650. }
  651. void rtlUInt8ToVStr(size32_t l, char * t, unsigned __int64 val)
  652. {
  653. char temp[40];
  654. intToVarStringNBody();
  655. }
  656. void rtlInt4ToVStr(size32_t l, char * t, int val)
  657. {
  658. char temp[20];
  659. intToVarStringNBody();
  660. }
  661. void rtlInt8ToVStr(size32_t l, char * t, __int64 val)
  662. {
  663. char temp[40];
  664. intToVarStringNBody();
  665. }
  666. //=============================================================================
  667. // Numeric conversion functions... - unknown length variable target
  668. #define intToVarStringXBody() \
  669. unsigned len = numtostr(temp, val); \
  670. temp[len] = 0; \
  671. return strdup(temp);
  672. char * rtlUInt4ToVStrX(unsigned val)
  673. {
  674. char temp[20];
  675. intToVarStringXBody();
  676. }
  677. char * rtlUInt8ToVStrX(unsigned __int64 val)
  678. {
  679. char temp[40];
  680. intToVarStringXBody();
  681. }
  682. char * rtlInt4ToVStrX(int val)
  683. {
  684. char temp[20];
  685. intToVarStringXBody();
  686. }
  687. char * rtlInt8ToVStrX(__int64 val)
  688. {
  689. char temp[40];
  690. intToVarStringXBody();
  691. }
  692. //---------------------------------------------------------------------------
  693. static const unsigned largeAllocaThreshold = 1024*10;
  694. #define CONDSTACKALLOC(MA, SZ) ((SZ>largeAllocaThreshold) ? MA.allocate(SZ) : alloca(SZ))
  695. double rtlStrToReal(size32_t l, const char * t)
  696. {
  697. MemoryAttr heapMem;
  698. char * temp = (char *)CONDSTACKALLOC(heapMem, l+1);
  699. memcpy(temp, t, l);
  700. temp[l] = 0;
  701. return rtlVStrToReal(temp);
  702. }
  703. double rtlEStrToReal(size32_t l, const char * t)
  704. {
  705. MemoryAttr heapMem;
  706. char * temp = (char *)CONDSTACKALLOC(heapMem, l+1);
  707. rtlEStrToStr(l,temp,l,t);
  708. temp[l] = 0;
  709. return rtlVStrToReal(temp);
  710. }
  711. double rtlVStrToReal(const char * t)
  712. {
  713. char * end;
  714. return strtod(t, &end);
  715. }
  716. double rtl_ex2f(const char * t)
  717. {
  718. return rtlEStrToReal(strlen(t), t);
  719. }
  720. double rtlUnicodeToReal(size32_t l, UChar const * t)
  721. {
  722. unsigned bufflen;
  723. char * buff;
  724. rtlUnicodeToCodepageX(bufflen, buff, l, t, "ascii");
  725. double ret = rtlStrToReal(bufflen, buff);
  726. rtlFree(buff);
  727. return ret;
  728. }
  729. //---------------------------------------------------------------------------
  730. static void truncFixedReal(size32_t l, char * t, StringBuffer & temp)
  731. {
  732. const char * str = temp.str();
  733. unsigned len = temp.length();
  734. if (len > l)
  735. {
  736. //If we don't lose significant digits left of the decimal point then truncate the string.
  737. const char * dot = strchr(str, '.');
  738. if (dot && ((size_t)(dot - str) <= l))
  739. len = l;
  740. }
  741. if (len > l)
  742. memset(t,'*',l);
  743. else
  744. {
  745. memcpy(t,temp.str(),len);
  746. memset(t+len, ' ', l-len);
  747. }
  748. }
  749. void rtlRealToStr(size32_t l, char * t, double val)
  750. {
  751. StringBuffer temp;
  752. temp.append(val);
  753. //This could either truncate or round when converting a real to a string
  754. //Rounding is more user friendly, but then (string3)(string)1.99 != (string3)1.99 which is
  755. //rather count intuitive. (That is still true if the value is out of range.)
  756. truncFixedReal(l, t, temp);
  757. }
  758. void rtlRealToStr(size32_t l, char * t, float val)
  759. {
  760. StringBuffer temp;
  761. temp.append(val);
  762. //See comment above
  763. truncFixedReal(l, t, temp);
  764. }
  765. void rtlRealToStrX(size32_t & l, char * & t, double val)
  766. {
  767. StringBuffer temp;
  768. temp.append(val);
  769. unsigned len = temp.length();
  770. char * result = (char *)rtlMalloc(len);
  771. memcpy(result,temp.str(),len);
  772. l = len;
  773. t = result;
  774. }
  775. void rtlRealToStrX(size32_t & l, char * & t, float val)
  776. {
  777. StringBuffer temp;
  778. temp.append(val);
  779. unsigned len = temp.length();
  780. char * result = (char *)rtlMalloc(len);
  781. memcpy(result,temp.str(),len);
  782. l = len;
  783. t = result;
  784. }
  785. void rtlRealToVStr(size32_t l, char * t, double val)
  786. {
  787. StringBuffer temp;
  788. temp.append(val);
  789. unsigned len = temp.length()+1;
  790. if (len > l)
  791. {
  792. memset(t,'*',l);
  793. t[l-1]=0;
  794. }
  795. else
  796. {
  797. memcpy(t,temp.str(),len);
  798. }
  799. }
  800. void rtlRealToVStr(size32_t l, char * t, float val)
  801. {
  802. StringBuffer temp;
  803. temp.append(val);
  804. unsigned len = temp.length()+1;
  805. if (len > l)
  806. {
  807. memset(t,'*',l);
  808. t[l-1]=0;
  809. }
  810. else
  811. {
  812. memcpy(t,temp.str(),len);
  813. }
  814. }
  815. char * rtlRealToVStrX(double val)
  816. {
  817. StringBuffer temp;
  818. temp.append(val);
  819. return strdup(temp);
  820. }
  821. char * rtlRealToVStrX(float val)
  822. {
  823. StringBuffer temp;
  824. temp.append(val);
  825. return strdup(temp);
  826. }
  827. //---------------------------------------------------------------------------
  828. #define SkipSpaces(l, t) \
  829. while (l) \
  830. { \
  831. char c = *t; \
  832. switch (c) \
  833. { \
  834. case ' ': \
  835. case '\t': \
  836. case '-': \
  837. case '+': \
  838. break; \
  839. default: \
  840. goto done; \
  841. } \
  842. l--; \
  843. t++; \
  844. } \
  845. done:
  846. #define SkipSignSpaces(l, t, negate) \
  847. while (l) \
  848. { \
  849. char c = *t; \
  850. switch (c) \
  851. { \
  852. case '-': \
  853. negate = true; \
  854. break; \
  855. case ' ': \
  856. case '\t': \
  857. case '+': \
  858. break; \
  859. default: \
  860. goto done; \
  861. } \
  862. l--; \
  863. t++; \
  864. } \
  865. done:
  866. unsigned rtlStrToUInt4(size32_t l, const char * t)
  867. {
  868. SkipSpaces(l, t);
  869. unsigned v = 0;
  870. while (l--)
  871. {
  872. char c = *t++;
  873. if ((c >= '0') && (c <= '9'))
  874. v = v * 10 + (c-'0');
  875. else
  876. break;
  877. }
  878. return v;
  879. }
  880. unsigned __int64 rtlStrToUInt8(size32_t l, const char * t)
  881. {
  882. SkipSpaces(l, t);
  883. unsigned __int64 v = 0;
  884. while (l--)
  885. {
  886. char c = *t++;
  887. if ((c >= '0') && (c <= '9'))
  888. v = v * 10 + (c-'0');
  889. else
  890. break;
  891. }
  892. return v;
  893. }
  894. int rtlStrToInt4(size32_t l, const char * t)
  895. {
  896. bool negate = false;
  897. SkipSignSpaces(l, t, negate);
  898. int v = 0;
  899. while (l--)
  900. {
  901. char c = *t++;
  902. if ((c >= '0') && (c <= '9'))
  903. v = v * 10 + (c-'0');
  904. else
  905. break;
  906. }
  907. return negate ? -v : v;
  908. }
  909. __int64 rtlStrToInt8(size32_t l, const char * t)
  910. {
  911. bool negate = false;
  912. SkipSignSpaces(l, t, negate);
  913. __int64 v = 0;
  914. while (l--)
  915. {
  916. char c = *t++;
  917. if ((c >= '0') && (c <= '9'))
  918. v = v * 10 + (c-'0');
  919. else
  920. break;
  921. }
  922. return negate ? -v : v;
  923. }
  924. __int64 rtlUnicodeToInt8(size32_t l, UChar const * t)
  925. {
  926. unsigned bufflen;
  927. char * buff;
  928. rtlUnicodeToCodepageX(bufflen, buff, l, t, "ascii");
  929. __int64 ret = rtlStrToInt8(bufflen, buff);
  930. rtlFree(buff);
  931. return ret;
  932. }
  933. bool rtlStrToBool(size32_t l, const char * t)
  934. {
  935. while (l--)
  936. {
  937. char c = *t++;
  938. if (c != ' ')
  939. return true;
  940. }
  941. return false;
  942. }
  943. bool rtlUnicodeToBool(size32_t l, UChar const * t)
  944. {
  945. while(l--)
  946. if(*t++ != 0x20) return true;
  947. return false;
  948. }
  949. // return true for "on", "true" or any non-zero constant, else false;
  950. bool rtlCsvStrToBool(size32_t l, const char * t)
  951. {
  952. return clipStrToBool(l, t);
  953. }
  954. //---------------------------------------------------------------------------
  955. unsigned rtlEStrToUInt4(size32_t l, const char * t)
  956. {
  957. MemoryAttr heapMem;
  958. char * astr = (char *)CONDSTACKALLOC(heapMem, l);
  959. rtlEStrToStr(l,astr,l,t);
  960. return rtlStrToUInt4(l,astr);
  961. }
  962. unsigned __int64 rtlEStrToUInt8(size32_t l, const char * t)
  963. {
  964. MemoryAttr heapMem;
  965. char * astr = (char *)CONDSTACKALLOC(heapMem, l);
  966. rtlEStrToStr(l,astr,l,t);
  967. return rtlStrToUInt8(l,astr);
  968. }
  969. int rtlEStrToInt4(size32_t l, const char * t)
  970. {
  971. MemoryAttr heapMem;
  972. char * astr = (char *)CONDSTACKALLOC(heapMem, l);
  973. rtlEStrToStr(l,astr,l,t);
  974. return rtlStrToInt4(l,astr);
  975. }
  976. __int64 rtlEStrToInt8(size32_t l, const char * t)
  977. {
  978. MemoryAttr heapMem;
  979. char * astr = (char *)CONDSTACKALLOC(heapMem, l);
  980. rtlEStrToStr(l,astr,l,t);
  981. return rtlStrToInt8(l,astr);
  982. }
  983. bool rtl_en2b(size32_t l, const char * t)
  984. {
  985. MemoryAttr heapMem;
  986. char * astr = (char *)CONDSTACKALLOC(heapMem, l);
  987. rtlEStrToStr(l,astr,l,t);
  988. return rtlStrToBool(l,astr);
  989. }
  990. //---------------------------------------------------------------------------
  991. unsigned rtlVStrToUInt4(const char * t)
  992. {
  993. return rtlStrToUInt4(strlen(t), t);
  994. }
  995. unsigned __int64 rtlVStrToUInt8(const char * t)
  996. {
  997. return rtlStrToUInt8(strlen(t), t);
  998. }
  999. int rtlVStrToInt4(const char * t)
  1000. {
  1001. return rtlStrToInt4(strlen(t), t);
  1002. }
  1003. __int64 rtlVStrToInt8(const char * t)
  1004. {
  1005. return rtlStrToInt8(strlen(t), t);
  1006. }
  1007. bool rtlVStrToBool(const char * t)
  1008. {
  1009. char c;
  1010. while ((c = *t++) != 0)
  1011. {
  1012. //MORE: Allow spaces if we change the semantics.
  1013. return true;
  1014. }
  1015. return false;
  1016. }
  1017. //---------------------------------------------------------------------------
  1018. void holeIntFormat(size32_t maxlen, char * target, __int64 value, unsigned width, unsigned flags)
  1019. {
  1020. StringBuffer result;
  1021. if (flags & 1)
  1022. result.appendf("%0*" I64F "d", width, value);
  1023. else
  1024. result.appendf("%*" I64F "d", width, value);
  1025. size32_t written = result.length();
  1026. if (written > maxlen)
  1027. memset(target, '*', maxlen);
  1028. else
  1029. {
  1030. memset(target+written, ' ', maxlen-written);
  1031. memcpy(target, result.str(), written);
  1032. }
  1033. }
  1034. void holeRealFormat(size32_t maxlen, char * target, double value, unsigned width, unsigned places)
  1035. {
  1036. if ((int) width <= 0)
  1037. return;
  1038. const unsigned tempSize = 500;
  1039. char temp[tempSize*2+2]; // Space for leading digits/0, '-' and \0 terminator
  1040. //Ensure that we output at most 2*tempSize characters.
  1041. unsigned formatWidth = width < tempSize ? width : tempSize;
  1042. if (places >= formatWidth)
  1043. places = formatWidth-1;
  1044. unsigned written = sprintf(temp, "%*.*f", formatWidth, places, value);
  1045. const char * src = temp;
  1046. if (written > width)
  1047. {
  1048. //Strip a leading 0 for very small numbers.
  1049. if (*src == '0')
  1050. {
  1051. written--;
  1052. src++;
  1053. }
  1054. }
  1055. if (written > width)
  1056. {
  1057. memset(target, '*', width);
  1058. if (places)
  1059. target[width-places-1] = '.';
  1060. }
  1061. else
  1062. {
  1063. unsigned delta = width - written;
  1064. if (delta)
  1065. memset(target, ' ', delta);
  1066. memcpy(target+delta, src, written);
  1067. }
  1068. }
  1069. //=============================================================================
  1070. // Conversion functions...
  1071. void rtlIntFormat(unsigned & len, char * & target, __int64 value, unsigned width, unsigned flags)
  1072. {
  1073. if ((int) width <= 0)
  1074. {
  1075. len = 0;
  1076. target = NULL;
  1077. return;
  1078. }
  1079. len = width;
  1080. target = (char *)rtlMalloc(width);
  1081. holeIntFormat(width, target, value, width, flags);
  1082. }
  1083. void rtlRealFormat(unsigned & len, char * & target, double value, unsigned width, unsigned places)
  1084. {
  1085. if ((int) width < 0)
  1086. {
  1087. len = 0;
  1088. target = NULL;
  1089. return;
  1090. }
  1091. len = width;
  1092. target = (char *)rtlMalloc(width);
  1093. holeRealFormat(width, target, value, width, places);
  1094. }
  1095. //=============================================================================
  1096. // String functions...
  1097. bool rtlDataToBool(unsigned len, const void * _src)
  1098. {
  1099. const char * src = (const char *)_src;
  1100. while (len--)
  1101. if (*src++)
  1102. return true;
  1103. return false;
  1104. }
  1105. void rtlBoolToData(unsigned tlen, void * tgt, bool src)
  1106. {
  1107. memset(tgt, 0, tlen);
  1108. if (src)
  1109. ((char *)tgt)[tlen-1] = 1;
  1110. }
  1111. void rtlBoolToStr(unsigned tlen, void * tgt, bool src)
  1112. {
  1113. memset(tgt, ' ', tlen);
  1114. if (src)
  1115. ((char *)tgt)[tlen-1] = '1';
  1116. }
  1117. void rtlBoolToVStr(char * tgt, bool src)
  1118. {
  1119. if (src)
  1120. *tgt++ = '1';
  1121. *tgt = 0;
  1122. }
  1123. void rtlBoolToStrX(unsigned & tlen, char * & tgt, bool src)
  1124. {
  1125. if (src)
  1126. {
  1127. char * ret = (char *)rtlMalloc(1);
  1128. ret[0] = '1';
  1129. tlen = 1;
  1130. tgt = ret;
  1131. }
  1132. else
  1133. {
  1134. tlen = 0;
  1135. tgt = NULL;
  1136. }
  1137. }
  1138. char * rtlBoolToVStrX(bool src)
  1139. {
  1140. if (src)
  1141. return strdup("1");
  1142. else
  1143. return strdup("");
  1144. }
  1145. //-----------------------------------------------------------------------------
  1146. // String copying functions....
  1147. void rtlDataToData(unsigned tlen, void * tgt, unsigned slen, const void * src)
  1148. {
  1149. if (slen > tlen)
  1150. slen = tlen;
  1151. memcpy(tgt, src, slen);
  1152. if (tlen > slen)
  1153. memset((char *)tgt+slen, 0, tlen-slen);
  1154. }
  1155. void rtlStrToData(unsigned tlen, void * tgt, unsigned slen, const void * src)
  1156. {
  1157. if (slen > tlen)
  1158. slen = tlen;
  1159. memcpy(tgt, src, slen);
  1160. if (tlen > slen)
  1161. memset((char *)tgt+slen, 0, tlen-slen);
  1162. }
  1163. void rtlStrToStr(unsigned tlen, void * tgt, unsigned slen, const void * src)
  1164. {
  1165. if (slen > tlen)
  1166. slen = tlen;
  1167. memcpy(tgt, src, slen);
  1168. if (tlen > slen)
  1169. memset((char *)tgt+slen, ' ', tlen-slen);
  1170. }
  1171. void rtlStrToVStr(unsigned tlen, void * tgt, unsigned slen, const void * src)
  1172. {
  1173. if ((slen >= tlen) && (tlen != 0))
  1174. slen = tlen-1;
  1175. memcpy(tgt, src, slen);
  1176. *((char *)tgt+slen)=0;
  1177. }
  1178. void rtlStr2EStr(unsigned tlen, char * tgt, unsigned slen, const char * src)
  1179. {
  1180. rtlStrToEStr(tlen,tgt,slen,src);
  1181. }
  1182. void rtlEStr2Data(unsigned tlen, void * tgt, unsigned slen, const char * src)
  1183. {
  1184. if (slen > tlen)
  1185. slen = tlen;
  1186. rtlEStrToStr(slen,(char *)tgt,slen,src);
  1187. if (tlen > slen)
  1188. memset((char *)tgt+slen, 0, tlen-slen);
  1189. }
  1190. void rtlEStr2Str(unsigned tlen, void * tgt, unsigned slen, const char * src)
  1191. {
  1192. rtlEStrToStr(tlen,(char *)tgt,slen,src);
  1193. }
  1194. void rtlEStrToVStr(unsigned tlen, void * tgt, unsigned slen, const char * src)
  1195. {
  1196. if (slen >= tlen)
  1197. slen = tlen-1;
  1198. rtlEStrToStr(slen,(char *)tgt,slen,src);
  1199. *((char *)tgt+slen)=0;
  1200. }
  1201. void rtlEStrToEStr(unsigned tlen, void * tgt, unsigned slen, const void * src)
  1202. {
  1203. if (slen > tlen)
  1204. slen = tlen;
  1205. memcpy(tgt, src, slen);
  1206. if (tlen > slen)
  1207. memset((char *)tgt+slen, '@', tlen-slen);
  1208. }
  1209. void rtlVStrToData(unsigned tlen, void * tgt, const char * src)
  1210. {
  1211. rtlStrToData(tlen, tgt, strlen(src), src);
  1212. }
  1213. void rtlVStrToStr(unsigned tlen, void * tgt, const char * src)
  1214. {
  1215. rtlStrToStr(tlen, tgt, strlen(src), src);
  1216. }
  1217. void rtlVStr2EStr(unsigned tlen, char * tgt, const char * src)
  1218. {
  1219. rtlStr2EStr(tlen, tgt, strlen(src), src);
  1220. }
  1221. void rtlVStrToVStr(unsigned tlen, void * tgt, const char * src)
  1222. {
  1223. rtlStrToVStr(tlen, tgt, strlen(src), src);
  1224. }
  1225. char *rtlCreateQuotedString(unsigned _len_tgt,char * tgt)
  1226. {
  1227. // Add ' at start and end. MORE! also needs to handle embedded quotes
  1228. char * result = (char *)rtlMalloc(_len_tgt + 3);
  1229. result[0] = '\'';
  1230. memcpy(result+1, tgt, _len_tgt);
  1231. result[_len_tgt+1] = '\'';
  1232. result[_len_tgt+2] = 0;
  1233. return result;
  1234. }
  1235. //-----------------------------------------------------------------------------
  1236. //List of strings with length of -1 to mark the end...
  1237. void rtlConcat(unsigned & tlen, char * * tgt, ...)
  1238. {
  1239. va_list args;
  1240. unsigned totalLength = 0;
  1241. va_start(args, tgt);
  1242. for (;;)
  1243. {
  1244. unsigned len = va_arg(args, unsigned);
  1245. if (len+1==0)
  1246. break;
  1247. va_arg(args, char *); // Skip the string
  1248. totalLength += len;
  1249. }
  1250. va_end(args);
  1251. char * buffer = (char *)rtlMalloc(totalLength);
  1252. char * cur = buffer;
  1253. va_start(args, tgt);
  1254. for (;;)
  1255. {
  1256. unsigned len = va_arg(args, unsigned);
  1257. if (len+1==0)
  1258. break;
  1259. char * str = va_arg(args, char *);
  1260. memcpy(cur, str, len);
  1261. cur += len;
  1262. }
  1263. va_end(args);
  1264. tlen = totalLength;
  1265. *tgt = buffer;
  1266. }
  1267. void rtlConcatVStr(char * * tgt, ...)
  1268. {
  1269. va_list args;
  1270. unsigned totalLength = 0;
  1271. va_start(args, tgt);
  1272. for (;;)
  1273. {
  1274. unsigned len = va_arg(args, unsigned);
  1275. if (len+1==0)
  1276. break;
  1277. va_arg(args, char *); // Skip the string
  1278. totalLength += len;
  1279. }
  1280. va_end(args);
  1281. char * buffer = (char *)rtlMalloc(totalLength+1);
  1282. char * cur = buffer;
  1283. va_start(args, tgt);
  1284. for (;;)
  1285. {
  1286. unsigned len = va_arg(args, unsigned);
  1287. if (len+1==0)
  1288. break;
  1289. char * str = va_arg(args, char *);
  1290. memcpy(cur, str, len);
  1291. cur += len;
  1292. }
  1293. va_end(args);
  1294. cur[0] = 0;
  1295. *tgt = buffer;
  1296. }
  1297. #ifdef _USE_ICU
  1298. void rtlConcatUnicode(unsigned & tlen, UChar * * tgt, ...)
  1299. {
  1300. va_list args;
  1301. unsigned totalLength = 0;
  1302. va_start(args, tgt);
  1303. for(;;)
  1304. {
  1305. unsigned len = va_arg(args, unsigned);
  1306. if(len+1==0)
  1307. break;
  1308. va_arg(args, UChar *); // Skip the string
  1309. totalLength += len;
  1310. }
  1311. va_end(args);
  1312. UChar * buffer = (UChar *)rtlMalloc(totalLength*2); //I *believe* this is a valid upper limit, as an NFC concatenation can only be shorter than the sum of its parts
  1313. unsigned idx = 0;
  1314. UErrorCode err = U_ZERO_ERROR;
  1315. va_start(args, tgt);
  1316. for(;;)
  1317. {
  1318. unsigned len = va_arg(args, unsigned);
  1319. if(len+1==0)
  1320. break;
  1321. UChar * str = va_arg(args, UChar *);
  1322. if (len)
  1323. idx = unorm_concatenate(buffer, idx, str, len, buffer, totalLength, UNORM_NFC, 0, &err);
  1324. }
  1325. va_end(args);
  1326. *tgt = buffer;
  1327. tlen = idx;
  1328. }
  1329. void rtlConcatVUnicode(UChar * * tgt, ...)
  1330. {
  1331. va_list args;
  1332. unsigned totalLength = 0;
  1333. va_start(args, tgt);
  1334. for(;;)
  1335. {
  1336. unsigned len = va_arg(args, unsigned);
  1337. if(len+1==0)
  1338. break;
  1339. va_arg(args, UChar *); // Skip the string
  1340. totalLength += len;
  1341. }
  1342. va_end(args);
  1343. UChar * buffer = (UChar *)rtlMalloc((totalLength+1)*2); //I *believe* this is a valid upper limit, as an NFC concatenation can only be shorter than the sum of its parts
  1344. unsigned idx = 0;
  1345. UErrorCode err = U_ZERO_ERROR;
  1346. va_start(args, tgt);
  1347. for(;;)
  1348. {
  1349. unsigned len = va_arg(args, unsigned);
  1350. if(len+1==0)
  1351. break;
  1352. UChar * str = va_arg(args, UChar *);
  1353. if (len)
  1354. idx = unorm_concatenate(buffer, idx, str, len, buffer, totalLength, UNORM_NFC, 0, &err);
  1355. }
  1356. va_end(args);
  1357. buffer[idx++] = 0x0000;
  1358. *tgt = buffer;
  1359. }
  1360. #else
  1361. void rtlConcatUnicode(unsigned & tlen, UChar * * tgt, ...)
  1362. {
  1363. rtlThrowNoUnicode();
  1364. }
  1365. void rtlConcatVUnicode(UChar * * tgt, ...)
  1366. {
  1367. rtlThrowNoUnicode();
  1368. }
  1369. #endif
  1370. //List of strings with length of -1 to mark the end...
  1371. void rtlConcatStrF(unsigned tlen, void * _tgt, int fill, ...)
  1372. {
  1373. va_list args;
  1374. char * tgt = (char *)_tgt;
  1375. unsigned offset = 0;
  1376. va_start(args, fill);
  1377. while (offset != tlen)
  1378. {
  1379. unsigned len = va_arg(args, unsigned);
  1380. if (len+1==0)
  1381. break;
  1382. const char * str = va_arg(args, const char *);
  1383. unsigned copyLen = len + offset > tlen ? tlen - offset : len;
  1384. memcpy(tgt+offset, str, copyLen);
  1385. offset += copyLen;
  1386. }
  1387. va_end(args);
  1388. if (offset < tlen)
  1389. memset(tgt+offset, fill, tlen-offset);
  1390. }
  1391. void rtlConcatVStrF(unsigned tlen, char * tgt, ...)
  1392. {
  1393. va_list args;
  1394. unsigned offset = 0;
  1395. va_start(args, tgt);
  1396. while (offset != tlen)
  1397. {
  1398. unsigned len = va_arg(args, unsigned);
  1399. if (len+1==0)
  1400. break;
  1401. const char * str = va_arg(args, const char *);
  1402. unsigned copyLen = len + offset > tlen ? tlen - offset : len;
  1403. memcpy(tgt+offset, str, copyLen);
  1404. offset += copyLen;
  1405. }
  1406. va_end(args);
  1407. memset(tgt+offset, 0, (tlen+1)-offset);
  1408. }
  1409. #ifdef _USE_ICU
  1410. void rtlConcatUnicodeF(unsigned tlen, UChar * tgt, ...)
  1411. {
  1412. va_list args;
  1413. unsigned idx = 0;
  1414. UErrorCode err = U_ZERO_ERROR;
  1415. va_start(args, tgt);
  1416. for(;;)
  1417. {
  1418. unsigned len = va_arg(args, unsigned);
  1419. if(len+1==0)
  1420. break;
  1421. UChar * str = va_arg(args, UChar *);
  1422. if (len)
  1423. idx = unorm_concatenate(tgt, idx, str, len, tgt, tlen, UNORM_NFC, 0, &err);
  1424. }
  1425. va_end(args);
  1426. while (idx < tlen)
  1427. tgt[idx++] = ' ';
  1428. }
  1429. void rtlConcatVUnicodeF(unsigned tlen, UChar * tgt, ...)
  1430. {
  1431. va_list args;
  1432. unsigned idx = 0;
  1433. UErrorCode err = U_ZERO_ERROR;
  1434. va_start(args, tgt);
  1435. for(;;)
  1436. {
  1437. unsigned len = va_arg(args, unsigned);
  1438. if(len+1==0)
  1439. break;
  1440. UChar * str = va_arg(args, UChar *);
  1441. if (len)
  1442. idx = unorm_concatenate(tgt, idx, str, len, tgt, tlen, UNORM_NFC, 0, &err);
  1443. }
  1444. va_end(args);
  1445. while (idx < tlen)
  1446. tgt[idx++] = 0;
  1447. tgt[tlen] = 0;
  1448. }
  1449. #endif
  1450. //------------------------------------------------------------------------------------------------
// The following concat functions are all deprecated in favour of the
// variable-number-of-arguments versions.
  1453. unsigned rtlConcatStrToStr(unsigned tlen, char * tgt, unsigned idx, unsigned slen, const char * src)
  1454. {
  1455. unsigned len = tlen-idx;
  1456. if (len > slen)
  1457. len = slen;
  1458. memcpy(tgt+idx, src, len);
  1459. return idx+len;
  1460. }
  1461. unsigned rtlConcatVStrToStr(unsigned tlen, char * tgt, unsigned idx, const char * src)
  1462. {
  1463. while (idx != tlen)
  1464. {
  1465. char next = *src++;
  1466. if (!next)
  1467. break;
  1468. tgt[idx++] = next;
  1469. }
  1470. return idx;
  1471. }
  1472. void rtlConcatStrToVStr(unsigned tlen, void * _tgt, unsigned slen, const void * src)
  1473. {
  1474. char * tgt = (char *)_tgt;
  1475. unsigned tend = strlen(tgt);
  1476. rtlStrToVStr(tlen-tend, tgt+tend, slen, src);
  1477. }
  1478. void rtlConcatVStrToVStr(unsigned tlen, void * _tgt, const char * src)
  1479. {
  1480. char * tgt = (char *)_tgt;
  1481. unsigned tend = strlen(tgt);
  1482. rtlVStrToVStr(tlen-tend, tgt+tend, src);
  1483. }
  1484. #ifdef _USE_ICU
  1485. unsigned rtlConcatUnicodeToUnicode(unsigned tlen, UChar * tgt, unsigned idx, unsigned slen, UChar const * src)
  1486. {
  1487. UErrorCode err = U_ZERO_ERROR;
  1488. return unorm_concatenate(tgt, idx, src, slen, tgt, tlen, UNORM_NFC, 0, &err);
  1489. }
  1490. unsigned rtlConcatVUnicodeToUnicode(unsigned tlen, UChar * tgt, unsigned idx, UChar const * src)
  1491. {
  1492. return rtlConcatUnicodeToUnicode(tlen, tgt, idx, rtlUnicodeStrlen(src), src);
  1493. }
  1494. #endif
  1495. void rtlESpaceFill(unsigned tlen, char * tgt, unsigned idx)
  1496. {
  1497. if (idx < tlen)
  1498. memset(tgt+idx, '@', tlen-idx);
  1499. }
  1500. void rtlSpaceFill(unsigned tlen, char * tgt, unsigned idx)
  1501. {
  1502. if (idx < tlen)
  1503. memset(tgt+idx, ' ', tlen-idx);
  1504. }
  1505. void rtlZeroFill(unsigned tlen, char * tgt, unsigned idx)
  1506. {
  1507. if (idx < tlen)
  1508. memset(tgt+idx, 0, tlen-idx);
  1509. }
  1510. void rtlNullTerminate(unsigned tlen, char * tgt, unsigned idx)
  1511. {
  1512. if (idx >= tlen)
  1513. idx = tlen-1;
  1514. tgt[idx] = 0;
  1515. }
  1516. void rtlUnicodeSpaceFill(unsigned tlen, UChar * tgt, unsigned idx)
  1517. {
  1518. while(idx<tlen) tgt[idx++] = 0x0020;
  1519. }
  1520. void rtlUnicodeNullTerminate(unsigned tlen, UChar * tgt, unsigned idx)
  1521. {
  1522. if (idx >= tlen)
  1523. idx = tlen-1;
  1524. tgt[idx] = 0x0000;
  1525. }
  1526. void rtlUnicodeStrcpy(UChar * tgt, UChar const * src)
  1527. {
  1528. memcpy(tgt, src, rtlUnicodeStrlen(src)*2+2);
  1529. }
  1530. void rtlConcatExtend(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  1531. {
  1532. unsigned len = tlen + slen;
  1533. tgt = (char *)rtlRealloc(tgt, len);
  1534. memcpy(tgt+tlen, src, slen);
  1535. tlen = len;
  1536. }
  1537. void rtlConcatUnicodeExtend(size32_t & tlen, UChar * & tgt, size32_t slen, const UChar * src)
  1538. {
  1539. unsigned len = tlen + slen;
  1540. tgt = (UChar *)rtlRealloc(tgt, len * sizeof(UChar));
  1541. memcpy(tgt+tlen, src, slen * sizeof(UChar));
  1542. tlen = len;
  1543. }
  1544. //-----------------------------------------------------------------------------
  1545. inline void normalizeFrom(unsigned & from, unsigned slen)
  1546. {
  1547. from--;
  1548. if ((int)from < 0)
  1549. from = 0;
  1550. else if (from > slen)
  1551. from = slen;
  1552. }
  1553. inline void normalizeFromTo(unsigned & from, unsigned & to)
  1554. {
  1555. from--;
  1556. if ((int)from < 0) from = 0;
  1557. if ((int)to < (int)from) to = from;
  1558. }
  1559. inline void clipFromTo(unsigned & from, unsigned & to, unsigned slen)
  1560. {
  1561. if (to > slen)
  1562. {
  1563. to = slen;
  1564. if (from > slen)
  1565. from = slen;
  1566. }
  1567. }
  1568. //NB: From and to are 1 based: Now fills to ensure the correct length.
  1569. void * doSubStrFT(unsigned & tlen, unsigned slen, const void * src, unsigned from, unsigned to, byte fillChar)
  1570. {
  1571. normalizeFromTo(from, to);
  1572. unsigned len = to - from;
  1573. clipFromTo(from, to, slen);
  1574. unsigned copylen = to - from;
  1575. char * buffer = (char *)rtlMalloc(len);
  1576. memcpy(buffer, (byte *)src+from, copylen);
  1577. if (copylen < len)
  1578. memset(buffer+copylen, fillChar, len-copylen);
  1579. tlen = len;
  1580. return buffer;
  1581. }
  1582. void rtlSubStrFX(unsigned & tlen, char * & tgt, unsigned slen, const char * src, unsigned from)
  1583. {
  1584. normalizeFrom(from, slen);
  1585. tlen = slen-from;
  1586. tgt = (char *) rtlMalloc(tlen);
  1587. memcpy(tgt, src+from, tlen);
  1588. }
  1589. void rtlSubStrFTX(unsigned & tlen, char * & tgt, unsigned slen, const char * src, unsigned from, unsigned to)
  1590. {
  1591. tgt = (char *)doSubStrFT(tlen, slen, src, from, to, ' ');
  1592. }
  1593. void rtlSubStrFT(unsigned tlen, char * tgt, unsigned slen, const char * src, unsigned from, unsigned to)
  1594. {
  1595. unsigned char fillChar = ' '; // More, should be passed as a parameter
  1596. normalizeFromTo(from, to);
  1597. clipFromTo(from, to, slen);
  1598. unsigned copylen = to - from;
  1599. if (copylen > tlen)
  1600. copylen = tlen;
  1601. memcpy(tgt, (const char *)src+from, copylen);
  1602. if (copylen < tlen)
  1603. memset(tgt+copylen, fillChar, tlen-copylen);
  1604. }
  1605. void rtlSubDataFT(unsigned tlen, void * tgt, unsigned slen, const void * src, unsigned from, unsigned to)
  1606. {
  1607. normalizeFromTo(from, to);
  1608. clipFromTo(from, to, slen);
  1609. unsigned copylen = to - from;
  1610. if (copylen > tlen)
  1611. copylen = tlen;
  1612. memcpy(tgt, (char *)src+from, copylen);
  1613. if (copylen < tlen)
  1614. memset((byte*)tgt+copylen, 0, tlen-copylen);
  1615. }
  1616. void rtlSubDataFTX(unsigned & tlen, void * & tgt, unsigned slen, const void * src, unsigned from, unsigned to)
  1617. {
  1618. tgt = doSubStrFT(tlen, slen, src, from, to, 0);
  1619. }
  1620. void rtlSubDataFX(unsigned & tlen, void * & tgt, unsigned slen, const void * src, unsigned from)
  1621. {
  1622. normalizeFrom(from, slen);
  1623. tlen = slen-from;
  1624. tgt = (char *) rtlMalloc(tlen);
  1625. memcpy(tgt, (const byte *)src+from, tlen);
  1626. }
  1627. void rtlUnicodeSubStrFTX(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src, unsigned from, unsigned to)
  1628. {
  1629. normalizeFromTo(from, to);
  1630. tlen = to - from;
  1631. clipFromTo(from, to, slen);
  1632. tgt = (UChar *)rtlMalloc(tlen*2);
  1633. unsigned copylen = to - from;
  1634. memcpy(tgt, src+from, copylen*2);
  1635. while(copylen<tlen)
  1636. tgt[copylen++] = 0x0020;
  1637. }
  1638. void rtlUnicodeSubStrFX(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src, unsigned from)
  1639. {
  1640. normalizeFrom(from, slen);
  1641. tlen = slen - from;
  1642. tgt = (UChar *)rtlMalloc(tlen*2);
  1643. memcpy(tgt, src+from, tlen*2);
  1644. }
  1645. void rtlSubQStrFTX(unsigned & tlen, char * & tgt, unsigned slen, char const * src, unsigned from, unsigned to)
  1646. {
  1647. normalizeFromTo(from, to);
  1648. tlen = to - from;
  1649. clipFromTo(from, to, slen);
  1650. tgt = (char *)rtlMalloc(rtlQStrSize(tlen));
  1651. copyQStrRange(tlen, tgt, src, from, to);
  1652. }
  1653. void rtlSubQStrFX(unsigned & tlen, char * & tgt, unsigned slen, char const * src, unsigned from)
  1654. {
  1655. normalizeFrom(from, slen);
  1656. tlen = slen - from;
  1657. tgt = (char *)rtlMalloc(rtlQStrSize(tlen));
  1658. copyQStrRange(tlen, tgt, src, from, slen);
  1659. }
  1660. void rtlSubQStrFT(unsigned tlen, char * tgt, unsigned slen, const char * src, unsigned from, unsigned to)
  1661. {
  1662. normalizeFromTo(from, to);
  1663. clipFromTo(from, to, slen);
  1664. copyQStrRange(tlen, tgt, src, from ,to);
  1665. }
  1666. //-----------------------------------------------------------------------------
  1667. unsigned rtlTrimStrLen(size32_t l, const char * t)
  1668. {
  1669. while (l)
  1670. {
  1671. if (t[l-1] != ' ')
  1672. break;
  1673. l--;
  1674. }
  1675. return l;
  1676. }
  1677. unsigned rtlTrimDataLen(size32_t l, const void * _t)
  1678. {
  1679. const char * t = (const char *)_t;
  1680. while (l)
  1681. {
  1682. if (t[l-1] != 0)
  1683. break;
  1684. l--;
  1685. }
  1686. return l;
  1687. }
  1688. inline size32_t rtlQuickTrimUnicode(size32_t len, UChar const * str)
  1689. {
  1690. while (len && u_isspace(str[len-1]))
  1691. len--;
  1692. return len;
  1693. }
  1694. unsigned rtlTrimUnicodeStrLen(size32_t l, UChar const * t)
  1695. {
  1696. #ifdef _USE_ICU
  1697. if (!l)
  1698. return 0;
  1699. UCharCharacterIterator iter(t, l);
  1700. for(iter.last32(); iter.hasPrevious(); iter.previous32())
  1701. if(!u_isspace(iter.current32()))
  1702. break;
  1703. if(u_isspace(iter.current32())) return iter.getIndex(); // required as the reverse iteration above doesn't hit the first character
  1704. return iter.getIndex() + 1;
  1705. #else
  1706. return rtlQuickTrimUnicode(l, t);
  1707. #endif
  1708. }
  1709. unsigned rtlTrimVStrLen(const char * t)
  1710. {
  1711. const char * first = t;
  1712. const char * last = first;
  1713. unsigned char c;
  1714. while ((c = *t++) != 0)
  1715. {
  1716. if (c != ' ')
  1717. last = t; //nb after increment of t
  1718. }
  1719. return (last - first);
  1720. }
  1721. unsigned rtlTrimVUnicodeStrLen(UChar const * t)
  1722. {
  1723. return rtlTrimUnicodeStrLen(rtlUnicodeStrlen(t), t);
  1724. }
  1725. inline unsigned rtlLeftTrimStrStart(size32_t slen, const char * src)
  1726. {
  1727. unsigned i = 0;
  1728. while(i < slen && src[i] == ' ')
  1729. i++;
  1730. return i;
  1731. }
  1732. inline unsigned rtlLeftTrimUnicodeStrStart(size32_t slen, UChar const * src)
  1733. {
  1734. #ifdef _USE_ICU
  1735. UCharCharacterIterator iter(src, slen);
  1736. for(iter.first32(); iter.hasNext(); iter.next32())
  1737. if(!u_isspace(iter.current32()))
  1738. break;
  1739. return iter.getIndex();
  1740. #else
  1741. return slen;
  1742. #endif
  1743. }
  1744. inline unsigned rtlLeftTrimVStrStart(const char * src)
  1745. {
  1746. unsigned i = 0;
  1747. while(src[i] == ' ')
  1748. i++;
  1749. return i;
  1750. }
  1751. inline void rtlTrimUtf8Len(unsigned & trimLen, size32_t & trimSize, size32_t len, const char * t)
  1752. {
  1753. const byte * start = (const byte *)t;
  1754. const byte * cur = start;
  1755. unsigned trimLength = 0;
  1756. const byte * trimEnd = cur;
  1757. for (unsigned i=0; i < len; i++)
  1758. {
  1759. unsigned next = readUtf8Character(UTF8_MAXSIZE, cur);
  1760. if (!u_isspace(next))
  1761. {
  1762. trimLength = i+1;
  1763. trimEnd = cur;
  1764. }
  1765. }
  1766. trimLen = trimLength;
  1767. trimSize = trimEnd-start;
  1768. }
  1769. inline void rtlTrimUtf8Start(unsigned & trimLen, size32_t & trimSize, size32_t len, const char * t)
  1770. {
  1771. const byte * start = (const byte *)t;
  1772. const byte * cur = start;
  1773. for (unsigned i=0; i < len; i++)
  1774. {
  1775. const byte * prev = cur;
  1776. unsigned next = readUtf8Character(UTF8_MAXSIZE, cur);
  1777. if (!u_isspace(next))
  1778. {
  1779. trimLen = i;
  1780. trimSize = prev-start;
  1781. return;
  1782. }
  1783. }
  1784. trimLen = len;
  1785. trimSize = cur-start;
  1786. }
  1787. inline char * rtlDupSubString(const char * src, unsigned len)
  1788. {
  1789. char * buffer = (char *)rtlMalloc(len + 1);
  1790. memcpy(buffer, src, len);
  1791. buffer[len] = 0;
  1792. return buffer;
  1793. }
  1794. inline UChar * rtlDupSubUnicode(UChar const * src, unsigned len)
  1795. {
  1796. UChar * buffer = (UChar *)rtlMalloc((len + 1) * 2);
  1797. memcpy(buffer, src, len*2);
  1798. buffer[len] = 0x00;
  1799. return buffer;
  1800. }
  1801. inline void rtlCopySubStringV(size32_t tlen, char * tgt, unsigned slen, const char * src)
  1802. {
  1803. if (slen >= tlen)
  1804. slen = tlen-1;
  1805. memcpy(tgt, src, slen);
  1806. tgt[slen] = 0;
  1807. }
  1808. //not yet used, but would be needed for assignment to string rather than vstring
  1809. inline void rtlCopySubString(size32_t tlen, char * tgt, unsigned slen, const char * src, char fill)
  1810. {
  1811. if (slen > tlen)
  1812. slen = tlen;
  1813. memcpy(tgt, src, slen);
  1814. memset(tgt + slen, fill, tlen-slen);
  1815. }
  1816. unsigned rtlTrimUtf8StrLen(size32_t len, const char * t)
  1817. {
  1818. unsigned trimLength = 0;
  1819. const byte * cur = (const byte *)t;
  1820. for (unsigned i=0; i < len; i++)
  1821. {
  1822. unsigned next = readUtf8Character(UTF8_MAXSIZE, cur);
  1823. if (!u_isspace(next))
  1824. trimLength = i+1;
  1825. }
  1826. return trimLength;
  1827. }
  1828. //-----------------------------------------------------------------------------
// Functions to trim off right side blank spaces
  1830. void rtlTrimRight(size32_t & tlen, char * & tgt, unsigned slen, const char * src)
  1831. {
  1832. tlen = rtlTrimStrLen(slen, src);
  1833. tgt = rtlDupSubString(src, tlen);
  1834. }
  1835. void rtlTrimUnicodeRight(size32_t & tlen, UChar * & tgt, unsigned slen, UChar const * src)
  1836. {
  1837. tlen = rtlTrimUnicodeStrLen(slen, src);
  1838. tgt = rtlDupSubUnicode(src, tlen);
  1839. }
  1840. void rtlTrimVRight(size32_t & tlen, char * & tgt, const char * src)
  1841. {
  1842. tlen = rtlTrimVStrLen(src);
  1843. tgt = rtlDupSubString(src, tlen);
  1844. }
  1845. void rtlTrimVUnicodeRight(size32_t & tlen, UChar * & tgt, UChar const * src)
  1846. {
  1847. rtlTrimUnicodeRight(tlen, tgt, rtlUnicodeStrlen(src), src);
  1848. }
  1849. void rtlTrimUtf8Right(unsigned &tlen, char * &tgt, unsigned slen, char const * src)
  1850. {
  1851. unsigned trimLength;
  1852. size32_t trimSize;
  1853. rtlTrimUtf8Len(trimLength, trimSize, slen, src);
  1854. tlen = trimLength;
  1855. tgt = rtlDupSubString(src, trimSize);
  1856. }
  1857. void rtlAssignTrimRightV(size32_t tlen, char * tgt, unsigned slen, const char * src)
  1858. {
  1859. unsigned len = rtlTrimStrLen(slen, src);
  1860. rtlCopySubStringV(tlen, tgt, len, src);
  1861. }
  1862. void rtlAssignTrimVRightV(size32_t tlen, char * tgt, const char * src)
  1863. {
  1864. unsigned len = rtlTrimVStrLen(src);
  1865. rtlCopySubStringV(tlen, tgt, len, src);
  1866. }
  1867. //-------------------------------------------------------------------------------
  1868. // Functions to trim off left side blank spaces
  1869. void rtlTrimLeft(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  1870. {
  1871. unsigned start = rtlLeftTrimStrStart(slen, src);
  1872. unsigned len = slen - start;
  1873. tlen = len;
  1874. tgt = rtlDupSubString(src + start, len);
  1875. }
  1876. void rtlTrimUnicodeLeft(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src)
  1877. {
  1878. unsigned start = rtlLeftTrimUnicodeStrStart(slen, src);
  1879. unsigned len = slen - start;
  1880. tlen = len;
  1881. tgt = rtlDupSubUnicode(src + start, len);
  1882. }
  1883. void rtlTrimVLeft(unsigned & tlen, char * & tgt, const char * src)
  1884. {
  1885. unsigned start = rtlLeftTrimVStrStart(src);
  1886. unsigned len = strlen(src+start);
  1887. tlen = len;
  1888. tgt = rtlDupSubString(src + start, len);
  1889. }
  1890. void rtlTrimVUnicodeLeft(unsigned & tlen, UChar * & tgt, UChar const * src)
  1891. {
  1892. rtlTrimUnicodeLeft(tlen, tgt, rtlUnicodeStrlen(src), src);
  1893. }
  1894. ECLRTL_API void rtlTrimUtf8Left(unsigned &tlen, char * &tgt, unsigned slen, const char * src)
  1895. {
  1896. unsigned trimLength;
  1897. size32_t trimSize;
  1898. rtlTrimUtf8Start(trimLength, trimSize, slen, src);
  1899. unsigned len = slen-trimLength;
  1900. const char * start = src+trimSize;
  1901. tlen = len;
  1902. tgt = rtlDupSubString(start, rtlUtf8Size(len, start));
  1903. }
  1904. void rtlAssignTrimLeftV(size32_t tlen, char * tgt, unsigned slen, const char * src)
  1905. {
  1906. unsigned start = rtlLeftTrimStrStart(slen, src);
  1907. unsigned len = slen - start;
  1908. rtlCopySubStringV(tlen, tgt, len, src+start);
  1909. }
  1910. void rtlAssignTrimVLeftV(size32_t tlen, char * tgt, const char * src)
  1911. {
  1912. unsigned start = rtlLeftTrimVStrStart(src);
  1913. unsigned len = strlen(src+start);
  1914. rtlCopySubStringV(tlen, tgt, len, src+start);
  1915. }
  1916. //--------------------------------------------------------------------------------
  1917. // Functions to trim off blank spaces of both sides
  1918. void rtlTrimBoth(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  1919. {
  1920. unsigned len = rtlTrimStrLen(slen, src);
  1921. unsigned start = len ? rtlLeftTrimStrStart(slen, src) : 0;
  1922. len -= start;
  1923. tlen = len;
  1924. tgt = rtlDupSubString(src + start, len);
  1925. }
  1926. void rtlTrimUnicodeBoth(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src)
  1927. {
  1928. unsigned len = rtlTrimUnicodeStrLen(slen, src);
  1929. unsigned start = len ? rtlLeftTrimUnicodeStrStart(slen, src) : 0;
  1930. len -= start;
  1931. tlen = len;
  1932. tgt = rtlDupSubUnicode(src + start, len);
  1933. }
  1934. void rtlTrimVBoth(unsigned & tlen, char * & tgt, const char * src)
  1935. {
  1936. unsigned len = rtlTrimVStrLen(src);
  1937. unsigned start = len ? rtlLeftTrimVStrStart(src) : 0;
  1938. len -= start;
  1939. tlen = len;
  1940. tgt = rtlDupSubString(src + start, len);
  1941. }
  1942. void rtlTrimVUnicodeBoth(unsigned & tlen, UChar * & tgt, UChar const * src)
  1943. {
  1944. rtlTrimUnicodeBoth(tlen, tgt, rtlUnicodeStrlen(src), src);
  1945. }
  1946. ECLRTL_API void rtlTrimUtf8Both(unsigned &tlen, char * &tgt, unsigned slen, const char * src)
  1947. {
  1948. unsigned lTrimLength;
  1949. size32_t lTrimSize;
  1950. rtlTrimUtf8Start(lTrimLength, lTrimSize, slen, src);
  1951. rtlTrimUtf8Right(tlen, tgt, slen-lTrimLength, src+lTrimSize);
  1952. }
  1953. void rtlAssignTrimBothV(size32_t tlen, char * tgt, unsigned slen, const char * src)
  1954. {
  1955. unsigned len = rtlTrimStrLen(slen, src);
  1956. unsigned start = len ? rtlLeftTrimStrStart(slen, src) : 0;
  1957. len -= start;
  1958. rtlCopySubStringV(tlen, tgt, len, src+start);
  1959. }
  1960. void rtlAssignTrimVBothV(size32_t tlen, char * tgt, const char * src)
  1961. {
  1962. unsigned len = rtlTrimVStrLen(src);
  1963. unsigned start = len ? rtlLeftTrimVStrStart(src) : 0;
  1964. len -= start;
  1965. rtlCopySubStringV(tlen, tgt, len, src+start);
  1966. }
  1967. //-----------------------------------------------------------------------------
  1968. // Functions used to trim off all blank spaces in a string.
  1969. unsigned rtlTrimStrLenNonBlank(size32_t l, const char * t)
  1970. {
  1971. unsigned len = 0;
  1972. while (l)
  1973. {
  1974. l--;
  1975. if (t[l] != ' ')
  1976. len++;
  1977. }
  1978. return len;
  1979. }
  1980. unsigned rtlTrimVStrLenNonBlank(const char * t)
  1981. {
  1982. unsigned len = 0;
  1983. unsigned char c;
  1984. while ((c = *t++) != 0)
  1985. {
  1986. if (c != ' ')
  1987. len++;
  1988. }
  1989. return len;
  1990. }
  1991. void rtlTrimAll(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  1992. {
  1993. tlen = rtlTrimStrLenNonBlank(slen, src);
  1994. char * buffer = (char *)rtlMalloc(tlen + 1);
  1995. int ind = 0;
  1996. for(unsigned i = 0; i < slen; i++) {
  1997. if(src[i] != ' ') {
  1998. buffer[ind] = src[i];
  1999. ind++;
  2000. }
  2001. }
  2002. buffer[tlen] = 0;
  2003. tgt = buffer;
  2004. }
  2005. void rtlTrimUnicodeAll(unsigned & tlen, UChar * & tgt, unsigned slen, const UChar * src)
  2006. {
  2007. #ifdef _USE_ICU
  2008. UnicodeString rawStr;
  2009. UCharCharacterIterator iter(src, slen);
  2010. for(iter.first32(); iter.hasNext(); iter.next32())
  2011. if(!u_isspace(iter.current32()))
  2012. rawStr.append(iter.current32());
  2013. UnicodeString tgtStr;
  2014. normalizeUnicodeString(rawStr, tgtStr); // normalized in case crazy string like [combining accent] [space] [vowel]
  2015. tlen = tgtStr.length();
  2016. tgt = (UChar *)rtlMalloc((tlen+1)*2);
  2017. tgtStr.extract(0, tlen, tgt);
  2018. tgt[tlen] = 0x0000;
  2019. #else
  2020. rtlThrowNoUnicode();
  2021. #endif
  2022. }
  2023. void rtlTrimVAll(unsigned & tlen, char * & tgt, const char * src)
  2024. {
  2025. tlen = rtlTrimVStrLenNonBlank(src);
  2026. char * buffer = (char *)rtlMalloc(tlen + 1);
  2027. int ind = 0;
  2028. int i = 0;
  2029. while(src[i] != 0) {
  2030. if(src[i] != ' ') {
  2031. buffer[ind] = src[i];
  2032. ind++;
  2033. }
  2034. i++;
  2035. }
  2036. buffer[tlen] = 0;
  2037. tgt = buffer;
  2038. }
  2039. void rtlTrimVUnicodeAll(unsigned & tlen, UChar * & tgt, const UChar * src)
  2040. {
  2041. rtlTrimUnicodeAll(tlen, tgt, rtlUnicodeStrlen(src), src);
  2042. }
  2043. ECLRTL_API void rtlTrimUtf8All(unsigned &tlen, char * &tgt, unsigned slen, const char * src)
  2044. {
  2045. //Go via unicode because of possibility of combining accents etc.
  2046. rtlDataAttr temp1(slen*sizeof(UChar));
  2047. rtlUtf8ToUnicode(slen, temp1.getustr(), slen, src);
  2048. unsigned trimLen;
  2049. rtlDataAttr trimText;
  2050. rtlTrimUnicodeAll(trimLen, trimText.refustr(), slen, temp1.getustr());
  2051. rtlUnicodeToUtf8X(tlen, tgt, trimLen, trimText.getustr());
  2052. }
  2053. void rtlAssignTrimAllV(unsigned tlen, char * tgt, unsigned slen, const char * src)
  2054. {
  2055. unsigned to = 0;
  2056. for (unsigned from = 0; (from < slen)&&(to+1 < tlen); from++)
  2057. {
  2058. if (src[from] != ' ')
  2059. tgt[to++] = src[from];
  2060. }
  2061. tgt[to] = 0;
  2062. }
  2063. void rtlAssignTrimVAllV(unsigned tlen, char * tgt, const char * src)
  2064. {
  2065. unsigned to = 0;
  2066. for (;(*src && (to+1 < tlen));src++)
  2067. {
  2068. if (*src != ' ')
  2069. tgt[to++] = *src;
  2070. }
  2071. tgt[to] = 0;
  2072. }
  2073. //-----------------------------------------------------------------------------
  2074. ECLRTL_API void rtlUnicodeToVAscii(unsigned outlen, char * out, unsigned inlen, UChar const * in)
  2075. {
  2076. rtlUnicodeToVCodepage(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2077. }
  2078. ECLRTL_API void rtlData2VUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
  2079. {
  2080. rtlCodepageToVUnicode(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2081. }
  2082. ECLRTL_API void rtlStrToVUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
  2083. {
  2084. rtlCodepageToVUnicode(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2085. }
  2086. ECLRTL_API void rtlData2Unicode(unsigned outlen, UChar * out, unsigned inlen, void const * in)
  2087. {
  2088. rtlCodepageToUnicode(outlen, out, inlen, (const char *)in, ASCII_LIKE_CODEPAGE);
  2089. }
  2090. ECLRTL_API void rtlAssignTrimUnicodeLeftV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
  2091. {
  2092. unsigned len;
  2093. UChar * str;
  2094. rtlTrimUnicodeLeft(len, str, slen, src);
  2095. if (len >= tlen)
  2096. len = tlen-1;
  2097. memcpy(tgt, str, len*2);
  2098. tgt[len] = 0;
  2099. rtlFree(str);
  2100. }
  2101. ECLRTL_API void rtlAssignTrimVUnicodeLeftV(size32_t tlen, UChar * tgt, const UChar * src)
  2102. {
  2103. unsigned len;
  2104. UChar * str;
  2105. rtlTrimVUnicodeLeft(len, str, src);
  2106. if (len >= tlen)
  2107. len = tlen-1;
  2108. memcpy(tgt, str, len*2);
  2109. tgt[len] = 0;
  2110. rtlFree(str);
  2111. }
  2112. ECLRTL_API void rtlAssignTrimUnicodeRightV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
  2113. {
  2114. unsigned len;
  2115. UChar * str;
  2116. rtlTrimUnicodeRight(len, str, slen, src);
  2117. if (len >= tlen)
  2118. len = tlen-1;
  2119. memcpy(tgt, str, len*2);
  2120. tgt[len] = 0;
  2121. rtlFree(str);
  2122. }
  2123. ECLRTL_API void rtlAssignTrimVUnicodeRightV(size32_t tlen, UChar * tgt, const UChar * src)
  2124. {
  2125. unsigned len;
  2126. UChar * str;
  2127. rtlTrimVUnicodeRight(len, str, src);
  2128. if (len >= tlen)
  2129. len = tlen-1;
  2130. memcpy(tgt, str, len*2);
  2131. tgt[len] = 0;
  2132. rtlFree(str);
  2133. }
  2134. ECLRTL_API void rtlAssignTrimUnicodeBothV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
  2135. {
  2136. unsigned len;
  2137. UChar * str;
  2138. rtlTrimUnicodeBoth(len, str, slen, src);
  2139. if (len >= tlen)
  2140. len = tlen-1;
  2141. memcpy(tgt, str, len*2);
  2142. tgt[len] = 0;
  2143. rtlFree(str);
  2144. }
  2145. ECLRTL_API void rtlAssignTrimVUnicodeBothV(size32_t tlen, UChar * tgt, const UChar * src)
  2146. {
  2147. unsigned len;
  2148. UChar * str;
  2149. rtlTrimVUnicodeBoth(len, str, src);
  2150. if (len >= tlen)
  2151. len = tlen-1;
  2152. memcpy(tgt, str, len*2);
  2153. tgt[len] = 0;
  2154. rtlFree(str);
  2155. }
  2156. ECLRTL_API void rtlAssignTrimUnicodeAllV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
  2157. {
  2158. unsigned len;
  2159. UChar * str;
  2160. rtlTrimUnicodeAll(len, str, slen, src);
  2161. if (len >= tlen)
  2162. len = tlen-1;
  2163. memcpy(tgt, str, len*2);
  2164. tgt[len] = 0;
  2165. rtlFree(str);
  2166. }
  2167. ECLRTL_API void rtlAssignTrimVUnicodeAllV(size32_t tlen, UChar * tgt, const UChar * src)
  2168. {
  2169. unsigned len;
  2170. UChar * str;
  2171. rtlTrimVUnicodeAll(len, str, src);
  2172. if (len >= tlen)
  2173. len = tlen-1;
  2174. memcpy(tgt, str, len*2);
  2175. tgt[len] = 0;
  2176. rtlFree(str);
  2177. }
  2178. //-----------------------------------------------------------------------------
  2179. int rtlCompareStrStr(unsigned l1, const char * p1, unsigned l2, const char * p2)
  2180. {
  2181. unsigned len = l1;
  2182. if (len > l2)
  2183. len = l2;
  2184. int diff = memcmp(p1, p2, len);
  2185. if (diff == 0)
  2186. {
  2187. if (len != l1)
  2188. {
  2189. for (;(diff == 0) && (len != l1);len++)
  2190. diff = ((unsigned char *)p1)[len] - ' ';
  2191. }
  2192. else if (len != l2)
  2193. {
  2194. for (;(diff == 0) && (len != l2);len++)
  2195. diff = ' ' - ((unsigned char *)p2)[len];
  2196. }
  2197. }
  2198. return diff;
  2199. }
  2200. int rtlCompareVStrVStr(const char * p1, const char * p2)
  2201. {
  2202. return rtlCompareStrStr(strlen(p1), p1, strlen(p2), p2);
  2203. }
  2204. int rtlCompareStrBlank(unsigned l1, const char * p1)
  2205. {
  2206. while (l1--)
  2207. {
  2208. int diff = (*(unsigned char *)(p1++)) - ' ';
  2209. if (diff)
  2210. return diff;
  2211. }
  2212. return 0;
  2213. }
  2214. int rtlCompareDataData(unsigned l1, const void * p1, unsigned l2, const void * p2)
  2215. {
  2216. unsigned len = l1;
  2217. if (len > l2)
  2218. len = l2;
  2219. int diff = memcmp(p1, p2, len);
  2220. if (diff == 0)
  2221. {
  2222. if (l1 > l2)
  2223. diff = +1;
  2224. else if (l1 < l2)
  2225. diff = -1;
  2226. }
  2227. return diff;
  2228. }
  2229. int rtlCompareEStrEStr(unsigned l1, const char * p1, unsigned l2, const char * p2)
  2230. {
  2231. unsigned len = l1;
  2232. if (len > l2)
  2233. len = l2;
  2234. int diff = memcmp(p1, p2, len);
  2235. if (diff == 0)
  2236. {
  2237. if (len != l1)
  2238. {
  2239. for (;(diff == 0) && (len != l1);len++)
  2240. diff = ((unsigned char *)p1)[len] - '@';
  2241. }
  2242. else if (len != l2)
  2243. {
  2244. for (;(diff == 0) && (len != l2);len++)
  2245. diff = '@' - ((unsigned char *)p2)[len];
  2246. }
  2247. }
  2248. return diff;
  2249. }
  2250. const static UChar nullUStr = 0;
  2251. #ifdef _USE_ICU
  2252. int rtlCompareUnicodeUnicode(unsigned l1, UChar const * p1, unsigned l2, UChar const * p2, char const * locale)
  2253. {
  2254. while(l1 && u_isUWhiteSpace(p1[l1-1])) l1--;
  2255. while(l2 && u_isUWhiteSpace(p2[l2-1])) l2--;
  2256. if (!p1) p1 = &nullUStr;
  2257. if (!p2) p2 = &nullUStr;
  2258. return ucol_strcoll(queryRTLLocale(locale)->queryCollator(), p1, l1, p2, l2);
  2259. }
  2260. int rtlCompareUnicodeUnicodeStrength(unsigned l1, UChar const * p1, unsigned l2, UChar const * p2, char const * locale, unsigned strength)
  2261. {
  2262. while(l1 && u_isUWhiteSpace(p1[l1-1])) l1--;
  2263. while(l2 && u_isUWhiteSpace(p2[l2-1])) l2--;
  2264. if (!p1) p1 = &nullUStr;
  2265. if (!p2) p2 = &nullUStr;
  2266. return ucol_strcoll(queryRTLLocale(locale)->queryCollator(strength), p1, l1, p2, l2);
  2267. }
  2268. #else
  2269. int rtlCompareUnicodeUnicode(unsigned l1, UChar const * p1, unsigned l2, UChar const * p2, char const * locale) { rtlThrowNoUnicode(); }
  2270. int rtlCompareUnicodeUnicodeStrength(unsigned l1, UChar const * p1, unsigned l2, UChar const * p2, char const * locale, unsigned strength) { rtlThrowNoUnicode(); }
  2271. #endif
  2272. int rtlCompareVUnicodeVUnicode(UChar const * p1, UChar const * p2, char const * locale)
  2273. {
  2274. return rtlCompareUnicodeUnicode(rtlUnicodeStrlen(p1), p1, rtlUnicodeStrlen(p2), p2, locale);
  2275. }
  2276. int rtlCompareVUnicodeVUnicodeStrength(UChar const * p1, UChar const * p2, char const * locale, unsigned strength)
  2277. {
  2278. return rtlCompareUnicodeUnicodeStrength(rtlUnicodeStrlen(p1), p1, rtlUnicodeStrlen(p2), p2, locale, strength);
  2279. }
  2280. #ifdef _USE_ICU
  2281. void rtlKeyUnicodeX(unsigned & tlen, void * & tgt, unsigned slen, const UChar * src, const char * locale)
  2282. {
  2283. while(slen && u_isUWhiteSpace(src[slen-1])) slen--;
  2284. UCollator * coll = queryRTLLocale(locale)->queryCollator();
  2285. tlen = ucol_getSortKey(coll, src, slen, 0, 0);
  2286. tgt = rtlMalloc(tlen);
  2287. ucol_getSortKey(coll, src, slen, (unsigned char *)tgt, tlen);
  2288. }
  2289. void rtlKeyUnicodeStrengthX(unsigned & tlen, void * & tgt, unsigned slen, const UChar * src, const char * locale, unsigned strength)
  2290. {
  2291. while(slen && u_isUWhiteSpace(src[slen-1])) slen--;
  2292. UCollator * coll = queryRTLLocale(locale)->queryCollator(strength);
  2293. tlen = ucol_getSortKey(coll, src, slen, 0, 0);
  2294. tgt = rtlMalloc(tlen);
  2295. ucol_getSortKey(coll, src, slen, (unsigned char *)tgt, tlen);
  2296. }
  2297. #else
  2298. void rtlKeyUnicodeX(unsigned & tlen, void * & tgt, unsigned slen, const UChar * src, const char * locale)
  2299. {
  2300. rtlThrowNoUnicode();
  2301. }
  2302. void rtlKeyUnicodeStrengthX(unsigned & tlen, void * & tgt, unsigned slen, const UChar * src, const char * locale, unsigned strength)
  2303. {
  2304. rtlThrowNoUnicode();
  2305. }
  2306. #endif
  2307. ECLRTL_API int rtlPrefixDiffStrEx(unsigned l1, const char * p1, unsigned l2, const char * p2, unsigned origin)
  2308. {
  2309. unsigned len = l1 < l2 ? l1 : l2;
  2310. const byte * str1 = (const byte *)p1;
  2311. const byte * str2 = (const byte *)p2;
  2312. for (unsigned i=0; i<len; i++)
  2313. {
  2314. byte c1 = str1[i];
  2315. byte c2 = str2[i];
  2316. if (c1 != c2)
  2317. {
  2318. if (c1 < c2)
  2319. return -(int)(i+origin+1);
  2320. else
  2321. return (int)(i+origin+1);
  2322. }
  2323. }
  2324. if (l1 != l2)
  2325. return (l1 < l2) ? -(int)(len+origin+1) : (int)(len+origin+1);
  2326. return 0;
  2327. }
  2328. ECLRTL_API int rtlPrefixDiffStr(unsigned l1, const char * p1, unsigned l2, const char * p2)
  2329. {
  2330. return rtlPrefixDiffStrEx(l1, p1, l2, p2, 0);
  2331. }
  2332. //MORE: I'm not sure this can really be implemented....
  2333. ECLRTL_API int rtlPrefixDiffUnicodeEx(unsigned l1, const UChar * p1, unsigned l2, const UChar * p2, char const * locale, unsigned origin)
  2334. {
  2335. #ifdef _USE_ICU
  2336. while(l1 && u_isUWhiteSpace(p1[l1-1])) l1--;
  2337. while(l2 && u_isUWhiteSpace(p2[l2-1])) l2--;
  2338. unsigned len = l1 < l2 ? l1 : l2;
  2339. for (unsigned i=0; i<len; i++)
  2340. {
  2341. if (p1[i] != p2[i])
  2342. {
  2343. int c = ucol_strcoll(queryRTLLocale(locale)->queryCollator(), p1+i, l1-i, p2+i, l2-i);
  2344. if (c < 0)
  2345. return -(int)(i+origin+1);
  2346. else if (c > 0)
  2347. return (int)(i+origin+1);
  2348. }
  2349. }
  2350. if (l1 != l2)
  2351. return (l1 < l2) ? -(int)(len+origin+1) : (int)(len+origin+1);
  2352. #else
  2353. rtlThrowNoUnicode();
  2354. #endif
  2355. return 0;
  2356. }
  2357. ECLRTL_API int rtlPrefixDiffUnicode(unsigned l1, const UChar * p1, unsigned l2, const UChar * p2, char const * locale)
  2358. {
  2359. return rtlPrefixDiffUnicodeEx(l1, p1, l2, p2, locale, 0);
  2360. }
  2361. //-----------------------------------------------------------------------------
  2362. void rtlStringToLower(size32_t l, char * t)
  2363. {
  2364. for (;l--;t++)
  2365. *t = tolower(*t);
  2366. }
  2367. void rtlStringToUpper(size32_t l, char * t)
  2368. {
  2369. for (;l--;t++)
  2370. *t = toupper(*t);
  2371. }
  2372. #ifdef _USE_ICU
  2373. void rtlUnicodeToLower(size32_t l, UChar * t, char const * locale)
  2374. {
  2375. UChar * buff = (UChar *)rtlMalloc(l*2);
  2376. UErrorCode err = U_ZERO_ERROR;
  2377. u_strToLower(buff, l, t, l, locale, &err);
  2378. unicodeNormalizedCopy(buff, t, l);
  2379. }
  2380. void rtlUnicodeToLowerX(size32_t & lenout, UChar * & out, size32_t l, const UChar * t, char const * locale)
  2381. {
  2382. out = (UChar *)rtlMalloc(l*2);
  2383. lenout = l;
  2384. UErrorCode err = U_ZERO_ERROR;
  2385. u_strToLower(out, l, t, l, locale, &err);
  2386. }
  2387. void rtlUnicodeToUpper(size32_t l, UChar * t, char const * locale)
  2388. {
  2389. UChar * buff = (UChar *)rtlMalloc(l*2);
  2390. UErrorCode err = U_ZERO_ERROR;
  2391. u_strToUpper(buff, l, t, l, locale, &err);
  2392. unicodeNormalizedCopy(buff, t, l);
  2393. }
  2394. #else
  2395. void rtlUnicodeToLower(size32_t l, UChar * t, char const * locale) { rtlThrowNoUnicode(); }
  2396. void rtlUnicodeToLowerX(size32_t & lenout, UChar * & out, size32_t l, const UChar * t, char const * locale) { rtlThrowNoUnicode(); }
  2397. void rtlUnicodeToUpper(size32_t l, UChar * t, char const * locale) { rtlThrowNoUnicode(); }
  2398. #endif
  2399. //=============================================================================
  2400. // Miscellaneous helper functions...
  2401. //-----------------------------------------------------------------------------
  2402. int searchTableStringN(unsigned count, const char * * table, unsigned width, const char * search)
  2403. {
  2404. int left = 0;
  2405. int right = count;
  2406. do
  2407. {
  2408. int mid = (left + right) >> 1;
  2409. int cmp = memcmp(search, table[mid], width);
  2410. if (cmp < 0)
  2411. right = mid;
  2412. else if (cmp > 0)
  2413. left = mid+1;
  2414. else
  2415. return mid;
  2416. } while (left < right);
  2417. return -1;
  2418. }
  2419. int rtlSearchTableStringN(unsigned count, const char * * table, unsigned width, const char * search)
  2420. {
  2421. int left = 0;
  2422. int right = count;
  2423. do
  2424. {
  2425. int mid = (left + right) >> 1;
  2426. //we could use rtlCompareStrStr, but both source and target strings should
  2427. //be the correct length, so no point.... (unless new weird collation sequences)
  2428. //we would also need to call a different function for data
  2429. int cmp = memcmp(search, table[mid], width);
  2430. if (cmp < 0)
  2431. right = mid;
  2432. else if (cmp > 0)
  2433. left = mid+1;
  2434. else
  2435. return mid;
  2436. } while (left < right);
  2437. return -1;
  2438. }
  2439. int rtlSearchTableVStringN(unsigned count, const char * * table, const char * search)
  2440. {
  2441. int left = 0;
  2442. int right = count;
  2443. do
  2444. {
  2445. int mid = (left + right) >> 1;
  2446. int cmp = strcmp(search, table[mid]);
  2447. if (cmp < 0)
  2448. right = mid;
  2449. else if (cmp > 0)
  2450. left = mid+1;
  2451. else
  2452. return mid;
  2453. } while (left < right);
  2454. return -1;
  2455. }
  2456. int rtlNewSearchDataTable(unsigned count, unsigned elemlen, const char * * table, unsigned width, const char * search)
  2457. {
  2458. int left = 0;
  2459. int right = count;
  2460. do
  2461. {
  2462. int mid = (left + right) >> 1;
  2463. int cmp = rtlCompareDataData( width, search, elemlen, table[mid]);
  2464. if (cmp < 0)
  2465. right = mid;
  2466. else if (cmp > 0)
  2467. left = mid+1;
  2468. else {
  2469. return mid;
  2470. }
  2471. } while (left < right);
  2472. return -1;
  2473. }
  2474. int rtlNewSearchEStringTable(unsigned count, unsigned elemlen, const char * * table, unsigned width, const char * search)
  2475. {
  2476. int left = 0;
  2477. int right = count;
  2478. do
  2479. {
  2480. int mid = (left + right) >> 1;
  2481. int cmp = rtlCompareEStrEStr( width, search, elemlen, table[mid]);
  2482. if (cmp < 0)
  2483. right = mid;
  2484. else if (cmp > 0)
  2485. left = mid+1;
  2486. else {
  2487. return mid;
  2488. }
  2489. } while (left < right);
  2490. return -1;
  2491. }
  2492. int rtlNewSearchQStringTable(unsigned count, unsigned elemlen, const char * * table, unsigned width, const char * search)
  2493. {
  2494. int left = 0;
  2495. int right = count;
  2496. do
  2497. {
  2498. int mid = (left + right) >> 1;
  2499. int cmp = rtlCompareQStrQStr( width, search, elemlen, table[mid]);
  2500. if (cmp < 0)
  2501. right = mid;
  2502. else if (cmp > 0)
  2503. left = mid+1;
  2504. else {
  2505. return mid;
  2506. }
  2507. } while (left < right);
  2508. return -1;
  2509. }
  2510. int rtlNewSearchStringTable(unsigned count, unsigned elemlen, const char * * table, unsigned width, const char * search)
  2511. {
  2512. int left = 0;
  2513. int right = count;
  2514. do
  2515. {
  2516. int mid = (left + right) >> 1;
  2517. int cmp = rtlCompareStrStr( width, search, elemlen, table[mid]);
  2518. if (cmp < 0)
  2519. right = mid;
  2520. else if (cmp > 0)
  2521. left = mid+1;
  2522. else {
  2523. return mid;
  2524. }
  2525. } while (left < right);
  2526. return -1;
  2527. }
  2528. #ifdef _USE_ICU
  2529. int rtlNewSearchUnicodeTable(unsigned count, unsigned elemlen, const UChar * * table, unsigned width, const UChar * search, const char * locale)
  2530. {
  2531. UCollator * coll = queryRTLLocale(locale)->queryCollator();
  2532. int left = 0;
  2533. int right = count;
  2534. if (!search) search = &nullUStr;
  2535. size32_t trimWidth = rtlQuickTrimUnicode(width, search);
  2536. do
  2537. {
  2538. int mid = (left + right) >> 1;
  2539. size32_t elemTrimWidth = rtlQuickTrimUnicode(elemlen, table[mid]);
  2540. UCollationResult cmp = ucol_strcoll(coll, search, trimWidth, table[mid], elemTrimWidth);
  2541. if (cmp == UCOL_LESS)
  2542. right = mid;
  2543. else if (cmp == UCOL_GREATER)
  2544. left = mid+1;
  2545. else
  2546. return mid;
  2547. } while (left < right);
  2548. return -1;
  2549. }
  2550. int rtlNewSearchVUnicodeTable(unsigned count, const UChar * * table, const UChar * search, const char * locale)
  2551. {
  2552. UCollator * coll = queryRTLLocale(locale)->queryCollator();
  2553. int left = 0;
  2554. int right = count;
  2555. do
  2556. {
  2557. int mid = (left + right) >> 1;
  2558. UCollationResult cmp = ucol_strcoll(coll, search, rtlUnicodeStrlen(search), table[mid], rtlUnicodeStrlen(table[mid]));
  2559. if (cmp == UCOL_LESS)
  2560. right = mid;
  2561. else if (cmp == UCOL_GREATER)
  2562. left = mid+1;
  2563. else
  2564. return mid;
  2565. } while (left < right);
  2566. return -1;
  2567. }
  2568. #endif
  2569. //-----------------------------------------------------------------------------
  2570. template <class T>
  2571. int rtlSearchIntegerTable(unsigned count, const T * table, T search)
  2572. {
  2573. int left = 0;
  2574. int right = count;
  2575. do
  2576. {
  2577. int mid = (left + right) >> 1;
  2578. T midValue = table[mid];
  2579. if (search < midValue)
  2580. right = mid;
  2581. else if (search > midValue)
  2582. left = mid+1;
  2583. else
  2584. return mid;
  2585. } while (left < right);
  2586. return -1;
  2587. }
  2588. int rtlSearchTableInteger8(unsigned count, const __int64 * table, __int64 search)
  2589. {
  2590. return rtlSearchIntegerTable(count, table, search);
  2591. }
  2592. int rtlSearchTableUInteger8(unsigned count, const unsigned __int64 * table, unsigned __int64 search)
  2593. {
  2594. return rtlSearchIntegerTable(count, table, search);
  2595. }
  2596. int rtlSearchTableInteger4(unsigned count, const int * table, int search)
  2597. {
  2598. return rtlSearchIntegerTable(count, table, search);
  2599. }
  2600. int rtlSearchTableUInteger4(unsigned count, const unsigned * table, unsigned search)
  2601. {
  2602. return rtlSearchIntegerTable(count, table, search);
  2603. }
  2604. //-----------------------------------------------------------------------------
  2605. unsigned rtlCrc32(unsigned len, const void * buffer, unsigned crc)
  2606. {
  2607. return crc32((const char *)buffer, len, crc);
  2608. }
  2609. //=============================================================================
  2610. // EBCDIC helper functions...
  2611. static unsigned char ccsid1047[] = "\
  2612. \000\001\002\003\234\011\206\177\227\215\216\013\014\015\016\017\
  2613. \020\021\022\023\235\012\010\207\030\031\222\217\034\035\036\037\
  2614. \200\201\202\203\204\205\027\033\210\211\212\213\214\005\006\007\
  2615. \220\221\026\223\224\225\226\004\230\231\232\233\024\025\236\032\
  2616. \040\240\342\344\340\341\343\345\347\361\242\056\074\050\053\174\
  2617. \046\351\352\353\350\355\356\357\354\337\041\044\052\051\073\136\
  2618. \055\057\302\304\300\301\303\305\307\321\246\054\045\137\076\077\
  2619. \370\311\312\313\310\315\316\317\314\140\072\043\100\047\075\042\
  2620. \330\141\142\143\144\145\146\147\150\151\253\273\360\375\376\261\
  2621. \260\152\153\154\155\156\157\160\161\162\252\272\346\270\306\244\
  2622. \265\176\163\164\165\166\167\170\171\172\241\277\320\133\336\256\
  2623. \254\243\245\267\251\247\266\274\275\276\335\250\257\135\264\327\
  2624. \173\101\102\103\104\105\106\107\110\111\255\364\366\362\363\365\
  2625. \175\112\113\114\115\116\117\120\121\122\271\373\374\371\372\377\
  2626. \134\367\123\124\125\126\127\130\131\132\262\324\326\322\323\325\
  2627. \060\061\062\063\064\065\066\067\070\071\263\333\334\331\332\237";
  2628. static unsigned char ccsid1047_rev[] = "\
  2629. \000\001\002\003\067\055\056\057\026\005\025\013\014\015\016\017\
  2630. \020\021\022\023\074\075\062\046\030\031\077\047\034\035\036\037\
  2631. \100\132\177\173\133\154\120\175\115\135\134\116\153\140\113\141\
  2632. \360\361\362\363\364\365\366\367\370\371\172\136\114\176\156\157\
  2633. \174\301\302\303\304\305\306\307\310\311\321\322\323\324\325\326\
  2634. \327\330\331\342\343\344\345\346\347\350\351\255\340\275\137\155\
  2635. \171\201\202\203\204\205\206\207\210\211\221\222\223\224\225\226\
  2636. \227\230\231\242\243\244\245\246\247\250\251\300\117\320\241\007\
  2637. \040\041\042\043\044\045\006\027\050\051\052\053\054\011\012\033\
  2638. \060\061\032\063\064\065\066\010\070\071\072\073\004\024\076\377\
  2639. \101\252\112\261\237\262\152\265\273\264\232\212\260\312\257\274\
  2640. \220\217\352\372\276\240\266\263\235\332\233\213\267\270\271\253\
  2641. \144\145\142\146\143\147\236\150\164\161\162\163\170\165\166\167\
  2642. \254\151\355\356\353\357\354\277\200\375\376\373\374\272\256\131\
  2643. \104\105\102\106\103\107\234\110\124\121\122\123\130\125\126\127\
  2644. \214\111\315\316\313\317\314\341\160\335\336\333\334\215\216\337";
  2645. void rtlEStrToStr(unsigned outlen, char *out, unsigned inlen, const char *in)
  2646. {
  2647. unsigned char *codepage = ccsid1047;
  2648. unsigned i,j;
  2649. unsigned lim = inlen;
  2650. if (lim>outlen) lim = outlen;
  2651. for (i=0;i<lim;i++)
  2652. {
  2653. j = in[i] & 0x00ff;
  2654. out[i] = codepage[j];
  2655. }
  2656. for (;i<outlen; i++)
  2657. out[i] = ' ';
  2658. }
  2659. void rtlStrToEStr(unsigned outlen, char *out, unsigned inlen, const char *in)
  2660. {
  2661. unsigned char *codepage = ccsid1047_rev;
  2662. unsigned i,j;
  2663. unsigned lim = inlen;
  2664. if (lim>outlen) lim = outlen;
  2665. for (i=0;i<lim;i++)
  2666. {
  2667. j = in[i] & 0x00ff;
  2668. out[i] = codepage[j];
  2669. }
  2670. for (;i<outlen; i++)
  2671. out[i] = codepage[(unsigned char) ' '];
  2672. }
  2673. //---------------------------------------------------------------------------
  2674. #ifdef _USE_ICU
  2675. void rtlCodepageToUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in, char const * codepage)
  2676. {
  2677. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2678. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2679. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2680. UErrorCode err = U_ZERO_ERROR;
  2681. unsigned len = ucnv_toUChars(conv, out, outlen, in, inlen, &err);
  2682. while(len<outlen) out[len++] = 0x0020;
  2683. unicodeEnsureIsNormalized(outlen, out);
  2684. }
  2685. void rtlCodepageToVUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in, char const * codepage)
  2686. {
  2687. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2688. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2689. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2690. UErrorCode err = U_ZERO_ERROR;
  2691. unsigned len = ucnv_toUChars(conv, out, outlen-1, in, inlen, &err);
  2692. if (len >= outlen) len = outlen-1;
  2693. out[len] = 0;
  2694. vunicodeEnsureIsNormalized(outlen, out);
  2695. }
  2696. void rtlCodepageToUnicodeUnescape(unsigned outlen, UChar * out, unsigned inlen, char const * in, char const * codepage)
  2697. {
  2698. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2699. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2700. UnicodeString raw(in, inlen, codepage);
  2701. UnicodeString unescaped = raw.unescape();
  2702. UnicodeString normalized;
  2703. normalizeUnicodeString(unescaped, normalized);
  2704. if((unsigned)normalized.length()>outlen)
  2705. normalized.truncate(outlen);
  2706. else if((unsigned)normalized.length()<outlen)
  2707. normalized.padTrailing(outlen);
  2708. normalized.extract(0, outlen, out);
  2709. }
  2710. void rtlUnicodeToCodepage(unsigned outlen, char * out, unsigned inlen, UChar const * in, char const * codepage)
  2711. {
  2712. //If the unicode contains a character which doesn't exist in the destination codepage,
  2713. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2714. //no telling how your terminal may display this (I've seen a divide sign and a right
  2715. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2716. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2717. UErrorCode err = U_ZERO_ERROR;
  2718. unsigned len = ucnv_fromUChars(conv, (char *)out, outlen, in, inlen, &err);
  2719. if(len<outlen)
  2720. codepageBlankFill(codepage, out+len, outlen-len);
  2721. }
  2722. void rtlUnicodeToData(unsigned outlen, void * out, unsigned inlen, UChar const * in)
  2723. {
  2724. //If the unicode contains a character which doesn't exist in the destination codepage,
  2725. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2726. //no telling how your terminal may display this (I've seen a divide sign and a right
  2727. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2728. UConverter * conv = queryRTLUnicodeConverter(ASCII_LIKE_CODEPAGE)->query();
  2729. UErrorCode err = U_ZERO_ERROR;
  2730. unsigned len = ucnv_fromUChars(conv, (char *)out, outlen, in, inlen, &err);
  2731. if(len<outlen)
  2732. memset((char *)out+len, 0, outlen-len);
  2733. }
  2734. void rtlUnicodeToVCodepage(unsigned outlen, char * out, unsigned inlen, UChar const * in, char const * codepage)
  2735. {
  2736. //If the unicode contains a character which doesn't exist in the destination codepage,
  2737. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2738. //no telling how your terminal may display this (I've seen a divide sign and a right
  2739. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2740. UConverter * conv = queryRTLUnicodeConverter(ASCII_LIKE_CODEPAGE)->query();
  2741. UErrorCode err = U_ZERO_ERROR;
  2742. unsigned len = ucnv_fromUChars(conv, (char *)out, outlen-1, in, inlen, &err);
  2743. if (len >= outlen) len = outlen-1;
  2744. out[len] = 0;
  2745. }
  2746. void rtlCodepageToUnicodeX(unsigned & outlen, UChar * & out, unsigned inlen, char const * in, char const * codepage)
  2747. {
  2748. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2749. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2750. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2751. UErrorCode err = U_ZERO_ERROR;
  2752. outlen = ucnv_toUChars(conv, 0, 0, in, inlen, &err);
  2753. if(err==U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
  2754. out = (UChar *)rtlMalloc(outlen*2);
  2755. ucnv_toUChars(conv, out, outlen, in, inlen, &err);
  2756. }
  2757. UChar * rtlCodepageToVUnicodeX(unsigned inlen, char const * in, char const * codepage)
  2758. {
  2759. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2760. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2761. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2762. UErrorCode err = U_ZERO_ERROR;
  2763. unsigned outlen = ucnv_toUChars(conv, 0, 0, in, inlen, &err);
  2764. if(err == U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
  2765. UChar * out = (UChar *)rtlMalloc((outlen+1)*2);
  2766. ucnv_toUChars(conv, out, outlen, in, inlen, &err);
  2767. out[outlen] = 0x0000;
  2768. vunicodeEnsureIsNormalizedX(outlen, out);
  2769. return out;
  2770. }
  2771. #else
  2772. void rtlCodepageToUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in, char const * codepage)
  2773. {
  2774. if (inlen > outlen)
  2775. inlen = outlen;
  2776. unsigned i = 0;
  2777. for (; i < inlen; i++)
  2778. out[i] = in[i];
  2779. while (i < outlen)
  2780. out[i++] = 0x0020;
  2781. }
  2782. void rtlCodepageToVUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in, char const * codepage)
  2783. {
  2784. rtlThrowNoUnicode();
  2785. }
  2786. void rtlCodepageToUnicodeUnescape(unsigned outlen, UChar * out, unsigned inlen, char const * in, char const * codepage)
  2787. {
  2788. rtlCodepageToUnicode(outlen, out, inlen, in, codepage);
  2789. }
  2790. void rtlUnicodeToCodepage(unsigned outlen, char * out, unsigned inlen, UChar const * in, char const * codepage)
  2791. {
  2792. if (inlen > outlen)
  2793. inlen = outlen;
  2794. unsigned i = 0;
  2795. for (; i < inlen; i++)
  2796. out[i] = (char)in[i];
  2797. while (i < outlen)
  2798. out[i++] = ' ';
  2799. }
  2800. void rtlUnicodeToData(unsigned outlen, void * out, unsigned inlen, UChar const * in)
  2801. {
  2802. rtlUnicodeToCodepage(outlen, (char *)out, inlen, in, nullptr);
  2803. }
  2804. void rtlUnicodeToVCodepage(unsigned outlen, char * out, unsigned inlen, UChar const * in, char const * codepage)
  2805. {
  2806. rtlThrowNoUnicode();
  2807. }
  2808. void rtlCodepageToUnicodeX(unsigned & outlen, UChar * & out, unsigned inlen, char const * in, char const * codepage)
  2809. {
  2810. outlen = inlen;
  2811. out = (UChar *)rtlMalloc(outlen*2);
  2812. rtlCodepageToUnicode(outlen, out, inlen, in, codepage);
  2813. }
  2814. UChar * rtlCodepageToVUnicodeX(unsigned inlen, char const * in, char const * codepage)
  2815. {
  2816. rtlThrowNoUnicode();
  2817. }
  2818. #endif
  2819. void rtlVCodepageToUnicode(unsigned outlen, UChar * out, char const * in, char const * codepage)
  2820. {
  2821. rtlCodepageToUnicode(outlen, out, strlen(in), in, codepage);
  2822. }
  2823. void rtlVCodepageToVUnicode(unsigned outlen, UChar * out, char const * in, char const * codepage)
  2824. {
  2825. rtlCodepageToVUnicode(outlen, out, strlen(in), in, codepage);
  2826. }
  2827. void rtlVUnicodeToCodepage(unsigned outlen, char * out, UChar const * in, char const * codepage)
  2828. {
  2829. rtlUnicodeToCodepage(outlen, out, rtlUnicodeStrlen(in), in, codepage);
  2830. }
  2831. void rtlVUnicodeToData(unsigned outlen, void * out, UChar const * in)
  2832. {
  2833. rtlUnicodeToData(outlen, out, rtlUnicodeStrlen(in), in);
  2834. }
  2835. void rtlVUnicodeToDataX(unsigned& outlen, void * &out, UChar const * in)
  2836. {
  2837. rtlUnicodeToDataX(outlen, out, rtlUnicodeStrlen(in), in);
  2838. }
  2839. void rtlVUnicodeToVCodepage(unsigned outlen, char * out, UChar const * in, char const * codepage)
  2840. {
  2841. rtlUnicodeToVCodepage(outlen, out, rtlUnicodeStrlen(in), in, codepage);
  2842. }
  2843. void rtlVCodepageToUnicodeX(unsigned & outlen, UChar * & out, char const * in, char const * codepage)
  2844. {
  2845. rtlCodepageToUnicodeX(outlen, out, strlen(in), in, codepage);
  2846. }
  2847. UChar * rtlVCodepageToVUnicodeX(char const * in, char const * codepage)
  2848. {
  2849. return rtlCodepageToVUnicodeX(strlen(in), in, codepage);
  2850. }
  2851. #ifdef _USE_ICU
  2852. void rtlCodepageToUnicodeXUnescape(unsigned & outlen, UChar * & out, unsigned inlen, char const * in, char const * codepage)
  2853. {
  2854. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2855. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2856. UnicodeString raw(in, inlen, codepage);
  2857. UnicodeString unescaped = raw.unescape();
  2858. UnicodeString normalized;
  2859. normalizeUnicodeString(unescaped, normalized);
  2860. outlen = normalized.length();
  2861. out = (UChar *)rtlMalloc(outlen*2);
  2862. normalized.extract(0, outlen, out);
  2863. }
  2864. void rtlCodepageToUtf8XUnescape(unsigned & outlen, char * & out, unsigned inlen, char const * in, char const * codepage)
  2865. {
  2866. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2867. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2868. UnicodeString raw(in, inlen, codepage);
  2869. UnicodeString unescaped = raw.unescape();
  2870. UnicodeString normalized;
  2871. normalizeUnicodeString(unescaped, normalized);
  2872. UConverter * utf8Conv = queryRTLUnicodeConverter(UTF8_CODEPAGE)->query();
  2873. UErrorCode err = U_ZERO_ERROR;
  2874. size32_t outsize = normalized.extract(NULL, 0, utf8Conv, err);
  2875. err = U_ZERO_ERROR;
  2876. out = (char *)rtlMalloc(outsize);
  2877. outsize = normalized.extract(out, outsize, utf8Conv, err);
  2878. outlen = rtlUtf8Length(outsize, out);
  2879. }
  2880. void rtlUnicodeToCodepageX(unsigned & outlen, char * & out, unsigned inlen, UChar const * in, char const * codepage)
  2881. {
  2882. //If the unicode contains a character which doesn't exist in the destination codepage,
  2883. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2884. //no telling how your terminal may display this (I've seen a divide sign and a right
  2885. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2886. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2887. UErrorCode err = U_ZERO_ERROR;
  2888. outlen = ucnv_fromUChars(conv, 0, 0, in, inlen, &err);
  2889. if(err == U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
  2890. out = (char *)rtlMalloc(outlen);
  2891. ucnv_fromUChars(conv, out, outlen, in, inlen, &err);
  2892. }
  2893. char * rtlUnicodeToVCodepageX(unsigned inlen, UChar const * in, char const * codepage)
  2894. {
  2895. //If the unicode contains a character which doesn't exist in the destination codepage,
  2896. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2897. //no telling how your terminal may display this (I've seen a divide sign and a right
  2898. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2899. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2900. UErrorCode err = U_ZERO_ERROR;
  2901. unsigned outlen = ucnv_fromUChars(conv, 0, 0, in, inlen, &err);
  2902. if(err == U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
  2903. char * out = (char *)rtlMalloc(outlen+1);
  2904. ucnv_fromUChars(conv, out, outlen, in, inlen, &err);
  2905. out[outlen] = 0x00;
  2906. return out;
  2907. }
  2908. #else
  2909. void rtlCodepageToUnicodeXUnescape(unsigned & outlen, UChar * & out, unsigned inlen, char const * in, char const * codepage)
  2910. {
  2911. rtlCodepageToUnicodeX(outlen, out, inlen, in, codepage);
  2912. }
  2913. void rtlCodepageToUtf8XUnescape(unsigned & outlen, char * & out, unsigned inlen, char const * in, char const * codepage)
  2914. {
  2915. rtlCodepageToUtf8X(outlen, out, inlen, in, codepage);
  2916. }
  2917. void rtlUnicodeToCodepageX(unsigned & outlen, char * & out, unsigned inlen, UChar const * in, char const * codepage)
  2918. {
  2919. outlen = inlen;
  2920. out = (char *)rtlMalloc(outlen);
  2921. rtlUnicodeToCodepage(outlen, out, inlen, in, codepage);
  2922. }
  2923. char * rtlUnicodeToVCodepageX(unsigned inlen, UChar const * in, char const * codepage)
  2924. {
  2925. rtlThrowNoUnicode();
  2926. }
  2927. #endif
  2928. void rtlUnicodeToDataX(unsigned & outlen, void * & out, unsigned inlen, UChar const * in)
  2929. {
  2930. rtlUnicodeToCodepageX(outlen, (char * &)out, inlen, in, ASCII_LIKE_CODEPAGE);
  2931. }
  2932. void rtlVUnicodeToCodepageX(unsigned & outlen, char * & out, UChar const * in, char const * codepage)
  2933. {
  2934. rtlUnicodeToCodepageX(outlen, out, rtlUnicodeStrlen(in), in, codepage);
  2935. }
  2936. char * rtlVUnicodeToVCodepageX(UChar const * in, char const * codepage)
  2937. {
  2938. return rtlUnicodeToVCodepageX(rtlUnicodeStrlen(in), in, codepage);
  2939. }
  2940. void rtlStrToUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
  2941. {
  2942. rtlCodepageToUnicode(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2943. }
  2944. void rtlUnicodeToStr(unsigned outlen, char * out, unsigned inlen, UChar const * in)
  2945. {
  2946. rtlUnicodeToCodepage(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2947. }
  2948. void rtlStrToUnicodeX(unsigned & outlen, UChar * & out, unsigned inlen, char const * in)
  2949. {
  2950. rtlCodepageToUnicodeX(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2951. }
  2952. void rtlUnicodeToStrX(unsigned & outlen, char * & out, unsigned inlen, UChar const * in)
  2953. {
  2954. rtlUnicodeToCodepageX(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2955. }
  2956. #ifdef _USE_ICU
  2957. void rtlUnicodeToEscapedStrX(unsigned & outlen, char * & out, unsigned inlen, UChar const * in)
  2958. {
  2959. StringBuffer outbuff;
  2960. escapeUnicode(inlen, in, outbuff);
  2961. outlen = outbuff.length();
  2962. out = (char *)rtlMalloc(outlen);
  2963. memcpy(out, outbuff.str(), outlen);
  2964. }
  2965. bool rtlCodepageToCodepage(unsigned outlen, char * out, unsigned inlen, char const * in, char const * outcodepage, char const * incodepage)
  2966. {
  2967. UConverter * inconv = queryRTLUnicodeConverter(incodepage)->query();
  2968. UConverter * outconv = queryRTLUnicodeConverter(outcodepage)->query();
  2969. UErrorCode err = U_ZERO_ERROR;
  2970. char * target = out;
  2971. ucnv_convertEx(outconv, inconv, &target, out+outlen, &in, in+inlen, NULL, NULL, NULL, NULL, TRUE, TRUE, &err);
  2972. unsigned len = target - out;
  2973. if(len < outlen)
  2974. codepageBlankFill(outcodepage, target, outlen-len);
  2975. return U_SUCCESS(err) != FALSE;
  2976. }
  2977. bool rtlCodepageToCodepageX(unsigned & outlen, char * & out, unsigned maxoutlen, unsigned inlen, char const * in, char const * outcodepage, char const * incodepage)
  2978. {
  2979. UConverter * inconv = queryRTLUnicodeConverter(incodepage)->query();
  2980. UConverter * outconv = queryRTLUnicodeConverter(outcodepage)->query();
  2981. UErrorCode err = U_ZERO_ERROR;
  2982. //GH->PG is there a better way of coding this with out temporary buffer?
  2983. char * tempBuffer = (char *)rtlMalloc(maxoutlen);
  2984. char * target = tempBuffer;
  2985. ucnv_convertEx(outconv, inconv, &target, tempBuffer+maxoutlen, &in, in+inlen, NULL, NULL, NULL, NULL, TRUE, TRUE, &err);
  2986. unsigned len = target - tempBuffer;
  2987. outlen = len;
  2988. if (len == maxoutlen)
  2989. out = tempBuffer;
  2990. else
  2991. {
  2992. out = (char *)rtlRealloc(tempBuffer, len);
  2993. if (!out)
  2994. out = tempBuffer;
  2995. }
  2996. return U_SUCCESS(err) != FALSE;
  2997. }
  2998. int rtlSingleUtf8ToCodepage(char * out, unsigned inlen, char const * in, char const * outcodepage)
  2999. {
  3000. const byte head = *in; // Macros require unsigned argument on some versions of ICU
  3001. if(!U8_IS_LEAD(head))
  3002. return -1;
  3003. uint8_t trailbytes = U8_COUNT_TRAIL_BYTES(head);
  3004. if(inlen < (unsigned)(trailbytes+1))
  3005. return -1;
  3006. if(!rtlCodepageToCodepage(1, out, trailbytes+1, in, outcodepage, UTF8_CODEPAGE))
  3007. return -1;
  3008. return static_cast<int>(trailbytes); //cast okay as is certainly 0--3
  3009. }
  3010. #else
  3011. void rtlUnicodeToEscapedStrX(unsigned & outlen, char * & out, unsigned inlen, UChar const * in)
  3012. {
  3013. return rtlUnicodeToStrX(outlen, out, inlen, in);
  3014. }
  3015. bool rtlCodepageToCodepage(unsigned outlen, char * out, unsigned inlen, char const * in, char const * outcodepage, char const * incodepage)
  3016. {
  3017. if (inlen > outlen)
  3018. inlen = outlen;
  3019. memcpy(out, in, inlen);
  3020. if (inlen < outlen)
  3021. memset(out+inlen, ' ', outlen-inlen);
  3022. return true;
  3023. }
  3024. bool rtlCodepageToCodepageX(unsigned & outlen, char * & out, unsigned maxoutlen, unsigned inlen, char const * in, char const * outcodepage, char const * incodepage)
  3025. {
  3026. if (inlen > maxoutlen)
  3027. inlen = maxoutlen;
  3028. outlen = inlen;
  3029. out = (char *)rtlMalloc(inlen);
  3030. return rtlCodepageToCodepage(outlen, out, inlen, in, outcodepage, incodepage);
  3031. }
  3032. int rtlSingleUtf8ToCodepage(char * out, unsigned inlen, char const * in, char const * outcodepage)
  3033. {
  3034. rtlThrowNoUnicode();
  3035. }
  3036. #endif
  3037. //---------------------------------------------------------------------------
  3038. void rtlStrToDataX(unsigned & tlen, void * & tgt, unsigned slen, const void * src)
  3039. {
  3040. void * data = rtlMalloc(slen);
  3041. memcpy(data, src, slen);
  3042. tgt = data;
  3043. tlen = slen;
  3044. }
  3045. void rtlStrToStrX(unsigned & tlen, char * & tgt, unsigned slen, const void * src)
  3046. {
  3047. char * data = (char *)rtlMalloc(slen);
  3048. memcpy(data, src, slen);
  3049. tgt = data;
  3050. tlen = slen;
  3051. }
  3052. char * rtlStrToVStrX(unsigned slen, const void * src)
  3053. {
  3054. char * data = (char *)rtlMalloc(slen+1);
  3055. memcpy(data, src, slen);
  3056. data[slen] = 0;
  3057. return data;
  3058. }
  3059. char * rtlEStrToVStrX(unsigned slen, const char * src)
  3060. {
  3061. MemoryAttr heapMem;
  3062. char * astr = (char *)CONDSTACKALLOC(heapMem, slen);
  3063. rtlEStrToStr(slen,astr,slen,src);
  3064. return rtlStrToVStrX(slen, astr);
  3065. }
  3066. void rtlEStrToStrX(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  3067. {
  3068. char * data = (char *)rtlMalloc(slen);
  3069. rtlEStrToStr(slen, data, slen, src);
  3070. tgt = data;
  3071. tlen = slen;
  3072. }
  3073. void rtlStrToEStrX(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  3074. {
  3075. char * data = (char *)rtlMalloc(slen);
  3076. rtlStrToEStr(slen, data, slen, src);
  3077. tgt = data;
  3078. tlen = slen;
  3079. }
  3080. //---------------------------------------------------------------------------
  3081. // See http://www.isthe.com/chongo/tech/comp/fnv/index.html
  3082. #define FNV1_64_INIT HASH64_INIT
  3083. #define FNV_64_PRIME I64C(0x100000001b3U)
  3084. #define APPLY_FNV64(hval, next) { hval *= FNV_64_PRIME; hval ^= next; }
  3085. hash64_t rtlHash64Data(size32_t len, const void *buf, hash64_t hval)
  3086. {
  3087. const unsigned char *bp = (const unsigned char *)buf; /* start of buffer */
  3088. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3089. //This possibly breaks the aliasing rules for c++, but I can't see it causing any problems
  3090. while (len >= sizeof(unsigned))
  3091. {
  3092. unsigned next = *(const unsigned *)bp;
  3093. bp += sizeof(unsigned);
  3094. for (unsigned i=0; i < sizeof(unsigned); i++)
  3095. {
  3096. APPLY_FNV64(hval, (byte)next);
  3097. next >>= 8;
  3098. }
  3099. len -= sizeof(unsigned);
  3100. }
  3101. #endif
  3102. const unsigned char *be = bp + len; /* beyond end of buffer */
  3103. while (bp < be)
  3104. {
  3105. APPLY_FNV64(hval, *bp++);
  3106. }
  3107. return hval;
  3108. }
  3109. hash64_t rtlHash64VStr(const char *str, hash64_t hval)
  3110. {
  3111. const unsigned char *s = (const unsigned char *)str;
  3112. unsigned char c;
  3113. while ((c = *s++) != 0)
  3114. {
  3115. APPLY_FNV64(hval, c);
  3116. }
  3117. return hval;
  3118. }
  3119. hash64_t rtlHash64Unicode(unsigned length, UChar const * k, hash64_t hval)
  3120. {
  3121. #ifdef _USE_ICU
  3122. unsigned trimLength = rtlTrimUnicodeStrLen(length, k);
  3123. for (unsigned i=0; i < trimLength; i++)
  3124. {
  3125. //Handle surrogate pairs correctly, but still hash the utf16 representation
  3126. const byte * cur = reinterpret_cast<const byte *>(&k[i]);
  3127. UChar32 c = k[i];
  3128. if (U16_IS_SURROGATE(c))
  3129. {
  3130. U16_GET(k, 0, i, length, c);
  3131. if (!u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
  3132. {
  3133. APPLY_FNV64(hval, cur[0]);
  3134. APPLY_FNV64(hval, cur[1]);
  3135. APPLY_FNV64(hval, cur[2]);
  3136. APPLY_FNV64(hval, cur[3]);
  3137. }
  3138. //Skip the surrogate pair
  3139. i++;
  3140. }
  3141. else
  3142. {
  3143. if (!u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
  3144. {
  3145. APPLY_FNV64(hval, cur[0]);
  3146. APPLY_FNV64(hval, cur[1]);
  3147. }
  3148. }
  3149. }
  3150. #else
  3151. rtlThrowNoUnicode();
  3152. #endif
  3153. return hval;
  3154. }
  3155. hash64_t rtlHash64VUnicode(UChar const * k, hash64_t initval)
  3156. {
  3157. return rtlHash64Unicode(rtlUnicodeStrlen(k), k, initval);
  3158. }
  3159. //---------------------------------------------------------------------------
  3160. // See http://www.isthe.com/chongo/tech/comp/fnv/index.html
  3161. #define FNV1_32_INIT HASH32_INIT
  3162. #define FNV_32_PRIME 0x1000193
  3163. #define APPLY_FNV32(hval, next) { hval *= FNV_32_PRIME; hval ^= next; }
  3164. unsigned rtlHash32Data(size32_t len, const void *buf, unsigned hval)
  3165. {
  3166. const unsigned char *bp = (const unsigned char *)buf; /* start of buffer */
  3167. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3168. //This possibly breaks the aliasing rules for c++, but I can't see it causing any problems
  3169. while (len >= sizeof(unsigned))
  3170. {
  3171. unsigned next = *(const unsigned *)bp;
  3172. bp += sizeof(unsigned);
  3173. for (unsigned i=0; i < sizeof(unsigned); i++)
  3174. {
  3175. APPLY_FNV32(hval, (byte)next);
  3176. next >>= 8;
  3177. }
  3178. len -= sizeof(unsigned);
  3179. }
  3180. #endif
  3181. const unsigned char *be = bp + len; /* beyond end of buffer */
  3182. while (bp < be)
  3183. {
  3184. APPLY_FNV32(hval, *bp++);
  3185. }
  3186. return hval;
  3187. }
  3188. unsigned rtlHash32VStr(const char *str, unsigned hval)
  3189. {
  3190. const unsigned char *s = (const unsigned char *)str;
  3191. unsigned char c;
  3192. while ((c = *s++) != 0)
  3193. {
  3194. APPLY_FNV32(hval, c);
  3195. }
  3196. return hval;
  3197. }
  3198. unsigned rtlHash32Unicode(unsigned length, UChar const * k, unsigned hval)
  3199. {
  3200. #ifdef _USE_ICU
  3201. unsigned trimLength = rtlTrimUnicodeStrLen(length, k);
  3202. for (unsigned i=0; i < trimLength; i++)
  3203. {
  3204. //Handle surrogate pairs correctly, but still hash the utf16 representation
  3205. const byte * cur = reinterpret_cast<const byte *>(&k[i]);
  3206. UChar32 c = k[i];
  3207. if (U16_IS_SURROGATE(c))
  3208. {
  3209. U16_GET(k, 0, i, length, c);
  3210. if (!u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
  3211. {
  3212. APPLY_FNV32(hval, cur[0]);
  3213. APPLY_FNV32(hval, cur[1]);
  3214. APPLY_FNV32(hval, cur[2]);
  3215. APPLY_FNV32(hval, cur[3]);
  3216. }
  3217. //Skip the surrogate pair
  3218. i++;
  3219. }
  3220. else
  3221. {
  3222. if (!u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
  3223. {
  3224. APPLY_FNV32(hval, cur[0]);
  3225. APPLY_FNV32(hval, cur[1]);
  3226. }
  3227. }
  3228. }
  3229. #else
  3230. rtlThrowNoUnicode();
  3231. #endif
  3232. return hval;
  3233. }
  3234. unsigned rtlHash32VUnicode(UChar const * k, unsigned initval)
  3235. {
  3236. return rtlHash32Unicode(rtlUnicodeStrlen(k), k, initval);
  3237. }
  3238. //---------------------------------------------------------------------------
  3239. // Hash Helper functions
  3240. #define mix(a,b,c) \
  3241. { \
  3242. a -= b; a -= c; a ^= (c>>13); \
  3243. b -= c; b -= a; b ^= (a<<8); \
  3244. c -= a; c -= b; c ^= (b>>13); \
  3245. a -= b; a -= c; a ^= (c>>12); \
  3246. b -= c; b -= a; b ^= (a<<16); \
  3247. c -= a; c -= b; c ^= (b>>5); \
  3248. a -= b; a -= c; a ^= (c>>3); \
  3249. b -= c; b -= a; b ^= (a<<10); \
  3250. c -= a; c -= b; c ^= (b>>15); \
  3251. }
  3252. #define GETBYTE0(n) ((unsigned)k[n])
  3253. #define GETBYTE1(n) ((unsigned)k[n+1]<<8)
  3254. #define GETBYTE2(n) ((unsigned)k[n+2]<<16)
  3255. #define GETBYTE3(n) ((unsigned)k[n+3]<<24)
  3256. #define GETWORD(k,n) (GETBYTE0(n)+GETBYTE1(n)+GETBYTE2(n)+GETBYTE3(n))
  3257. // the above looks inefficient but the compiler optimizes well
  3258. // this hash looks slow but is about twice as quick as using our CRC table
  3259. // and gives gives better results
  3260. // (see paper at http://burtleburtle.net/bob/hash/evahash.html for more info)
  3261. unsigned rtlHashData( unsigned length, const void *_k, unsigned initval)
  3262. {
  3263. const unsigned char * k = (const unsigned char *)_k;
  3264. unsigned a,b,c,len;
  3265. /* Set up the internal state */
  3266. len = length;
  3267. a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
  3268. c = initval; /* the previous hash value */
  3269. /*---------------------------------------- handle most of the key */
  3270. while (len >= 12)
  3271. {
  3272. a += GETWORD(k,0);
  3273. b += GETWORD(k,4);
  3274. c += GETWORD(k,8);
  3275. mix(a,b,c);
  3276. k += 12; len -= 12;
  3277. }
  3278. /*------------------------------------- handle the last 11 bytes */
  3279. c += length;
  3280. switch(len) /* all the case statements fall through */
  3281. {
  3282. case 11: c+=GETBYTE3(7);
  3283. case 10: c+=GETBYTE2(7);
  3284. case 9 : c+=GETBYTE1(7);
  3285. /* the first byte of c is reserved for the length */
  3286. case 8 : b+=GETBYTE3(4);
  3287. case 7 : b+=GETBYTE2(4);
  3288. case 6 : b+=GETBYTE1(4);
  3289. case 5 : b+=GETBYTE0(4);
  3290. case 4 : a+=GETBYTE3(0);
  3291. case 3 : a+=GETBYTE2(0);
  3292. case 2 : a+=GETBYTE1(0);
  3293. case 1 : a+=GETBYTE0(0);
  3294. /* case 0: nothing left to add */
  3295. }
  3296. mix(a,b,c);
  3297. /*-------------------------------------------- report the result */
  3298. return c;
  3299. }
  3300. unsigned rtlHashString( unsigned length, const char *_k, unsigned initval)
  3301. {
  3302. return rtlHashData(rtlTrimStrLen(length, _k), _k, initval);
  3303. }
  3304. unsigned rtlHashUnicode(unsigned length, UChar const * k, unsigned initval)
  3305. {
  3306. #ifdef _USE_ICU
  3307. unsigned trimLength = rtlTrimUnicodeStrLen(length, k);
  3308. //Because of the implementation of HASH we need to strip ignoreable code points instead of skipping them
  3309. size32_t tempLength;
  3310. rtlDataAttr temp;
  3311. if (stripIgnorableCharacters(tempLength, temp.refustr(), trimLength, k))
  3312. return rtlHashData(tempLength*2, temp.getustr(), initval);
  3313. return rtlHashData(trimLength*sizeof(UChar), k, initval);
  3314. #else
  3315. rtlThrowNoUnicode();
  3316. #endif
  3317. }
  3318. unsigned rtlHashVStr(const char * k, unsigned initval)
  3319. {
  3320. return rtlHashData(rtlTrimVStrLen(k), k, initval);
  3321. }
  3322. unsigned rtlHashVUnicode(UChar const * k, unsigned initval)
  3323. {
  3324. return rtlHashUnicode(rtlTrimVUnicodeStrLen(k), k, initval);
  3325. }
  3326. #define GETWORDNC(k,n) ((GETBYTE0(n)+GETBYTE1(n)+GETBYTE2(n)+GETBYTE3(n))&0xdfdfdfdf)
  3327. unsigned rtlHashDataNC( unsigned length, const void * _k, unsigned initval)
  3328. {
  3329. const unsigned char * k = (const unsigned char *)_k;
  3330. unsigned a,b,c,len;
  3331. /* Set up the internal state */
  3332. len = length;
  3333. a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
  3334. c = initval; /* the previous hash value */
  3335. /*---------------------------------------- handle most of the key */
  3336. while (len >= 12)
  3337. {
  3338. a += GETWORDNC(k,0);
  3339. b += GETWORDNC(k,4);
  3340. c += GETWORDNC(k,8);
  3341. mix(a,b,c);
  3342. k += 12; len -= 12;
  3343. }
  3344. /*------------------------------------- handle the last 11 bytes */
  3345. c += length;
  3346. switch(len) /* all the case statements fall through */
  3347. {
  3348. case 11: c+=GETBYTE3(7)&0xdf;
  3349. case 10: c+=GETBYTE2(7)&0xdf;
  3350. case 9 : c+=GETBYTE1(7)&0xdf;
  3351. /* the first byte of c is reserved for the length */
  3352. case 8 : b+=GETBYTE3(4)&0xdf;
  3353. case 7 : b+=GETBYTE2(4)&0xdf;
  3354. case 6 : b+=GETBYTE1(4)&0xdf;
  3355. case 5 : b+=GETBYTE0(4)&0xdf;
  3356. case 4 : a+=GETBYTE3(0)&0xdf;
  3357. case 3 : a+=GETBYTE2(0)&0xdf;
  3358. case 2 : a+=GETBYTE1(0)&0xdf;
  3359. case 1 : a+=GETBYTE0(0)&0xdf;
  3360. /* case 0: nothing left to add */
  3361. }
  3362. mix(a,b,c);
  3363. /*-------------------------------------------- report the result */
  3364. return c;
  3365. }
  3366. unsigned rtlHashVStrNC(const char * k, unsigned initval)
  3367. {
  3368. return rtlHashDataNC(strlen(k), k, initval);
  3369. }
  3370. //---------------------------------------------------------------------------
  3371. unsigned rtlCrcData( unsigned length, const void *_k, unsigned initval)
  3372. {
  3373. return crc32((const char *)_k, length, initval);
  3374. }
  3375. unsigned rtlCrcUnicode(unsigned length, UChar const * k, unsigned initval)
  3376. {
  3377. return crc32((char const *)k, length*2, initval);
  3378. }
  3379. unsigned rtlCrcVStr( const char * k, unsigned initval)
  3380. {
  3381. return crc32(k, strlen(k), initval);
  3382. }
  3383. unsigned rtlCrcVUnicode(UChar const * k, unsigned initval)
  3384. {
  3385. return rtlCrcUnicode(rtlUnicodeStrlen(k), k, initval);
  3386. }
  3387. //---------------------------------------------------------------------------
  3388. // MD5 processing:
  3389. void rtlHashMd5Init(size32_t sizestate, void * _state)
  3390. {
  3391. assertex(sizestate >= sizeof(md5_state_s));
  3392. md5_state_s * state = (md5_state_s *)_state;
  3393. md5_init(state);
  3394. }
  3395. void rtlHashMd5Data(size32_t len, const void *buf, size32_t sizestate, void * _state)
  3396. {
  3397. md5_state_s * state = (md5_state_s * )_state;
  3398. md5_append(state, (const md5_byte_t *)buf, len);
  3399. }
  3400. void rtlHashMd5Finish(void * out, size32_t sizestate, void * _state)
  3401. {
  3402. typedef md5_byte_t digest_t[16];
  3403. md5_state_s * state = (md5_state_s *)_state;
  3404. md5_finish(state, *(digest_t*)out);
  3405. }
  3406. //---------------------------------------------------------------------------
  3407. unsigned rtlRandom()
  3408. {
  3409. CriticalBlock block(random_Sect);
  3410. return random_->next();
  3411. }
  3412. void rtlSeedRandom(unsigned value)
  3413. {
  3414. CriticalBlock block(random_Sect);
  3415. random_->seed(value);
  3416. }
  3417. // These are all useful functions for testing - not really designed for other people to use them...
  3418. ECLRTL_API unsigned rtlTick()
  3419. {
  3420. return msTick();
  3421. }
  3422. ECLRTL_API bool rtlGPF()
  3423. {
  3424. char * x = 0;
  3425. *x = 0;
  3426. return false;
  3427. }
  3428. ECLRTL_API unsigned rtlSleep(unsigned delay)
  3429. {
  3430. MilliSleep(delay);
  3431. return 0;
  3432. }
  3433. ECLRTL_API unsigned rtlDisplay(unsigned len, const char * src)
  3434. {
  3435. LOG(MCprogress, unknownJob, "%.*s", len, src);
  3436. return 0;
  3437. }
  3438. void rtlEcho(unsigned len, const char * src)
  3439. {
  3440. printf("%.*s\n", len, src);
  3441. }
  3442. ECLRTL_API unsigned __int64 rtlNano()
  3443. {
  3444. return cycle_to_nanosec(get_cycles_now());
  3445. }
  3446. ECLRTL_API void rtlTestGetPrimes(unsigned & num, void * & data)
  3447. {
  3448. unsigned numPrimes = 6;
  3449. unsigned size = sizeof(unsigned) * numPrimes;
  3450. unsigned * primes = (unsigned *)rtlMalloc(size);
  3451. primes[0] = 1;
  3452. primes[1] = 2;
  3453. primes[2] = 3;
  3454. primes[3] = 5;
  3455. primes[4] = 7;
  3456. primes[5] = 11;
  3457. num = numPrimes;
  3458. data = primes;
  3459. }
  3460. ECLRTL_API void rtlTestFibList(bool & outAll, size32_t & outSize, void * & outData, bool inAll, size32_t inSize, const void * inData)
  3461. {
  3462. const unsigned * inList = (const unsigned *)inData;
  3463. unsigned * outList = (unsigned *)rtlMalloc(inSize);
  3464. unsigned * curOut = outList;
  3465. unsigned count = inSize / sizeof(*inList);
  3466. unsigned prev = 0;
  3467. for (unsigned i=0; i < count; i++)
  3468. {
  3469. unsigned next = *inList++;
  3470. *curOut++ = next + prev;
  3471. prev = next;
  3472. }
  3473. outAll = inAll;
  3474. outSize = inSize;
  3475. outData = outList;
  3476. }
  3477. unsigned rtlDelayReturn(unsigned value, unsigned sleepTime)
  3478. {
  3479. MilliSleep(sleepTime);
  3480. return value;
  3481. }
  3482. //---------------------------------------------------------------------------
  3483. class DECL_EXCEPTION CRtlFailException : public IUserException, public CInterface
  3484. {
  3485. public:
  3486. CRtlFailException(int _code, char const * _msg) : code(_code) { msg = strdup(_msg); }
  3487. ~CRtlFailException() { free(msg); }
  3488. IMPLEMENT_IINTERFACE;
  3489. virtual int errorCode() const { return code; }
  3490. virtual StringBuffer & errorMessage(StringBuffer & buff) const { return buff.append(msg); }
  3491. virtual MessageAudience errorAudience() const { return MSGAUD_user; }
  3492. private:
  3493. int code;
  3494. char * msg;
  3495. };
  3496. void rtlFail(int code, const char *msg)
  3497. {
  3498. throw dynamic_cast<IUserException *>(new CRtlFailException(code, msg));
  3499. }
  3500. void rtlSysFail(int code, const char *msg)
  3501. {
  3502. throw MakeStringException(MSGAUD_user, code, "%s", msg);
  3503. }
  3504. void rtlThrowOutOfMemory(int code, const char *msg)
  3505. {
  3506. throw static_cast<IUserException *>(new CRtlFailException(code, msg));
  3507. }
  3508. void rtlReportRowOverflow(unsigned size, unsigned max)
  3509. {
  3510. throw MakeStringException(MSGAUD_user, 1000, "Row size %u exceeds the maximum size specified(%u)", size, max);
  3511. }
  3512. void rtlThrowNoUnicode()
  3513. {
  3514. throw MakeStringException(99, "System was built without Unicode support");
  3515. }
  3516. void rtlReportFieldOverflow(unsigned size, unsigned max, const char * name)
  3517. {
  3518. if (!name)
  3519. rtlReportRowOverflow(size, max);
  3520. else
  3521. throw MakeStringException(MSGAUD_user, 1000, "Assignment to field '%s' causes row overflow. Size %u exceeds the maximum size specified(%u)", name, size, max);
  3522. }
  3523. void rtlCheckRowOverflow(unsigned size, unsigned max)
  3524. {
  3525. if (size > max)
  3526. rtlReportRowOverflow(size, max);
  3527. }
  3528. void rtlCheckFieldOverflow(unsigned size, unsigned max, const char * field)
  3529. {
  3530. if (size > max)
  3531. rtlReportFieldOverflow(size, max, field);
  3532. }
  3533. void rtlFailUnexpected()
  3534. {
  3535. throw MakeStringException(MSGAUD_user, -1, "Unexpected code execution");
  3536. }
  3537. void rtlFailOnAssert()
  3538. {
  3539. throw MakeStringException(MSGAUD_user, -1, "Abort execution");
  3540. }
  3541. void rtlFailDivideByZero()
  3542. {
  3543. throw MakeStringException(MSGAUD_user, -1, "Division by zero");
  3544. }
  3545. //---------------------------------------------------------------------------
  3546. void deserializeRaw(unsigned recordSize, void *record, MemoryBuffer &in)
  3547. {
  3548. in.read(recordSize, record);
  3549. }
  3550. void deserializeDataX(size32_t & len, void * & data, MemoryBuffer &in)
  3551. {
  3552. free(data);
  3553. in.read(sizeof(len), &len);
  3554. data = rtlMalloc(len);
  3555. in.read(len, data);
  3556. }
  3557. void deserializeStringX(size32_t & len, char * & data, MemoryBuffer &in)
  3558. {
  3559. free(data);
  3560. in.read(sizeof(len), &len);
  3561. data = (char *)rtlMalloc(len);
  3562. in.read(len, data);
  3563. }
  3564. char * deserializeCStringX(MemoryBuffer &in)
  3565. {
  3566. unsigned len;
  3567. in.read(sizeof(len), &len);
  3568. char * data = (char *)rtlMalloc(len+1);
  3569. in.read(len, data);
  3570. data[len] = 0;
  3571. return data;
  3572. }
  3573. void deserializeUnicodeX(size32_t & len, UChar * & data, MemoryBuffer &in)
  3574. {
  3575. free(data);
  3576. in.read(sizeof(len), &len);
  3577. data = (UChar *)rtlMalloc(len*sizeof(UChar));
  3578. in.read(len*sizeof(UChar), data);
  3579. }
  3580. void deserializeUtf8X(size32_t & len, char * & data, MemoryBuffer &in)
  3581. {
  3582. free(data);
  3583. in.read(sizeof(len), &len);
  3584. unsigned size = rtlUtf8Size(len, in.readDirect(0));
  3585. data = (char *)rtlMalloc(size);
  3586. in.read(size, data);
  3587. }
  3588. UChar * deserializeVUnicodeX(MemoryBuffer &in)
  3589. {
  3590. unsigned len;
  3591. in.read(sizeof(len), &len);
  3592. UChar * data = (UChar *)rtlMalloc((len+1)*sizeof(UChar));
  3593. in.read(len*sizeof(UChar), data);
  3594. data[len] = 0;
  3595. return data;
  3596. }
  3597. void deserializeSet(bool & isAll, size32_t & len, void * & data, MemoryBuffer &in)
  3598. {
  3599. free(data);
  3600. in.read(isAll);
  3601. in.read(sizeof(len), &len);
  3602. data = rtlMalloc(len);
  3603. in.read(len, data);
  3604. }
  3605. void serializeRaw(unsigned recordSize, const void *record, MemoryBuffer &out)
  3606. {
  3607. out.append(recordSize, record);
  3608. }
  3609. void serializeDataX(size32_t len, const void * data, MemoryBuffer &out)
  3610. {
  3611. out.append(len).append(len, data);
  3612. }
  3613. void serializeStringX(size32_t len, const char * data, MemoryBuffer &out)
  3614. {
  3615. out.append(len).append(len, data);
  3616. }
  3617. void serializeCStringX(const char * data, MemoryBuffer &out)
  3618. {
  3619. unsigned len = strlen(data);
  3620. out.append(len).append(len, data);
  3621. }
  3622. void serializeUnicodeX(size32_t len, const UChar * data, MemoryBuffer &out)
  3623. {
  3624. out.append(len).append(len*sizeof(UChar), data);
  3625. }
  3626. void serializeUtf8X(size32_t len, const char * data, MemoryBuffer &out)
  3627. {
  3628. out.append(len).append(rtlUtf8Size(len, data), data);
  3629. }
  3630. void serializeSet(bool isAll, size32_t len, const void * data, MemoryBuffer &out)
  3631. {
  3632. out.append(isAll).append(len).append(len, data);
  3633. }
  3634. //---------------------------------------------------------------------------
  3635. ECLRTL_API void serializeFixedString(unsigned len, const char *field, MemoryBuffer &out)
  3636. {
  3637. out.append(len, field);
  3638. }
  3639. ECLRTL_API void serializeLPString(unsigned len, const char *field, MemoryBuffer &out)
  3640. {
  3641. out.append(len);
  3642. out.append(len, field);
  3643. }
  3644. ECLRTL_API void serializeVarString(const char *field, MemoryBuffer &out)
  3645. {
  3646. out.append(field);
  3647. }
  3648. ECLRTL_API void serializeBool(bool field, MemoryBuffer &out)
  3649. {
  3650. out.append(field);
  3651. }
  3652. ECLRTL_API void serializeFixedData(unsigned len, const void *field, MemoryBuffer &out)
  3653. {
  3654. out.append(len, field);
  3655. }
  3656. ECLRTL_API void serializeLPData(unsigned len, const void *field, MemoryBuffer &out)
  3657. {
  3658. out.append(len);
  3659. out.append(len, field);
  3660. }
  3661. ECLRTL_API void serializeInt1(signed char field, MemoryBuffer &out)
  3662. {
  3663. // MORE - why did overloading pick the int method for this???
  3664. // out.append(field);
  3665. out.appendEndian(sizeof(field), &field);
  3666. }
  3667. ECLRTL_API void serializeInt2(signed short field, MemoryBuffer &out)
  3668. {
  3669. out.appendEndian(sizeof(field), &field);
  3670. }
  3671. ECLRTL_API void serializeInt3(signed int field, MemoryBuffer &out)
  3672. {
  3673. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3674. out.appendEndian(3, &field);
  3675. #else
  3676. out.appendEndian(3, ((char *) &field) + 1);
  3677. #endif
  3678. }
  3679. ECLRTL_API void serializeInt4(signed int field, MemoryBuffer &out)
  3680. {
  3681. out.appendEndian(sizeof(field), &field);
  3682. }
  3683. ECLRTL_API void serializeInt5(signed __int64 field, MemoryBuffer &out)
  3684. {
  3685. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3686. out.appendEndian(5, &field);
  3687. #else
  3688. out.appendEndian(5, ((char *) &field) + 3);
  3689. #endif
  3690. }
  3691. ECLRTL_API void serializeInt6(signed __int64 field, MemoryBuffer &out)
  3692. {
  3693. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3694. out.appendEndian(6, &field);
  3695. #else
  3696. out.appendEndian(6, ((char *) &field) + 2);
  3697. #endif
  3698. }
  3699. ECLRTL_API void serializeInt7(signed __int64 field, MemoryBuffer &out)
  3700. {
  3701. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3702. out.appendEndian(7, &field);
  3703. #else
  3704. out.appendEndian(7, ((char *) &field) + 1);
  3705. #endif
  3706. }
  3707. ECLRTL_API void serializeInt8(signed __int64 field, MemoryBuffer &out)
  3708. {
  3709. out.appendEndian(sizeof(field), &field);
  3710. }
  3711. ECLRTL_API void serializeUInt1(unsigned char field, MemoryBuffer &out)
  3712. {
  3713. out.appendEndian(sizeof(field), &field);
  3714. }
  3715. ECLRTL_API void serializeUInt2(unsigned short field, MemoryBuffer &out)
  3716. {
  3717. out.appendEndian(sizeof(field), &field);
  3718. }
  3719. ECLRTL_API void serializeUInt3(unsigned int field, MemoryBuffer &out)
  3720. {
  3721. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3722. out.appendEndian(3, &field);
  3723. #else
  3724. out.appendEndian(3, ((char *) &field) + 1);
  3725. #endif
  3726. }
  3727. ECLRTL_API void serializeUInt4(unsigned int field, MemoryBuffer &out)
  3728. {
  3729. out.appendEndian(sizeof(field), &field);
  3730. }
  3731. ECLRTL_API void serializeUInt5(unsigned __int64 field, MemoryBuffer &out)
  3732. {
  3733. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3734. out.appendEndian(5, &field);
  3735. #else
  3736. out.appendEndian(5, ((char *) &field) + 3);
  3737. #endif
  3738. }
  3739. ECLRTL_API void serializeUInt6(unsigned __int64 field, MemoryBuffer &out)
  3740. {
  3741. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3742. out.appendEndian(6, &field);
  3743. #else
  3744. out.appendEndian(6, ((char *) &field) + 2);
  3745. #endif
  3746. }
  3747. ECLRTL_API void serializeUInt7(unsigned __int64 field, MemoryBuffer &out)
  3748. {
  3749. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3750. out.appendEndian(7, &field);
  3751. #else
  3752. out.appendEndian(7, ((char *) &field) + 1);
  3753. #endif
  3754. }
  3755. ECLRTL_API void serializeUInt8(unsigned __int64 field, MemoryBuffer &out)
  3756. {
  3757. out.appendEndian(sizeof(field), &field);
  3758. }
  3759. ECLRTL_API void serializeReal4(float field, MemoryBuffer &out)
  3760. {
  3761. out.appendEndian(sizeof(field), &field);
  3762. }
  3763. ECLRTL_API void serializeReal8(double field, MemoryBuffer &out)
  3764. {
  3765. out.append(sizeof(field), &field);
  3766. }
  3767. //These maths functions can all have out of range arguments....
  3768. //---------------------------------------------------------------------------
  3769. static double rtlInvalidArgument(DBZaction dbz, const char *source, double arg)
  3770. {
  3771. switch ((DBZaction) dbz)
  3772. {
  3773. case DBZfail:
  3774. throw MakeStringException(MSGAUD_user, -1, "Invalid argument to %s: %f", source, arg);
  3775. case DBZnan:
  3776. return rtlCreateRealNull();
  3777. }
  3778. return 0;
  3779. }
  3780. static double rtlInvalidLog(DBZaction dbz, const char *source, double arg)
  3781. {
  3782. switch ((DBZaction) dbz)
  3783. {
  3784. case DBZfail:
  3785. throw MakeStringException(MSGAUD_user, -1, "Invalid argument to %s: %f", source, arg);
  3786. case DBZnan:
  3787. if (arg)
  3788. return rtlCreateRealNull();
  3789. else
  3790. return -INFINITY;
  3791. }
  3792. return 0;
  3793. }
  3794. ECLRTL_API double rtlLog10(double x, byte dbz)
  3795. {
  3796. if (x <= 0)
  3797. return rtlInvalidLog((DBZaction) dbz, "LOG10", x);
  3798. return log10(x);
  3799. }
  3800. ECLRTL_API double rtlLog(double x, byte dbz)
  3801. {
  3802. if (x <= 0)
  3803. return rtlInvalidLog((DBZaction) dbz, "LOG10", x);
  3804. return log(x);
  3805. }
  3806. ECLRTL_API double rtlSqrt(double x, byte dbz)
  3807. {
  3808. if (x < 0)
  3809. return rtlInvalidArgument((DBZaction) dbz, "SQRT", x);
  3810. return sqrt(x);
  3811. }
  3812. ECLRTL_API double rtlACos(double x, byte dbz)
  3813. {
  3814. if (fabs(x) > 1)
  3815. return rtlInvalidArgument((DBZaction) dbz, "ACOS", x);
  3816. return acos(x);
  3817. }
  3818. ECLRTL_API double rtlASin(double x, byte dbz)
  3819. {
  3820. if (fabs(x) > 1)
  3821. return rtlInvalidArgument((DBZaction) dbz, "ASIN", x);
  3822. return asin(x);
  3823. }
  3824. ECLRTL_API double rtlFMod(double numer, double denom, byte dbz)
  3825. {
  3826. if (!denom)
  3827. return rtlInvalidArgument((DBZaction) dbz, "FMOD", denom);
  3828. return fmod(numer, denom);
  3829. }
  3830. ECLRTL_API bool rtlFMatch(double a, double b, double epsilon)
  3831. {
  3832. if (isnan(a) || isnan(b))
  3833. return false;
  3834. return fabs(a-b) <= epsilon;
  3835. }
  3836. //---------------------------------------------------------------------------
  3837. ECLRTL_API bool rtlIsValidReal(unsigned size, const void * data)
  3838. {
  3839. byte * bytes = (byte *)data;
  3840. //Valid unless it is a Nan, represented by exponent all 1's and non-zero mantissa (ignore the sign).
  3841. if (size == 4)
  3842. {
  3843. //sign(1) exponent(8) mantissa(23)
  3844. if (((bytes[3] & 0x7f) == 0x7f) && ((bytes[2] & 0x80) == 0x80))
  3845. {
  3846. if ((bytes[2] & 0x7f) != 0 || bytes[1] || bytes[0])
  3847. return false;
  3848. }
  3849. }
  3850. else if (size == 8)
  3851. {
  3852. //sign(1) exponent(11) mantissa(52)
  3853. if (((bytes[7] & 0x7f) == 0x7f) && ((bytes[6] & 0xF0) == 0xF0))
  3854. {
  3855. if ((bytes[6] & 0xF) || bytes[5] || bytes[4] || bytes[3] || bytes[2] || bytes[1] || bytes[0])
  3856. return false;
  3857. }
  3858. }
  3859. else
  3860. {
  3861. //sign(1) exponent(15) mantissa(64)
  3862. assertex(size==10);
  3863. if (((bytes[9] & 0x7f) == 0x7f) && (bytes[8] == 0xFF))
  3864. {
  3865. if (bytes[7] || bytes[6] || bytes[5] || bytes[4] || bytes[3] || bytes[2] || bytes[1] || bytes[0])
  3866. return false;
  3867. }
  3868. }
  3869. return true;
  3870. }
  3871. double rtlCreateRealNull()
  3872. {
  3873. union
  3874. {
  3875. byte data[8];
  3876. double r;
  3877. } u;
  3878. //Use a non-signaling NaN
  3879. memcpy(u.data, "\x01\x00\x00\x00\x00\x00\xF0\x7f", 8);
  3880. return u.r;
  3881. }
  3882. double rtlCreateRealInf()
  3883. {
  3884. return INFINITY;
  3885. }
  3886. bool rtlIsInfinite(double value)
  3887. {
  3888. return isinf(value);
  3889. }
  3890. bool rtlIsNaN(double value)
  3891. {
  3892. return isnan(value);
  3893. }
  3894. bool rtlIsFinite(double value)
  3895. {
  3896. return isfinite(value);
  3897. }
  3898. unsigned rtlUtf8Size(const void * data)
  3899. {
  3900. return readUtf8Size(data);
  3901. }
  3902. unsigned rtlUtf8Size(unsigned len, const void * _data)
  3903. {
  3904. const byte * data = (const byte *)_data;
  3905. size32_t offset = 0;
  3906. for (unsigned i=0; i< len; i++)
  3907. offset += readUtf8Size(data+offset);
  3908. return offset;
  3909. }
  3910. unsigned rtlUtf8Length(unsigned size, const void * _data)
  3911. {
  3912. const byte * data = (const byte *)_data;
  3913. size32_t length = 0;
  3914. for (unsigned offset=0; offset < size; offset += readUtf8Size(data+offset))
  3915. length++;
  3916. return length;
  3917. }
  3918. unsigned rtlUtf8Char(const void * data)
  3919. {
  3920. return readUtf8Char(data);
  3921. }
  3922. void rtlUnicodeToUnicode(size32_t outlen, UChar * out, size32_t inlen, UChar const *in)
  3923. {
  3924. if(inlen>outlen) inlen = outlen;
  3925. memcpy(out, in, inlen*2);
  3926. while(inlen<outlen)
  3927. out[inlen++] = 0x0020;
  3928. }
  3929. void rtlUnicodeToVUnicode(size32_t outlen, UChar * out, size32_t inlen, UChar const *in)
  3930. {
  3931. if((inlen>=outlen) && (outlen != 0)) inlen = outlen-1;
  3932. memcpy(out, in, inlen*2);
  3933. out[inlen] = 0x0000;
  3934. }
  3935. void rtlVUnicodeToUnicode(size32_t outlen, UChar * out, UChar const *in)
  3936. {
  3937. rtlUnicodeToUnicode(outlen, out, rtlUnicodeStrlen(in), in);
  3938. }
  3939. void rtlVUnicodeToVUnicode(size32_t outlen, UChar * out, UChar const *in)
  3940. {
  3941. rtlUnicodeToVUnicode(outlen, out, rtlUnicodeStrlen(in), in);
  3942. }
  3943. void rtlUnicodeToUnicodeX(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src)
  3944. {
  3945. tgt = (UChar *)rtlMalloc(slen*2);
  3946. memcpy(tgt, src, slen*2);
  3947. tlen = slen;
  3948. }
  3949. UChar * rtlUnicodeToVUnicodeX(unsigned slen, UChar const * src)
  3950. {
  3951. UChar * data = (UChar *)rtlMalloc((slen+1)*2);
  3952. memcpy(data, src, slen*2);
  3953. data[slen] = 0x0000;
  3954. return data;
  3955. }
  3956. void rtlVUnicodeToUnicodeX(unsigned & tlen, UChar * & tgt, UChar const * src)
  3957. {
  3958. rtlUnicodeToUnicodeX(tlen, tgt, rtlUnicodeStrlen(src), src);
  3959. }
  3960. UChar * rtlVUnicodeToVUnicodeX(UChar const * src)
  3961. {
  3962. return rtlUnicodeToVUnicodeX(rtlUnicodeStrlen(src), src);
  3963. }
  3964. void rtlDecPushUnicode(size32_t len, UChar const * data)
  3965. {
  3966. char * buff = 0;
  3967. unsigned bufflen = 0;
  3968. rtlUnicodeToStrX(bufflen, buff, len, data);
  3969. DecPushString(bufflen, buff);
  3970. rtlFree(buff);
  3971. }
  3972. void rtlUtf8ToUtf8(size32_t outlen, char * out, size32_t inlen, const char *in)
  3973. {
  3974. //Packs as many characaters as it can into the target, but don't include any half characters
  3975. size32_t offset = 0;
  3976. size32_t outsize = outlen*UTF8_MAXSIZE;
  3977. for (unsigned i=0; i< inlen; i++)
  3978. {
  3979. unsigned nextSize = readUtf8Size(in+offset);
  3980. if (offset + nextSize > outsize)
  3981. break;
  3982. offset += nextSize;
  3983. }
  3984. memcpy(out, in, offset);
  3985. if (offset != outsize)
  3986. memset(out+offset, ' ', outsize-offset);
  3987. }
  3988. void rtlUtf8ToUtf8X(size32_t & outlen, char * & out, size32_t inlen, const char *in)
  3989. {
  3990. unsigned insize = rtlUtf8Size(inlen, in);
  3991. char * buffer = (char *)rtlMalloc(insize);
  3992. memcpy(buffer, in, insize);
  3993. outlen = inlen;
  3994. out = buffer;
  3995. }
  3996. #ifdef _USE_ICU
  3997. unsigned rtlUnicodeStrlen(UChar const * str)
  3998. {
  3999. return u_strlen(str);
  4000. }
  4001. #else
  4002. unsigned rtlUnicodeStrlen(UChar const * str)
  4003. {
  4004. unsigned len = 0;
  4005. while (*str++)
  4006. len++;
  4007. return len;
  4008. }
  4009. #endif
  4010. //---------------------------------------------------------------------------
  4011. void rtlUtf8ToData(size32_t outlen, void * out, size32_t inlen, const char *in)
  4012. {
  4013. unsigned insize = rtlUtf8Size(inlen, in);
  4014. if (insize >= outlen)
  4015. rtlCodepageToCodepage(outlen, (char *)out, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  4016. else
  4017. {
  4018. rtlCodepageToCodepage(insize, (char *)out, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  4019. memset((char*)out + insize, 0, outlen-insize);
  4020. }
  4021. }
  4022. void rtlUtf8ToDataX(size32_t & outlen, void * & out, size32_t inlen, const char *in)
  4023. {
  4024. unsigned insize = rtlUtf8Size(inlen, in);
  4025. char * cout;
  4026. rtlCodepageToCodepageX(outlen, cout, inlen, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  4027. out = cout;
  4028. }
  4029. void rtlUtf8ToStr(size32_t outlen, char * out, size32_t inlen, const char *in)
  4030. {
  4031. unsigned insize = rtlUtf8Size(inlen, in);
  4032. rtlCodepageToCodepage(outlen, (char *)out, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  4033. }
  4034. void rtlUtf8ToStrX(size32_t & outlen, char * & out, size32_t inlen, const char *in)
  4035. {
  4036. unsigned insize = rtlUtf8Size(inlen, in);
  4037. rtlCodepageToCodepageX(outlen, out, inlen, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  4038. }
  4039. char * rtlUtf8ToVStr(size32_t inlen, const char *in)
  4040. {
  4041. unsigned utfSize = rtlUtf8Size(inlen, in);
  4042. char *ret = (char *) rtlMalloc(inlen+1);
  4043. rtlCodepageToCodepage(inlen, ret, utfSize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  4044. ret[inlen] = 0;
  4045. return ret;
  4046. }
  4047. void rtlDataToUtf8(size32_t outlen, char * out, size32_t inlen, const void *in)
  4048. {
  4049. rtlCodepageToCodepage(outlen*UTF8_MAXSIZE, (char *)out, inlen, (const char *)in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
  4050. }
  4051. void rtlDataToUtf8X(size32_t & outlen, char * & out, size32_t inlen, const void *in)
  4052. {
  4053. unsigned outsize;
  4054. rtlCodepageToCodepageX(outsize, out, inlen*UTF8_MAXSIZE, inlen, (const char *)in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
  4055. outlen = rtlUtf8Length(outsize, out);
  4056. }
  4057. void rtlStrToUtf8(size32_t outlen, char * out, size32_t inlen, const char *in)
  4058. {
  4059. rtlCodepageToCodepage(outlen*UTF8_MAXSIZE, (char *)out, inlen, in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
  4060. }
  4061. void rtlStrToUtf8X(size32_t & outlen, char * & out, size32_t inlen, const char *in)
  4062. {
  4063. unsigned outsize;
  4064. rtlCodepageToCodepageX(outsize, out, inlen*UTF8_MAXSIZE, inlen, in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
  4065. outlen = rtlUtf8Length(outsize, out);
  4066. }
  4067. static int rtlCompareUtf8Utf8ViaUnicode(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale)
  4068. {
  4069. rtlDataAttr uleft(llen*sizeof(UChar));
  4070. rtlDataAttr uright(rlen*sizeof(UChar));
  4071. rtlUtf8ToUnicode(llen, uleft.getustr(), llen, left);
  4072. rtlUtf8ToUnicode(rlen, uright.getustr(), rlen, right);
  4073. return rtlCompareUnicodeUnicode(llen, uleft.getustr(), rlen, uright.getustr(), locale);
  4074. }
  4075. #ifdef _USE_ICU
  4076. int rtlCompareUtf8Utf8(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale)
  4077. {
  4078. //MORE: Do a simple comparison as long as there are no non->0x80 characters around
  4079. // fall back to a full unicode comparison if we hit one - or in the next character to allow for accents etc.
  4080. const byte * bleft = (const byte *)left;
  4081. const byte * bright = (const byte *)right;
  4082. unsigned len = llen > rlen ? rlen : llen;
  4083. for (unsigned i = 0; i < len; i++)
  4084. {
  4085. byte nextLeft = bleft[i];
  4086. byte nextRight = bright[i];
  4087. if (nextLeft >= 0x80 || nextRight >= 0x80)
  4088. return rtlCompareUtf8Utf8ViaUnicode(llen-i, left+i, rlen-i, right+i, locale);
  4089. if ((i+1 != len) && ((bleft[i+1] >= 0x80) || bright[i+1] >= 0x80))
  4090. return rtlCompareUtf8Utf8ViaUnicode(llen-i, left+i, rlen-i, right+i, locale);
  4091. if (nextLeft != nextRight)
  4092. return nextLeft - nextRight;
  4093. }
  4094. int diff = 0;
  4095. if (len != llen)
  4096. {
  4097. for (;(diff == 0) && (len != llen);len++)
  4098. diff = bleft[len] - ' ';
  4099. }
  4100. else if (len != rlen)
  4101. {
  4102. for (;(diff == 0) && (len != rlen);len++)
  4103. diff = ' ' - bright[len];
  4104. }
  4105. return diff;
  4106. }
  4107. int rtlCompareUtf8Utf8Strength(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale, unsigned strength)
  4108. {
  4109. //GH->PG Any better way of doing this? We could possible decide it was a binary comparison instead I guess.
  4110. rtlDataAttr uleft(llen*sizeof(UChar));
  4111. rtlDataAttr uright(rlen*sizeof(UChar));
  4112. rtlUtf8ToUnicode(llen, uleft.getustr(), llen, left);
  4113. rtlUtf8ToUnicode(rlen, uright.getustr(), rlen, right);
  4114. return rtlCompareUnicodeUnicodeStrength(llen, uleft.getustr(), rlen, uright.getustr(), locale, strength);
  4115. }
  4116. #else
  4117. int rtlCompareUtf8Utf8(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale)
  4118. {
  4119. return rtlCompareStrStr(rtlUtf8Size(llen, left), left, rtlUtf8Size(rlen, right), right);
  4120. }
  4121. int rtlCompareUtf8Utf8Strength(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale, unsigned strength)
  4122. {
  4123. return rtlCompareUtf8Utf8(llen, left, rlen, right, locale);
  4124. }
  4125. #endif
  4126. void rtlDecPushUtf8(size32_t len, const char * data)
  4127. {
  4128. DecPushString(len, (const char *)data); // good enough for the moment
  4129. }
  4130. bool rtlUtf8ToBool(size32_t inlen, const char * in)
  4131. {
  4132. return rtlStrToBool(inlen, in);
  4133. }
  4134. __int64 rtlUtf8ToInt(size32_t inlen, const char * in)
  4135. {
  4136. return rtlStrToInt8(inlen, in); // good enough for the moment
  4137. }
  4138. double rtlUtf8ToReal(size32_t inlen, const char * in)
  4139. {
  4140. return rtlStrToReal(inlen, in); // good enough for the moment
  4141. }
  4142. void rtlCodepageToUtf8(unsigned outlen, char * out, unsigned inlen, char const * in, char const * codepage)
  4143. {
  4144. rtlCodepageToCodepage(outlen*UTF8_MAXSIZE, (char *)out, inlen, in, UTF8_CODEPAGE, codepage);
  4145. }
  4146. void rtlCodepageToUtf8X(unsigned & outlen, char * & out, unsigned inlen, char const * in, char const * codepage)
  4147. {
  4148. unsigned outsize;
  4149. rtlCodepageToCodepageX(outsize, out, inlen*UTF8_MAXSIZE, inlen, in, UTF8_CODEPAGE, codepage);
  4150. outlen = rtlUtf8Length(outsize, out);
  4151. }
  4152. void rtlUtf8ToCodepage(unsigned outlen, char * out, unsigned inlen, char const * in, char const * codepage)
  4153. {
  4154. unsigned insize = rtlUtf8Size(inlen, in);
  4155. rtlCodepageToCodepage(outlen, (char *)out, insize, in, codepage, UTF8_CODEPAGE);
  4156. }
  4157. void rtlUtf8ToCodepageX(unsigned & outlen, char * & out, unsigned inlen, char const * in, char const * codepage)
  4158. {
  4159. unsigned insize = rtlUtf8Size(inlen, in);
  4160. rtlCodepageToCodepageX(outlen, out, inlen, insize, in, codepage, UTF8_CODEPAGE);
  4161. }
  4162. void rtlUnicodeToUtf8X(unsigned & outlen, char * & out, unsigned inlen, const UChar * in)
  4163. {
  4164. unsigned outsize;
  4165. rtlUnicodeToCodepageX(outsize, out, inlen, in, UTF8_CODEPAGE);
  4166. outlen = rtlUtf8Length(outsize, out);
  4167. }
  4168. void rtlUnicodeToUtf8(unsigned outlen, char * out, unsigned inlen, const UChar * in)
  4169. {
  4170. rtlUnicodeToCodepage(outlen*UTF8_MAXSIZE, out, inlen, in, UTF8_CODEPAGE);
  4171. }
  4172. void rtlUtf8ToUnicodeX(unsigned & outlen, UChar * & out, unsigned inlen, char const * in)
  4173. {
  4174. rtlCodepageToUnicodeX(outlen, out, rtlUtf8Size(inlen, in), in, UTF8_CODEPAGE);
  4175. }
  4176. void rtlUtf8ToUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
  4177. {
  4178. rtlCodepageToUnicode(outlen, out, rtlUtf8Size(inlen, in), in, UTF8_CODEPAGE);
  4179. }
  4180. ECLRTL_API void rtlUtf8SubStrFT(unsigned tlen, char * tgt, unsigned slen, char const * src, unsigned from, unsigned to)
  4181. {
  4182. normalizeFromTo(from, to);
  4183. clipFromTo(from, to, slen);
  4184. unsigned copylen = to - from;
  4185. unsigned startOffset = rtlUtf8Size(from, src);
  4186. rtlUtf8ToUtf8(tlen, tgt, copylen, src+startOffset);
  4187. }
  4188. ECLRTL_API void rtlUtf8SubStrFTX(unsigned & tlen, char * & tgt, unsigned slen, char const * src, unsigned from, unsigned to)
  4189. {
  4190. normalizeFromTo(from, to);
  4191. unsigned len = to - from;
  4192. clipFromTo(from, to, slen);
  4193. unsigned copylen = to - from;
  4194. unsigned fillSize = len - copylen;
  4195. unsigned startOffset = rtlUtf8Size(from, src);
  4196. unsigned copySize = rtlUtf8Size(copylen, src+startOffset);
  4197. char * buffer = (char *)rtlMalloc(copySize + fillSize);
  4198. memcpy(buffer, (byte *)src+startOffset, copySize);
  4199. if (fillSize)
  4200. memset(buffer+copySize, ' ', fillSize);
  4201. tlen = len;
  4202. tgt = buffer;
  4203. }
  4204. ECLRTL_API void rtlUtf8SubStrFX(unsigned & tlen, char * & tgt, unsigned slen, char const * src, unsigned from)
  4205. {
  4206. normalizeFromTo(from, slen);
  4207. unsigned len = slen - from;
  4208. unsigned startOffset = rtlUtf8Size(from, src);
  4209. unsigned copySize = rtlUtf8Size(len, src+startOffset);
  4210. char * buffer = (char *)rtlMalloc(copySize);
  4211. memcpy(buffer, (byte *)src+startOffset, copySize);
  4212. tlen = len;
  4213. tgt = buffer;
  4214. }
  4215. ECLRTL_API void rtlUtf8ToLower(size32_t l, char * t, char const * locale)
  4216. {
  4217. //Convert to lower case, but only go via unicode routines if we have to...
  4218. for (unsigned i=0; i< l; i++)
  4219. {
  4220. byte next = *t;
  4221. if (next >= 0x80)
  4222. {
  4223. //yuk, go via unicode to do the convertion.
  4224. unsigned len = l-i;
  4225. unsigned size = rtlUtf8Size(len, t+i);
  4226. rtlDataAttr unicode(len*sizeof(UChar));
  4227. rtlCodepageToUnicode(len, unicode.getustr(), size, t+i, UTF8_CODEPAGE);
  4228. rtlUnicodeToLower(len, unicode.getustr(), locale);
  4229. rtlUnicodeToCodepage(size, t+i, len, unicode.getustr(), UTF8_CODEPAGE);
  4230. return;
  4231. }
  4232. *t++ = tolower(next);
  4233. }
  4234. }
  4235. #ifdef _USE_ICU
  4236. ECLRTL_API void rtlConcatUtf8(unsigned & tlen, char * * tgt, ...)
  4237. {
  4238. //Going to have to go via unicode because of normalization. However, it might be worth optimizing the case where no special characters are present
  4239. va_list args;
  4240. unsigned totalLength = 0;
  4241. unsigned maxLength = 0;
  4242. va_start(args, tgt);
  4243. for(;;)
  4244. {
  4245. unsigned len = va_arg(args, unsigned);
  4246. if(len+1==0)
  4247. break;
  4248. va_arg(args, const char *); // Skip the string
  4249. totalLength += len;
  4250. if (len > maxLength)
  4251. maxLength = len;
  4252. }
  4253. va_end(args);
  4254. rtlDataAttr next(maxLength*sizeof(UChar));
  4255. rtlDataAttr result(totalLength*sizeof(UChar));
  4256. unsigned idx = 0;
  4257. UErrorCode err = U_ZERO_ERROR;
  4258. va_start(args, tgt);
  4259. for(;;)
  4260. {
  4261. unsigned len = va_arg(args, unsigned);
  4262. if(len+1==0)
  4263. break;
  4264. const char * str = va_arg(args, const char *);
  4265. if (len)
  4266. {
  4267. rtlUtf8ToUnicode(len, next.getustr(), len, str);
  4268. idx = unorm_concatenate(result.getustr(), idx, next.getustr(), len, result.getustr(), totalLength, UNORM_NFC, 0, &err);
  4269. }
  4270. }
  4271. va_end(args);
  4272. rtlUnicodeToUtf8X(tlen, *tgt, idx, result.getustr());
  4273. }
  4274. ECLRTL_API unsigned rtlConcatUtf8ToUtf8(unsigned tlen, char * tgt, unsigned offset, unsigned slen, const char * src)
  4275. {
  4276. //NB: Inconsistently with the other varieties, idx is a byte offset, not a character position to make the code more efficient.....
  4277. //normalization is done in the space filling routine at the end
  4278. unsigned ssize = rtlUtf8Size(slen, src);
  4279. assertex(tlen * UTF8_MAXSIZE >= offset+ssize);
  4280. memcpy(tgt+offset, src, ssize);
  4281. return offset + ssize;
  4282. }
  4283. ECLRTL_API void rtlUtf8SpaceFill(unsigned tlen, char * tgt, unsigned offset)
  4284. {
  4285. const byte * src = (const byte *)tgt;
  4286. for (unsigned i=0; i<offset; i++)
  4287. {
  4288. if (src[i] >= 0x80)
  4289. {
  4290. unsigned idx = rtlUtf8Length(offset, tgt);
  4291. rtlDataAttr unicode(idx*sizeof(UChar));
  4292. rtlUtf8ToUnicode(idx, unicode.getustr(), idx, tgt);
  4293. unicodeEnsureIsNormalized(idx, unicode.getustr());
  4294. rtlUnicodeToUtf8(tlen, tgt, idx, unicode.getustr());
  4295. return;
  4296. }
  4297. }
  4298. //no special characters=>easy route.
  4299. memset(tgt+offset, ' ', tlen*UTF8_MAXSIZE-offset);
  4300. }
  4301. #else
  4302. ECLRTL_API void rtlConcatUtf8(unsigned & tlen, char * * tgt, ...) { rtlThrowNoUnicode(); }
  4303. #endif
  4304. ECLRTL_API unsigned rtlHash32Utf8(unsigned length, const char * k, unsigned initval)
  4305. {
  4306. //These need to hash the same way as a UNICODE string would => convert to UNICODE
  4307. //It would be hard to optimize to hash the string without performing the conversion.
  4308. size32_t tempLength;
  4309. rtlDataAttr temp;
  4310. rtlUtf8ToUnicodeX(tempLength, temp.refustr(), length, k);
  4311. return rtlHash32Unicode(tempLength, temp.getustr(), initval);
  4312. }
  4313. ECLRTL_API unsigned rtlHashUtf8(unsigned length, const char * k, unsigned initval)
  4314. {
  4315. //These need to hash the same way as a UNICODE string would => convert to UNICODE
  4316. size32_t tempLength;
  4317. rtlDataAttr temp;
  4318. rtlUtf8ToUnicodeX(tempLength, temp.refustr(), length, k);
  4319. return rtlHashUnicode(tempLength, temp.getustr(), initval);
  4320. }
  4321. ECLRTL_API hash64_t rtlHash64Utf8(unsigned length, const char * k, hash64_t initval)
  4322. {
  4323. //These need to hash the same way as a UNICODE string would => convert to UNICODE
  4324. size32_t tempLength;
  4325. rtlDataAttr temp;
  4326. rtlUtf8ToUnicodeX(tempLength, temp.refustr(), length, k);
  4327. return rtlHash64Unicode(tempLength, temp.getustr(), initval);
  4328. }
  4329. unsigned rtlCrcUtf8(unsigned length, const char * k, unsigned initval)
  4330. {
  4331. return rtlCrcData(rtlUtf8Size(length, k), k, initval);
  4332. }
  4333. int rtlNewSearchUtf8Table(unsigned count, unsigned elemlen, const char * * table, unsigned width, const char * search, const char * locale)
  4334. {
  4335. //MORE: Hopelessly inefficient.... Should rethink - possibly introducing a class for doing string searching, and the Utf8 variety pre-converting the
  4336. //search strings into unicode.
  4337. int left = 0;
  4338. int right = count;
  4339. do
  4340. {
  4341. int mid = (left + right) >> 1;
  4342. int cmp = rtlCompareUtf8Utf8(width, search, elemlen, table[mid], locale);
  4343. if (cmp < 0)
  4344. right = mid;
  4345. else if (cmp > 0)
  4346. left = mid+1;
  4347. else
  4348. return mid;
  4349. } while (left < right);
  4350. return -1;
  4351. }
  4352. //---------------------------------------------------------------------------
  4353. ECLRTL_API int rtlQueryLocalFailCode(IException * e)
  4354. {
  4355. return e->errorCode();
  4356. }
  4357. ECLRTL_API void rtlGetLocalFailMessage(size32_t & len, char * & text, IException * e, const char * tag)
  4358. {
  4359. rtlExceptionExtract(len, text, e, tag);
  4360. }
  4361. ECLRTL_API void rtlFreeException(IException * e)
  4362. {
  4363. e->Release();
  4364. }
  4365. //---------------------------------------------------------------------------
  4366. //Generally any calls to this function have also checked that the length(trim(str)) <= fieldLen, so exceptions should only occur if compareLen > fieldLen
  4367. //However, function can now also handle the exception case.
  4368. ECLRTL_API void rtlCreateRange(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str, byte fill, byte pad)
  4369. {
  4370. //
  4371. if (compareLen > fieldLen)
  4372. {
  4373. if ((int)compareLen >= 0)
  4374. {
  4375. //x[1..m] = y, m is larger than fieldLen, so truncate to fieldLen
  4376. compareLen = fieldLen;
  4377. }
  4378. else
  4379. compareLen = 0; // probably m[1..-1] or something silly
  4380. }
  4381. if (len > compareLen)
  4382. {
  4383. while ((len > compareLen) && (str[len-1] == pad))
  4384. len--;
  4385. //so change the search range to FF,FF,FF .. 00.00.00 which will then never match.
  4386. if (len > compareLen)
  4387. {
  4388. compareLen = 0;
  4389. fill = (fill == 0) ? 255 : 0;
  4390. }
  4391. }
  4392. outlen = fieldLen;
  4393. out = (char *)rtlMalloc(fieldLen);
  4394. if (len >= compareLen)
  4395. memcpy(out, str, compareLen);
  4396. else
  4397. {
  4398. memcpy(out, str, len);
  4399. memset(out+len, pad, compareLen-len);
  4400. }
  4401. memset(out + compareLen, fill, fieldLen-compareLen);
  4402. }
  4403. ECLRTL_API void rtlCreateStrRangeLow(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
  4404. {
  4405. rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 0, ' ');
  4406. }
  4407. ECLRTL_API void rtlCreateStrRangeHigh(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
  4408. {
  4409. rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 255, ' ');
  4410. }
  4411. ECLRTL_API void rtlCreateDataRangeLow(size32_t & outlen, void * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const void * str)
  4412. {
  4413. rtlCreateRange(outlen, *(char * *)&out, fieldLen, compareLen, len, (const char *)str, 0, 0);
  4414. }
  4415. ECLRTL_API void rtlCreateDataRangeHigh(size32_t & outlen, void * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const void * str)
  4416. {
  4417. rtlCreateRange(outlen, *(char * *)&out, fieldLen, compareLen, len, (const char *)str, 255, 0);
  4418. }
  4419. ECLRTL_API void rtlCreateRangeLow(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
  4420. {
  4421. rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 0, ' ');
  4422. }
  4423. ECLRTL_API void rtlCreateRangeHigh(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
  4424. {
  4425. rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 255, ' ');
  4426. }
  4427. ECLRTL_API void rtlCreateUnicodeRange(size32_t & outlen, UChar * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const UChar * str, byte fill)
  4428. {
  4429. //Same as function above!
  4430. if (compareLen > fieldLen)
  4431. {
  4432. if ((int)compareLen >= 0)
  4433. {
  4434. //x[1..m] = y, m is larger than fieldLen, so truncate to fieldLen
  4435. compareLen = fieldLen;
  4436. }
  4437. else
  4438. compareLen = 0; // probably m[1..-1] or something silly
  4439. }
  4440. if (len > compareLen)
  4441. {
  4442. while ((len > compareLen) && (str[len-1] == ' '))
  4443. len--;
  4444. //so change the search range to FF,FF,FF .. 00.00.00 which will then never match.
  4445. if (len > compareLen)
  4446. {
  4447. compareLen = 0;
  4448. fill = (fill == 0) ? 255 : 0;
  4449. }
  4450. }
  4451. outlen = fieldLen;
  4452. out = (UChar *)rtlMalloc(fieldLen*sizeof(UChar));
  4453. if (len >= compareLen)
  4454. memcpy(out, str, compareLen*sizeof(UChar));
  4455. else
  4456. {
  4457. memcpy(out, str, len * sizeof(UChar));
  4458. while (len != compareLen)
  4459. out[len++] = ' ';
  4460. }
  4461. memset(out + compareLen, fill, (fieldLen-compareLen) * sizeof(UChar));
  4462. }
  4463. ECLRTL_API void rtlCreateUnicodeRangeLow(size32_t & outlen, UChar * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const UChar * str)
  4464. {
  4465. rtlCreateUnicodeRange(outlen, out, fieldLen, compareLen, len, str, 0x00);
  4466. }
  4467. ECLRTL_API void rtlCreateUnicodeRangeHigh(size32_t & outlen, UChar * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const UChar * str)
  4468. {
  4469. rtlCreateUnicodeRange(outlen, out, fieldLen, compareLen, len, str, 0xFF);
  4470. }
  4471. //---------------------------------------------------------------------------
  4472. ECLRTL_API unsigned rtlCountRows(size32_t len, const void * data, IRecordSize * rs)
  4473. {
  4474. if (rs->isFixedSize())
  4475. return len / rs->getFixedSize();
  4476. unsigned count = 0;
  4477. while (len)
  4478. {
  4479. size32_t thisLen = rs->getRecordSize(data);
  4480. data = (byte *)data + thisLen;
  4481. if (thisLen > len)
  4482. throw MakeStringException(0, "Invalid raw data");
  4483. len -= thisLen;
  4484. count++;
  4485. }
  4486. return count;
  4487. }
  4488. //---------------------------------------------------------------------------
  4489. ECLRTL_API size32_t rtlCountToSize(unsigned count, const void * data, IRecordSize * rs)
  4490. {
  4491. if (rs->isFixedSize())
  4492. return count * rs->getFixedSize();
  4493. unsigned size = 0;
  4494. for (unsigned i=0;i<count;i++)
  4495. {
  4496. size32_t thisLen = rs->getRecordSize(data);
  4497. data = (byte *)data + thisLen;
  4498. size += thisLen;
  4499. }
  4500. return size;
  4501. }
  4502. //---------------------------------------------------------------------------
  4503. #ifdef _USE_ICU
  4504. class rtlCodepageConverter
  4505. {
  4506. public:
  4507. rtlCodepageConverter(char const * sourceName, char const * targetName, bool & failed) : uerr(U_ZERO_ERROR)
  4508. {
  4509. srccnv = ucnv_open(sourceName, &uerr);
  4510. tgtcnv = ucnv_open(targetName, &uerr);
  4511. tgtMaxRatio = ucnv_getMaxCharSize(tgtcnv);
  4512. failed = U_FAILURE(uerr) != FALSE;
  4513. }
  4514. ~rtlCodepageConverter()
  4515. {
  4516. ucnv_close(srccnv);
  4517. ucnv_close(tgtcnv);
  4518. }
  4519. void convertX(unsigned & targetLength, char * & target, unsigned sourceLength, char const * source, bool & failed, bool preflight)
  4520. {
  4521. //convert from source to utf-16: try to avoid preflighting by guessing upper bound
  4522. //unicode length in UChars equal source length in chars if single byte encoding, and be less for multibyte
  4523. UChar * ubuff = (UChar *)rtlMalloc(sourceLength*2);
  4524. int32_t ulen = ucnv_toUChars(srccnv, ubuff, sourceLength, source, sourceLength, &uerr);
  4525. if(ulen > (int32_t)sourceLength)
  4526. {
  4527. //okay, so our guess was wrong, and we have to reallocate
  4528. free(ubuff);
  4529. ubuff = (UChar *)rtlMalloc(ulen*2);
  4530. ucnv_toUChars(srccnv, ubuff, ulen, source, sourceLength, &uerr);
  4531. }
  4532. if(preflight)
  4533. {
  4534. //convert from utf-16 to target: preflight to get buffer of exactly the right size
  4535. UErrorCode uerr2 = uerr; //preflight has to use copy of error code, as it is considered an 'error'
  4536. int32_t tlen = ucnv_fromUChars(tgtcnv, 0, 0, ubuff, ulen, &uerr2);
  4537. target = (char *)rtlMalloc(tlen);
  4538. targetLength = ucnv_fromUChars(tgtcnv, target, tlen, ubuff, ulen, &uerr);
  4539. }
  4540. else
  4541. {
  4542. //convert from utf-16 to target: avoid preflighting by allocating buffer of maximum size
  4543. target = (char *)rtlMalloc(ulen*tgtMaxRatio);
  4544. targetLength = ucnv_fromUChars(tgtcnv, target, ulen*tgtMaxRatio, ubuff, ulen, &uerr);
  4545. }
  4546. free(ubuff);
  4547. failed = U_FAILURE(uerr) != FALSE;
  4548. }
  4549. unsigned convert(unsigned targetLength, char * target, unsigned sourceLength, char const * source, bool & failed)
  4550. {
  4551. char * tgtStart = target;
  4552. ucnv_convertEx(tgtcnv, srccnv, &target, target+targetLength, &source, source+sourceLength, 0, 0, 0, 0, true, true, &uerr);
  4553. int32_t ret = target-tgtStart;
  4554. failed = U_FAILURE(uerr) != FALSE;
  4555. return ret;
  4556. }
  4557. private:
  4558. UErrorCode uerr;
  4559. UConverter * srccnv;
  4560. UConverter * tgtcnv;
  4561. int8_t tgtMaxRatio;
  4562. };
  4563. void * rtlOpenCodepageConverter(char const * sourceName, char const * targetName, bool & failed)
  4564. {
  4565. return new rtlCodepageConverter(sourceName, targetName, failed);
  4566. }
  4567. void rtlCloseCodepageConverter(void * converter)
  4568. {
  4569. delete ((rtlCodepageConverter *)converter);
  4570. }
  4571. void rtlCodepageConvertX(void * converter, unsigned & targetLength, char * & target, unsigned sourceLength, char const * source, bool & failed, bool preflight)
  4572. {
  4573. ((rtlCodepageConverter *)converter)->convertX(targetLength, target, sourceLength, source, failed, preflight);
  4574. }
  4575. unsigned rtlCodepageConvert(void * converter, unsigned targetLength, char * target, unsigned sourceLength, char const * source, bool & failed)
  4576. {
  4577. return ((rtlCodepageConverter *)converter)->convert(targetLength, target, sourceLength, source, failed);
  4578. }
  4579. #else
  4580. void * rtlOpenCodepageConverter(char const * sourceName, char const * targetName, bool & failed)
  4581. {
  4582. rtlThrowNoUnicode();
  4583. }
  4584. void rtlCloseCodepageConverter(void * converter)
  4585. {
  4586. }
  4587. void rtlCodepageConvertX(void * converter, unsigned & targetLength, char * & target, unsigned sourceLength, char const * source, bool & failed, bool preflight)
  4588. {
  4589. }
  4590. unsigned rtlCodepageConvert(void * converter, unsigned targetLength, char * target, unsigned sourceLength, char const * source, bool & failed)
  4591. {
  4592. return 0;
  4593. }
  4594. #endif
  4595. //---------------------------------------------------------------------------
  4596. void appendUChar(MemoryBuffer & buff, char x)
  4597. {
  4598. UChar c = x;
  4599. buff.append(sizeof(c), &c);
  4600. }
  4601. void appendUChar(MemoryBuffer & buff, UChar c)
  4602. {
  4603. buff.append(sizeof(c), &c);
  4604. }
  4605. void appendUStr(MemoryBuffer & x, const char * text)
  4606. {
  4607. while (*text)
  4608. {
  4609. UChar c = *text++;
  4610. x.append(sizeof(c), &c);
  4611. }
  4612. }
  4613. ECLRTL_API void xmlDecodeStrX(size32_t & outLen, char * & out, size32_t inLen, const char * in)
  4614. {
  4615. StringBuffer input(inLen, in);
  4616. StringBuffer temp;
  4617. decodeXML(input, temp, NULL, NULL, false);
  4618. outLen = temp.length();
  4619. out = temp.detach();
  4620. }
  4621. bool hasPrefix(const UChar * ustr, const UChar * end, const char * str, unsigned len)
  4622. {
  4623. if ((unsigned)(end - ustr) < len)
  4624. return false;
  4625. while (len--)
  4626. {
  4627. if (*ustr++ != *str++)
  4628. return false;
  4629. }
  4630. return true;
  4631. }
  4632. ECLRTL_API void xmlDecodeUStrX(size32_t & outLen, UChar * & out, size32_t inLen, const UChar * in)
  4633. {
  4634. const UChar * cur = in;
  4635. const UChar * end = in+inLen;
  4636. MemoryBuffer ret;
  4637. while (cur<end)
  4638. {
  4639. switch(*cur)
  4640. {
  4641. case '&':
  4642. if(hasPrefix(cur+1, end, "amp;", 4))
  4643. {
  4644. cur += 4;
  4645. appendUChar(ret, '&');
  4646. }
  4647. else if(hasPrefix(cur+1, end, "lt;", 3))
  4648. {
  4649. cur += 3;
  4650. appendUChar(ret, '<');
  4651. }
  4652. else if(hasPrefix(cur+1, end, "gt;", 3))
  4653. {
  4654. cur += 3;
  4655. appendUChar(ret, '>');
  4656. }
  4657. else if(hasPrefix(cur+1, end, "quot;", 5))
  4658. {
  4659. cur += 5;
  4660. appendUChar(ret, '"');
  4661. }
  4662. else if(hasPrefix(cur+1, end, "apos;", 5))
  4663. {
  4664. cur += 5;
  4665. appendUChar(ret, '\'');
  4666. }
  4667. else if(hasPrefix(cur+1, end, "nbsp;", 5))
  4668. {
  4669. cur += 5;
  4670. appendUChar(ret, (UChar) 0xa0);
  4671. }
  4672. else if(hasPrefix(cur+1, end, "#", 1))
  4673. {
  4674. const UChar * saveCur = cur;
  4675. bool error = true; // until we have seen a digit...
  4676. cur += 2;
  4677. unsigned base = 10;
  4678. if (*cur == 'x')
  4679. {
  4680. base = 16;
  4681. cur++;
  4682. }
  4683. UChar value = 0;
  4684. while (cur < end)
  4685. {
  4686. unsigned digit;
  4687. UChar next = *cur;
  4688. if ((next >= '0') && (next <= '9'))
  4689. digit = next-'0';
  4690. else if ((next >= 'A') && (next <= 'F'))
  4691. digit = next-'A'+10;
  4692. else if ((next >= 'a') && (next <= 'f'))
  4693. digit = next-'a'+10;
  4694. else if (next==';')
  4695. break;
  4696. else
  4697. digit = base;
  4698. if (digit >= base)
  4699. {
  4700. error = true;
  4701. break;
  4702. }
  4703. error = false;
  4704. value = value * base + digit;
  4705. cur++;
  4706. }
  4707. if (error)
  4708. {
  4709. appendUChar(ret, '&');
  4710. cur = saveCur;
  4711. }
  4712. else
  4713. appendUChar(ret, value);
  4714. }
  4715. else
  4716. appendUChar(ret, *cur);
  4717. break;
  4718. default:
  4719. appendUChar(ret, *cur);
  4720. break;
  4721. }
  4722. cur++;
  4723. }
  4724. outLen = ret.length()/2;
  4725. out = (UChar *)ret.detach();
  4726. }
  4727. ECLRTL_API void xmlEncodeStrX(size32_t & outLen, char * & out, size32_t inLen, const char * in, unsigned flags)
  4728. {
  4729. StringBuffer temp;
  4730. encodeXML(in, temp, flags, inLen, false);
  4731. outLen = temp.length();
  4732. out = temp.detach();
  4733. }
  4734. ECLRTL_API void xmlEncodeUStrX(size32_t & outLen, UChar * & out, size32_t inLen, const UChar * in, unsigned flags)
  4735. {
  4736. const UChar * cur = in;
  4737. MemoryBuffer ret;
  4738. ret.ensureCapacity(inLen*2);
  4739. while (inLen)
  4740. {
  4741. UChar next = *cur;
  4742. switch(*cur)
  4743. {
  4744. case '&':
  4745. appendUStr(ret, "&amp;");
  4746. break;
  4747. case '<':
  4748. appendUStr(ret, "&lt;");
  4749. break;
  4750. case '>':
  4751. appendUStr(ret, "&gt;");
  4752. break;
  4753. case '\"':
  4754. appendUStr(ret, "&quot;");
  4755. break;
  4756. case '\'':
  4757. appendUStr(ret, "&apos;");
  4758. break;
  4759. case ' ':
  4760. appendUStr(ret, flags & ENCODE_SPACES?"&#32;":" ");
  4761. break;
  4762. case '\n':
  4763. appendUStr(ret, flags & ENCODE_NEWLINES?"&#10;":"\n");
  4764. break;
  4765. case '\r':
  4766. appendUStr(ret, flags & ENCODE_NEWLINES?"&#13;":"\r");
  4767. break;
  4768. case '\t':
  4769. appendUStr(ret, flags & ENCODE_SPACES?"&#9;":"\t");
  4770. break;
  4771. default:
  4772. appendUChar(ret, next);
  4773. break;
  4774. }
  4775. inLen--;
  4776. cur++;
  4777. }
  4778. outLen = ret.length()/2;
  4779. out = (UChar *)ret.detach();
  4780. }
  4781. //---------------------------------------------------------------------------
  4782. #define STRUCTURED_EXCEPTION_TAG "Error"
  4783. inline bool isStructuredMessage(const char * text, const char * tag)
  4784. {
  4785. if (!text || text[0] != '<')
  4786. return false;
  4787. if (!tag)
  4788. return true;
  4789. size32_t lenTag = strlen(tag);
  4790. if (memcmp(text+1,tag,lenTag) != 0)
  4791. return false;
  4792. if (text[lenTag+1] != '>')
  4793. return false;
  4794. return true;
  4795. }
  4796. inline bool isStructuredError(const char * text) { return isStructuredMessage(text, STRUCTURED_EXCEPTION_TAG); }
  4797. void rtlExtractTag(size32_t & outLen, char * & out, const char * text, const char * tag, const char * rootTag)
  4798. {
  4799. if (!tag || !isStructuredMessage(text, rootTag))
  4800. {
  4801. if (text && (!tag || strcmp(tag, "text")==0))
  4802. rtlStrToStrX(outLen, out, strlen(text), text);
  4803. else
  4804. {
  4805. outLen = 0;
  4806. out = NULL;
  4807. }
  4808. }
  4809. else
  4810. {
  4811. StringBuffer startTag, endTag;
  4812. startTag.append("<").append(tag).append(">");
  4813. endTag.append("</").append(tag).append(">");
  4814. const char * start = strstr(text, startTag.str());
  4815. const char * end = strstr(text, endTag.str());
  4816. if (start && end)
  4817. {
  4818. start += startTag.length();
  4819. xmlDecodeStrX(outLen, out, end-start, start);
  4820. }
  4821. else
  4822. {
  4823. outLen = 0;
  4824. out = NULL;
  4825. }
  4826. }
  4827. }
  4828. void rtlExceptionExtract(size32_t & outLen, char * & out, const char * text, const char * tag)
  4829. {
  4830. if (!tag) tag = "text";
  4831. rtlExtractTag(outLen, out, text, tag, STRUCTURED_EXCEPTION_TAG);
  4832. }
  4833. void rtlExceptionExtract(size32_t & outLen, char * & out, IException * e, const char * tag)
  4834. {
  4835. StringBuffer text;
  4836. e->errorMessage(text);
  4837. rtlExceptionExtract(outLen, out, text.str(), tag);
  4838. }
  4839. void rtlAddExceptionTag(StringBuffer & errorText, const char * tag, const char * value)
  4840. {
  4841. if (!isStructuredError(errorText.str()))
  4842. {
  4843. StringBuffer temp;
  4844. temp.append("<" STRUCTURED_EXCEPTION_TAG "><text>");
  4845. encodeXML(errorText.str(), temp, ENCODE_WHITESPACE, errorText.length(), false);
  4846. temp.append("</text></" STRUCTURED_EXCEPTION_TAG ">");
  4847. errorText.swapWith(temp);
  4848. }
  4849. StringBuffer temp;
  4850. temp.append("<").append(tag).append(">");
  4851. encodeXML(value, temp, ENCODE_WHITESPACE, (unsigned)-1, false);
  4852. temp.append("</").append(tag).append(">");
  4853. unsigned len = errorText.length();
  4854. unsigned pos = len - strlen(STRUCTURED_EXCEPTION_TAG) - 3;
  4855. errorText.insert(pos, temp);
  4856. }
  4857. //---------------------------------------------------------------------------
  4858. void rtlSubstituteEmbeddedScript(size32_t &__lenResult, char * &__result, size32_t scriptChars, const char *script, size32_t outFieldsChars, const char *outFields, size32_t searchChars, const char *search)
  4859. {
  4860. StringBuffer result;
  4861. ::replaceString(result, rtlUtf8Size(scriptChars, script), script, rtlUtf8Size(searchChars, search), search, rtlUtf8Size(outFieldsChars, outFields), outFields);
  4862. __lenResult = result.lengthUtf8();
  4863. __result = result.detach();
  4864. }
  4865. //---------------------------------------------------------------------------
  4866. void rtlRowBuilder::forceAvailable(size32_t size)
  4867. {
  4868. const size32_t chunkSize = 64;
  4869. maxsize = (size + chunkSize-1) & ~(chunkSize-1);
  4870. ptr = rtlRealloc(ptr, maxsize);
  4871. }
  4872. //---------------------------------------------------------------------------
  4873. inline unsigned numExtraBytesFromValue(unsigned __int64 first)
  4874. {
  4875. if (first >= I64C(0x10000000))
  4876. if (first >= I64C(0x40000000000))
  4877. if (first >= I64C(0x2000000000000))
  4878. if (first >= I64C(0x100000000000000))
  4879. return 8;
  4880. else
  4881. return 7;
  4882. else
  4883. return 6;
  4884. else
  4885. if (first >= I64C(0x800000000))
  4886. return 5;
  4887. else
  4888. return 4;
  4889. else
  4890. if (first >= 0x4000)
  4891. if (first >= 0x200000)
  4892. return 3;
  4893. else
  4894. return 2;
  4895. else
  4896. if (first >= 0x80)
  4897. return 1;
  4898. else
  4899. return 0;
  4900. }
//A packed byte format, based on the unicode packing of utf-8.
  4902. //The number of top bits set in the leading byte indicates how many extra
  4903. //bytes follow (0..8). It gives the same compression as using a top bit to
  4904. //indicate continuation, but seems to be quicker (and requires less look ahead).
  4905. /*
  4906. byte numExtraBytesFromFirstTable[256] =
  4907. {
  4908. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  4909. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  4910. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  4911. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  4912. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  4913. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  4914. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  4915. 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8
  4916. };
  4917. inline unsigned numExtraBytesFromFirst(byte first)
  4918. {
  4919. return numExtraBytesFromFirstTable(first);
  4920. }
  4921. */
  4922. //NB: This seems to be faster than using the table lookup above. Probably affects the data cache less
  4923. inline unsigned numExtraBytesFromFirst(byte first)
  4924. {
  4925. if (first >= 0xF0)
  4926. if (first >= 0xFC)
  4927. if (first >= 0xFE)
  4928. if (first >= 0xFF)
  4929. return 8;
  4930. else
  4931. return 7;
  4932. else
  4933. return 6;
  4934. else
  4935. if (first >= 0xF8)
  4936. return 5;
  4937. else
  4938. return 4;
  4939. else
  4940. if (first >= 0xC0)
  4941. if (first >= 0xE0)
  4942. return 3;
  4943. else
  4944. return 2;
  4945. else
  4946. if (first >= 0x80)
  4947. return 1;
  4948. else
  4949. return 0;
  4950. }
  4951. const static byte leadingValueMask[9] = { 0x7f, 0x3f, 0x1f, 0x0f, 0x07, 0x03, 0x01, 0x00, 0x00 };
  4952. const static byte leadingLengthMask[9] = { 0x00, 0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF };
  4953. //maximum number of bytes for a packed value is size+1 bytes for size <=8 and last byte being fully used.
  4954. unsigned __int64 rtlGetPackedUnsigned(const void * _ptr)
  4955. {
  4956. const byte * ptr = (const byte *)_ptr;
  4957. byte first = *ptr++;
  4958. unsigned numExtra = numExtraBytesFromFirst(first);
  4959. unsigned __int64 value = first & leadingValueMask[numExtra];
  4960. //Loop unrolling has a negligable effect
  4961. while (numExtra--)
  4962. value = (value << 8) | *ptr++;
  4963. return value;
  4964. }
  4965. void rtlSetPackedUnsigned(void * _ptr, unsigned __int64 value)
  4966. {
  4967. byte * ptr = (byte *)_ptr;
  4968. unsigned numExtra = numExtraBytesFromValue(value);
  4969. byte firstMask = leadingLengthMask[numExtra];
  4970. while (numExtra)
  4971. {
  4972. ptr[numExtra--] = (byte)value;
  4973. value >>= 8;
  4974. }
  4975. ptr[0] = (byte)value | firstMask;
  4976. }
  4977. size32_t rtlGetPackedSize(const void * ptr)
  4978. {
  4979. return numExtraBytesFromFirst(*(byte*)ptr)+1;
  4980. }
  4981. size32_t rtlGetPackedSizeFromFirst(byte first)
  4982. {
  4983. return numExtraBytesFromFirst(first)+1;
  4984. }
  4985. //Store signed by moving the sign to the bottom bit, and inverting if negative.
  4986. //so small positive and negative numbers are stored compactly.
  4987. __int64 rtlGetPackedSigned(const void * ptr)
  4988. {
  4989. unsigned __int64 value = rtlGetPackedUnsigned(ptr);
  4990. unsigned __int64 shifted = (value >> 1);
  4991. return (__int64)((value & 1) ? ~shifted : shifted);
  4992. }
  4993. void rtlSetPackedSigned(void * ptr, __int64 value)
  4994. {
  4995. unsigned __int64 storeValue;
  4996. if (value < 0)
  4997. storeValue = (~value << 1) | 1;
  4998. else
  4999. storeValue = value << 1;
  5000. rtlSetPackedUnsigned(ptr, storeValue);
  5001. }
  5002. IAtom * rtlCreateFieldNameAtom(const char * name)
  5003. {
  5004. return createAtom(name);
  5005. }
  5006. void rtlBase64Encode(size32_t & tlen, char * & tgt, size32_t slen, const void * src)
  5007. {
  5008. tlen = 0;
  5009. tgt = NULL;
  5010. if (slen)
  5011. {
  5012. StringBuffer out;
  5013. JBASE64_Encode(src, slen, out);
  5014. tlen = out.length();
  5015. if (tlen)
  5016. {
  5017. char * data = (char *) rtlMalloc(tlen);
  5018. out.getChars(0, tlen, data);
  5019. tgt = data;
  5020. }
  5021. }
  5022. }
  5023. void rtlBase64Decode(size32_t & tlen, void * & tgt, size32_t slen, const char * src)
  5024. {
  5025. tlen = 0;
  5026. if (slen)
  5027. {
  5028. StringBuffer out;
  5029. if (JBASE64_Decode(slen, src, out))
  5030. tlen = out.length();
  5031. if (tlen)
  5032. {
  5033. char * data = (char *) rtlMalloc(tlen);
  5034. out.getChars(0, tlen, data);
  5035. tgt = (void *) data;
  5036. }
  5037. }
  5038. }
  5039. //---------------------------------------------------------------------------
  5040. void RtlCInterface::Link() const { atomic_inc(&xxcount); }
  5041. bool RtlCInterface::Release(void) const
  5042. {
  5043. if (atomic_dec_and_test(&xxcount))
  5044. {
  5045. delete this;
  5046. return true;
  5047. }
  5048. return false;
  5049. }
  5050. //---------------------------------------------------------------------------
  5051. class RtlRowStream : implements IRowStream, public RtlCInterface
  5052. {
  5053. public:
  5054. RtlRowStream(size32_t _count, const byte * * _rowset) : count(_count), rowset(_rowset)
  5055. {
  5056. rtlLinkRowset(rowset);
  5057. cur = 0;
  5058. }
  5059. ~RtlRowStream()
  5060. {
  5061. rtlReleaseRowset(count, rowset);
  5062. }
  5063. RTLIMPLEMENT_IINTERFACE
  5064. virtual const void *nextRow()
  5065. {
  5066. if (cur >= count)
  5067. return NULL;
  5068. const byte * ret = rowset[cur];
  5069. cur++;
  5070. rtlLinkRow(ret);
  5071. return ret;
  5072. }
  5073. virtual void stop()
  5074. {
  5075. cur = count;
  5076. }
  5077. protected:
  5078. size32_t cur;
  5079. size32_t count;
  5080. const byte * * rowset;
  5081. };
  5082. ECLRTL_API IRowStream * createRowStream(size32_t count, const byte * * rowset)
  5083. {
  5084. return new RtlRowStream(count, rowset);
  5085. }
  5086. #if 0
  5087. void PrintExtract(StringBuffer & s, const char * tag)
  5088. {
  5089. size32_t outLen;
  5090. char * out = NULL;
  5091. rtlExceptionExtract(outLen, out, s.str(), tag);
  5092. PrintLog("%s = %.*s", tag, outLen, out);
  5093. rtlFree(out);
  5094. }
  5095. void testStructuredExceptions()
  5096. {
  5097. StringBuffer s;
  5098. s.append("This<is>some text");
  5099. PrintExtract(s, NULL);
  5100. PrintExtract(s, "text");
  5101. PrintExtract(s, "is");
  5102. rtlAddExceptionTag(s, "location", "192.168.12.1");
  5103. PrintExtract(s, NULL);
  5104. PrintExtract(s, "text");
  5105. PrintExtract(s, "is");
  5106. PrintExtract(s, "location");
  5107. rtlAddExceptionTag(s, "author", "gavin");
  5108. PrintExtract(s, NULL);
  5109. PrintExtract(s, "text");
  5110. PrintExtract(s, "is");
  5111. PrintExtract(s, "location");
  5112. PrintExtract(s, "author");
  5113. PrintLog("%s", s.str());
  5114. }
  5115. static void testPackedUnsigned()
  5116. {
  5117. unsigned __int64 values[] = { 0, 1, 2, 10, 127, 128, 16383, 16384, 32767, 32768, 0xffffff, 0x7fffffff, 0xffffffff,
  5118. I64C(0xffffffffffffff), I64C(0x100000000000000), I64C(0x7fffffffffffffff), I64C(0xffffffffffffffff) };
  5119. unsigned numBytes[] = { 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 5, 5, 8, 9, 9, 9 };
  5120. unsigned numValues = _elements_in(values);
  5121. byte temp[9];
  5122. for (unsigned i = 0; i < numValues; i++)
  5123. {
  5124. rtlSetPackedUnsigned(temp, values[i]);
  5125. assertex(rtlGetPackedSize(temp) == numBytes[i]);
  5126. assertex(rtlGetPackedUnsigned(temp) == values[i]);
  5127. }
  5128. for (unsigned j= 0; j < 2000000; j++)
  5129. {
  5130. unsigned __int64 value = I64C(1) << (rtlRandom() & 63);
  5131. // unsigned value = rtlRandom();
  5132. rtlSetPackedUnsigned(temp, value);
  5133. assertex(rtlGetPackedSize(temp) == numExtraBytesFromValue(value)+1);
  5134. assertex(rtlGetPackedUnsigned(temp) == value);
  5135. }
  5136. for (unsigned k= 0; k < 63; k++)
  5137. {
  5138. unsigned __int64 value1 = I64C(1) << k;
  5139. rtlSetPackedUnsigned(temp, value1);
  5140. assertex(rtlGetPackedSize(temp) == numExtraBytesFromValue(value1)+1);
  5141. assertex(rtlGetPackedUnsigned(temp) == value1);
  5142. unsigned __int64 value2 = value1-1;
  5143. rtlSetPackedUnsigned(temp, value2);
  5144. assertex(rtlGetPackedSize(temp) == numExtraBytesFromValue(value2)+1);
  5145. assertex(rtlGetPackedUnsigned(temp) == value2);
  5146. }
  5147. }
  5148. static void testPackedSigned()
  5149. {
  5150. __int64 values[] = { 0, 1, -2, 10, 63, 64, -64, -65, 8191, 8192, 0x3fffffff,
  5151. I64C(0x7fffffffffffff), I64C(0x80000000000000), I64C(0x7fffffffffffffff), I64C(0x8000000000000000) };
  5152. unsigned numBytes[] = { 1, 1, 1, 1, 1, 2, 1, 2, 2, 3, 5,
  5153. 8, 9, 9, 9 };
  5154. unsigned numValues = _elements_in(values);
  5155. byte temp[9];
  5156. for (unsigned i = 0; i < numValues; i++)
  5157. {
  5158. rtlSetPackedSigned(temp, values[i]);
  5159. assertex(rtlGetPackedSize(temp) == numBytes[i]);
  5160. assertex(rtlGetPackedSigned(temp) == values[i]);
  5161. }
  5162. }
  5163. #endif
  5164. void ensureRtlLoaded()
  5165. {
  5166. }
  5167. #ifdef _USE_CPPUNIT
  5168. #include "unittests.hpp"
  5169. class EclRtlTests : public CppUnit::TestFixture
  5170. {
  5171. CPPUNIT_TEST_SUITE( EclRtlTests );
  5172. CPPUNIT_TEST(RegexTest);
  5173. CPPUNIT_TEST(MultiRegexTest);
  5174. CPPUNIT_TEST_SUITE_END();
  5175. protected:
  5176. void RegexTest()
  5177. {
  5178. rtlCompiledStrRegex r;
  5179. size32_t outlen;
  5180. char * out = NULL;
  5181. r.setPattern("([A-Z]+)[ ]?'(S) ", true);
  5182. r->replace(outlen, out, 7, "ABC'S ", 5, "$1$2 ");
  5183. ASSERT(outlen==6);
  5184. ASSERT(out != NULL);
  5185. ASSERT(memcmp(out, "ABCS ", outlen)==0);
  5186. rtlFree(out);
  5187. }
  5188. void MultiRegexTest()
  5189. {
  5190. class RegexTestThread : public Thread
  5191. {
  5192. virtual int run()
  5193. {
  5194. for (int i = 0; i < 100000; i++)
  5195. {
  5196. rtlCompiledStrRegex r;
  5197. size32_t outlen;
  5198. char * out = NULL;
  5199. r.setPattern("([A-Z]+)[ ]?'(S) ", true);
  5200. r->replace(outlen, out, 7, "ABC'S ", 5, "$1$2 ");
  5201. ASSERT(outlen==6);
  5202. ASSERT(out != NULL);
  5203. ASSERT(memcmp(out, "ABCS ", outlen)==0);
  5204. rtlFree(out);
  5205. }
  5206. return 0;
  5207. }
  5208. };
  5209. RegexTestThread t1;
  5210. RegexTestThread t2;
  5211. RegexTestThread t3;
  5212. t1.start();
  5213. t2.start();
  5214. t3.start();
  5215. t1.join();
  5216. t2.join();
  5217. t3.join();
  5218. }
  5219. };
  5220. CPPUNIT_TEST_SUITE_REGISTRATION( EclRtlTests );
  5221. CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( EclRtlTests, "EclRtlTests" );
  5222. #endif