/*##############################################################################
Copyright (C) 2011 HPCC Systems.
All rights reserved. This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
############################################################################## */
#include "limits.h"
#ifdef _USE_BOOST_REGEX
#include "boost/regex.hpp" // must precede platform.h ; n.b. this uses a #pragma comment(lib, ...) to link the appropriate .lib in MSVC
#endif
#include "platform.h"
#include
#include
#include "jexcept.hpp"
#include "jmisc.hpp"
#include "jutil.hpp"
#include "jlib.hpp"
#include "jptree.hpp"
#include "junicode.hpp"
#include "eclrtl.hpp"
#include "bcd.hpp"
#include "eclrtl_imp.hpp"
#include "unicode/uchar.h"
#include "unicode/ucol.h"
#include "unicode/ustring.h"
#include "unicode/ucnv.h"
#include "unicode/schriter.h"
#include "unicode/regex.h"
#include "unicode/normlzr.h"
#include "unicode/locid.h"
#include "jlog.hpp"
#include "jmd5.hpp"
#include "rtlqstr.ipp"
#ifndef _WIN32
//typedef long long __int64;
#define _fastcall
#define __fastcall
#define _stdcall
#define __stdcall
#endif
#define UTF8_CODEPAGE "UTF-8"
#define UTF8_MAXSIZE 4
IRandomNumberGenerator * random_;
static CriticalSection random_Sect;
MODULE_INIT(INIT_PRIORITY_ECLRTL_ECLRTL)
{
random_ = createRandomNumberGenerator();
random_->seed((unsigned)get_cycles_now());
return true;
}
MODULE_EXIT()
{
random_->Release();
}
//=============================================================================
// Miscellaneous string functions...
ECLRTL_API void * rtlMalloc(size32_t size)
{
return malloc(size);
}
void rtlFree(void *ptr)
{
free(ptr);
}
ECLRTL_API void * rtlRealloc(void * _ptr, size32_t size)
{
return realloc(_ptr, size);
}
//=============================================================================
static IRtlRowCallback * rowCallback = NULL;
ECLRTL_API void rtlReleaseRow(const void * row)
{
if (row)
rowCallback->releaseRow(row);
}
ECLRTL_API void rtlReleaseRowset(unsigned count, byte * * rowset)
{
rowCallback->releaseRowset(count, rowset);
}
ECLRTL_API IRtlRowCallback * rtlSetReleaseRowHook(IRtlRowCallback * hook)
{
IRtlRowCallback * prev = rowCallback;
rowCallback = hook;
return prev;
}
ECLRTL_API void * rtlLinkRow(const void * row)
{
return rowCallback->linkRow(row);
}
ECLRTL_API byte * * rtlLinkRowset(byte * * rowset)
{
return rowCallback->linkRowset(rowset);
}
//=============================================================================
// Unicode helper classes and functions
// escape
void escapeUnicode(unsigned inlen, UChar const * in, StringBuffer & out)
{
UCharCharacterIterator iter(in, inlen);
for(iter.first32(); iter.hasNext(); iter.next32())
{
UChar32 c = iter.current32();
if(c < 0x80)
out.append((char) c);
else if (c < 0x10000)
out.appendf("\\u%04X", c);
else
out.appendf("\\U%08X", c);
}
}
// locales and collators
static unsigned const unicodeStrengthLimit = 5;
static UCollationStrength unicodeStrength[unicodeStrengthLimit] =
{
UCOL_PRIMARY,
UCOL_SECONDARY,
UCOL_TERTIARY,
UCOL_QUATERNARY,
UCOL_IDENTICAL
};
class RTLLocale : public CInterface
{
public:
RTLLocale(char const * _locale) : locale(_locale)
{
for(unsigned i=0; i unicodeStrengthLimit) strength = unicodeStrengthLimit;
if(!colls[strength-1])
{
UErrorCode err = U_ZERO_ERROR;
const_cast(colls)[strength-1] = ucol_open(locale.get(), &err);
assertex(U_SUCCESS(err));
ucol_setStrength(colls[strength-1], unicodeStrength[strength-1]);
}
return colls[strength-1];
}
private:
StringAttr locale;
UCollator * colls[unicodeStrengthLimit];
};
typedef MapStringTo MapStrToLocale;
MapStrToLocale *localeMap;
CriticalSection localeCrit;
MODULE_INIT(INIT_PRIORITY_STANDARD)
{
localeMap = new MapStrToLocale;
return true;
}
MODULE_EXIT()
{
delete localeMap;
}
bool rtlGetNormalizedUnicodeLocaleName(unsigned len, char const * in, char * out)
{
bool isPrimary = true;
bool ok = true;
unsigned i;
for(i=0; igetValue(locale);
if(!loc)
{
unsigned ll = strlen(locale);
StringBuffer lnorm;
rtlGetNormalizedUnicodeLocaleName(ll, locale, lnorm.reserve(ll));
localeMap->setValue(locale, lnorm.str());
loc = localeMap->getValue(locale);
}
return loc;
}
// converters
class RTLUnicodeConverter : public CInterface
{
public:
RTLUnicodeConverter(char const * codepage)
{
UErrorCode err = U_ZERO_ERROR;
conv = ucnv_open(codepage, &err);
if (!U_SUCCESS(err))
{
StringBuffer msg;
msg.append("Unrecognised codepage '").append(codepage).append("'");
rtlFail(0, msg.str());
}
}
~RTLUnicodeConverter()
{
ucnv_close(conv);
}
UConverter * query() const { return conv; }
private:
UConverter * conv;
};
typedef MapStringTo MapStrToUnicodeConverter;
MapStrToUnicodeConverter *unicodeConverterMap;
CriticalSection ucmCrit;
MODULE_INIT(INIT_PRIORITY_STANDARD)
{
unicodeConverterMap = new MapStrToUnicodeConverter;
return true;
}
MODULE_EXIT()
{
delete unicodeConverterMap;
}
RTLUnicodeConverter * queryRTLUnicodeConverter(char const * codepage)
{
CriticalBlock b(ucmCrit);
RTLUnicodeConverter * conv = unicodeConverterMap->getValue(codepage);
if(!conv)
{
unicodeConverterMap->setValue(codepage, codepage);
conv = unicodeConverterMap->getValue(codepage);
}
return conv;
}
// normalization
bool unicodeNeedsNormalize(unsigned inlen, UChar * in, UErrorCode * err)
{
return !unorm_isNormalized(in, inlen, UNORM_NFC, err);
}
bool vunicodeNeedsNormalize(UChar * in, UErrorCode * err)
{
return !unorm_isNormalized(in, -1, UNORM_NFC, err);
}
void unicodeReplaceNormalized(unsigned inlen, UChar * in, UErrorCode * err)
{
UChar * buff = (UChar *)malloc(inlen*2);
unsigned len = unorm_normalize(in, inlen, UNORM_NFC, 0, buff, inlen, err);
while(len MemoryAttrMapping;
MemoryAttrMapping *unicodeBlankCache;
CriticalSection ubcCrit;
MODULE_INIT(INIT_PRIORITY_STANDARD)
{
unicodeBlankCache = new MemoryAttrMapping;
return true;
}
MODULE_EXIT()
{
delete unicodeBlankCache;
}
UChar unicodeSpace = 0x0020;
void codepageBlankFill(char const * codepage, char * out, unsigned len)
{
CriticalBlock b(ubcCrit);
MemoryAttr * cached = unicodeBlankCache->getValue(codepage);
if(cached)
{
char const * blank = (char const *)cached->get();
size32_t blanklen = cached->length();
if(blanklen==1)
memset(out, *blank, len);
else
multimemset(out, len, blank, blanklen);
}
else
{
UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
unsigned blanklen;
char * blank;
rtlUnicodeToCodepageX(blanklen, blank, 1, &unicodeSpace, codepage);
unicodeBlankCache->setValue(codepage, blanklen);
unicodeBlankCache->getValue(codepage)->set(blanklen, blank);
if(blanklen==1)
memset(out, *blank, len);
else
multimemset(out, len, blank, blanklen);
free(blank);
}
}
//---------------------------------------------------------------------------
// floating point functions
static const double smallPowers[16] = {
1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15 };
static double powerOfTen(int x)
{
if (x < 0)
return 1 / powerOfTen(-x);
double value = smallPowers[x&15];
double scale = 1e16;
x >>= 4;
while (x)
{
if (x & 1)
value *= scale;
scale *= scale;
x >>= 1;
}
return value;
};
static double kk = (1.0 / ((unsigned __int64)1<<53));
__int64 rtlRound(double x)
{
//a fudge to make numbers that are inexact after a division round up "correctly".
//coded rather oddly as microsoft's optimizer has a habit of throwing it away otherwise...
volatile double tt = x * kk;
x += tt;
if (x >= 0.0)
return (__int64)(x + 0.5);
return -(__int64)(-x + 0.5);
}
double rtlRoundTo(const double x, int places)
{
if (x < 0)
return -rtlRoundTo(-x, places);
volatile double tt = x * kk;
double x0 = x + tt;
if (places >= 0)
{
double scale = powerOfTen(places);
return floor(x * scale + 0.5) / scale;
}
else
{
double scale = powerOfTen(-places);
return floor(x / scale + 0.5) * scale;
}
}
__int64 rtlRoundDown(double x)
{
if (x >= 0.0)
return (__int64)floor(x);
return (__int64)ceil(x);
}
__int64 rtlRoundUp(double x)
{
if (x >= 0.0)
return (__int64)ceil(x);
return (__int64)floor(x);
}
//=============================================================================
// Numeric conversion functions... - fixed length target
#define intToStringNBody() \
unsigned len = numtostr(temp, val); \
if (len > l) \
memset(t,'*',l); \
else \
{ \
memcpy(t,temp,len); \
memset(t+len, ' ', l-len); \
}
void rtlUInt4ToStr(size32_t l, char * t, unsigned val)
{
char temp[20];
intToStringNBody();
}
void rtlUInt8ToStr(size32_t l, char * t, unsigned __int64 val)
{
char temp[40];
intToStringNBody();
}
void rtlInt4ToStr(size32_t l, char * t, int val)
{
char temp[20];
intToStringNBody();
}
void rtlInt8ToStr(size32_t l, char * t, __int64 val)
{
char temp[40];
intToStringNBody();
}
//=============================================================================
// Numeric conversion functions... - unknown length target
#define intToUnknownStringBody() \
unsigned len = numtostr(temp, val); \
char * result = (char *)malloc(len); \
memcpy(result, temp, len); \
l = len; \
t = result;
void rtlUInt4ToStrX(size32_t & l, char * & t, unsigned val)
{
char temp[20];
intToUnknownStringBody();
}
void rtlUInt8ToStrX(size32_t & l, char * & t, unsigned __int64 val)
{
char temp[40];
intToUnknownStringBody();
}
void rtlInt4ToStrX(size32_t & l, char * & t, int val)
{
char temp[20];
intToUnknownStringBody();
}
void rtlInt8ToStrX(size32_t & l, char * & t, __int64 val)
{
char temp[40];
intToUnknownStringBody();
}
//=============================================================================
// Numeric conversion functions... - fixed length ebcdic target
// ILKA - converting ebcdic to numeric still uses string in between, for more efficiency
// a function numtoebcdicstr should be implemented
#define intToEbcdicStringNBody() \
unsigned len = numtostr(astr, val); \
rtlStrToEStr(sizeof(estr),estr,len,astr); \
if (len > l) \
memset(t,0x2A,l); \
else \
{ \
memcpy(t,estr,len); \
memset(t+len, '@', l-len); \
}
void rtl_l42en(size32_t l, char * t, unsigned val)
{
char astr[20];
char estr[20];
intToEbcdicStringNBody();
}
void rtl_l82en(size32_t l, char * t, unsigned __int64 val)
{
char astr[40];
char estr[40];
intToEbcdicStringNBody();
}
void rtl_ls42en(size32_t l, char * t, int val)
{
char astr[20];
char estr[20];
intToEbcdicStringNBody();
}
void rtl_ls82en(size32_t l, char * t, __int64 val)
{
char astr[40];
char estr[40];
intToEbcdicStringNBody();
}
//=============================================================================
// Numeric conversion functions... - unknown length ebcdic target
#define intToUnknownEbcdicStringBody() \
unsigned alen = numtostr(astr, val); \
rtlStrToEStrX(elen,estr,alen,astr); \
char * result = (char *)malloc(elen); \
memcpy(result, estr, elen); \
l = elen; \
t = result;
#if defined _MSC_VER
#pragma warning(push)
#pragma warning(disable:4700)
#endif
void rtl_l42ex(size32_t & l, char * & t, unsigned val)
{
char astr[20];
char * estr;
unsigned elen;
intToUnknownEbcdicStringBody();
}
void rtl_l82ex(size32_t & l, char * & t, unsigned __int64 val)
{
char astr[40];
char * estr;
unsigned elen;
intToUnknownEbcdicStringBody();
}
void rtl_ls42ex(size32_t & l, char * & t, int val)
{
char astr[20];
char * estr;
unsigned elen;
intToUnknownEbcdicStringBody();
}
void rtl_ls82ex(size32_t & l, char * & t, __int64 val)
{
char astr[40];
char * estr;
unsigned elen;
intToUnknownEbcdicStringBody();
}
#ifdef _MSC_VER
#pragma warning(pop)
#endif
//=============================================================================
// Numeric conversion functions... - fixed length variable target
#define intToVarStringNBody() \
unsigned len = numtostr(temp, val) + 1; \
if (len > l) \
{ \
memset(t,'*',l); \
t[l-1]=0; \
} \
else \
memcpy(t,temp,len);
void rtlUInt4ToVStr(size32_t l, char * t, unsigned val)
{
char temp[20];
intToVarStringNBody();
}
void rtlUInt8ToVStr(size32_t l, char * t, unsigned __int64 val)
{
char temp[40];
intToVarStringNBody();
}
void rtlInt4ToVStr(size32_t l, char * t, int val)
{
char temp[20];
intToVarStringNBody();
}
void rtlInt8ToVStr(size32_t l, char * t, __int64 val)
{
char temp[40];
intToVarStringNBody();
}
//=============================================================================
// Numeric conversion functions... - unknown length variable target
#define intToVarStringXBody() \
unsigned len = numtostr(temp, val); \
temp[len] = 0; \
return strdup(temp);
char * rtlUInt4ToVStrX(unsigned val)
{
char temp[20];
intToVarStringXBody();
}
char * rtlUInt8ToVStrX(unsigned __int64 val)
{
char temp[40];
intToVarStringXBody();
}
char * rtlInt4ToVStrX(int val)
{
char temp[20];
intToVarStringXBody();
}
char * rtlInt8ToVStrX(__int64 val)
{
char temp[40];
intToVarStringXBody();
}
//---------------------------------------------------------------------------
double rtlStrToReal(size32_t l, const char * t)
{
char * temp = (char *)alloca(l+1);
memcpy(temp, t, l);
temp[l] = 0;
return rtlVStrToReal(temp);
}
double rtlEStrToReal(size32_t l, const char * t)
{
char * astr = (char*)alloca(l);
rtlEStrToStr(l,astr,l,t);
char * temp = (char *)alloca(l+1);
memcpy(temp, astr, l);
temp[l] = 0;
return rtlVStrToReal(temp);
}
double rtlVStrToReal(const char * t)
{
char * end;
return strtod(t, &end);
}
double rtl_ex2f(const char * t)
{
unsigned len = strlen(t);
char * astr = (char*)alloca(len+1);
rtlEStrToStr(len,astr,len,t);
astr[len] = 0;
return rtlVStrToReal(astr);
}
double rtlUnicodeToReal(size32_t l, UChar const * t)
{
unsigned bufflen;
char * buff;
rtlUnicodeToCodepageX(bufflen, buff, l, t, "ascii");
double ret = rtlStrToReal(bufflen, buff);
rtlFree(buff);
return ret;
}
//---------------------------------------------------------------------------
void rtlRealToStr(size32_t l, char * t, double val)
{
StringBuffer temp;
temp.append(val);
unsigned len = temp.length();
if (len > l)
memset(t,'*',l);
else
{
memcpy(t,temp.str(),len);
memset(t+len, ' ', l-len);
}
}
void rtlRealToStr(size32_t l, char * t, float val)
{
StringBuffer temp;
temp.append(val);
unsigned len = temp.length();
if (len > l)
memset(t,'*',l);
else
{
memcpy(t,temp.str(),len);
memset(t+len, ' ', l-len);
}
}
void rtlRealToStrX(size32_t & l, char * & t, double val)
{
StringBuffer temp;
temp.append(val);
unsigned len = temp.length();
char * result = (char *)malloc(len);
memcpy(result,temp.str(),len);
l = len;
t = result;
}
void rtlRealToStrX(size32_t & l, char * & t, float val)
{
StringBuffer temp;
temp.append(val);
unsigned len = temp.length();
char * result = (char *)malloc(len);
memcpy(result,temp.str(),len);
l = len;
t = result;
}
void rtlRealToVStr(size32_t l, char * t, double val)
{
StringBuffer temp;
temp.append(val);
unsigned len = temp.length()+1;
if (len > l)
{
memset(t,'*',l);
t[l-1]=0;
}
else
{
memcpy(t,temp.str(),len);
}
}
void rtlRealToVStr(size32_t l, char * t, float val)
{
StringBuffer temp;
temp.append(val);
unsigned len = temp.length()+1;
if (len > l)
{
memset(t,'*',l);
t[l-1]=0;
}
else
{
memcpy(t,temp.str(),len);
}
}
char * rtlRealToVStrX(double val)
{
StringBuffer temp;
temp.append(val);
return strdup(temp);
}
char * rtlRealToVStrX(float val)
{
StringBuffer temp;
temp.append(val);
return strdup(temp);
}
//---------------------------------------------------------------------------
#define SkipSpaces(l, t) \
while (l) \
{ \
char c = *t; \
switch (c) \
{ \
case ' ': \
case '\t': \
case '-': \
case '+': \
break; \
default: \
goto done; \
} \
l--; \
t++; \
} \
done:
#define SkipSignSpaces(l, t, negate) \
while (l) \
{ \
char c = *t; \
switch (c) \
{ \
case '-': \
negate = true; \
break; \
case ' ': \
case '\t': \
case '+': \
break; \
default: \
goto done; \
} \
l--; \
t++; \
} \
done:
unsigned rtlStrToUInt4(size32_t l, const char * t)
{
SkipSpaces(l, t);
unsigned v = 0;
while (l--)
{
char c = *t++;
if ((c >= '0') && (c <= '9'))
v = v * 10 + (c-'0');
else
break;
}
return v;
}
unsigned __int64 rtlStrToUInt8(size32_t l, const char * t)
{
SkipSpaces(l, t);
unsigned __int64 v = 0;
while (l--)
{
char c = *t++;
if ((c >= '0') && (c <= '9'))
v = v * 10 + (c-'0');
else
break;
}
return v;
}
int rtlStrToInt4(size32_t l, const char * t)
{
bool negate = false;
SkipSignSpaces(l, t, negate);
int v = 0;
while (l--)
{
char c = *t++;
if ((c >= '0') && (c <= '9'))
v = v * 10 + (c-'0');
else
break;
}
return negate ? -v : v;
}
__int64 rtlStrToInt8(size32_t l, const char * t)
{
bool negate = false;
SkipSignSpaces(l, t, negate);
__int64 v = 0;
while (l--)
{
char c = *t++;
if ((c >= '0') && (c <= '9'))
v = v * 10 + (c-'0');
else
break;
}
return negate ? -v : v;
}
__int64 rtlUnicodeToInt8(size32_t l, UChar const * t)
{
unsigned bufflen;
char * buff;
rtlUnicodeToCodepageX(bufflen, buff, l, t, "ascii");
__int64 ret = rtlStrToInt8(bufflen, buff);
rtlFree(buff);
return ret;
}
bool rtlStrToBool(size32_t l, const char * t)
{
while (l--)
{
char c = *t++;
if (c != ' ')
return true;
}
return false;
}
bool rtlUnicodeToBool(size32_t l, UChar const * t)
{
while(l--)
if(*t++ != 0x20) return true;
return false;
}
// return true for "on", "true" or any non-zero constant, else false;
bool rtlCsvStrToBool(size32_t l, const char * t)
{
return clipStrToBool(l, t);
}
//---------------------------------------------------------------------------
unsigned rtlEStrToUInt4(size32_t l, const char * t)
{
char * astr = (char*)alloca(l);
rtlEStrToStr(l,astr,l,t);
return rtlStrToUInt4(l,astr);
}
unsigned __int64 rtlEStrToUInt8(size32_t l, const char * t)
{
char * astr = (char*)alloca(l);
rtlEStrToStr(l,astr,l,t);
return rtlStrToUInt8(l,astr);
}
int rtlEStrToInt4(size32_t l, const char * t)
{
char * astr = (char*)alloca(l);
rtlEStrToStr(l,astr,l,t);
return rtlStrToInt4(l,astr);
}
__int64 rtlEStrToInt8(size32_t l, const char * t)
{
char * astr = (char*)alloca(l);
rtlEStrToStr(l,astr,l,t);
return rtlStrToInt8(l,astr);
}
bool rtl_en2b(size32_t l, const char * t)
{
char * astr = (char*)alloca(l);
rtlEStrToStr(l,astr,l,t);
return rtlStrToBool(l,astr);
}
//---------------------------------------------------------------------------
unsigned rtlVStrToUInt4(const char * t)
{
return rtlStrToUInt4(strlen(t), t);
}
unsigned __int64 rtlVStrToUInt8(const char * t)
{
return rtlStrToUInt8(strlen(t), t);
}
int rtlVStrToInt4(const char * t)
{
return rtlStrToInt4(strlen(t), t);
}
__int64 rtlVStrToInt8(const char * t)
{
return rtlStrToInt8(strlen(t), t);
}
bool rtlVStrToBool(const char * t)
{
char c;
while ((c = *t++) != 0)
{
//MORE: Allow spaces if we change the semantics.
return true;
}
return false;
}
//---------------------------------------------------------------------------
void holeIntFormat(size32_t maxlen, char * target, __int64 value, unsigned width, unsigned flags)
{
StringBuffer result;
if (flags & 1)
result.appendf("%0*"I64F"d", width, value);
else
result.appendf("%*"I64F"d", width, value);
size32_t written = result.length();
if (written > maxlen)
memset(target, '*', maxlen);
else
{
memset(target+written, ' ', maxlen-written);
memcpy(target, result.str(), written);
}
}
void holeRealFormat(size32_t maxlen, char * target, double value, unsigned width, unsigned places)
{
if ((int) width < 0)
return;
char temp[500];
if (width > sizeof(temp))
{
unsigned delta = width - sizeof(temp);
memset(target, ' ', delta);
target += delta;
width = sizeof(temp);
}
if (places >= width) places = width-1;
unsigned written = sprintf(temp, "%*.*f", width, places, value);
if (written > width)
{
memset(target, '*', width);
if (places)
target[width-places-1] = '.';
}
else
memcpy(target, temp, width);
}
//=============================================================================
// Conversion functions...
void rtlIntFormat(unsigned & len, char * & target, __int64 value, unsigned width, unsigned flags)
{
if ((int) width <= 0)
{
len = 0;
target = NULL;
return;
}
len = width;
target = (char *)malloc(width);
holeIntFormat(width, target, value, width, flags);
}
void rtlRealFormat(unsigned & len, char * & target, double value, unsigned width, unsigned places)
{
if ((int) width < 0)
{
len = 0;
target = NULL;
return;
}
len = width;
target = (char *)malloc(width);
holeRealFormat(width, target, value, width, places);
}
//=============================================================================
// String functions...
bool rtlDataToBool(unsigned len, const void * _src)
{
const char * src = (const char *)_src;
while (len--)
if (*src++)
return true;
return false;
}
void rtlBoolToData(unsigned tlen, void * tgt, bool src)
{
memset(tgt, 0, tlen);
if (src)
((char *)tgt)[tlen-1] = 1;
}
void rtlBoolToStr(unsigned tlen, void * tgt, bool src)
{
memset(tgt, ' ', tlen);
if (src)
((char *)tgt)[tlen-1] = '1';
}
void rtlBoolToVStr(char * tgt, bool src)
{
if (src)
*tgt++ = '1';
*tgt = 0;
}
void rtlBoolToStrX(unsigned & tlen, char * & tgt, bool src)
{
if (src)
{
char * ret = (char *)malloc(1);
ret[0] = '1';
tlen = 1;
tgt = ret;
}
else
{
tlen = 0;
tgt = NULL;
}
}
char * rtlBoolToVStrX(bool src)
{
if (src)
return strdup("1");
else
return strdup("");
}
//-----------------------------------------------------------------------------
// String copying functions....
void rtlDataToData(unsigned tlen, void * tgt, unsigned slen, const void * src)
{
if (slen > tlen)
slen = tlen;
memcpy(tgt, src, slen);
if (tlen > slen)
memset((char *)tgt+slen, 0, tlen-slen);
}
void rtlStrToData(unsigned tlen, void * tgt, unsigned slen, const void * src)
{
if (slen > tlen)
slen = tlen;
memcpy(tgt, src, slen);
if (tlen > slen)
memset((char *)tgt+slen, 0, tlen-slen);
}
void rtlStrToStr(unsigned tlen, void * tgt, unsigned slen, const void * src)
{
if (slen > tlen)
slen = tlen;
memcpy(tgt, src, slen);
if (tlen > slen)
memset((char *)tgt+slen, ' ', tlen-slen);
}
void rtlStrToVStr(unsigned tlen, void * tgt, unsigned slen, const void * src)
{
if ((slen >= tlen) && (tlen != 0))
slen = tlen-1;
memcpy(tgt, src, slen);
*((char *)tgt+slen)=0;
}
void rtlStr2EStr(unsigned tlen, char * tgt, unsigned slen, const char * src)
{
rtlStrToEStr(tlen,tgt,slen,src);
}
void rtlEStr2Data(unsigned tlen, void * tgt, unsigned slen, const char * src)
{
if (slen > tlen)
slen = tlen;
rtlEStrToStr(slen,(char *)tgt,slen,src);
if (tlen > slen)
memset((char *)tgt+slen, 0, tlen-slen);
}
void rtlEStr2Str(unsigned tlen, void * tgt, unsigned slen, const char * src)
{
rtlEStrToStr(tlen,(char *)tgt,slen,src);
}
void rtlEStrToVStr(unsigned tlen, void * tgt, unsigned slen, const char * src)
{
if (slen >= tlen)
slen = tlen-1;
rtlEStrToStr(slen,(char *)tgt,slen,src);
*((char *)tgt+slen)=0;
}
void rtlEStrToEStr(unsigned tlen, void * tgt, unsigned slen, const void * src)
{
if (slen > tlen)
slen = tlen;
memcpy(tgt, src, slen);
if (tlen > slen)
memset((char *)tgt+slen, '@', tlen-slen);
}
void rtlVStrToData(unsigned tlen, void * tgt, const char * src)
{
rtlStrToData(tlen, tgt, strlen(src), src);
}
void rtlVStrToStr(unsigned tlen, void * tgt, const char * src)
{
rtlStrToStr(tlen, tgt, strlen(src), src);
}
void rtlVStr2EStr(unsigned tlen, char * tgt, const char * src)
{
rtlStr2EStr(tlen, tgt, strlen(src), src);
}
void rtlVStrToVStr(unsigned tlen, void * tgt, const char * src)
{
rtlStrToVStr(tlen, tgt, strlen(src), src);
}
char *rtlCreateQuotedString(unsigned _len_tgt,char * tgt)
{
// Add ' at start and end. MORE! also needs to handle embedded quotes
char * result = (char *)malloc(_len_tgt + 3);
result[0] = '\'';
memcpy(result+1, tgt, _len_tgt);
result[_len_tgt+1] = '\'';
result[_len_tgt+2] = 0;
return result;
}
//-----------------------------------------------------------------------------
//List of strings with length of -1 to mark the end...
void rtlConcat(unsigned & tlen, char * * tgt, ...)
{
va_list args;
unsigned totalLength = 0;
va_start(args, tgt);
for (;;)
{
unsigned len = va_arg(args, unsigned);
if (len+1==0)
break;
char * str = va_arg(args, char *);
totalLength += len;
}
va_end(args);
char * buffer = (char *)malloc(totalLength);
char * cur = buffer;
va_start(args, tgt);
for (;;)
{
unsigned len = va_arg(args, unsigned);
if (len+1==0)
break;
char * str = va_arg(args, char *);
memcpy(cur, str, len);
cur += len;
}
va_end(args);
tlen = totalLength;
*tgt = buffer;
}
void rtlConcatVStr(char * * tgt, ...)
{
va_list args;
unsigned totalLength = 0;
va_start(args, tgt);
for (;;)
{
unsigned len = va_arg(args, unsigned);
if (len+1==0)
break;
char * str = va_arg(args, char *);
totalLength += len;
}
va_end(args);
char * buffer = (char *)malloc(totalLength+1);
char * cur = buffer;
va_start(args, tgt);
for (;;)
{
unsigned len = va_arg(args, unsigned);
if (len+1==0)
break;
char * str = va_arg(args, char *);
memcpy(cur, str, len);
cur += len;
}
va_end(args);
cur[0] = 0;
*tgt = buffer;
}
void rtlConcatUnicode(unsigned & tlen, UChar * * tgt, ...)
{
va_list args;
unsigned totalLength = 0;
va_start(args, tgt);
for(;;)
{
unsigned len = va_arg(args, unsigned);
if(len+1==0)
break;
UChar * str = va_arg(args, UChar *);
totalLength += len;
}
va_end(args);
UChar * buffer = (UChar *)malloc(totalLength*2); //I *believe* this is a valid upper limit, as an NFC concatenation can only be shorter than the sum of its parts
unsigned idx = 0;
UErrorCode err = U_ZERO_ERROR;
va_start(args, tgt);
for(;;)
{
unsigned len = va_arg(args, unsigned);
if(len+1==0)
break;
UChar * str = va_arg(args, UChar *);
if (len)
idx = unorm_concatenate(buffer, idx, str, len, buffer, totalLength, UNORM_NFC, 0, &err);
}
va_end(args);
*tgt = buffer;
tlen = idx;
}
void rtlConcatVUnicode(UChar * * tgt, ...)
{
va_list args;
unsigned totalLength = 0;
va_start(args, tgt);
for(;;)
{
unsigned len = va_arg(args, unsigned);
if(len+1==0)
break;
UChar * str = va_arg(args, UChar *);
totalLength += len;
}
va_end(args);
UChar * buffer = (UChar *)malloc((totalLength+1)*2); //I *believe* this is a valid upper limit, as an NFC concatenation can only be shorter than the sum of its parts
unsigned idx = 0;
UErrorCode err = U_ZERO_ERROR;
va_start(args, tgt);
for(;;)
{
unsigned len = va_arg(args, unsigned);
if(len+1==0)
break;
UChar * str = va_arg(args, UChar *);
if (len)
idx = unorm_concatenate(buffer, idx, str, len, buffer, totalLength, UNORM_NFC, 0, &err);
}
va_end(args);
buffer[idx++] = 0x0000;
*tgt = buffer;
}
//List of strings with length of -1 to mark the end...
void rtlConcatStrF(unsigned tlen, void * _tgt, int fill, ...)
{
va_list args;
char * tgt = (char *)_tgt;
unsigned offset = 0;
va_start(args, fill);
while (offset != tlen)
{
unsigned len = va_arg(args, unsigned);
if (len+1==0)
break;
const char * str = va_arg(args, const char *);
unsigned copyLen = len + offset > tlen ? tlen - offset : len;
memcpy(tgt+offset, str, copyLen);
offset += copyLen;
}
va_end(args);
if (offset < tlen)
memset(tgt+offset, fill, tlen-offset);
}
void rtlConcatVStrF(unsigned tlen, char * tgt, ...)
{
va_list args;
unsigned offset = 0;
va_start(args, tgt);
while (offset != tlen)
{
unsigned len = va_arg(args, unsigned);
if (len+1==0)
break;
const char * str = va_arg(args, const char *);
unsigned copyLen = len + offset > tlen ? tlen - offset : len;
memcpy(tgt+offset, str, copyLen);
offset += copyLen;
}
va_end(args);
memset(tgt+offset, 0, (tlen+1)-offset);
}
void rtlConcatUnicodeF(unsigned tlen, UChar * tgt, ...)
{
va_list args;
unsigned idx = 0;
UErrorCode err = U_ZERO_ERROR;
va_start(args, tgt);
for(;;)
{
unsigned len = va_arg(args, unsigned);
if(len+1==0)
break;
UChar * str = va_arg(args, UChar *);
if (len)
idx = unorm_concatenate(tgt, idx, str, len, tgt, tlen, UNORM_NFC, 0, &err);
}
va_end(args);
while (idx < tlen)
tgt[idx++] = ' ';
}
void rtlConcatVUnicodeF(unsigned tlen, UChar * tgt, ...)
{
va_list args;
unsigned idx = 0;
UErrorCode err = U_ZERO_ERROR;
va_start(args, tgt);
for(;;)
{
unsigned len = va_arg(args, unsigned);
if(len+1==0)
break;
UChar * str = va_arg(args, UChar *);
if (len)
idx = unorm_concatenate(tgt, idx, str, len, tgt, tlen, UNORM_NFC, 0, &err);
}
va_end(args);
while (idx < tlen)
tgt[idx++] = 0;
tgt[tlen] = 0;
}
//------------------------------------------------------------------------------------------------
// The followinf concat functions are all deprecated in favour of the variable number of argument
// versions
unsigned rtlConcatStrToStr(unsigned tlen, char * tgt, unsigned idx, unsigned slen, const char * src)
{
unsigned len = tlen-idx;
if (len > slen)
len = slen;
memcpy(tgt+idx, src, len);
return idx+len;
}
unsigned rtlConcatVStrToStr(unsigned tlen, char * tgt, unsigned idx, const char * src)
{
while (idx != tlen)
{
char next = *src++;
if (!next)
break;
tgt[idx++] = next;
}
return idx;
}
void rtlConcatStrToVStr(unsigned tlen, void * _tgt, unsigned slen, const void * src)
{
char * tgt = (char *)_tgt;
unsigned tend = strlen(tgt);
rtlStrToVStr(tlen-tend, tgt+tend, slen, src);
}
void rtlConcatVStrToVStr(unsigned tlen, void * _tgt, const char * src)
{
char * tgt = (char *)_tgt;
unsigned tend = strlen(tgt);
rtlVStrToVStr(tlen-tend, tgt+tend, src);
}
unsigned rtlConcatUnicodeToUnicode(unsigned tlen, UChar * tgt, unsigned idx, unsigned slen, UChar const * src)
{
UErrorCode err = U_ZERO_ERROR;
return unorm_concatenate(tgt, idx, src, slen, tgt, tlen, UNORM_NFC, 0, &err);
}
unsigned rtlConcatVUnicodeToUnicode(unsigned tlen, UChar * tgt, unsigned idx, UChar const * src)
{
return rtlConcatUnicodeToUnicode(tlen, tgt, idx, rtlUnicodeStrlen(src), src);
}
void rtlESpaceFill(unsigned tlen, char * tgt, unsigned idx)
{
if (idx < tlen)
memset(tgt+idx, '@', tlen-idx);
}
void rtlSpaceFill(unsigned tlen, char * tgt, unsigned idx)
{
if (idx < tlen)
memset(tgt+idx, ' ', tlen-idx);
}
void rtlZeroFill(unsigned tlen, char * tgt, unsigned idx)
{
if (idx < tlen)
memset(tgt+idx, 0, tlen-idx);
}
void rtlNullTerminate(unsigned tlen, char * tgt, unsigned idx)
{
if (idx >= tlen)
idx = tlen-1;
tgt[idx] = 0;
}
void rtlUnicodeSpaceFill(unsigned tlen, UChar * tgt, unsigned idx)
{
while(idx= tlen)
idx = tlen-1;
tgt[idx] = 0x0000;
}
void rtlUnicodeStrcpy(UChar * tgt, UChar const * src)
{
memcpy(tgt, src, rtlUnicodeStrlen(src)*2+2);
}
void rtlConcatExtend(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
{
unsigned len = tlen + slen;
tgt = (char *)realloc(tgt, len);
memcpy(tgt+tlen, src, slen);
tlen = len;
}
//-----------------------------------------------------------------------------
inline void normalizeFrom(unsigned & from, unsigned slen)
{
from--;
if ((int)from < 0)
from = 0;
else if (from > slen)
from = slen;
}
inline void normalizeFromTo(unsigned & from, unsigned & to)
{
from--;
if ((int)from < 0) from = 0;
if ((int)to < (int)from) to = from;
}
inline void clipFromTo(unsigned & from, unsigned & to, unsigned slen)
{
if (to > slen)
{
to = slen;
if (from > slen)
from = slen;
}
}
//NB: From and to are 1 based: Now fills to ensure the correct length.
void * doSubStrFT(unsigned & tlen, unsigned slen, const void * src, unsigned from, unsigned to, byte fillChar)
{
normalizeFromTo(from, to);
unsigned len = to - from;
clipFromTo(from, to, slen);
unsigned copylen = to - from;
char * buffer = (char *)malloc(len);
memcpy(buffer, (byte *)src+from, copylen);
if (copylen < len)
memset(buffer+copylen, fillChar, len-copylen);
tlen = len;
return buffer;
}
void rtlSubStrFX(unsigned & tlen, char * & tgt, unsigned slen, const char * src, unsigned from)
{
normalizeFrom(from, slen);
tlen = slen-from;
tgt = (char *) malloc(tlen);
memcpy(tgt, src+from, tlen);
}
void rtlSubStrFTX(unsigned & tlen, char * & tgt, unsigned slen, const char * src, unsigned from, unsigned to)
{
tgt = (char *)doSubStrFT(tlen, slen, src, from, to, ' ');
}
void rtlSubStrFT(unsigned tlen, char * tgt, unsigned slen, const char * src, unsigned from, unsigned to)
{
unsigned char fillChar = ' '; // More, should be passed as a parameter
normalizeFromTo(from, to);
clipFromTo(from, to, slen);
unsigned copylen = to - from;
if (copylen > tlen)
copylen = tlen;
memcpy(tgt, (const char *)src+from, copylen);
if (copylen < tlen)
memset(tgt+copylen, fillChar, tlen-copylen);
}
void rtlSubDataFT(unsigned tlen, void * tgt, unsigned slen, const void * src, unsigned from, unsigned to)
{
normalizeFromTo(from, to);
clipFromTo(from, to, slen);
unsigned copylen = to - from;
if (copylen > tlen)
copylen = tlen;
memcpy(tgt, (char *)src+from, copylen);
if (copylen < tlen)
memset((byte*)tgt+copylen, 0, tlen-copylen);
}
void rtlSubDataFTX(unsigned & tlen, void * & tgt, unsigned slen, const void * src, unsigned from, unsigned to)
{
tgt = doSubStrFT(tlen, slen, src, from, to, 0);
}
void rtlSubDataFX(unsigned & tlen, void * & tgt, unsigned slen, const void * src, unsigned from)
{
normalizeFrom(from, slen);
tlen = slen-from;
tgt = (char *) malloc(tlen);
memcpy(tgt, (const byte *)src+from, tlen);
}
void rtlUnicodeSubStrFTX(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src, unsigned from, unsigned to)
{
normalizeFromTo(from, to);
tlen = to - from;
clipFromTo(from, to, slen);
tgt = (UChar *)malloc(tlen*2);
unsigned copylen = to - from;
memcpy(tgt, src+from, copylen*2);
while(copylen= tlen)
slen = tlen-1;
memcpy(tgt, src, slen);
tgt[slen] = 0;
}
//not yet used, but would be needed for assignment to string rather than vstring
inline void rtlCopySubString(size32_t tlen, char * tgt, unsigned slen, const char * src, char fill)
{
if (slen > tlen)
slen = tlen;
memcpy(tgt, src, slen);
memset(tgt + slen, fill, tlen-slen);
}
unsigned rtlTrimUtf8StrLen(size32_t len, const char * t)
{
const byte * cur = (const byte *)t;
unsigned trimLength = 0;
for (unsigned i=0; i < len; i++)
{
unsigned next = readUtf8Character(UTF8_MAXSIZE, cur);
if (!u_isspace(next))
trimLength = i+1;
}
return trimLength;
}
//-----------------------------------------------------------------------------
// Functions to trim off left side blank spaces
void rtlTrimRight(size32_t & tlen, char * & tgt, unsigned slen, const char * src)
{
tlen = rtlTrimStrLen(slen, src);
tgt = rtlDupSubString(src, tlen);
}
void rtlTrimUnicodeRight(size32_t & tlen, UChar * & tgt, unsigned slen, UChar const * src)
{
tlen = rtlTrimUnicodeStrLen(slen, src);
tgt = rtlDupSubUnicode(src, tlen);
}
void rtlTrimVRight(size32_t & tlen, char * & tgt, const char * src)
{
tlen = rtlTrimVStrLen(src);
tgt = rtlDupSubString(src, tlen);
}
void rtlTrimVUnicodeRight(size32_t & tlen, UChar * & tgt, UChar const * src)
{
rtlTrimUnicodeRight(tlen, tgt, rtlUnicodeStrlen(src), src);
}
void rtlTrimUtf8Right(unsigned &tlen, char * &tgt, unsigned slen, char const * src)
{
unsigned trimLength;
size32_t trimSize;
rtlTrimUtf8Len(trimLength, trimSize, slen, src);
tlen = trimLength;
tgt = rtlDupSubString(src, trimSize);
}
void rtlAssignTrimRightV(size32_t tlen, char * tgt, unsigned slen, const char * src)
{
unsigned len = rtlTrimStrLen(slen, src);
rtlCopySubStringV(tlen, tgt, len, src);
}
void rtlAssignTrimVRightV(size32_t tlen, char * tgt, const char * src)
{
unsigned len = rtlTrimVStrLen(src);
rtlCopySubStringV(tlen, tgt, len, src);
}
//-------------------------------------------------------------------------------
// Functions to trim off left side blank spaces
void rtlTrimLeft(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
{
unsigned start = rtlLeftTrimStrStart(slen, src);
unsigned len = slen - start;
tlen = len;
tgt = rtlDupSubString(src + start, len);
}
void rtlTrimUnicodeLeft(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src)
{
unsigned start = rtlLeftTrimUnicodeStrStart(slen, src);
unsigned len = slen - start;
tlen = len;
tgt = rtlDupSubUnicode(src + start, len);
}
void rtlTrimVLeft(unsigned & tlen, char * & tgt, const char * src)
{
unsigned start = rtlLeftTrimVStrStart(src);
unsigned len = strlen(src+start);
tlen = len;
tgt = rtlDupSubString(src + start, len);
}
void rtlTrimVUnicodeLeft(unsigned & tlen, UChar * & tgt, UChar const * src)
{
rtlTrimUnicodeLeft(tlen, tgt, rtlUnicodeStrlen(src), src);
}
ECLRTL_API void rtlTrimUtf8Left(unsigned &tlen, char * &tgt, unsigned slen, const char * src)
{
unsigned trimLength;
size32_t trimSize;
rtlTrimUtf8Start(trimLength, trimSize, slen, src);
unsigned len = slen-trimLength;
const char * start = src+trimSize;
tlen = len;
tgt = rtlDupSubString(start, rtlUtf8Size(len, start));
}
void rtlAssignTrimLeftV(size32_t tlen, char * tgt, unsigned slen, const char * src)
{
unsigned start = rtlLeftTrimStrStart(slen, src);
unsigned len = slen - start;
rtlCopySubStringV(tlen, tgt, len, src+start);
}
void rtlAssignTrimVLeftV(size32_t tlen, char * tgt, const char * src)
{
unsigned start = rtlLeftTrimVStrStart(src);
unsigned len = strlen(src+start);
rtlCopySubStringV(tlen, tgt, len, src+start);
}
//--------------------------------------------------------------------------------
// Functions to trim off blank spaces of both sides
void rtlTrimBoth(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
{
unsigned len = rtlTrimStrLen(slen, src);
unsigned start = len ? rtlLeftTrimStrStart(slen, src) : 0;
len -= start;
tlen = len;
tgt = rtlDupSubString(src + start, len);
}
void rtlTrimUnicodeBoth(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src)
{
unsigned len = rtlTrimUnicodeStrLen(slen, src);
unsigned start = len ? rtlLeftTrimUnicodeStrStart(slen, src) : 0;
len -= start;
tlen = len;
tgt = rtlDupSubUnicode(src + start, len);
}
void rtlTrimVBoth(unsigned & tlen, char * & tgt, const char * src)
{
unsigned len = rtlTrimVStrLen(src);
unsigned start = len ? rtlLeftTrimVStrStart(src) : 0;
len -= start;
tlen = len;
tgt = rtlDupSubString(src + start, len);
}
void rtlTrimVUnicodeBoth(unsigned & tlen, UChar * & tgt, UChar const * src)
{
rtlTrimUnicodeBoth(tlen, tgt, rtlUnicodeStrlen(src), src);
}
ECLRTL_API void rtlTrimUtf8Both(unsigned &tlen, char * &tgt, unsigned slen, const char * src)
{
unsigned lTrimLength;
size32_t lTrimSize;
rtlTrimUtf8Start(lTrimLength, lTrimSize, slen, src);
rtlTrimUtf8Right(tlen, tgt, slen-lTrimLength, src+lTrimSize);
}
void rtlAssignTrimBothV(size32_t tlen, char * tgt, unsigned slen, const char * src)
{
unsigned len = rtlTrimStrLen(slen, src);
unsigned start = len ? rtlLeftTrimStrStart(slen, src) : 0;
len -= start;
rtlCopySubStringV(tlen, tgt, len, src+start);
}
void rtlAssignTrimVBothV(size32_t tlen, char * tgt, const char * src)
{
unsigned len = rtlTrimVStrLen(src);
unsigned start = len ? rtlLeftTrimVStrStart(src) : 0;
len -= start;
rtlCopySubStringV(tlen, tgt, len, src+start);
}
//-----------------------------------------------------------------------------
// Functions used to trim off all blank spaces in a string.
unsigned rtlTrimStrLenNonBlank(size32_t l, const char * t)
{
unsigned len = 0;
while (l)
{
l--;
if (t[l] != ' ')
len++;
}
return len;
}
unsigned rtlTrimVStrLenNonBlank(const char * t)
{
unsigned len = 0;
unsigned char c;
while ((c = *t++) != 0)
{
if (c != ' ')
len++;
}
return len;
}
void rtlTrimAll(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
{
tlen = rtlTrimStrLenNonBlank(slen, src);
char * buffer = (char *)malloc(tlen + 1);
int ind = 0;
for(unsigned i = 0; i < slen; i++) {
if(src[i] != ' ') {
buffer[ind] = src[i];
ind++;
}
}
buffer[tlen] = 0;
tgt = buffer;
}
void rtlTrimUnicodeAll(unsigned & tlen, UChar * & tgt, unsigned slen, const UChar * src)
{
UnicodeString rawStr;
UCharCharacterIterator iter(src, slen);
for(iter.first32(); iter.hasNext(); iter.next32())
if(!u_isspace(iter.current32()))
rawStr.append(iter.current32());
UnicodeString tgtStr;
normalizeUnicodeString(rawStr, tgtStr); // normalized in case crazy string like [combining accent] [space] [vowel]
tlen = tgtStr.length();
tgt = (UChar *)malloc((tlen+1)*2);
tgtStr.extract(0, tlen, tgt);
tgt[tlen] = 0x0000;
}
void rtlTrimVAll(unsigned & tlen, char * & tgt, const char * src)
{
tlen = rtlTrimVStrLenNonBlank(src);
char * buffer = (char *)malloc(tlen + 1);
int ind = 0;
int i = 0;
while(src[i] != 0) {
if(src[i] != ' ') {
buffer[ind] = src[i];
ind++;
}
i++;
}
buffer[tlen] = 0;
tgt = buffer;
}
void rtlTrimVUnicodeAll(unsigned & tlen, UChar * & tgt, const UChar * src)
{
rtlTrimUnicodeAll(tlen, tgt, rtlUnicodeStrlen(src), src);
}
ECLRTL_API void rtlTrimUtf8All(unsigned &tlen, char * &tgt, unsigned slen, const char * src)
{
//Go via unicode because of possibility of combining accents etc.
rtlDataAttr temp1(slen*sizeof(UChar));
rtlUtf8ToUnicode(slen, temp1.getustr(), slen, src);
unsigned trimLen;
rtlDataAttr trimText;
rtlTrimUnicodeAll(trimLen, trimText.refustr(), slen, temp1.getustr());
rtlUnicodeToUtf8X(tlen, tgt, trimLen, trimText.getustr());
}
void rtlAssignTrimAllV(unsigned tlen, char * tgt, unsigned slen, const char * src)
{
unsigned to = 0;
for (unsigned from = 0; (from < slen)&&(to+1 < tlen); from++)
{
if (src[from] != ' ')
tgt[to++] = src[from];
}
tgt[to] = 0;
}
void rtlAssignTrimVAllV(unsigned tlen, char * tgt, const char * src)
{
unsigned to = 0;
for (;(*src && (to+1 < tlen));src++)
{
if (*src != ' ')
tgt[to++] = *src;
}
tgt[to] = 0;
}
//-----------------------------------------------------------------------------
ECLRTL_API void rtlUnicodeToVAscii(unsigned outlen, char * out, unsigned inlen, UChar const * in)
{
rtlUnicodeToVCodepage(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
}
ECLRTL_API void rtlData2VUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
{
rtlCodepageToVUnicode(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
}
ECLRTL_API void rtlStrToVUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
{
rtlCodepageToVUnicode(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
}
ECLRTL_API void rtlData2Unicode(unsigned outlen, UChar * out, unsigned inlen, void const * in)
{
rtlCodepageToUnicode(outlen, out, inlen, (const char *)in, ASCII_LIKE_CODEPAGE);
}
ECLRTL_API void rtlAssignTrimUnicodeLeftV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
{
unsigned len;
UChar * str;
rtlTrimUnicodeLeft(len, str, slen, src);
if (len >= tlen)
len = tlen-1;
memcpy(tgt, str, len*2);
tgt[len] = 0;
rtlFree(str);
}
ECLRTL_API void rtlAssignTrimVUnicodeLeftV(size32_t tlen, UChar * tgt, const UChar * src)
{
unsigned len;
UChar * str;
rtlTrimVUnicodeLeft(len, str, src);
if (len >= tlen)
len = tlen-1;
memcpy(tgt, str, len*2);
tgt[len] = 0;
rtlFree(str);
}
ECLRTL_API void rtlAssignTrimUnicodeRightV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
{
unsigned len;
UChar * str;
rtlTrimUnicodeRight(len, str, slen, src);
if (len >= tlen)
len = tlen-1;
memcpy(tgt, str, len*2);
tgt[len] = 0;
rtlFree(str);
}
ECLRTL_API void rtlAssignTrimVUnicodeRightV(size32_t tlen, UChar * tgt, const UChar * src)
{
unsigned len;
UChar * str;
rtlTrimVUnicodeRight(len, str, src);
if (len >= tlen)
len = tlen-1;
memcpy(tgt, str, len*2);
tgt[len] = 0;
rtlFree(str);
}
ECLRTL_API void rtlAssignTrimUnicodeBothV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
{
unsigned len;
UChar * str;
rtlTrimUnicodeBoth(len, str, slen, src);
if (len >= tlen)
len = tlen-1;
memcpy(tgt, str, len*2);
tgt[len] = 0;
rtlFree(str);
}
ECLRTL_API void rtlAssignTrimVUnicodeBothV(size32_t tlen, UChar * tgt, const UChar * src)
{
unsigned len;
UChar * str;
rtlTrimVUnicodeBoth(len, str, src);
if (len >= tlen)
len = tlen-1;
memcpy(tgt, str, len*2);
tgt[len] = 0;
rtlFree(str);
}
ECLRTL_API void rtlAssignTrimUnicodeAllV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
{
unsigned len;
UChar * str;
rtlTrimUnicodeAll(len, str, slen, src);
if (len >= tlen)
len = tlen-1;
memcpy(tgt, str, len*2);
tgt[len] = 0;
rtlFree(str);
}
ECLRTL_API void rtlAssignTrimVUnicodeAllV(size32_t tlen, UChar * tgt, const UChar * src)
{
unsigned len;
UChar * str;
rtlTrimVUnicodeAll(len, str, src);
if (len >= tlen)
len = tlen-1;
memcpy(tgt, str, len*2);
tgt[len] = 0;
rtlFree(str);
}
//-----------------------------------------------------------------------------
int rtlCompareStrStr(unsigned l1, const char * p1, unsigned l2, const char * p2)
{
unsigned len = l1;
if (len > l2)
len = l2;
int diff = memcmp(p1, p2, len);
if (diff == 0)
{
if (len != l1)
{
for (;(diff == 0) && (len != l1);len++)
diff = ((unsigned char *)p1)[len] - ' ';
}
else if (len != l2)
{
for (;(diff == 0) && (len != l2);len++)
diff = ' ' - ((unsigned char *)p2)[len];
}
}
return diff;
}
int rtlCompareVStrVStr(const char * p1, const char * p2)
{
return rtlCompareStrStr(strlen(p1), p1, strlen(p2), p2);
}
int rtlCompareStrBlank(unsigned l1, const char * p1)
{
while (l1--)
{
int diff = (*(unsigned char *)(p1++)) - ' ';
if (diff)
return diff;
}
return 0;
}
int rtlCompareDataData(unsigned l1, const void * p1, unsigned l2, const void * p2)
{
unsigned len = l1;
if (len > l2)
len = l2;
int diff = memcmp(p1, p2, len);
if (diff == 0)
{
if (l1 > l2)
diff = +1;
else if (l1 < l2)
diff = -1;
}
return diff;
}
int rtlCompareEStrEStr(unsigned l1, const char * p1, unsigned l2, const char * p2)
{
unsigned len = l1;
if (len > l2)
len = l2;
int diff = memcmp(p1, p2, len);
if (diff == 0)
{
if (len != l1)
{
for (;(diff == 0) && (len != l1);len++)
diff = ((unsigned char *)p1)[len] - '@';
}
else if (len != l2)
{
for (;(diff == 0) && (len != l2);len++)
diff = '@' - ((unsigned char *)p2)[len];
}
}
return diff;
}
int rtlCompareUnicodeUnicode(unsigned l1, UChar const * p1, unsigned l2, UChar const * p2, char const * locale)
{
while(l1 && u_isUWhiteSpace(p1[l1-1])) l1--;
while(l2 && u_isUWhiteSpace(p2[l2-1])) l2--;
return ucol_strcoll(queryRTLLocale(locale)->queryCollator(), p1, l1, p2, l2);
}
int rtlCompareUnicodeUnicodeStrength(unsigned l1, UChar const * p1, unsigned l2, UChar const * p2, char const * locale, unsigned strength)
{
while(l1 && u_isUWhiteSpace(p1[l1-1])) l1--;
while(l2 && u_isUWhiteSpace(p2[l2-1])) l2--;
return ucol_strcoll(queryRTLLocale(locale)->queryCollator(strength), p1, l1, p2, l2);
}
int rtlCompareVUnicodeVUnicode(UChar const * p1, UChar const * p2, char const * locale)
{
return rtlCompareUnicodeUnicode(rtlUnicodeStrlen(p1), p1, rtlUnicodeStrlen(p2), p2, locale);
}
int rtlCompareVUnicodeVUnicodeStrength(UChar const * p1, UChar const * p2, char const * locale, unsigned strength)
{
return rtlCompareUnicodeUnicodeStrength(rtlUnicodeStrlen(p1), p1, rtlUnicodeStrlen(p2), p2, locale, strength);
}
void rtlKeyUnicodeX(unsigned & tlen, void * & tgt, unsigned slen, const UChar * src, const char * locale)
{
while(slen && u_isUWhiteSpace(src[slen-1])) slen--;
UCollator * coll = queryRTLLocale(locale)->queryCollator();
tlen = ucol_getSortKey(coll, src, slen, 0, 0);
tgt = malloc(tlen);
ucol_getSortKey(coll, src, slen, (unsigned char *)tgt, tlen);
}
void rtlKeyUnicodeStrengthX(unsigned & tlen, void * & tgt, unsigned slen, const UChar * src, const char * locale, unsigned strength)
{
while(slen && u_isUWhiteSpace(src[slen-1])) slen--;
UCollator * coll = queryRTLLocale(locale)->queryCollator(strength);
tlen = ucol_getSortKey(coll, src, slen, 0, 0);
tgt = malloc(tlen);
ucol_getSortKey(coll, src, slen, (unsigned char *)tgt, tlen);
}
ECLRTL_API int rtlPrefixDiffStr(unsigned l1, const char * p1, unsigned l2, const char * p2)
{
unsigned len = l1 < l2 ? l1 : l2;
const byte * str1 = (const byte *)p1;
const byte * str2 = (const byte *)p2;
for (unsigned i=0; iqueryCollator(), p1+i, l1-i, p2+i, l2-i);
if (c < 0)
return -(int)(i+1);
else if (c > 0)
return (int)(i+1);
else
return 0; //weird!
}
}
if (l1 != l2)
return (l1 < l2) ? -(int)(len+1) : (int)(len + 1);
return 0;
}
//-----------------------------------------------------------------------------
void rtlStringToLower(size32_t l, char * t)
{
for (;l--;t++)
*t = tolower(*t);
}
void rtlStringToUpper(size32_t l, char * t)
{
for (;l--;t++)
*t = toupper(*t);
}
void rtlUnicodeToLower(size32_t l, UChar * t, char const * locale)
{
UChar * buff = (UChar *)malloc(l*2);
UErrorCode err = U_ZERO_ERROR;
u_strToLower(buff, l, t, l, locale, &err);
unicodeNormalizedCopy(buff, t, l);
}
void rtlUnicodeToLowerX(size32_t & lenout, UChar * & out, size32_t l, const UChar * t, char const * locale)
{
out = (UChar *)malloc(l*2);
lenout = l;
UErrorCode err = U_ZERO_ERROR;
u_strToLower(out, l, t, l, locale, &err);
}
void rtlUnicodeToUpper(size32_t l, UChar * t, char const * locale)
{
UChar * buff = (UChar *)malloc(l*2);
UErrorCode err = U_ZERO_ERROR;
u_strToUpper(buff, l, t, l, locale, &err);
unicodeNormalizedCopy(buff, t, l);
}
//=============================================================================
// Miscellaneous helper functions...
//-----------------------------------------------------------------------------
int searchTableStringN(unsigned count, const char * * table, unsigned width, const char * search)
{
int left = 0;
int right = count;
do
{
int mid = (left + right) >> 1;
int cmp = memcmp(search, table[mid], width);
if (cmp < 0)
right = mid;
else if (cmp > 0)
left = mid+1;
else
return mid;
} while (left < right);
return -1;
}
int rtlSearchTableStringN(unsigned count, char * * table, unsigned width, const char * search)
{
int left = 0;
int right = count;
do
{
int mid = (left + right) >> 1;
//we could use rtlCompareStrStr, but both source and target strings should
//be the correct length, so no point.... (unless new weird collation sequences)
//we would also need to call a different function for data
int cmp = memcmp(search, table[mid], width);
if (cmp < 0)
right = mid;
else if (cmp > 0)
left = mid+1;
else
return mid;
} while (left < right);
return -1;
}
int rtlSearchTableVStringN(unsigned count, char * * table, const char * search)
{
int left = 0;
int right = count;
do
{
int mid = (left + right) >> 1;
int cmp = strcmp(search, table[mid]);
if (cmp < 0)
right = mid;
else if (cmp > 0)
left = mid+1;
else
return mid;
} while (left < right);
return -1;
}
int rtlNewSearchDataTable(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search)
{
int left = 0;
int right = count;
do
{
int mid = (left + right) >> 1;
int cmp = rtlCompareDataData( width, search, elemlen, table[mid]);
if (cmp < 0)
right = mid;
else if (cmp > 0)
left = mid+1;
else {
return mid;
}
} while (left < right);
return -1;
}
int rtlNewSearchEStringTable(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search)
{
int left = 0;
int right = count;
do
{
int mid = (left + right) >> 1;
int cmp = rtlCompareEStrEStr( width, search, elemlen, table[mid]);
if (cmp < 0)
right = mid;
else if (cmp > 0)
left = mid+1;
else {
return mid;
}
} while (left < right);
return -1;
}
int rtlNewSearchQStringTable(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search)
{
int left = 0;
int right = count;
do
{
int mid = (left + right) >> 1;
int cmp = rtlCompareQStrQStr( width, search, elemlen, table[mid]);
if (cmp < 0)
right = mid;
else if (cmp > 0)
left = mid+1;
else {
return mid;
}
} while (left < right);
return -1;
}
int rtlNewSearchStringTable(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search)
{
int left = 0;
int right = count;
do
{
int mid = (left + right) >> 1;
int cmp = rtlCompareStrStr( width, search, elemlen, table[mid]);
if (cmp < 0)
right = mid;
else if (cmp > 0)
left = mid+1;
else {
return mid;
}
} while (left < right);
return -1;
}
int rtlNewSearchUnicodeTable(unsigned count, unsigned elemlen, UChar * * table, unsigned width, const UChar * search, const char * locale)
{
UCollator * coll = queryRTLLocale(locale)->queryCollator();
int left = 0;
int right = count;
size32_t trimWidth = rtlQuickTrimUnicode(width, search);
do
{
int mid = (left + right) >> 1;
size32_t elemTrimWidth = rtlQuickTrimUnicode(elemlen, table[mid]);
UCollationResult cmp = ucol_strcoll(coll, search, trimWidth, table[mid], elemTrimWidth);
if (cmp == UCOL_LESS)
right = mid;
else if (cmp == UCOL_GREATER)
left = mid+1;
else
return mid;
} while (left < right);
return -1;
}
int rtlNewSearchVUnicodeTable(unsigned count, UChar * * table, const UChar * search, const char * locale)
{
UCollator * coll = queryRTLLocale(locale)->queryCollator();
int left = 0;
int right = count;
do
{
int mid = (left + right) >> 1;
UCollationResult cmp = ucol_strcoll(coll, search, rtlUnicodeStrlen(search), table[mid], rtlUnicodeStrlen(table[mid]));
if (cmp == UCOL_LESS)
right = mid;
else if (cmp == UCOL_GREATER)
left = mid+1;
else
return mid;
} while (left < right);
return -1;
}
//-----------------------------------------------------------------------------
template
int rtlSearchIntegerTable(unsigned count, T * table, T search)
{
int left = 0;
int right = count;
do
{
int mid = (left + right) >> 1;
T midValue = table[mid];
if (search < midValue)
right = mid;
else if (search > midValue)
left = mid+1;
else
return mid;
} while (left < right);
return -1;
}
int rtlSearchTableInteger8(unsigned count, __int64 * table, __int64 search)
{
return rtlSearchIntegerTable(count, table, search);
}
int rtlSearchTableUInteger8(unsigned count, unsigned __int64 * table, unsigned __int64 search)
{
return rtlSearchIntegerTable(count, table, search);
}
int rtlSearchTableInteger4(unsigned count, int * table, int search)
{
return rtlSearchIntegerTable(count, table, search);
}
int rtlSearchTableUInteger4(unsigned count, unsigned * table, unsigned search)
{
return rtlSearchIntegerTable(count, table, search);
}
//-----------------------------------------------------------------------------
unsigned rtlCrc32(unsigned len, const void * buffer, unsigned crc)
{
return crc32((const char *)buffer, len, crc);
}
//=============================================================================
// EBCDIC helper functions...
static char ccsid819[] = "\
\000\001\002\003\234\011\206\177\227\215\216\013\014\015\016\017\
\020\021\022\023\235\205\010\207\030\031\222\217\034\035\036\037\
\200\201\202\203\204\012\027\033\210\211\212\213\214\005\006\007\
\220\221\026\223\224\225\226\004\230\231\232\233\024\025\236\032\
\040\240\342\344\340\341\343\345\347\361\242\056\074\050\053\174\
\046\351\352\353\350\355\356\357\354\337\041\044\052\051\073\254\
\055\057\302\304\300\301\303\305\307\321\246\054\045\137\076\077\
\370\311\312\313\310\315\316\317\314\140\072\043\100\047\075\042\
\330\141\142\143\144\145\146\147\150\151\253\273\360\375\376\261\
\260\152\153\154\155\156\157\160\161\162\252\272\346\270\306\244\
\265\176\163\164\165\166\167\170\171\172\241\277\320\335\336\256\
\136\243\245\267\251\247\266\274\275\276\133\135\257\250\264\327\
\173\101\102\103\104\105\106\107\110\111\255\364\366\362\363\365\
\175\112\113\114\115\116\117\120\121\122\271\373\374\371\372\377\
\134\367\123\124\125\126\127\130\131\132\262\324\326\322\323\325\
\060\061\062\063\064\065\066\067\070\071\263\333\334\331\332\237";
static unsigned char ccsid1047[] = "\
\000\001\002\003\234\011\206\177\227\215\216\013\014\015\016\017\
\020\021\022\023\235\012\010\207\030\031\222\217\034\035\036\037\
\200\201\202\203\204\205\027\033\210\211\212\213\214\005\006\007\
\220\221\026\223\224\225\226\004\230\231\232\233\024\025\236\032\
\040\240\342\344\340\341\343\345\347\361\242\056\074\050\053\174\
\046\351\352\353\350\355\356\357\354\337\041\044\052\051\073\136\
\055\057\302\304\300\301\303\305\307\321\246\054\045\137\076\077\
\370\311\312\313\310\315\316\317\314\140\072\043\100\047\075\042\
\330\141\142\143\144\145\146\147\150\151\253\273\360\375\376\261\
\260\152\153\154\155\156\157\160\161\162\252\272\346\270\306\244\
\265\176\163\164\165\166\167\170\171\172\241\277\320\133\336\256\
\254\243\245\267\251\247\266\274\275\276\335\250\257\135\264\327\
\173\101\102\103\104\105\106\107\110\111\255\364\366\362\363\365\
\175\112\113\114\115\116\117\120\121\122\271\373\374\371\372\377\
\134\367\123\124\125\126\127\130\131\132\262\324\326\322\323\325\
\060\061\062\063\064\065\066\067\070\071\263\333\334\331\332\237";
static unsigned char ccsid1047_rev[] = "\
\000\001\002\003\067\055\056\057\026\005\025\013\014\015\016\017\
\020\021\022\023\074\075\062\046\030\031\077\047\034\035\036\037\
\100\132\177\173\133\154\120\175\115\135\134\116\153\140\113\141\
\360\361\362\363\364\365\366\367\370\371\172\136\114\176\156\157\
\174\301\302\303\304\305\306\307\310\311\321\322\323\324\325\326\
\327\330\331\342\343\344\345\346\347\350\351\255\340\275\137\155\
\171\201\202\203\204\205\206\207\210\211\221\222\223\224\225\226\
\227\230\231\242\243\244\245\246\247\250\251\300\117\320\241\007\
\040\041\042\043\044\045\006\027\050\051\052\053\054\011\012\033\
\060\061\032\063\064\065\066\010\070\071\072\073\004\024\076\377\
\101\252\112\261\237\262\152\265\273\264\232\212\260\312\257\274\
\220\217\352\372\276\240\266\263\235\332\233\213\267\270\271\253\
\144\145\142\146\143\147\236\150\164\161\162\163\170\165\166\167\
\254\151\355\356\353\357\354\277\200\375\376\373\374\272\256\131\
\104\105\102\106\103\107\234\110\124\121\122\123\130\125\126\127\
\214\111\315\316\313\317\314\341\160\335\336\333\334\215\216\337";
void rtlEStrToStr(unsigned outlen, char *out, unsigned inlen, const char *in)
{
unsigned char *codepage = ccsid1047;
unsigned i,j;
unsigned lim = inlen;
if (lim>outlen) lim = outlen;
for (i=0;ioutlen) lim = outlen;
for (i=0;iquery();
UErrorCode err = U_ZERO_ERROR;
unsigned len = ucnv_toUChars(conv, out, outlen, in, inlen, &err);
while(lenquery();
UErrorCode err = U_ZERO_ERROR;
unsigned len = ucnv_toUChars(conv, out, outlen-1, in, inlen, &err);
if (len >= outlen) len = outlen-1;
out[len] = 0;
vunicodeEnsureIsNormalized(outlen, out);
}
void rtlVCodepageToUnicode(unsigned outlen, UChar * out, char const * in, char const * codepage)
{
rtlCodepageToUnicode(outlen, out, strlen(in), in, codepage);
}
void rtlVCodepageToVUnicode(unsigned outlen, UChar * out, char const * in, char const * codepage)
{
rtlCodepageToVUnicode(outlen, out, strlen(in), in, codepage);
}
void rtlCodepageToUnicodeUnescape(unsigned outlen, UChar * out, unsigned inlen, char const * in, char const * codepage)
{
//If the input contains a character which doesn't exist in its claimed codepage, this will
//generate U+FFFD (substitution character). This most likely won't be displayed.
UnicodeString raw(in, inlen, codepage);
UnicodeString unescaped = raw.unescape();
UnicodeString normalized;
normalizeUnicodeString(unescaped, normalized);
if((unsigned)normalized.length()>outlen)
normalized.truncate(outlen);
else if((unsigned)normalized.length()query();
UErrorCode err = U_ZERO_ERROR;
unsigned len = ucnv_fromUChars(conv, (char *)out, outlen, in, inlen, &err);
if(lenquery();
UErrorCode err = U_ZERO_ERROR;
unsigned len = ucnv_fromUChars(conv, (char *)out, outlen, in, inlen, &err);
if(lenquery();
UErrorCode err = U_ZERO_ERROR;
unsigned len = ucnv_fromUChars(conv, (char *)out, outlen-1, in, inlen, &err);
if (len >= outlen) len = outlen-1;
out[len] = 0;
}
void rtlVUnicodeToCodepage(unsigned outlen, char * out, UChar const * in, char const * codepage)
{
rtlUnicodeToCodepage(outlen, out, rtlUnicodeStrlen(in), in, codepage);
}
void rtlVUnicodeToData(unsigned outlen, void * out, UChar const * in)
{
rtlUnicodeToData(outlen, out, rtlUnicodeStrlen(in), in);
}
void rtlVUnicodeToVCodepage(unsigned outlen, char * out, UChar const * in, char const * codepage)
{
rtlUnicodeToVCodepage(outlen, out, rtlUnicodeStrlen(in), in, codepage);
}
void rtlCodepageToUnicodeX(unsigned & outlen, UChar * & out, unsigned inlen, char const * in, char const * codepage)
{
//If the input contains a character which doesn't exist in its claimed codepage, this will
//generate U+FFFD (substitution character). This most likely won't be displayed.
UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
UErrorCode err = U_ZERO_ERROR;
outlen = ucnv_toUChars(conv, 0, 0, in, inlen, &err);
if(err==U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
out = (UChar *)malloc(outlen*2);
ucnv_toUChars(conv, out, outlen, in, inlen, &err);
}
UChar * rtlCodepageToVUnicodeX(unsigned inlen, char const * in, char const * codepage)
{
//If the input contains a character which doesn't exist in its claimed codepage, this will
//generate U+FFFD (substitution character). This most likely won't be displayed.
UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
UErrorCode err = U_ZERO_ERROR;
unsigned outlen = ucnv_toUChars(conv, 0, 0, in, inlen, &err);
if(err == U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
UChar * out = (UChar *)malloc((outlen+1)*2);
ucnv_toUChars(conv, out, outlen, in, inlen, &err);
out[outlen] = 0x0000;
vunicodeEnsureIsNormalizedX(outlen, out);
return out;
}
void rtlVCodepageToUnicodeX(unsigned & outlen, UChar * & out, char const * in, char const * codepage)
{
rtlCodepageToUnicodeX(outlen, out, strlen(in), in, codepage);
}
UChar * rtlVCodepageToVUnicodeX(char const * in, char const * codepage)
{
return rtlCodepageToVUnicodeX(strlen(in), in, codepage);
}
void rtlCodepageToUnicodeXUnescape(unsigned & outlen, UChar * & out, unsigned inlen, char const * in, char const * codepage)
{
//If the input contains a character which doesn't exist in its claimed codepage, this will
//generate U+FFFD (substitution character). This most likely won't be displayed.
UnicodeString raw(in, inlen, codepage);
UnicodeString unescaped = raw.unescape();
UnicodeString normalized;
normalizeUnicodeString(unescaped, normalized);
outlen = normalized.length();
out = (UChar *)malloc(outlen*2);
normalized.extract(0, outlen, out);
}
void rtlUnicodeToCodepageX(unsigned & outlen, char * & out, unsigned inlen, UChar const * in, char const * codepage)
{
//If the unicode contains a character which doesn't exist in the destination codepage,
//this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
//no telling how your terminal may display this (I've seen a divide sign and a right
//arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
UErrorCode err = U_ZERO_ERROR;
outlen = ucnv_fromUChars(conv, 0, 0, in, inlen, &err);
if(err == U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
out = (char *)malloc(outlen);
ucnv_fromUChars(conv, out, outlen, in, inlen, &err);
}
void rtlUnicodeToDataX(unsigned & outlen, void * & out, unsigned inlen, UChar const * in)
{
rtlUnicodeToCodepageX(outlen, (char * &)out, inlen, in, ASCII_LIKE_CODEPAGE);
}
char * rtlUnicodeToVCodepageX(unsigned inlen, UChar const * in, char const * codepage)
{
//If the unicode contains a character which doesn't exist in the destination codepage,
//this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
//no telling how your terminal may display this (I've seen a divide sign and a right
//arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
UErrorCode err = U_ZERO_ERROR;
unsigned outlen = ucnv_fromUChars(conv, 0, 0, in, inlen, &err);
if(err == U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
char * out = (char *)malloc(outlen+1);
ucnv_fromUChars(conv, out, outlen, in, inlen, &err);
out[outlen] = 0x00;
return out;
}
void rtlVUnicodeToCodepageX(unsigned & outlen, char * & out, UChar const * in, char const * codepage)
{
rtlUnicodeToCodepageX(outlen, out, rtlUnicodeStrlen(in), in, codepage);
}
char * rtlVUnicodeToVCodepageX(UChar const * in, char const * codepage)
{
return rtlUnicodeToVCodepageX(rtlUnicodeStrlen(in), in, codepage);
}
void rtlStrToUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
{
rtlCodepageToUnicode(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
}
void rtlUnicodeToStr(unsigned outlen, char * out, unsigned inlen, UChar const * in)
{
rtlUnicodeToCodepage(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
}
void rtlStrToUnicodeX(unsigned & outlen, UChar * & out, unsigned inlen, char const * in)
{
rtlCodepageToUnicodeX(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
}
void rtlUnicodeToStrX(unsigned & outlen, char * & out, unsigned inlen, UChar const * in)
{
rtlUnicodeToCodepageX(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
}
void rtlUnicodeToEscapedStrX(unsigned & outlen, char * & out, unsigned inlen, UChar const * in)
{
StringBuffer outbuff;
escapeUnicode(inlen, in, outbuff);
outlen = outbuff.length();
out = (char *)malloc(outlen);
memcpy(out, outbuff.str(), outlen);
}
void rtlUnicodeToQuotedUTF8X(unsigned & outlen, char * & out, unsigned inlen, UChar const * in)
{
UnicodeString unicode(in, inlen);
unicode.findAndReplace("'", "\\'");
//pre-flight length - may be more efficient to guess length and only re-extract if guess no good, but what to guess?
outlen = unicode.extract(0, unicode.length(), 0, 0, UTF8_CODEPAGE);
out = (char *)malloc(outlen);
unicode.extract(0, unicode.length(), out, outlen, UTF8_CODEPAGE);
}
bool rtlCodepageToCodepage(unsigned outlen, char * out, unsigned inlen, char const * in, char const * outcodepage, char const * incodepage)
{
UConverter * inconv = queryRTLUnicodeConverter(incodepage)->query();
UConverter * outconv = queryRTLUnicodeConverter(outcodepage)->query();
UErrorCode err = U_ZERO_ERROR;
char * target = out;
ucnv_convertEx(outconv, inconv, &target, out+outlen, &in, in+inlen, NULL, NULL, NULL, NULL, TRUE, TRUE, &err);
unsigned len = target - out;
if(len < outlen)
codepageBlankFill(outcodepage, target, outlen-len);
return U_SUCCESS(err);
}
bool rtlCodepageToCodepageX(unsigned & outlen, char * & out, unsigned maxoutlen, unsigned inlen, char const * in, char const * outcodepage, char const * incodepage)
{
UConverter * inconv = queryRTLUnicodeConverter(incodepage)->query();
UConverter * outconv = queryRTLUnicodeConverter(outcodepage)->query();
UErrorCode err = U_ZERO_ERROR;
//GH->PG is there a better way of coding this with out temporary buffer?
char * tempBuffer = (char *)malloc(maxoutlen);
char * target = tempBuffer;
ucnv_convertEx(outconv, inconv, &target, tempBuffer+maxoutlen, &in, in+inlen, NULL, NULL, NULL, NULL, TRUE, TRUE, &err);
unsigned len = target - tempBuffer;
outlen = len;
if (len == maxoutlen)
out = tempBuffer;
else
{
out = (char *)realloc(tempBuffer, len);
if (!out)
out = tempBuffer;
}
return U_SUCCESS(err);
}
int rtlSingleUtf8ToCodepage(char * out, unsigned inlen, char const * in, char const * outcodepage)
{
if(!U8_IS_LEAD(*in))
return -1;
uint8_t trailbytes = U8_COUNT_TRAIL_BYTES(*in);
if(inlen < (unsigned)(trailbytes+1))
return -1;
if(!rtlCodepageToCodepage(1, out, trailbytes+1, in, outcodepage, UTF8_CODEPAGE))
return -1;
return static_cast(trailbytes); //cast okay as is certainly 0--3
}
//---------------------------------------------------------------------------
void rtlStrToDataX(unsigned & tlen, void * & tgt, unsigned slen, const void * src)
{
void * data = malloc(slen);
memcpy(data, src, slen);
tgt = data;
tlen = slen;
}
void rtlStrToStrX(unsigned & tlen, char * & tgt, unsigned slen, const void * src)
{
char * data = (char *)malloc(slen);
memcpy(data, src, slen);
tgt = data;
tlen = slen;
}
char * rtlStrToVStrX(unsigned slen, const void * src)
{
char * data = (char *)malloc(slen+1);
memcpy(data, src, slen);
data[slen] = 0;
return data;
}
char * rtlEStrToVStrX(unsigned slen, const char * src)
{
char * astr = (char*)alloca(slen);
rtlEStrToStr(slen,astr,slen,src);
return rtlStrToVStrX(slen, astr);
}
void rtlEStrToStrX(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
{
char * data = (char *)malloc(slen);
rtlEStrToStr(slen, data, slen, src);
tgt = data;
tlen = slen;
}
void rtlStrToEStrX(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
{
char * data = (char *)malloc(slen);
rtlStrToEStr(slen, data, slen, src);
tgt = data;
tlen = slen;
}
//---------------------------------------------------------------------------
// See http://www.isthe.com/chongo/tech/comp/fnv/index.html
#define FNV1_64_INIT I64C(0xcbf29ce484222325)
#define FNV_64_PRIME I64C(0x100000001b3U)
hash64_t rtlHash64Data(size32_t len, const void *buf, hash64_t hval)
{
const unsigned char *bp = (const unsigned char *)buf; /* start of buffer */
const unsigned char *be = bp + len; /* beyond end of buffer */
while (bp < be)
{
hval *= FNV_64_PRIME;
hval ^= *bp++;
}
return hval;
}
hash64_t rtlHash64VStr(const char *str, hash64_t hval)
{
const unsigned char *s = (const unsigned char *)str;
unsigned char c;
while ((c = *s++) != 0)
{
hval *= FNV_64_PRIME;
hval ^= c;
}
return hval;
}
hash64_t rtlHash64Unicode(unsigned length, UChar const * k, hash64_t initval)
{
return rtlHash64Data(length*2, k, initval);
}
hash64_t rtlHash64VUnicode(UChar const * k, hash64_t initval)
{
return rtlHash64Data(rtlUnicodeStrlen(k)*2, k, initval);
}
//---------------------------------------------------------------------------
// See http://www.isthe.com/chongo/tech/comp/fnv/index.html
#define FNV1_32_INIT 0x811C9DC5
#define FNV_32_PRIME 0x1000193
unsigned rtlHash32Data(size32_t len, const void *buf, unsigned hval)
{
const unsigned char *bp = (const unsigned char *)buf; /* start of buffer */
const unsigned char *be = bp + len; /* beyond end of buffer */
while (bp < be)
{
hval *= FNV_32_PRIME;
hval ^= *bp++;
}
return hval;
}
unsigned rtlHash32VStr(const char *str, unsigned hval)
{
const unsigned char *s = (const unsigned char *)str;
unsigned char c;
while ((c = *s++) != 0)
{
hval *= FNV_32_PRIME;
hval ^= c;
}
return hval;
}
unsigned rtlHash32Unicode(unsigned length, UChar const * k, unsigned initval)
{
return rtlHash32Data(length*2, k, initval);
}
unsigned rtlHash32VUnicode(UChar const * k, unsigned initval)
{
return rtlHash32Data(rtlUnicodeStrlen(k)*2, k, initval);
}
//---------------------------------------------------------------------------
// Hash Helper functions
#define mix(a,b,c) \
{ \
a -= b; a -= c; a ^= (c>>13); \
b -= c; b -= a; b ^= (a<<8); \
c -= a; c -= b; c ^= (b>>13); \
a -= b; a -= c; a ^= (c>>12); \
b -= c; b -= a; b ^= (a<<16); \
c -= a; c -= b; c ^= (b>>5); \
a -= b; a -= c; a ^= (c>>3); \
b -= c; b -= a; b ^= (a<<10); \
c -= a; c -= b; c ^= (b>>15); \
}
#define GETBYTE0(n) ((unsigned)k[n])
#define GETBYTE1(n) ((unsigned)k[n+1]<<8)
#define GETBYTE2(n) ((unsigned)k[n+2]<<16)
#define GETBYTE3(n) ((unsigned)k[n+3]<<24)
#define GETWORD(k,n) (GETBYTE0(n)+GETBYTE1(n)+GETBYTE2(n)+GETBYTE3(n))
// the above looks inefficient but the compiler optimizes well
// this hash looks slow but is about twice as quick as using our CRC table
// and gives gives better results
// (see paper at http://burtleburtle.net/bob/hash/evahash.html for more info)
unsigned rtlHashData( unsigned length, const void *_k, unsigned initval)
{
const unsigned char * k = (const unsigned char *)_k;
register unsigned a,b,c,len;
/* Set up the internal state */
len = length;
a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
c = initval; /* the previous hash value */
/*---------------------------------------- handle most of the key */
while (len >= 12)
{
a += GETWORD(k,0);
b += GETWORD(k,4);
c += GETWORD(k,8);
mix(a,b,c);
k += 12; len -= 12;
}
/*------------------------------------- handle the last 11 bytes */
c += length;
switch(len) /* all the case statements fall through */
{
case 11: c+=GETBYTE3(7);
case 10: c+=GETBYTE2(7);
case 9 : c+=GETBYTE1(7);
/* the first byte of c is reserved for the length */
case 8 : b+=GETBYTE3(4);
case 7 : b+=GETBYTE2(4);
case 6 : b+=GETBYTE1(4);
case 5 : b+=GETBYTE0(4);
case 4 : a+=GETBYTE3(0);
case 3 : a+=GETBYTE2(0);
case 2 : a+=GETBYTE1(0);
case 1 : a+=GETBYTE0(0);
/* case 0: nothing left to add */
}
mix(a,b,c);
/*-------------------------------------------- report the result */
return c;
}
unsigned rtlHashString( unsigned length, const char *_k, unsigned initval)
{
return rtlHashData(rtlTrimStrLen(length, _k), _k, initval);
}
unsigned rtlHashUnicode(unsigned length, UChar const * k, unsigned initval)
{
//Would make more sense to trim here.
return rtlHashData(length*2, k, initval);
}
unsigned rtlHashVStr(const char * k, unsigned initval)
{
return rtlHashData(rtlTrimVStrLen(k), k, initval);
}
unsigned rtlHashVUnicode(UChar const * k, unsigned initval)
{
return rtlHashData(rtlTrimVUnicodeStrLen(k)*2, k, initval);
}
#define GETWORDNC(k,n) ((GETBYTE0(n)+GETBYTE1(n)+GETBYTE2(n)+GETBYTE3(n))&0xdfdfdfdf)
unsigned rtlHashDataNC( unsigned length, const void * _k, unsigned initval)
{
const unsigned char * k = (const unsigned char *)_k;
register unsigned a,b,c,len;
/* Set up the internal state */
len = length;
a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
c = initval; /* the previous hash value */
/*---------------------------------------- handle most of the key */
while (len >= 12)
{
a += GETWORDNC(k,0);
b += GETWORDNC(k,4);
c += GETWORDNC(k,8);
mix(a,b,c);
k += 12; len -= 12;
}
/*------------------------------------- handle the last 11 bytes */
c += length;
switch(len) /* all the case statements fall through */
{
case 11: c+=GETBYTE3(7)&0xdf;
case 10: c+=GETBYTE2(7)&0xdf;
case 9 : c+=GETBYTE1(7)&0xdf;
/* the first byte of c is reserved for the length */
case 8 : b+=GETBYTE3(4)&0xdf;
case 7 : b+=GETBYTE2(4)&0xdf;
case 6 : b+=GETBYTE1(4)&0xdf;
case 5 : b+=GETBYTE0(4)&0xdf;
case 4 : a+=GETBYTE3(0)&0xdf;
case 3 : a+=GETBYTE2(0)&0xdf;
case 2 : a+=GETBYTE1(0)&0xdf;
case 1 : a+=GETBYTE0(0)&0xdf;
/* case 0: nothing left to add */
}
mix(a,b,c);
/*-------------------------------------------- report the result */
return c;
}
unsigned rtlHashVStrNC(const char * k, unsigned initval)
{
return rtlHashDataNC(strlen(k), k, initval);
}
//---------------------------------------------------------------------------
unsigned rtlCrcData( unsigned length, const void *_k, unsigned initval)
{
return crc32((const char *)_k, length, initval);
}
unsigned rtlCrcUnicode(unsigned length, UChar const * k, unsigned initval)
{
return crc32((char const *)k, length*2, initval);
}
unsigned rtlCrcVStr( const char * k, unsigned initval)
{
return crc32(k, strlen(k), initval);
}
unsigned rtlCrcVUnicode(UChar const * k, unsigned initval)
{
return crc32((char const *)k, rtlUnicodeStrlen(k)*2, initval);
}
//---------------------------------------------------------------------------
// MD5 processing:
void rtlHashMd5Init(size32_t sizestate, void * _state)
{
assertex(sizestate >= sizeof(md5_state_s));
md5_state_s * state = (md5_state_s *)_state;
md5_init(state);
}
void rtlHashMd5Data(size32_t len, const void *buf, size32_t sizestate, void * _state)
{
md5_state_s * state = (md5_state_s * )_state;
md5_append(state, (const md5_byte_t *)buf, len);
}
void rtlHashMd5Finish(void * out, size32_t sizestate, void * _state)
{
typedef md5_byte_t digest_t[16];
md5_state_s * state = (md5_state_s *)_state;
md5_finish(state, *(digest_t*)out);
}
//---------------------------------------------------------------------------
unsigned rtlRandom()
{
CriticalBlock block(random_Sect);
return random_->next();
}
void rtlSeedRandom(unsigned value)
{
CriticalBlock block(random_Sect);
random_->seed(value);
}
// These are all useful functions for testing - not really designed for other people to use them...
ECLRTL_API unsigned rtlTick()
{
return msTick();
}
ECLRTL_API bool rtlGPF()
{
char * x = 0;
*x = 0;
return false;
}
ECLRTL_API unsigned rtlSleep(unsigned delay)
{
MilliSleep(delay);
return 0;
}
ECLRTL_API unsigned rtlDisplay(unsigned len, const char * src)
{
LOG(MCprogress, unknownJob, "%.*s", len, src);
return 0;
}
void rtlEcho(unsigned len, const char * src)
{
printf("%.*s\n", len, src);
}
ECLRTL_API unsigned __int64 rtlNano()
{
return cycle_to_nanosec(get_cycles_now());
}
ECLRTL_API void rtlTestGetPrimes(unsigned & num, void * & data)
{
unsigned numPrimes = 6;
unsigned size = sizeof(unsigned) * numPrimes;
unsigned * primes = (unsigned *)malloc(size);
primes[0] = 1;
primes[1] = 2;
primes[2] = 3;
primes[3] = 5;
primes[4] = 7;
primes[5] = 11;
num = numPrimes;
data = primes;
}
ECLRTL_API void rtlTestFibList(bool & outAll, size32_t & outSize, void * & outData, bool inAll, size32_t inSize, const void * inData)
{
const unsigned * inList = (const unsigned *)inData;
unsigned * outList = (unsigned *)malloc(inSize);
unsigned * curOut = outList;
unsigned count = inSize / sizeof(*inList);
unsigned prev = 0;
for (unsigned i=0; i < count; i++)
{
unsigned next = *inList++;
*curOut++ = next + prev;
prev = next;
}
outAll = inAll;
outSize = inSize;
outData = outList;
}
unsigned rtlDelayReturn(unsigned value, unsigned sleepTime)
{
MilliSleep(sleepTime);
return value;
}
//---------------------------------------------------------------------------
class CRtlFailException : public CInterface, public IUserException
{
public:
CRtlFailException(int _code, char const * _msg) : code(_code) { msg = strdup(_msg); }
~CRtlFailException() { free(msg); }
IMPLEMENT_IINTERFACE;
virtual int errorCode() const { return code; }
virtual StringBuffer & errorMessage(StringBuffer & buff) const { return buff.append(msg); }
virtual MessageAudience errorAudience() const { return MSGAUD_user; }
private:
int code;
char * msg;
};
void rtlFail(int code, const char *msg)
{
throw dynamic_cast(new CRtlFailException(code, msg));
}
void rtlSysFail(int code, const char *msg)
{
throw MakeStringException(MSGAUD_user, code, "%s", msg);
}
void rtlReportRowOverflow(unsigned size, unsigned max)
{
throw MakeStringException(MSGAUD_user, 1000, "Row size %u exceeds the maximum size specified(%u)", size, max);
}
void rtlReportFieldOverflow(unsigned size, unsigned max, const char * name)
{
if (!name)
rtlReportRowOverflow(size, max);
else
throw MakeStringException(MSGAUD_user, 1000, "Assignment to field '%s' causes row overflow. Size %u exceeds the maximum size specified(%u)", name, size, max);
}
void rtlCheckRowOverflow(unsigned size, unsigned max)
{
if (size > max)
rtlReportRowOverflow(size, max);
}
void rtlCheckFieldOverflow(unsigned size, unsigned max, const char * field)
{
if (size > max)
rtlReportFieldOverflow(size, max, field);
}
void rtlFailUnexpected()
{
throw MakeStringException(MSGAUD_user, -1, "Unexpected code execution");
}
void rtlFailOnAssert()
{
throw MakeStringException(MSGAUD_user, -1, "Abort execution");
}
//---------------------------------------------------------------------------
void deserializeRaw(unsigned recordSize, void *record, MemoryBuffer &in)
{
in.read(recordSize, record);
}
void deserializeDataX(size32_t & len, void * & data, MemoryBuffer &in)
{
free(data);
in.read(sizeof(len), &len);
data = malloc(len);
in.read(len, data);
}
void deserializeStringX(size32_t & len, char * & data, MemoryBuffer &in)
{
free(data);
in.read(sizeof(len), &len);
data = (char *)malloc(len);
in.read(len, data);
}
char * deserializeCStringX(MemoryBuffer &in)
{
unsigned len;
in.read(sizeof(len), &len);
char * data = (char *)malloc(len+1);
in.read(len, data);
data[len] = 0;
return data;
}
void deserializeUnicodeX(size32_t & len, UChar * & data, MemoryBuffer &in)
{
free(data);
in.read(sizeof(len), &len);
data = (UChar *)malloc(len*sizeof(UChar));
in.read(len*sizeof(UChar), data);
}
void deserializeUtf8X(size32_t & len, char * & data, MemoryBuffer &in)
{
free(data);
in.read(sizeof(len), &len);
unsigned size = rtlUtf8Size(len, in.readDirect(0));
data = (char *)malloc(size);
in.read(size, data);
}
UChar * deserializeVUnicodeX(MemoryBuffer &in)
{
unsigned len;
in.read(sizeof(len), &len);
UChar * data = (UChar *)malloc((len+1)*sizeof(UChar));
in.read(len*sizeof(UChar), data);
data[len] = 0;
return data;
}
void deserializeSet(bool & isAll, size32_t & len, void * & data, MemoryBuffer &in)
{
free(data);
in.read(isAll);
in.read(sizeof(len), &len);
data = malloc(len);
in.read(len, data);
}
void serializeRaw(unsigned recordSize, const void *record, MemoryBuffer &out)
{
out.append(recordSize, record);
}
void serializeDataX(size32_t len, const void * data, MemoryBuffer &out)
{
out.append(len).append(len, data);
}
void serializeStringX(size32_t len, const char * data, MemoryBuffer &out)
{
out.append(len).append(len, data);
}
void serializeCStringX(const char * data, MemoryBuffer &out)
{
unsigned len = strlen(data);
out.append(len).append(len, data);
}
void serializeUnicodeX(size32_t len, const UChar * data, MemoryBuffer &out)
{
out.append(len).append(len*sizeof(UChar), data);
}
void serializeUtf8X(size32_t len, const char * data, MemoryBuffer &out)
{
out.append(len).append(rtlUtf8Size(len, data), data);
}
void serializeSet(bool isAll, size32_t len, const void * data, MemoryBuffer &out)
{
out.append(isAll).append(len).append(len, data);
}
//---------------------------------------------------------------------------
ECLRTL_API void serializeFixedString(unsigned len, const char *field, MemoryBuffer &out)
{
out.append(len, field);
}
ECLRTL_API void serializeLPString(unsigned len, const char *field, MemoryBuffer &out)
{
out.append(len);
out.append(len, field);
}
ECLRTL_API void serializeVarString(const char *field, MemoryBuffer &out)
{
out.append(field);
}
ECLRTL_API void serializeBool(bool field, MemoryBuffer &out)
{
out.append(field);
}
ECLRTL_API void serializeFixedData(unsigned len, const void *field, MemoryBuffer &out)
{
out.append(len, field);
}
ECLRTL_API void serializeLPData(unsigned len, const void *field, MemoryBuffer &out)
{
out.append(len);
out.append(len, field);
}
ECLRTL_API void serializeInt1(signed char field, MemoryBuffer &out)
{
// MORE - why did overloading pick the int method for this???
// out.append(field);
out.appendEndian(sizeof(field), &field);
}
ECLRTL_API void serializeInt2(signed short field, MemoryBuffer &out)
{
out.appendEndian(sizeof(field), &field);
}
ECLRTL_API void serializeInt3(signed int field, MemoryBuffer &out)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
out.appendEndian(3, &field);
#else
out.appendEndian(3, ((char *) &field) + 1);
#endif
}
ECLRTL_API void serializeInt4(signed int field, MemoryBuffer &out)
{
out.appendEndian(sizeof(field), &field);
}
ECLRTL_API void serializeInt5(signed __int64 field, MemoryBuffer &out)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
out.appendEndian(5, &field);
#else
out.appendEndian(5, ((char *) &field) + 3);
#endif
}
ECLRTL_API void serializeInt6(signed __int64 field, MemoryBuffer &out)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
out.appendEndian(6, &field);
#else
out.appendEndian(6, ((char *) &field) + 2);
#endif
}
ECLRTL_API void serializeInt7(signed __int64 field, MemoryBuffer &out)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
out.appendEndian(7, &field);
#else
out.appendEndian(7, ((char *) &field) + 1);
#endif
}
ECLRTL_API void serializeInt8(signed __int64 field, MemoryBuffer &out)
{
out.appendEndian(sizeof(field), &field);
}
ECLRTL_API void serializeUInt1(unsigned char field, MemoryBuffer &out)
{
out.appendEndian(sizeof(field), &field);
}
ECLRTL_API void serializeUInt2(unsigned short field, MemoryBuffer &out)
{
out.appendEndian(sizeof(field), &field);
}
ECLRTL_API void serializeUInt3(unsigned int field, MemoryBuffer &out)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
out.appendEndian(3, &field);
#else
out.appendEndian(3, ((char *) &field) + 1);
#endif
}
ECLRTL_API void serializeUInt4(unsigned int field, MemoryBuffer &out)
{
out.appendEndian(sizeof(field), &field);
}
ECLRTL_API void serializeUInt5(unsigned __int64 field, MemoryBuffer &out)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
out.appendEndian(5, &field);
#else
out.appendEndian(5, ((char *) &field) + 3);
#endif
}
ECLRTL_API void serializeUInt6(unsigned __int64 field, MemoryBuffer &out)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
out.appendEndian(6, &field);
#else
out.appendEndian(6, ((char *) &field) + 2);
#endif
}
ECLRTL_API void serializeUInt7(unsigned __int64 field, MemoryBuffer &out)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
out.appendEndian(7, &field);
#else
out.appendEndian(7, ((char *) &field) + 1);
#endif
}
ECLRTL_API void serializeUInt8(unsigned __int64 field, MemoryBuffer &out)
{
out.appendEndian(sizeof(field), &field);
}
ECLRTL_API void serializeReal4(float field, MemoryBuffer &out)
{
out.appendEndian(sizeof(field), &field);
}
ECLRTL_API void serializeReal8(double field, MemoryBuffer &out)
{
out.append(sizeof(field), &field);
}
//These maths functions can all have out of range arguments....
//---------------------------------------------------------------------------
ECLRTL_API double rtlLog10(double x)
{
if (x <= 0) return 0;
return log10(x);
}
ECLRTL_API double rtlLog(double x)
{
if (x <= 0) return 0;
return log(x);
}
ECLRTL_API double rtlSqrt(double x)
{
if (x < 0) return 0;
return sqrt(x);
}
ECLRTL_API double rtlACos(double x)
{
if (fabs(x) > 1) return 0;
return acos(x);
}
ECLRTL_API double rtlASin(double x)
{
if (fabs(x) > 1) return 0;
return asin(x);
}
//---------------------------------------------------------------------------
ECLRTL_API bool rtlIsValidReal(unsigned size, const void * data)
{
byte * bytes = (byte *)data;
//Valid unless it is a Nan, represented by exponent all 1's and non-zero mantissa (ignore the sign).
if (size == 4)
{
//sign(1) exponent(8) mantissa(23)
if (((bytes[3] & 0x7f) == 0x7f) && ((bytes[2] & 0x80) == 0x80))
{
if ((bytes[2] & 0x7f) != 0 || bytes[1] || bytes[0])
return false;
}
}
else if (size == 8)
{
//sign(1) exponent(11) mantissa(52)
if (((bytes[7] & 0x7f) == 0x7f) && ((bytes[6] & 0xF0) == 0xF0))
{
if ((bytes[6] & 0xF) || bytes[5] || bytes[4] || bytes[3] || bytes[2] || bytes[1] || bytes[0])
return false;
}
}
else
{
//sign(1) exponent(15) mantissa(64)
assertex(size==10);
if (((bytes[9] & 0x7f) == 0x7f) && (bytes[8] == 0xFF))
{
if (bytes[7] || bytes[6] || bytes[5] || bytes[4] || bytes[3] || bytes[2] || bytes[1] || bytes[0])
return false;
}
}
return true;
}
void rtlUnicodeToUnicode(size32_t outlen, UChar * out, size32_t inlen, UChar const *in)
{
if(inlen>outlen) inlen = outlen;
memcpy(out, in, inlen*2);
while(inlen=outlen) && (outlen != 0)) inlen = outlen-1;
memcpy(out, in, inlen*2);
out[inlen] = 0x0000;
}
void rtlVUnicodeToUnicode(size32_t outlen, UChar * out, UChar const *in)
{
rtlUnicodeToUnicode(outlen, out, rtlUnicodeStrlen(in), in);
}
void rtlVUnicodeToVUnicode(size32_t outlen, UChar * out, UChar const *in)
{
rtlUnicodeToVUnicode(outlen, out, rtlUnicodeStrlen(in), in);
}
void rtlUnicodeToUnicodeX(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src)
{
tgt = (UChar *)malloc(slen*2);
memcpy(tgt, src, slen*2);
tlen = slen;
}
UChar * rtlUnicodeToVUnicodeX(unsigned slen, UChar const * src)
{
UChar * data = (UChar *)malloc((slen+1)*2);
memcpy(data, src, slen*2);
data[slen] = 0x0000;
return data;
}
void rtlVUnicodeToUnicodeX(unsigned & tlen, UChar * & tgt, UChar const * src)
{
rtlUnicodeToUnicodeX(tlen, tgt, rtlUnicodeStrlen(src), src);
}
UChar * rtlVUnicodeToVUnicodeX(UChar const * src)
{
return rtlUnicodeToVUnicodeX(rtlUnicodeStrlen(src), src);
}
void rtlDecPushUnicode(size32_t len, UChar const * data)
{
char * buff = 0;
unsigned bufflen = 0;
rtlUnicodeToStrX(bufflen, buff, len, data);
DecPushString(bufflen, buff);
rtlFree(buff);
}
unsigned rtlUnicodeStrlen(UChar const * str)
{
return u_strlen(str);
}
//---------------------------------------------------------------------------
unsigned rtlUtf8Size(const void * data)
{
return readUtf8Size(data);
}
unsigned rtlUtf8Size(unsigned len, const void * _data)
{
const byte * data = (const byte *)_data;
size32_t offset = 0;
for (unsigned i=0; i< len; i++)
offset += readUtf8Size(data+offset);
return offset;
}
unsigned rtlUtf8Length(unsigned size, const void * _data)
{
const byte * data = (const byte *)_data;
size32_t length = 0;
for (unsigned offset=0; offset < size; offset += readUtf8Size(data+offset))
length++;
return length;
}
unsigned rtlUtf8Char(const void * data)
{
return readUtf8Char(data);
}
void rtlUtf8ToData(size32_t outlen, void * out, size32_t inlen, const char *in)
{
unsigned insize = rtlUtf8Size(inlen, in);
rtlCodepageToCodepage(outlen, (char *)out, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
}
void rtlUtf8ToDataX(size32_t & outlen, void * & out, size32_t inlen, const char *in)
{
unsigned insize = rtlUtf8Size(inlen, in);
char * cout;
rtlCodepageToCodepageX(outlen, cout, inlen, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
out = cout;
}
void rtlUtf8ToStr(size32_t outlen, char * out, size32_t inlen, const char *in)
{
unsigned insize = rtlUtf8Size(inlen, in);
rtlCodepageToCodepage(outlen, (char *)out, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
}
void rtlUtf8ToStrX(size32_t & outlen, char * & out, size32_t inlen, const char *in)
{
unsigned insize = rtlUtf8Size(inlen, in);
rtlCodepageToCodepageX(outlen, out, inlen, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
}
char * rtlUtf8ToVStr(size32_t inlen, const char *in)
{
unsigned utfSize = rtlUtf8Size(inlen, in);
char *ret = (char *) rtlMalloc(inlen+1);
rtlCodepageToCodepage(inlen, ret, utfSize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
ret[inlen] = 0;
return ret;
}
void rtlDataToUtf8(size32_t outlen, char * out, size32_t inlen, const void *in)
{
rtlCodepageToCodepage(outlen*UTF8_MAXSIZE, (char *)out, inlen, (const char *)in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
}
void rtlDataToUtf8X(size32_t & outlen, char * & out, size32_t inlen, const void *in)
{
unsigned outsize;
rtlCodepageToCodepageX(outsize, out, inlen*UTF8_MAXSIZE, inlen, (const char *)in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
outlen = rtlUtf8Length(outsize, out);
}
void rtlStrToUtf8(size32_t outlen, char * out, size32_t inlen, const char *in)
{
rtlCodepageToCodepage(outlen*UTF8_MAXSIZE, (char *)out, inlen, in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
}
void rtlStrToUtf8X(size32_t & outlen, char * & out, size32_t inlen, const char *in)
{
unsigned outsize;
rtlCodepageToCodepageX(outsize, out, inlen*UTF8_MAXSIZE, inlen, in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
outlen = rtlUtf8Length(outsize, out);
}
void rtlUtf8ToUtf8(size32_t outlen, char * out, size32_t inlen, const char *in)
{
//Packs as many characaters as it can into the target, but don't include any half characters
size32_t offset = 0;
size32_t outsize = outlen*UTF8_MAXSIZE;
for (unsigned i=0; i< inlen; i++)
{
unsigned nextSize = readUtf8Size(in+offset);
if (offset + nextSize > outsize)
break;
offset += nextSize;
}
memcpy(out, in, offset);
if (offset != outsize)
memset(out+offset, ' ', outsize-offset);
}
void rtlUtf8ToUtf8X(size32_t & outlen, char * & out, size32_t inlen, const char *in)
{
unsigned insize = rtlUtf8Size(inlen, in);
char * buffer = (char *)malloc(insize);
memcpy(buffer, in, insize);
outlen = inlen;
out = buffer;
}
static int rtlCompareUtf8Utf8ViaUnicode(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale)
{
rtlDataAttr uleft(llen*sizeof(UChar));
rtlDataAttr uright(rlen*sizeof(UChar));
rtlUtf8ToUnicode(llen, uleft.getustr(), llen, left);
rtlUtf8ToUnicode(rlen, uright.getustr(), rlen, right);
return rtlCompareUnicodeUnicode(llen, uleft.getustr(), rlen, uright.getustr(), locale);
}
int rtlCompareUtf8Utf8(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale)
{
//MORE: Do a simple comparison as long as there are no non->0x80 characters around
// fall back to a full unicode comparison if we hit one - or in the next character to allow for accents etc.
const byte * bleft = (const byte *)left;
const byte * bright = (const byte *)right;
unsigned len = llen > rlen ? rlen : llen;
for (unsigned i = 0; i < len; i++)
{
byte nextLeft = bleft[i];
byte nextRight = bright[i];
if (nextLeft >= 0x80 || nextRight >= 0x80)
return rtlCompareUtf8Utf8ViaUnicode(llen-i, left+i, rlen-i, right+i, locale);
if ((i+1 != len) && ((bleft[i+1] >= 0x80) || bright[i+1] >= 0x80))
return rtlCompareUtf8Utf8ViaUnicode(llen-i, left+i, rlen-i, right+i, locale);
if (nextLeft != nextRight)
return nextLeft - nextRight;
}
int diff = 0;
if (len != llen)
{
for (;(diff == 0) && (len != llen);len++)
diff = bleft[len] - ' ';
}
else if (len != rlen)
{
for (;(diff == 0) && (len != rlen);len++)
diff = ' ' - bright[len];
}
return diff;
}
int rtlCompareUtf8Utf8Strength(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale, unsigned strength)
{
//GH->PG Any better way of doing this? We could possible decide it was a binary comparison instead I guess.
rtlDataAttr uleft(llen*sizeof(UChar));
rtlDataAttr uright(rlen*sizeof(UChar));
rtlUtf8ToUnicode(llen, uleft.getustr(), llen, left);
rtlUtf8ToUnicode(rlen, uright.getustr(), rlen, right);
return rtlCompareUnicodeUnicodeStrength(llen, uleft.getustr(), rlen, uright.getustr(), locale, strength);
}
void rtlDecPushUtf8(size32_t len, const void * data)
{
DecPushString(len, (const char *)data); // good enough for the moment
}
bool rtlUtf8ToBool(size32_t inlen, const char * in)
{
return rtlStrToBool(inlen, in);
}
__int64 rtlUtf8ToInt(size32_t inlen, const char * in)
{
return rtlStrToInt8(inlen, in); // good enough for the moment
}
double rtlUtf8ToReal(size32_t inlen, const char * in)
{
return rtlStrToReal(inlen, in); // good enough for the moment
}
void rtlCodepageToUtf8(unsigned outlen, char * out, unsigned inlen, char const * in, char const * codepage)
{
rtlCodepageToCodepage(outlen*UTF8_MAXSIZE, (char *)out, inlen, in, UTF8_CODEPAGE, codepage);
}
void rtlCodepageToUtf8X(unsigned & outlen, char * & out, unsigned inlen, char const * in, char const * codepage)
{
unsigned outsize;
rtlCodepageToCodepageX(outsize, out, inlen*UTF8_MAXSIZE, inlen, in, UTF8_CODEPAGE, codepage);
outlen = rtlUtf8Length(outsize, out);
}
void rtlUtf8ToCodepage(unsigned outlen, char * out, unsigned inlen, char const * in, char const * codepage)
{
unsigned insize = rtlUtf8Size(inlen, in);
rtlCodepageToCodepage(outlen, (char *)out, insize, in, codepage, UTF8_CODEPAGE);
}
void rtlUtf8ToCodepageX(unsigned & outlen, char * & out, unsigned inlen, char const * in, char const * codepage)
{
unsigned insize = rtlUtf8Size(inlen, in);
rtlCodepageToCodepageX(outlen, out, inlen, insize, in, codepage, UTF8_CODEPAGE);
}
void rtlUnicodeToUtf8X(unsigned & outlen, char * & out, unsigned inlen, const UChar * in)
{
unsigned outsize;
rtlUnicodeToCodepageX(outsize, out, inlen, in, UTF8_CODEPAGE);
outlen = rtlUtf8Length(outsize, out);
}
void rtlUnicodeToUtf8(unsigned outlen, char * out, unsigned inlen, const UChar * in)
{
rtlUnicodeToCodepage(outlen*UTF8_MAXSIZE, out, inlen, in, UTF8_CODEPAGE);
}
void rtlUtf8ToUnicodeX(unsigned & outlen, UChar * & out, unsigned inlen, char const * in)
{
rtlCodepageToUnicodeX(outlen, out, rtlUtf8Size(inlen, in), in, UTF8_CODEPAGE);
}
void rtlUtf8ToUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
{
rtlCodepageToUnicode(outlen, out, rtlUtf8Size(inlen, in), in, UTF8_CODEPAGE);
}
ECLRTL_API void rtlUtf8SubStrFT(unsigned tlen, char * tgt, unsigned slen, char const * src, unsigned from, unsigned to)
{
normalizeFromTo(from, to);
unsigned len = to - from;
clipFromTo(from, to, slen);
unsigned copylen = to - from;
unsigned startOffset = rtlUtf8Size(from, src);
rtlUtf8ToUtf8(tlen, tgt, copylen, src+startOffset);
}
ECLRTL_API void rtlUtf8SubStrFTX(unsigned & tlen, char * & tgt, unsigned slen, char const * src, unsigned from, unsigned to)
{
normalizeFromTo(from, to);
unsigned len = to - from;
clipFromTo(from, to, slen);
unsigned copylen = to - from;
unsigned fillSize = len - copylen;
unsigned startOffset = rtlUtf8Size(from, src);
unsigned copySize = rtlUtf8Size(copylen, src+startOffset);
char * buffer = (char *)malloc(copySize + fillSize);
memcpy(buffer, (byte *)src+startOffset, copySize);
if (fillSize)
memset(buffer+copySize, ' ', fillSize);
tlen = len;
tgt = buffer;
}
ECLRTL_API void rtlUtf8SubStrFX(unsigned & tlen, char * & tgt, unsigned slen, char const * src, unsigned from)
{
normalizeFromTo(from, slen);
unsigned len = slen - from;
unsigned startOffset = rtlUtf8Size(from, src);
unsigned copySize = rtlUtf8Size(len, src+startOffset);
char * buffer = (char *)malloc(copySize);
memcpy(buffer, (byte *)src+startOffset, copySize);
tlen = len;
tgt = buffer;
}
ECLRTL_API void rtlUtf8ToLower(size32_t l, char * t, char const * locale)
{
//Convert to lower case, but only go via unicode routines if we have to...
const byte * buffer = (const byte *)t;
for (unsigned i=0; i< l; i++)
{
byte next = *t;
if (next >= 0x80)
{
//yuk, go via unicode to do the convertion.
unsigned len = l-i;
unsigned size = rtlUtf8Size(len, t+i);
rtlDataAttr unicode(len*sizeof(UChar));
rtlCodepageToUnicode(len, unicode.getustr(), size, t+i, UTF8_CODEPAGE);
rtlUnicodeToLower(len, unicode.getustr(), locale);
rtlUnicodeToCodepage(size, t+i, len, unicode.getustr(), UTF8_CODEPAGE);
return;
}
*t++ = tolower(next);
}
}
ECLRTL_API void rtlConcatUtf8(unsigned & tlen, char * * tgt, ...)
{
//Going to have to go via unicode because of normalization. However, it might be worth optimizing the case where no special characters are present
va_list args;
unsigned totalLength = 0;
unsigned maxLength = 0;
va_start(args, tgt);
for(;;)
{
unsigned len = va_arg(args, unsigned);
if(len+1==0)
break;
const char * str = va_arg(args, const char *);
totalLength += len;
if (len > maxLength)
maxLength = len;
}
va_end(args);
rtlDataAttr next(maxLength*sizeof(UChar));
rtlDataAttr result(totalLength*sizeof(UChar));
unsigned idx = 0;
UErrorCode err = U_ZERO_ERROR;
va_start(args, tgt);
for(;;)
{
unsigned len = va_arg(args, unsigned);
if(len+1==0)
break;
if (len)
{
const char * str = va_arg(args, const char *);
rtlUtf8ToUnicode(len, next.getustr(), len, str);
idx = unorm_concatenate(result.getustr(), idx, next.getustr(), len, result.getustr(), totalLength, UNORM_NFC, 0, &err);
}
}
va_end(args);
rtlUnicodeToUtf8X(tlen, *tgt, idx, result.getustr());
}
ECLRTL_API unsigned rtlConcatUtf8ToUtf8(unsigned tlen, char * tgt, unsigned offset, unsigned slen, const char * src)
{
//NB: Inconsistently with the other varieties, idx is a byte offset, not a character position to make the code more efficient.....
//normalization is done in the space filling routine at the end
unsigned ssize = rtlUtf8Size(slen, src);
assertex(tlen * UTF8_MAXSIZE >= offset+ssize);
memcpy(tgt+offset, src, ssize);
return offset + ssize;
}
ECLRTL_API void rtlUtf8SpaceFill(unsigned tlen, char * tgt, unsigned offset)
{
const byte * src = (const byte *)tgt;
for (unsigned i=0; i= 0x80)
{
unsigned idx = rtlUtf8Length(offset, tgt);
rtlDataAttr unicode(idx*sizeof(UChar));
rtlUtf8ToUnicode(idx, unicode.getustr(), idx, tgt);
unicodeEnsureIsNormalized(idx, unicode.getustr());
rtlUnicodeToUtf8(tlen, tgt, idx, unicode.getustr());
return;
}
}
//no special characters=>easy route.
memset(tgt+offset, ' ', tlen*UTF8_MAXSIZE-offset);
}
ECLRTL_API unsigned rtlHash32Utf8(unsigned length, const char * k, unsigned initval)
{
return rtlHash32Data(rtlUtf8Size(length, k), k, initval);
}
ECLRTL_API unsigned rtlHashUtf8(unsigned length, const char * k, unsigned initval)
{
return rtlHashData(rtlUtf8Size(length, k), k, initval);
}
ECLRTL_API hash64_t rtlHash64Utf8(unsigned length, const char * k, hash64_t initval)
{
return rtlHash64Data(rtlUtf8Size(length, k), k, initval);
}
unsigned rtlCrcUtf8(unsigned length, const char * k, unsigned initval)
{
return rtlCrcData(rtlUtf8Size(length, k), k, initval);
}
int rtlNewSearchUtf8Table(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search, const char * locale)
{
//MORE: Hopelessly inefficient.... Should rethink - possibly introducing a class for doing string searching, and the Utf8 variety pre-converting the
//search strings into unicode.
int left = 0;
int right = count;
do
{
int mid = (left + right) >> 1;
int cmp = rtlCompareUtf8Utf8(width, search, elemlen, table[mid], locale);
if (cmp < 0)
right = mid;
else if (cmp > 0)
left = mid+1;
else
return mid;
} while (left < right);
return -1;
}
//---------------------------------------------------------------------------
#ifdef _USE_BOOST_REGEX
class CStrRegExprFindInstance : implements IStrRegExprFindInstance
{
private:
bool matched;
const boost::regex * regEx;
boost::cmatch subs;
char * sample; //only required if findstr/findvstr will be called
public:
CStrRegExprFindInstance(const boost::regex * _regEx, const char * _str, size32_t _from, size32_t _len, bool _keep)
: regEx(_regEx)
{
matched = false;
sample = NULL;
try
{
if (_keep)
{
sample = (char *)malloc(_len + 1); //required for findstr
memcpy(sample, _str + _from, _len);
sample[_len] = (char)NULL;
matched = boost::regex_search(sample, subs, *regEx);
}
else
{
matched = boost::regex_search(_str + _from, _str + _len, subs, *regEx);
}
}
catch (std::runtime_error e)
{
throw MakeStringException(0, "Error in regex search: %s (regex: %s)", e.what(), regEx->str().c_str());
}
}
~CStrRegExprFindInstance() //CAVEAT non-virtual destructor !
{
free(sample);
}
//IStrRegExprFindInstance
bool found() const { return matched; }
void getMatchX(unsigned & outlen, char * & out, unsigned n = 0) const
{
if (matched && (n < subs.size()))
{
outlen = subs[n].second - subs[n].first;
out = (char *)malloc(outlen);
memcpy(out, subs[n].first, outlen);
}
else
{
outlen = 0;
out = NULL;
}
}
char const * findvstr(unsigned outlen, char * out, unsigned n = 0)
{
if (matched && (n < subs.size()))
{
unsigned sublen = subs[n].second - subs[n].first;
if (sublen >= outlen)
sublen = outlen - 1;
memcpy(out, subs[n].first, sublen);
out[sublen] = 0;
}
else
{
out[0] = 0;
}
return out;
}
};
//---------------------------------------------------------------------------
class CCompiledStrRegExpr : implements ICompiledStrRegExpr
{
private:
boost::regex regEx;
public:
CCompiledStrRegExpr(const char * _regExp, bool _isCaseSensitive = false)
{
try
{
if (_isCaseSensitive)
regEx.assign(_regExp, boost::regbase::perl);
else
regEx.assign(_regExp, boost::regbase::perl | boost::regbase::icase);
}
catch(boost::bad_expression e)
{
StringBuffer msg;
msg.append("Bad regular expression: ").append(e.what()).append(": ").append(_regExp);
rtlFail(0, msg.str()); //throws
}
}
//ICompiledStrRegExpr
void replace(size32_t & outlen, char * & out, size32_t slen, char const * str, size32_t rlen, char const * replace) const
{
std::string src(str, str + slen);
std::string fmt(replace, replace + rlen);
std::string tgt;
try
{
// tgt = boost::regex_merge(src, cre->regEx, fmt, boost::format_perl); //Algorithm regex_merge has been renamed regex_replace, existing code will continue to compile, but new code should use regex_replace instead.
tgt = boost::regex_replace(src, regEx, fmt, boost::format_perl);
}
catch(std::runtime_error e)
{
throw MakeStringException(0, "Error in regex replace: %s (regex: %s)", e.what(), regEx.str().c_str());
}
outlen = tgt.length();
out = (char *)malloc(outlen);
memcpy(out, tgt.data(), outlen);
}
IStrRegExprFindInstance * find(const char * str, size32_t from, size32_t len, bool needToKeepSearchString) const
{
CStrRegExprFindInstance * findInst = new CStrRegExprFindInstance(®Ex, str, from, len, needToKeepSearchString);
return findInst;
}
};
//---------------------------------------------------------------------------
ECLRTL_API ICompiledStrRegExpr * rtlCreateCompiledStrRegExpr(const char * regExpr, bool isCaseSensitive)
{
CCompiledStrRegExpr * expr = new CCompiledStrRegExpr(regExpr, isCaseSensitive);
return expr;
}
ECLRTL_API void rtlDestroyCompiledStrRegExpr(ICompiledStrRegExpr * compiledExpr)
{
if (compiledExpr)
delete (CCompiledStrRegExpr*)compiledExpr;
}
ECLRTL_API void rtlDestroyStrRegExprFindInstance(IStrRegExprFindInstance * findInst)
{
if (findInst)
delete (CStrRegExprFindInstance*)findInst;
}
//---------------------------------------------------------------------------
// RegEx Compiler for unicode strings
class CUStrRegExprFindInstance : implements IUStrRegExprFindInstance
{
private:
bool matched;
RegexMatcher * matcher;
UnicodeString sample;
unsigned matchedSize;
public:
CUStrRegExprFindInstance(RegexMatcher * _matcher, const UChar * _str, size32_t _from, size32_t _len)
: matcher(_matcher)
{
matched = false;
sample.setTo(_str + _from, _len);
matcher->reset(sample);
matched = matcher->find();
if (matched)
matchedSize = (unsigned)matcher->groupCount() + 1;
}
//IUStrRegExprFindInstance
bool found() const { return matched; }
void getMatchX(unsigned & outlen, UChar * & out, unsigned n = 0) const
{
if(matched && (n < matchedSize))
{
assertex(matcher);
UErrorCode uerr = U_ZERO_ERROR;
int32_t start = n ? matcher->start(n, uerr) : matcher->start(uerr);
int32_t end = n ? matcher->end(n, uerr) : matcher->end(uerr);
outlen = end - start;
out = (UChar *)malloc(outlen*2);
sample.extract(start, outlen, out);
}
else
{
outlen = 0;
out = NULL;
}
}
UChar const * findvstr(unsigned outlen, UChar * out, unsigned n = 0)
{
if(matched && (n < matchedSize))
{
assertex(matcher);
UErrorCode uerr = U_ZERO_ERROR;
int32_t start = n ? matcher->start(n, uerr) : matcher->start(uerr);
int32_t end = n ? matcher->end(n, uerr) : matcher->end(uerr);
unsigned sublen = end - start;
if(sublen >= outlen)
sublen = outlen - 1;
sample.extract(start, sublen, out);
out[sublen] = 0;
}
else
{
out[0] = 0;
}
return out;
}
};
//---------------------------------------------------------------------------
class CCompiledUStrRegExpr : implements ICompiledUStrRegExpr
{
private:
RegexPattern * pattern;
RegexMatcher * matcher;
public:
CCompiledUStrRegExpr(const UChar * _UregExp, bool _isCaseSensitive = false)
{
UErrorCode uerr = U_ZERO_ERROR;
UParseError uperr;
if (_isCaseSensitive)
pattern = RegexPattern::compile(_UregExp, uperr, uerr);
else
pattern = RegexPattern::compile(_UregExp, UREGEX_CASE_INSENSITIVE, uperr, uerr);
matcher = pattern->matcher(uerr);
if (U_FAILURE(uerr))
{
char * expAscii;
unsigned expAsciiLen;
rtlUnicodeToEscapedStrX(expAsciiLen, expAscii, rtlUnicodeStrlen(_UregExp), _UregExp);
StringBuffer msg;
msg.append("Bad regular expression: ").append(u_errorName(uerr)).append(": ").append(expAsciiLen, expAscii);
rtlFree(expAscii);
delete matcher;
delete pattern;
matcher = 0;
pattern = 0;
rtlFail(0, msg.str()); //throws
}
}
~CCompiledUStrRegExpr()
{
if (matcher)
delete matcher;
if (pattern)
delete pattern;
}
void replace(size32_t & outlen, UChar * & out, size32_t slen, const UChar * str, size32_t rlen, UChar const * replace) const
{
UnicodeString const src(str, slen);
UErrorCode err = U_ZERO_ERROR;
RegexMatcher * replacer = pattern->matcher(src, err);
UnicodeString const fmt(replace, rlen);
UnicodeString const tgt = replacer->replaceAll(fmt, err);
outlen = tgt.length();
out = (UChar *)malloc(outlen*2);
tgt.extract(0, outlen, out);
delete replacer;
}
IUStrRegExprFindInstance * find(const UChar * str, size32_t from, size32_t len) const
{
CUStrRegExprFindInstance * findInst = new CUStrRegExprFindInstance(matcher, str, from, len);
return findInst;
}
};
//---------------------------------------------------------------------------
ECLRTL_API ICompiledUStrRegExpr * rtlCreateCompiledUStrRegExpr(const UChar * regExpr, bool isCaseSensitive)
{
CCompiledUStrRegExpr * expr = new CCompiledUStrRegExpr(regExpr, isCaseSensitive);
return expr;
}
ECLRTL_API void rtlDestroyCompiledUStrRegExpr(ICompiledUStrRegExpr * compiledExpr)
{
if (compiledExpr)
delete (CCompiledUStrRegExpr*)compiledExpr;
}
ECLRTL_API void rtlDestroyUStrRegExprFindInstance(IUStrRegExprFindInstance * findInst)
{
if (findInst)
delete (CUStrRegExprFindInstance*)findInst;
}
#else // _USE_BOOST_REGEX not set
ECLRTL_API ICompiledStrRegExpr * rtlCreateCompiledStrRegExpr(const char * regExpr, bool isCaseSensitive)
{
UNIMPLEMENTED_X("Boost regex disabled");
}
ECLRTL_API void rtlDestroyCompiledStrRegExpr(ICompiledStrRegExpr * compiledExpr)
{
}
ECLRTL_API void rtlDestroyStrRegExprFindInstance(IStrRegExprFindInstance * findInst)
{
}
ECLRTL_API ICompiledUStrRegExpr * rtlCreateCompiledUStrRegExpr(const UChar * regExpr, bool isCaseSensitive)
{
UNIMPLEMENTED_X("Boost regex disabled");
}
ECLRTL_API void rtlDestroyCompiledUStrRegExpr(ICompiledUStrRegExpr * compiledExpr)
{
}
ECLRTL_API void rtlDestroyUStrRegExprFindInstance(IUStrRegExprFindInstance * findInst)
{
}
#endif
//---------------------------------------------------------------------------
ECLRTL_API int rtlQueryLocalFailCode(IException * e)
{
return e->errorCode();
}
ECLRTL_API void rtlGetLocalFailMessage(size32_t & len, char * & text, IException * e, const char * tag)
{
rtlExceptionExtract(len, text, e, tag);
}
ECLRTL_API void rtlFreeException(IException * e)
{
e->Release();
}
//---------------------------------------------------------------------------
//Generally any calls to this function have also checked that the length(trim(str)) <= fieldLen, so exceptions should only occur if compareLen > fieldLen
//However, function can now also handle the exception case.
ECLRTL_API void rtlCreateRange(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str, byte fill, byte pad)
{
//
if (compareLen > fieldLen)
{
if ((int)compareLen >= 0)
{
//x[1..m] = y, m is larger than fieldLen, so truncate to fieldLen
compareLen = fieldLen;
}
else
compareLen = 0; // probably m[1..-1] or something silly
}
if (len > compareLen)
{
while ((len > compareLen) && (str[len-1] == pad))
len--;
//so change the search range to FF,FF,FF .. 00.00.00 which will then never match.
if (len > compareLen)
{
compareLen = 0;
fill = (fill == 0) ? 255 : 0;
}
}
outlen = fieldLen;
out = (char *)malloc(fieldLen);
if (len >= compareLen)
memcpy(out, str, compareLen);
else
{
memcpy(out, str, len);
memset(out+len, pad, compareLen-len);
}
memset(out + compareLen, fill, fieldLen-compareLen);
}
ECLRTL_API void rtlCreateStrRangeLow(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
{
rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 0, ' ');
}
ECLRTL_API void rtlCreateStrRangeHigh(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
{
rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 255, ' ');
}
ECLRTL_API void rtlCreateDataRangeLow(size32_t & outlen, void * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const void * str)
{
rtlCreateRange(outlen, *(char * *)&out, fieldLen, compareLen, len, (const char *)str, 0, 0);
}
ECLRTL_API void rtlCreateDataRangeHigh(size32_t & outlen, void * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const void * str)
{
rtlCreateRange(outlen, *(char * *)&out, fieldLen, compareLen, len, (const char *)str, 255, 0);
}
ECLRTL_API void rtlCreateRangeLow(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
{
rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 0, ' ');
}
ECLRTL_API void rtlCreateRangeHigh(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
{
rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 255, ' ');
}
ECLRTL_API void rtlCreateUnicodeRange(size32_t & outlen, UChar * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const UChar * str, byte fill)
{
//Same as function above!
if (compareLen > fieldLen)
{
if ((int)compareLen >= 0)
{
//x[1..m] = y, m is larger than fieldLen, so truncate to fieldLen
compareLen = fieldLen;
}
else
compareLen = 0; // probably m[1..-1] or something silly
}
if (len > compareLen)
{
while ((len > compareLen) && (str[len-1] == ' '))
len--;
//so change the search range to FF,FF,FF .. 00.00.00 which will then never match.
if (len > compareLen)
{
compareLen = 0;
fill = (fill == 0) ? 255 : 0;
}
}
outlen = fieldLen;
out = (UChar *)malloc(fieldLen*sizeof(UChar));
if (len >= compareLen)
memcpy(out, str, compareLen*sizeof(UChar));
else
{
memcpy(out, str, len * sizeof(UChar));
while (len != compareLen)
out[len++] = ' ';
}
memset(out + compareLen, fill, (fieldLen-compareLen) * sizeof(UChar));
}
ECLRTL_API void rtlCreateUnicodeRangeLow(size32_t & outlen, UChar * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const UChar * str)
{
rtlCreateUnicodeRange(outlen, out, fieldLen, compareLen, len, str, 0x00);
}
ECLRTL_API void rtlCreateUnicodeRangeHigh(size32_t & outlen, UChar * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const UChar * str)
{
rtlCreateUnicodeRange(outlen, out, fieldLen, compareLen, len, str, 0xFF);
}
//---------------------------------------------------------------------------
ECLRTL_API unsigned rtlCountRows(size32_t len, const void * data, IRecordSize * rs)
{
if (rs->isFixedSize())
return len / rs->getFixedSize();
unsigned count = 0;
while (len)
{
size32_t thisLen = rs->getRecordSize(data);
data = (byte *)data + thisLen;
if (thisLen > len)
throw MakeStringException(0, "Invalid raw data");
len -= thisLen;
count++;
}
return count;
}
//---------------------------------------------------------------------------
ECLRTL_API size32_t rtlCountToSize(unsigned count, const void * data, IRecordSize * rs)
{
if (rs->isFixedSize())
return count * rs->getFixedSize();
unsigned size = 0;
for (unsigned i=0;igetRecordSize(data);
data = (byte *)data + thisLen;
size += thisLen;
}
return size;
}
//---------------------------------------------------------------------------
class rtlCodepageConverter
{
public:
rtlCodepageConverter(char const * sourceName, char const * targetName, bool & failed) : uerr(U_ZERO_ERROR)
{
srccnv = ucnv_open(sourceName, &uerr);
tgtcnv = ucnv_open(targetName, &uerr);
tgtMaxRatio = ucnv_getMaxCharSize(tgtcnv);
failed = U_FAILURE(uerr);
}
~rtlCodepageConverter()
{
ucnv_close(srccnv);
ucnv_close(tgtcnv);
}
void convertX(unsigned & targetLength, char * & target, unsigned sourceLength, char const * source, bool & failed, bool preflight)
{
//convert from source to utf-16: try to avoid preflighting by guessing upper bound
//unicode length in UChars equal source length in chars if single byte encoding, and be less for multibyte
UChar * ubuff = (UChar *)malloc(sourceLength*2);
int32_t ulen = ucnv_toUChars(srccnv, ubuff, sourceLength, source, sourceLength, &uerr);
if(ulen > (int32_t)sourceLength)
{
//okay, so our guess was wrong, and we have to reallocate
free(ubuff);
ubuff = (UChar *)malloc(ulen*2);
ucnv_toUChars(srccnv, ubuff, ulen, source, sourceLength, &uerr);
}
if(preflight)
{
//convert from utf-16 to target: preflight to get buffer of exactly the right size
UErrorCode uerr2 = uerr; //preflight has to use copy of error code, as it is considered an 'error'
int32_t tlen = ucnv_fromUChars(tgtcnv, 0, 0, ubuff, ulen, &uerr2);
target = (char *)malloc(tlen);
targetLength = ucnv_fromUChars(tgtcnv, target, tlen, ubuff, ulen, &uerr);
}
else
{
//convert from utf-16 to target: avoid preflighting by allocating buffer of maximum size
target = (char *)malloc(ulen*tgtMaxRatio);
targetLength = ucnv_fromUChars(tgtcnv, target, ulen*tgtMaxRatio, ubuff, ulen, &uerr);
}
free(ubuff);
failed = U_FAILURE(uerr);
}
unsigned convert(unsigned targetLength, char * target, unsigned sourceLength, char const * source, bool & failed)
{
char * tgtStart = target;
ucnv_convertEx(tgtcnv, srccnv, &target, target+targetLength, &source, source+sourceLength, 0, 0, 0, 0, true, true, &uerr);
int32_t ret = target-tgtStart;
failed = U_FAILURE(uerr);
return ret;
}
private:
UErrorCode uerr;
UConverter * srccnv;
UConverter * tgtcnv;
int8_t tgtMaxRatio;
};
void * rtlOpenCodepageConverter(char const * sourceName, char const * targetName, bool & failed)
{
return new rtlCodepageConverter(sourceName, targetName, failed);
}
void rtlCloseCodepageConverter(void * converter)
{
delete ((rtlCodepageConverter *)converter);
}
void rtlCodepageConvertX(void * converter, unsigned & targetLength, char * & target, unsigned sourceLength, char const * source, bool & failed, bool preflight)
{
((rtlCodepageConverter *)converter)->convertX(targetLength, target, sourceLength, source, failed, preflight);
}
unsigned rtlCodepageConvert(void * converter, unsigned targetLength, char * target, unsigned sourceLength, char const * source, bool & failed)
{
return ((rtlCodepageConverter *)converter)->convert(targetLength, target, sourceLength, source, failed);
}
//---------------------------------------------------------------------------
void appendUChar(MemoryBuffer & buff, char x)
{
UChar c = x;
buff.append(sizeof(c), &c);
}
void appendUChar(MemoryBuffer & buff, UChar c)
{
buff.append(sizeof(c), &c);
}
void appendUStr(MemoryBuffer & x, const char * text)
{
while (*text)
{
UChar c = *text++;
x.append(sizeof(c), &c);
}
}
ECLRTL_API void xmlDecodeStrX(size32_t & outLen, char * & out, size32_t inLen, const char * in)
{
StringBuffer temp;
decodeXML(in, temp, inLen);
outLen = temp.length();
out = temp.detach();
}
bool hasPrefix(const UChar * ustr, const char * str, unsigned len)
{
while (len--)
{
if (*ustr++ != *str++)
return false;
}
return true;
}
ECLRTL_API void xmlDecodeUStrX(size32_t & outLen, UChar * & out, size32_t inLen, const UChar * in)
{
const UChar * cur = in;
const UChar * end = in+inLen;
MemoryBuffer ret;
while (cur');
}
else if(hasPrefix(cur+1, "quot;", 5))
{
cur += 5;
appendUChar(ret, '"');
}
else if(hasPrefix(cur+1, "apos;", 5))
{
cur += 5;
appendUChar(ret, '\'');
}
else
{
cur++;
if (*cur == '#')
{
cur++;
unsigned base = 10;
if (*cur == 'x' || *cur == 'X') // strictly not sure about X.
{
base = 16;
cur++;
}
UChar value = 0;
while (cur < end)
{
unsigned digit;
UChar next = *cur;
if ((next >= '0') && (next <= '9'))
digit = next-'0';
else if ((next >= 'A') && (next <= 'F'))
digit = next-'A'+10;
else if ((next >= 'a') && (next <= 'f'))
digit = next-'a'+10;
else
break;
if (digit >= base)
break;
value = value * base + digit;
cur++;
}
appendUChar(ret, value);
//if (cur == end) || (*cur != ';') throw Error;
}
else
appendUChar(ret, *cur); // error... / unexpanded entity
}
//assertex(cur':
appendUStr(ret, ">");
break;
case '\"':
appendUStr(ret, """);
break;
case '\'':
appendUStr(ret, "'");
break;
case ' ':
appendUStr(ret, flags & ENCODE_SPACES?" ":" ");
break;
case '\n':
appendUStr(ret, flags & ENCODE_NEWLINES?"
":"\n");
break;
case '\r':
appendUStr(ret, flags & ENCODE_NEWLINES?"
":"\r");
break;
case '\t':
appendUStr(ret, flags & ENCODE_SPACES?" ":"\t");
break;
default:
appendUChar(ret, next);
break;
}
inLen--;
cur++;
}
outLen = ret.length()/2;
out = (UChar *)ret.detach();
}
//---------------------------------------------------------------------------
#define STRUCTURED_EXCEPTION_TAG "Error"
inline bool isStructuredMessage(const char * text, const char * tag)
{
if (!text || text[0] != '<')
return false;
if (!tag)
return true;
size32_t lenTag = strlen(tag);
if (memcmp(text+1,tag,lenTag) != 0)
return false;
if (text[lenTag+1] != '>')
return false;
return true;
}
inline bool isStructuredError(const char * text) { return isStructuredMessage(text, STRUCTURED_EXCEPTION_TAG); }
void rtlExtractTag(size32_t & outLen, char * & out, const char * text, const char * tag, const char * rootTag)
{
if (!tag || !isStructuredMessage(text, rootTag))
{
if (!tag || strcmp(tag, "text")==0)
rtlStrToStrX(outLen, out, strlen(text), text);
else
{
outLen = 0;
out = NULL;
}
}
else
{
StringBuffer startTag, endTag;
startTag.append("<").append(tag).append(">");
endTag.append("").append(tag).append(">");
const char * start = strstr(text, startTag.str());
const char * end = strstr(text, endTag.str());
if (start && end)
{
start += startTag.length();
xmlDecodeStrX(outLen, out, end-start, start);
}
else
{
outLen = 0;
out = NULL;
}
}
}
void rtlExceptionExtract(size32_t & outLen, char * & out, const char * text, const char * tag)
{
if (!tag) tag = "text";
rtlExtractTag(outLen, out, text, tag, STRUCTURED_EXCEPTION_TAG);
}
void rtlExceptionExtract(size32_t & outLen, char * & out, IException * e, const char * tag)
{
StringBuffer text;
e->errorMessage(text);
rtlExceptionExtract(outLen, out, text.str(), tag);
}
void rtlAddExceptionTag(StringBuffer & errorText, const char * tag, const char * value)
{
if (!isStructuredError(errorText.str()))
{
StringBuffer temp;
temp.append("<" STRUCTURED_EXCEPTION_TAG ">");
encodeXML(errorText.str(), temp, ENCODE_WHITESPACE, errorText.length(), false);
temp.append("" STRUCTURED_EXCEPTION_TAG ">");
errorText.swapWith(temp);
}
StringBuffer temp;
temp.append("<").append(tag).append(">");
encodeXML(value, temp, ENCODE_WHITESPACE, (unsigned)-1, false);
temp.append("").append(tag).append(">");
unsigned len = errorText.length();
unsigned pos = len - strlen(STRUCTURED_EXCEPTION_TAG) - 3;
errorText.insert(pos, temp);
}
//---------------------------------------------------------------------------
void rtlRowBuilder::forceAvailable(size32_t size)
{
const size32_t chunkSize = 64;
maxsize = (size + chunkSize-1) & ~(chunkSize-1);
ptr = realloc(ptr, maxsize);
}
//---------------------------------------------------------------------------
inline unsigned numExtraBytesFromValue(unsigned __int64 first)
{
if (first >= I64C(0x10000000))
if (first >= I64C(0x40000000000))
if (first >= I64C(0x2000000000000))
if (first >= I64C(0x100000000000000))
return 8;
else
return 7;
else
return 6;
else
if (first >= I64C(0x800000000))
return 5;
else
return 4;
else
if (first >= 0x4000)
if (first >= 0x200000)
return 3;
else
return 2;
else
if (first >= 0x80)
return 1;
else
return 0;
}
//An packed byte format, based on the unicode packing of utf-8.
//The number of top bits set in the leading byte indicates how many extra
//bytes follow (0..8). It gives the same compression as using a top bit to
//indicate continuation, but seems to be quicker (and requires less look ahead).
/*
byte numExtraBytesFromFirstTable[256] =
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8
};
inline unsigned numExtraBytesFromFirst(byte first)
{
return numExtraBytesFromFirstTable(first);
}
*/
//NB: This seems to be faster than using the table lookup above. Probably affects the data cache less
inline unsigned numExtraBytesFromFirst(byte first)
{
if (first >= 0xF0)
if (first >= 0xFC)
if (first >= 0xFE)
if (first >= 0xFF)
return 8;
else
return 7;
else
return 6;
else
if (first >= 0xF8)
return 5;
else
return 4;
else
if (first >= 0xC0)
if (first >= 0xE0)
return 3;
else
return 2;
else
if (first >= 0x80)
return 1;
else
return 0;
}
static byte leadingValueMask[9] = { 0x7f, 0x3f, 0x1f, 0x0f, 0x07, 0x03, 0x01, 0x00, 0x00 };
static byte leadingLengthMask[9] = { 0x00, 0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF };
//maximum number of bytes for a packed value is size+1 bytes for size <=8 and last byte being fully used.
unsigned __int64 rtlGetPackedUnsigned(const void * _ptr)
{
const byte * ptr = (const byte *)_ptr;
byte first = *ptr++;
unsigned numExtra = numExtraBytesFromFirst(first);
unsigned __int64 value = first & leadingValueMask[numExtra];
//Loop unrolling has a negligable effect
while (numExtra--)
value = (value << 8) | *ptr++;
return value;
}
void rtlSetPackedUnsigned(void * _ptr, unsigned __int64 value)
{
byte * ptr = (byte *)_ptr;
unsigned numExtra = numExtraBytesFromValue(value);
byte firstMask = leadingLengthMask[numExtra];
while (numExtra)
{
ptr[numExtra--] = (byte)value;
value >>= 8;
}
ptr[0] = (byte)value | firstMask;
}
size32_t rtlGetPackedSize(const void * ptr)
{
return numExtraBytesFromFirst(*(byte*)ptr)+1;
}
size32_t rtlGetPackedSizeFromFirst(byte first)
{
return numExtraBytesFromFirst(first)+1;
}
//Store signed by moving the sign to the bottom bit, and inverting if negative.
//so small positive and negative numbers are stored compactly.
__int64 rtlGetPackedSigned(const void * ptr)
{
unsigned __int64 value = rtlGetPackedUnsigned(ptr);
unsigned __int64 shifted = (value >> 1);
return (__int64)((value & 1) ? ~shifted : shifted);
}
void rtlSetPackedSigned(void * ptr, __int64 value)
{
unsigned __int64 storeValue;
if (value < 0)
storeValue = (~value << 1) | 1;
else
storeValue = value << 1;
rtlSetPackedUnsigned(ptr, storeValue);
}
IAtom * rtlCreateFieldNameAtom(const char * name)
{
return createAtom(name);
}
//---------------------------------------------------------------------------
void RtlCInterface::Link() const { atomic_inc(&xxcount); }
bool RtlCInterface::Release(void) const
{
if (atomic_dec_and_test(&xxcount))
{
delete this;
return true;
}
return false;
}
//---------------------------------------------------------------------------
#if 0
void PrintExtract(StringBuffer & s, const char * tag)
{
size32_t outLen;
char * out = NULL;
rtlExceptionExtract(outLen, out, s.str(), tag);
PrintLog("%s = %.*s", tag, outLen, out);
rtlFree(out);
}
void testStructuredExceptions()
{
StringBuffer s;
s.append("Thissome text");
PrintExtract(s, NULL);
PrintExtract(s, "text");
PrintExtract(s, "is");
rtlAddExceptionTag(s, "location", "192.168.12.1");
PrintExtract(s, NULL);
PrintExtract(s, "text");
PrintExtract(s, "is");
PrintExtract(s, "location");
rtlAddExceptionTag(s, "author", "gavin");
PrintExtract(s, NULL);
PrintExtract(s, "text");
PrintExtract(s, "is");
PrintExtract(s, "location");
PrintExtract(s, "author");
PrintLog("%s", s.str());
}
static void testPackedUnsigned()
{
unsigned __int64 values[] = { 0, 1, 2, 10, 127, 128, 16383, 16384, 32767, 32768, 0xffffff, 0x7fffffff, 0xffffffff,
I64C(0xffffffffffffff), I64C(0x100000000000000), I64C(0x7fffffffffffffff), I64C(0xffffffffffffffff) };
unsigned numBytes[] = { 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 5, 5, 8, 9, 9, 9 };
unsigned numValues = _elements_in(values);
byte temp[9];
for (unsigned i = 0; i < numValues; i++)
{
rtlSetPackedUnsigned(temp, values[i]);
assertex(rtlGetPackedSize(temp) == numBytes[i]);
assertex(rtlGetPackedUnsigned(temp) == values[i]);
}
for (unsigned j= 0; j < 2000000; j++)
{
unsigned __int64 value = I64C(1) << (rtlRandom() & 63);
// unsigned value = rtlRandom();
rtlSetPackedUnsigned(temp, value);
assertex(rtlGetPackedSize(temp) == numExtraBytesFromValue(value)+1);
assertex(rtlGetPackedUnsigned(temp) == value);
}
for (unsigned k= 0; k < 63; k++)
{
unsigned __int64 value1 = I64C(1) << k;
rtlSetPackedUnsigned(temp, value1);
assertex(rtlGetPackedSize(temp) == numExtraBytesFromValue(value1)+1);
assertex(rtlGetPackedUnsigned(temp) == value1);
unsigned __int64 value2 = value1-1;
rtlSetPackedUnsigned(temp, value2);
assertex(rtlGetPackedSize(temp) == numExtraBytesFromValue(value2)+1);
assertex(rtlGetPackedUnsigned(temp) == value2);
}
}
static void testPackedSigned()
{
__int64 values[] = { 0, 1, -2, 10, 63, 64, -64, -65, 8191, 8192, 0x3fffffff,
I64C(0x7fffffffffffff), I64C(0x80000000000000), I64C(0x7fffffffffffffff), I64C(0x8000000000000000) };
unsigned numBytes[] = { 1, 1, 1, 1, 1, 2, 1, 2, 2, 3, 5,
8, 9, 9, 9 };
unsigned numValues = _elements_in(values);
byte temp[9];
for (unsigned i = 0; i < numValues; i++)
{
rtlSetPackedSigned(temp, values[i]);
assertex(rtlGetPackedSize(temp) == numBytes[i]);
assertex(rtlGetPackedSigned(temp) == values[i]);
}
}
#endif
void ensureRtlLoaded()
{
}