jstring.cpp 64 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "platform.h"
  14. #include <stdio.h>
  15. #include <stdlib.h>
  16. #include <stdarg.h>
  17. #include <assert.h>
  18. #include <string.h>
  19. #include <ctype.h>
  20. #include <time.h>
  21. #include <math.h>
  22. #include "jstring.hpp"
  23. #include "jexcept.hpp"
  24. #include "jhash.hpp"
  25. #include "jlog.hpp"
  26. #include "jfile.hpp"
  27. #include "jdebug.hpp"
  28. #include "jutil.hpp"
  29. #include "junicode.hpp"
  // printf-style formats used by append(double) / append(float).
  #define DOUBLE_FORMAT "%.16g"
  #define FLOAT_FORMAT "%.7g"
  // Fallback for toolchains that do not provide va_copy: plain assignment of the va_list.
  #ifndef va_copy
  /* WARNING - DANGER - ASSUMES TYPICAL STACK MACHINE */
  #define va_copy(dst, src) ((void)((dst) = (src)))
  #endif
  static const char * TheNullStr = "";
  // Capacity _realloc starts from when the buffer has no capacity yet.
  #define FIRST_CHUNK_SIZE 8
  // _realloc doubles the capacity for requests up to this size; larger requests are rounded using this granularity.
  #define DOUBLE_LIMIT 0x100000 // must be a power of 2
  // detach() only shrinks the allocation when more than this many spare bytes would be returned.
  #define DETACH_GRANULARITY 16
  40. //===========================================================================
  41. StringBuffer::StringBuffer()
  42. {
  43. init();
  44. }
  #if 0
  // Disabled: construct an empty buffer and pre-size it for 'initial' characters.
  StringBuffer::StringBuffer(size_t initial)
  {
      this->init();
      this->ensureCapacity(initial);
  }
  #endif
  52. StringBuffer::StringBuffer(String & value)
  53. {
  54. init();
  55. append(value);
  56. }
  57. StringBuffer::StringBuffer(const char *value)
  58. {
  59. init();
  60. append(value);
  61. }
  62. StringBuffer::StringBuffer(char value)
  63. {
  64. init();
  65. append(value);
  66. }
  67. StringBuffer::StringBuffer(size_t len, const char *value)
  68. {
  69. init();
  70. append(len, value);
  71. }
  72. StringBuffer::StringBuffer(const StringBuffer & value)
  73. {
  74. init();
  75. append(value);
  76. }
  77. StringBuffer::StringBuffer(StringBuffer && value)
  78. {
  79. init();
  80. swapWith(value);
  81. }
  82. StringBuffer::StringBuffer(bool useInternal)
  83. {
  84. if (useInternal)
  85. init();
  86. else
  87. initNoInternal();
  88. }
  89. StringBuffer::~StringBuffer()
  90. {
  91. freeBuffer();
  92. }
  93. void StringBuffer::freeBuffer()
  94. {
  95. if (buffer != internalBuffer)
  96. free(buffer);
  97. }
  98. void StringBuffer::setBuffer(size_t buffLen, char * newBuff, size_t strLen)
  99. {
  100. assertex(newBuff);
  101. assertex(buffLen>0 && strLen<buffLen);
  102. freeBuffer();
  103. buffer = newBuff;
  104. maxLen = buffLen;
  105. curLen = strLen;
  106. }
  107. void StringBuffer::_realloc(size_t newLen)
  108. {
  109. if (newLen >= maxLen)
  110. {
  111. size_t newMax = maxLen;
  112. if (newMax == 0)
  113. newMax = FIRST_CHUNK_SIZE;
  114. if (newLen > DOUBLE_LIMIT)
  115. {
  116. newMax = (newLen + DOUBLE_LIMIT) & ~(DOUBLE_LIMIT-1);
  117. if (newLen >= newMax)
  118. throw MakeStringException(MSGAUD_operator, -1, "StringBuffer::_realloc: Request for %zu bytes oldMax = %zu", newLen, maxLen);
  119. }
  120. else
  121. {
  122. while (newLen >= newMax)
  123. newMax += newMax;
  124. }
  125. char * newStr;
  126. char * originalBuffer = (buffer == internalBuffer) ? NULL : buffer;
  127. if (!newMax || !(newStr=(char *)realloc(originalBuffer, newMax)))
  128. {
  129. DBGLOG("StringBuffer::_realloc: Failed to realloc = %zu, oldMax = %zu", newMax, maxLen);
  130. PrintStackReport();
  131. PrintMemoryReport();
  132. throw MakeStringException(MSGAUD_operator, -1, "StringBuffer::_realloc: Failed to realloc = %zu, oldMax = %zu", newMax, maxLen);
  133. }
  134. if (useInternal())
  135. memcpy_iflen(newStr, internalBuffer, curLen);
  136. buffer = newStr;
  137. maxLen = newMax;
  138. }
  139. }
  140. char * StringBuffer::detach()
  141. {
  142. dbgassertex(buffer);
  143. char * result;
  144. if (buffer == internalBuffer)
  145. {
  146. result = (char *)malloc(curLen+1);
  147. memcpy_iflen(result, buffer, curLen);
  148. }
  149. else
  150. {
  151. if (maxLen>curLen+1+DETACH_GRANULARITY)
  152. buffer = (char *)realloc(buffer,curLen+1); // shrink
  153. result = buffer;
  154. }
  155. result[curLen] = '\0'; // There is always room for this null
  156. init();
  157. return result;
  158. }
  159. StringBuffer & StringBuffer::append(char value)
  160. {
  161. ensureCapacity(1);
  162. buffer[curLen] = value;
  163. ++curLen;
  164. return *this;
  165. }
  166. StringBuffer & StringBuffer::append(unsigned char value)
  167. {
  168. ensureCapacity(1);
  169. buffer[curLen] = value;
  170. ++curLen;
  171. return *this;
  172. }
  173. StringBuffer & StringBuffer::append(const char * value)
  174. {
  175. if (likely(value))
  176. {
  177. size_t SourceLen = strlen(value);
  178. if (likely(SourceLen))
  179. {
  180. ensureCapacity(SourceLen);
  181. memcpy(buffer + curLen, value, SourceLen);
  182. curLen += SourceLen;
  183. }
  184. }
  185. return *this;
  186. }
  187. StringBuffer & StringBuffer::append(size_t len, const char * value)
  188. {
  189. if (likely(len))
  190. {
  191. unsigned truncLen = (unsigned)len;
  192. assertex(truncLen == len); // MORE: StringBuffer should use size_t throughout
  193. if (likely(truncLen))
  194. {
  195. ensureCapacity(truncLen);
  196. memcpy(buffer + curLen, value, truncLen);
  197. curLen += truncLen;
  198. }
  199. }
  200. return *this;
  201. }
  202. StringBuffer & StringBuffer::append(const unsigned char * value)
  203. {
  204. return append((const char *) value);
  205. }
  206. StringBuffer & StringBuffer::append(const char * value, size_t offset, size_t len)
  207. {
  208. if (likely(len))
  209. {
  210. ensureCapacity(len);
  211. memcpy(buffer + curLen, value+offset, len);
  212. curLen += len;
  213. }
  214. return *this;
  215. }
  216. StringBuffer & StringBuffer::append(const IAtom * value)
  217. {
  218. if (value)
  219. append(value->queryStr());
  220. return *this;
  221. }
  222. StringBuffer & StringBuffer::append(double value)
  223. {
  224. size_t len = length();
  225. size_t newlen = appendf(DOUBLE_FORMAT, value).length();
  226. while (len < newlen)
  227. {
  228. switch (charAt(len))
  229. {
  230. case '.':
  231. case 'E':
  232. case 'e':
  233. case 'N': // Not a number/infinity
  234. case 'n':
  235. return *this;
  236. }
  237. len++;
  238. }
  239. return append(".0");
  240. }
  241. StringBuffer & StringBuffer::append(float value)
  242. {
  243. size_t len = length();
  244. size_t newlen = appendf(FLOAT_FORMAT, value).length();
  245. while (len < newlen)
  246. {
  247. switch (charAt(len))
  248. {
  249. case '.':
  250. case 'E':
  251. case 'e':
  252. case 'N': // Not a number/infinity
  253. case 'n':
  254. return *this;
  255. }
  256. len++;
  257. }
  258. return append(".0");
  259. }
  260. StringBuffer & StringBuffer::append(int value)
  261. {
  262. char temp[12];
  263. unsigned written = numtostr(temp, value);
  264. return append(written, temp);
  265. }
  266. StringBuffer & StringBuffer::append(unsigned value)
  267. {
  268. char temp[12];
  269. unsigned written = numtostr(temp, value);
  270. return append(written, temp);
  271. }
  272. StringBuffer & StringBuffer::appendlong(long value)
  273. {
  274. char temp[24];
  275. unsigned written = numtostr(temp, value);
  276. return append(written, temp);
  277. }
  278. StringBuffer & StringBuffer::appendulong(unsigned long value)
  279. {
  280. char temp[24];
  281. unsigned written = numtostr(temp, value);
  282. return append(written, temp);
  283. }
  284. StringBuffer & StringBuffer::append(__int64 value)
  285. {
  286. char temp[24];
  287. unsigned written = numtostr(temp, value);
  288. return append(written, temp);
  289. }
  290. StringBuffer & StringBuffer::append(unsigned __int64 value)
  291. {
  292. char temp[24];
  293. unsigned written = numtostr(temp, value);
  294. return append(written, temp);
  295. }
  296. StringBuffer & StringBuffer::append(const String & value)
  297. {
  298. size_t SourceLen = value.length();
  299. ensureCapacity(SourceLen);
  300. value.getChars(0, SourceLen, buffer, curLen);
  301. curLen += SourceLen;
  302. return *this;
  303. }
  304. StringBuffer & StringBuffer::append(const IStringVal & value)
  305. {
  306. return append(value.str());
  307. }
  308. StringBuffer & StringBuffer::append(const IStringVal * value)
  309. {
  310. if (value)
  311. return append(value->str());
  312. else
  313. return *this;
  314. }
  315. StringBuffer & StringBuffer::append(const StringBuffer & value)
  316. {
  317. size_t SourceLen = value.length();
  318. ensureCapacity(SourceLen);
  319. value.getChars(0, SourceLen, buffer + curLen);
  320. curLen += SourceLen;
  321. return *this;
  322. }
  323. StringBuffer & StringBuffer::appendf(const char *format, ...)
  324. {
  325. va_list args;
  326. va_start(args, format);
  327. valist_appendf(format, args);
  328. va_end(args);
  329. return *this;
  330. }
  331. StringBuffer & StringBuffer::appendLower(size_t len, const char * value)
  332. {
  333. if (len)
  334. {
  335. ensureCapacity(len);
  336. const byte * from = reinterpret_cast<const byte *>(value);
  337. for (size_t i = 0; i < len; i++)
  338. buffer[curLen + i] = tolower(from[i]);
  339. curLen += len;
  340. }
  341. return *this;
  342. }
  343. StringBuffer & StringBuffer::setf(const char *format, ...)
  344. {
  345. clear();
  346. va_list args;
  347. va_start(args, format);
  348. valist_appendf(format, args);
  349. va_end(args);
  350. return *this;
  351. }
  352. StringBuffer & StringBuffer::limited_valist_appendf(size_t szLimit, const char *format, va_list args)
  353. {
  354. #define BUF_SIZE 1024
  355. #define MAX_BUF_SIZE (1024*1024) // limit buffer size to 1MB when doubling
  356. // handle string that is bigger that BUF_SIZE bytes
  357. size_t size = (0 == szLimit||szLimit>BUF_SIZE)?BUF_SIZE:szLimit;
  358. int len;
  359. va_list args2;
  360. va_copy(args2, args);
  361. try { ensureCapacity(size); }
  362. catch (IException *e)
  363. {
  364. StringBuffer eMsg;
  365. IException *e2 = MakeStringException(-1, "StringBuffer::valist_appendf(\"%s\"): vsnprintf failed or result exceeds limit (%zu): %s", format, size, e->errorMessage(eMsg).str());
  366. e->Release();
  367. throw e2;
  368. }
  369. len = _vsnprintf(buffer+curLen,size,format,args);
  370. if (len >= 0)
  371. {
  372. if ((size_t)len >= size)
  373. {
  374. if (szLimit && (size_t)len >= szLimit)
  375. {
  376. if ((size_t)len>szLimit)
  377. {
  378. len = size;
  379. if (len>3) memcpy(buffer+len-3, "...", 3);
  380. }
  381. }
  382. else
  383. {
  384. ensureCapacity(len);
  385. // no need for _vsnprintf since the buffer is already made big enough
  386. vsprintf(buffer+curLen,format,args2);
  387. }
  388. }
  389. }
  390. else if (size == szLimit)
  391. {
  392. len = size;
  393. if (len>3) memcpy(buffer+len-3, "...", 3);
  394. }
  395. else
  396. {
  397. size = BUF_SIZE * 2;
  398. for (;;)
  399. {
  400. if (0 != szLimit && size>szLimit) size = szLimit; // if so, will be last attempt
  401. if (size>MAX_BUF_SIZE)
  402. {
  403. IWARNLOG("StringBuffer::valist_appendf(\"%s\"): vsnprintf exceeds limit (%zu)", format, size);
  404. size = szLimit = MAX_BUF_SIZE;
  405. }
  406. try { ensureCapacity(size); }
  407. catch (IException *e)
  408. {
  409. StringBuffer eMsg;
  410. IException *e2 = MakeStringException(-1, "StringBuffer::valist_appendf(\"%s\"): vsnprintf failed (%zu): %s", format, size, e->errorMessage(eMsg).str());
  411. e->Release();
  412. throw e2;
  413. }
  414. va_list args3;
  415. va_copy(args3, args2);
  416. len = _vsnprintf(buffer+curLen,size,format,args3);
  417. va_end(args3);
  418. if (len>=0) // NB: len>size not possible, 1st _vsnprintf would have handled.
  419. break;
  420. if (size == szLimit)
  421. {
  422. len = size;
  423. if (len>3) memcpy(buffer+len-3, "...", 3);
  424. break;
  425. }
  426. size <<= 1;
  427. }
  428. }
  429. va_end(args2);
  430. curLen += len;
  431. return *this;
  432. }
  433. StringBuffer & StringBuffer::appendN(size_t count, char fill)
  434. {
  435. ensureCapacity(count);
  436. memset(buffer+curLen, fill, count);
  437. curLen += count;
  438. return *this;
  439. }
  440. void StringBuffer::setLength(size_t len)
  441. {
  442. if (len > curLen)
  443. {
  444. ensureCapacity(len-curLen);
  445. }
  446. curLen = len;
  447. }
  448. size32_t StringBuffer::lengthUtf8() const
  449. {
  450. size_t chars = 0;
  451. for (size_t offset=0; offset < curLen; offset += readUtf8Size(buffer+offset))
  452. chars++;
  453. return (size32_t)chars; // NB: preserving return type as size32_t for backward compatibility (as might be used in serialization)
  454. }
  455. char * StringBuffer::reserve(size_t size)
  456. {
  457. ensureCapacity(size);
  458. char *ret = buffer+curLen;
  459. curLen += size;
  460. return ret;
  461. }
  462. char * StringBuffer::reserveTruncate(size_t size)
  463. {
  464. size_t newMax = curLen+size+1;
  465. if (buffer == internalBuffer)
  466. {
  467. if (newMax > InternalBufferSize)
  468. {
  469. char * newStr = (char *)malloc(newMax);
  470. if (!newStr)
  471. throw MakeStringException(-1, "StringBuffer::_realloc: Failed to realloc newMax = %zu, oldMax = %zu", newMax, maxLen);
  472. memcpy_iflen(newStr, buffer, curLen);
  473. buffer = newStr;
  474. maxLen = newMax;
  475. }
  476. }
  477. else if (newMax != maxLen)
  478. {
  479. char * newStr = (char *) realloc(buffer, newMax);
  480. if (!newStr)
  481. throw MakeStringException(-1, "StringBuffer::_realloc: Failed to realloc newMax = %zu, oldMax = %zu", newMax, maxLen);
  482. buffer = newStr;
  483. maxLen = newMax;
  484. }
  485. char *ret = buffer+curLen;
  486. curLen += size;
  487. return ret;
  488. }
  489. void StringBuffer::swapWith(StringBuffer &other)
  490. {
  491. //Swap max
  492. size_t tempMax = maxLen;
  493. maxLen = other.maxLen;
  494. other.maxLen = tempMax;
  495. //Swap lengths
  496. size_t thisLen = curLen;
  497. size_t otherLen = other.curLen;
  498. curLen = otherLen;
  499. other.curLen = thisLen;
  500. //Swap buffers
  501. char * thisBuffer = buffer;
  502. char * otherBuffer = other.buffer;
  503. if (useInternal())
  504. {
  505. if (other.useInternal())
  506. {
  507. //NOTE: The c++ compiler can generate better code for the fixed size memcpy than it can
  508. // if only the required characters are copied.
  509. char temp[InternalBufferSize];
  510. memcpy(temp, thisBuffer, InternalBufferSize);
  511. memcpy(thisBuffer, otherBuffer, InternalBufferSize);
  512. memcpy(otherBuffer, temp, InternalBufferSize);
  513. //buffers already point in the correct place
  514. }
  515. else
  516. {
  517. memcpy(other.internalBuffer, thisBuffer, InternalBufferSize);
  518. buffer = otherBuffer;
  519. other.buffer = other.internalBuffer;
  520. }
  521. }
  522. else
  523. {
  524. if (other.useInternal())
  525. {
  526. memcpy(internalBuffer, otherBuffer, InternalBufferSize);
  527. buffer = internalBuffer;
  528. other.buffer = thisBuffer;
  529. }
  530. else
  531. {
  532. buffer = otherBuffer;
  533. other.buffer = thisBuffer;
  534. }
  535. }
  536. }
  537. void StringBuffer::setown(StringBuffer &other)
  538. {
  539. maxLen = other.maxLen;
  540. curLen = other.curLen;
  541. freeBuffer();
  542. if (other.useInternal())
  543. {
  544. memcpy(internalBuffer, other.internalBuffer, InternalBufferSize);
  545. buffer = internalBuffer;
  546. }
  547. else
  548. {
  549. buffer = other.buffer;
  550. }
  551. other.init();
  552. }
  553. void StringBuffer::kill()
  554. {
  555. freeBuffer();
  556. init();
  557. }
  558. void StringBuffer::getChars(size_t srcBegin, size_t srcEnd, char * target) const
  559. {
  560. if (srcEnd > curLen)
  561. srcEnd = curLen;
  562. const int len = srcEnd - srcBegin;
  563. if (target && buffer && len > 0)
  564. memcpy(target, buffer + srcBegin, len);
  565. }
  566. void StringBuffer::_insert(size_t offset, size_t insertLen)
  567. {
  568. ensureCapacity(insertLen);
  569. memmove(buffer + offset + insertLen, buffer + offset, curLen - offset);
  570. curLen += insertLen;
  571. }
  572. StringBuffer & StringBuffer::insert(size_t offset, char value)
  573. {
  574. _insert(offset, 1);
  575. buffer[offset] = value;
  576. return *this;
  577. }
  578. StringBuffer & StringBuffer::insert(size_t offset, const char * value)
  579. {
  580. if (!value) return *this;
  581. size_t len = strlen(value);
  582. if (likely(len))
  583. {
  584. _insert(offset, len);
  585. memcpy(buffer + offset, value, len);
  586. }
  587. return *this;
  588. }
  589. StringBuffer & StringBuffer::insert(size_t offset, double value)
  590. {
  591. char temp[36];
  592. sprintf(temp, "%f", value);
  593. insert(offset, temp);
  594. return *this;
  595. }
  596. StringBuffer & StringBuffer::insert(size_t offset, float value)
  597. {
  598. return insert(offset, (double)value);
  599. }
  600. StringBuffer & StringBuffer::insert(size_t offset, int value)
  601. {
  602. char temp[12];
  603. numtostr(temp, value);
  604. return insert(offset, temp);
  605. }
  606. StringBuffer & StringBuffer::insert(size_t offset, unsigned value)
  607. {
  608. char temp[12];
  609. numtostr(temp, value);
  610. return insert(offset, temp);
  611. }
  #if 0
  // Disabled: insert the decimal text of a long at position 'offset'.
  StringBuffer & StringBuffer::insert(size_t offset, long value)
  {
      char digits[24];
      numtostr(digits, value);
      return insert(offset, digits);
  }
  #endif
  620. StringBuffer & StringBuffer::insert(size_t offset, __int64 value)
  621. {
  622. char temp[24];
  623. numtostr(temp, value);
  624. return insert(offset, temp);
  625. }
  626. StringBuffer & StringBuffer::insert(size_t offset, const String & value)
  627. {
  628. size_t len = value.length();
  629. _insert(offset, len);
  630. value.getChars(0, len, buffer, offset);
  631. return *this;
  632. }
  633. StringBuffer & StringBuffer::insert(size_t offset, const StringBuffer & value)
  634. {
  635. size_t len = value.length();
  636. _insert(offset, len);
  637. value.getChars(0, len, buffer+offset);
  638. return *this;
  639. }
  640. StringBuffer & StringBuffer::insert(size_t offset, const IStringVal & value)
  641. {
  642. return insert(offset, value.str());
  643. }
  644. StringBuffer & StringBuffer::insert(size_t offset, const IStringVal * value)
  645. {
  646. if (value)
  647. return insert(offset, value->str());
  648. else
  649. return *this;
  650. }
  651. StringBuffer & StringBuffer::newline()
  652. {
  653. return append("\n");
  654. }
  655. StringBuffer & StringBuffer::pad(size_t count)
  656. {
  657. ensureCapacity(count);
  658. memset(buffer + curLen, ' ', count);
  659. curLen += count;
  660. return *this;
  661. }
  662. StringBuffer & StringBuffer::padTo(size_t count)
  663. {
  664. if (curLen<count)
  665. pad(count-curLen);
  666. return *this;
  667. }
  668. StringBuffer & StringBuffer::clip()
  669. {
  670. while (curLen && isspace(buffer[curLen-1]))
  671. curLen--;
  672. return *this;
  673. }
  674. StringBuffer & StringBuffer::trim()
  675. {
  676. return clip().trimLeft();
  677. }
  678. StringBuffer & StringBuffer::trimLeft()
  679. {
  680. char *p;
  681. if (curLen==0)
  682. return *this;
  683. buffer[curLen] = 0;
  684. for(p = buffer;isspace(*p);p++)
  685. ;
  686. if (p!=buffer)
  687. {
  688. curLen -= p-buffer;
  689. memmove(buffer,p,curLen);
  690. }
  691. return *this;
  692. }
  693. StringBuffer & StringBuffer::remove(size_t start, size_t len)
  694. {
  695. if (start > curLen) start = curLen;
  696. if (start + len > curLen) len = curLen - start;
  697. unsigned start2 = start + len;
  698. memmove(buffer + start, buffer + start2, curLen - start2);
  699. setLength(curLen - len);
  700. return *this;
  701. }
  702. StringBuffer &StringBuffer::reverse()
  703. {
  704. size_t max = curLen/2;
  705. char * end = buffer + curLen;
  706. size_t idx;
  707. for (idx = 0; idx < max; idx++)
  708. {
  709. char temp = buffer[idx];
  710. end--;
  711. buffer[idx] = *end;
  712. *end = temp;
  713. }
  714. return *this;
  715. }
  716. MemoryBuffer & StringBuffer::deserialize(MemoryBuffer & in)
  717. {
  718. unsigned len;
  719. in.read(len);
  720. append(len, (const char *)in.readDirect(len));
  721. return in;
  722. }
  723. MemoryBuffer & StringBuffer::serialize(MemoryBuffer & out) const
  724. {
  725. return out.append((unsigned)curLen).append(curLen, buffer);
  726. }
  727. StringBuffer &StringBuffer::loadFile(const char *filename, bool binaryMode)
  728. {
  729. FILE *in = fopen(filename, binaryMode?"rb":"rt");
  730. if (in)
  731. {
  732. char buffer[1024];
  733. size_t bytes;
  734. for (;;)
  735. {
  736. bytes = (size_t)fread(buffer, 1, sizeof(buffer), in);
  737. if (!bytes)
  738. break;
  739. append(buffer, 0, bytes);
  740. }
  741. fclose(in);
  742. return *this;
  743. }
  744. else
  745. throw MakeStringException(errno, "File %s could not be opened", filename);
  746. }
  747. StringBuffer & StringBuffer::loadFile(IFile* f)
  748. {
  749. if(!f)
  750. return *this;
  751. Owned<IFileIO> io = f->open(IFOread);
  752. if(!io)
  753. throw MakeStringException(errno, "file %s could not be opened for reading", f->queryFilename());
  754. char buf[2048];
  755. const unsigned requestedSize = sizeof(buf);
  756. offset_t pos = 0;
  757. for (;;)
  758. {
  759. size32_t len = io->read(pos, requestedSize, buf);
  760. if (len == 0)
  761. break;
  762. append(len, buf);
  763. pos += len;
  764. if (len != requestedSize)
  765. break;
  766. }
  767. return *this;
  768. }
  769. void StringBuffer::setCharAt(size_t offset, char value)
  770. {
  771. if (offset < curLen)
  772. buffer[offset] = value;
  773. }
  774. StringBuffer & StringBuffer::toLowerCase()
  775. {
  776. size_t l = curLen;
  777. for (size_t i = 0; i < l; i++)
  778. {
  779. if (isupper(buffer[i]))
  780. buffer[i] = tolower(buffer[i]);
  781. }
  782. return *this;
  783. }
  784. StringBuffer & StringBuffer::toUpperCase()
  785. {
  786. size32_t l = curLen;
  787. for (size32_t i = 0; i < l; i++)
  788. {
  789. if (islower(buffer[i]))
  790. buffer[i] = toupper(buffer[i]);
  791. }
  792. return *this;
  793. }
  794. StringBuffer & StringBuffer::replace(char oldChar, char newChar)
  795. {
  796. size_t l = curLen;
  797. for (size_t i = 0; i < l; i++)
  798. {
  799. if (buffer[i] == oldChar)
  800. {
  801. buffer[i] = newChar;
  802. if (newChar == '\0')
  803. {
  804. curLen = i;
  805. break;
  806. }
  807. }
  808. }
  809. return *this;
  810. }
  811. // Copy source to result, replacing all occurrences of "oldStr" with "newStr"
  812. StringBuffer &replaceString(StringBuffer & result, size_t lenSource, const char *source, size_t lenOldStr, const char* oldStr, size_t lenNewStr, const char* newStr)
  813. {
  814. if (lenSource)
  815. {
  816. size_t left = lenSource;
  817. while (left >= lenOldStr)
  818. {
  819. if (memcmp(source, oldStr, lenOldStr)==0)
  820. {
  821. result.append(lenNewStr, newStr);
  822. source += lenOldStr;
  823. left -= lenOldStr;
  824. }
  825. else
  826. {
  827. result.append(*source);
  828. source++;
  829. left--;
  830. }
  831. }
  832. // there are no more possible replacements, make sure we keep the end of the original buffer
  833. result.append(left, source);
  834. }
  835. return result;
  836. }
  837. StringBuffer &replaceStringNoCase(StringBuffer & result, size_t lenSource, const char *source, size_t lenOldStr, const char* oldStr, size_t lenNewStr, const char* newStr)
  838. {
  839. if (lenSource)
  840. {
  841. size_t left = lenSource;
  842. while (left >= lenOldStr)
  843. {
  844. if (memicmp(source, oldStr, lenOldStr)==0)
  845. {
  846. result.append(lenNewStr, newStr);
  847. source += lenOldStr;
  848. left -= lenOldStr;
  849. }
  850. else
  851. {
  852. result.append(*source);
  853. source++;
  854. left--;
  855. }
  856. }
  857. // there are no more possible replacements, make sure we keep the end of the original buffer
  858. result.append(left, source);
  859. }
  860. return result;
  861. }
  862. // this method will replace all occurrences of "oldStr" with "newStr"
  863. StringBuffer & StringBuffer::replaceString(const char* oldStr, const char* newStr)
  864. {
  865. if (curLen)
  866. {
  867. StringBuffer temp;
  868. size_t oldlen = oldStr ? strlen(oldStr) : 0;
  869. size_t newlen = newStr ? strlen(newStr) : 0;
  870. ::replaceString(temp, curLen, buffer, oldlen, oldStr, newlen, newStr);
  871. swapWith(temp);
  872. }
  873. return *this;
  874. }
  875. StringBuffer & StringBuffer::replaceStringNoCase(const char* oldStr, const char* newStr)
  876. {
  877. if (curLen)
  878. {
  879. StringBuffer temp;
  880. size_t oldlen = oldStr ? strlen(oldStr) : 0;
  881. size_t newlen = newStr ? strlen(newStr) : 0;
  882. ::replaceStringNoCase(temp, curLen, buffer, oldlen, oldStr, newlen, newStr);
  883. swapWith(temp);
  884. }
  885. return *this;
  886. }
  887. StringBuffer & StringBuffer::stripChar(char oldChar)
  888. {
  889. size_t delta = 0;
  890. size_t l = curLen;
  891. for (size_t i = 0; i < l; i++)
  892. {
  893. if (buffer[i] == oldChar)
  894. delta++;
  895. else if (delta)
  896. buffer[i-delta] = buffer[i];
  897. }
  898. if (delta)
  899. curLen = curLen - delta;
  900. return *this;
  901. }
  902. const char * StringBuffer::str() const
  903. {
  904. buffer[curLen] = '\0'; // There is always room for this null
  905. return buffer;
  906. }
  907. //===========================================================================
  908. VStringBuffer::VStringBuffer(const char* format, ...)
  909. {
  910. va_list args;
  911. va_start(args,format);
  912. valist_appendf(format,args);
  913. va_end(args);
  914. }
  915. //===========================================================================
  916. StringAttrBuilder::StringAttrBuilder(StringAttr & _target) : StringBuffer(false), target(_target)
  917. {
  918. }
  919. StringAttrBuilder::~StringAttrBuilder()
  920. {
  921. target.setown(*this);
  922. }
  923. //===========================================================================
  924. String::String()
  925. {
  926. text = (char *)TheNullStr;
  927. }
  928. String::String(const char * value)
  929. {
  930. text = (value ? strdup(value) : (char *)TheNullStr);
  931. }
  932. String::String(const char * value, int offset, int _count)
  933. {
  934. text = (char *)malloc(_count+1);
  935. memcpy_iflen(text, value+offset, _count);
  936. text[_count]=0;
  937. }
  938. String::String(String & value)
  939. {
  940. text = strdup(value.str());
  941. }
  942. String::String(StringBuffer & value)
  943. {
  944. unsigned len = value.length();
  945. text = (char *)malloc(len+1);
  946. value.getChars(0,len,text);
  947. text[len] = 0;
  948. }
  949. String::~String()
  950. {
  951. if (text != TheNullStr) free(text);
  952. }
  953. char String::charAt(size32_t index) const
  954. {
  955. return text[index];
  956. }
  957. int String::compareTo(const String & value) const
  958. {
  959. return strcmp(text, value.str());
  960. }
  961. int String::compareTo(const char* value) const
  962. {
  963. return strcmp(text,value);
  964. }
  965. String * String::concat(const String & value) const
  966. {
  967. StringBuffer temp(str());
  968. temp.append(value);
  969. return new String(temp.str());
  970. }
  971. bool String::endsWith(const String & value) const
  972. {
  973. unsigned lenValue = value.length();
  974. unsigned len = (size32_t)strlen(text);
  975. if (len >= lenValue)
  976. return (memcmp(text+(len-lenValue),value.str(),lenValue) == 0);
  977. return false;
  978. }
  979. bool String::endsWith(const char* value) const
  980. {
  981. return ::endsWith(this->text, value);
  982. }
  983. bool String::equals(String & value) const
  984. {
  985. return strcmp(text, value.str())==0;
  986. }
  987. bool String::equalsIgnoreCase(const String & value) const
  988. {
  989. return stricmp(text, value.str())==0;
  990. }
  991. void String::getBytes(int srcBegin, int srcEnd, void * dest, int dstBegin) const
  992. {
  993. memcpy_iflen((char *)dest+dstBegin, text+srcBegin, srcEnd-srcBegin);
  994. }
  995. void String::getChars(int srcBegin, int srcEnd, void * dest, int dstBegin) const
  996. {
  997. memcpy_iflen((char *)dest+dstBegin, text+srcBegin, srcEnd-srcBegin);
  998. }
  999. int String::hashCode() const
  1000. {
  1001. return (int)hashc((const byte *)text,length(),0);
  1002. }
  1003. int String::indexOf(int ch) const
  1004. {
  1005. char * match = strchr(text, ch);
  1006. return match ? (int)(match - text) : -1;
  1007. }
  1008. int String::indexOf(int ch, int from) const
  1009. {
  1010. char * match = strchr(text + from, ch);
  1011. return match ? (int)(match - text) : -1;
  1012. }
  1013. int String::indexOf(const String & search) const
  1014. {
  1015. const char * str = search.str();
  1016. const char * match = strstr(text, str);
  1017. return match ? (int)(match - text) : -1;
  1018. }
  1019. int String::indexOf(const String & search, int from) const
  1020. {
  1021. const char * str = search.str();
  1022. const char * match = strstr(text + from, str);
  1023. return match ? (int)(match - text) : -1;
  1024. }
  1025. int String::lastIndexOf(int ch) const
  1026. {
  1027. char * match = strrchr(text, ch);
  1028. return match ? (int)(match - text) : -1;
  1029. }
  1030. int String::lastIndexOf(int ch, int from) const
  1031. {
  1032. for (;(from > 0);--from)
  1033. if (text[from] == ch)
  1034. return from;
  1035. return -1;
  1036. }
  1037. int String::lastIndexOf(const String & search) const
  1038. {
  1039. assertex(!"TBD");
  1040. return -1;
  1041. }
  1042. int String::lastIndexOf(const String & search, int from) const
  1043. {
  1044. assertex(!"TBD");
  1045. return -1;
  1046. }
  1047. size32_t String::length() const
  1048. {
  1049. return (size32_t)strlen(text);
  1050. }
  1051. bool String::startsWith(String & value) const
  1052. {
  1053. unsigned lenValue = value.length();
  1054. const char * search = value.str();
  1055. return (memcmp(text, search, lenValue) == 0);
  1056. }
  1057. bool String::startsWith(String & value, int offset) const
  1058. {
  1059. unsigned lenValue = value.length();
  1060. const char * search = value.str();
  1061. return (memcmp(text + offset, search, lenValue) == 0);
  1062. }
  1063. bool String::startsWith(const char* value) const
  1064. {
  1065. return ::startsWith(this->text,value);
  1066. }
  1067. String * String::substring(int beginIndex) const
  1068. {
  1069. return new String(text+beginIndex);
  1070. }
  1071. String * String::substring(int beginIndex, int endIndex) const
  1072. {
  1073. return new String(text, beginIndex, endIndex - beginIndex);
  1074. }
  1075. const char *String::str() const
  1076. {
  1077. return text;
  1078. }
  1079. String * String::toLowerCase() const
  1080. {
  1081. String *ret = new String();
  1082. size32_t l = length();
  1083. if (l)
  1084. {
  1085. ret->text = (char *)malloc(l+1);
  1086. for (unsigned i = 0; i < l; i++)
  1087. ret->text[i] = tolower(text[i]);
  1088. ret->text[l]=0;
  1089. }
  1090. return ret;
  1091. }
  1092. String * String::toString()
  1093. {
  1094. Link();
  1095. return this;
  1096. }
  1097. String * String::toUpperCase() const
  1098. {
  1099. String *ret = new String();
  1100. size32_t l = length();
  1101. if (l)
  1102. {
  1103. ret->text = (char *)malloc(l+1);
  1104. for (unsigned i = 0; i < l; i++)
  1105. ret->text[i] = toupper(text[i]);
  1106. ret->text[l]=0;
  1107. }
  1108. return ret;
  1109. }
  1110. String * String::trim() const
  1111. {
  1112. size32_t l = length();
  1113. while (l && isspace(text[l-1]))
  1114. l--;
  1115. return new String(text, 0, l);
  1116. }
  1117. //------------------------------------------------
  1118. #if 0
  1119. String & String::valueOf(char value)
  1120. {
  1121. return * new String(&value, 0, 1);
  1122. }
  1123. String & String::valueOf(const char * value)
  1124. {
  1125. return * new String(value);
  1126. }
  1127. String & String::valueOf(const char * value, int offset, int count)
  1128. {
  1129. return * new String(value, offset, count);
  1130. }
  1131. String & String::valueOf(double value)
  1132. {
  1133. StringBuffer temp;
  1134. return temp.append(value).toString();
  1135. }
  1136. String & String::valueOf(float value)
  1137. {
  1138. StringBuffer temp;
  1139. return temp.append(value).toString();
  1140. }
  1141. String & String::valueOf(int value)
  1142. {
  1143. StringBuffer temp;
  1144. return temp.append(value).toString();
  1145. }
  1146. String & String::valueOf(long value)
  1147. {
  1148. StringBuffer temp;
  1149. return temp.append(value).toString();
  1150. }
  1151. #endif
  1152. //------------------------------------------------
  1153. StringAttr::StringAttr(const char * _text)
  1154. {
  1155. text = _text ? strdup(_text) : NULL;
  1156. }
  1157. StringAttr::StringAttr(const char * _text, size_t _len)
  1158. {
  1159. text = NULL;
  1160. set(_text, _len);
  1161. }
  1162. StringAttr::StringAttr(const StringAttr & src)
  1163. {
  1164. text = NULL;
  1165. set(src.get());
  1166. }
  1167. StringAttr::StringAttr(StringAttr && src)
  1168. {
  1169. text = src.text;
  1170. src.text = nullptr;
  1171. }
  1172. StringAttr& StringAttr::operator = (StringAttr && from)
  1173. {
  1174. char *temp = text;
  1175. text = from.text;
  1176. from.text = temp;
  1177. return *this;
  1178. }
  1179. StringAttr& StringAttr::operator = (const StringAttr & from)
  1180. {
  1181. set(from.str());
  1182. return *this;
  1183. }
  1184. void StringAttr::set(const char * _text)
  1185. {
  1186. char * oldtext = text;
  1187. text = _text ? strdup(_text) : NULL;
  1188. free(oldtext);
  1189. }
  1190. void StringAttr::set(const char * _text, size_t _len)
  1191. {
  1192. char * oldtext = text;
  1193. text = (char *)malloc(_len+1);
  1194. memcpy_iflen(text, _text, _len);
  1195. text[_len] = 0;
  1196. free(oldtext);
  1197. }
  1198. void StringAttr::setown(const char * _text)
  1199. {
  1200. char * oldtext = text;
  1201. text = (char *)_text;
  1202. free(oldtext);
  1203. }
  1204. void StringAttr::set(const StringBuffer & source)
  1205. {
  1206. if (source.length())
  1207. set(source.str());
  1208. else
  1209. clear();
  1210. }
  1211. void StringAttr::setown(StringBuffer & source)
  1212. {
  1213. if (source.length())
  1214. setown(source.detach());
  1215. else
  1216. clear();
  1217. }
  1218. void StringAttr::toLowerCase()
  1219. {
  1220. if (text)
  1221. {
  1222. char * cur = text;
  1223. char next;
  1224. while ((next = *cur) != 0)
  1225. {
  1226. if (isupper(next))
  1227. *cur = tolower(next);
  1228. cur++;
  1229. }
  1230. }
  1231. }
  1232. void StringAttr::toUpperCase()
  1233. {
  1234. if (text)
  1235. {
  1236. char * cur = text;
  1237. char next;
  1238. while ((next = *cur) != 0)
  1239. {
  1240. if (islower(next))
  1241. *cur = toupper(next);
  1242. cur++;
  1243. }
  1244. }
  1245. }
  1246. StringAttrItem::StringAttrItem(const char *_text, unsigned _len)
  1247. {
  1248. text.set(_text, _len);
  1249. }
  1250. inline char hex(char c, char lower)
  1251. {
  1252. if (c < 10)
  1253. return '0' + c;
  1254. else if (lower)
  1255. return 'a' + c - 10;
  1256. else
  1257. return 'A' + c - 10;
  1258. }
  1259. StringBuffer & StringBuffer::appendhex(unsigned char c, char lower)
  1260. {
  1261. append(hex(c>>4, lower));
  1262. append(hex(c&0xF, lower));
  1263. return *this;
  1264. }
  1265. void appendURL(StringBuffer *dest, const char *src, size32_t len, char lower)
  1266. {
  1267. if (len == (size32_t)-1)
  1268. len = (size32_t)strlen(src);
  1269. while (len)
  1270. {
  1271. // isalnum seems to give weird results for chars > 127....
  1272. unsigned char c = (unsigned char) *src;
  1273. if (c == ' ')
  1274. dest->append('+');
  1275. else if ((c & 0x80) || !isalnum(*src))
  1276. {
  1277. dest->append('%');
  1278. dest->appendhex(c, lower);
  1279. }
  1280. else
  1281. dest->append(c);
  1282. src++;
  1283. len--;
  1284. }
  1285. }
  1286. inline char translateHex(char hex)
  1287. {
  1288. if(hex >= 'A')
  1289. return (hex & 0xdf) - 'A' + 10;
  1290. else
  1291. return hex - '0';
  1292. }
  1293. inline char translateHex(char h1, char h2)
  1294. {
  1295. return (translateHex(h1) * 16 + translateHex(h2));
  1296. }
  1297. StringBuffer &appendDecodedURL(StringBuffer &s, const char *url)
  1298. {
  1299. if(!url)
  1300. return s;
  1301. while (*url)
  1302. {
  1303. char c = *url++;
  1304. if (c == '+')
  1305. c = ' ';
  1306. else if (c == '%')
  1307. {
  1308. if (isxdigit(url[0]) && isxdigit(url[1]))
  1309. {
  1310. c = translateHex(url[0], url[1]);
  1311. url+=2;
  1312. }
  1313. }
  1314. s.append(c);
  1315. }
  1316. return s;
  1317. }
  1318. static StringBuffer & appendStringExpandControl(StringBuffer &out, unsigned len, const char * src, bool addBreak, bool isCpp, bool isUtf8)
  1319. {
  1320. const int minBreakPos = 0;
  1321. const int commaBreakPos = 70;
  1322. const int maxBreakPos = 120;
  1323. const char * startLine = src;
  1324. out.ensureCapacity(len+2);
  1325. for (; len > 0; --len)
  1326. {
  1327. unsigned char c = *src++;
  1328. bool insertBreak = false;
  1329. bool allowBreak = true;
  1330. switch (c)
  1331. {
  1332. case '\n':
  1333. {
  1334. out.append("\\n");
  1335. if (src-startLine > minBreakPos)
  1336. insertBreak = true;
  1337. break;
  1338. }
  1339. case ',':
  1340. {
  1341. out.append(c);
  1342. if (src-startLine > commaBreakPos)
  1343. insertBreak = true;
  1344. break;
  1345. }
  1346. case '\r': out.append("\\r"); break;
  1347. case '\t': out.append("\\t"); break;
  1348. case '"':
  1349. if (isCpp)
  1350. out.append("\\");
  1351. out.append(c);
  1352. break;
  1353. case '\'':
  1354. if (!isCpp)
  1355. out.append("\\");
  1356. out.append(c);
  1357. break;
  1358. case '\\': out.append("\\\\"); break;
  1359. case '?':
  1360. if (isCpp)
  1361. {
  1362. //stop trigraphs being generated.... quote the second ?
  1363. out.append(c);
  1364. if ((len!=1) && (*src == '?'))
  1365. {
  1366. out.append('\\');
  1367. allowBreak = false;
  1368. }
  1369. }
  1370. else
  1371. out.append(c);
  1372. break;
  1373. default:
  1374. if (isUtf8 || ((c >= ' ') && (c <= 126)))
  1375. out.append(c);
  1376. else
  1377. out.appendf("\\%03o", c);
  1378. break;
  1379. }
  1380. if (addBreak && (insertBreak || (allowBreak && src-startLine >= maxBreakPos)))
  1381. {
  1382. out.append("\"").newline().append("\t\t\"");
  1383. startLine = src;
  1384. }
  1385. }
  1386. return out;
  1387. }
  1388. StringBuffer & appendStringAsCPP(StringBuffer &out, unsigned len, const char * src, bool addBreak)
  1389. {
  1390. return appendStringExpandControl(out, len, src, addBreak, true, false);
  1391. }
  1392. StringBuffer & appendStringAsECL(StringBuffer &out, unsigned len, const char * src)
  1393. {
  1394. return appendStringExpandControl(out, len, src, false, false, false);
  1395. }
  1396. StringBuffer & appendUtf8AsECL(StringBuffer &out, unsigned len, const char * src)
  1397. {
  1398. return appendStringExpandControl(out, len, src, false, false, true);
  1399. }
  1400. StringBuffer & appendStringAsQuotedCPP(StringBuffer &out, unsigned len, const char * src, bool addBreak)
  1401. {
  1402. out.ensureCapacity(len+2);
  1403. out.append('\"');
  1404. appendStringAsCPP(out, len, src, addBreak);
  1405. return out.append('\"');
  1406. }
  1407. StringBuffer & appendStringAsQuotedECL(StringBuffer &out, unsigned len, const char * src)
  1408. {
  1409. out.ensureCapacity(len+2);
  1410. out.append('\'');
  1411. appendStringAsECL(out, len, src);
  1412. return out.append('\'');
  1413. }
  1414. void extractItem(StringBuffer & res, const char * src, const char * sep, int whichItem, bool caps)
  1415. {
  1416. bool isSeparator[256];
  1417. memset(isSeparator,0,sizeof(isSeparator));
  1418. unsigned char * finger = (unsigned char *)sep;
  1419. while (*finger !=0)
  1420. isSeparator[*finger++] = true;
  1421. isSeparator[0]=true;
  1422. finger = (unsigned char *)src;
  1423. unsigned char next;
  1424. for (;;)
  1425. {
  1426. while (isSeparator[(next = *finger)])
  1427. {
  1428. if (next == 0) return;
  1429. finger++;
  1430. }
  1431. if (whichItem == 0)
  1432. {
  1433. while (!isSeparator[(next = *finger)])
  1434. {
  1435. if (caps)
  1436. next = toupper(next);
  1437. res.append(next);
  1438. finger++;
  1439. }
  1440. return;
  1441. }
  1442. while (!isSeparator[*finger])
  1443. finger++;
  1444. whichItem--;
  1445. }
  1446. }
  1447. int utf8CharLen(unsigned char ch)
  1448. {
  1449. //return 1 if this is an ascii character,
  1450. //or 0 if its not a valid utf-8 character
  1451. if (ch < 128)
  1452. return 1;
  1453. if (ch < 192)
  1454. return 0;
  1455. unsigned char len = 1;
  1456. for (unsigned char lead = ch << 1; (lead & 0x80); lead <<=1)
  1457. len++;
  1458. return len;
  1459. }
  1460. int utf8CharLen(const unsigned char *ch, unsigned maxsize)
  1461. {
  1462. //return 1 if this is an ascii character,
  1463. //or 0 if its not a valid utf-8 character
  1464. if (*ch < 128)
  1465. return 1;
  1466. unsigned char len = utf8CharLen(*ch);
  1467. if (len>maxsize)
  1468. return 0;
  1469. for (unsigned pos = 1; pos < len; pos++)
  1470. if ((ch[pos] < 128) || (ch[pos] >= 192))
  1471. return 0; //its not a valid utf-8 character after all
  1472. return len;
  1473. }
  1474. const char *encodeXML(const char *x, StringBuffer &ret, unsigned flags, unsigned len, bool utf8)
  1475. {
  1476. while (len)
  1477. {
  1478. switch(*x)
  1479. {
  1480. case '&':
  1481. ret.append("&amp;");
  1482. break;
  1483. case '<':
  1484. ret.append("&lt;");
  1485. break;
  1486. case '>':
  1487. ret.append("&gt;");
  1488. break;
  1489. case '\"':
  1490. ret.append("&quot;");
  1491. break;
  1492. case '\'':
  1493. ret.append("&apos;");
  1494. break;
  1495. case ' ':
  1496. ret.append(flags & ENCODE_SPACES?"&#32;":" ");
  1497. break;
  1498. case '\n':
  1499. ret.append(flags & ENCODE_NEWLINES?"&#10;":"\n");
  1500. break;
  1501. case '\r':
  1502. ret.append(flags & ENCODE_NEWLINES?"&#13;":"\r");
  1503. break;
  1504. case '\t':
  1505. ret.append(flags & ENCODE_SPACES?"&#9;":"\t");
  1506. break;
  1507. case '\0':
  1508. if (len == (unsigned)-1)
  1509. return ret.str();
  1510. ret.append("&#xe000;"); // hack!!! Characters below 0x20 are not legal in strict xml, even encoded.
  1511. break;
  1512. default:
  1513. if (*x >= ' ' && ((byte)*x) < 128)
  1514. ret.append(*x);
  1515. else if (*x < ' ' && *x > 0)
  1516. ret.append("&#xe0").appendhex(*x, true).append(';'); // HACK
  1517. else if (utf8)
  1518. {
  1519. unsigned chlen = utf8CharLen((const unsigned char *)x);
  1520. if (chlen==0)
  1521. ret.append("&#").append((unsigned int)*(unsigned char *) x).append(';');
  1522. else
  1523. {
  1524. ret.append(*x);
  1525. while(--chlen)
  1526. {
  1527. if (len != (unsigned) -1)
  1528. len--;
  1529. ret.append(*(++x));
  1530. }
  1531. }
  1532. }
  1533. else
  1534. ret.append("&#").append((unsigned int)*(unsigned char *) x).append(';');
  1535. break;
  1536. }
  1537. if (len != (unsigned) -1)
  1538. len--;
  1539. ++x;
  1540. }
  1541. return ret.str();
  1542. }
  1543. void encodeXML(const char *x, IIOStream &out, unsigned flags, unsigned len, bool utf8)
  1544. {
  1545. while (len)
  1546. {
  1547. switch(*x)
  1548. {
  1549. case '&':
  1550. writeStringToStream(out, "&amp;");
  1551. break;
  1552. case '<':
  1553. writeStringToStream(out, "&lt;");
  1554. break;
  1555. case '>':
  1556. writeStringToStream(out, "&gt;");
  1557. break;
  1558. case '\"':
  1559. writeStringToStream(out, "&quot;");
  1560. break;
  1561. case '\'':
  1562. writeStringToStream(out, "&apos;");
  1563. break;
  1564. case ' ':
  1565. writeStringToStream(out, flags & ENCODE_SPACES?"&#32;":" ");
  1566. break;
  1567. case '\n':
  1568. writeStringToStream(out, flags & ENCODE_NEWLINES?"&#10;":"\n");
  1569. break;
  1570. case '\r':
  1571. writeStringToStream(out, flags & ENCODE_NEWLINES?"&#13;":"\r");
  1572. break;
  1573. case '\t':
  1574. writeStringToStream(out, flags & ENCODE_SPACES?"&#9;":"\t");
  1575. break;
  1576. case '\0':
  1577. if (len == (unsigned) -1)
  1578. return;
  1579. writeStringToStream(out, "&#xe000;"); // hack!!! Characters below 0x20 are not legal in strict xml, even encoded.
  1580. break;
  1581. default:
  1582. if (*x >= ' ' && ((byte)*x) < 128)
  1583. writeCharToStream(out, *x);
  1584. else if (*x < ' ' && *x > 0)
  1585. {
  1586. writeStringToStream(out, "&#xe0");
  1587. unsigned char c = *(unsigned char *)x;
  1588. writeCharToStream(out, hex(c>>4, true));
  1589. writeCharToStream(out, hex(c&0xF, true));
  1590. writeCharToStream(out, ';'); // HACK
  1591. }
  1592. else if (utf8)
  1593. {
  1594. int chlen = utf8CharLen((const unsigned char *)x);
  1595. if (chlen==0)
  1596. {
  1597. writeStringToStream(out, "&#");
  1598. char tmp[12];
  1599. unsigned written = numtostr(tmp, *(unsigned char *)x);
  1600. out.write(written, tmp);
  1601. writeCharToStream(out, ';');
  1602. }
  1603. else
  1604. {
  1605. writeCharToStream(out, *x);
  1606. while(--chlen)
  1607. {
  1608. if (len != (unsigned) -1)
  1609. len--;
  1610. writeCharToStream(out, *(++x));
  1611. }
  1612. }
  1613. }
  1614. else
  1615. {
  1616. writeStringToStream(out, "&#");
  1617. char tmp[12];
  1618. unsigned written = numtostr(tmp, *(unsigned char *)x);
  1619. out.write(written, tmp);
  1620. writeCharToStream(out, ';');
  1621. }
  1622. break;
  1623. }
  1624. if (len != (unsigned) -1)
  1625. len--;
  1626. ++x;
  1627. }
  1628. }
  1629. static void writeUtf8(unsigned c, StringBuffer &out)
  1630. {
  1631. if (c < 0x80)
  1632. out.append((char)c);
  1633. else if (c < 0x800)
  1634. {
  1635. out.append((char)(0xC0 | (c>>6)));
  1636. out.append((char)(0x80 | (c & 0x3F)));
  1637. }
  1638. else if (c < 0x10000)
  1639. {
  1640. out.append((char) (0xE0 | (c>>12)));
  1641. out.append((char) (0x80 | (c>>6 & 0x3F)));
  1642. out.append((char) (0x80 | (c & 0x3F)));
  1643. }
  1644. else if (c < 0x200000)
  1645. {
  1646. out.append((char) (0xF0 | (c>>18)));
  1647. out.append((char) (0x80 | (c>>12 & 0x3F)));
  1648. out.append((char) (0x80 | (c>>6 & 0x3F)));
  1649. out.append((char) (0x80 | (c & 0x3F)));
  1650. }
  1651. else if (c < 0x4000000)
  1652. {
  1653. out.append((char) (0xF8 | (c>>24)));
  1654. out.append((char) (0x80 | (c>>18 & 0x3F)));
  1655. out.append((char) (0x80 | (c>>12 & 0x3F)));
  1656. out.append((char) (0x80 | (c>>6 & 0x3F)));
  1657. out.append((char) (0x80 | (c & 0x3F)));
  1658. }
  1659. else if (c < 0x80000000)
  1660. {
  1661. out.append((char) (0xFC | (c>>30)));
  1662. out.append((char) (0x80 | (c>>24 & 0x3F)));
  1663. out.append((char) (0x80 | (c>>18 & 0x3F)));
  1664. out.append((char) (0x80 | (c>>12 & 0x3F)));
  1665. out.append((char) (0x80 | (c>>6 & 0x3F)));
  1666. out.append((char) (0x80 | (c & 0x3F)));
  1667. }
  1668. else
  1669. assertex(false);
  1670. }
  1671. #define JSONSTRICT
  1672. const char *decodeJSON(const char *j, StringBuffer &ret, unsigned len, const char **errMark)
  1673. {
  1674. if (!j)
  1675. return ret.str();
  1676. if ((unsigned)-1 == len)
  1677. len = (unsigned)strlen(j);
  1678. try
  1679. {
  1680. for (const char *end = j+len; j<end && *j; j++)
  1681. {
  1682. if (*j!='\\')
  1683. ret.append(*j);
  1684. else
  1685. {
  1686. switch (*++j)
  1687. {
  1688. case 'u':
  1689. {
  1690. j++;
  1691. if (end-j>=4)
  1692. {
  1693. char *endptr;
  1694. StringAttr s(j, 4);
  1695. unsigned val = strtoul(s.get(), &endptr, 16);
  1696. if (endptr && !*endptr)
  1697. {
  1698. writeUtf8(val, ret);
  1699. j+=3;
  1700. break;
  1701. }
  1702. }
  1703. #ifdef JSONSTRICT
  1704. throw MakeStringException(-1, "invalid json \\u escaped sequence");
  1705. #endif
  1706. ret.append(*j);
  1707. break;
  1708. }
  1709. case '\"':
  1710. case '\\':
  1711. case '/':
  1712. ret.append(*j);
  1713. break;
  1714. case 'b':
  1715. ret.append('\b');
  1716. break;
  1717. case 'f':
  1718. ret.append('\f');
  1719. break;
  1720. case 'n':
  1721. ret.append('\n');
  1722. continue;
  1723. case 'r':
  1724. ret.append('\r');
  1725. break;
  1726. case 't':
  1727. ret.append('\t');
  1728. break;
  1729. default:
  1730. {
  1731. #ifdef JSONSTRICT
  1732. throw MakeStringException(-1, "invalid json escaped sequence");
  1733. #endif
  1734. ret.append('\\');
  1735. ret.append(*j);
  1736. break;
  1737. }
  1738. }
  1739. }
  1740. }
  1741. }
  1742. catch (IException *)
  1743. {
  1744. if (errMark) *errMark = j;
  1745. throw;
  1746. }
  1747. return ret.str();
  1748. }
  1749. void decodeXML(ISimpleReadStream &in, StringBuffer &out, unsigned len)
  1750. {
  1751. // TODO
  1752. UNIMPLEMENTED;
  1753. }
  1754. const char *decodeXML(const char *x, StringBuffer &ret, const char **errMark, IEntityHelper *entityHelper, bool strict)
  1755. {
  1756. if (!x)
  1757. return ret.str();
  1758. try
  1759. {
  1760. while (*x)
  1761. {
  1762. if ('&' == *x)
  1763. {
  1764. switch (x[1])
  1765. {
  1766. case 'a':
  1767. switch (x[2])
  1768. {
  1769. case 'm':
  1770. {
  1771. if ('p' == x[3] && ';' == x[4])
  1772. {
  1773. x += 5;
  1774. ret.append('&');
  1775. continue;
  1776. }
  1777. break;
  1778. }
  1779. case 'p':
  1780. {
  1781. if ('o' == x[3] && 's' == x[4] && ';' == x[5])
  1782. {
  1783. x += 6;
  1784. ret.append('\'');
  1785. continue;
  1786. }
  1787. break;
  1788. }
  1789. }
  1790. break;
  1791. case 'l':
  1792. if ('t' == x[2] && ';' == x[3])
  1793. {
  1794. x += 4;
  1795. ret.append('<');
  1796. continue;
  1797. }
  1798. break;
  1799. case 'g':
  1800. if ('t' == x[2] && ';' == x[3])
  1801. {
  1802. x += 4;
  1803. ret.append('>');
  1804. continue;
  1805. }
  1806. break;
  1807. case 'q':
  1808. if ('u' == x[2] && 'o' == x[3] && 't' == x[4] && ';' == x[5])
  1809. {
  1810. x += 6;
  1811. ret.append('"');
  1812. continue;
  1813. }
  1814. break;
  1815. case 'n':
  1816. if ('b' == x[2] && 's' == x[3] && 'p' == x[4] && ';' == x[5])
  1817. {
  1818. x += 6;
  1819. writeUtf8(0xa0, ret);
  1820. continue;
  1821. }
  1822. break;
  1823. case '#':
  1824. {
  1825. const char *numstart = x+2;
  1826. int base = 10;
  1827. if (*numstart == 'x')
  1828. {
  1829. base = 16;
  1830. numstart++;
  1831. }
  1832. char *numend;
  1833. unsigned val = strtoul(numstart, &numend, base);
  1834. if (numstart==numend || *numend != ';')
  1835. {
  1836. if (strict)
  1837. throw MakeStringException(-1, "invalid escaped sequence");
  1838. }
  1839. else // always convert to utf-8. Should potentially throw error if not marked as utf-8 encoded doc and out of ascii range.
  1840. {
  1841. writeUtf8(val, ret);
  1842. x = numend+1;
  1843. continue;
  1844. }
  1845. break;
  1846. }
  1847. case ';':
  1848. case '\0':
  1849. if (strict)
  1850. throw MakeStringException(-1, "invalid escaped sequence");
  1851. break;
  1852. default:
  1853. if (entityHelper)
  1854. {
  1855. const char *start=x+1;
  1856. const char *finger=start;
  1857. while (*finger && *finger != ';')
  1858. ++finger;
  1859. if (*finger == ';')
  1860. {
  1861. StringBuffer entity(finger-start, start);
  1862. if (entityHelper->find(entity, ret))
  1863. {
  1864. x = finger + 1;
  1865. continue;
  1866. }
  1867. }
  1868. }
  1869. if (strict)
  1870. throw MakeStringException(-1, "invalid escaped sequence");
  1871. break;
  1872. }
  1873. }
  1874. ret.append(*x);
  1875. ++x;
  1876. }
  1877. }
  1878. catch (IException *)
  1879. {
  1880. if (errMark) *errMark = x;
  1881. throw;
  1882. }
  1883. return ret.str();
  1884. }
  1885. StringBuffer & appendXMLOpenTag(StringBuffer &xml, const char *tag, const char *prefix, bool complete, bool close, const char *uri)
  1886. {
  1887. if (!tag || !*tag)
  1888. return xml;
  1889. xml.append('<');
  1890. appendXMLTagName(xml, tag, prefix);
  1891. if (uri && *uri)
  1892. {
  1893. xml.append(" xmlns");
  1894. if (prefix && *prefix)
  1895. xml.append(':').append(prefix);
  1896. xml.append("=\"").append(uri).append('\"');
  1897. }
  1898. if (complete)
  1899. {
  1900. if (close)
  1901. xml.append('/');
  1902. xml.append('>');
  1903. }
  1904. return xml;
  1905. }
  1906. jlib_decl StringBuffer &appendJSONName(StringBuffer &s, const char *name)
  1907. {
  1908. if (!name || !*name)
  1909. return s;
  1910. delimitJSON(s);
  1911. return encodeJSON(s.append('"'), name).append("\": ");
  1912. }
  1913. jlib_decl StringBuffer &appendfJSONName(StringBuffer &s, const char *format, ...)
  1914. {
  1915. va_list args;
  1916. va_start(args, format);
  1917. StringBuffer vs;
  1918. vs.valist_appendf(format, args);
  1919. va_end(args);
  1920. return appendJSONName(s, vs);
  1921. }
  1922. static char hexchar[] = "0123456789ABCDEF";
  1923. jlib_decl StringBuffer &appendJSONDataValue(StringBuffer& s, const char *name, unsigned len, const void *_value)
  1924. {
  1925. appendJSONNameOrDelimit(s, name);
  1926. s.append('"');
  1927. const unsigned char *value = (const unsigned char *) _value;
  1928. for (unsigned int i = 0; i < len; i++)
  1929. s.append(hexchar[value[i] >> 4]).append(hexchar[value[i] & 0x0f]);
  1930. return s.append('"');
  1931. }
  1932. StringBuffer &appendJSONRealValue(StringBuffer& s, const char *name, double value)
  1933. {
  1934. appendJSONNameOrDelimit(s, name);
  1935. bool quoted = j_isnan(value) || j_isinf(value);
  1936. if (quoted)
  1937. s.append('"');
  1938. s.append(value);
  1939. if (quoted)
  1940. s.append('"');
  1941. return s;
  1942. }
  1943. inline StringBuffer &encodeJSONChar(StringBuffer &s, const char *&ch, unsigned &remaining)
  1944. {
  1945. byte next = *ch;
  1946. switch (next)
  1947. {
  1948. case '\b':
  1949. s.append("\\b");
  1950. break;
  1951. case '\f':
  1952. s.append("\\f");
  1953. break;
  1954. case '\n':
  1955. s.append("\\n");
  1956. break;
  1957. case '\r':
  1958. s.append("\\r");
  1959. break;
  1960. case '\t':
  1961. s.append("\\t");
  1962. break;
  1963. case '\"':
  1964. case '\\':
  1965. s.append('\\');
  1966. s.append(next);
  1967. break;
  1968. case '\0':
  1969. s.append("\\u0000");
  1970. break;
  1971. case '\x7f':
  1972. s.append("\\u007f");
  1973. break;
  1974. default:
  1975. if (next >= ' ' && next < 128)
  1976. s.append(next);
  1977. else if (next < ' ')
  1978. s.append("\\u00").appendhex(next, true);
  1979. else //json is always supposed to be utf8 (or other unicode formats)
  1980. {
  1981. unsigned chlen = utf8CharLen((const unsigned char *)ch, remaining);
  1982. if (chlen==0)
  1983. s.append("\\u00").appendhex(next, true);
  1984. else
  1985. {
  1986. s.append(chlen, ch);
  1987. ch += (chlen-1);
  1988. remaining -= (chlen-1);
  1989. }
  1990. }
  1991. break;
  1992. }
  1993. ch++;
  1994. remaining--;
  1995. return s;
  1996. }
  1997. StringBuffer &encodeJSON(StringBuffer &s, unsigned size, const char *value)
  1998. {
  1999. if (!value)
  2000. return s;
  2001. while (size)
  2002. encodeJSONChar(s, value, size);
  2003. return s;
  2004. }
  2005. StringBuffer &encodeJSON(StringBuffer &s, const char *value)
  2006. {
  2007. if (!value)
  2008. return s;
  2009. return encodeJSON(s, strlen(value), value);
  2010. }
  2011. bool checkUnicodeLiteral(char const * str, unsigned length, unsigned & ep, StringBuffer & msg)
  2012. {
  2013. unsigned i;
  2014. for(i = 0; i < length; i++)
  2015. {
  2016. if (str[i] == '\\')
  2017. {
  2018. unsigned char next = str[++i];
  2019. if (next == '\'' || next == '\\' || next == 'n' || next == 'r' || next == 't' || next == 'a' || next == 'b' || next == 'f' || next == 'v' || next == '?' || next == '"')
  2020. {
  2021. continue;
  2022. }
  2023. else if (isdigit(next) && next < '8')
  2024. {
  2025. unsigned count;
  2026. for(count = 1; count < 3; count++)
  2027. {
  2028. next = str[++i];
  2029. if(!isdigit(next) || next >= '8')
  2030. {
  2031. msg.append("3-digit numeric escape sequence contained non-octal digit: ").append(next);
  2032. ep = i;
  2033. return false;
  2034. }
  2035. }
  2036. }
  2037. else if (next == 'u' || next == 'U')
  2038. {
  2039. unsigned count;
  2040. unsigned max = (next == 'u') ? 4 : 8;
  2041. for(count = 0; count < max; count++)
  2042. {
  2043. next = str[++i];
  2044. if(!isdigit(next) && (!isalpha(next) || tolower(next) > 'f'))
  2045. {
  2046. msg.append((max == 4) ? '4' : '8').append("-digit unicode escape sequence contained non-hex digit: ").append(next);
  2047. ep = i;
  2048. return false;
  2049. }
  2050. }
  2051. }
  2052. else
  2053. {
  2054. msg.append("Unrecognized escape sequence: ").append("\\").append(next);
  2055. ep = i;
  2056. return false;
  2057. }
  2058. }
  2059. }
  2060. return true;
  2061. }
  2062. void decodeCppEscapeSequence(StringBuffer & out, const char * in, bool errorIfInvalid)
  2063. {
  2064. out.ensureCapacity((size32_t)strlen(in));
  2065. while (*in)
  2066. {
  2067. char c = *in++;
  2068. if (c == '\\')
  2069. {
  2070. char next = *in;
  2071. if (next)
  2072. {
  2073. in++;
  2074. switch (next)
  2075. {
  2076. case 'a': c = '\a'; break;
  2077. case 'b': c = '\b'; break;
  2078. case 'f': c = '\f'; break;
  2079. case 'n': c = '\n'; break;
  2080. case 'r': c = '\r'; break;
  2081. case 't': c = '\t'; break;
  2082. case 'v': c = '\v'; break;
  2083. case '\\':
  2084. case '\'':
  2085. case '?':
  2086. case '\"': break;
  2087. case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
  2088. {
  2089. c = next - '0';
  2090. if (*in >= '0' && *in <= '7')
  2091. {
  2092. c = c << 3 | (*in++-'0');
  2093. if (*in >= '0' && *in <= '7')
  2094. c = c << 3 | (*in++-'0');
  2095. }
  2096. break;
  2097. }
  2098. case 'x':
  2099. c = 0;
  2100. while (isxdigit(*in))
  2101. {
  2102. next = *in++;
  2103. c = c << 4;
  2104. if (next >= '0' && next <= '9') c |= (next - '0');
  2105. else if (next >= 'A' && next <= 'F') c |= (next - 'A' + 10);
  2106. else if (next >= 'a' && next <= 'f') c |= (next - 'a' + 10);
  2107. }
  2108. break;
  2109. default:
  2110. if (errorIfInvalid)
  2111. throw MakeStringException(1, "unrecognised character escape sequence '\\%c'", next);
  2112. in--; // keep it as is.
  2113. break;
  2114. }
  2115. }
  2116. }
  2117. out.append(c);
  2118. }
  2119. }
  // Returns true when every one of the `len` bytes at `src` satisfies isprint().
  // An empty range (len == 0) is considered printable.
  bool isPrintable(unsigned len, const char * src)
  {
      const unsigned char * cursor = (const unsigned char *)src;
      for (unsigned remaining = len; remaining != 0; --remaining, ++cursor)
      {
          if (!isprint(*cursor))
              return false;
      }
      return true;
  }
  2130. //make this as fast as possible...
  2131. StringBuffer & appendStringAsSQL(StringBuffer & out, unsigned len, const char * src)
  2132. {
  2133. if (!isPrintable(len, src))
  2134. {
  2135. out.append("X'");
  2136. appendDataAsHex(out, len, src);
  2137. return out.append('\'');
  2138. }
  2139. out.ensureCapacity(2 + len);
  2140. out.append('\'');
  2141. for (;;)
  2142. {
  2143. char * next = (char *)memchr(src, '\'', len);
  2144. if (!next)
  2145. break;
  2146. unsigned chunk=(size32_t)(next-src)+1;
  2147. out.append(chunk, src).append('\'');
  2148. len -= chunk;
  2149. src += chunk;
  2150. }
  2151. return out.append(len, src).append('\'');
  2152. }
  2153. static const char * hexText = "0123456789ABCDEF";
  2154. StringBuffer & appendDataAsHex(StringBuffer &out, unsigned len, const void * data)
  2155. {
  2156. char * target = (char *)out.reserve(len*2);
  2157. unsigned char * start = (unsigned char *)data;
  2158. for (unsigned count=len; count> 0; --count)
  2159. {
  2160. unsigned next = *start++;
  2161. *target++ = hexText[next >>4];
  2162. *target++ = hexText[next & 15];
  2163. }
  2164. return out;
  2165. }
  2166. bool strToBool(size_t len, const char * text)
  2167. {
  2168. switch (len)
  2169. {
  2170. case 4:
  2171. if (memicmp(text, "true", 4) == 0)
  2172. return true;
  2173. break;
  2174. case 3:
  2175. if (memicmp(text, "yes", 3) == 0)
  2176. return true;
  2177. break;
  2178. case 2:
  2179. if (memicmp(text, "on", 2) == 0)
  2180. return true;
  2181. break;
  2182. case 1:
  2183. if ((memicmp(text, "t", 1) == 0) || (memicmp(text, "y", 1) == 0))
  2184. return true;
  2185. break;
  2186. }
  2187. while (len && isspace(*text))
  2188. {
  2189. len--;
  2190. text++;
  2191. }
  2192. while (len-- && isdigit(*text))
  2193. {
  2194. if (*text++ != '0') return true;
  2195. }
  2196. return false;
  2197. }
  2198. bool strToBool(const char * text)
  2199. {
  2200. return strToBool(strlen(text), text);
  2201. }
  2202. bool clipStrToBool(size_t len, const char * text)
  2203. {
  2204. while (len && *text==' ')
  2205. {
  2206. len--;
  2207. text++;
  2208. }
  2209. while (len && text[len-1]== ' ')
  2210. len--;
  2211. return strToBool(len, text);
  2212. }
  2213. bool clipStrToBool(const char * text)
  2214. {
  2215. return clipStrToBool(strlen(text), text);
  2216. }
  2217. StringBuffer & ncnameEscape(char const * in, StringBuffer & out)
  2218. {
  2219. if(!isalpha(*in))
  2220. {
  2221. out.appendf("_%02X", static_cast<unsigned char>(*in));
  2222. in++;
  2223. }
  2224. char const * finger = in;
  2225. while(*finger)
  2226. {
  2227. if(!isalnum(*finger))
  2228. {
  2229. if(finger>in)
  2230. out.append((size32_t)(finger-in), in);
  2231. out.appendf("_%02X", static_cast<unsigned char>(*finger));
  2232. in = ++finger;
  2233. }
  2234. else
  2235. {
  2236. finger++;
  2237. }
  2238. }
  2239. if(finger>in)
  2240. out.append((size32_t)(finger-in), in);
  2241. return out;
  2242. }
  2243. StringBuffer & ncnameUnescape(char const * in, StringBuffer & out)
  2244. {
  2245. char const * finger = in;
  2246. while(*finger)
  2247. {
  2248. if(*finger == '_')
  2249. {
  2250. if(finger>in)
  2251. out.append((size32_t)(finger-in), in);
  2252. unsigned char chr = 16 * hex2num(finger[1]) + hex2num(finger[2]);
  2253. out.append(static_cast<char>(chr));
  2254. in = (finger+=3);
  2255. }
  2256. else
  2257. {
  2258. finger++;
  2259. }
  2260. }
  2261. if(finger>in)
  2262. out.append((size32_t)(finger-in), in);
  2263. return out;
  2264. }
  // Returns true when the null-terminated string `src` begins with `prefix` (case-sensitive).
  // An empty prefix always matches.
  bool startsWith(const char* src, const char* prefix)
  {
      for (; *prefix; ++src, ++prefix)
      {
          if (*src != *prefix)
              return false;
      }
      return true;
  }
  // Returns true when the null-terminated string `src` begins with `prefix`, comparing
  // characters case-insensitively via tolower().  An empty prefix always matches.
  bool startsWithIgnoreCase(const char* src, const char* prefix)
  {
      // tolower() is undefined for negative arguments, so each (possibly signed) char is
      // converted to unsigned char before the call.
      while (*prefix && tolower(static_cast<unsigned char>(*prefix)) == tolower(static_cast<unsigned char>(*src)))
      {
          src++;
          prefix++;
      }
      return *prefix==0;
  }
  // Returns true when the null-terminated string `src` ends with `dst` (case-sensitive).
  // An empty `dst` always matches.
  bool endsWith(const char* src, const char* dst)
  {
      const size_t srcLen = strlen(src);
      const size_t dstLen = strlen(dst);
      if (dstLen > srcLen)
          return false;
      const char * tail = src + (srcLen - dstLen);
      return memcmp(dst, tail, dstLen) == 0;
  }
  2283. bool endsWithIgnoreCase(const char* src, const char* dst)
  2284. {
  2285. size_t srcLen = strlen(src);
  2286. size_t dstLen = strlen(dst);
  2287. if (dstLen<=srcLen)
  2288. return memicmp(dst, src+srcLen-dstLen, dstLen)==0;
  2289. return false;
  2290. }
  2291. unsigned matchString(const char * search, const char * const * strings)
  2292. {
  2293. for (unsigned i=0;;i++)
  2294. {
  2295. const char * cur = strings[i];
  2296. if (!cur)
  2297. return UINT_MAX;
  2298. if (streq(search, cur))
  2299. return i;
  2300. }
  2301. }
  // Re-entrant tokenizer.
  //   str     - string to tokenize on the first call; pass NULL on later calls to continue
  //             from the position stored in *saveptr.
  //   delim   - set of delimiter characters.
  //   saveptr - receives the position at which the next call should resume.
  // Leading delimiters are skipped.  The delimiter that ends the token (if any) is
  // overwritten with a null terminator.  Returns the start of the token, or NULL when only
  // delimiters (or nothing) remain.
  char *j_strtok_r(char *str, const char *delim, char **saveptr)
  {
      char *cursor = str ? str : *saveptr;

      cursor += strspn(cursor, delim);        // skip any leading delimiters
      if (*cursor == 0)
      {
          *saveptr = cursor;
          return NULL;
      }

      char *token = cursor;
      cursor += strcspn(cursor, delim);       // advance to the end of the token
      if (*cursor != 0)
      {
          *cursor = 0;                        // terminate the token in place
          ++cursor;
      }
      *saveptr = cursor;
      return token;
  }
  2326. int j_memicmp (const void *s1, const void *s2, size32_t len)
  2327. {
  2328. const byte *b1 = (const byte *)s1;
  2329. const byte *b2 = (const byte *)s2;
  2330. int ret = 0;
  2331. while (len&&((ret = tolower(*b1)-tolower(*b2)) == 0)) {
  2332. b1++;
  2333. b2++;
  2334. len--;
  2335. }
  2336. return ret;
  2337. }
  2338. size32_t memcount(size32_t len, const char * str, char search)
  2339. {
  2340. size32_t count = 0;
  2341. for (size32_t i=0; i < len; i++)
  2342. {
  2343. if (str[i] == search)
  2344. count++;
  2345. }
  2346. return count;
  2347. }
  2348. StringBuffer & elideString(StringBuffer & s, unsigned maxLength)
  2349. {
  2350. if (s.length() > maxLength)
  2351. {
  2352. s.setLength(maxLength);
  2353. s.append("...");
  2354. }
  2355. return s;
  2356. }
  // Returns `text` unchanged when it is non-null, otherwise the literal "(null)".
  const char * nullText(const char * text)
  {
      return text ? text : "(null)";
  }
  2362. StringBuffer& StringBuffer::operator=(StringBuffer&& value)
  2363. {
  2364. swapWith(value);
  2365. return *this;
  2366. }