jstring.cpp 55 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "platform.h"
  14. #include <stdio.h>
  15. #include <stdlib.h>
  16. #include <stdarg.h>
  17. #include <assert.h>
  18. #include <string.h>
  19. #include <ctype.h>
  20. #include <time.h>
  21. #include <math.h>
  22. #include "jstring.hpp"
  23. #include "jexcept.hpp"
  24. #include "jhash.hpp"
  25. #include "jlog.hpp"
  26. #include "jfile.hpp"
  27. #include "jdebug.hpp"
  28. #include "jutil.hpp"
  // printf-style formats used by append(double) and append(float)
  #define DOUBLE_FORMAT "%.16g"
  #define FLOAT_FORMAT "%.7g"

  #ifndef va_copy
  /* WARNING - DANGER - ASSUMES TYPICAL STACK MACHINE */
  // Fallback when the toolchain has no va_copy: plain assignment of the va_list.
  #define va_copy(dst, src) ((void)((dst) = (src)))
  #endif

  // Shared empty string, used wherever no buffer has been allocated.
  static const char * TheNullStr = "";

  #define FIRST_CHUNK_SIZE 8      // starting size used by _realloc when nothing is allocated yet
  #define DOUBLE_LIMIT 0x100000   // must be a power of 2
  #define DETACH_GRANULARITY 16   // slack that detach() tolerates before shrinking the block
  39. //===========================================================================
  40. StringBuffer::StringBuffer()
  41. {
  42. init();
  43. }
  #if 0
  // Disabled code: constructor that calls ensureCapacity(initial) after init().
  StringBuffer::StringBuffer(int initial)
  {
      init();
      ensureCapacity(initial);
  }
  #endif
  51. StringBuffer::StringBuffer(String & value)
  52. {
  53. init();
  54. append(value);
  55. }
  56. StringBuffer::StringBuffer(const char *value)
  57. {
  58. init();
  59. append(value);
  60. }
  61. StringBuffer::StringBuffer(unsigned len, const char *value)
  62. {
  63. init();
  64. append(len, value);
  65. }
  66. StringBuffer::StringBuffer(const StringBuffer & value)
  67. {
  68. init();
  69. append(value);
  70. }
  71. void StringBuffer::setBuffer(size32_t buffLen, char * newBuff, size32_t strLen)
  72. {
  73. assertex(buffLen>0 && newBuff!=NULL && strLen<buffLen);
  74. if (buffer)
  75. free(buffer);
  76. buffer = newBuff;
  77. maxLen=buffLen;
  78. curLen=strLen;
  79. }
  80. void StringBuffer::_realloc(size32_t newLen)
  81. {
  82. if (newLen >= maxLen)
  83. {
  84. size32_t newMax = maxLen;
  85. if (newMax == 0)
  86. newMax = FIRST_CHUNK_SIZE;
  87. if (newLen > DOUBLE_LIMIT)
  88. {
  89. newMax = (newLen + DOUBLE_LIMIT) & ~(DOUBLE_LIMIT-1);
  90. if (newLen >= newMax)
  91. throw MakeStringException(MSGAUD_operator, -1, "StringBuffer::_realloc: Request for %d bytes oldMax = %d", newLen, maxLen);
  92. }
  93. else
  94. {
  95. while (newLen >= newMax)
  96. newMax += newMax;
  97. }
  98. char * newStr;
  99. if(!newMax || !(newStr=(char *)realloc(buffer, newMax)))
  100. {
  101. DBGLOG("StringBuffer::_realloc: Failed to realloc = %d, oldMax = %d", newMax, maxLen);
  102. PrintStackReport();
  103. PrintMemoryReport();
  104. throw MakeStringException(MSGAUD_operator, -1, "StringBuffer::_realloc: Failed to realloc = %d, oldMax = %d", newMax, maxLen);
  105. }
  106. buffer = newStr;
  107. maxLen = newMax;
  108. }
  109. }
  110. char * StringBuffer::detach()
  111. {
  112. if (buffer)
  113. {
  114. if (maxLen>curLen+1+DETACH_GRANULARITY)
  115. buffer = (char *)realloc(buffer,curLen+1); // shrink
  116. buffer[curLen] = '\0'; // There is always room for this null
  117. char *ret = buffer;
  118. init();
  119. return ret;
  120. }
  121. return strdup(TheNullStr);
  122. }
  123. StringBuffer & StringBuffer::append(char value)
  124. {
  125. ensureCapacity(1);
  126. buffer[curLen] = value;
  127. ++curLen;
  128. return *this;
  129. }
  130. StringBuffer & StringBuffer::append(unsigned char value)
  131. {
  132. ensureCapacity(1);
  133. buffer[curLen] = value;
  134. ++curLen;
  135. return *this;
  136. }
  137. StringBuffer & StringBuffer::append(const char * value)
  138. {
  139. if (value)
  140. {
  141. size32_t SourceLen = (size32_t)::strlen(value);
  142. ensureCapacity(SourceLen);
  143. memcpy(buffer + curLen, value, SourceLen);
  144. curLen += SourceLen;
  145. }
  146. return *this;
  147. }
  148. StringBuffer & StringBuffer::append(unsigned len, const char * value)
  149. {
  150. if (len)
  151. {
  152. ensureCapacity(len);
  153. memcpy(buffer + curLen, value, len);
  154. curLen += len;
  155. }
  156. return *this;
  157. }
  158. StringBuffer & StringBuffer::append(const unsigned char * value)
  159. {
  160. return append((const char *) value);
  161. }
  162. StringBuffer & StringBuffer::append(const char * value, int offset, int len)
  163. {
  164. ensureCapacity(len);
  165. memcpy(buffer + curLen, value+offset, len);
  166. curLen += len;
  167. return *this;
  168. }
  169. StringBuffer & StringBuffer::append(const IAtom * value)
  170. {
  171. if (value)
  172. append(value->getAtomNamePtr());
  173. return *this;
  174. }
  175. StringBuffer & StringBuffer::append(double value)
  176. {
  177. int len = length();
  178. int newlen = appendf(DOUBLE_FORMAT, value).length();
  179. while (len < newlen)
  180. {
  181. switch (charAt(len))
  182. {
  183. case '.':
  184. case 'E':
  185. case 'e':
  186. return *this;
  187. }
  188. len++;
  189. }
  190. return append(".0");
  191. }
  192. StringBuffer & StringBuffer::append(float value)
  193. {
  194. int len = length();
  195. int newlen = appendf(FLOAT_FORMAT, value).length();
  196. while (len < newlen)
  197. {
  198. switch (charAt(len))
  199. {
  200. case '.':
  201. case 'E':
  202. case 'e':
  203. return *this;
  204. }
  205. len++;
  206. }
  207. return append(".0");
  208. }
  209. StringBuffer & StringBuffer::append(int value)
  210. {
  211. char temp[12];
  212. unsigned written = numtostr(temp, value);
  213. return append(written, temp);
  214. }
  215. StringBuffer & StringBuffer::append(unsigned value)
  216. {
  217. char temp[12];
  218. unsigned written = numtostr(temp, value);
  219. return append(written, temp);
  220. }
  221. StringBuffer & StringBuffer::appendlong(long value)
  222. {
  223. char temp[24];
  224. unsigned written = numtostr(temp, value);
  225. return append(written, temp);
  226. }
  227. StringBuffer & StringBuffer::appendulong(unsigned long value)
  228. {
  229. char temp[24];
  230. unsigned written = numtostr(temp, value);
  231. return append(written, temp);
  232. }
  233. StringBuffer & StringBuffer::append(__int64 value)
  234. {
  235. char temp[24];
  236. unsigned written = numtostr(temp, value);
  237. return append(written, temp);
  238. }
  239. StringBuffer & StringBuffer::append(unsigned __int64 value)
  240. {
  241. char temp[24];
  242. unsigned written = numtostr(temp, value);
  243. return append(written, temp);
  244. }
  245. StringBuffer & StringBuffer::append(const String & value)
  246. {
  247. size32_t SourceLen = value.length();
  248. ensureCapacity(SourceLen);
  249. value.getChars(0, SourceLen, buffer, curLen);
  250. curLen += SourceLen;
  251. return *this;
  252. }
  253. StringBuffer & StringBuffer::append(const IStringVal & value)
  254. {
  255. return append(value.str());
  256. }
  257. StringBuffer & StringBuffer::append(const IStringVal * value)
  258. {
  259. if (value)
  260. return append(value->str());
  261. else
  262. return *this;
  263. }
  264. StringBuffer & StringBuffer::append(const StringBuffer & value)
  265. {
  266. size32_t SourceLen = value.length();
  267. ensureCapacity(SourceLen);
  268. value.getChars(0, SourceLen, buffer + curLen);
  269. curLen += SourceLen;
  270. return *this;
  271. }
  272. StringBuffer & StringBuffer::appendf(const char *format, ...)
  273. {
  274. va_list args;
  275. va_start(args, format);
  276. valist_appendf(format, args);
  277. va_end(args);
  278. return *this;
  279. }
  280. StringBuffer & StringBuffer::appendLower(unsigned len, const char * value)
  281. {
  282. if (len)
  283. {
  284. ensureCapacity(len);
  285. const byte * from = reinterpret_cast<const byte *>(value);
  286. for (unsigned i = 0; i < len; i++)
  287. buffer[curLen + i] = tolower(from[i]);
  288. curLen += len;
  289. }
  290. return *this;
  291. }
  292. StringBuffer & StringBuffer::setf(const char *format, ...)
  293. {
  294. clear();
  295. va_list args;
  296. va_start(args, format);
  297. valist_appendf(format, args);
  298. va_end(args);
  299. return *this;
  300. }
  301. StringBuffer & StringBuffer::limited_valist_appendf(unsigned szLimit, const char *format, va_list args)
  302. {
  303. #define BUF_SIZE 1024
  304. #define MAX_BUF_SIZE (1024*1024) // limit buffer size to 1MB when doubling
  305. // handle string that is bigger that BUF_SIZE bytes
  306. unsigned size = (0 == szLimit||szLimit>BUF_SIZE)?BUF_SIZE:szLimit;
  307. int len;
  308. va_list args2;
  309. va_copy(args2, args);
  310. try { ensureCapacity(size); }
  311. catch (IException *e)
  312. {
  313. StringBuffer eMsg;
  314. IException *e2 = MakeStringException(-1, "StringBuffer::valist_appendf(\"%s\"): vsnprintf failed or result exceeds limit (%d): %s", format, size, e->errorMessage(eMsg).str());
  315. e->Release();
  316. throw e2;
  317. }
  318. len = _vsnprintf(buffer+curLen,size,format,args);
  319. if (len >= 0)
  320. {
  321. if ((unsigned)len >= size)
  322. {
  323. if (szLimit && (unsigned)len >= szLimit)
  324. {
  325. if ((unsigned)len>szLimit)
  326. {
  327. len = size;
  328. if (len>3) memcpy(buffer+len-3, "...", 3);
  329. }
  330. }
  331. else
  332. {
  333. ensureCapacity(len);
  334. // no need for _vsnprintf since the buffer is already made big enough
  335. vsprintf(buffer+curLen,format,args2);
  336. }
  337. }
  338. }
  339. else if (size == szLimit)
  340. {
  341. len = size;
  342. if (len>3) memcpy(buffer+len-3, "...", 3);
  343. }
  344. else
  345. {
  346. size = BUF_SIZE * 2;
  347. loop
  348. {
  349. if (0 != szLimit && size>szLimit) size = szLimit; // if so, will be last attempt
  350. if (size>MAX_BUF_SIZE)
  351. {
  352. WARNLOG("StringBuffer::valist_appendf(\"%s\"): vsnprintf exceeds limit (%d)", format, size);
  353. size = szLimit = MAX_BUF_SIZE;
  354. }
  355. try { ensureCapacity(size); }
  356. catch (IException *e)
  357. {
  358. StringBuffer eMsg;
  359. IException *e2 = MakeStringException(-1, "StringBuffer::valist_appendf(\"%s\"): vsnprintf failed (%d): %s", format, size, e->errorMessage(eMsg).str());
  360. e->Release();
  361. throw e2;
  362. }
  363. va_list args3;
  364. va_copy(args3, args2);
  365. len = _vsnprintf(buffer+curLen,size,format,args3);
  366. va_end(args3);
  367. if (len>=0) // NB: len>size not possible, 1st _vsnprintf would have handled.
  368. break;
  369. if (size == szLimit)
  370. {
  371. len = size;
  372. if (len>3) memcpy(buffer+len-3, "...", 3);
  373. break;
  374. }
  375. size <<= 1;
  376. }
  377. }
  378. va_end(args2);
  379. curLen += len;
  380. return *this;
  381. }
  382. StringBuffer & StringBuffer::appendN(size32_t count, char fill)
  383. {
  384. ensureCapacity(count);
  385. memset(buffer+curLen, fill, count);
  386. curLen += count;
  387. return *this;
  388. }
  389. void StringBuffer::setLength(unsigned len)
  390. {
  391. if (len > curLen)
  392. {
  393. ensureCapacity(len-curLen);
  394. }
  395. curLen = len;
  396. }
  397. char * StringBuffer::reserve(size32_t size)
  398. {
  399. ensureCapacity(size);
  400. char *ret = buffer+curLen;
  401. curLen += size;
  402. return ret;
  403. }
  404. char * StringBuffer::reserveTruncate(size32_t size)
  405. {
  406. size32_t newMax = curLen+size+1;
  407. if (newMax != maxLen) {
  408. char * newStr = (char *) realloc(buffer, newMax);
  409. if (!newStr)
  410. throw MakeStringException(-1, "StringBuffer::_realloc: Failed to realloc newMax = %d, oldMax = %d", newMax, maxLen);
  411. buffer = newStr;
  412. maxLen = newMax;
  413. }
  414. char *ret = buffer+curLen;
  415. curLen += size;
  416. return ret;
  417. }
  418. void StringBuffer::swapWith(StringBuffer &other)
  419. {
  420. size32_t tmpsz = curLen;
  421. curLen = other.curLen;
  422. other.curLen = tmpsz;
  423. tmpsz = maxLen;
  424. maxLen = other.maxLen;
  425. other.maxLen = tmpsz;
  426. char *tmpbuf = buffer;
  427. buffer = other.buffer;
  428. other.buffer = tmpbuf;
  429. }
  430. void StringBuffer::kill()
  431. {
  432. if (buffer)
  433. free(buffer);
  434. init();
  435. }
  436. void StringBuffer::getChars(int srcBegin, int srcEnd, char * target) const
  437. {
  438. const int len = srcEnd - srcBegin;
  439. if (target && buffer && len > 0)
  440. memcpy(target, buffer + srcBegin, len);
  441. }
  442. void StringBuffer::_insert(unsigned offset, size32_t insertLen)
  443. {
  444. ensureCapacity(insertLen);
  445. memmove(buffer + offset + insertLen, buffer + offset, curLen - offset);
  446. curLen += insertLen;
  447. }
  448. StringBuffer & StringBuffer::insert(int offset, char value)
  449. {
  450. _insert(offset, 1);
  451. buffer[offset] = value;
  452. return *this;
  453. }
  454. StringBuffer & StringBuffer::insert(int offset, const char * value)
  455. {
  456. if (!value) return *this;
  457. unsigned len = (size32_t)strlen(value);
  458. _insert(offset, len);
  459. memcpy(buffer + offset, value, len);
  460. return *this;
  461. }
  462. StringBuffer & StringBuffer::insert(int offset, double value)
  463. {
  464. char temp[36];
  465. sprintf(temp, "%f", value);
  466. insert(offset, temp);
  467. return *this;
  468. }
  469. StringBuffer & StringBuffer::insert(int offset, float value)
  470. {
  471. return insert(offset, (double)value);
  472. }
  473. StringBuffer & StringBuffer::insert(int offset, int value)
  474. {
  475. char temp[12];
  476. numtostr(temp, value);
  477. return insert(offset, temp);
  478. }
  479. StringBuffer & StringBuffer::insert(int offset, unsigned value)
  480. {
  481. char temp[12];
  482. numtostr(temp, value);
  483. return insert(offset, temp);
  484. }
  #if 0
  // Disabled code: inserts the text form of a long at `offset`.
  StringBuffer & StringBuffer::insert(int offset, long value)
  {
      char digits[24];
      numtostr(digits, value);
      return insert(offset, digits);
  }
  #endif
  493. StringBuffer & StringBuffer::insert(int offset, __int64 value)
  494. {
  495. char temp[24];
  496. numtostr(temp, value);
  497. return insert(offset, temp);
  498. }
  499. StringBuffer & StringBuffer::insert(int offset, const String & value)
  500. {
  501. size32_t len = value.length();
  502. _insert(offset, len);
  503. value.getChars(0, len, buffer, offset);
  504. return *this;
  505. }
  506. StringBuffer & StringBuffer::insert(int offset, const StringBuffer & value)
  507. {
  508. size32_t len = value.length();
  509. _insert(offset, len);
  510. value.getChars(0, len, buffer+offset);
  511. return *this;
  512. }
  513. StringBuffer & StringBuffer::insert(int offset, const IStringVal & value)
  514. {
  515. return insert(offset, value.str());
  516. }
  517. StringBuffer & StringBuffer::insert(int offset, const IStringVal * value)
  518. {
  519. if (value)
  520. return insert(offset, value->str());
  521. else
  522. return *this;
  523. }
  524. StringBuffer & StringBuffer::newline()
  525. {
  526. return append("\n");
  527. }
  528. StringBuffer & StringBuffer::pad(unsigned count)
  529. {
  530. ensureCapacity(count);
  531. memset(buffer + curLen, ' ', count);
  532. curLen += count;
  533. return *this;
  534. }
  535. StringBuffer & StringBuffer::padTo(unsigned count)
  536. {
  537. if (curLen<count)
  538. pad(count-curLen);
  539. return *this;
  540. }
  541. StringBuffer & StringBuffer::clip()
  542. {
  543. while (curLen && isspace(buffer[curLen-1]))
  544. curLen--;
  545. return *this;
  546. }
  547. StringBuffer & StringBuffer::trim()
  548. {
  549. return clip().trimLeft();
  550. }
  551. StringBuffer & StringBuffer::trimLeft()
  552. {
  553. char *p;
  554. if (curLen==0)
  555. return *this;
  556. buffer[curLen] = 0;
  557. for(p = buffer;isspace(*p);p++)
  558. ;
  559. if (p!=buffer)
  560. {
  561. curLen -= p-buffer;
  562. memmove(buffer,p,curLen);
  563. }
  564. return *this;
  565. }
  566. StringBuffer & StringBuffer::remove(unsigned start, unsigned len)
  567. {
  568. if (start > curLen) start = curLen;
  569. if (start + len > curLen) len = curLen - start;
  570. unsigned start2 = start + len;
  571. memmove(buffer + start, buffer + start2, curLen - start2);
  572. setLength(curLen - len);
  573. return *this;
  574. }
  575. StringBuffer &StringBuffer::reverse()
  576. {
  577. unsigned max = curLen/2;
  578. char * end = buffer + curLen;
  579. unsigned idx;
  580. for (idx = 0; idx < max; idx++)
  581. {
  582. char temp = buffer[idx];
  583. end--;
  584. buffer[idx] = *end;
  585. *end = temp;
  586. }
  587. return *this;
  588. }
  589. MemoryBuffer & StringBuffer::deserialize(MemoryBuffer & in)
  590. {
  591. unsigned len;
  592. in.read(len);
  593. append(len, (const char *)in.readDirect(len));
  594. return in;
  595. }
  596. MemoryBuffer & StringBuffer::serialize(MemoryBuffer & out) const
  597. {
  598. return out.append(curLen).append(curLen, buffer);
  599. }
  600. StringBuffer &StringBuffer::loadFile(const char *filename, bool binaryMode)
  601. {
  602. FILE *in = fopen(filename, binaryMode?"rb":"rt");
  603. if (in)
  604. {
  605. char buffer[1024];
  606. int bytes;
  607. for (;;)
  608. {
  609. bytes = (size32_t)fread(buffer, 1, sizeof(buffer), in);
  610. if (!bytes)
  611. break;
  612. append(buffer, 0, bytes);
  613. }
  614. fclose(in);
  615. return *this;
  616. }
  617. else
  618. throw MakeStringException(errno, "File %s could not be opened", filename);
  619. }
  620. StringBuffer & StringBuffer::loadFile(IFile* f)
  621. {
  622. if(!f)
  623. return *this;
  624. Owned<IFileIO> io = f->open(IFOread);
  625. if(!io)
  626. throw MakeStringException(errno, "file %s could not be opened for reading", f->queryFilename());
  627. char buf[2048];
  628. const unsigned requestedSize = sizeof(buf);
  629. offset_t pos = 0;
  630. loop
  631. {
  632. size32_t len = io->read(pos, requestedSize, buf);
  633. if (len == 0)
  634. break;
  635. append(len, buf);
  636. pos += len;
  637. if (len != requestedSize)
  638. break;
  639. }
  640. return *this;
  641. }
  642. void StringBuffer::setCharAt(unsigned offset, char value)
  643. {
  644. if (offset < curLen)
  645. buffer[offset] = value;
  646. }
  647. StringBuffer & StringBuffer::toLowerCase()
  648. {
  649. if (buffer)
  650. {
  651. int l = curLen;
  652. for (int i = 0; i < l; i++)
  653. if (isupper(buffer[i]))
  654. buffer[i] = tolower(buffer[i]);
  655. }
  656. return *this;
  657. }
  658. StringBuffer & StringBuffer::toUpperCase()
  659. {
  660. if (buffer)
  661. {
  662. int l = curLen;
  663. for (int i = 0; i < l; i++)
  664. if (islower(buffer[i]))
  665. buffer[i] = toupper(buffer[i]);
  666. }
  667. return *this;
  668. }
  669. StringBuffer & StringBuffer::replace(char oldChar, char newChar)
  670. {
  671. if (buffer)
  672. {
  673. int l = curLen;
  674. for (int i = 0; i < l; i++)
  675. if (buffer[i] == oldChar)
  676. {
  677. buffer[i] = newChar;
  678. if (newChar == '\0')
  679. {
  680. curLen = i;
  681. break;
  682. }
  683. }
  684. }
  685. return *this;
  686. }
  687. // this method will replace all occurrances of "oldStr" with "newStr"
  688. StringBuffer & StringBuffer::replaceString(const char* oldStr, const char* newStr)
  689. {
  690. if (buffer)
  691. {
  692. const char* s = str(); // get null terminated version of the string
  693. int left = length();
  694. int oldStr_len = (size32_t)strlen(oldStr);
  695. StringBuffer tempbuff;
  696. while (left >= oldStr_len)
  697. {
  698. if ( memcmp(s, oldStr, oldStr_len) == 0)
  699. {
  700. tempbuff.append(newStr);
  701. s += oldStr_len;
  702. left -= oldStr_len;
  703. }
  704. else
  705. {
  706. tempbuff.append(*s);
  707. s++;
  708. left--;
  709. }
  710. }
  711. // there are no more possible replacements, make sure we keep the end of the original buffer
  712. tempbuff.append(s);
  713. //*this = tempbuff;
  714. swapWith(tempbuff);
  715. }
  716. return *this;
  717. }
  718. const char * StringBuffer::toCharArray() const
  719. {
  720. if (buffer)
  721. {
  722. buffer[curLen] = '\0'; // There is always room for this null
  723. return buffer;
  724. }
  725. return TheNullStr;
  726. }
  727. //===========================================================================
  728. VStringBuffer::VStringBuffer(const char* format, ...)
  729. {
  730. va_list args;
  731. va_start(args,format);
  732. valist_appendf(format,args);
  733. va_end(args);
  734. }
  735. //===========================================================================
  736. String::String()
  737. {
  738. text = (char *)TheNullStr;
  739. }
  740. String::String(const char * value)
  741. {
  742. text = (value ? strdup(value) : (char *)TheNullStr);
  743. }
  744. String::String(const char * value, int offset, int _count)
  745. {
  746. text = (char *)malloc(_count+1);
  747. memcpy(text, value+offset, _count);
  748. text[_count]=0;
  749. }
  750. String::String(String & value)
  751. {
  752. text = strdup(value.toCharArray());
  753. }
  754. String::String(StringBuffer & value)
  755. {
  756. unsigned len = value.length();
  757. text = (char *)malloc(len+1);
  758. value.getChars(0,len,text);
  759. text[len] = 0;
  760. }
  761. String::~String()
  762. {
  763. if (text != TheNullStr) free(text);
  764. }
  765. char String::charAt(size32_t index) const
  766. {
  767. return text[index];
  768. }
  769. int String::compareTo(const String & value) const
  770. {
  771. return strcmp(text, value.toCharArray());
  772. }
  773. int String::compareTo(const char* value) const
  774. {
  775. return strcmp(text,value);
  776. }
  777. String * String::concat(const String & value) const
  778. {
  779. StringBuffer temp(toCharArray());
  780. temp.append(value);
  781. return new String(temp.str());
  782. }
  783. bool String::endsWith(const String & value) const
  784. {
  785. unsigned lenValue = value.length();
  786. unsigned len = (size32_t)strlen(text);
  787. if (len >= lenValue)
  788. return (memcmp(text+(len-lenValue),value.toCharArray(),lenValue) == 0);
  789. return false;
  790. }
  791. bool String::endsWith(const char* value) const
  792. {
  793. return ::endsWith(this->text, value);
  794. }
  795. bool String::equals(String & value) const
  796. {
  797. return strcmp(text, value.toCharArray())==0;
  798. }
  799. bool String::equalsIgnoreCase(const String & value) const
  800. {
  801. return stricmp(text, value.toCharArray())==0;
  802. }
  803. void String::getBytes(int srcBegin, int srcEnd, void * dest, int dstBegin) const
  804. {
  805. memcpy((char *)dest+dstBegin, text+srcBegin, srcEnd-srcBegin);
  806. }
  807. void String::getChars(int srcBegin, int srcEnd, void * dest, int dstBegin) const
  808. {
  809. memcpy((char *)dest+dstBegin, text+srcBegin, srcEnd-srcBegin);
  810. }
  811. int String::hashCode() const
  812. {
  813. return (int)hashc((const byte *)text,length(),0);
  814. }
  815. int String::indexOf(int ch) const
  816. {
  817. char * match = strchr(text, ch);
  818. return match ? (int)(match - text) : -1;
  819. }
  820. int String::indexOf(int ch, int from) const
  821. {
  822. char * match = strchr(text + from, ch);
  823. return match ? (int)(match - text) : -1;
  824. }
  825. int String::indexOf(const String & search) const
  826. {
  827. const char * str = search.toCharArray();
  828. const char * match = strstr(text, str);
  829. return match ? (int)(match - text) : -1;
  830. }
  831. int String::indexOf(const String & search, int from) const
  832. {
  833. const char * str = search.toCharArray();
  834. const char * match = strstr(text + from, str);
  835. return match ? (int)(match - text) : -1;
  836. }
  837. int String::lastIndexOf(int ch) const
  838. {
  839. char * match = strrchr(text, ch);
  840. return match ? (int)(match - text) : -1;
  841. }
  842. int String::lastIndexOf(int ch, int from) const
  843. {
  844. for (;(from > 0);--from)
  845. if (text[from] == ch)
  846. return from;
  847. return -1;
  848. }
  849. int String::lastIndexOf(const String & search) const
  850. {
  851. assertex(!"TBD");
  852. return -1;
  853. }
  854. int String::lastIndexOf(const String & search, int from) const
  855. {
  856. assertex(!"TBD");
  857. return -1;
  858. }
  859. size32_t String::length() const
  860. {
  861. return (size32_t)strlen(text);
  862. }
  863. bool String::startsWith(String & value) const
  864. {
  865. unsigned lenValue = value.length();
  866. const char * search = value.toCharArray();
  867. return (memcmp(text, search, lenValue) == 0);
  868. }
  869. bool String::startsWith(String & value, int offset) const
  870. {
  871. unsigned lenValue = value.length();
  872. const char * search = value.toCharArray();
  873. return (memcmp(text + offset, search, lenValue) == 0);
  874. }
  875. bool String::startsWith(const char* value) const
  876. {
  877. return ::startsWith(this->text,value);
  878. }
  879. String * String::substring(int beginIndex) const
  880. {
  881. return new String(text+beginIndex);
  882. }
  883. String * String::substring(int beginIndex, int endIndex) const
  884. {
  885. return new String(text, beginIndex, endIndex - beginIndex);
  886. }
  887. const char *String::toCharArray() const
  888. {
  889. return text;
  890. }
  891. String * String::toLowerCase() const
  892. {
  893. String *ret = new String();
  894. size32_t l = length();
  895. if (l)
  896. {
  897. ret->text = (char *)malloc(l+1);
  898. for (unsigned i = 0; i < l; i++)
  899. ret->text[i] = tolower(text[i]);
  900. ret->text[l]=0;
  901. }
  902. return ret;
  903. }
  904. String * String::toString()
  905. {
  906. Link();
  907. return this;
  908. }
  909. String * String::toUpperCase() const
  910. {
  911. String *ret = new String();
  912. size32_t l = length();
  913. if (l)
  914. {
  915. ret->text = (char *)malloc(l+1);
  916. for (unsigned i = 0; i < l; i++)
  917. ret->text[i] = toupper(text[i]);
  918. ret->text[l]=0;
  919. }
  920. return ret;
  921. }
  922. String * String::trim() const
  923. {
  924. size32_t l = length();
  925. while (l && isspace(text[l-1]))
  926. l--;
  927. return new String(text, 0, l);
  928. }
  929. //------------------------------------------------
  930. #if 0
  931. String & String::valueOf(char value)
  932. {
  933. return * new String(&value, 0, 1);
  934. }
  935. String & String::valueOf(const char * value)
  936. {
  937. return * new String(value);
  938. }
  939. String & String::valueOf(const char * value, int offset, int count)
  940. {
  941. return * new String(value, offset, count);
  942. }
  943. String & String::valueOf(double value)
  944. {
  945. StringBuffer temp;
  946. return temp.append(value).toString();
  947. }
  948. String & String::valueOf(float value)
  949. {
  950. StringBuffer temp;
  951. return temp.append(value).toString();
  952. }
  953. String & String::valueOf(int value)
  954. {
  955. StringBuffer temp;
  956. return temp.append(value).toString();
  957. }
  958. String & String::valueOf(long value)
  959. {
  960. StringBuffer temp;
  961. return temp.append(value).toString();
  962. }
  963. #endif
  964. //------------------------------------------------
  965. StringAttr::StringAttr(const char * _text)
  966. {
  967. text = _text ? strdup(_text) : NULL;
  968. }
  969. StringAttr::StringAttr(const char * _text, unsigned _len)
  970. {
  971. text = NULL;
  972. set(_text, _len);
  973. }
  974. StringAttr::StringAttr(const StringAttr & src)
  975. {
  976. text = NULL;
  977. set(src.get());
  978. }
  979. void StringAttr::set(const char * _text)
  980. {
  981. free(text);
  982. text = _text ? strdup(_text) : NULL;
  983. }
  984. void StringAttr::set(const char * _text, unsigned _len)
  985. {
  986. if (text)
  987. free(text);
  988. text = (char *)malloc(_len+1);
  989. memcpy(text, _text, _len);
  990. text[_len] = 0;
  991. }
  992. void StringAttr::setown(const char * _text)
  993. {
  994. if (text)
  995. free(text);
  996. text = (char *)_text;
  997. }
  998. void StringAttr::toLowerCase()
  999. {
  1000. if (text)
  1001. {
  1002. char * cur = text;
  1003. char next;
  1004. while ((next = *cur) != 0)
  1005. {
  1006. if (isupper(next))
  1007. *cur = tolower(next);
  1008. cur++;
  1009. }
  1010. }
  1011. }
  1012. void StringAttr::toUpperCase()
  1013. {
  1014. if (text)
  1015. {
  1016. char * cur = text;
  1017. char next;
  1018. while ((next = *cur) != 0)
  1019. {
  1020. if (islower(next))
  1021. *cur = toupper(next);
  1022. cur++;
  1023. }
  1024. }
  1025. }
  1026. StringAttrItem::StringAttrItem(const char *_text, unsigned _len)
  1027. {
  1028. text.set(_text, _len);
  1029. }
  1030. inline char hex(char c, char lower)
  1031. {
  1032. if (c < 10)
  1033. return '0' + c;
  1034. else if (lower)
  1035. return 'a' + c - 10;
  1036. else
  1037. return 'A' + c - 10;
  1038. }
  1039. StringBuffer & StringBuffer::appendhex(unsigned char c, char lower)
  1040. {
  1041. append(hex(c>>4, lower));
  1042. append(hex(c&0xF, lower));
  1043. return *this;
  1044. }
  1045. void appendURL(StringBuffer *dest, const char *src, size32_t len, char lower)
  1046. {
  1047. if (len == (size32_t)-1)
  1048. len = (size32_t)strlen(src);
  1049. while (len)
  1050. {
  1051. // isalnum seems to give weird results for chars > 127....
  1052. unsigned char c = (unsigned char) *src;
  1053. if (c == ' ')
  1054. dest->append('+');
  1055. else if ((c & 0x80) || !isalnum(*src))
  1056. {
  1057. dest->append('%');
  1058. dest->appendhex(c, lower);
  1059. }
  1060. else
  1061. dest->append(c);
  1062. src++;
  1063. len--;
  1064. }
  1065. }
  1066. static StringBuffer & appendStringExpandControl(StringBuffer &out, unsigned len, const char * src, bool addBreak, bool isCpp)
  1067. {
  1068. const int minBreakPos = 0;
  1069. const int commaBreakPos = 70;
  1070. const int maxBreakPos = 120;
  1071. const char * startLine = src;
  1072. out.ensureCapacity(len+2);
  1073. for (; len > 0; --len)
  1074. {
  1075. unsigned char c = *src++;
  1076. bool insertBreak = false;
  1077. bool allowBreak = true;
  1078. switch (c)
  1079. {
  1080. case '\n':
  1081. {
  1082. out.append("\\n");
  1083. if (src-startLine > minBreakPos)
  1084. insertBreak = true;
  1085. break;
  1086. }
  1087. case ',':
  1088. {
  1089. out.append(c);
  1090. if (src-startLine > commaBreakPos)
  1091. insertBreak = true;
  1092. break;
  1093. }
  1094. case '\r': out.append("\\r"); break;
  1095. case '\t': out.append("\\t"); break;
  1096. case '"':
  1097. if (isCpp)
  1098. out.append("\\");
  1099. out.append(c);
  1100. break;
  1101. case '\'':
  1102. if (!isCpp)
  1103. out.append("\\");
  1104. out.append(c);
  1105. break;
  1106. case '\\': out.append("\\\\"); break;
  1107. case '?':
  1108. if (isCpp)
  1109. {
  1110. //stop trigraphs being generated.... quote the second ?
  1111. out.append(c);
  1112. if ((len!=1) && (*src == '?'))
  1113. {
  1114. out.append('\\');
  1115. allowBreak = false;
  1116. }
  1117. }
  1118. else
  1119. out.append(c);
  1120. break;
  1121. default:
  1122. if ((c >= ' ') && (c <= 126))
  1123. out.append(c);
  1124. else
  1125. out.appendf("\\%03o", c);
  1126. break;
  1127. }
  1128. if (addBreak && (insertBreak || (allowBreak && src-startLine >= maxBreakPos)))
  1129. {
  1130. out.append("\"").newline().append("\t\t\"");
  1131. startLine = src;
  1132. }
  1133. }
  1134. return out;
  1135. }
  1136. StringBuffer & appendStringAsCPP(StringBuffer &out, unsigned len, const char * src, bool addBreak)
  1137. {
  1138. return appendStringExpandControl(out, len, src, addBreak, true);
  1139. }
  1140. StringBuffer & appendStringAsECL(StringBuffer &out, unsigned len, const char * src)
  1141. {
  1142. return appendStringExpandControl(out, len, src, false, false);
  1143. }
  1144. StringBuffer & appendStringAsQuotedCPP(StringBuffer &out, unsigned len, const char * src, bool addBreak)
  1145. {
  1146. out.ensureCapacity(len+2);
  1147. out.append('\"');
  1148. appendStringAsCPP(out, len, src, addBreak);
  1149. return out.append('\"');
  1150. }
  1151. StringBuffer & appendStringAsQuotedECL(StringBuffer &out, unsigned len, const char * src)
  1152. {
  1153. out.ensureCapacity(len+2);
  1154. out.append('\'');
  1155. appendStringAsECL(out, len, src);
  1156. return out.append('\'');
  1157. }
  1158. void extractItem(StringBuffer & res, const char * src, const char * sep, int whichItem, bool caps)
  1159. {
  1160. bool isSeparator[256];
  1161. memset(isSeparator,0,sizeof(isSeparator));
  1162. unsigned char * finger = (unsigned char *)sep;
  1163. while (*finger !=0)
  1164. isSeparator[*finger++] = true;
  1165. isSeparator[0]=true;
  1166. finger = (unsigned char *)src;
  1167. unsigned char next;
  1168. loop
  1169. {
  1170. while (isSeparator[(next = *finger)])
  1171. {
  1172. if (next == 0) return;
  1173. finger++;
  1174. }
  1175. if (whichItem == 0)
  1176. {
  1177. while (!isSeparator[(next = *finger)])
  1178. {
  1179. if (caps)
  1180. next = toupper(next);
  1181. res.append(next);
  1182. finger++;
  1183. }
  1184. return;
  1185. }
  1186. while (!isSeparator[*finger])
  1187. finger++;
  1188. whichItem--;
  1189. }
  1190. }
  1191. int utf8CharLen(const unsigned char *ch)
  1192. {
  1193. //return 1 if this is an ascii character,
  1194. //or 0 if its not a valid utf-8 character
  1195. if (*ch < 128)
  1196. return 1;
  1197. if (*ch < 192)
  1198. return 0;
  1199. unsigned char len = 1;
  1200. for (unsigned char lead = *ch << 1; (lead & 0x80); lead <<=1)
  1201. len++;
  1202. for (unsigned pos = 1; pos < len; pos++)
  1203. if ((ch[pos] < 128) || (ch[pos] >= 192))
  1204. return 0; //its not a valid utf-8 character after all
  1205. return len;
  1206. }
  1207. const char *encodeXML(const char *x, StringBuffer &ret, unsigned flags, unsigned len, bool utf8)
  1208. {
  1209. while (len)
  1210. {
  1211. switch(*x)
  1212. {
  1213. case '&':
  1214. ret.append("&amp;");
  1215. break;
  1216. case '<':
  1217. ret.append("&lt;");
  1218. break;
  1219. case '>':
  1220. ret.append("&gt;");
  1221. break;
  1222. case '\"':
  1223. ret.append("&quot;");
  1224. break;
  1225. case '\'':
  1226. ret.append("&apos;");
  1227. break;
  1228. case ' ':
  1229. ret.append(flags & ENCODE_SPACES?"&#32;":" ");
  1230. break;
  1231. case '\n':
  1232. ret.append(flags & ENCODE_NEWLINES?"&#10;":"\n");
  1233. break;
  1234. case '\r':
  1235. ret.append(flags & ENCODE_NEWLINES?"&#13;":"\r");
  1236. break;
  1237. case '\t':
  1238. ret.append(flags & ENCODE_SPACES?"&#9;":"\t");
  1239. break;
  1240. case '\0':
  1241. if (len == (unsigned) -1)
  1242. return x;
  1243. ret.append("&#xe000;"); // hack!!! Characters below 0x20 are not legal in strict xml, even encoded.
  1244. break;
  1245. default:
  1246. if (*x >= ' ' && ((byte)*x) < 128)
  1247. ret.append(*x);
  1248. else if (*x < ' ' && *x > 0)
  1249. ret.append("&#xe0").appendhex(*x, true).append(';'); // HACK
  1250. else if (utf8)
  1251. {
  1252. unsigned chlen = utf8CharLen((const unsigned char *)x);
  1253. if (chlen==0)
  1254. ret.append("&#").append((unsigned int)*(unsigned char *) x).append(';');
  1255. else
  1256. {
  1257. ret.append(*x);
  1258. while(--chlen)
  1259. {
  1260. if (len != (unsigned) -1)
  1261. len--;
  1262. ret.append(*(++x));
  1263. }
  1264. }
  1265. }
  1266. else
  1267. ret.append("&#").append((unsigned int)*(unsigned char *) x).append(';');
  1268. break;
  1269. }
  1270. if (len != (unsigned) -1)
  1271. len--;
  1272. ++x;
  1273. }
  1274. return x;
  1275. }
  1276. const char *encodeXML(const char *x, IIOStream &out, unsigned flags, unsigned len, bool utf8)
  1277. {
  1278. while (len)
  1279. {
  1280. switch(*x)
  1281. {
  1282. case '&':
  1283. writeStringToStream(out, "&amp;");
  1284. break;
  1285. case '<':
  1286. writeStringToStream(out, "&lt;");
  1287. break;
  1288. case '>':
  1289. writeStringToStream(out, "&gt;");
  1290. break;
  1291. case '\"':
  1292. writeStringToStream(out, "&quot;");
  1293. break;
  1294. case '\'':
  1295. writeStringToStream(out, "&apos;");
  1296. break;
  1297. case ' ':
  1298. writeStringToStream(out, flags & ENCODE_SPACES?"&#32;":" ");
  1299. break;
  1300. case '\n':
  1301. writeStringToStream(out, flags & ENCODE_NEWLINES?"&#10;":"\n");
  1302. break;
  1303. case '\r':
  1304. writeStringToStream(out, flags & ENCODE_NEWLINES?"&#13;":"\r");
  1305. break;
  1306. case '\t':
  1307. writeStringToStream(out, flags & ENCODE_SPACES?"&#9;":"\t");
  1308. break;
  1309. case '\0':
  1310. if (len == (unsigned) -1)
  1311. return x;
  1312. writeStringToStream(out, "&#xe000;"); // hack!!! Characters below 0x20 are not legal in strict xml, even encoded.
  1313. break;
  1314. default:
  1315. if (*x >= ' ' && ((byte)*x) < 128)
  1316. writeCharToStream(out, *x);
  1317. else if (*x < ' ' && *x > 0)
  1318. {
  1319. writeStringToStream(out, "&#xe0");
  1320. unsigned char c = *(unsigned char *)x;
  1321. writeCharToStream(out, hex(c>>4, true));
  1322. writeCharToStream(out, hex(c&0xF, true));
  1323. writeCharToStream(out, ';'); // HACK
  1324. }
  1325. else if (utf8)
  1326. {
  1327. int chlen = utf8CharLen((const unsigned char *)x);
  1328. if (chlen==0)
  1329. {
  1330. writeStringToStream(out, "&#");
  1331. char tmp[12];
  1332. unsigned written = numtostr(tmp, *(unsigned char *)x);
  1333. out.write(written, tmp);
  1334. writeCharToStream(out, ';');
  1335. }
  1336. else
  1337. {
  1338. writeCharToStream(out, *x);
  1339. while(--chlen)
  1340. {
  1341. if (len != (unsigned) -1)
  1342. len--;
  1343. writeCharToStream(out, *(++x));
  1344. }
  1345. }
  1346. }
  1347. else
  1348. {
  1349. writeStringToStream(out, "&#");
  1350. char tmp[12];
  1351. unsigned written = numtostr(tmp, *(unsigned char *)x);
  1352. out.write(written, tmp);
  1353. writeCharToStream(out, ';');
  1354. }
  1355. break;
  1356. }
  1357. if (len != (unsigned) -1)
  1358. len--;
  1359. ++x;
  1360. }
  1361. return x;
  1362. }
  1363. static void writeUtf8(unsigned c, StringBuffer &out)
  1364. {
  1365. if (c < 0x80)
  1366. out.append((char)c);
  1367. else if (c < 0x800)
  1368. {
  1369. out.append((char)(0xC0 | (c>>6)));
  1370. out.append((char)(0x80 | (c & 0x3F)));
  1371. }
  1372. else if (c < 0x10000)
  1373. {
  1374. out.append((char) (0xE0 | (c>>12)));
  1375. out.append((char) (0x80 | (c>>6 & 0x3F)));
  1376. out.append((char) (0x80 | (c & 0x3F)));
  1377. }
  1378. else if (c < 0x200000)
  1379. {
  1380. out.append((char) (0xF0 | (c>>18)));
  1381. out.append((char) (0x80 | (c>>12 & 0x3F)));
  1382. out.append((char) (0x80 | (c>>6 & 0x3F)));
  1383. out.append((char) (0x80 | (c & 0x3F)));
  1384. }
  1385. else if (c < 0x4000000)
  1386. {
  1387. out.append((char) (0xF8 | (c>>24)));
  1388. out.append((char) (0x80 | (c>>18 & 0x3F)));
  1389. out.append((char) (0x80 | (c>>12 & 0x3F)));
  1390. out.append((char) (0x80 | (c>>6 & 0x3F)));
  1391. out.append((char) (0x80 | (c & 0x3F)));
  1392. }
  1393. else if (c < 0x80000000)
  1394. {
  1395. out.append((char) (0xFC | (c>>30)));
  1396. out.append((char) (0x80 | (c>>24 & 0x3F)));
  1397. out.append((char) (0x80 | (c>>18 & 0x3F)));
  1398. out.append((char) (0x80 | (c>>12 & 0x3F)));
  1399. out.append((char) (0x80 | (c>>6 & 0x3F)));
  1400. out.append((char) (0x80 | (c & 0x3F)));
  1401. }
  1402. else
  1403. assertex(false);
  1404. }
  1405. #define JSONSTRICT
  1406. const char *decodeJSON(const char *j, StringBuffer &ret, unsigned len, const char **errMark)
  1407. {
  1408. if (!j)
  1409. return j;
  1410. if ((unsigned)-1 == len)
  1411. len = (unsigned)strlen(j);
  1412. try
  1413. {
  1414. for (const char *end = j+len; j<end && *j; j++)
  1415. {
  1416. if (*j!='\\')
  1417. ret.append(*j);
  1418. else
  1419. {
  1420. switch (*++j)
  1421. {
  1422. case 'u':
  1423. {
  1424. j++;
  1425. if (end-j>=4)
  1426. {
  1427. char *endptr;
  1428. StringAttr s(j, 4);
  1429. unsigned val = strtoul(s.get(), &endptr, 16);
  1430. if (endptr && !*endptr)
  1431. {
  1432. writeUtf8(val, ret);
  1433. j+=3;
  1434. break;
  1435. }
  1436. }
  1437. #ifdef JSONSTRICT
  1438. throw MakeStringException(-1, "invalid json \\u escaped sequence");
  1439. #endif
  1440. ret.append(*j);
  1441. break;
  1442. }
  1443. case '\"':
  1444. case '\\':
  1445. case '/':
  1446. ret.append(*j);
  1447. break;
  1448. case 'b':
  1449. ret.append('\b');
  1450. break;
  1451. case 'f':
  1452. ret.append('\f');
  1453. break;
  1454. case 'n':
  1455. ret.append('\n');
  1456. continue;
  1457. case 'r':
  1458. ret.append('\r');
  1459. break;
  1460. case 't':
  1461. ret.append('\t');
  1462. break;
  1463. default:
  1464. {
  1465. #ifdef JSONSTRICT
  1466. throw MakeStringException(-1, "invalid json escaped sequence");
  1467. #endif
  1468. ret.append('\\');
  1469. ret.append(*j);
  1470. break;
  1471. }
  1472. }
  1473. }
  1474. }
  1475. }
  1476. catch (IException *)
  1477. {
  1478. if (errMark) *errMark = j;
  1479. throw;
  1480. }
  1481. return j;
  1482. }
  1483. void decodeXML(ISimpleReadStream &in, StringBuffer &out, unsigned len)
  1484. {
  1485. // TODO
  1486. UNIMPLEMENTED;
  1487. }
  1488. #define XMLSTRICT
  1489. const char *decodeXML(const char *x, StringBuffer &ret, unsigned len, const char **errMark, IEntityHelper *entityHelper)
  1490. {
  1491. if (!x)
  1492. return x;
  1493. if ((unsigned)-1 == len)
  1494. len = (unsigned)strlen(x);
  1495. const char *end = x+len;
  1496. try
  1497. {
  1498. while (x<end && *x)
  1499. {
  1500. if ('&' == *x)
  1501. {
  1502. switch (*(x+1))
  1503. {
  1504. case 'a':
  1505. case 'A':
  1506. {
  1507. switch (*(x+2))
  1508. {
  1509. case 'm':
  1510. case 'M':
  1511. {
  1512. char c1 = *(x+3);
  1513. if (('p' == c1 || 'P' == c1) && ';' == *(x+4))
  1514. {
  1515. x += 5;
  1516. ret.append('&');
  1517. continue;
  1518. }
  1519. break;
  1520. }
  1521. case 'p':
  1522. case 'P':
  1523. {
  1524. char c1 = *(x+3);
  1525. char c2 = *(x+4);
  1526. if (('o' == c1 || 'O' == c1) && ('s' == c2 || 'S' == c2) && ';' == *(x+5))
  1527. {
  1528. x += 6;
  1529. ret.append('\'');
  1530. continue;
  1531. }
  1532. break;
  1533. }
  1534. }
  1535. break;
  1536. }
  1537. case 'l':
  1538. case 'L':
  1539. {
  1540. char c1 = *(x+2);
  1541. if (('t' == c1 || 'T' == c1) && ';' == *(x+3))
  1542. {
  1543. x += 4;
  1544. ret.append('<');
  1545. continue;
  1546. }
  1547. break;
  1548. }
  1549. case 'g':
  1550. case 'G':
  1551. {
  1552. char c1 = *(x+2);
  1553. if (('t' == c1 || 'T' == c1) && ';' == *(x+3))
  1554. {
  1555. x += 4;
  1556. ret.append('>');
  1557. continue;
  1558. }
  1559. break;
  1560. }
  1561. case 'q':
  1562. case 'Q':
  1563. {
  1564. char c1 = *(x+2);
  1565. char c2 = *(x+3);
  1566. char c3 = *(x+4);
  1567. if (('u' == c1 || 'U' == c1) && ('o' == c2 || 'O' == c2) && ('t' == c3 || 'T' == c3) && ';' == *(x+5))
  1568. {
  1569. x += 6;
  1570. ret.append('"');
  1571. continue;
  1572. }
  1573. break;
  1574. }
  1575. case 'n':
  1576. case 'N':
  1577. {
  1578. char c1 = *(x+2);
  1579. char c2 = *(x+3);
  1580. char c3 = *(x+4);
  1581. if (('b' == c1 || 'B' == c1) && ('s' == c2 || 'S' == c2) && ('p' == c3 || 'P' == c3) && ';' == *(x+5))
  1582. {
  1583. x += 6;
  1584. writeUtf8(0xa0, ret);
  1585. continue;
  1586. }
  1587. break;
  1588. }
  1589. default:
  1590. {
  1591. x++;
  1592. if (*x == '#')
  1593. {
  1594. x++;
  1595. bool hex;
  1596. if (*x == 'x' || *x == 'X') // strictly not sure about X.
  1597. {
  1598. hex = true;
  1599. x++;
  1600. }
  1601. else
  1602. hex = false;
  1603. char *endptr;
  1604. unsigned val = 0;
  1605. if (hex)
  1606. val = strtoul(x,&endptr,16);
  1607. else
  1608. val = strtoul(x,&endptr,10);
  1609. if (x==endptr || *endptr != ';') {
  1610. #ifndef XMLSTRICT
  1611. LOG(MCerror, unknownJob, "&# syntax error");
  1612. ret.append(*x);
  1613. #endif
  1614. }
  1615. else // always convert to utf-8. Should potentially throw error if not marked as utf-8 encoded doc and out of ascii range.
  1616. writeUtf8(val, ret);
  1617. x = endptr+1;
  1618. continue;
  1619. }
  1620. else
  1621. {
  1622. if ('\0' == *x)
  1623. --x;
  1624. else
  1625. {
  1626. bool error = false;
  1627. if (entityHelper)
  1628. {
  1629. const char *start=x;
  1630. loop
  1631. {
  1632. ++x;
  1633. if ('\0' == *x) throw MakeStringException(-1, "missing ';'");
  1634. if (';' == *x) break;
  1635. }
  1636. StringBuffer entity(x-start, start);
  1637. if (!entityHelper->find(entity, ret))
  1638. {
  1639. error = true;
  1640. x = start;
  1641. }
  1642. }
  1643. else
  1644. error = true;
  1645. if (error)
  1646. {
  1647. #ifdef XMLSTRICT
  1648. throw MakeStringException(-1, "invalid escaped sequence");
  1649. #endif
  1650. ret.append('&');
  1651. }
  1652. }
  1653. }
  1654. break;
  1655. }
  1656. }
  1657. if (x>=end)
  1658. throw MakeStringException(-1, "invalid escaped sequence");
  1659. }
  1660. ret.append(*x);
  1661. ++x;
  1662. }
  1663. }
  1664. catch (IException *)
  1665. {
  1666. if (errMark) *errMark = x;
  1667. throw;
  1668. }
  1669. return x;
  1670. }
  1671. StringBuffer & appendXMLOpenTag(StringBuffer &xml, const char *tag, const char *prefix, bool complete, bool close, const char *uri)
  1672. {
  1673. if (!tag || !*tag)
  1674. return xml;
  1675. xml.append('<');
  1676. appendXMLTagName(xml, tag, prefix);
  1677. if (uri && *uri)
  1678. {
  1679. xml.append(" xmlns");
  1680. if (prefix && *prefix)
  1681. xml.append(':').append(prefix);
  1682. xml.append("=\"").append(uri).append('\"');
  1683. }
  1684. if (complete)
  1685. {
  1686. if (close)
  1687. xml.append('/');
  1688. xml.append('>');
  1689. }
  1690. return xml;
  1691. }
  1692. jlib_decl StringBuffer &appendJSONName(StringBuffer &s, const char *name)
  1693. {
  1694. if (!name || !*name)
  1695. return s;
  1696. delimitJSON(s);
  1697. return encodeJSON(s.append('"'), name).append("\": ");
  1698. }
  1699. StringBuffer &encodeJSON(StringBuffer &s, const char *value)
  1700. {
  1701. if (!value)
  1702. return s;
  1703. for (; *value; value++)
  1704. {
  1705. switch (*value)
  1706. {
  1707. case '\b':
  1708. s.append("\\b");
  1709. break;
  1710. case '\f':
  1711. s.append("\\f");
  1712. break;
  1713. case '\n':
  1714. s.append("\\n");
  1715. break;
  1716. case '\r':
  1717. s.append("\\r");
  1718. break;
  1719. case '\t':
  1720. s.append("\\t");
  1721. break;
  1722. case '\"':
  1723. case '\\':
  1724. case '/':
  1725. s.append('\\'); //fall through
  1726. default:
  1727. s.append(*value);
  1728. }
  1729. }
  1730. return s;
  1731. }
  1732. void decodeCppEscapeSequence(StringBuffer & out, const char * in, bool errorIfInvalid)
  1733. {
  1734. out.ensureCapacity((size32_t)strlen(in));
  1735. while (*in)
  1736. {
  1737. char c = *in++;
  1738. if (c == '\\')
  1739. {
  1740. char next = *in;
  1741. if (next)
  1742. {
  1743. in++;
  1744. switch (next)
  1745. {
  1746. case 'a': c = '\a'; break;
  1747. case 'b': c = '\b'; break;
  1748. case 'f': c = '\f'; break;
  1749. case 'n': c = '\n'; break;
  1750. case 'r': c = '\r'; break;
  1751. case 't': c = '\t'; break;
  1752. case 'v': c = '\v'; break;
  1753. case '\\':
  1754. case '\'':
  1755. case '?':
  1756. case '\"': break;
  1757. case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
  1758. {
  1759. c = next - '0';
  1760. if (*in >= '0' && *in <= '7')
  1761. {
  1762. c = c << 3 | (*in++-'0');
  1763. if (*in >= '0' && *in <= '7')
  1764. c = c << 3 | (*in++-'0');
  1765. }
  1766. break;
  1767. }
  1768. case 'x':
  1769. c = 0;
  1770. while (isxdigit(*in))
  1771. {
  1772. next = *in++;
  1773. c = c << 4;
  1774. if (next >= '0' && next <= '9') c |= (next - '0');
  1775. else if (next >= 'A' && next <= 'F') c |= (next - 'A' + 10);
  1776. else if (next >= 'a' && next <= 'f') c |= (next - 'a' + 10);
  1777. }
  1778. break;
  1779. default:
  1780. if (errorIfInvalid)
  1781. throw MakeStringException(1, "unrecognised character escape sequence '\\%c'", next);
  1782. in--; // keep it as is.
  1783. break;
  1784. }
  1785. }
  1786. }
  1787. out.append(c);
  1788. }
  1789. }
  1790. bool isPrintable(unsigned len, const char * src)
  1791. {
  1792. while (len--)
  1793. {
  1794. if (!isprint(*((unsigned char *)src)))
  1795. return false;
  1796. src++;
  1797. }
  1798. return true;
  1799. }
  1800. //make this as fast as possible...
  1801. StringBuffer & appendStringAsSQL(StringBuffer & out, unsigned len, const char * src)
  1802. {
  1803. if (!isPrintable(len, src))
  1804. {
  1805. out.append("X'");
  1806. appendDataAsHex(out, len, src);
  1807. return out.append('\'');
  1808. }
  1809. out.ensureCapacity(2 + len);
  1810. out.append('\'');
  1811. loop
  1812. {
  1813. char * next = (char *)memchr(src, '\'', len);
  1814. if (!next)
  1815. break;
  1816. unsigned chunk=(size32_t)(next-src)+1;
  1817. out.append(chunk, src).append('\'');
  1818. len -= chunk;
  1819. src += chunk;
  1820. }
  1821. return out.append(len, src).append('\'');
  1822. }
  1823. static const char * hexText = "0123456789ABCDEF";
  1824. StringBuffer & appendDataAsHex(StringBuffer &out, unsigned len, const void * data)
  1825. {
  1826. char * target = (char *)out.reserve(len*2);
  1827. unsigned char * start = (unsigned char *)data;
  1828. for (unsigned count=len; count> 0; --count)
  1829. {
  1830. unsigned next = *start++;
  1831. *target++ = hexText[next >>4];
  1832. *target++ = hexText[next & 15];
  1833. }
  1834. return out;
  1835. }
  1836. bool strToBool(size_t len, const char * text)
  1837. {
  1838. switch (len)
  1839. {
  1840. case 4:
  1841. if (memicmp(text, "true", 4) == 0)
  1842. return true;
  1843. break;
  1844. case 3:
  1845. if (memicmp(text, "yes", 3) == 0)
  1846. return true;
  1847. break;
  1848. case 2:
  1849. if (memicmp(text, "on", 2) == 0)
  1850. return true;
  1851. break;
  1852. case 1:
  1853. if ((memicmp(text, "t", 1) == 0) || (memicmp(text, "y", 1) == 0))
  1854. return true;
  1855. break;
  1856. }
  1857. while (len && isspace(*text))
  1858. {
  1859. len--;
  1860. text++;
  1861. }
  1862. while (len-- && isdigit(*text))
  1863. {
  1864. if (*text++ != '0') return true;
  1865. }
  1866. return false;
  1867. }
  1868. bool strToBool(const char * text)
  1869. {
  1870. return strToBool(strlen(text), text);
  1871. }
  1872. bool clipStrToBool(size_t len, const char * text)
  1873. {
  1874. while (len && *text==' ')
  1875. {
  1876. len--;
  1877. text++;
  1878. }
  1879. while (len && text[len-1]== ' ')
  1880. len--;
  1881. return strToBool(len, text);
  1882. }
  1883. bool clipStrToBool(const char * text)
  1884. {
  1885. return clipStrToBool(strlen(text), text);
  1886. }
  1887. StringBuffer & ncnameEscape(char const * in, StringBuffer & out)
  1888. {
  1889. if(!isalpha(*in))
  1890. {
  1891. out.appendf("_%02X", static_cast<unsigned char>(*in));
  1892. in++;
  1893. }
  1894. char const * finger = in;
  1895. while(*finger)
  1896. {
  1897. if(!isalnum(*finger))
  1898. {
  1899. if(finger>in)
  1900. out.append((size32_t)(finger-in), in);
  1901. out.appendf("_%02X", static_cast<unsigned char>(*finger));
  1902. in = ++finger;
  1903. }
  1904. else
  1905. {
  1906. finger++;
  1907. }
  1908. }
  1909. if(finger>in)
  1910. out.append((size32_t)(finger-in), in);
  1911. return out;
  1912. }
  1913. StringBuffer & ncnameUnescape(char const * in, StringBuffer & out)
  1914. {
  1915. char const * finger = in;
  1916. while(*finger)
  1917. {
  1918. if(*finger == '_')
  1919. {
  1920. if(finger>in)
  1921. out.append((size32_t)(finger-in), in);
  1922. unsigned char chr = 16 * hex2num(finger[1]) + hex2num(finger[2]);
  1923. out.append(static_cast<char>(chr));
  1924. in = (finger+=3);
  1925. }
  1926. else
  1927. {
  1928. finger++;
  1929. }
  1930. }
  1931. if(finger>in)
  1932. out.append((size32_t)(finger-in), in);
  1933. return out;
  1934. }
  1935. bool startsWith(const char* src, const char* dst)
  1936. {
  1937. while (*dst && *dst == *src) { src++; dst++; }
  1938. return *dst==0;
  1939. }
  1940. bool startsWithIgnoreCase(const char* src, const char* dst)
  1941. {
  1942. while (*dst && tolower(*dst) == tolower(*src)) { src++; dst++; }
  1943. return *dst==0;
  1944. }
  1945. bool endsWith(const char* src, const char* dst)
  1946. {
  1947. size_t srcLen = strlen(src);
  1948. size_t dstLen = strlen(dst);
  1949. if (dstLen<=srcLen)
  1950. return memcmp(dst, src+srcLen-dstLen, dstLen)==0;
  1951. return false;
  1952. }
  1953. bool endsWithIgnoreCase(const char* src, const char* dst)
  1954. {
  1955. size_t srcLen = strlen(src);
  1956. size_t dstLen = strlen(dst);
  1957. if (dstLen<=srcLen)
  1958. return memicmp(dst, src+srcLen-dstLen, dstLen)==0;
  1959. return false;
  1960. }
  1961. char *j_strtok_r(char *str, const char *delim, char **saveptr)
  1962. {
  1963. if (!str)
  1964. str = *saveptr;
  1965. char c;
  1966. loop {
  1967. c = *str;
  1968. if (!c) {
  1969. *saveptr = str;
  1970. return NULL;
  1971. }
  1972. if (!strchr(delim,c))
  1973. break;
  1974. str++;
  1975. }
  1976. char *ret=str;
  1977. do {
  1978. c = *(++str);
  1979. } while (c&&!strchr(delim,c));
  1980. if (c)
  1981. *(str++) = 0;
  1982. *saveptr = str;
  1983. return ret;
  1984. }
  1985. int j_memicmp (const void *s1, const void *s2, size32_t len)
  1986. {
  1987. const byte *b1 = (const byte *)s1;
  1988. const byte *b2 = (const byte *)s2;
  1989. int ret = 0;
  1990. while (len&&((ret = tolower(*b1)-tolower(*b2)) == 0)) {
  1991. b1++;
  1992. b2++;
  1993. len--;
  1994. }
  1995. return ret;
  1996. }
  1997. size32_t memcount(size32_t len, const char * str, char search)
  1998. {
  1999. size32_t count = 0;
  2000. for (size32_t i=0; i < len; i++)
  2001. {
  2002. if (str[i] == search)
  2003. count++;
  2004. }
  2005. return count;
  2006. }
  2007. StringBuffer & elideString(StringBuffer & s, unsigned maxLength)
  2008. {
  2009. if (s.length() > maxLength)
  2010. {
  2011. s.setLength(maxLength);
  2012. s.append("...");
  2013. }
  2014. return s;
  2015. }