// jstring.hpp (21 KB)
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #ifndef __JSTRING__
  14. #define __JSTRING__
  15. #include "jiface.hpp"
  16. #include "jio.hpp"
  17. #include "jstream.hpp"
  18. #include "jbuff.hpp"
  19. // A Java compatible String and StringBuffer class - useful for dynamic strings.
  20. class String;
  21. interface IAtom;
  22. interface IFile;
  23. class jlib_decl StringBuffer
  24. {
  25. public:
  26. StringBuffer();
  27. StringBuffer(String & value);
  28. StringBuffer(const char *value);
  29. StringBuffer(unsigned len, const char *value);
  30. StringBuffer(const StringBuffer & value);
  31. inline ~StringBuffer() { free(buffer); }
  32. inline size32_t length() const { return curLen; }
  33. inline void Release() const { delete this; } // for consistency even though not link counted
  34. void setLength(unsigned len);
  35. inline void ensureCapacity(unsigned max) { if (maxLen <= curLen + max) _realloc(curLen + max); }
  36. StringBuffer & append(char value);
  37. StringBuffer & append(unsigned char value);
  38. StringBuffer & append(const char * value);
  39. StringBuffer & append(const unsigned char * value);
  40. StringBuffer & append(const IAtom * value);
  41. StringBuffer & append(unsigned len, const char * value);
  42. StringBuffer & append(const char * value, int offset, int len);
  43. // StringBuffer & append(const unsigned char * value, int offset, int len);
  44. StringBuffer & append(double value);
  45. StringBuffer & append(float value);
  46. StringBuffer & append(int value);
  47. StringBuffer & append(unsigned value);
  48. StringBuffer & append(__int64 value);
  49. StringBuffer & append(unsigned __int64 value);
  50. StringBuffer & append(const String & value);
  51. StringBuffer & append(const IStringVal & value);
  52. StringBuffer & append(const IStringVal * value);
  53. StringBuffer & appendN(size32_t count, char fill);
  54. StringBuffer & appendf(const char *format, ...) __attribute__((format(printf, 2, 3)));
  55. StringBuffer & appendLower(unsigned len, const char * value);
  56. StringBuffer & setf(const char* format, ...) __attribute__((format(printf,2,3)));
  57. StringBuffer & limited_valist_appendf(unsigned szLimit, const char *format, va_list args);
  58. inline StringBuffer &valist_appendf(const char *format, va_list args) { return limited_valist_appendf(0, format, args); }
  59. StringBuffer & appendhex(unsigned char value, char lower);
  60. inline char charAt(size32_t pos) { return buffer[pos]; }
  61. inline StringBuffer & clear() { curLen = 0; return *this; }
  62. void kill();
  63. void getChars(int srcBegin, int srcEnd, char * target) const;
  64. StringBuffer & insert(int offset, char value);
  65. StringBuffer & insert(int offset, unsigned char value);
  66. StringBuffer & insert(int offset, const char * value);
  67. StringBuffer & insert(int offset, const unsigned char * value);
  68. StringBuffer & insert(int offset, double value);
  69. StringBuffer & insert(int offset, float value);
  70. StringBuffer & insert(int offset, int value);
  71. StringBuffer & insert(int offset, unsigned value);
  72. StringBuffer & insert(int offset, __int64 value);
  73. StringBuffer & insert(int offset, const String & value);
  74. StringBuffer & insert(int offset, const StringBuffer & value);
  75. StringBuffer & insert(int offset, const IStringVal & value);
  76. StringBuffer & insert(int offset, const IStringVal * value);
  77. StringBuffer & reverse();
  78. void setCharAt(unsigned offset, char value);
  79. //Non-standard functions:
  80. MemoryBuffer & deserialize(MemoryBuffer & in);
  81. MemoryBuffer & serialize(MemoryBuffer & out) const;
  82. StringBuffer & loadFile(const char *fname, bool binaryMode=false);
  83. StringBuffer & loadFile(IFile* f);
  84. StringBuffer & append(const StringBuffer & value);
  85. StringBuffer & newline();
  86. StringBuffer & pad(unsigned count);
  87. StringBuffer & padTo(unsigned count);
  88. inline const char * str() const { return toCharArray(); }
  89. char * detach();
  90. StringBuffer & clip();
  91. StringBuffer & trim();
  92. StringBuffer & trimLeft();
  93. inline StringBuffer & trimRight() { return clip(); }
  94. StringBuffer & remove(unsigned start, unsigned len);
  95. const char * toCharArray() const;
  96. StringBuffer & toLowerCase();
  97. StringBuffer & toUpperCase();
  98. StringBuffer & replace(char oldChar, char newChar);
  99. StringBuffer & replaceString(const char* oldStr, const char* newStr);
  100. char * reserve(size32_t size);
  101. char * reserveTruncate(size32_t size);
  102. StringBuffer & stripChar(char oldChar);
  103. void swapWith(StringBuffer &other);
  104. void setBuffer(size32_t buffLen, char * newBuff, size32_t strLen);
  105. inline StringBuffer& set(const char* value) { return clear().append(value); }
  106. inline operator const char* () const { return str(); }
  107. inline StringBuffer& operator=(const char* value)
  108. {
  109. return clear().append(value);
  110. }
  111. inline StringBuffer& operator=(const StringBuffer& value)
  112. {
  113. return clear().append(value.str());
  114. }
  115. StringBuffer & appendlong(long value);
  116. StringBuffer & appendulong(unsigned long value);
  117. private: // long depreciated
  118. StringBuffer & append(long value);
  119. StringBuffer & append(unsigned long value);
  120. StringBuffer & insert(int offset, long value);
  121. protected:
  122. void init()
  123. {
  124. buffer = NULL;
  125. curLen = 0;
  126. maxLen = 0;
  127. }
  128. void _insert(unsigned offset, size32_t insertLen);
  129. void _realloc(size32_t newLen);
  130. private:
  131. mutable char * buffer;
  132. size32_t curLen;
  133. size32_t maxLen;
  134. };
  135. // add a variable-parameter constructor to StringBuffer.
  136. class jlib_decl VStringBuffer : public StringBuffer
  137. {
  138. public:
  139. VStringBuffer(const char* format, ...) __attribute__((format(printf, 2, 3)));
  140. };
  141. class SCMStringBuffer : public CInterface, implements IStringVal
  142. {
  143. public:
  144. IMPLEMENT_IINTERFACE;
  145. StringBuffer s;
  146. virtual const char * str() const { return s.str(); };
  147. virtual void set(const char *val) { s.clear().append(val); };
  148. virtual void clear() { s.clear(); };
  149. virtual void setLen(const char *val, unsigned length) { s.clear().append(length, val); };
  150. virtual unsigned length() const { return s.length(); };
  151. };
  152. class jlib_decl String : public CInterface, implements IInterface
  153. {
  154. public:
  155. IMPLEMENT_IINTERFACE;
  156. String();
  157. // String(byte[]);
  158. // String(byte[], int);
  159. // String(byte[], int, int);
  160. // String(byte[], int, int, int);
  161. // String(byte[], int, int, String);
  162. // String(byte[], String);
  163. String(const char * value);
  164. String(const char * value, int offset, int count);
  165. String(String & value);
  166. String(StringBuffer & value);
  167. ~String();
  168. char charAt(size32_t index) const;
  169. int compareTo(const String & value) const;
  170. int compareTo(const char* value) const;
  171. String * concat(const String & value) const;
  172. bool endsWith(const String & value) const;
  173. bool endsWith(const char* value) const;
  174. bool equals(String & value) const;
  175. bool equalsIgnoreCase(const String & value) const;
  176. void getBytes(int srcBegin, int srcEnd, void * dest, int dstBegin) const;
  177. void getChars(int srcBegin, int srcEnd, void * dest, int dstBegin) const;
  178. int hashCode() const;
  179. int indexOf(int ch) const;
  180. int indexOf(int ch, int from) const;
  181. int indexOf(const String & search) const;
  182. int indexOf(const String & search, int from) const;
  183. int lastIndexOf(int ch) const;
  184. int lastIndexOf(int ch, int from) const;
  185. int lastIndexOf(const String & search) const;
  186. int lastIndexOf(const String & serach, int from) const;
  187. size32_t length() const;
  188. bool startsWith(String & value) const;
  189. bool startsWith(String & value, int offset) const;
  190. bool startsWith(const char* value) const;
  191. String * substring(int beginIndex) const;
  192. String * substring(int beginIndex, int endIndex) const;
  193. const char *toCharArray() const;
  194. String * toLowerCase() const;
  195. String * toString(); // Links this
  196. String * toUpperCase() const;
  197. String * trim() const;
  198. protected:
  199. char * text;
  200. };
  201. //This simple class is useful for storing string member variables
  202. class jlib_decl StringAttr
  203. {
  204. public:
  205. inline StringAttr(void) { text = NULL; }
  206. StringAttr(const char * _text, unsigned _len);
  207. StringAttr(const char * _text);
  208. StringAttr(const StringAttr & src);
  209. inline ~StringAttr(void) { free(text); }
  210. inline operator const char * () const { return text; }
  211. inline void clear() { setown(NULL); }
  212. inline char * detach() { char * ret = text; text = NULL; return ret; }
  213. inline const char * get(void) const { return text; }
  214. inline size32_t length() const { return text ? (size32_t)strlen(text) : 0; }
  215. inline bool isEmpty() const { return !text||!*text; } // faster than (length==0)
  216. inline const char * sget(void) const { return text ? text : ""; } // safe form of get (doesn't return NULL)
  217. void set(const char * _text);
  218. void setown(const char * _text);
  219. void set(const char * _text, unsigned _len);
  220. void toLowerCase();
  221. void toUpperCase();
  222. private:
  223. char * text;
  224. private:
  225. StringAttr &operator = (const StringAttr & from);
  226. };
  227. class jlib_decl StringAttrAdaptor : public CInterface, implements IStringVal
  228. {
  229. public:
  230. StringAttrAdaptor(StringAttr & _attr) : attr(_attr) {}
  231. IMPLEMENT_IINTERFACE;
  232. virtual const char * str() const { return attr.get(); };
  233. virtual void set(const char *val) { attr.set(val); };
  234. virtual void clear() { attr.clear(); };
  235. virtual void setLen(const char *val, unsigned length) { attr.set(val, length); };
  236. virtual unsigned length() const { return attr.length(); };
  237. private:
  238. StringAttr & attr;
  239. };
  240. class jlib_decl StringBufferAdaptor : public CInterface, implements IStringVal
  241. {
  242. public:
  243. StringBufferAdaptor(StringBuffer & _buffer) : buffer(_buffer) { initsize=buffer.length(); }
  244. IMPLEMENT_IINTERFACE;
  245. virtual const char * str() const { return buffer.str(); };
  246. virtual void set(const char *val) { clear(); buffer.append(val); };
  247. virtual void clear() { buffer.setLength(initsize); }
  248. virtual void setLen(const char *val, unsigned length) { clear(); buffer.append(length, val); };
  249. virtual unsigned length() const { return buffer.length(); };
  250. private:
  251. size32_t initsize;
  252. StringBuffer & buffer;
  253. };
  254. #ifdef __GNUC__
  255. class jlib_decl GccStringAttrAdaptor
  256. {
  257. public:
  258. GccStringAttrAdaptor(StringAttr & _attr) : adaptor(_attr) {}
  259. inline operator IStringVal & () { return adaptor; }
  260. private:
  261. StringAttrAdaptor adaptor;
  262. };
  263. class jlib_decl GccStringBufferAdaptor
  264. {
  265. public:
  266. GccStringBufferAdaptor(StringBuffer & _buffer) : adaptor(_buffer) {}
  267. inline operator IStringVal & () { return adaptor; }
  268. private:
  269. StringBufferAdaptor adaptor;
  270. };
  271. #define StringAttrAdaptor GccStringAttrAdaptor
  272. #define StringBufferAdaptor GccStringBufferAdaptor
  273. #endif
  274. class jlib_decl StringBufferItem : public CInterface, public StringBuffer
  275. {
  276. public:
  277. StringBufferItem() : StringBuffer() {}
  278. StringBufferItem(const char *value) : StringBuffer(value) {}
  279. StringBufferItem(unsigned len, const char *value) : StringBuffer(len, value) {}
  280. StringBufferItem(const StringBuffer & value) : StringBuffer(value) {}
  281. };
  282. class jlib_decl StringAttrItem : public CInterface
  283. {
  284. public:
  285. StringAttrItem(void) {}
  286. StringAttrItem(const char * _text) : text(_text) {}
  287. StringAttrItem(const char * _text, unsigned _len);
  288. public:
  289. StringAttr text;
  290. };
  // --$appendURL-----------------------------------------------------------------
  // Appends the URL-encoded version of src to dest.
  // If len is unspecified, src is assumed to be a null-terminated string (NTS).
  // If lower is TRUE, a-f is used for hex digits; otherwise A-F is used.
  // -----------------------------------------------------------------------------
  // Values for the `flags` argument of the XML encoding helpers.
  #define ENCODE_SPACES       1
  #define ENCODE_NEWLINES     2
  #define ENCODE_WHITESPACE   3   // equals ENCODE_SPACES | ENCODE_NEWLINES
  #define ENCODE_NONE         4   // appendXMLTag appends the value unencoded when flags equals this
  300. interface IEntityHelper
  301. {
  302. virtual bool find(const char *entity, StringBuffer &value) = 0;
  303. };
  304. void jlib_decl appendURL(StringBuffer *dest, const char *src, size32_t len = -1, char lower=FALSE);
  305. extern jlib_decl StringBuffer &appendDecodedURL(StringBuffer &out, const char *url);
  306. extern jlib_decl StringBuffer & appendStringAsCPP(StringBuffer &out, unsigned len, const char * src, bool addBreak);
  307. extern jlib_decl StringBuffer & appendStringAsQuotedCPP(StringBuffer &out, unsigned len, const char * src, bool addBreak);
  308. extern jlib_decl StringBuffer & appendDataAsHex(StringBuffer &out, unsigned len, const void * data);
  309. extern jlib_decl StringBuffer & appendStringAsSQL(StringBuffer & out, unsigned len, const char * src);
  310. extern jlib_decl StringBuffer & appendStringAsECL(StringBuffer & out, unsigned len, const char * src);
  311. extern jlib_decl StringBuffer & appendStringAsQuotedECL(StringBuffer &out, unsigned len, const char * src);
  312. extern jlib_decl StringBuffer & appendUtf8AsECL(StringBuffer &out, unsigned len, const char * src);
  313. extern jlib_decl const char *decodeJSON(const char *x, StringBuffer &ret, unsigned len=(unsigned)-1, const char **errMark=NULL);
  314. extern jlib_decl void extractItem(StringBuffer & res, const char * src, const char * sep, int whichItem, bool caps);
  315. extern jlib_decl const char *encodeXML(const char *x, StringBuffer &ret, unsigned flags=0, unsigned len=(unsigned)-1, bool utf8=false);
  316. extern jlib_decl const char *decodeXML(const char *x, StringBuffer &ret, const char **errMark=NULL, IEntityHelper *entityHelper=NULL, bool strict = true);
  317. extern jlib_decl const char *encodeXML(const char *x, IIOStream &out, unsigned flags=0, unsigned len=(unsigned)-1, bool utf8=false);
  318. extern jlib_decl void decodeXML(ISimpleReadStream &in, StringBuffer &out, unsigned len=(unsigned)-1);
  319. extern jlib_decl int utf8CharLen(unsigned char ch);
  320. extern jlib_decl int utf8CharLen(const unsigned char *ch);
  321. inline const char *encodeUtf8XML(const char *x, StringBuffer &ret, unsigned flags=false, unsigned len=(unsigned)-1)
  322. {
  323. return encodeXML(x, ret, flags, len, true);
  324. }
  325. inline StringBuffer &appendXMLTagName(StringBuffer &xml, const char *tag, const char *prefix=NULL)
  326. {
  327. if (prefix && *prefix)
  328. xml.append(prefix).append(':');
  329. xml.append(tag);
  330. return xml;
  331. }
  332. extern jlib_decl StringBuffer & appendXMLOpenTag(StringBuffer &xml, const char *tag, const char *prefix=NULL, bool complete=true, bool close=false, const char *uri=NULL);
  333. inline StringBuffer &appendXMLAttr(StringBuffer &xml, const char *name, const char *value, const char *prefix=NULL)
  334. {
  335. if (!name || !*name || !value)
  336. return xml;
  337. xml.append(' ');
  338. appendXMLTagName(xml, name, prefix);
  339. encodeXML(value, xml.append("='"));
  340. xml.append("'");
  341. return xml;
  342. }
  343. inline StringBuffer & appendXMLCloseTag(StringBuffer &xml, const char *tag, const char *prefix=NULL)
  344. {
  345. if (!tag || !*tag)
  346. return xml;
  347. xml.append("</");
  348. return appendXMLTagName(xml, tag, prefix).append('>');
  349. }
  350. inline StringBuffer &appendXMLTag(StringBuffer &xml, const char *tag, const char *value, const char *prefix=NULL, unsigned flags=0, unsigned len=(unsigned)-1, bool utf8=true)
  351. {
  352. appendXMLOpenTag(xml, tag, prefix);
  353. if (value && *value)
  354. {
  355. if (flags != ENCODE_NONE)
  356. encodeXML(value, xml, flags, len, utf8);
  357. else
  358. xml.append(value);
  359. }
  360. return appendXMLCloseTag(xml, tag, prefix);
  361. }
  362. inline StringBuffer &delimitJSON(StringBuffer &s, bool addNewline=false, bool escapeNewline=false)
  363. {
  364. if (s.length() && !strchr("{ [:,n\n", s.charAt(s.length()-1))) //'n' or '\n' indicates already formatted with optionally escaped newline
  365. {
  366. s.append(",");
  367. if (addNewline)
  368. s.append(escapeNewline ? "\\n" : "\n");
  369. else
  370. s.append(' ');
  371. }
  372. return s;
  373. }
  374. jlib_decl StringBuffer &encodeJSON(StringBuffer &s, const char *value);
  375. jlib_decl StringBuffer &encodeJSON(StringBuffer &s, unsigned len, const char *value);
  376. jlib_decl StringBuffer &appendJSONName(StringBuffer &s, const char *name);
  377. jlib_decl StringBuffer &appendfJSONName(StringBuffer &s, const char *format, ...);
  378. jlib_decl StringBuffer &appendJSONDataValue(StringBuffer& s, const char *name, unsigned len, const void *_value);
  379. jlib_decl StringBuffer &appendJSONRealValue(StringBuffer& s, const char *name, double value);
  380. inline StringBuffer &appendJSONNameOrDelimit(StringBuffer &s, const char *name)
  381. {
  382. if (name && *name)
  383. return appendJSONName(s, name);
  384. return delimitJSON(s);
  385. }
  386. inline StringBuffer &appendJSONStringValue(StringBuffer& s, const char *name, unsigned len, const char *value, bool encode, bool quoted=true)
  387. {
  388. appendJSONNameOrDelimit(s, name);
  389. if (!value)
  390. return s.append("null");
  391. if (quoted)
  392. s.append('"');
  393. if (encode)
  394. encodeJSON(s, len, value);
  395. else
  396. s.append(len, value);
  397. if (quoted)
  398. s.append('"');
  399. return s;
  400. }
  401. inline StringBuffer &appendJSONStringValue(StringBuffer& s, const char *name, const char *value, bool encode, bool quoted=true)
  402. {
  403. return appendJSONStringValue(s, name, value ? strlen(value) : 0, value, encode, quoted);
  404. }
  405. template <typename type>
  406. inline StringBuffer &appendJSONValue(StringBuffer& s, const char *name, type value)
  407. {
  408. appendJSONNameOrDelimit(s, name);
  409. return s.append(value);
  410. }
  411. //specialization
  412. template <>
  413. inline StringBuffer &appendJSONValue(StringBuffer& s, const char *name, bool value)
  414. {
  415. appendJSONNameOrDelimit(s, name);
  416. return s.append((value) ? "true" : "false");
  417. }
  418. template <>
  419. inline StringBuffer &appendJSONValue(StringBuffer& s, const char *name, const char *value)
  420. {
  421. return appendJSONStringValue(s, name, value, true);
  422. }
  423. template <>
  424. inline StringBuffer &appendJSONValue(StringBuffer& s, const char *name, long value)
  425. {
  426. appendJSONNameOrDelimit(s, name);
  427. return s.appendlong(value);
  428. }
  429. template <>
  430. inline StringBuffer &appendJSONValue(StringBuffer& s, const char *name, double value)
  431. {
  432. return ::appendJSONRealValue(s, name, value);
  433. }
  434. template <>
  435. inline StringBuffer &appendJSONValue(StringBuffer& s, const char *name, float value)
  436. {
  437. return ::appendJSONRealValue(s, name, value);
  438. }
  439. template <>
  440. inline StringBuffer &appendJSONValue(StringBuffer& s, const char *name, unsigned long value)
  441. {
  442. appendJSONNameOrDelimit(s, name);
  443. return s.appendulong(value);
  444. }
  445. extern jlib_decl void decodeCppEscapeSequence(StringBuffer & out, const char * in, bool errorIfInvalid);
  446. extern jlib_decl bool strToBool(const char * text);
  447. extern jlib_decl bool strToBool(size_t len, const char * text);
  448. extern jlib_decl bool clipStrToBool(size_t len, const char * text);
  449. extern jlib_decl bool clipStrToBool(const char * text);
  450. extern jlib_decl StringBuffer & ncnameEscape(char const * in, StringBuffer & out);
  451. extern jlib_decl StringBuffer & ncnameUnescape(char const * in, StringBuffer & out);
  452. extern jlib_decl StringBuffer & elideString(StringBuffer & s, unsigned maxLength);
  453. extern jlib_decl bool startsWith(const char* src, const char* dst);
  454. extern jlib_decl bool endsWith(const char* src, const char* dst);
  455. extern jlib_decl bool startsWithIgnoreCase(const char* src, const char* dst);
  456. extern jlib_decl bool endsWithIgnoreCase(const char* src, const char* dst);
  457. inline bool strieq(const char* s, const char* t) { return stricmp(s,t)==0; }
  // True when strcmp reports s and t as equal. Neither argument may be NULL.
  inline bool streq(const char* s, const char* t)
  {
      return !strcmp(s, t);
  }
  // Null-tolerant equality: identical pointers (including two NULLs) compare
  // equal; a NULL never equals a non-NULL string; otherwise strcmp decides.
  inline bool strsame(const char* s, const char* t)
  {
      if (s == t)
          return true;
      if (!s || !t)
          return false;
      return strcmp(s, t) == 0;
  }
  460. extern jlib_decl char *j_strtok_r(char *str, const char *delim, char **saveptr);
  461. extern jlib_decl int j_memicmp (const void *s1, const void *s2, size32_t len);
  462. extern jlib_decl size32_t memcount(size32_t len, const char * str, char search);
  463. extern jlib_decl const char * nullText(const char * text);
  464. #endif