junicode.hpp 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  /*##############################################################################
      HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.

      Licensed under the Apache License, Version 2.0 (the "License");
      you may not use this file except in compliance with the License.
      You may obtain a copy of the License at

         http://www.apache.org/licenses/LICENSE-2.0

      Unless required by applicable law or agreed to in writing, software
      distributed under the License is distributed on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      See the License for the specific language governing permissions and
      limitations under the License.
  ############################################################################## */
  13. #ifndef JUNICODE_HPP
  14. #define JUNICODE_HPP
  15. #include "jiface.hpp"
  // Forward declaration; this header only refers to StringMatcher by reference.
  class StringMatcher;

  // Code-unit types for the UTF-32, UTF-16 and UTF-8 encoding forms.
  typedef unsigned UTF32;         /* at least 32 bits */
  typedef unsigned short UTF16;   /* at least 16 bits */
  typedef unsigned char UTF8;     /* typically 8 bits */

  // UTF-8 byte order mark (bytes EF BB BF) as a NUL-terminated C string.
  // The replacement list is fully parenthesised so that postfix operators
  // applied to the macro (for example UTF8_BOM[0]) act on the result of the
  // cast rather than binding to the string literal before the cast is applied.
  #define UTF8_BOM ((const char *)"\357\273\277")

  // Reserved values at the top of the UTF32 range.  Per the note on
  // errorLowerLimit, any value above it is an error code, not a code point.
  const UTF32 sourceIllegal = (UTF32)-1;
  const UTF32 sourceExhausted = (UTF32)-2;
  const UTF32 errorLowerLimit = (UTF32)-16;     // Any value above this is an error code...
  24. class jlib_decl UtfReader : public CInterface
  25. {
  26. public:
  27. enum UtfFormat { Utf8, Utf16le, Utf16be, Utf32le, Utf32be };
  28. UtfReader(UtfFormat _type, bool _strictConversion) { type = _type; strictConversion = _strictConversion; set(0, NULL); }
  29. size32_t getLegalLength();
  30. void set(size32_t len, const void * start) { cur = (const byte *)start; end = cur + len; }
  31. UTF32 next();
  32. bool done() { return cur == end; }
  33. protected:
  34. UTF32 next8();
  35. UTF32 next16le();
  36. UTF32 next16be();
  37. UTF32 next32le();
  38. UTF32 next32be();
  39. public:
  40. const byte * cur;
  41. const byte * end;
  42. UtfFormat type;
  43. bool strictConversion;
  44. };
  45. extern jlib_decl unsigned writeUtf8(void * target, unsigned maxLength, UTF32 value);
  46. extern jlib_decl unsigned writeUtf16le(void * target, unsigned maxLength, UTF32 value);
  47. extern jlib_decl unsigned writeUtf16be(void * target, unsigned maxLength, UTF32 value);
  48. extern jlib_decl unsigned writeUtf32le(void * target, unsigned maxLength, UTF32 value);
  49. extern jlib_decl unsigned writeUtf32be(void * target, unsigned maxLength, UTF32 value);
  50. extern jlib_decl MemoryBuffer & appendUtf8(MemoryBuffer & out, UTF32 value);
  51. extern jlib_decl MemoryBuffer & appendUtf16le(MemoryBuffer & out, UTF32 value);
  52. extern jlib_decl MemoryBuffer & appendUtf16be(MemoryBuffer & out, UTF32 value);
  53. extern jlib_decl MemoryBuffer & appendUtf32le(MemoryBuffer & out, UTF32 value);
  54. extern jlib_decl MemoryBuffer & appendUtf32be(MemoryBuffer & out, UTF32 value);
  55. extern jlib_decl bool convertUtf(MemoryBuffer & target, UtfReader::UtfFormat targetType, unsigned sourceLength, const void * source, UtfReader::UtfFormat sourceType);
  56. extern jlib_decl bool convertToUtf8(MemoryBuffer & target, unsigned sourceLength, const void * source);
  57. extern jlib_decl void addUtfActionList(StringMatcher & matcher, const char * text, unsigned action, unsigned * maxElementLength, UtfReader::UtfFormat utfFormat);
  58. extern jlib_decl UTF32 readUtf8Character(unsigned len, const byte * & cur);
  59. extern jlib_decl size32_t readUtf8Size(const void * _data);
  60. extern jlib_decl UTF32 readUtf8Char(const void * _data);
  61. typedef MemoryBuffer & (*utfReplacementFunc)(MemoryBuffer & target, UTF32 match, UtfReader::UtfFormat type, const void * source, int len, bool start);
  62. extern jlib_decl bool replaceUtf(utfReplacementFunc func, MemoryBuffer & target, UtfReader::UtfFormat type, unsigned sourceLength, const void * source);
  63. extern jlib_decl bool appendUtfXmlName(MemoryBuffer & target, UtfReader::UtfFormat type, unsigned sourceLength, const void * source);
  64. inline StringBuffer &appendUtf8XmlName(StringBuffer & target, unsigned sourceLength, const void * source)
  65. {
  66. MemoryBuffer mb;
  67. appendUtfXmlName(mb, UtfReader::Utf8, sourceLength, source);
  68. return target.append(mb.length(), mb.toByteArray());
  69. }
  70. #endif