Browse Source

HPCC-10560 Fix incorrect use of utf8CharLen

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 11 years ago
parent
commit
27a5bcf33b
3 changed files with 17 additions and 5 deletions
  1. 1 1
      system/jlib/jptree.ipp
  2. 15 4
      system/jlib/jstring.cpp
  3. 1 0
      system/jlib/jstring.hpp

+ 1 - 1
system/jlib/jptree.ipp

@@ -148,7 +148,7 @@ inline static int validJSONUtf8ChrLen(unsigned char c)
         return 0;
         return 0;
     if ('\\' == c)
     if ('\\' == c)
         return 2;
         return 2;
-    return utf8CharLen(&c);
+    return utf8CharLen(c);
 }
 }
 
 
 inline static bool isAttribute(const char *xpath) { return (xpath && *xpath == '@'); }
 inline static bool isAttribute(const char *xpath) { return (xpath && *xpath == '@'); }

+ 15 - 4
system/jlib/jstring.cpp

@@ -1458,19 +1458,30 @@ void extractItem(StringBuffer & res, const char * src, const char * sep, int whi
 }
 }
 
 
 
 
-int utf8CharLen(const unsigned char *ch)
+int utf8CharLen(unsigned char ch)
 {
 {
     //return 1 if this is an ascii character, 
     //return 1 if this is an ascii character, 
     //or 0 if it's not a valid utf-8 character
     //or 0 if it's not a valid utf-8 character
-    if (*ch < 128)
+    if (ch < 128)
         return 1;
         return 1;
-    if (*ch < 192)
+    if (ch < 192)
         return 0;
         return 0;
     
     
     unsigned char len = 1;
     unsigned char len = 1;
-    for (unsigned char lead = *ch << 1; (lead & 0x80); lead <<=1)
+    for (unsigned char lead = ch << 1; (lead & 0x80); lead <<=1)
         len++;
         len++;
     
     
+    return len;
+}
+
+int utf8CharLen(const unsigned char *ch)
+{
+    //return 1 if this is an ascii character,
+    //or 0 if it's not a valid utf-8 character
+    if (*ch < 128)
+        return 1;
+
+    unsigned char len = utf8CharLen(*ch);
     for (unsigned pos = 1; pos < len; pos++)
     for (unsigned pos = 1; pos < len; pos++)
         if ((ch[pos] < 128) || (ch[pos] >= 192))
         if ((ch[pos] < 128) || (ch[pos] >= 192))
             return 0;  //it's not a valid utf-8 character after all
             return 0;  //it's not a valid utf-8 character after all

+ 1 - 0
system/jlib/jstring.hpp

@@ -370,6 +370,7 @@ extern jlib_decl const char *decodeXML(const char *x, StringBuffer &ret, const c
 extern jlib_decl const char *encodeXML(const char *x, IIOStream &out, unsigned flags=0, unsigned len=(unsigned)-1, bool utf8=false);
 extern jlib_decl const char *encodeXML(const char *x, IIOStream &out, unsigned flags=0, unsigned len=(unsigned)-1, bool utf8=false);
 extern jlib_decl void decodeXML(ISimpleReadStream &in, StringBuffer &out, unsigned len=(unsigned)-1);
 extern jlib_decl void decodeXML(ISimpleReadStream &in, StringBuffer &out, unsigned len=(unsigned)-1);
 
 
+extern jlib_decl int utf8CharLen(unsigned char ch);
 extern jlib_decl int utf8CharLen(const unsigned char *ch);
 extern jlib_decl int utf8CharLen(const unsigned char *ch);
 
 
 inline const char *encodeUtf8XML(const char *x, StringBuffer &ret, unsigned flags=false, unsigned len=(unsigned)-1)
 inline const char *encodeUtf8XML(const char *x, StringBuffer &ret, unsigned flags=false, unsigned len=(unsigned)-1)