Ver código fonte

HPCC-8435 Implement JSON Pull Parser and PTree creation

Parsing and creating PTrees from JSON is needed for several
upcoming JSON integration items.  This implements a PullReader
and a BufferReader, as well as createPTreeFromJSON Functions.

This implementation emulates the style and practice of the current
XML parsers.  It also sticks to the existing parser interfaces
completely; future enhancements could be made that would improve those
interfaces to benefit more from JSON data type knowledge, especially
to improve symmetry between building a PTree and serializing back
to JSON.

While the parsers support full JSON naming syntax, JSON supports
object names that cannot currently be supported as property trees.
Creating a property tree from such JSON will throw a PTree exception.

The general philosophy for the pull parser has been that each call to
IPullXMLReader::next() will result in one event (or error).  Further
action after an event is minimized.  The event handler should be able
to abort immediately without further events or syntax-based errors.

Signed-off-by: Anthony Fishbeck <Anthony.Fishbeck@lexisnexis.com>
Anthony Fishbeck 12 anos atrás
pai
commit
198be5abbe
5 arquivos alterados com 1167 adições e 1 exclusões
  1. 1063 0
      system/jlib/jptree.cpp
  2. 10 0
      system/jlib/jptree.hpp
  3. 11 0
      system/jlib/jptree.ipp
  4. 79 0
      system/jlib/jstring.cpp
  5. 4 1
      system/jlib/jstring.hpp

Diferenças do arquivo suprimidas por serem muito extensas
+ 1063 - 0
system/jlib/jptree.cpp


+ 10 - 0
system/jlib/jptree.hpp

@@ -174,6 +174,13 @@ jlib_decl IPullXMLReader *createPullXMLStreamReader(ISimpleReadStream &stream, I
 jlib_decl IPullXMLReader *createPullXMLStringReader(const char *xml, IPTreeNotifyEvent &iEvent, XmlReaderOptions xmlReaderOptions=xr_ignoreWhiteSpace);
 jlib_decl IPullXMLReader *createPullXMLBufferReader(const void *buf, size32_t bufLength, IPTreeNotifyEvent &iEvent, XmlReaderOptions xmlReaderOptions=xr_ignoreWhiteSpace);
 
+jlib_decl IXMLReader *createJSONStreamReader(ISimpleReadStream &stream, IPTreeNotifyEvent &iEvent, XmlReaderOptions readerOptions=xr_ignoreWhiteSpace, size32_t bufSize=0);
+jlib_decl IXMLReader *createJSONStringReader(const char *json, IPTreeNotifyEvent &iEvent, XmlReaderOptions readerOptions=xr_ignoreWhiteSpace);
+jlib_decl IXMLReader *createJSONBufferReader(const void *buf, size32_t bufLength, IPTreeNotifyEvent &iEvent, XmlReaderOptions jsonReaderOptions=xr_ignoreWhiteSpace);
+jlib_decl IPullXMLReader *createPullJSONStreamReader(ISimpleReadStream &stream, IPTreeNotifyEvent &iEvent, XmlReaderOptions readerOptions=xr_ignoreWhiteSpace, size32_t bufSize=0);
+jlib_decl IPullXMLReader *createPullJSONStringReader(const char *json, IPTreeNotifyEvent &iEvent, XmlReaderOptions readerOptions=xr_ignoreWhiteSpace);
+jlib_decl IPullXMLReader *createPullJSONBufferReader(const void *buf, size32_t bufLength, IPTreeNotifyEvent &iEvent, XmlReaderOptions readerOptions=xr_ignoreWhiteSpace);
+
 jlib_decl void mergePTree(IPropertyTree *target, IPropertyTree *toMerge);
 jlib_decl void synchronizePTree(IPropertyTree *target, IPropertyTree *source);
 jlib_decl IPropertyTree *ensurePTree(IPropertyTree *root, const char *xpath);
@@ -191,6 +198,9 @@ jlib_decl IPropertyTree *createPTreeFromXMLString(unsigned len, const char *xml,
 jlib_decl IPropertyTree *createPTreeFromXMLFile(const char *filename, byte flags=ipt_none, XmlReaderOptions readFlags=xr_ignoreWhiteSpace, IPTreeMaker *iMaker=NULL);
 jlib_decl IPropertyTree *createPTreeFromIPT(const IPropertyTree *srcTree, ipt_flags flags=ipt_none);
 
+jlib_decl IPropertyTree *createPTreeFromJSONString(const char *json, byte flags=ipt_none, XmlReaderOptions readFlags=xr_ignoreWhiteSpace, IPTreeMaker *iMaker=NULL);
+jlib_decl IPropertyTree *createPTreeFromJSONString(unsigned len, const char *json, byte flags=ipt_none, XmlReaderOptions readFlags=xr_ignoreWhiteSpace, IPTreeMaker *iMaker=NULL);
+
 #define XML_SortTags 0x01
 #define XML_Format   0x02
 #define XML_NoEncode 0x04

+ 11 - 0
system/jlib/jptree.ipp

@@ -140,6 +140,17 @@ inline static bool isValidXPathChr(char c)
     return ('\0' != c && (isalnum(c) || strchr(validChrs, c)));
 }
 
+/*
+ * Classify the byte `c` as the start of a character inside a JSON string.
+ *
+ * Returns 0 for bytes that may not appear unescaped: anything <= 31 and the
+ * double quote. Returns 2 for a backslash. For every other byte the result
+ * is whatever utf8CharLen() reports for it.
+ *
+ * NOTE(review): utf8CharLen() is handed the address of a one-byte local
+ * copy of `c`; confirm that it never inspects bytes beyond the first one.
+ */
+inline static int validJSONUtf8ChrLen(unsigned char c)
+{
+    if (c <= 31 || '\"' == c)
+        return 0;
+    return ('\\' == c) ? 2 : utf8CharLen(&c);
+}
+
 inline static bool isAttribute(const char *xpath) { return (xpath && *xpath == '@'); }
 
 jlib_decl const char *splitXPathUQ(const char *xpath, StringBuffer &path);

+ 79 - 0
system/jlib/jstring.cpp

@@ -1617,6 +1617,85 @@ static void writeUtf8(unsigned c, StringBuffer &out)
         assertex(false);
 }
 
+#define JSONSTRICT
+/*
+ * Decode the backslash escape sequences of a JSON string.
+ *
+ * j       - text to decode; a NULL pointer is returned unchanged.
+ * ret     - receives the decoded text (appended).
+ * len     - number of bytes of `j` to examine; (unsigned)-1 means "use
+ *           strlen(j)". Decoding also stops at an embedded NUL.
+ * errMark - optional; when an exception is raised it is set to the position
+ *           in `j` that was being examined, then the exception is rethrown.
+ *
+ * Returns the position at which decoding stopped.
+ *
+ * Recognised escapes: \" \\ \/ \b \f \n \r \t and \uXXXX, where XXXX must be
+ * exactly four hexadecimal digits. With JSONSTRICT defined, any other escape,
+ * a malformed \u sequence, or a backslash that is the final byte of the
+ * input raises an exception created with MakeStringException.
+ *
+ * NOTE(review): each \uXXXX is passed to writeUtf8() on its own; UTF-16
+ * surrogate pairs are not combined here - confirm whether callers need that.
+ */
+const char *decodeJSON(const char *j, StringBuffer &ret, unsigned len, const char **errMark)
+{
+    if (!j)
+        return j;
+    if ((unsigned)-1 == len)
+        len = (unsigned)strlen(j);
+    try
+    {
+        for (const char *end = j+len; j<end && *j; j++)
+        {
+            if (*j!='\\')
+            {
+                ret.append(*j);
+                continue;
+            }
+            // A backslash in the last position has no escape character after
+            // it; never read the byte at (or beyond) `end`.
+            if (++j >= end)
+            {
+#ifdef JSONSTRICT
+                throw MakeStringException(-1, "invalid json escaped sequence");
+#else
+                ret.append('\\');
+                break; // leaves the for loop with j == end
+#endif
+            }
+            switch (*j)
+            {
+            case 'u':
+            {
+                j++;
+                // Require exactly four hex digits. strtoul() is not used
+                // because it also accepts whitespace, a sign or a "0x" prefix.
+                unsigned val = 0;
+                unsigned digits = 0;
+                if (end-j>=4)
+                {
+                    for (; digits<4; digits++)
+                    {
+                        char c = j[digits];
+                        unsigned nibble;
+                        if (c>='0' && c<='9')
+                            nibble = (unsigned)(c-'0');
+                        else if (c>='a' && c<='f')
+                            nibble = (unsigned)(c-'a')+10;
+                        else if (c>='A' && c<='F')
+                            nibble = (unsigned)(c-'A')+10;
+                        else
+                            break;
+                        val = (val<<4) | nibble;
+                    }
+                }
+                if (4 == digits)
+                {
+                    writeUtf8(val, ret);
+                    j+=3; // the loop increment consumes the fourth digit
+                    break;
+                }
+#ifdef JSONSTRICT
+                throw MakeStringException(-1, "invalid json \\u escaped sequence");
+#else
+                if (j<end)
+                    ret.append(*j);
+                else
+                    j--; // keep the loop increment from stepping past `end`
+                break;
+#endif
+            }
+            case '\"':
+            case '\\':
+            case '/':
+                ret.append(*j);
+                break;
+            case 'b':
+                ret.append('\b');
+                break;
+            case 'f':
+                ret.append('\f');
+                break;
+            case 'n':
+                ret.append('\n');
+                break;
+            case 'r':
+                ret.append('\r');
+                break;
+            case 't':
+                ret.append('\t');
+                break;
+            default:
+            {
+#ifdef JSONSTRICT
+                throw MakeStringException(-1, "invalid json escaped sequence");
+#else
+                ret.append('\\');
+                ret.append(*j);
+                break;
+#endif
+            }
+            }
+        }
+    }
+    catch (IException *)
+    {
+        if (errMark) *errMark = j;
+        throw;
+    }
+    return j;
+}
+
 void decodeXML(ISimpleReadStream &in, StringBuffer &out, unsigned len)
 {
     // TODO

+ 4 - 1
system/jlib/jstring.hpp

@@ -361,12 +361,15 @@ extern jlib_decl StringBuffer & appendStringAsSQL(StringBuffer & out, unsigned l
 extern jlib_decl StringBuffer & appendStringAsECL(StringBuffer & out, unsigned len, const char * src);
 extern jlib_decl StringBuffer & appendStringAsQuotedECL(StringBuffer &out, unsigned len, const char * src);
 
-jlib_decl void extractItem(StringBuffer & res, const char * src, const char * sep, int whichItem, bool caps);
+extern jlib_decl const char *decodeJSON(const char *x, StringBuffer &ret, unsigned len=(unsigned)-1, const char **errMark=NULL);
+extern jlib_decl void extractItem(StringBuffer & res, const char * src, const char * sep, int whichItem, bool caps);
 extern jlib_decl const char *encodeXML(const char *x, StringBuffer &ret, unsigned flags=0, unsigned len=(unsigned)-1, bool utf8=false);
 extern jlib_decl const char *decodeXML(const char *x, StringBuffer &ret, unsigned len=(unsigned)-1, const char **errMark=NULL, IEntityHelper *entityHelper=NULL);
 extern jlib_decl const char *encodeXML(const char *x, IIOStream &out, unsigned flags=0, unsigned len=(unsigned)-1, bool utf8=false);
 extern jlib_decl void decodeXML(ISimpleReadStream &in, StringBuffer &out, unsigned len=(unsigned)-1);
 
+extern jlib_decl int utf8CharLen(const unsigned char *ch);
+
 inline const char *encodeUtf8XML(const char *x, StringBuffer &ret, unsigned flags=false, unsigned len=(unsigned)-1)
 {
     return encodeXML(x, ret, flags, len, true);