Browse Source

Implement URI backbone

The URI library deals with resolving URIs into files in
the file system. The current version contains a library
to parse and split the URI into recognizable parts, to
be used by the URI resolvers, that will come later.

This implementation uses the uriparser library (from
http://uriparser.sourceforge.net/), which is widely
available on Linux distros and MacOS back-ports, but
might have to be compiled on Windows. Further analysis
must be done to see if we include the source directly
or just distribute libraries on Windows (or find another
way).

There are some tests, but they will be included separately,
since they require a refactoring of the old (dormant)
unittests tool.
Renato Golin 13 years ago
parent
commit
fe695792ff

+ 42 - 0
cmake_modules/FindUriparser.cmake

@@ -0,0 +1,42 @@
+################################################################################
+#    Copyright (C) 2012 HPCC Systems.
+#
+#    All rights reserved. This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU Affero General Public License as
+#    published by the Free Software Foundation, either version 3 of the
+#    License, or (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU Affero General Public License for more details.
+#
+#    You should have received a copy of the GNU Affero General Public License
+#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+################################################################################
+
+# - Try to find the uriparser uri parsing library
+# Once done this will define
+#
+#  URIPARSER_FOUND - system has the uriparser library
+#  URIPARSER_INCLUDE_DIR - the uriparser include directory
+#  URIPARSER_LIBRARIES - The libraries needed to use uriparser
+
+IF (NOT URIPARSER_FOUND)
+  # Windows builds look for "liburiparser"; every other platform for "uriparser".
+  IF (WIN32)
+    SET (uriparser_lib "liburiparser")
+  ELSE()
+    SET (uriparser_lib "uriparser")
+  ENDIF()
+
+  FIND_PATH (URIPARSER_INCLUDE_DIR NAMES uriparser/Uri.h)
+  FIND_LIBRARY (URIPARSER_LIBRARIES NAMES ${uriparser_lib})
+
+  # Pass the package name with the same spelling used by find_package(Uriparser),
+  # so newer CMake versions do not warn about a name mismatch. The upper-cased
+  # URIPARSER_FOUND result variable is still set.
+  include(FindPackageHandleStandardArgs)
+  find_package_handle_standard_args(Uriparser DEFAULT_MSG
+    URIPARSER_LIBRARIES
+    URIPARSER_INCLUDE_DIR
+  )
+
+  MARK_AS_ADVANCED(URIPARSER_INCLUDE_DIR URIPARSER_LIBRARIES)
+ENDIF()

+ 10 - 0
cmake_modules/commonSetup.cmake

@@ -58,6 +58,7 @@ IF ("${COMMONSETUP_DONE}" STREQUAL "")
   else()
     option(USE_LIBARCHIVE "Configure use of libarchive" ON)
   endif()
+  option(USE_URIPARSER "Configure use of uriparser" ON)
   option(USE_NATIVE_LIBRARIES "Search standard OS locations for thirdparty libraries" ON)
   option(USE_GIT_DESCRIBE "Use git describe to generate build tag" ON)
   option(CHECK_GIT_TAG "Require git tag to match the generated build tag" OFF)
@@ -445,6 +446,15 @@ IF ("${COMMONSETUP_DONE}" STREQUAL "")
         endif()
       endif(USE_LIBARCHIVE)
 
+      # When USE_URIPARSER is enabled, locate the uriparser package (expected to be
+      # resolved by cmake_modules/FindUriparser.cmake). If found, sources are
+      # compiled with _USE_URIPARSER defined; otherwise configuration is aborted.
+      if(USE_URIPARSER)
+        find_package(Uriparser)
+        if (URIPARSER_FOUND)
+          add_definitions (-D_USE_URIPARSER)
+        else()
+          message(FATAL_ERROR "URIPARSER requested but package not found")
+        endif()
+      endif(USE_URIPARSER)
+
       if(USE_BOOST_REGEX)
         find_package(BOOST_REGEX)
         if (BOOST_REGEX_FOUND)

+ 8 - 0
common/remote/CMakeLists.txt

@@ -43,6 +43,13 @@ set (    SRCS
          rmtssh.hpp
          sockfile.hpp
     )
+# uri.cpp / uri.hpp wrap the uriparser library, so they are only added to the
+# build when USE_URIPARSER is enabled.
+if (USE_URIPARSER)
+  set ( SRCS
+        ${SRCS}
+        uri.cpp
+        uri.hpp
+      )
+  # uri.hpp includes <uriparser/Uri.h>; expose the detected header location in
+  # case it is not on the compiler's default search path.
+  include_directories ( ${URIPARSER_INCLUDE_DIR} )
+endif(USE_URIPARSER)
 
 include_directories (
          ./../../system/hrpc 
@@ -59,4 +66,5 @@ install ( TARGETS remote DESTINATION ${OSSDIR}/lib )
 target_link_libraries ( remote 
     jlib 
     mp
+    ${URIPARSER_LIBRARIES}
     )

+ 155 - 0
common/remote/uri.cpp

@@ -0,0 +1,155 @@
+/*##############################################################################
+
+    Copyright (C) 2012 HPCC Systems.
+
+    All rights reserved. This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as
+    published by the Free Software Foundation, either version 3 of the
+    License, or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+############################################################################## */
+
+#include "uri.hpp"
+#include "jexcept.hpp"
+
+/*
+ * Parses 'path' with uriparser and copies the interesting parts into the
+ * scheme/server/path members (see populateFields). The uriparser members are
+ * released before the constructor returns, on the success path as well as on
+ * every failure path.
+ *
+ * Throws the exception built by MakeStringException ("Invalid URI '...'") when
+ * uriparser rejects the string; any other exception is re-thrown unchanged.
+ */
+URI::URI(const char* path)
+{
+    state.uri = &uri;
+    try {
+        if (uriParseUriA(&state, path) != URI_SUCCESS)
+            throw MakeStringException(-1, "Invalid URI '%s'", path);
+        populateFields(); // In a format we understand
+    }
+    // On parser failure, but also system exceptions (bad alloc, etc): catch
+    // everything so the uriparser members are released whatever was thrown.
+    catch (...)
+    {
+        uriFreeUriMembersA(&uri);
+        throw;
+    }
+    uriFreeUriMembersA(&uri);
+}
+
+// Helper, to validate URI before creating object.
+// Returns true when uriparser accepts 'path'. No URI object is created and the
+// temporary parser structures are released before returning.
+// Defined as a member (URI::isURI) so that it matches the static declaration
+// in uri.hpp; a free function here would leave that member undefined.
+bool URI::isURI(const char *path)
+{
+    UriParserStateA state;
+    UriUriA uri;
+    state.uri = &uri;
+    const bool match = (uriParseUriA(&state, path) == URI_SUCCESS);
+    uriFreeUriMembersA(&uri);
+    return match;
+}
+
+/*
+ * Copies the pieces of the parsed uriparser structure ('uri') into the
+ * scheme, server and path members. Must be called after a successful parse
+ * and before the uriparser members are freed, because every text range read
+ * here points into data owned by 'uri'.
+ */
+void URI::populateFields()
+{
+    // Scheme (defines which resolver to use); matched case-insensitively
+    StringBuffer schemeStr(uri.scheme.afterLast - uri.scheme.first, uri.scheme.first);
+    schemeStr.toLowerCase();
+    if (strcmp(schemeStr.str(), "hpcc") == 0)
+        scheme = URIScheme_hpcc;
+    else if (strcmp(schemeStr.str(), "file") == 0)
+        scheme = URIScheme_file;
+    else
+        scheme = URIScheme_error;
+
+    // Server
+    server.user.set(uri.userInfo.first, uri.userInfo.afterLast - uri.userInfo.first);
+    server.host.set(uri.hostText.first, uri.hostText.afterLast - uri.hostText.first);
+    StringAttr portStr(uri.portText.first, uri.portText.afterLast - uri.portText.first);
+    // sget() never returns NULL, so atoi() is safe even when no port was given
+    server.port = atoi(portStr.sget()); // More - use default ports?
+
+    // Path: re-join the parsed segments with '/'. A leading '/' is added for
+    // absolute paths and always for the file scheme.
+    UriPathSegmentA* cur = uri.pathHead;
+    StringBuffer pathStr;
+    if (uri.absolutePath || scheme == URIScheme_file)
+        pathStr.append("/");
+    bool first = true;
+    while (cur)
+    {
+        if (!first)
+            pathStr.append("/");
+        pathStr.append(cur->text.afterLast - cur->text.first, cur->text.first);
+        first = false;
+        cur = cur->next;
+    }
+    path.path.set(pathStr.str());
+
+    // Extra info: for hpcc URIs the query selects the file type and the
+    // fragment carries the sub-file name (super) or the index (stream).
+    if (scheme == URIScheme_hpcc)
+    {
+        StringBuffer query(uri.query.afterLast - uri.query.first, uri.query.first);
+        query.toLowerCase();
+        if (strcmp(query.str(), "super") == 0)
+        {
+            path.type = URIFile_super;
+            path.subname.set(uri.fragment.first, uri.fragment.afterLast - uri.fragment.first);
+            path.index = 0;
+        }
+        else if (strcmp(query.str(), "stream") == 0)
+        {
+            path.type = URIFile_stream;
+            StringAttr index(uri.fragment.first, uri.fragment.afterLast - uri.fragment.first);
+            // sget() for the same reason as the port above
+            path.index = atoi(index.sget());
+        }
+        else
+        {
+            path.type = URIFile_logic;
+            path.index = 0;
+        }
+    }
+    else
+    {
+        // Anything that is not an hpcc URI is treated as a local file
+        path.type = URIFile_local;
+        path.index = 0;
+    }
+}
+
+// Appends the textual name of the parsed scheme to 'buf'. Any scheme other
+// than hpcc or file is written as "unknown".
+void URI::appendSchemeStr(StringBuffer& buf)
+{
+    const char *name = "unknown";
+    if (scheme == URIScheme_hpcc)
+        name = "hpcc";
+    else if (scheme == URIScheme_file)
+        name = "file";
+    buf.append(name);
+}
+
+// Appends the server part to 'buf' as [user@]host[:port]. The user is written
+// only when non-empty and the port only when non-zero.
+void URI::appendServerStr(StringBuffer& buf)
+{
+    const bool hasUser = !server.user.isEmpty();
+    if (hasUser)
+    {
+        buf.append(server.user.get());
+        buf.append("@");
+    }
+    buf.append(server.host.get());
+    if (server.port != 0)
+    {
+        buf.append(":");
+        buf.append(server.port);
+    }
+}
+
+// Appends the path to 'buf', followed by "?super" or "?stream" for those file
+// types, and then "#<index>" when the index is non-zero or otherwise
+// "#<subname>" when a sub-file name is set.
+void URI::appendPathStr(StringBuffer& buf)
+{
+    buf.append(path.path.get());
+    if (path.type == URIFile_super)
+        buf.append("?super");
+    else if (path.type == URIFile_stream)
+        buf.append("?stream");
+    if (path.index != 0)
+    {
+        buf.append("#");
+        buf.append(path.index);
+    }
+    else if (path.subname.length() != 0)
+    {
+        buf.append("#");
+        buf.append(path.subname.get());
+    }
+}

+ 137 - 0
common/remote/uri.hpp

@@ -0,0 +1,137 @@
+/*##############################################################################
+
+    Copyright (C) 2012 HPCC Systems.
+
+    All rights reserved. This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as
+    published by the Free Software Foundation, either version 3 of the
+    License, or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+############################################################################## */
+
+#ifndef __JURI__
+#define __JURI__
+
+#include <uriparser/Uri.h>
+#include "jlib.hpp"
+
+// MORE - This is just a stub, structs below need to be commoned up with existing definitions elsewhere
+
+// Supported URI schemes, as recognised (case-insensitively) by URI::populateFields
+enum URISchemeType
+{
+    URIScheme_error,  // Scheme text was neither "hpcc" nor "file"
+    URIScheme_hpcc,   // "hpcc" scheme
+    URIScheme_file    // "file" scheme
+};
+
+// Supported server types
+// NOTE(review): no use of this enum is visible alongside its definition -
+// presumably reserved for the resolvers that are still to come; confirm.
+enum URIServerType
+{
+    URIServer_local,  // Local file
+    URIServer_dali,   // Names resolved by Dali
+    URIServer_host    // Names resolved by DNS
+};
+
+// Supported file types. For hpcc URIs the type is selected by the query part
+// ("?super", "?stream", anything else is a normal file); every other scheme is
+// treated as a local file.
+enum URIFileType
+{
+    URIFile_local,    // Local files
+    URIFile_logic,    // Normal files
+    URIFile_super,    // Super files
+    URIFile_stream    // Stream files (to be implemented)
+};
+
+// Server (authority) part of a parsed URI
+struct URIServerDescription
+{
+    StringAttr user;  // User-info text of the URI
+    StringAttr host;  // Host text of the URI
+    unsigned port;    // Port text converted with atoi; 0 is omitted when the server string is rebuilt
+};
+
+// Path part of a parsed URI, plus the extra information taken from the
+// query and fragment of hpcc URIs
+struct URIPathDescription
+{
+    StringAttr path;     // Path segments joined with '/'
+    URIFileType type;    // Kind of file the path refers to
+    StringAttr subname;  // Super files' sub
+    unsigned index;      // Stream files
+};
+
+// ==================================================================================
+/*
+ * URIFileResolver is the interface that any resolver should implement to be used
+ * by the URIResolution scheme, to provide a seamless interface to any HPCC engine
+ * to handle files in a plethora of environments.
+ *
+ * This has not been thought through properly and does not concern
+ * much of the initial URI investigations.
+ */
+//interface URIFileResolver
+//{
+//    // Returns a Read-only descriptor of a file. No Dali locks.
+//    virtual IFileDescriptor *getFileDescriptor(StringAttr &filePath) = 0;
+//    // Returns a distributed dali / local file
+//    virtual IResolvedFile *getFile(StringAttr &filePath) = 0;
+//    // Returns a distributed dali / local file from a pre-existing descriptor
+//    virtual IResolvedFile *getFile(IFileDescriptor &fileDesc) = 0;
+//    // Releases any lock and re-reads the information
+//    virtual IFileDescriptor *releaseFile(IResolvedFile &file) = 0;
+//};
+
+// ==================================================================================
+/*
+ * URI deals with strings referring to paths that can be resolved in
+ * many different ways. This object is immutable.
+ *
+ * Dali files (logic, super, stream), local files (on disk),
+ * Web files (http, ftp, webdav) have different ways of resolving, and all of them
+ * should have a consistent query mechanism from the HPCC engines point of view.
+ *
+ * The URI parser used is uriparser, from http://uriparser.sourceforge.net/
+ */
+class URI
+{
+    URISchemeType scheme;
+    URIServerDescription server;
+    URIPathDescription path;
+    // uriparser working storage. Only meaningful while the constructor runs:
+    // the parsed members are released before the constructor returns.
+    UriParserStateA state;
+    UriUriA uri;
+
+    // Fills scheme/server/path from the freshly parsed 'uri'
+    void populateFields();
+
+public:
+    // Parses 'path'; throws if uriparser does not accept it as a URI
+    URI(const char* path);
+
+    // Helper, to validate URI before creating object
+    static bool isURI(const char *path);
+
+    // Immutable
+    URISchemeType getScheme() const
+    {
+        return scheme;
+    }
+    // Immutable (read-only view; the pointer refers to a member of this object)
+    const URIServerDescription * getServer() const
+    {
+        return &server;
+    }
+    // Immutable (read-only view; the pointer refers to a member of this object)
+    const URIPathDescription * getPath() const
+    {
+        return &path;
+    }
+
+    // MORE - is this the best way?
+    // Each call appends the textual form of one URI component to 'buf'
+    void appendSchemeStr(StringBuffer& buf);
+    void appendServerStr(StringBuffer& buf);
+    void appendPathStr(StringBuffer& buf);
+};
+
+#endif /* __JURI__ */

+ 1 - 1
system/jlib/jstring.hpp

@@ -247,7 +247,7 @@ public:
     inline char * detach()                      { char * ret = text; text = NULL; return ret; }
     inline const char * get(void) const         { return text; }
     inline size32_t     length() const          { return text ? (size32_t)strlen(text) : 0; }
-    inline bool isEmpty()                       { return !text||!*text; } // faster than (length==0)
+    inline bool isEmpty() const                 { return !text||!*text; } // faster than (length==0)
     inline const char * sget(void) const        { return text ? text : ""; } // safe form of get (doesn't return NULL)
 
     void         set(const char * _text);