Browse Source

HPCC-14879 Support gzip option in soapcall

This fix enables gzip compression of soapcall requests
and responses. It also allows gzip-encoded httpcall responses.

Signed-off-by: wangkx <kevin.wang@lexisnexis.com>
wangkx 9 years ago
parent
commit
dd254ed8e0

+ 9 - 1
common/thorhelper/CMakeLists.txt

@@ -83,7 +83,8 @@ include_directories (
          ./../../rtl/nbcd 
          ./../../system/include 
          ./../../system/mp
-         ./../../system/jlib 
+         ./../../system/jlib
+         ./../../system/security/zcrypt
          ./../../dali/base
          ./../deftype 
          ./../workunit
@@ -120,3 +121,10 @@ IF (USE_OPENSSL)
     	securesocket
     )
 ENDIF()
+
+# When zlib support is enabled, link thorhelper against zlib and zcrypt;
+# zcrypt supplies the gzip()/gunzip() helpers declared in zcrypt.hpp.
+IF (USE_ZLIB)
+    target_link_libraries ( thorhelper
+        ${ZLIB_LIBRARIES}
+        zcrypt
+    )
+ENDIF()

+ 134 - 8
common/thorhelper/thorsoapcall.cpp

@@ -27,6 +27,9 @@
 #include "securesocket.hpp"
 #include "eclrtl.hpp"
 #include "roxiemem.hpp"
+#ifdef _USE_ZLIB
+#include "zcrypt.hpp"
+#endif
 
 using roxiemem::OwnedRoxieString;
 
@@ -37,6 +40,8 @@ using roxiemem::OwnedRoxieString;
 #include <new>
 
 #define CONTENT_LENGTH "Content-Length: "
+#define CONTENT_ENCODING "Content-Encoding"
+#define ACCEPT_ENCODING "Accept-Encoding"
 
 unsigned soapTraceLevel = 1;
 
@@ -1291,6 +1296,30 @@ bool httpHeaderBlockContainsHeader(const char *httpheaders, const char *header)
         return true;
     return false;
 }
+
+// Look up a header field in a block of HTTP header lines and append its value to 'value'.
+//
+// httpheaders - header lines separated by '\n' (a preceding '\r' is tolerated); may be NULL.
+// header      - field name to find, without the trailing ':'.
+// value       - receives the text between the ':' and the end of that line; the buffer is
+//               trimmed afterwards, exactly as before.
+//
+// Returns true if the field was found. A field only matches when its name starts a line and
+// is immediately followed by ':', so "Content-Encoding" no longer matches inside
+// "X-Content-Encoding:" or inside another field's value. The name comparison ignores case,
+// because HTTP field names are case-insensitive.
+bool getHTTPHeader(const char *httpheaders, const char *header, StringBuffer& value)
+{
+    if (!httpheaders || !*httpheaders || !header || !*header)
+        return false;
+
+    const size_t nameLen = strlen(header);
+    const char *line = httpheaders;
+    while (line && *line)
+    {
+        if ((strnicmp(line, header, nameLen) == 0) && (line[nameLen] == ':'))
+        {
+            const char *fieldValue = line + nameLen + 1;
+            const char *eol = strchr(fieldValue, '\n');
+            if (!eol)
+                value.append(fieldValue);
+            else
+                value.append(fieldValue, 0, eol - fieldValue);
+            value.trim();
+            return true;
+        }
+
+        // No match on this line: move to the start of the next one, if any.
+        const char *eol = strchr(line, '\n');
+        line = eol ? eol + 1 : NULL;
+    }
+    return false;
+}
+
 class CWSCAsyncFor : implements IWSCAsyncFor, public CInterface, public CAsyncFor
 {
     class CSocketDataProvider : public CInterface
@@ -1371,6 +1400,86 @@ private:
         }
     }
 
+    // Report whether this client can handle the given Content-Encoding token.
+    // Only "gzip" (compared without regard to case) is accepted.
+    bool checkContentEncodingSupported(const char* encoding)
+    {
+        return strieq(encoding, "gzip") ? true : false;
+    }
+
+    // Decide whether a received payload has to be decoded before use.
+    //
+    // headers         - the response header block.
+    // content         - the response payload; only its length is inspected here.
+    // contentEncoding - receives the Content-Encoding value when the header is present.
+    //
+    // Returns false when there are no headers, no payload, or no Content-Encoding value.
+    // Returns true when the encoding is one we support; throws when it is not.
+    bool checkContentDecoding(const StringBuffer& headers, StringBuffer& content, StringBuffer& contentEncoding)
+    {
+        if (!headers.length() || !content.length())
+            return false;
+
+        getHTTPHeader(headers.str(), CONTENT_ENCODING, contentEncoding);
+        if (contentEncoding.isEmpty())
+            return false;
+
+        if (checkContentEncodingSupported(contentEncoding.str()))
+            return true;
+
+        throw MakeStringException(-1, "Content-Encoding:%s not supported", contentEncoding.str());
+    }
+
+    // Replace 'content' with its decoded form according to contentEncodingType.
+    //
+    // Only "gzip" is handled. Any other type leaves 'content' untouched instead of
+    // replacing it with an empty buffer. Builds without _USE_ZLIB cannot decode gzip and
+    // throw instead. Errors raised by gunzip() propagate to the caller.
+    void decodeContent(const char* contentEncodingType, StringBuffer& content)
+    {
+        if (!strieq(contentEncodingType, "gzip"))
+            return;
+
+#ifdef _USE_ZLIB
+        const unsigned contentLength = content.length();
+        StringBuffer contentDecoded;
+        gunzip((const byte*)content.str(), contentLength, contentDecoded);
+        PROGLOG("Content decoded from %u bytes to %u bytes", contentLength, contentDecoded.length());
+
+        content = contentDecoded;
+        if (soapTraceLevel > 6 || master->logXML)
+            master->logctx.CTXLOG("Content decoded. Original %s %u", CONTENT_LENGTH, contentLength);
+#else
+        throw MakeStringException(-1, "_USE_ZLIB is required for Content-Encoding:%s", contentEncodingType);
+#endif
+    }
+
+    // Decide whether the outgoing request body has to be encoded.
+    //
+    // httpheaders         - the request header block to inspect.
+    // contentEncodingType - receives the Content-Encoding value when the header is present.
+    //
+    // Returns false when there is no body (xmlWriter is empty) or no Content-Encoding value.
+    // Returns true when the requested encoding is supported; throws when it is not.
+    bool checkContentEncoding(const char* httpheaders, StringBuffer& contentEncodingType)
+    {
+        if (!xmlWriter.length())
+            return false;
+
+        getHTTPHeader(httpheaders, CONTENT_ENCODING, contentEncodingType);
+        if (contentEncodingType.isEmpty())
+            return false;
+
+        if (checkContentEncodingSupported(contentEncodingType.str()))
+            return true;
+
+        throw MakeStringException(-1, "Content-Encoding:%s not supported", contentEncodingType.str());
+    }
+
+    // Encode the pending request body (xmlWriter) into 'content' using contentEncodingType.
+    //
+    // Only "gzip" is handled; any other type leaves 'content' untouched. Builds without
+    // _USE_ZLIB cannot encode gzip and throw instead. Errors raised by gzip() propagate.
+    void encodeContent(const char* contentEncodingType, StringBuffer& content)
+    {
+        if (strieq(contentEncodingType, "gzip"))
+        {
+#ifdef _USE_ZLIB
+            unsigned outlen = 0;
+            char* outbuf = gzip( xmlWriter.str(), xmlWriter.length(), &outlen, GZ_BEST_SPEED);
+
+            // gzip() allocates its result with new[] (as an unsigned char array), so copy the
+            // bytes into 'content' and release the block with the matching delete[] rather
+            // than handing ownership of it to the StringBuffer.
+            try
+            {
+                content.clear();
+                content.append(outlen, outbuf);
+            }
+            catch (...)
+            {
+                delete [] (byte*)outbuf;
+                throw;
+            }
+            delete [] (byte*)outbuf;
+            PROGLOG("Content encoded from %u bytes to %u bytes", xmlWriter.length(), outlen);
+#else
+            throw MakeStringException(-1, "_USE_ZLIB is required for Content-Encoding:%s", contentEncodingType);
+#endif
+        }
+    }
+
+    // Trace the request text when detailed tracing (soapTraceLevel > 6) or XML logging is on.
+    // contentEncoded selects the variant of the message that notes the body is encoded.
+    void logRequest(bool contentEncoded, StringBuffer& request)
+    {
+        if (soapTraceLevel <= 6 && !master->logXML)
+            return;
+
+        if (contentEncoded)
+            master->logctx.CTXLOG("%s: request(%s), content encoded.", master->wscCallTypeText(), request.str());
+        else
+            master->logctx.CTXLOG("%s: request(%s)", master->wscCallTypeText(), request.str());
+    }
+
     void createHttpRequest(Url &url, StringBuffer &request)
     {
         // Create the HTTP POST request
@@ -1401,6 +1510,11 @@ private:
             }
         }
 
+#ifdef _USE_ZLIB
+        if (!httpHeaderBlockContainsHeader(httpheaders, ACCEPT_ENCODING))
+            request.appendf("%s: gzip, deflate\r\n", ACCEPT_ENCODING);
+#endif
+
         if (master->wscType == STsoap)
         {
             if (master->soapaction.get())
@@ -1425,8 +1539,21 @@ private:
         if (master->wscType == STsoap)
         {
             request.append("Host: ").append(url.host).append(":").append(url.port).append("\r\n");//http 1.1
-            request.append(CONTENT_LENGTH).append(xmlWriter.length()).append("\r\n\r\n");
-            request.append(xmlWriter.str());//add SOAP xml content
+
+            StringBuffer contentEncodingType, encodedContentBuf;
+            if (!checkContentEncoding(httpheaders, contentEncodingType))
+            {
+                request.append(CONTENT_LENGTH).append(xmlWriter.length()).append("\r\n\r\n");
+                request.append(xmlWriter.str());//add SOAP xml content
+                logRequest(false, request);
+            }
+            else
+            {
+                logRequest(true, request);
+                encodeContent(contentEncodingType.str(), encodedContentBuf);
+                request.append(CONTENT_LENGTH).append(encodedContentBuf.length()).append("\r\n\r\n");
+                request.append(encodedContentBuf.length(), encodedContentBuf.str());//add SOAP xml content
+            }
         }
         else
         {
@@ -1435,11 +1562,9 @@ private:
                 request.append(":").append(url.port);
             request.append("\r\n");//http 1.1
             request.append("\r\n");//httpcall
+            logRequest(false, request);
         }
 
-        if (soapTraceLevel > 6 || master->logXML)
-            master->logctx.CTXLOG("%s: request(%s)", master->wscCallTypeText(), request.str());
-
         if (master->logMin)
             master->logctx.CTXLOG("%s: request(%s:%u)", master->wscCallTypeText(), url.host.str(), url.port);
     }
@@ -1457,7 +1582,7 @@ private:
         // first read header
         size32_t payloadofs = 0;
         size32_t payloadsize = 0;
-        StringBuffer dbgheader;
+        StringBuffer dbgheader, contentEncoding;
         bool chunked = false;
         size32_t read = 0;
         do {
@@ -1473,8 +1598,7 @@ private:
                 const char *s = strstr(buffer,"\r\n\r\n");
                 if (s) {
                     payloadofs = (size32_t)(s-buffer+4);
-                    if (soapTraceLevel > 6 || master->logXML)
-                        dbgheader.append(payloadofs,buffer);  // needed for tracing below
+                    dbgheader.append(payloadofs,buffer);
                     s = strstr(buffer, " ");
                     if (s)
                         rval = atoi(s+1);
@@ -1599,6 +1723,8 @@ private:
                 }
             }
         }
+        if (checkContentDecoding(dbgheader, response, contentEncoding))
+            decodeContent(contentEncoding.str(), response);
         if (soapTraceLevel > 6 || master->logXML)
             master->logctx.CTXLOG("%sCALL: LEN=%d %sresponse(%s%s)", master->wscType == STsoap ? "SOAP" : "HTTP",response.length(),chunked?"CHUNKED ":"", dbgheader.str(), response.str());
         else if (soapTraceLevel > 8)

+ 1 - 0
system/security/zcrypt/CMakeLists.txt

@@ -43,6 +43,7 @@ include_directories (
          ${ZLIB_INCLUDE_DIR}
          ${OPENSSL_INCLUDE_DIR}
          ./../../include 
+         ./../../jlib
     )
 
 ADD_DEFINITIONS ( -D_USRDLL -DZCRYPT_EXPORTS )

+ 166 - 0
system/security/zcrypt/zcrypt.cpp

@@ -21,6 +21,8 @@
 #include "base64.ipp"
 
 #include "zip.h"
+#include "jexcept.hpp"
+#include <math.h>
 
 #ifdef WIN32
 #define USEWIN32IOAPI
@@ -998,3 +1000,167 @@ ZCRYPT_API void releaseIZ(IZInterface* iz)
 
 }
 
+// Translate a zlib status code into a readable message and throw it as an exception
+// with code 500. 'operation' names the step that failed and is embedded in the message.
+// Codes not listed below are reported as "Unknown exception".
+static void throwGZipException(const char* operation, int errorCode)
+{
+    const char* reason = "Unknown exception";
+    switch (errorCode)
+    {
+        case Z_ERRNO:
+            reason = "Error occured while reading file";
+            break;
+        case Z_STREAM_ERROR:
+            reason = "The stream state was inconsistent";
+            break;
+        case Z_DATA_ERROR:
+            reason = "The deflate data was invalid or incomplete";
+            break;
+        case Z_MEM_ERROR:
+            reason = "Memory could not be allocated for processing";
+            break;
+        case Z_BUF_ERROR:
+            reason = "Insufficient output buffer";
+            break;
+        case Z_VERSION_ERROR:
+            reason = "The version mismatch between zlib.h and the library linked";
+            break;
+    }
+    throw MakeStringException(500, "Exception in gzip %s: %s.", operation, reason);
+}
+
+// Compress a character buffer using zlib in gzip format with given compression level.
+//
+// inputBuffer      - data to compress; must not be NULL.
+// inputSize        - number of bytes in inputBuffer; must not be 0.
+// outlen           - receives the number of compressed bytes (set to 0 if compression fails).
+// compressionLevel - Z_DEFAULT_COMPRESSION, or 0..9 (0 = store only, 1 = fastest,
+//                    9 = smallest output).
+//
+// Returns a buffer allocated with new[] that holds *outlen compressed bytes followed by a
+// single NUL byte; the caller owns it and must release it with delete[].
+// Throws (code 500) when the input is empty or when zlib reports an error.
+char* gzip( const char* inputBuffer, unsigned int inputSize, unsigned int* outlen, int compressionLevel)
+{
+    if (inputBuffer == NULL || inputSize == 0)
+        throw MakeStringException(500, "gzip failed: input buffer is empty!");
+
+    // Zero the whole control structure: Z_NULL zalloc/zfree/opaque make zlib use its
+    // default allocation functions, and the counters start from 0.
+    z_stream zs;
+    memset(&zs, 0, sizeof(zs));
+
+    // deflateInit2() rather than deflateInit() so that the window size can be given:
+    // 15 is the maximum window, and adding 16 asks for a gzip header and trailer instead
+    // of the zlib wrapper. memLevel 8 and Z_DEFAULT_STRATEGY are the zlib defaults.
+    int ret = deflateInit2(&zs, compressionLevel, Z_DEFLATED, (15+16), 8, Z_DEFAULT_STRATEGY);
+    if (ret != Z_OK)
+        throwGZipException("initialization", ret);
+
+    zs.next_in = (Bytef*)inputBuffer;
+    zs.avail_in = inputSize;
+
+    // deflateBound() gives an upper bound on the compressed size for this stream's
+    // settings, gzip wrapper included, so one deflate(Z_FINISH) call is enough. A
+    // percentage-based estimate is too small for short or incompressible input.
+    // One extra byte is reserved for the terminating NUL.
+    const uLong outsize = deflateBound(&zs, inputSize);
+    Bytef* outbuf = NULL;
+    try
+    {
+        outbuf = new Bytef[outsize + 1];
+    }
+    catch (...)
+    {
+        deflateEnd(&zs);    // do not leak zlib's internal state if allocation fails
+        throw;
+    }
+
+    zs.next_out = outbuf;
+    zs.avail_out = (uInt)outsize;
+
+    ret = deflate(&zs, Z_FINISH);
+    const uLong produced = zs.total_out;
+
+    // Free data structures that were dynamically created for the stream.
+    deflateEnd(&zs);
+
+    if (ret != Z_STREAM_END)
+    {
+        delete[] outbuf;
+        *outlen = 0;
+        // Z_OK here would mean the output space ran out before the stream was finished.
+        throwGZipException("compression", (ret == Z_OK) ? Z_BUF_ERROR : ret);
+    }
+
+    outbuf[produced] = 0;
+    *outlen = (unsigned int)produced;
+    return (char*) outbuf;
+}
+
+// Decompress a gzip-format buffer into sOutput.
+//
+// compressed - the gzip data.
+// comprLen   - number of bytes in 'compressed'; when 0 the function returns without
+//              touching sOutput.
+// sOutput    - receives the decompressed bytes, written from the start of the buffer.
+//
+// Throws (code 500) if zlib cannot initialise or the data does not decode to a complete
+// gzip stream; sOutput is cleared before a decompression failure is reported.
+void gunzip(const byte* compressed, unsigned int comprLen, StringBuffer& sOutput)
+{
+    if (!comprLen)
+        return;
+
+    const int CHUNK_OUT = 16384;    // output space offered to inflate() on each pass
+    z_stream inflater;
+    memset( &inflater, 0, sizeof(z_stream));
+    inflater.next_in = (byte*) compressed;
+    inflater.avail_in = comprLen;
+
+    // windowBits 15+16: maximum window, gzip header and trailer expected
+    int rc = inflateInit2(&inflater, (15+16));
+    if (rc != Z_OK)
+        throwGZipException("initialization", rc);
+
+    unsigned int written = 0;
+    for (;;)
+    {
+        // Make room for one more chunk and let inflate() write straight into the buffer.
+        sOutput.ensureCapacity( written + CHUNK_OUT );
+        inflater.avail_out = CHUNK_OUT;
+        inflater.next_out = (byte*)sOutput.str() + written;
+
+        rc = inflate(&inflater, Z_NO_FLUSH);
+        if (rc < Z_OK)
+            break;
+
+        written += CHUNK_OUT - inflater.avail_out;
+        sOutput.setLength( written );
+
+        // Finished once the stream has ended and the last chunk was not filled completely.
+        if (inflater.avail_out != 0 && rc == Z_STREAM_END)
+            break;
+    }
+
+    inflateEnd(&inflater);
+    if (rc == Z_STREAM_END)
+        return;
+
+    sOutput.clear();
+    throwGZipException("decompression", rc);
+}

+ 12 - 0
system/security/zcrypt/zcrypt.hpp

@@ -31,6 +31,8 @@
 #endif 
 
 #include <string>
+#include "platform.h"
+#include "jlib.hpp"
 
 using namespace std;
 
@@ -88,4 +90,14 @@ ZCRYPT_API IZDecryptor* createZDecryptor(const char* privatekey, const char* pas
 ZCRYPT_API void releaseIZ(IZInterface* iz);
 }
 
+// the following GZ_* values map to corresponding Z_* values defined in zlib.h
+#define GZ_BEST_SPEED             1
+#define GZ_BEST_COMPRESSION       9
+#define GZ_DEFAULT_COMPRESSION  (-1)
+
+// Compress a character buffer using zlib in gzip format with given compression level
+// (one of the GZ_* values above, or any level accepted by zlib).
+// The input must be non-NULL and non-empty, otherwise an exception is thrown.
+// On success *outlen receives the compressed size and the returned buffer, which is
+// allocated with new[], holds the compressed bytes; the caller is responsible for it.
+char* gzip( const char* inputBuffer, unsigned int inputSize,
+    unsigned int* outlen, int compressionLevel=GZ_DEFAULT_COMPRESSION);
+// Decompress gzip-format data into sOutput. Returns without touching sOutput when
+// comprLen is 0; throws, after clearing sOutput, when decompression fails.
+void gunzip(const byte* compressed, unsigned int comprLen, StringBuffer& sOutput);
+
 #endif