瀏覽代碼

HPCC-18493 Refactor record translation code so dafilesrv can use it

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 7 年之前
父節點
當前提交
16f9804703

+ 0 - 48
common/thorhelper/thorcommon.ipp

@@ -115,54 +115,6 @@ private:
 
 //------------------------------------------------------------------------------------------------
 
-class THORHELPER_API MemoryBufferBuilder : public RtlRowBuilderBase
-{
-public:
-    MemoryBufferBuilder(MemoryBuffer & _buffer, unsigned _minSize)
-        : buffer(_buffer), minSize(_minSize)
-    {
-        reserved = 0;
-    }
-
-    virtual byte * ensureCapacity(size32_t required, const char * fieldName)
-    {
-        if (required > reserved)
-        {
-            void * next = buffer.reserve(required-reserved);
-            self = (byte *)next - reserved;
-            reserved = required;
-        }
-        return self;
-    }
-
-    void finishRow(size32_t length)
-    {
-        assertex(length <= reserved);
-        size32_t newLength = (buffer.length() - reserved) + length;
-        buffer.setLength(newLength);
-        self = NULL;
-        reserved = 0;
-    }
-    virtual IEngineRowAllocator *queryAllocator() const
-    {
-        return NULL;
-    }
-
-protected:
-    virtual byte * createSelf()
-    {
-        return ensureCapacity(minSize, NULL);
-    }
-
-protected:
-    MemoryBuffer & buffer;
-    size32_t minSize;
-    size32_t reserved;
-};
-
-
-
-//------------------------------------------------------------------------------------------------
 
 //This class is only ever used to apply a delta to a self pointer, it is never finalized, and the builder must stay alive.
 class THORHELPER_API CPrefixedRowBuilder : implements RtlRowBuilderBase

+ 0 - 1
ecl/hql/hqlmanifest.cpp

@@ -227,7 +227,6 @@ bool isManifestFileValid(const char *filename)
         ERRLOG("Error: MANIFEST file '%s' does not exist", filename);
         return false;
     }
-
     ResourceManifest manifest(filename);
     return manifest.checkResourceFilesExist();
 }

+ 0 - 39
ecl/hql/hqlstack.cpp

@@ -28,45 +28,6 @@
 #include "hqlir.hpp"
 #include "hqlutil.hpp"
 
-/**
- * class CDynamicOutputMetaData
- *
- * An implementation of IOutputMetaData for use with a dynamically-created record type info structure
- *
- */
-
-class CDynamicOutputMetaData : public COutputMetaData
-{
-public:
-    CDynamicOutputMetaData(const RtlRecordTypeInfo & fields) : typeInfo(fields)
-    {
-    }
-
-    virtual const RtlTypeInfo * queryTypeInfo() const { return &typeInfo; }
-    virtual size32_t getRecordSize(const void * row)
-    {
-        //Allocate a temporary offset array on the stack to avoid runtime overhead.
-        const RtlRecord &offsetInformation = queryRecordAccessor(true);
-        unsigned numOffsets = offsetInformation.getNumVarFields() + 1;
-        size_t * variableOffsets = (size_t *)alloca(numOffsets * sizeof(size_t));
-        RtlRow offsetCalculator(offsetInformation, row, numOffsets, variableOffsets);
-        return offsetCalculator.getRecordSize();
-    }
-
-    virtual size32_t getFixedSize() const
-    {
-        return queryRecordAccessor(true).getFixedSize();
-    }
-    // returns 0 for variable row size
-    virtual size32_t getMinRecordSize() const
-    {
-        return queryRecordAccessor(true).getMinRecordSize();
-    }
-
-protected:
-    const RtlTypeInfo &typeInfo;
-};
-
 FuncCallStack::FuncCallStack(bool _hasMeta, int size)
 {
     hasMeta = _hasMeta;

+ 1 - 0
rtl/eclrtl/CMakeLists.txt

@@ -27,6 +27,7 @@ project( eclrtl )
 
 set (    SRCS 
          eclhelper_base.cpp
+         eclhelper_dyn.cpp
          eclrtl.cpp 
          eclregex.cpp
          rtlbcd.cpp 

+ 1 - 1
rtl/eclrtl/eclhelper_base.cpp

@@ -432,7 +432,7 @@ bool CThorKeyedDistributeArg::getIndexLayout(size32_t & _retLen, void * & _retDa
 int CThorWorkUnitWriteArg::getSequence() { return -3; }
 const char * CThorWorkUnitWriteArg::queryName() { return NULL; }
 unsigned CThorWorkUnitWriteArg::getFlags() { return 0; }
-void CThorWorkUnitWriteArg::serializeXml(const byte * self, IXmlWriter & out) { rtlSysFail(1, "serializeXml not implemented"); }
+void CThorWorkUnitWriteArg::serializeXml(const byte * self, IXmlWriter & out) { queryOutputMeta()->toXML(self, out); }   // delegates to the output metadata's toXML()
 unsigned CThorWorkUnitWriteArg::getMaxSize() { return 0; }
 
 //CThorXmlWorkunitWriteArg

+ 268 - 0
rtl/eclrtl/eclhelper_dyn.cpp

@@ -0,0 +1,268 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+#include "platform.h"
+#include "jptree.hpp"
+#include "eclrtl.hpp"
+#include "eclhelper.hpp"
+#include "rtlds_imp.hpp"
+#include "eclhelper_base.hpp"
+#include "eclhelper_dyn.hpp"
+
+#include "rtlfield.hpp"
+#include "rtlrecord.hpp"
+#include "rtldynfield.hpp"
+#include "rtlkey.hpp"
+
+//---------------------------------------------------------------------------
+static void readString(StringBuffer &out, const char * &in)
+{
+    while (true)   // copy characters up to the closing quote, leaving 'in' just past it
+    {
+        const char next = *in++;
+        if (next == '\'')
+            return;
+        if (next == '\0')
+            throw MakeStringException(0, "Invalid filter - missing closing '");
+        if (next == '\\')
+            UNIMPLEMENTED;   // escape sequences are not supported yet
+        out.append(next);
+    }
+}
+// Disk read helper whose record layouts are supplied at runtime: rows are converted from the on-disk layout ('in')
+// to the requested layout ('out') by a record translator, and addFilter() can attach keyed filters on input fields.
+class ECLRTL_API CDynamicDiskReadArg : public CThorDiskReadArg
+{
+public:
+    CDynamicDiskReadArg(const char *_fileName, IOutputMetaData *_in, IOutputMetaData *_out, unsigned __int64 _chooseN, unsigned __int64 _skipN, unsigned __int64 _rowLimit)
+        : fileName(_fileName), in(_in), out(_out), chooseN(_chooseN), skipN(_skipN), rowLimit(_rowLimit)
+    {
+        inrec = &in->queryRecordAccessor(true);
+        numOffsets = inrec->getNumVarFields() + 1;
+        translator.setown(createRecordTranslator(queryOutputMeta()->queryRecordAccessor(true), *inrec));
+    }
+    virtual bool needTransform() override
+    {
+        return true;
+        //return translator->needsTranslate(); might be more appropriate?
+    }
+    virtual unsigned getFlags() override
+    {
+        return flags;
+    }
+    virtual void createSegmentMonitors(IIndexReadContext *irc) override
+    {
+        ForEachItemIn(idx, filters)
+        {
+            IStringSet &filter = filters.item(idx);
+            irc->append(createKeySegmentMonitor(false, LINK(&filter), filterOffsets.item(idx), filter.getSize()));
+        }
+    }
+
+    virtual IOutputMetaData * queryOutputMeta() override
+    {
+        return out;
+    }
+    virtual const char * getFileName() override final
+    {
+        return fileName;
+    }
+    virtual IOutputMetaData * queryDiskRecordSize() override final
+    {
+        return in;
+    }
+    virtual unsigned getFormatCrc() override
+    {
+        return 0;  // engines should treat 0 as 'ignore'
+    }
+    virtual size32_t transform(ARowBuilder & rowBuilder, const void * src) override
+    {
+        return translator->translate(rowBuilder, (const byte *) src);
+    }
+    virtual unsigned __int64 getChooseNLimit() { return chooseN; }
+    virtual unsigned __int64 getRowLimit() { return rowLimit; }
+
+    void addFilter(const char *filter)
+    {
+        // A filter has the form:
+        //   field=valuestring
+        // where valuestring is a comma-separated list of ranges. Each range is written as
+        // paren lower[,upper] paren; the paren is [ or ] for an inclusive bound and ( or ) for an exclusive one.
+        // If only one bound is specified it is used for both upper and lower bound (only meaningful with []).
+        //
+        // ( A means values > A - exclusive
+        // [ A means values >= A - inclusive
+        // A ) means values < A - exclusive
+        // A ] means values <= A - inclusive
+        // For example: [A] matches just A; (,A),(A,) matches all but A; (A] and [A) are both empty; [A,B) means A*
+        // Values use the ECL syntax for constants. String constants are always utf8. Binary use d'xx' format (hexpairs)
+        // Note that binary serialization format is different
+        //
+        // Only single-quoted string constants are implemented so far: the ~ form, non-string values, omitted
+        // bounds such as (,A) and backslash escapes all currently hit UNIMPLEMENTED.
+
+        assertex(filter);
+        const char *epos = strchr(filter,'=');
+        assertex(epos);
+        StringBuffer fieldName(epos-filter, filter);
+        unsigned fieldNum = inrec->getFieldNum(fieldName);
+        assertex(fieldNum != (unsigned) -1);
+        size_t * variableOffsets = (size_t *)alloca(numOffsets * sizeof(size_t));
+        RtlRow offsetCalculator(*inrec, nullptr, numOffsets, variableOffsets);
+        unsigned fieldOffset = offsetCalculator.getOffset(fieldNum);
+        unsigned fieldSize = offsetCalculator.getSize(fieldNum);
+        const RtlTypeInfo *fieldType = inrec->queryType(fieldNum);
+        filter = epos+1;
+        if (*filter=='~')
+        {
+            UNIMPLEMENTED;  // use a regex?
+        }
+        else
+        {
+            MemoryBuffer lobuffer;
+            MemoryBuffer hibuffer;
+            Owned<IStringSet> filterSet = createStringSet(fieldSize);
+            while (*filter)
+            {
+                char startRange = *filter++;
+                if (startRange != '(' && startRange != '[')
+                    throw MakeStringException(0, "Invalid filter string: expected [ or ( at start of range");
+                // Now we expect a constant - type depends on type of field. Assume string or int for now
+                StringBuffer upperString, lowerString;
+                if (*filter=='\'')
+                {
+                    filter++;
+                    readString(lowerString, filter);
+                }
+                else
+                    UNIMPLEMENTED; // lowerInt = readInt(curFilter);
+                if (*filter == ',')
+                {
+                    filter++;
+                    if (*filter=='\'')
+                    {
+                        filter++;
+                        readString(upperString, filter);
+                    }
+                    else
+                        UNIMPLEMENTED; //upperInt = readInt(curFilter);
+                }
+                else
+                    upperString.set(lowerString);
+                char endRange = *filter++;
+                if (endRange != ')' && endRange != ']')
+                    throw MakeStringException(0, "Invalid filter string: expected ] or ) at end of range");
+                if (*filter==',')
+                    filter++;
+                else if (*filter)
+                    throw MakeStringException(0, "Invalid filter string: expected , between ranges");
+                // buildUtf8 takes its length in codepoints, which differs from the byte length for non-ASCII text
+                MemoryBufferBuilder lobuilder(lobuffer.clear(), inrec->getMinRecordSize());
+                fieldType->buildUtf8(lobuilder, 0, inrec->queryField(fieldNum), rtlUtf8Length(lowerString.length(), lowerString.str()), lowerString.str());
+                MemoryBufferBuilder hibuilder(hibuffer.clear(), inrec->getMinRecordSize());
+                fieldType->buildUtf8(hibuilder, 0, inrec->queryField(fieldNum), rtlUtf8Length(upperString.length(), upperString.str()), upperString.str());
+
+                filterSet->addRange(lobuffer.toByteArray(), hibuffer.toByteArray());
+                if (startRange=='(')
+                    filterSet->killRange(lobuffer.toByteArray(), lobuffer.toByteArray());
+                if (endRange==')')
+                    filterSet->killRange(hibuffer.toByteArray(), hibuffer.toByteArray());
+            }
+            filters.append(*filterSet.getClear());
+            filterOffsets.append(fieldOffset);
+            flags |= TDRkeyed;
+        }
+    }
+private:
+    StringAttr fileName;
+    unsigned numOffsets = 0;
+    unsigned flags = 0;
+    Owned<IRtlFieldTypeDeserializer> indeserializer;   // Owns the resulting ITypeInfo structures, so needs to be kept around
+    Owned<IRtlFieldTypeDeserializer> outdeserializer;  // Owns the resulting ITypeInfo structures, so needs to be kept around
+    Owned<IOutputMetaData> in;
+    Owned<IOutputMetaData> out;
+    IArrayOf<IStringSet> filters;
+    UnsignedArray filterOffsets;
+    const RtlRecord *inrec = nullptr;
+    Owned<const IDynamicTransform> translator;
+    unsigned __int64 chooseN = I64C(0x7fffffffffffffff); // constant(s) should be commoned up somewhere
+    unsigned __int64 skipN = 0;
+    unsigned __int64 rowLimit = (unsigned __int64) -1;
+};
+
+// Workunit-write helper that reports the supplied metadata as its output layout and always uses sequence 0.
+class ECLRTL_API CDynamicWorkUnitWriteArg : public CThorWorkUnitWriteArg
+{
+public:
+    CDynamicWorkUnitWriteArg(IOutputMetaData *_in) : in(_in) {}
+
+    virtual IOutputMetaData * queryOutputMeta() override final { return in; }
+    virtual int getSequence() override final { return 0; }
+private:
+    Owned<IOutputMetaData> in;   // metadata handed to the constructor, returned by queryOutputMeta()
+};
+
+static IOutputMetaData *loadTypeInfo(IPropertyTree &xgmml, const char *key)
+{
+    MemoryBuffer serialized;
+    StringBuffer attrPath;
+    xgmml.getPropBin(attrPath.setf("att[@name='%s_binary']/value", key), serialized);   // binary value of the "<key>_binary" attribute
+    assertex(serialized.length());   // the attribute must be present and non-empty
+    return new CDeserializedOutputMetaData(serialized);
+}
+
+extern ECLRTL_API IHThorDiskReadArg *createDiskReadArg(IPropertyTree &xgmml)
+{   // Builds a disk read helper from the "att" children of xgmml: layouts, file name, limits and key filters
+    Owned <IOutputMetaData> in = loadTypeInfo(xgmml, "input");     // layout of the rows on disk
+    Owned <IOutputMetaData> out = loadTypeInfo(xgmml, "output");   // layout of the rows to return
+    const char *fileName = xgmml.queryProp("att[@name=\"_fileName\"]/@value");
+    unsigned __int64 chooseN = xgmml.getPropInt64("att[@name=\"chooseN\"]/@value", -1);
+    unsigned __int64 skipN = xgmml.getPropInt64("att[@name=\"skipN\"]/@value", 0);   // no skipN attribute means skip nothing
+    unsigned __int64 rowLimit = xgmml.getPropInt64("att[@name=\"rowLimit\"]/@value", -1);
+    Owned<CDynamicDiskReadArg> ret = new CDynamicDiskReadArg(fileName, in.getClear(), out.getClear(), chooseN, skipN, rowLimit);
+    Owned<IPropertyTreeIterator> filters = xgmml.getElements("att[@name=\"keyfilter\"]");
+    ForEach(*filters)
+        ret->addFilter(filters->query().queryProp("@value"));   // each keyfilter attribute holds one filter string
+    return ret.getClear();
+}
+
+extern ECLRTL_API IHThorDiskReadArg *createDiskReadArg(const char *fileName, IOutputMetaData *in, IOutputMetaData *out, unsigned __int64 chooseN, unsigned __int64 skipN, unsigned __int64 rowLimit)
+{   // NOTE(review): in and out end up in Owned<> members of the new object, so ownership presumably transfers - confirm
+    return new CDynamicDiskReadArg(fileName, in, out, chooseN, skipN, rowLimit);
+}
+
+extern ECLRTL_API IHThorArg *createWorkunitWriteArg(IPropertyTree &xgmml)
+{
+    Owned<IOutputMetaData> inputMeta = loadTypeInfo(xgmml, "input");   // metadata stored under "input_binary"
+    return new CDynamicWorkUnitWriteArg(inputMeta.getClear());
+}
+
+// Minimal EclProcess implementation whose perform() just runs the graph named "graph1".
+struct ECLRTL_API DynamicEclProcess : public EclProcess {
+    virtual unsigned getActivityVersion() const override { return ACTIVITY_INTERFACE_VERSION; }
+    virtual int perform(IGlobalCodeContext * gctx, unsigned wfid) override {
+        ICodeContext * codeCtx = gctx->queryCodeContext();
+        codeCtx->executeGraph("graph1",false,0,NULL);
+        return 1U;
+    }
+};
+
+extern ECLRTL_API IEclProcess* createDynamicEclProcess()
+{   // Returns a newly allocated DynamicEclProcess
+    return new DynamicEclProcess;
+}
+

+ 29 - 0
rtl/eclrtl/eclhelper_dyn.hpp

@@ -0,0 +1,29 @@
+/*##############################################################################
+#    HPCC SYSTEMS software Copyright (C) 2017 HPCC Systems®.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+##############################################################################
+ */
+
+#ifndef ECLHELPER_DYN_HPP
+#define ECLHELPER_DYN_HPP
+
+#include "jptree.hpp"
+#include "eclrtl.hpp"
+#include "eclhelper.hpp"
+
+extern ECLRTL_API IHThorDiskReadArg *createDiskReadArg(IPropertyTree &xgmml);
+extern ECLRTL_API IHThorArg *createWorkunitWriteArg(IPropertyTree &xgmml);
+extern ECLRTL_API IEclProcess* createDynamicEclProcess();
+
+#endif

+ 2 - 2
rtl/eclrtl/eclrtl_imp.hpp

@@ -212,8 +212,8 @@ private:
 
 
 #define RTLIMPLEMENT_IINTERFACE                                                 \
-    virtual void Link(void) const       { RtlCInterface::Link(); }              \
-    virtual bool Release(void) const    { return RtlCInterface::Release(); }
+    virtual void Link(void) const override     { RtlCInterface::Link(); }              \
+    virtual bool Release(void) const override  { return RtlCInterface::Release(); }
 
 
 //Inline definitions of the hash32 functions for small sizes - to optimize aggregate hash

+ 21 - 0
rtl/eclrtl/rtlds.cpp

@@ -1836,3 +1836,24 @@ bool RtlSimpleIterator::next()
     return (cur != NULL);
 }
 
+////
+
+byte * MemoryBufferBuilder::ensureCapacity(size32_t required, const char * fieldName)
+{
+    if (required <= reserved)
+        return self;   // the current reservation already covers the request
+    // Reserve only the shortfall; the row is taken to begin 'reserved' bytes before the newly reserved region
+    byte * extension = (byte *)buffer.reserve(required-reserved);
+    self = extension - reserved;
+    reserved = required;
+    return self;
+}
+
+void MemoryBufferBuilder::finishRow(size32_t length)
+{
+    assertex(length <= reserved);
+    const size32_t rowStart = buffer.length() - reserved;   // buffer length excluding the current reservation
+    buffer.setLength(rowStart + length);   // drop the unused tail of the reservation
+    reserved = 0;
+    self = NULL;
+}

+ 37 - 1
rtl/eclrtl/rtlds_imp.hpp

@@ -48,7 +48,6 @@ class ECLRTL_API RtlDatasetBuilder : protected ARowBuilder, public RtlCInterface
 public:
     RtlDatasetBuilder();
     ~RtlDatasetBuilder();
-    RTLIMPLEMENT_IINTERFACE
 
     void getData(size32_t & len, void * & data);
     size32_t getSize();
@@ -664,4 +663,41 @@ protected:
     const byte * * cursor;
 };
 
+class MemoryBuffer;
+// Row builder that constructs a row in space reserved from a caller-supplied MemoryBuffer.
+class ECLRTL_API MemoryBufferBuilder : public RtlRowBuilderBase
+{
+public:
+    MemoryBufferBuilder(MemoryBuffer & _buffer, unsigned _minSize)
+        : buffer(_buffer), minSize(_minSize), reserved(0)
+    {
+    }
+
+    // Grows the reservation to at least 'required' bytes and returns the start of the row
+    virtual byte * ensureCapacity(size32_t required, const char * fieldName);
+
+    // Makes sure at least minSize bytes are reserved for the row; returns this builder to allow chaining
+    MemoryBufferBuilder &ensureRow()
+    {
+        ensureCapacity(minSize, nullptr);
+        return *this;
+    }
+
+    // Trims the buffer to the row's final length and resets the reservation
+    void finishRow(size32_t length);
+
+    // No row allocator is associated with this builder
+    virtual IEngineRowAllocator *queryAllocator() const { return nullptr; }
+
+protected:
+    // Creates the row storage by reserving the minimum size
+    virtual byte * createSelf() { return ensureCapacity(minSize, nullptr); }
+
+protected:
+    MemoryBuffer & buffer;   // destination buffer, held by reference
+    size32_t minSize;        // reservation made by ensureRow()/createSelf()
+    size32_t reserved;       // bytes currently reserved in 'buffer' for the row in progress
+};
+
+
 #endif

+ 56 - 24
rtl/eclrtl/rtldynfield.cpp

@@ -473,6 +473,25 @@ public:
      * <p>
      * Do not call more than once.
      *
+     * @param  jsonTree JSON property tree to be deserialized, as created by CRtlFieldTypeSerializer
+     * @return Deserialized type object
+     */
+    virtual const RtlTypeInfo *deserialize(IPropertyTree &jsonTree) override
+    {
+        assertex(!base);   // a deserializer may only be used once
+        // NOTE(review): the same tree is passed for both deserializeType arguments - confirm their roles
+        return (base = deserializeType(&jsonTree, &jsonTree));
+    }
+
+    /**
+     * Obtain the deserialized type information
+     * <p>
+     * Note that the RtlTypeInfo objects are not link-counted, so the lifetime of these objects
+     * is determined by the lifetime of the deserializer. They will be released once the deserializer
+     * that created them is deleted.
+     * <p>
+     * Do not call more than once.
+     *
      * @param  _json JSON text to be deserialized, as created by CRtlFieldTypeSerializer
      * @return Deserialized type object
      */
@@ -759,7 +778,7 @@ StringBuffer &describeFlags(StringBuffer &out, FieldMatchType flags)
 inline constexpr FieldMatchType operator|(FieldMatchType a, FieldMatchType b) { return (FieldMatchType)((int)a | (int)b); }
 inline FieldMatchType &operator|=(FieldMatchType &a, FieldMatchType b) { return (FieldMatchType &) ((int &)a |= (int)b); }
 
-class GeneralRecordTranslator
+class GeneralRecordTranslator : public CInterfaceOf<IDynamicTransform>
 {
 public:
     GeneralRecordTranslator(const RtlRecord &_destRecInfo, const RtlRecord &_srcRecInfo)
@@ -772,7 +791,24 @@ public:
     {
         delete [] matchInfo;
     }
-    void describe(unsigned indent = 0) const
+    virtual void describe() const override
+    {
+        doDescribe(0);   // top level starts at indent 0; nested translators are described with indent+2
+    }
+    virtual size32_t translate(ARowBuilder &builder, const byte *sourceRec) const override
+    {
+        return doTranslate(builder, 0, sourceRec);   // a whole row is built starting at offset 0 of the builder
+    }
+    virtual bool canTranslate() const override
+    {
+        return (matchFlags & match_fail) == 0;   // true unless the match_fail flag has been recorded
+    }
+    virtual bool needsTranslate() const override
+    {
+        return (matchFlags & ~match_link) != 0;   // any flag other than match_link means translation is needed
+    }
+private:
+    void doDescribe(unsigned indent) const
     {
         for (unsigned idx = 0; idx <  destRecInfo.getNumFields(); idx++)
         {
@@ -785,7 +821,7 @@ public:
                 StringBuffer matchStr;
                 DBGLOG("%*sMatch (%s) to field %d for field %s", indent, "", describeFlags(matchStr, match.matchType).str(), match.matchIdx, source);
                 if (match.subTrans)
-                    match.subTrans->describe(indent+2);
+                    match.subTrans->doDescribe(indent+2);
             }
         }
         if (!canTranslate())
@@ -798,7 +834,7 @@ public:
         else
             DBGLOG("%*sTranslation is not necessary", indent, "");
     }
-    size32_t translate(ARowBuilder &builder, size32_t offset, const byte *sourceRec) const
+    size32_t doTranslate(ARowBuilder &builder, size32_t offset, const byte *sourceRec) const
     {
         unsigned numOffsets = sourceRecInfo.getNumVarFields() + 1;
         size_t * variableOffsets = (size_t *)alloca(numOffsets * sizeof(size_t));
@@ -881,7 +917,7 @@ public:
                 }
                 case match_recurse:
                     if (type->getType()==type_record)
-                        offset = match.subTrans->translate(builder, offset, source);
+                        offset = match.subTrans->doTranslate(builder, offset, source);
                     else if (type->isLinkCounted())
                     {
                         // a 32-bit record count, and a pointer to an array of record pointers
@@ -901,7 +937,7 @@ public:
                             for (size32_t childRow = 0; childRow < childCount; childRow++)
                             {
                                 RtlDynamicRowBuilder childBuilder(*childAllocator);
-                                size32_t childLen = match.subTrans->translate(childBuilder, 0, sourceRows[childRow]);
+                                size32_t childLen = match.subTrans->doTranslate(childBuilder, 0, sourceRows[childRow]);
                                 childRows = childAllocator->appendRowOwn(childRows, ++numRows, (void *) childBuilder.finalizeRowClear(childLen));
                             }
                         }
@@ -914,7 +950,7 @@ public:
                             while ((size_t)(source - initialSource) < childSize)
                             {
                                 RtlDynamicRowBuilder childBuilder(*childAllocator);
-                                size32_t childLen = match.subTrans->translate(childBuilder, 0, source);
+                                size32_t childLen = match.subTrans->doTranslate(childBuilder, 0, source);
                                 childRows = childAllocator->appendRowOwn(childRows, ++numRows, (void *) childBuilder.finalizeRowClear(childLen));
                                 source += sourceType->queryChildType()->size(source, nullptr); // MORE - shame to repeat a calculation that the translate above almost certainly just did
                             }
@@ -939,7 +975,7 @@ public:
                             const byte ** sourceRows = *(const byte***) source;
                             for (size32_t childRow = 0; childRow < childCount; childRow++)
                             {
-                                offset = match.subTrans->translate(builder, offset, sourceRows[childRow]);
+                                offset = match.subTrans->doTranslate(builder, offset, sourceRows[childRow]);
                             }
                         }
                         else
@@ -950,7 +986,7 @@ public:
                             const byte *initialSource = source;
                             while ((size_t)(source - initialSource) < childSize)
                             {
-                                offset = match.subTrans->translate(builder, offset, source);
+                                offset = match.subTrans->doTranslate(builder, offset, source);
                                 source += sourceType->queryChildType()->size(source, nullptr); // MORE - shame to repeat a calculation that the translate above almost certainly just did
                             }
                         }
@@ -976,15 +1012,6 @@ public:
     {
         return matchFlags;
     }
-    bool canTranslate() const
-    {
-        return (matchFlags & match_fail) == 0;
-    }
-    bool needsTranslate() const
-    {
-        return (matchFlags & ~match_link) != 0;
-    }
-private:
     const RtlRecord &destRecInfo;
     const RtlRecord &sourceRecInfo;
     unsigned fixedDelta = 0;  // total size of all fixed-size source fields that are not matched
@@ -1256,13 +1283,18 @@ private:
     }
 };
 
+extern ECLRTL_API const IDynamicTransform *createRecordTranslator(const RtlRecord &_destRecInfo, const RtlRecord &_srcRecInfo)
+{   // Returns a new translator from the _srcRecInfo layout to the _destRecInfo layout
+    return new GeneralRecordTranslator(_destRecInfo, _srcRecInfo);
+}
+
 class TranslatedRowStream : public CInterfaceOf<IRowStream>
 {
 public:
     TranslatedRowStream(IRowStream *_inputStream, IEngineRowAllocator *_resultAllocator, const RtlRecord &outputRecord, const RtlRecord &inputRecord)
-    : inputStream(_inputStream), resultAllocator(_resultAllocator), translator(outputRecord, inputRecord)
+    : inputStream(_inputStream), resultAllocator(_resultAllocator), translator(new GeneralRecordTranslator(outputRecord, inputRecord))
     {
-        translator.describe();
+        translator->describe();
     }
     virtual const void *nextRow()
     {
@@ -1280,7 +1312,7 @@ public:
         else
             eogSeen = false;
         RtlDynamicRowBuilder rowBuilder(resultAllocator);
-        size32_t len = translator.translate(rowBuilder, 0, (const byte *) inRow);
+        size32_t len = translator->translate(rowBuilder, (const byte *) inRow);
         rtlReleaseRow(inRow);
         return rowBuilder.finalizeRowClear(len);
     }
@@ -1290,16 +1322,16 @@ public:
     }
     bool canTranslate() const
     {
-        return translator.canTranslate();
+        return translator->canTranslate();
     }
     bool needsTranslate() const
     {
-        return translator.needsTranslate();
+        return translator->needsTranslate();
     }
 protected:
     Linked<IRowStream> inputStream;
     Linked<IEngineRowAllocator> resultAllocator;
-    const GeneralRecordTranslator translator;
+    Owned<const IDynamicTransform> translator;
     unsigned numOffsets = 0;
     size_t * variableOffsets = nullptr;
     bool eof = false;

+ 19 - 0
rtl/eclrtl/rtldynfield.hpp

@@ -57,6 +57,14 @@ interface IRtlFieldTypeDeserializer : public IInterface
     virtual const RtlTypeInfo *deserialize(const char *json) = 0;
 
     /*
+     * Create RtlTypeInfo structures from a serialized json representation
+     *
+     * @param jsonTree The json representation
+     * @return         Deserialized RtlTypeInfo structure
+     */
+     virtual const RtlTypeInfo *deserialize(IPropertyTree &jsonTree) = 0;
+
+    /*
      * Create RtlTypeInfo structures from a serialized binary representation
      *
      * @param buf  The binary representation
@@ -93,9 +101,20 @@ interface IRtlFieldTypeDeserializer : public IInterface
 
 };
 
+interface IDynamicTransform : public IInterface   // Translates rows from one record layout to another
+{
+    virtual void describe() const = 0;   // log how the translation will be performed
+    virtual size32_t translate(ARowBuilder &builder, const byte *sourceRec) const = 0;   // build the translated row in builder; returns its length
+    virtual bool canTranslate() const = 0;   // false if translation between the two layouts is not possible
+    virtual bool needsTranslate() const = 0;   // false if translation is not necessary
+};
+
+extern ECLRTL_API const IDynamicTransform *createRecordTranslator(const RtlRecord &_destRecInfo, const RtlRecord &_srcRecInfo);
+
 extern ECLRTL_API IRtlFieldTypeDeserializer *createRtlFieldTypeDeserializer();
 
 extern ECLRTL_API StringBuffer &dumpTypeInfo(StringBuffer &ret, const RtlTypeInfo *t);
+
 extern ECLRTL_API MemoryBuffer &dumpTypeInfo(MemoryBuffer &ret, const RtlTypeInfo *t);
 
 /**

+ 27 - 2
rtl/eclrtl/rtlfield.cpp

@@ -159,8 +159,7 @@ size32_t RtlTypeInfoBase::buildString(ARowBuilder &builder, size32_t offset, con
 
 size32_t RtlTypeInfoBase::buildUtf8(ARowBuilder &builder, size32_t offset, const RtlFieldInfo *field, size32_t len, const char *val) const
 {
-    rtlFailUnexpected();
-    return 0;
+    return buildString(builder, offset, field, len, val);   // default: forward the utf8 data to buildString unchanged; RtlStringTypeInfo overrides this to convert
 }
 
 size32_t RtlTypeInfoBase::buildInt(ARowBuilder &builder, size32_t offset, const RtlFieldInfo *field, __int64 val) const
@@ -643,6 +642,32 @@ size32_t RtlStringTypeInfo::buildString(ARowBuilder &builder, size32_t offset, c
     return offset;
 }
 
+size32_t RtlStringTypeInfo::buildUtf8(ARowBuilder &builder, size32_t offset, const RtlFieldInfo *field, size32_t codepoints, const char *value) const
+{
+    if (isFixedSize())
+    {
+        // Fixed-size field: always occupies exactly 'length' bytes
+        const size32_t endOffset = offset + length;
+        builder.ensureCapacity(endOffset, field->name);
+        char *target = (char *) builder.getSelf() + offset;
+        if (isEbcdic())
+            UNIMPLEMENTED;
+        else
+            rtlUtf8ToStr(length, target, codepoints, value);
+        return endOffset;
+    }
+    // Variable-size field: a 4-byte character count followed by one byte per codepoint
+    const size32_t endOffset = offset + codepoints + sizeof(size32_t);
+    builder.ensureCapacity(endOffset, field->name);
+    char *target = (char *) builder.getSelf() + offset;
+    rtlWriteInt4(target, codepoints);
+    if (isEbcdic())
+        UNIMPLEMENTED;
+    else
+        rtlUtf8ToStr(codepoints, target + sizeof(size32_t), codepoints, value);
+    return endOffset;
+}
+
 size32_t RtlStringTypeInfo::buildNull(ARowBuilder &builder, size32_t offset, const RtlFieldInfo *field) const
 {
     if (field->initializer || !isFixedSize())

+ 2 - 1
rtl/eclrtl/rtlfield.hpp

@@ -164,8 +164,9 @@ struct ECLRTL_API RtlStringTypeInfo : public RtlTypeInfoBase
     virtual size32_t getMinSize() const override;
     virtual size32_t size(const byte * self, const byte * selfrow) const override;
     virtual size32_t build(ARowBuilder &builder, size32_t offset, const RtlFieldInfo *field, IFieldSource &source) const override;
-    size32_t buildNull(ARowBuilder &builder, size32_t offset, const RtlFieldInfo *field) const override;
+    virtual size32_t buildNull(ARowBuilder &builder, size32_t offset, const RtlFieldInfo *field) const override;
     virtual size32_t buildString(ARowBuilder &builder, size32_t offset, const RtlFieldInfo *field, size32_t len, const char *value) const override;
+    virtual size32_t buildUtf8(ARowBuilder &builder, size32_t offset, const RtlFieldInfo *field, size32_t codepoints, const char *value) const override;
     virtual size32_t process(const byte * self, const byte * selfrow, const RtlFieldInfo * field, IFieldProcessor & target) const override;
     virtual size32_t toXML(const byte * self, const byte * selfrow, const RtlFieldInfo * field, IXmlWriter & target) const override;
     virtual size32_t deserialize(ARowBuilder & rowBuilder, IRowDeserializerSource & in, size32_t offset) const override;

+ 2 - 3
rtl/eclrtl/rtlkey.cpp

@@ -203,7 +203,6 @@ public:
     virtual IKeySegmentMonitor *clone() const;
     virtual IKeySegmentMonitor *combine(const IKeySegmentMonitor *with) const { throwUnexpected(); }
     virtual KeySegmentMonitorSerializeType serializeType() const { return KSMST_WILDKEYSEGMENTMONITOR; }
-
 };
 
 class CSetKeySegmentMonitor : public CKeySegmentMonitor
@@ -827,14 +826,14 @@ protected:
 };
 
 // The base monitor provided to this segment monitor is constructed with offsets of 0
-// offset refers to where the field would be in an "expanded" version of the record (all variable size - and thus unkeyed - fields being assumed null)
 
 class CNewVarOffsetKeySegmentMonitor : public CIndirectKeySegmentMonitor
 {
 public:
     CNewVarOffsetKeySegmentMonitor(IKeySegmentMonitor * _base, unsigned _offset, unsigned _fieldIdx)
-    : CIndirectKeySegmentMonitor(_base, _offset), fieldIdx(_fieldIdx)
+    : CIndirectKeySegmentMonitor(_base, 0), fieldIdx(_fieldIdx)
     {
+        // We no longer use partial size for offset: the base class is always given 0, so callers must pass 0.
+        // This must be a comparison - an assignment here would evaluate to 0 and trip the assert on every call.
+        assert(_offset == 0);
     }
 
     CNewVarOffsetKeySegmentMonitor(MemoryBuffer &mb)

+ 35 - 1
rtl/eclrtl/rtlrecord.cpp

@@ -495,7 +495,7 @@ COutputMetaData::~COutputMetaData()
 const RtlRecord &COutputMetaData::queryRecordAccessor(bool expand) const
 {
     // NOTE: the recordAccessor field cannot be declared as atomic, since the class definition is included in generated
-    // code which is not (yet) compiled using C++11. If that changes then the reinterpret_cast can be removed.
+    // code which does not include <atomic>. If that changes then the reinterpret_cast can be removed.
     std::atomic<const RtlRecord *> &aRecordAccessor = reinterpret_cast<std::atomic<const RtlRecord *> &>(recordAccessor[expand]);
     const RtlRecord *useAccessor = aRecordAccessor.load(std::memory_order_relaxed);
     if (!useAccessor)
@@ -503,6 +503,40 @@ const RtlRecord &COutputMetaData::queryRecordAccessor(bool expand) const
     return *useAccessor;
 }
 
+size32_t COutputMetaData::getRecordSize(const void * data)
+{
+    const RtlRecord &accessor = queryRecordAccessor(true);
+    size32_t fixedSize = accessor.getFixedSize();
+    if (fixedSize)
+        return fixedSize;
+
+    //A fixed size of 0 means the size has to be calculated from the row itself.
+    //Allocate a temporary offset array on the stack to avoid runtime overhead.
+    unsigned offsetCount = accessor.getNumVarFields() + 1;
+    size_t * offsets = (size_t *)alloca(offsetCount * sizeof(size_t));
+    RtlRow rowOffsets(accessor, data, offsetCount, offsets);
+    return rowOffsets.getRecordSize();
+}
+
+// Constructs the metadata from type information held in a MemoryBuffer.
+// A field-type deserializer is created and kept in the 'deserializer' member; typeInfo is set to
+// whatever its deserialize() call returns.
+// NOTE(review): presumably the deserializer owns the storage behind typeInfo - confirm.
+CDeserializedOutputMetaData::CDeserializedOutputMetaData(MemoryBuffer &binInfo)
+{
+    deserializer.setown(createRtlFieldTypeDeserializer());
+    typeInfo = deserializer->deserialize(binInfo);
+}
+
+// Constructs the metadata from type information supplied as a property tree.
+// A field-type deserializer is created and kept in the 'deserializer' member; typeInfo is set to
+// whatever its deserialize() call returns.
+// NOTE(review): presumably the deserializer owns the storage behind typeInfo - confirm.
+CDeserializedOutputMetaData::CDeserializedOutputMetaData(IPropertyTree &jsonInfo)
+{
+    deserializer.setown(createRtlFieldTypeDeserializer());
+    typeInfo = deserializer->deserialize(jsonInfo);
+}
+
+// Constructs the metadata from type information supplied as a C string ('json').
+// A field-type deserializer is created and kept in the 'deserializer' member; typeInfo is set to
+// whatever its deserialize() call returns.
+// NOTE(review): presumably the deserializer owns the storage behind typeInfo - confirm.
+CDeserializedOutputMetaData::CDeserializedOutputMetaData(const char *json)
+{
+    deserializer.setown(createRtlFieldTypeDeserializer());
+    typeInfo = deserializer->deserialize(json);
+}
+
+
 class CVariableOutputRowSerializer : public COutputRowSerializer
 {
 public:

+ 86 - 33
rtl/eclrtl/rtlrecord.hpp

@@ -50,7 +50,7 @@ public:
     inline COutputRowSerializer(unsigned _activityId) : CGlobalHelperClass(_activityId) { }
     RTLIMPLEMENT_IINTERFACE
 
-    virtual void serialize(IRowSerializerTarget & out, const byte * self) = 0;
+    virtual void serialize(IRowSerializerTarget & out, const byte * self) override = 0;
 };
 
 
@@ -62,7 +62,7 @@ public:
 
     inline void onCreate(ICodeContext * _ctx) { ctx = _ctx; }
 
-    virtual size32_t deserialize(ARowBuilder & rowBuilder, IRowDeserializerSource & in) = 0;
+    virtual size32_t deserialize(ARowBuilder & rowBuilder, IRowDeserializerSource & in) override = 0;
 
 protected:
     ICodeContext * ctx;
@@ -78,7 +78,7 @@ public:
 
     inline void onCreate(ICodeContext * _ctx) { ctx = _ctx; }
 
-    virtual void readAhead(IRowDeserializerSource & in) = 0;
+    virtual void readAhead(IRowDeserializerSource & in) override = 0;
 
 protected:
     ICodeContext * ctx;
@@ -138,8 +138,8 @@ public:
     CNormalizeChildIterator(IOutputMetaData & _recordSize) : iter(0, NULL, _recordSize) {}
     RTLIMPLEMENT_IINTERFACE
 
-    virtual byte * first(const void * parentRecord)         { init(parentRecord); return (byte *)iter.first(); }
-    virtual byte * next()                                   { return (byte *)iter.next(); }
+    virtual byte * first(const void * parentRecord) override         { init(parentRecord); return (byte *)iter.first(); }
+    virtual byte * next() override                                   { return (byte *)iter.next(); }
     virtual void init(const void * parentRecord) = 0;
 
     inline void setDataset(size32_t len, const void * data) { iter.setDataset(len, data); }
@@ -154,8 +154,8 @@ public:
     CNormalizeLinkedChildIterator() : iter(0, NULL) {}
     RTLIMPLEMENT_IINTERFACE
 
-    virtual byte * first(const void * parentRecord)         { init(parentRecord); return (byte *)iter.first(); }
-    virtual byte * next()                                   { return (byte *)iter.next(); }
+    virtual byte * first(const void * parentRecord) override         { init(parentRecord); return (byte *)iter.first(); }
+    virtual byte * next() override                                   { return (byte *)iter.next(); }
     virtual void init(const void * parentRecord) = 0;
 
     inline void setDataset(unsigned _numRows, const byte * * _rows) { iter.setDataset(_numRows, _rows); }
@@ -170,8 +170,8 @@ public:
     CNormalizeStreamedChildIterator() {}
     RTLIMPLEMENT_IINTERFACE
 
-    virtual byte * first(const void * parentRecord)         { init(parentRecord); return (byte *)iter.first(); }
-    virtual byte * next()                                   { return (byte *)iter.next(); }
+    virtual byte * first(const void * parentRecord) override         { init(parentRecord); return (byte *)iter.first(); }
+    virtual byte * next() override                                   { return (byte *)iter.next(); }
     virtual void init(const void * parentRecord) = 0;
 
     inline void setDataset(IRowStream * _streamed) { iter.init(_streamed); }
@@ -305,7 +305,7 @@ public:
     RtlRecordSize(const RtlRecordTypeInfo & fields) : offsetInformation(fields, true) {}
     RTLIMPLEMENT_IINTERFACE
 
-    virtual size32_t getRecordSize(const void * row)
+    virtual size32_t getRecordSize(const void * row) override
     {
         //Allocate a temporary offset array on the stack to avoid runtime overhead.
         unsigned numOffsets = offsetInformation.getNumVarFields() + 1;
@@ -314,12 +314,12 @@ public:
         return offsetCalculator.getRecordSize();
     }
 
-    virtual size32_t getFixedSize() const
+    virtual size32_t getFixedSize() const override
     {
         return offsetInformation.getFixedSize();
     }
     // returns 0 for variable row size
-    virtual size32_t getMinRecordSize() const
+    virtual size32_t getMinRecordSize() const override
     {
         return offsetInformation.getMinRecordSize();
     }
@@ -337,35 +337,48 @@ public:
     COutputMetaData();
     ~COutputMetaData();
 
-    virtual void toXML(const byte * self, IXmlWriter & out) {
-                                                                const RtlTypeInfo * type = queryTypeInfo();
-                                                                if (type)
-                                                                {
-                                                                    RtlFieldStrInfo dummyField("",NULL,type);
-                                                                    type->toXML(self, self, &dummyField, out);
-                                                                }
-                                                            }
-    virtual unsigned getVersion() const                     { return OUTPUTMETADATA_VERSION; }
-    virtual unsigned getMetaFlags()                         { return MDFhasserialize|MDFhasxml; }
-
-    virtual void destruct(byte * self)                      {}
-    virtual IOutputMetaData * querySerializedDiskMeta()    { return this; }
-    virtual IOutputRowSerializer * createDiskSerializer(ICodeContext * ctx, unsigned activityId);
-    virtual ISourceRowPrefetcher * createDiskPrefetcher(ICodeContext * ctx, unsigned activityId);
-    virtual IOutputRowDeserializer * createDiskDeserializer(ICodeContext * ctx, unsigned activityId);
+    virtual void toXML(const byte * self, IXmlWriter & out) override
+    {
+        //Nothing is written if no type information is available.
+        const RtlTypeInfo * info = queryTypeInfo();
+        if (!info)
+            return;
+        //The type-level toXML takes a field descriptor, so wrap the type in an unnamed placeholder field.
+        RtlFieldStrInfo placeholder("",NULL,info);
+        info->toXML(self, self, &placeholder, out);
+    }
+    virtual unsigned getVersion() const override                     { return OUTPUTMETADATA_VERSION; }
+    virtual unsigned getMetaFlags() override                         { return MDFhasserialize|MDFhasxml; }
+
+    virtual void destruct(byte * self) override                      {}
+    virtual IOutputMetaData * querySerializedDiskMeta() override    { return this; }
+    virtual IOutputRowSerializer * createDiskSerializer(ICodeContext * ctx, unsigned activityId) override;
+    virtual ISourceRowPrefetcher * createDiskPrefetcher(ICodeContext * ctx, unsigned activityId) override;
+    virtual IOutputRowDeserializer * createDiskDeserializer(ICodeContext * ctx, unsigned activityId) override;
     //Default internal serializers are the same as the disk versions
-    virtual IOutputRowSerializer * createInternalSerializer(ICodeContext * ctx, unsigned activityId)
+    virtual IOutputRowSerializer * createInternalSerializer(ICodeContext * ctx, unsigned activityId) override
     {
         return createDiskSerializer(ctx, activityId);
     }
-    virtual IOutputRowDeserializer * createInternalDeserializer(ICodeContext * ctx, unsigned activityId)
+    virtual IOutputRowDeserializer * createInternalDeserializer(ICodeContext * ctx, unsigned activityId) override
     {
         return createDiskDeserializer(ctx, activityId);
     }
-    virtual void walkIndirectMembers(const byte * self, IIndirectMemberVisitor & visitor) { }
-    virtual IOutputMetaData * queryChildMeta(unsigned i) { return NULL; }
+    virtual void walkIndirectMembers(const byte * self, IIndirectMemberVisitor & visitor) override{ }
+    virtual IOutputMetaData * queryChildMeta(unsigned i) override { return NULL; }
+
+    virtual const RtlRecord &queryRecordAccessor(bool expand) const override final;
+    virtual size32_t getRecordSize(const void * data) override;
+
+    // Fixed size of the record, as reported by the expanded record accessor.
+    // getRecordSize() treats a result of 0 as "size must be calculated from the row".
+    virtual size32_t getFixedSize() const override
+    {
+        return queryRecordAccessor(true).getFixedSize();
+    }
+    // returns 0 for variable row size
+    // NOTE(review): the remark above looks like it describes getFixedSize() rather than this function - confirm.
+    // Minimum record size, as reported by the expanded record accessor.
+    virtual size32_t getMinRecordSize() const override
+    {
+        return queryRecordAccessor(true).getMinRecordSize();
+    }
 
-    virtual const RtlRecord &queryRecordAccessor(bool expand) const;
 
 protected:
     //This is the prefetch function that is actually generated by the code generator
@@ -375,6 +388,46 @@ protected:
     mutable RtlRecord *recordAccessor[2];
 };
 
+/**
+ * class CDynamicOutputMetaData
+ *
+ * An implementation of IOutputMetaData for use with a dynamically-created record type info structure
+ *
+ * The type info is held by reference rather than copied, so the RtlRecordTypeInfo passed to the
+ * constructor must remain valid for as long as this object is in use.
+ */
+
+class ECLRTL_API CDynamicOutputMetaData : public COutputMetaData
+{
+public:
+    // Binds this metadata object to 'fields'; no copy of the type info is taken.
+    CDynamicOutputMetaData(const RtlRecordTypeInfo & fields) : typeInfo(fields)
+    {
+    }
+
+    // Returns the address of the type info supplied at construction.
+    virtual const RtlTypeInfo * queryTypeInfo() const override { return &typeInfo; }
+protected:
+    const RtlTypeInfo &typeInfo;    // Not owned - refers to the caller's structure
+};
+
+/**
+ * class CDeserializedOutputMetaData
+ *
+ * An implementation of IOutputMetaData for use with serialized rtlTypeInfo information
+ *
+ * Each constructor accepts the serialized information in a different form (a MemoryBuffer,
+ * a property tree, or a C string).
+ */
+
+class ECLRTL_API CDeserializedOutputMetaData : public COutputMetaData
+{
+public:
+    CDeserializedOutputMetaData(MemoryBuffer &binInfo);
+    CDeserializedOutputMetaData(IPropertyTree &jsonInfo);
+    CDeserializedOutputMetaData(const char *json);
+
+    // Returns the deserialized type info pointer (nullptr until a constructor has assigned it).
+    virtual const RtlTypeInfo * queryTypeInfo() const override { return typeInfo; }
+protected:
+    // NOTE(review): presumably kept alive because it owns the storage behind typeInfo - confirm.
+    Owned<IRtlFieldTypeDeserializer> deserializer;
+    const RtlTypeInfo *typeInfo = nullptr;
+};
+
+
 class ECLRTL_API CFixedOutputMetaData : public COutputMetaData
 {
 public: