Преглед на файлове

Merge pull request #12617 from jakesmith/hpcc-18484

HPCC-18484 dafilesrv csv, xml, json support

Reviewed-by: Gavin Halliday <ghalliday@hpccsystems.com>
Gavin Halliday преди 6 години
родител
ревизия
91c701744a

+ 26 - 0
common/thorhelper/csvsplitter.cpp

@@ -162,6 +162,32 @@ void CSVSplitter::init(unsigned _maxColumns, ICsvParameters * csvInfo, const cha
     }
 }
 
+void CSVSplitter::init(unsigned _maxColumns, size32_t _maxCsvSize, const char *quotes, const char *separators, const char *terminators, const char *escapes, bool preserveWhitespace)
+{
+    reset();
+
+    maxCsvSize = _maxCsvSize;
+    maxColumns = _maxColumns;
+    lengths = new unsigned [maxColumns+1];      // NB: One larger to remove some tests in main loop...
+    data = new const byte * [maxColumns+1];
+
+    unsigned idx;
+    if (quotes)
+        addActionList(matcher, quotes, QUOTE);
+    if (separators)
+        addActionList(matcher, separators, SEPARATOR);
+    if (terminators)
+        addActionList(matcher, terminators, TERMINATOR);
+    if (escapes)
+        addActionList(matcher, escapes, ESCAPE);
+
+    if (preserveWhitespace)
+    {
+        matcher.queryAddEntry(1, " ", WHITESPACE);
+        matcher.queryAddEntry(1, "\t", WHITESPACE);
+    }
+}
+
 void CSVSplitter::setFieldRange(const byte * start, const byte * end, unsigned curColumn, unsigned quoteToStrip, bool unescape)
 {
     size32_t sizeOriginal = (size32_t)(end - start);

+ 3 - 2
common/thorhelper/csvsplitter.hpp

@@ -70,12 +70,13 @@ public:
     void addEscape(const char * text);
 
     void init(unsigned maxColumns, ICsvParameters * csvInfo, const char * dfsQuotes, const char * dfsSeparators, const char * dfsTerminators, const char * dfsEscapes);
+    void init(unsigned maxColumns, size32_t maxCsvSize, const char *quotes, const char *separators, const char *terminators, const char *escapes, bool preserveWhitespace);
     void reset();
     size32_t splitLine(size32_t maxLen, const byte * start);
     size32_t splitLine(ISerialStream *stream, size32_t maxRowSize);
 
-    inline unsigned * queryLengths() { return lengths; }
-    inline const byte * * queryData() { return data; }
+    inline unsigned * queryLengths() const { return lengths; }
+    inline const byte * * queryData() const { return data; }
 
 protected:
     void setFieldRange(const byte * start, const byte * end, unsigned curColumn, unsigned quoteToStrip, bool unescape);

+ 13 - 1
common/thorhelper/thorxmlread.cpp

@@ -329,6 +329,13 @@ void XmlDatasetColumnProvider::readUtf8X(size32_t & len, char * & target, const
         rtlUtf8ToUtf8X(len, target, _lenDefault, _default);
 }
 
+const char *XmlDatasetColumnProvider::readRaw(const char * path, size32_t &sz) const
+{
+    const char *value = row->queryProp(path);
+    sz = value ? strlen(value) : 0;
+    return value;
+}
+
 //=====================================================================================================
 
 bool XmlSetColumnProvider::getBool(const char * name)
@@ -1632,7 +1639,12 @@ public:
         }
         return _default;
     }
-
+    virtual const char *readRaw(const char * path, size32_t &sz) const override
+    {
+        const char *value = node->queryProp(path);
+        sz = value ? strlen(value) : 0;
+        return value;
+    }
 };
 
 void CColumnIterator::setCurrent()

+ 1 - 0
common/thorhelper/thorxmlread.hpp

@@ -74,6 +74,7 @@ public:
     virtual void        readDataRaw(size32_t len, void * text, const char * path, size32_t _lenDefault, const void * _default);
     virtual void        readDataRawX(size32_t & len, void * & text, const char * path, size32_t _lenDefault, const void * _default);
 
+    virtual const char *readRaw(const char * path, size32_t &sz) const override;
 };
 
 class thorhelper_decl XmlColumnIterator : implements IColumnProviderIterator, public CInterface

+ 27 - 3
dali/datest/datest.cpp

@@ -2238,11 +2238,26 @@ void TestSDS1()
 #endif
 }
 
-void testDfuStreamRead(const char *fname)
+void testDfuStreamRead(StringArray &params)
 {
     // reads a DFS file
     try
     {
+        const char *fname = params.item(0);
+        const char *filter = nullptr;
+        const char *outputECLFormat = nullptr;
+        if (params.ordinality()>1)
+        {
+            filter = params.item(1);
+            if (isEmptyString(filter))
+                filter = nullptr;
+            if (params.ordinality()>2)
+            {
+                outputECLFormat = params.item(2);
+                if (isEmptyString(outputECLFormat))
+                    outputECLFormat = nullptr;
+            }
+        }
         Owned<IUserDescriptor> userDesc = createUserDescriptor();
         userDesc->set("jsmith","password");
 
@@ -2254,13 +2269,22 @@ void testDfuStreamRead(const char *fname)
         }
 
         IOutputMetaData *meta = srcFile->queryEngineInterface()->queryMeta();
-        CommonXmlWriter xmlWriter(0);
+        CommonXmlWriter xmlWriter(XWFnoindent);
 
         unsigned sourceN = srcFile->queryNumParts();
         for (unsigned p=0; p<sourceN; p++)
         {
             Owned<IDFUFilePartReader> reader = srcFile->createFilePartReader(p, 0, nullptr, true);
 
+            if (outputECLFormat)
+            {
+                reader->setOutputRecordFormat(outputECLFormat);
+                meta = reader->queryMeta();
+            }
+
+            if (filter)
+                reader->addFieldFilter(filter);
+
             reader->start();
 
             while (true)
@@ -3524,7 +3548,7 @@ int main(int argc, char* argv[])
             else if (TEST("MULTICONNECT"))
                 testMultiConnect();
             else if (TEST("DFUSTREAMREAD"))
-                testDfuStreamRead(testParams.item(0));
+                testDfuStreamRead(testParams);
             else if (TEST("DFUSTREAMWRITE"))
                 testDfuStreamWrite(testParams.ordinality() ? testParams.item(0) : nullptr);
             else if (TEST("DFUSTREAMCOPY"))

+ 1 - 0
esp/scm/ws_dfu.ecm

@@ -906,6 +906,7 @@ ESPenum DFUFileType : string
     Xml("Xml"),
     Csv("Csv"),
     Json("Json"),
+    Unset("Unset"), // placed here to avoid enum values shifting (if at start) and causing backward compatibility issues.
 };
 
 ESPresponse [exceptions_inline] DFUFileAccessResponse

+ 2 - 2
esp/services/ws_dfu/ws_dfuService.cpp

@@ -6069,6 +6069,7 @@ void CWsDfuEx::dFUFileAccessCommon(IEspContext &context, const CDfsLogicalFileNa
     encodeDFUFileMeta(metaInfoBlob, metaInfo, env);
     accessInfo.setMetaInfoBlob(metaInfoBlob);
 
+    CDFUFileType kind = CDFUFileType_Unset;
     if (returnTextResponse)
     {
         getFilePartsInfo(context, *fileDesc, false, accessInfo);
@@ -6078,7 +6079,6 @@ void CWsDfuEx::dFUFileAccessCommon(IEspContext &context, const CDfsLogicalFileNa
         accessInfo.setFileAccessPort(metaInfo->getPropInt("port"));
         accessInfo.setFileAccessSSL(metaInfo->getPropBool("secure"));
 
-        CDFUFileType kind = DFUFileType_Undefined;
         if (isFileKey(fileDesc))
             kind = CDFUFileType_Index;
         else
@@ -6093,8 +6093,8 @@ void CWsDfuEx::dFUFileAccessCommon(IEspContext &context, const CDfsLogicalFileNa
             else if (streq("json", kindStr))
                 kind = CDFUFileType_Json;
         }
-        resp.setType(kind);
     }
+    resp.setType(kind);
 
     LOG(daliAuditLogCat,",FileAccess,EspProcess,READ,%s,%s,%s,jobid=%s,expirySecs=%d", cluster.str(), userID.str(), fileName.str(), requestId, expirySecs);
 }

+ 2 - 0
fs/dafsserver/CMakeLists.txt

@@ -41,6 +41,7 @@ include_directories (
          ${HPCC_SOURCE_DIR}/rtl/eclrtl
          ${HPCC_SOURCE_DIR}/ecl/hql
          ${HPCC_SOURCE_DIR}/common/deftype
+         ${HPCC_SOURCE_DIR}/common/thorhelper
          ${HPCC_SOURCE_DIR}/fs/dafsclient
          ${HPCC_SOURCE_DIR}/dali/base
          ${HPCC_SOURCE_DIR}/testing/unittests
@@ -61,6 +62,7 @@ target_link_libraries ( dafsserver
     hql   
     dafsclient
     dalibase
+    thorhelper
     ${CPPUNIT_LIBRARIES}
     )
 

Файловите разлики са ограничени, защото са твърде много
+ 705 - 108
fs/dafsserver/dafsserver.cpp


+ 53 - 7
fs/dafsstream/dafsstream.cpp

@@ -50,6 +50,24 @@ static StringAttr defaultCompCompression = "LZ4";
 static const char *DFUFileIdSeparator = "|";
 static const unsigned defaultExpirySecs = 300;
 
+static const char *getReadActivityString(DFUFileType fileType)
+{
+    switch (fileType)
+    {
+        case dft_flat:
+            return "diskread";
+        case dft_index:
+            return "indexread";
+        case dft_csv:
+            return "csvread";
+        case dft_xml:
+            return "xmlread";
+        case dft_json:
+            return "jsonread";
+    }
+    return "unknown";
+}
+
 class CDaFsException : public CSimpleInterfaceOf<IDaFsException>
 {
     DaFsExceptionCode code;
@@ -206,7 +224,22 @@ public:
         if (isFileKey(fileDesc))
             fileType = dft_index;
         else
-            fileType = dft_flat;
+        {
+            const char *kind = fileDesc->queryKind();
+            if (kind)
+            {
+                if (streq("csv", kind))
+                    fileType = dft_csv;
+                else if (streq("xml", kind))
+                    fileType = dft_xml;
+                else if (streq("json", kind))
+                    fileType = dft_json;
+                else
+                    fileType = dft_flat;
+            }
+            else
+                fileType = dft_flat;
+        }
 
         fileDesc->getClusterGroupName(0, groupName);
         grouped = fileDesc->isGrouped();
@@ -855,14 +888,25 @@ public:
             requestNode->setPropBool("outputGrouped", preserveGrouping);
         }
 
+        // JCSMORE these are defaults, but should be picked up from file->queryFileDescriptor()
         switch (file->queryType())
         {
-            case dft_flat:
-                requestNode->setProp("kind", "diskread");
+            case dft_xml:
+                requestNode->setProp("xpath", "/Dataset/Row");
+                break;
+            case dft_json:
+                requestNode->setProp("xpath", "/Row");
                 break;
+        }
+        switch (file->queryType())
+        {
+            case dft_xml:
+            case dft_json:
+            case dft_flat:
+            case dft_csv:
             case dft_index:
             {
-                requestNode->setProp("kind", "indexread");
+                requestNode->setProp("kind", getReadActivityString(file->queryType()));
                 break;
             }
             default:
@@ -970,9 +1014,11 @@ public:
         // this is purely to validate the textFilter
         const RtlRecord *record = &file->queryMeta()->queryRecordAccessor(true);
         Owned<IFieldFilter> rtlFilter = deserializeFieldFilter(*record, textFilter);
-
-        fieldFilters.push_back(textFilter);
-        variableContentDirty = true;
+        if (rtlFilter)
+        {
+            fieldFilters.push_back(textFilter);
+            variableContentDirty = true;
+        }
     }
     virtual void clearFieldFilters() override
     {

+ 1 - 1
fs/dafsstream/dafsstream.hpp

@@ -31,7 +31,7 @@ interface IOutputMetaData;
 namespace dafsstream
 {
 
-enum DFUFileType { dft_none, dft_flat, dft_index };
+enum DFUFileType { dft_none, dft_flat, dft_index, dft_csv, dft_xml, dft_json };
 
 enum DaFsExceptionCode
 {

+ 10 - 0
rtl/eclrtl/rtlds.cpp

@@ -1892,3 +1892,13 @@ void MemoryBufferBuilder::finishRow(size32_t length)
     self = NULL;
     reserved = 0;
 }
+
+void MemoryBufferBuilder::removeBytes(size32_t len)
+{
+    dbgassertex(buffer);
+    assertex(len>=reserved);
+    size32_t sz = buffer->length();
+    dbgassertex(sz>=len);
+    buffer->setLength(sz-len);
+    reserved -= len;
+}

+ 2 - 0
rtl/eclrtl/rtlds_imp.hpp

@@ -691,6 +691,8 @@ public:
         buffer = &_buffer;
     }
 
+    void removeBytes(size32_t len);
+
     virtual byte * ensureCapacity(size32_t required, const char * fieldName);
 
     MemoryBufferBuilder &ensureRow()

+ 227 - 75
rtl/eclrtl/rtldynfield.cpp

@@ -993,6 +993,7 @@ enum FieldMatchType {
     // This flag may be set in conjunction with the others
     match_inifblock   = 0x400,   // matching to a field in an ifblock - may not be present
     match_deblob      = 0x1000,  // source needs fetching from a blob prior to translation
+    match_dynamic     = 0x2000,  // source needs fetching from dynamic source (callback)
 };
 
 StringBuffer &describeFlags(StringBuffer &out, FieldMatchType flags)
@@ -1013,6 +1014,7 @@ StringBuffer &describeFlags(StringBuffer &out, FieldMatchType flags)
     if (flags & match_fail) out.append("|fail");
     if (flags & match_virtual) out.append("|virtual");
     if (flags & match_deblob) out.append("|blob");
+    if (flags & match_dynamic) out.append("|dynamic");
     assertex(out.length() > origlen);
     return out.remove(origlen, 1);
 }
@@ -1023,8 +1025,8 @@ inline FieldMatchType &operator|=(FieldMatchType &a, FieldMatchType b) { return
 class GeneralRecordTranslator : public CInterfaceOf<IDynamicTransform>
 {
 public:
-    GeneralRecordTranslator(const RtlRecord &_destRecInfo, const RtlRecord &_srcRecInfo)
-    : destRecInfo(_destRecInfo), sourceRecInfo(_srcRecInfo)
+    GeneralRecordTranslator(const RtlRecord &_destRecInfo, const RtlRecord &_srcRecInfo, bool _binarySource)
+        : destRecInfo(_destRecInfo), sourceRecInfo(_srcRecInfo), binarySource(_binarySource)
     {
         matchInfo = new MatchInfo[destRecInfo.getNumFields()];
         createMatchInfo();
@@ -1036,18 +1038,26 @@ public:
     {
         delete [] matchInfo;
     }
+// IDynamicTransform impl.
     virtual void describe() const override
     {
         doDescribe(0);
     }
     virtual size32_t translate(ARowBuilder &builder, IVirtualFieldCallback & callback, const byte *sourceRec) const override
     {
+        assertex(binarySource);
         return doTranslate(builder, callback, 0, sourceRec);
     }
     virtual size32_t translate(ARowBuilder &builder, IVirtualFieldCallback & callback, const RtlRow &sourceRow) const override
     {
+        assertex(binarySource);
         sourceRow.lazyCalcOffsets(-1);  // MORE - could save the max one we actually need...
-        return doTranslate(builder, callback, 0, sourceRow);
+        return doTranslateOpaqueType(builder, callback, 0, &sourceRow);
+    }
+    virtual size32_t translate(ARowBuilder &builder, IVirtualFieldCallback & callback, const IDynamicFieldValueFetcher & fetcher) const override
+    {
+        assertex(!binarySource);
+        return doTranslateOpaqueType(builder, callback, 0, &fetcher);
     }
     virtual bool canTranslate() const override
     {
@@ -1099,9 +1109,9 @@ private:
         unsigned numOffsets = sourceRecInfo.getNumVarFields() + 1;
         size_t * variableOffsets = (size_t *)alloca(numOffsets * sizeof(size_t));
         RtlRow sourceRow(sourceRecInfo, sourceRec, numOffsets, variableOffsets);  // MORE - could save the max source offset we actually need, and only set up that many...
-        return doTranslate(builder, callback, offset, sourceRow);
+        return doTranslateOpaqueType(builder, callback, offset, &sourceRow);
     }
-    size32_t doTranslate(ARowBuilder &builder, IVirtualFieldCallback & callback, size32_t offset, const RtlRow &sourceRow) const
+    size32_t doTranslateOpaqueType(ARowBuilder &builder, IVirtualFieldCallback & callback, size32_t offset, const void *sourceRow) const
     {
         dbgassertex(canTranslate());
         byte * destConditions = (byte *)alloca(destRecInfo.getNumIfBlocks() * sizeof(byte));
@@ -1110,7 +1120,10 @@ private:
         bool hasBlobs = false;
         if (!estimate)
         {
-            estimate = estimateNewSize(sourceRow);
+            if (binarySource)
+                estimate = estimateNewSize(*(const RtlRow *)sourceRow);
+            else
+                estimate = destRecInfo.getMinRecordSize();
             builder.ensureCapacity(offset+estimate, "record");
         }
         size32_t origOffset = offset;
@@ -1130,14 +1143,14 @@ private:
                 switch (getVirtualInitializer(field->initializer))
                 {
                 case FVirtualFilePosition:
-                    offset = type->buildInt(builder, offset, field, callback.getFilePosition(sourceRow.queryRow()));
+                    offset = type->buildInt(builder, offset, field, callback.getFilePosition(sourceRow));
                     break;
                 case FVirtualLocalFilePosition:
-                    offset = type->buildInt(builder, offset, field, callback.getLocalFilePosition(sourceRow.queryRow()));
+                    offset = type->buildInt(builder, offset, field, callback.getLocalFilePosition(sourceRow));
                     break;
                 case FVirtualFilename:
                     {
-                        const char * filename = callback.queryLogicalFilename(sourceRow.queryRow());
+                        const char * filename = callback.queryLogicalFilename(sourceRow);
                         offset = type->buildString(builder, offset, field, strlen(filename), filename);
                         break;
                     }
@@ -1149,9 +1162,17 @@ private:
             {
                 unsigned matchField = match.matchIdx;
                 const RtlTypeInfo *sourceType = sourceRecInfo.queryType(matchField);
-                size_t sourceOffset = sourceRow.getOffset(matchField);
-                const byte *source = sourceRow.queryRow() + sourceOffset;
-                size_t copySize = sourceRow.getSize(matchField);
+
+                size_t sourceOffset = 0;
+                const byte *source = nullptr;
+                size_t copySize = 0;
+                if (binarySource)
+                {
+                    const RtlRow &rtlRow = *(const RtlRow *)sourceRow;
+                    sourceOffset = rtlRow.getOffset(matchField);
+                    source = rtlRow.queryRow() + sourceOffset;
+                    copySize = rtlRow.getSize(matchField);
+                }
                 if (match.matchType & match_deblob)
                 {
                     offset_t blobId = sourceType->getInt(source);
@@ -1185,7 +1206,7 @@ private:
                                 else
                                     break;
                             }
-                            copySize = sourceRow.getOffset(matchField+1) - sourceOffset;
+                            copySize = ((const RtlRow *)sourceRow)->getOffset(matchField+1) - sourceOffset;
                         }
                         builder.ensureCapacity(offset+copySize, field->name);
                         memcpy(builder.getSelf()+offset, source, copySize);
@@ -1216,6 +1237,13 @@ private:
                     case match_typecast:
                         offset = translateScalar(builder, offset, field, *type, *sourceType, source);
                         break;
+                    case match_typecast|match_dynamic:
+                    {
+                        const IDynamicFieldValueFetcher &callbackRowHandler = *(const IDynamicFieldValueFetcher *)sourceRow;
+                        source = callbackRowHandler.queryValue(matchField, copySize);
+                        offset = translateScalarFromUtf8(builder, offset, field, *type, *sourceType, (const char *)source, (size_t)copySize);
+                        break;
+                    }
                     case match_link:
                     {
                         // a 32-bit record count, and a (linked) pointer to an array of record pointers
@@ -1225,6 +1253,61 @@ private:
                         offset += sizeof(size32_t)+sizeof(const byte **);
                         break;
                     }
+                    case match_recurse|match_dynamic:
+                    {
+                        const IDynamicFieldValueFetcher &callbackRowHandler = *(const IDynamicFieldValueFetcher *)sourceRow;
+                        Owned<IDynamicRowIterator> iterator = callbackRowHandler.getNestedIterator(matchField);
+                        if (type->getType()==type_record)
+                        {
+                            IDynamicFieldValueFetcher &fieldFetcher = iterator->query();
+                            offset = match.subTrans->doTranslateOpaqueType(builder, callback, offset, &fieldFetcher);
+                        }
+                        else if (type->isLinkCounted())
+                        {
+                            // a 32-bit record count, and a pointer to an array of record pointers
+                            IEngineRowAllocator *childAllocator = builder.queryAllocator()->createChildRowAllocator(type->queryChildType());
+                            assertex(childAllocator);  // May not be available when using serialized types (but unlikely to want to create linkcounted children remotely either)
+
+                            size32_t sizeInBytes = sizeof(size32_t) + sizeof(void *);
+                            builder.ensureCapacity(offset+sizeInBytes, field->name);
+                            size32_t numRows = 0;
+                            const byte **childRows = nullptr;
+                            ForEach(*iterator)
+                            {
+                                IDynamicFieldValueFetcher &fieldFetcher = iterator->query();
+                                RtlDynamicRowBuilder childBuilder(*childAllocator);
+                                size32_t childLen = match.subTrans->doTranslateOpaqueType(childBuilder, callback, 0, &fieldFetcher);
+                                childRows = childAllocator->appendRowOwn(childRows, ++numRows, (void *) childBuilder.finalizeRowClear(childLen));
+                            }
+                            if (type->getType() == type_dictionary)
+                            {
+                                const RtlTypeInfo * childType = type->queryChildType();
+                                assertex(childType && childType->getType() == type_record);
+                                CHThorHashLookupInfo lookupHelper(static_cast<const RtlRecordTypeInfo &>(*childType));
+                                rtlCreateDictionaryFromDataset(numRows, childRows, childAllocator, lookupHelper);
+                            }
+                            // Go back in and patch the count, remembering it may have moved
+                            rtlWriteInt4(builder.getSelf()+offset, numRows);
+                            * ( const void * * ) (builder.getSelf()+offset+sizeof(size32_t)) = childRows;
+                            offset += sizeInBytes;
+                        }
+                        else
+                        {
+                            size32_t countOffset = offset;
+                            byte *dest = builder.ensureCapacity(offset+sizeof(size32_t), field->name)+offset;
+                            offset += sizeof(size32_t);
+                            size32_t initialOffset = offset;
+                            *(size32_t *)dest = 0;  // patched below when true figure known
+                            ForEach(*iterator)
+                            {
+                                IDynamicFieldValueFetcher &fieldFetcher = iterator->query();
+                                offset = match.subTrans->doTranslateOpaqueType(builder, callback, offset, &fieldFetcher);
+                            }
+                            dest = builder.getSelf() + countOffset;  // Note - may have been moved by reallocs since last calculated
+                            *(size32_t *)dest = offset - initialOffset;
+                        }
+                        break;
+                    }
                     case match_recurse:
                         if (type->getType()==type_record)
                             offset = match.subTrans->doTranslate(builder, callback, offset, source);
@@ -1347,6 +1430,7 @@ private:
     }
     const RtlRecord &destRecInfo;
     const RtlRecord &sourceRecInfo;
+    bool binarySource = true;
     unsigned fixedDelta = 0;  // total size of all fixed-size source fields that are not matched
     UnsignedArray unmatched;  // List of all variable-size source fields that are unmatched
     FieldMatchType matchFlags = match_perfect;
@@ -1363,47 +1447,50 @@ private:
         }
     } *matchInfo;
 
-    size32_t estimateNewSize(const RtlRow &sourceRow) const
+    static size32_t translateScalarFromUtf8(ARowBuilder &builder, size32_t offset, const RtlFieldInfo *field, const RtlTypeInfo &destType, const RtlTypeInfo &sourceType, const char *source, size_t srcSize)
     {
-        //DBGLOG("Source record size is %d", (int) sourceRow.getRecordSize());
-        size32_t expectedSize = sourceRow.getRecordSize() - fixedDelta;
-        //DBGLOG("Source record size without omitted fixed size fields is %d", expectedSize);
-        ForEachItemIn(i, unmatched)
+        switch(destType.getType())
         {
-            unsigned fieldNo = unmatched.item(i);
-            expectedSize -= sourceRow.getSize(fieldNo);
-            //DBGLOG("Reducing estimated size by %d to %d for omitted field %d (%s)", (int) sourceRow.getSize(fieldNo), expectedSize, fieldNo, sourceRecInfo.queryName(fieldNo));
+        case type_boolean:
+        case type_int:
+        case type_swapint:
+        case type_packedint:
+        case type_filepos:
+        case type_keyedint:
+        {
+            __int64 res = rtlStrToInt8(srcSize, source);
+            offset = destType.buildInt(builder, offset, field, res);
+            break;
         }
-        if (matchFlags & ~(match_perfect|match_link|match_none|match_virtual|match_extend|match_truncate))
+        case type_real:
         {
-            for (unsigned idx = 0; idx < destRecInfo.getNumFields(); idx++)
-            {
-                const MatchInfo &match = matchInfo[idx];
-                const RtlTypeInfo *type = destRecInfo.queryType(idx);
-                unsigned matchField = match.matchIdx;
-                switch (match.matchType)
-                {
-                case match_perfect:
-                case match_link:
-                case match_none:
-                case match_virtual:
-                case match_extend:
-                case match_truncate:
-                    // These ones were already included in fixedDelta
-                    break;
-                default:
-                    // This errs on the side of small - i.e. it assumes that all typecasts end up at minimum size
-                    // We could do better in some cases e.g. variable string <-> variable unicode we can assume factor of 2,
-                    // uft8 <-> string we could calculate here - but unlikely to be worth the effort.
-                    // But it's fine for fixed size output fields, including truncate/extend
-                    // We could also precalculate the expected delta if all omitted fields are fixed size - but not sure how likely/worthwhile that is.
-                    expectedSize += type->getMinSize() - sourceRow.getSize(matchField);
-                    //DBGLOG("Adjusting estimated size by (%d - %d) to %d for translated field %d (%s)", (int) sourceRow.getSize(matchField), type->getMinSize(), expectedSize, matchField, sourceRecInfo.queryName(matchField));
-                    break;
-                }
-            }
+            double res = rtlStrToReal(srcSize, source);
+            offset = destType.buildReal(builder, offset, field, res);
+            break;
         }
-        return expectedSize;
+        case type_data:
+        case type_string:
+        case type_decimal:  // Go via string - not common enough to special-case
+        case type_varstring:
+        case type_qstring:
+        case type_utf8:
+            //MORE: Could special case casting from utf8 to utf8 similar to strings above
+        case type_unicode:
+        case type_varunicode:
+        {
+            size32_t utf8chars = rtlUtf8Length(srcSize, source);
+            offset = destType.buildUtf8(builder, offset, field, utf8chars, source);
+            break;
+        }
+        case type_set:
+        {
+            UNIMPLEMENTED; // JCS->GH - but perhaps can/should translate using iterator too?
+            break;
+        }
+        default:
+            throwUnexpected();
+        }
+        return offset;
     }
     static bool canTranslateNonScalar(const RtlTypeInfo * type, const RtlTypeInfo * sourceType)
     {
@@ -1417,7 +1504,6 @@ private:
             return true;
         return false;
     }
-
     void createMatchInfo()
     {
         for (unsigned idx = 0; idx < destRecInfo.getNumFields(); idx++)
@@ -1440,7 +1526,7 @@ private:
             {
                 bool deblob = false;
                 const RtlTypeInfo *sourceType = sourceRecInfo.queryType(info.matchIdx);
-                if (sourceType->isBlob())
+                if (binarySource && sourceType->isBlob())
                 {
                     if (type->isBlob())
                     {
@@ -1460,11 +1546,16 @@ private:
                         switch (type->getType())
                         {
                         case type_set:
-                            if (type->queryChildType()->fieldType==sourceType->queryChildType()->fieldType &&
-                                type->queryChildType()->length==sourceType->queryChildType()->length)
-                                info.matchType = match_perfect;
+                            if (binarySource)
+                            {
+                                if (type->queryChildType()->fieldType==sourceType->queryChildType()->fieldType &&
+                                    type->queryChildType()->length==sourceType->queryChildType()->length)
+                                    info.matchType = match_perfect;
+                                else
+                                    info.matchType = match_typecast;
+                            }
                             else
-                                info.matchType = match_typecast;
+                                info.matchType = match_typecast|match_dynamic;
                             break;
                         case type_row:      // These are not expected I think...
                             throwUnexpected();
@@ -1475,29 +1566,38 @@ private:
                         {
                             const RtlRecord *subDest = destRecInfo.queryNested(idx);
                             const RtlRecord *subSrc = sourceRecInfo.queryNested(info.matchIdx);
-                            info.subTrans = new GeneralRecordTranslator(*subDest, *subSrc);
+                            info.subTrans = new GeneralRecordTranslator(*subDest, *subSrc, binarySource);
                             if (!info.subTrans->needsTranslate())
                             {
-                                // Child does not require translation, but check linkcount mode matches too!
-                                if (type->isLinkCounted())
-                                    if (sourceType->isLinkCounted())
-                                        info.matchType = match_link;
-                                    else
-                                        info.matchType = match_recurse;
+                                if (!binarySource)
+                                    info.matchType = match_recurse|match_dynamic;
                                 else
-                                    if (sourceType->isLinkCounted())
-                                        info.matchType = match_recurse;
-                                    else
-                                        info.matchType = match_perfect;
-                                if (info.matchType != match_recurse)
                                 {
-                                    delete info.subTrans;
-                                    info.subTrans = nullptr;
+                                    // Child does not require translation, but check linkcount mode matches too!
+                                    if (type->isLinkCounted())
+                                    {
+                                        if (sourceType->isLinkCounted())
+                                            info.matchType = match_link;
+                                        else
+                                            info.matchType = match_recurse;
+                                    }
+                                    else
+                                    {
+                                        if (sourceType->isLinkCounted())
+                                            info.matchType = match_recurse;
+                                        else
+                                            info.matchType = match_perfect;
+                                    }
+                                    if (info.matchType != match_recurse)
+                                    {
+                                        delete info.subTrans;
+                                        info.subTrans = nullptr;
+                                    }
                                 }
                             }
                             else if (info.subTrans->canTranslate())
                             {
-                                info.matchType = match_recurse;
+                                info.matchType = binarySource ? match_recurse : (match_recurse|match_dynamic);
                                 matchFlags |= info.subTrans->matchFlags;
                             }
                             else
@@ -1505,7 +1605,9 @@ private:
                             break;
                         }
                         case type_blob:
-                            if (sourceType->isBlob())
+                            if (!binarySource)
+                                info.matchType = match_fail;
+                            else if (sourceType->isBlob())
                                 info.matchType = match_perfect;  // We don't check that the child type matches
                             else
                                 info.matchType = match_fail;
@@ -1516,7 +1618,9 @@ private:
                         }
                     }
                 }
-                else if (type->fieldType==sourceType->fieldType)
+                else if (!binarySource)
+                    info.matchType = match_typecast|match_dynamic;
+                else if ((type->fieldType==sourceType->fieldType))
                 {
                     if (type->length==sourceType->length)
                     {
@@ -1594,11 +1698,58 @@ private:
             //DBGLOG("Source record contains %d bytes of omitted fixed size fields", fixedDelta);
         }
     }
+    size32_t estimateNewSize(const RtlRow &sourceRow) const
+    {
+        //DBGLOG("Source record size is %d", (int) sourceRow.getRecordSize());
+        size32_t expectedSize = sourceRow.getRecordSize() - fixedDelta;
+        //DBGLOG("Source record size without omitted fixed size fields is %d", expectedSize);
+        ForEachItemIn(i, unmatched)
+        {
+            unsigned fieldNo = unmatched.item(i);
+            expectedSize -= sourceRow.getSize(fieldNo);
+            //DBGLOG("Reducing estimated size by %d to %d for omitted field %d (%s)", (int) sourceRow.getSize(fieldNo), expectedSize, fieldNo, sourceRecInfo.queryName(fieldNo));
+        }
+        if (matchFlags & ~(match_perfect|match_link|match_none|match_virtual|match_extend|match_truncate))
+        {
+            for (unsigned idx = 0; idx < destRecInfo.getNumFields(); idx++)
+            {
+                const MatchInfo &match = matchInfo[idx];
+                const RtlTypeInfo *type = destRecInfo.queryType(idx);
+                unsigned matchField = match.matchIdx;
+                switch (match.matchType)
+                {
+                case match_perfect:
+                case match_link:
+                case match_none:
+                case match_virtual:
+                case match_extend:
+                case match_truncate:
+                    // These ones were already included in fixedDelta
+                    break;
+                default:
+                    // This errs on the side of small - i.e. it assumes that all typecasts end up at minimum size
+                    // We could do better in some cases e.g. variable string <-> variable unicode we can assume factor of 2,
+                            // utf8 <-> string we could calculate here - but unlikely to be worth the effort.
+                    // But it's fine for fixed size output fields, including truncate/extend
+                    // We could also precalculate the expected delta if all omitted fields are fixed size - but not sure how likely/worthwhile that is.
+                    expectedSize += type->getMinSize() - sourceRow.getSize(matchField);
+                    //DBGLOG("Adjusting estimated size by (%d - %d) to %d for translated field %d (%s)", (int) sourceRow.getSize(matchField), type->getMinSize(), expectedSize, matchField, sourceRecInfo.queryName(matchField));
+                    break;
+                }
+            }
+        }
+        return expectedSize;
+    }
 };
 
-extern ECLRTL_API const IDynamicTransform *createRecordTranslator(const RtlRecord &_destRecInfo, const RtlRecord &_srcRecInfo)
+extern ECLRTL_API const IDynamicTransform *createRecordTranslator(const RtlRecord &destRecInfo, const RtlRecord &srcRecInfo)
+{
+    return new GeneralRecordTranslator(destRecInfo, srcRecInfo, true);
+}
+
+extern ECLRTL_API const IDynamicTransform *createRecordTranslatorViaCallback(const RtlRecord &destRecInfo, const RtlRecord &srcRecInfo)
 {
-    return new GeneralRecordTranslator(_destRecInfo, _srcRecInfo);
+    return new GeneralRecordTranslator(destRecInfo, srcRecInfo, false);
 }
 
 extern ECLRTL_API void throwTranslationError(const RtlRecord & destRecInfo, const RtlRecord & srcRecInfo, const char * filename)
@@ -1616,8 +1767,9 @@ class TranslatedRowStream : public CInterfaceOf<IRowStream>
 {
 public:
     TranslatedRowStream(IRowStream *_inputStream, IEngineRowAllocator *_resultAllocator, const RtlRecord &outputRecord, const RtlRecord &inputRecord)
-    : inputStream(_inputStream), resultAllocator(_resultAllocator), translator(new GeneralRecordTranslator(outputRecord, inputRecord))
+    : inputStream(_inputStream), resultAllocator(_resultAllocator)
     {
+        translator.setown(createRecordTranslator(outputRecord, inputRecord));
         translator->describe();
     }
     virtual const void *nextRow() override

+ 25 - 0
rtl/eclrtl/rtldynfield.hpp

@@ -108,11 +108,25 @@ enum class RecordTranslationMode:byte { None = 0, All = 1, Payload = 2, AlwaysDi
 extern ECLRTL_API RecordTranslationMode getTranslationMode(const char *modeStr);
 extern ECLRTL_API const char *getTranslationModeText(RecordTranslationMode val);
 
+interface IDynamicRowIterator;
+interface IDynamicFieldValueFetcher : extends IInterface
+{
+    virtual const byte *queryValue(unsigned fieldNum, size_t &sz) const = 0;
+    virtual IDynamicRowIterator *getNestedIterator(unsigned fieldNum) const = 0;
+    virtual size_t getSize(unsigned fieldNum) const = 0;
+    virtual size32_t getRecordSize() const = 0;
+};
+
+interface IDynamicRowIterator : extends IIteratorOf<IDynamicFieldValueFetcher>
+{
+};
+
 interface IDynamicTransform : public IInterface
 {
     virtual void describe() const = 0;
     virtual size32_t translate(ARowBuilder &builder, IVirtualFieldCallback & callback, const byte *sourceRec) const = 0;
     virtual size32_t translate(ARowBuilder &builder, IVirtualFieldCallback & callback, const RtlRow &sourceRow) const = 0;
+    virtual size32_t translate(ARowBuilder &builder, IVirtualFieldCallback & callback, const IDynamicFieldValueFetcher & fetcher) const = 0;
     virtual bool canTranslate() const = 0;
     virtual bool needsTranslate() const = 0;
     virtual bool keyedTranslated() const = 0;
@@ -148,7 +162,18 @@ interface IKeyTranslator : public IInterface
     virtual bool needsTranslate() const = 0;
 };
 
+interface IDynamicTransformViaCallback : public IInterface
+{
+    virtual void describe() const = 0;
+    virtual size32_t translate(ARowBuilder &builder, IVirtualFieldCallback & callback, const void *sourceRec) const = 0;
+    virtual bool canTranslate() const = 0;
+    virtual bool needsTranslate() const = 0;
+    virtual bool keyedTranslated() const = 0;
+    virtual bool needsNonVirtualTranslate() const = 0;
+};
+
 extern ECLRTL_API const IDynamicTransform *createRecordTranslator(const RtlRecord &_destRecInfo, const RtlRecord &_srcRecInfo);
+extern ECLRTL_API const IDynamicTransform *createRecordTranslatorViaCallback(const RtlRecord &_destRecInfo, const RtlRecord &_srcRecInfo);
 extern ECLRTL_API void throwTranslationError(const RtlRecord &_destRecInfo, const RtlRecord &_srcRecInfo, const char * filename);
 
 extern ECLRTL_API const IKeyTranslator *createKeyTranslator(const RtlRecord &_destRecInfo, const RtlRecord &_srcRecInfo);

+ 4 - 1
rtl/eclrtl/rtlnewkey.cpp

@@ -2091,7 +2091,10 @@ IFieldFilter * deserializeFieldFilter(const RtlRecord & record, const char * src
         fieldNum = atoi(fieldText.str());
     else
         fieldNum = record.getFieldNum(fieldText);
-    return deserializeFieldFilter(fieldNum, *record.queryType(fieldNum), src);
+    if (((unsigned)-1) == fieldNum)
+        return nullptr;
+    else
+        return deserializeFieldFilter(fieldNum, *record.queryType(fieldNum), src);
 }
 
 IFieldFilter * deserializeFieldFilter(unsigned fieldId, const RtlTypeInfo & type, MemoryBuffer & in)

+ 54 - 10
rtl/eclrtl/rtlrecord.cpp

@@ -88,7 +88,7 @@
  *   For nested selects the code would need to be consistent.
  */
 
-static unsigned countFields(const RtlFieldInfo * const * fields, bool & containsNested, unsigned &numIfBlocks)
+static unsigned countFields(const RtlFieldInfo * const * fields, bool & containsNested, bool &containsXPaths, unsigned &numIfBlocks)
 {
     unsigned cnt = 0;
     for (;*fields;fields++)
@@ -101,10 +101,14 @@ static unsigned countFields(const RtlFieldInfo * const * fields, bool & contains
                 numIfBlocks++;
             const RtlFieldInfo * const * nested = type->queryFields();
             if (nested)
-                cnt += countFields(nested, containsNested, numIfBlocks);
+                cnt += countFields(nested, containsNested, containsXPaths, numIfBlocks);
         }
         else
+        {
+            if (!containsXPaths && !isEmptyString((*fields)->xpath))
+                containsXPaths = true;
             cnt++;
+        }
     }
     return cnt;
 }
@@ -155,7 +159,7 @@ public:
     const IfBlockInfo &ifblock;
 };
 
-static unsigned expandNestedRows(unsigned idx, unsigned startIdx, const char *prefix, const RtlFieldInfo * const * fields, const RtlFieldInfo * * target, const char * *names, const IfBlockInfo *inIfBlock, ConstPointerArrayOf<IfBlockInfo> &ifblocks)
+static unsigned expandNestedRows(unsigned idx, unsigned startIdx, StringBuffer &prefix, StringBuffer &xPathPrefix, const RtlFieldInfo * const * fields, const RtlFieldInfo * * target, const char * *names, const char * *xpaths, const IfBlockInfo *inIfBlock, ConstPointerArrayOf<IfBlockInfo> &ifblocks)
 {
     for (;*fields;fields++)
     {
@@ -173,10 +177,21 @@ static unsigned expandNestedRows(unsigned idx, unsigned startIdx, const char *pr
             const RtlFieldInfo * const * nested = type->queryFields();
             if (nested)
             {
-                StringBuffer newPrefix(prefix);
-                if (cur->name && *cur->name)
-                    newPrefix.append(cur->name).append('.');
-                idx = expandNestedRows(idx, isIfBlock ? startIdx : idx, newPrefix.str(), nested, target, names, nestIfBlock, ifblocks);
+                size32_t prevPrefixLength = prefix.length();
+                if (!isEmptyString(cur->name))
+                    prefix.append(cur->name).append('.');
+
+                size32_t prevXPathPrefixLength = xPathPrefix.length();
+                if (xpaths)
+                {
+                    const char *xpath = cur->queryXPath();
+                    if (!isEmptyString(xpath))
+                        xPathPrefix.append(xpath).append('/');
+                }
+                idx = expandNestedRows(idx, isIfBlock ? startIdx : idx, prefix, xPathPrefix, nested, target, names, xpaths, nestIfBlock, ifblocks);
+                prefix.setLength(prevPrefixLength);
+                if (xpaths)
+                    xPathPrefix.setLength(prevXPathPrefixLength);
             }
         }
         else
@@ -186,9 +201,19 @@ static unsigned expandNestedRows(unsigned idx, unsigned startIdx, const char *pr
                 StringBuffer name(prefix);
                 name.append(cur->name);
                 names[idx] = name.detach();
+                if (xpaths)
+                {
+                    StringBuffer xpath(xPathPrefix);
+                    xpath.append(cur->queryXPath());
+                    xpaths[idx] = xpath.detach();
+                }
             }
             else
+            {
                 names[idx] = nullptr;
+                if (xpaths)
+                    xpaths[idx] = nullptr;
+            }
             if (inIfBlock && !(cur->flags & RFTMinifblock))
                 target[idx++] = new RtlCondFieldStrInfo(*cur, *inIfBlock);
             else
@@ -226,7 +251,7 @@ RtlRecord::RtlRecord(const RtlRecordTypeInfo & record, bool expandFields)
 {
 }
 
-RtlRecord::RtlRecord(const RtlFieldInfo * const *_fields, bool expandFields) : fields(_fields), originalFields(_fields), names(nullptr), nameMap(nullptr)
+RtlRecord::RtlRecord(const RtlFieldInfo * const *_fields, bool expandFields) : fields(_fields), originalFields(_fields), names(nullptr), xpaths(nullptr), nameMap(nullptr)
 {
     numVarFields = 0;
     numTables = 0;
@@ -236,14 +261,18 @@ RtlRecord::RtlRecord(const RtlFieldInfo * const *_fields, bool expandFields) : f
     if (expandFields)
     {
         bool containsNested = false;
-        numFields = countFields(fields, containsNested, numIfBlocks);
+        bool containsXPaths = false;
+        numFields = countFields(fields, containsNested, containsXPaths, numIfBlocks);
         if (containsNested)
         {
             ConstPointerArrayOf<IfBlockInfo> _ifblocks;
             const RtlFieldInfo * * allocated  = new const RtlFieldInfo * [numFields+1];
             names = new const char *[numFields];
+            if (containsXPaths)
+                xpaths = new const char *[numFields];
             fields = allocated;
-            unsigned idx = expandNestedRows(0, 0, nullptr, originalFields, allocated, names, nullptr, _ifblocks);
+            StringBuffer prefix, xPathPrefix;
+            unsigned idx = expandNestedRows(0, 0, prefix, xPathPrefix, originalFields, allocated, names, xpaths, nullptr, _ifblocks);
             ifblocks = _ifblocks.detach();
             assertex(idx == numFields);
             allocated[idx] = nullptr;
@@ -327,6 +356,14 @@ RtlRecord::~RtlRecord()
         }
         delete [] names;
     }
+    if (xpaths)
+    {
+        for (unsigned i = 0; i < numFields; i++)
+        {
+            free((char *) xpaths[i]);
+        }
+        delete [] xpaths;
+    }
     if (fields != originalFields)
     {
         for (const RtlFieldInfo * const * finger = fields; *finger; finger++)
@@ -528,6 +565,13 @@ const char *RtlRecord::queryName(unsigned field) const
     return fields[field]->name;
 }
 
+const char *RtlRecord::queryXPath(unsigned field) const // NB: returns name if no xpath
+{
+    if (xpaths && xpaths[field])
+        return xpaths[field];
+    return fields[field]->queryXPath();
+}
+
 const RtlRecord *RtlRecord::queryNested(unsigned fieldId) const
 {
     // Map goes in wrong direction (for size reasons). We could replace with a hashtable or binsearch but

+ 2 - 0
rtl/eclrtl/rtlrecord.hpp

@@ -234,6 +234,7 @@ public:
     const RtlFieldInfo * queryOriginalField(unsigned field) const;
     inline const RtlTypeInfo * queryType(unsigned field) const { return fields[field]->type; }
     const char * queryName(unsigned field) const;
+    const char * queryXPath(unsigned field) const; // NB: returns name if no xpath
     unsigned getFieldNum(const char *fieldName) const;
     const RtlRecord *queryNested(unsigned field) const;
     bool excluded(const RtlFieldInfo *field, const byte *row, byte *conditions) const;
@@ -250,6 +251,7 @@ protected:
     const RtlFieldInfo * const * originalFields;
     const RtlRecord **nestedTables;
     const char **names;
+    const char **xpaths;
     const IfBlockInfo **ifblocks;
     mutable const FieldNameToFieldNumMap *nameMap;
 };

+ 6 - 0
rtl/include/eclhelper.hpp

@@ -516,6 +516,10 @@ struct RtlFieldInfo
         return type->toXML(self, selfrow, this, target);
     }
     bool equivalent(const RtlFieldInfo *to) const;
+    const char *queryXPath() const
+    {
+        return xpath ? xpath : name;
+    }
 };
 
 enum
@@ -821,6 +825,8 @@ interface IColumnProvider : extends IInterface
 //V4
     virtual __uint64    getUInt(const char * path) = 0;
     virtual __uint64    readUInt(const char * path, __uint64 _default) = 0;
+
+    virtual const char *readRaw(const char * path, size32_t &sz) const { return nullptr; }
 };
 
 //Member - can extend if new accessor function defined.

Файловите разлики са ограничени, защото са твърде много
+ 24 - 0
testing/regress/ecl/key/nestedtranslate.xml


+ 100 - 0
testing/regress/ecl/nestedtranslate.ecl

@@ -0,0 +1,100 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2019 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+//class=file
+//version multiPart=true
+
+//#onwarning(10138, ignore);
+
+import ^ as root;
+multiPart := #IFDEFINED(root.multiPart, true);
+useLocal := #IFDEFINED(root.useLocal, false);
+useTranslation := #IFDEFINED(root.useTranslation, false);
+
+//--- end of version configuration ---
+
+import $.setup;
+Files := setup.Files(multiPart, useLocal, useTranslation);
+
+prefix := setup.Files(false, false).QueryFilePrefix;
+
+tableColumn := RECORD
+ string style{xpath('@table:style-name')};
+ string repeated{xpath('table:number-columns-repeated')};
+ string defaultCellStyle{xpath('@table:default-cell-style-name')};
+END;
+
+tableRow := RECORD,maxlength(99999)
+ string          month{xpath('/office:document-content/office:body/table:table/@table:name')};
+ varstring       date{xpath('table:table-cell[1]/text:p')};
+ varunicode      description{xpath('table:table-cell[2]/text:p')};
+ varunicode      fullDescription{xpath('table:table-cell[2]/text:p<>')};
+ varstring       amount{xpath('table:table-cell[3]/text:p')};
+END;
+
+tableTableWithoutNestedDS := RECORD
+ tableColumn col1{xpath('table:table-column[1]')};
+ tableColumn col2{xpath('table:table-column[2]')};
+ tableColumn col3{xpath('table:table-column[3]')};
+ tableColumn col4{xpath('table:table-column[4]')};
+END;
+
+tableTable := RECORD(tableTableWithoutNestedDS)
+ DATASET(tableRow) rows{xpath('table:table-row')};
+END;
+
+accounts := dataset(Files.DG_FileOut+'accountxml', tableTable, XML('/office:document-content/office:body/table:table'));
+
+newTableColumn := RECORD
+ string defaultCellStyle{xpath('@table:default-cell-style-name')};
+ string style{xpath('@table:style-name')};
+END;
+
+newTableRow := RECORD
+ varunicode      description;
+ varunicode      fullDescription;
+ string          date;
+ real8           amount;
+END;
+
+newTableTableWithoutNestedDS := RECORD
+ newTableColumn col3;
+ newTableColumn col1;
+ newTableColumn col4;
+END;
+
+newTableTable := RECORD(newTableTableWithoutNestedDS)
+ DATASET(newTableRow) rows{xpath('table:table-row')};
+END;
+
+newaccountsflat := dataset(prefix+'accountsflat', newTableTable, FLAT);
+newaccountscsv := dataset(prefix+'accountscsv', newTableTableWithoutNestedDS, CSV);
+newaccountsxml := dataset(prefix+'accountsxml', newTableTable, XML('/Dataset/Row'));
+newaccountsjson := dataset(prefix+'accountsjson', newTableTable, JSON('/Row'));
+
+SEQUENTIAL(
+ PARALLEL(
+  OUTPUT(accounts, , prefix+'accountsflat', OVERWRITE);
+  OUTPUT(accounts, , prefix+'accountscsv', CSV, OVERWRITE);
+  OUTPUT(accounts, , prefix+'accountsxml', XML, OVERWRITE);
+  OUTPUT(accounts, , prefix+'accountsjson', JSON, OVERWRITE);
+ );
+ OUTPUT(newaccountsflat, NAMED('newaccountsflat'));
+ OUTPUT(newaccountscsv, NAMED('newaccountscsv'));
+ OUTPUT(newaccountsxml, NAMED('newaccountsxml'));
+ OUTPUT(newaccountsjson, NAMED('newaccountsjson'));
+);