浏览代码

HPCC-21462 Start refactoring disk read code

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 6 年之前
父节点
当前提交
623ab9a190

+ 16 - 9
common/thorhelper/csvsplitter.cpp

@@ -71,11 +71,23 @@ void CSVSplitter::addTerminator(const char * text)
     matcher.addEntry(text, TERMINATOR);
 }
 
+void CSVSplitter::addItem(MatchItem item, const char * text)
+{
+    if (text)
+        matcher.addEntry(text, item);
+}
+
 void CSVSplitter::addEscape(const char * text)
 {
     matcher.queryAddEntry((size32_t)strlen(text), text, ESCAPE);
 }
 
+void CSVSplitter::addWhitespace()
+{
+    matcher.queryAddEntry(1, " ", WHITESPACE);
+    matcher.queryAddEntry(1, "\t", WHITESPACE);
+}
+
 void CSVSplitter::reset()
 {
     matcher.reset();
@@ -156,12 +168,10 @@ void CSVSplitter::init(unsigned _maxColumns, ICsvParameters * csvInfo, const cha
 
     //MORE Should this be configurable??
     if (!(flags & ICsvParameters::preserveWhitespace))
-    {
-        matcher.queryAddEntry(1, " ", WHITESPACE);
-        matcher.queryAddEntry(1, "\t", WHITESPACE);
-    }
+        addWhitespace();
 }
 
+
 void CSVSplitter::init(unsigned _maxColumns, size32_t _maxCsvSize, const char *quotes, const char *separators, const char *terminators, const char *escapes, bool preserveWhitespace)
 {
     reset();
@@ -181,11 +191,8 @@ void CSVSplitter::init(unsigned _maxColumns, size32_t _maxCsvSize, const char *q
     if (escapes)
         addActionList(matcher, escapes, ESCAPE);
 
-    if (preserveWhitespace)
-    {
-        matcher.queryAddEntry(1, " ", WHITESPACE);
-        matcher.queryAddEntry(1, "\t", WHITESPACE);
-    }
+    if (!preserveWhitespace)
+        addWhitespace();
 }
 
 void CSVSplitter::setFieldRange(const byte * start, const byte * end, unsigned curColumn, unsigned quoteToStrip, bool unescape)

+ 4 - 1
common/thorhelper/csvsplitter.hpp

@@ -61,6 +61,8 @@ interface ISerialStream;
 class THORHELPER_API CSVSplitter
 {
 public:
+    enum MatchItem { NONE=0, SEPARATOR=1, TERMINATOR=2, WHITESPACE=3, QUOTE=4, ESCAPE=5 };
+
     CSVSplitter();
     ~CSVSplitter();
 
@@ -68,6 +70,8 @@ public:
     void addSeparator(const char * text);
     void addTerminator(const char * text);
     void addEscape(const char * text);
+    void addItem(MatchItem item, const char * text);
+    void addWhitespace();
 
     void init(unsigned maxColumns, ICsvParameters * csvInfo, const char * dfsQuotes, const char * dfsSeparators, const char * dfsTerminators, const char * dfsEscapes);
     void init(unsigned maxColumns, size32_t maxCsvSize, const char *quotes, const char *separators, const char *terminators, const char *escapes, bool preserveWhitespace);
@@ -82,7 +86,6 @@ protected:
     void setFieldRange(const byte * start, const byte * end, unsigned curColumn, unsigned quoteToStrip, bool unescape);
 
 protected:
-    enum { NONE=0, SEPARATOR=1, TERMINATOR=2, WHITESPACE=3, QUOTE=4, ESCAPE=5 };
     unsigned            maxColumns;
     StringMatcher       matcher;
     unsigned            numQuotes;

文件差异内容过多而无法显示
+ 910 - 200
common/thorhelper/thorread.cpp


+ 43 - 17
common/thorhelper/thorread.hpp

@@ -27,19 +27,56 @@
 #include "jrowstream.hpp"
 #include "rtlkey.hpp"
 
+//The following is constant for the life of a disk read activity
+interface IDiskReadOutputMapping : public IInterface
+{
+public:
+    virtual unsigned getExpectedCrc() const = 0;
+    virtual unsigned getProjectedCrc() const = 0;
+    virtual IOutputMetaData * queryExpectedMeta() const = 0;
+    virtual IOutputMetaData * queryProjectedMeta() const = 0;
+    virtual RecordTranslationMode queryTranslationMode() const = 0;
+    virtual bool matches(const IDiskReadOutputMapping * other) const = 0;
+};
+THORHELPER_API IDiskReadOutputMapping * createDiskReadOutputMapping(RecordTranslationMode mode, unsigned expectedCrc, IOutputMetaData & expected, unsigned projectedCrc, IOutputMetaData & projected);
+
+interface IDiskReadMapping : public IInterface
+{
+public:
+    // Accessor functions to provide the basic information from the disk read
+    virtual const char * queryFormat() const = 0;
+    virtual unsigned getActualCrc() const = 0;
+    virtual unsigned getExpectedCrc() const = 0;
+    virtual unsigned getProjectedCrc() const = 0;
+    virtual IOutputMetaData * queryActualMeta() const = 0;
+    virtual IOutputMetaData * queryExpectedMeta() const = 0;
+    virtual IOutputMetaData * queryProjectedMeta() const = 0;
+    virtual const IPropertyTree * queryOptions() const = 0;
+    virtual RecordTranslationMode queryTranslationMode() const = 0;
+
+    virtual bool matches(const IDiskReadMapping * other) const = 0;
+    virtual bool expectedMatchesProjected() const = 0;
+
+    virtual const IDynamicTransform * queryTranslator() const = 0; // translates from actual to projected - null if no translation needed
+    virtual const IKeyTranslator *queryKeyedTranslator() const = 0; // translates from expected to actual
+};
+
+THORHELPER_API IDiskReadMapping * createDiskReadMapping(RecordTranslationMode mode, const char * format, unsigned actualCrc, IOutputMetaData & actual, unsigned expectedCrc, IOutputMetaData & expected, unsigned projectedCrc, IOutputMetaData & projected, const IPropertyTree * options);
+
+
 typedef IConstArrayOf<IFieldFilter> FieldFilterArray;
 interface IRowReader : extends IInterface
 {
 public:
-    virtual IRawRowStream * queryRawRowStream() = 0;
-    virtual IAllocRowStream * queryAllocatedRowStream(IEngineRowAllocator * _outputAllocator) = 0;
+    // get the interface for reading streams of rows.  outputAllocator can be null if allocating next is not used.
+    virtual IDiskRowStream * queryAllocatedRowStream(IEngineRowAllocator * _outputAllocator) = 0;
 };
 
 interface ITranslator;
 interface IDiskRowReader : extends IRowReader
 {
 public:
-    virtual bool matches(const char * format, bool streamRemote, unsigned _expectedCrc, IOutputMetaData & _expected, unsigned _projectedCrc, IOutputMetaData & _projected, unsigned _actualCrc, IOutputMetaData & _actual, const IPropertyTree * options) = 0;
+    virtual bool matches(const char * format, bool streamRemote, IDiskReadMapping * mapping) = 0;
 
     //Specify where the raw binary input for a particular file is coming from, together with its actual format.
     //Does this make sense, or should it be passed a filename?  an actual format?
@@ -49,20 +86,9 @@ public:
     virtual bool setInputFile(const RemoteFilename & filename, const char * logicalFilename, unsigned partNumber, offset_t baseOffset, const IPropertyTree * meta, const FieldFilterArray & expectedFilter) = 0;
 };
 
-//MORE: These functions have too many parameters - should probably move them into something like the following?:
-class TranslateOptions
-{
-    IOutputMetaData * expected;
-    IOutputMetaData * projected;
-    IOutputMetaData * actual;
-    unsigned expectedCrc;
-    unsigned projectedCrc;
-    unsigned actualCrc;
-};
-
 //Create a row reader for a thor binary file.  The expected, projected, actual and options never change.  The file providing the data can change.
-extern THORHELPER_API IDiskRowReader * createLocalDiskReader(const char * format, unsigned _expectedCrc, IOutputMetaData & _expected, unsigned _projectedCrc, IOutputMetaData & _projected, unsigned _actualCrc, IOutputMetaData & _actual, const IPropertyTree * options);
-extern THORHELPER_API IDiskRowReader * createRemoteDiskReader(const char * format, unsigned _expectedCrc, IOutputMetaData & _expected, unsigned _projectedCrc, IOutputMetaData & _projected, unsigned _actualCrc, IOutputMetaData & _actual, const IPropertyTree * options);
-extern THORHELPER_API IDiskRowReader * createDiskReader(const char * format, bool streamRemote, unsigned _expectedCrc, IOutputMetaData & _expected, unsigned _projectedCrc, IOutputMetaData & _projected, unsigned _actualCrc, IOutputMetaData & _actual, const IPropertyTree * options);
+extern THORHELPER_API IDiskRowReader * createLocalDiskReader(const char * format, IDiskReadMapping * mapping);
+extern THORHELPER_API IDiskRowReader * createRemoteDiskReader(const char * format, IDiskReadMapping * mapping);
+extern THORHELPER_API IDiskRowReader * createDiskReader(const char * format, bool streamRemote, IDiskReadMapping * mapping);
 
 #endif

+ 145 - 1
common/thorhelper/thorxmlwrite.cpp

@@ -24,7 +24,8 @@
 #include "deftype.hpp"
 #include "rtlformat.hpp"
 #include "rtlbcd.hpp"
-
+#include "eclrtl_imp.hpp"
+#include "nbcd.hpp"
 
 CommonFieldProcessor::CommonFieldProcessor(StringBuffer &_result, bool _trim) : result(_result), trim(_trim)
 {
@@ -149,3 +150,146 @@ extern thorhelper_decl void convertRowToJSON(size32_t & lenResult, char * & resu
     rtlStrToStrX(sizeResult, result, writer.length(), writer.str());
     lenResult = rtlUtf8Length(sizeResult, result);
 }
+
+
+//=============================================================================================
+
+void PropertyTreeXmlWriter::outputLiteralString(size32_t size, const char *value, const char *fieldname)
+{
+    //Horrible that there is no interface for adding a (len, data) string
+    StringBuffer temp;
+    temp.append(size, value);
+    root->addProp(fieldname, temp.str());
+}
+
+void PropertyTreeXmlWriter::outputInlineXml(const char *text)
+{
+    throwUnexpected();
+}
+
+void PropertyTreeXmlWriter::outputQuoted(const char *text)
+{
+    throwUnexpected();
+}
+
+void PropertyTreeXmlWriter::outputQString(unsigned len, const char *value, const char *fieldname)
+{
+    MemoryAttr tempBuffer;
+    char * temp = (char *)tempBuffer.allocate(len);
+    rtlQStrToStr(len, temp, len, value);
+    outputString(len, temp, fieldname);
+}
+
+void PropertyTreeXmlWriter::outputString(unsigned len, const char *value, const char *fieldname)
+{
+    size32_t resultLen;
+    rtlDataAttr result;
+    rtlStrToUtf8X(resultLen, result.refstr(), len, value);
+
+    size32_t size = rtlUtf8Size(resultLen, result.getstr());
+    outputLiteralString(size, result.getstr(), fieldname);
+}
+
+void PropertyTreeXmlWriter::outputBool(bool value, const char *fieldname)
+{
+    root->setPropBool(fieldname, value);
+}
+
+void PropertyTreeXmlWriter::outputData(unsigned len, const void *value, const char *fieldname)
+{
+    root->setPropBin(fieldname, len, value);
+}
+
+void PropertyTreeXmlWriter::outputInt(__int64 value, unsigned size, const char *fieldname)
+{
+    root->setPropInt64(fieldname, value);
+}
+
+void PropertyTreeXmlWriter::outputUInt(unsigned __int64 value, unsigned size, const char *fieldname)
+{
+    //Convert to string first?
+    root->setPropInt64(fieldname, value);
+}
+
+void PropertyTreeXmlWriter::outputReal(double value, const char *fieldname)
+{
+    StringAttr temp;
+    temp.setown(rtlRealToVStrX(value));
+    root->setProp(fieldname, temp);
+}
+
+void PropertyTreeXmlWriter::outputDecimal(const void *value, unsigned size, unsigned precision, const char *fieldname)
+{
+    Decimal temp;
+    char text[50];
+    temp.setDecimal(size, precision, value);
+    temp.getCString(sizeof(text), text);
+    root->setProp(fieldname, text);
+}
+
+void PropertyTreeXmlWriter::outputUDecimal(const void *value, unsigned size, unsigned precision, const char *fieldname)
+{
+    Decimal temp;
+    char text[50];
+    temp.setUDecimal(size, precision, value);
+    temp.getCString(sizeof(text), text);
+    root->setProp(fieldname, text);
+}
+
+void PropertyTreeXmlWriter::outputUnicode(unsigned len, const UChar *value, const char *fieldname)
+{
+    size32_t resultLen;
+    rtlDataAttr result;
+    rtlUnicodeToUtf8X(resultLen, result.refstr(), len, value);
+
+    size32_t size = rtlUtf8Size(resultLen, result.getstr());
+    outputLiteralString(size, result.getstr(), fieldname);
+}
+
+void PropertyTreeXmlWriter::outputUtf8(unsigned len, const char *value, const char *fieldname)
+{
+    size32_t size = rtlUtf8Size(len, value);
+    outputLiteralString(size, value, fieldname);
+}
+
+void PropertyTreeXmlWriter::outputBeginDataset(const char *fieldname, bool nestChildren)
+{
+    stack.append(*root);
+    root = root->setPropTree(fieldname);
+}
+
+void PropertyTreeXmlWriter::outputEndDataset(const char *fieldname)
+{
+    root = &stack.popGet();
+}
+
+void PropertyTreeXmlWriter::outputBeginNested(const char *fieldname, bool nestChildren)
+{
+    stack.append(*root);
+    root = root->setPropTree(fieldname);
+}
+
+void PropertyTreeXmlWriter::outputEndNested(const char *fieldname)
+{
+    root = &stack.popGet();
+}
+
+void PropertyTreeXmlWriter::outputBeginArray(const char *fieldname) //repeated elements are inline for xml
+{
+    throwUnexpected();
+}
+
+void PropertyTreeXmlWriter::outputEndArray(const char *fieldname)
+{
+    throwUnexpected();
+}
+
+void PropertyTreeXmlWriter::outputSetAll()
+{
+    throwUnexpected();
+}
+
+void PropertyTreeXmlWriter::outputXmlns(const char *name, const char *uri)
+{
+    throwUnexpected();
+}

+ 37 - 0
common/thorhelper/thorxmlwrite.hpp

@@ -57,6 +57,43 @@ public:
 
 };
 
+class thorhelper_decl PropertyTreeXmlWriter : implements CInterfaceOf<IXmlWriter>
+{
+public:
+    PropertyTreeXmlWriter(IPropertyTree * _root) : root(_root) {}
+
+    virtual void outputInlineXml(const char *text) override;
+    virtual void outputQuoted(const char *text) override;
+    virtual void outputQString(unsigned len, const char *field, const char *fieldname) override;
+    virtual void outputString(unsigned len, const char *field, const char *fieldname) override;
+    virtual void outputBool(bool field, const char *fieldname) override;
+    virtual void outputData(unsigned len, const void *field, const char *fieldname) override;
+    virtual void outputInt(__int64 field, unsigned size, const char *fieldname) override;
+    virtual void outputUInt(unsigned __int64 field, unsigned size, const char *fieldname) override;
+    virtual void outputReal(double field, const char *fieldname) override;
+    virtual void outputDecimal(const void *field, unsigned size, unsigned precision, const char *fieldname) override;
+    virtual void outputUDecimal(const void *field, unsigned size, unsigned precision, const char *fieldname) override;
+    virtual void outputUnicode(unsigned len, const UChar *field, const char *fieldname) override;
+    virtual void outputUtf8(unsigned len, const char *field, const char *fieldname) override;
+    virtual void outputBeginDataset(const char *dsname, bool nestChildren) override;
+    virtual void outputEndDataset(const char *dsname) override;
+    virtual void outputBeginNested(const char *fieldname, bool nestChildren) override;
+    virtual void outputEndNested(const char *fieldname) override;
+    virtual void outputBeginArray(const char *fieldname) override; //repeated elements are inline for xml
+    virtual void outputEndArray(const char *fieldname) override;
+    virtual void outputSetAll() override;
+    virtual void outputXmlns(const char *name, const char *uri) override;
+
+protected:
+    void outputLiteralString(size32_t size, const char *value, const char *fieldname);
+
+protected:
+    IPropertyTree * root;
+    ICopyArrayOf<IPropertyTree> stack;
+};
+
+
+
 extern thorhelper_decl void printKeyedValues(StringBuffer &out, IIndexReadContext *segs, IOutputMetaData *rowMeta);
 
 extern thorhelper_decl void convertRowToXML(size32_t & lenResult, char * & result, IOutputMetaData & info, const void * row, unsigned flags = (unsigned)-1);

+ 0 - 9
ecl/eclagent/eclgraph.cpp

@@ -482,15 +482,6 @@ void EclGraphElement::createActivity(IAgentContext & agent, EclSubGraph * owner)
                 }
             }
             arg.setown(createHelper(agent, owner));
-            //Use the new disk read activity unless disabled or the transform uses a virtual field
-            if (((kind == TAKdiskread) || (kind == TAKspillread)) && agent.forceNewDiskReadActivity())
-            {
-                unsigned flags = ((IHThorNewDiskReadArg &)*arg).getFlags();
-                //New activity doesn't currently support virtual callbacks from the transform.  We may want
-                //to implement a new variant to support it without imposing the overhead on the general cases.
-                if ((flags & TDRtransformvirtual) == 0)
-                    kind = TAKnewdiskread;
-            }
             activity.setown(::createActivity(agent, id, subgraph->id, resultsGraph ? resultsGraph->id : 0, kind, isLocal, isGrouped, *arg, node, this));
 
             ForEachItemIn(i2, branches)

+ 2 - 0
ecl/hqlcpp/hqlcerrors.hpp

@@ -223,6 +223,7 @@
 #define HQLERR_NonNullChildDSDefault            4211
 #define HQLERR_AttributeXMustBeConstant         4212
 #define HQLERR_CannotInterpretRecord            4213
+#define HQLERR_NoVirtualAndAlien                4214
 
 //Warnings....
 #define HQLWRN_PersistDataNotLikely             4500
@@ -529,6 +530,7 @@
 #define HQLERR_NonNullChildDSDefault_Text       "Non-null child dataset may not be used as default value (target field '%s')"
 #define HQLERR_AttributeXMustBeConstant_Text    "Attribute %s must be set to a constant value"
 #define HQLERR_CannotInterpretRecord_Text       "This dataset contains deprecated record formats and virtual fields.  Remove the alien data types, or temporarily add __OPTION__(LEGACY) to the table definition"
+#define HQLERR_NoVirtualAndAlien_Text           "GenericDiskReads: VIRTUAL fields are not currently supported with ALIEN types"
 
 //Warnings.
 #define HQLWRN_CannotRecreateDistribution_Text  "Cannot recreate the distribution for a persistent dataset"

+ 4 - 0
ecl/hqlcpp/hqlcpp.cpp

@@ -1855,6 +1855,7 @@ void HqlCppTranslator::cacheOptions()
         DebugOption(options.checkDuplicateThreshold, "checkDuplicateThreshold", 0), // If non zero, create a warning if duplicates > this percentage increase
         DebugOption(options.checkDuplicateMinActivities, "checkDuplicateMinActivities", 100),
         DebugOption(options.diskReadsAreSimple, "diskReadsAreSimple", false), // Not yet enabled - needs filters to default to generating keyed info first
+        DebugOption(options.genericDiskReads, "genericDiskReads", false),
     };
 
     //get options values from workunit
@@ -1954,6 +1955,9 @@ void HqlCppTranslator::postProcessOptions()
         options.optimizeSpillProject = false;
     }
 
+    if (!targetHThor())
+        options.genericDiskReads = false;
+
     if (options.resourceSequential)
         options.resourceConditionalActions = true;
 

+ 2 - 0
ecl/hqlcpp/hqlcpp.ipp

@@ -457,6 +457,7 @@ public:
 
     void start(const char * text, unsigned _flags = 0);
     void finish();
+    bool isEmpty() const;
     unsigned numStmts() const;
     void setIncomplete(bool value);
     void setIncluded(bool value);
@@ -823,6 +824,7 @@ struct HqlCppOptions
     bool                forceAllProjectedDiskSerialized;
     bool                newIndexReadMapping;
     bool                diskReadsAreSimple;
+    bool                genericDiskReads;
 };
 
 //Any information gathered while processing the query should be moved into here, rather than cluttering up the translator class

+ 7 - 0
ecl/hqlcpp/hqlhtcpp.cpp

@@ -308,6 +308,13 @@ void MemberFunction::start(const char * text, unsigned _flags)
     stmt = ctx.addQuotedFunction(text, (flags & MFdynamicproto) != 0);
 }
 
+bool MemberFunction::isEmpty() const
+{
+    assertex(stmt);
+    return (stmt->numChildren() == 0);
+}
+
+
 //---------------------------------------------------------------------------
 
 static HqlTransformerInfo childDatasetSpotterInfo("ChildDatasetSpotter");

+ 111 - 20
ecl/hqlcpp/hqlsource.cpp

@@ -662,6 +662,7 @@ public:
         useImplementationClass = false;
         isUnfilteredCount = false;
         requiresOrderedMerge = false;
+        genericDiskReads = translator.queryOptions().genericDiskReads;
         rootSelfRow = NULL;
         activityKind = TAKnone;
 
@@ -802,6 +803,10 @@ public:
     bool            requiresOrderedMerge;
     bool            newInputMapping;
     bool            extractCanMatch = false;
+    bool            genericDiskReads;
+    bool            genericDiskRead = false;
+    bool            hasDynamicOptions = false;
+
 protected:
     HqlCppTranslator & translator;
 };
@@ -1201,7 +1206,7 @@ void SourceBuilder::buildTransformBody(BuildCtx & transformCtx, IHqlExpression *
     if (tableExpr && bindInputRow)
     {
         IHqlExpression * mode = (tableExpr->getOperator() == no_table) ? tableExpr->queryChild(2) : NULL;
-        if (mode && mode->getOperator() == no_csv)
+        if (mode && mode->getOperator() == no_csv && !genericDiskRead)
         {
             translator.bindCsvTableCursor(transformCtx, tableExpr, "Src", no_none, NULL, true, queryCsvEncoding(mode));
         }
@@ -1545,7 +1550,7 @@ void SourceBuilder::buildTransformElements(BuildCtx & ctx, IHqlExpression * expr
             buildTargetCursor(tempRow, rowBuilder, subctx, expr);
             // MORE - don't understand why this is required here but not in hqlproject above
             IHqlExpression * dataset = expr->queryChild(0);
-            BoundRow * leftCursor;
+            BoundRow * leftCursor = nullptr;
             switch (getDatasetKind(tableExpr))
             {
             case no_csv:
@@ -1555,10 +1560,9 @@ void SourceBuilder::buildTransformElements(BuildCtx & ctx, IHqlExpression * expr
             case no_json:
                 leftCursor = translator.bindXmlTableCursor(subctx, dataset, "xmlLeft", no_left, querySelSeq(expr), true);
                 break;
-            default:
-                leftCursor = translator.bindTableCursor(subctx, dataset, "left", no_left, querySelSeq(expr));
-                break;
             }
+            if (!leftCursor)
+                leftCursor = translator.bindTableCursor(subctx, dataset, "left", no_left, querySelSeq(expr));
 
             BoundRow * rightCursor = NULL;
             LinkedHqlExpr transform = expr->queryChild(3);
@@ -1862,6 +1866,7 @@ static bool expandGraphLabel(ThorActivityKind kind)
     switch (kind)
     {
     case TAKdiskread:
+    case TAKnewdiskread:
     case TAKcsvread:
     case TAKxmlread:
     case TAKjsonread:
@@ -1881,15 +1886,17 @@ ABoundActivity * SourceBuilder::buildActivity(BuildCtx & ctx, IHqlExpression * e
     translator.gatherActiveCursors(ctx, parentCursors);
 
     bool isSpill = tableExpr && tableExpr->hasAttribute(_spill_Atom);
+    //If genericDiskReads are supported, this will no longer generate spill activities.
+    //Good for testing, but will change once all disk reads go through that interface
     if (isSpill && (activityKind == TAKdiskread))
         activityKind = TAKspillread;
-    useImplementationClass = translator.queryOptions().minimizeActivityClasses && translator.targetRoxie() && isSpill;
+    useImplementationClass = translator.queryOptions().minimizeActivityClasses && translator.targetRoxie() && (activityKind == TAKspillread);
 
     Owned<ActivityInstance> localInstance = new ActivityInstance(translator, ctx, activityKind, expr, kind);
     if (useImplementationClass)
         localInstance->setImplementationClass(newMemorySpillReadArgId);
 
-    if (((activityKind >= TAKdiskread) && (activityKind <= TAKdiskgroupaggregate)) || (activityKind == TAKspillread))
+    if (((activityKind >= TAKdiskread) && (activityKind <= TAKdiskgroupaggregate)) || (activityKind == TAKspillread) || (activityKind == TAKnewdiskread))
     {
         IHqlExpression * seq = querySequence(tableExpr);
         translator.noteResultAccessed(ctx, seq, nameExpr);
@@ -2827,7 +2834,7 @@ void SourceBuilder::gatherSteppingMeta(IHqlExpression * expr, SourceSteppingInfo
 class DiskReadBuilderBase : public SourceBuilder
 {
 public:
-    DiskReadBuilderBase(HqlCppTranslator & _translator, IHqlExpression *_tableExpr, IHqlExpression *_nameExpr)
+    DiskReadBuilderBase(HqlCppTranslator & _translator, IHqlExpression *_tableExpr, IHqlExpression *_nameExpr, bool canReadGenerically)
         : SourceBuilder(_translator, _tableExpr, _nameExpr), monitors(_tableExpr, _translator, 0, true, false)
     {
         fpos.setown(getFilepos(tableExpr, false));
@@ -2835,7 +2842,8 @@ public:
         logicalFilenameMarker.setown(getFileLogicalName(tableExpr));
         mode = tableExpr->queryChild(2);
         modeOp = mode->getOperator();
-        includeFormatCrc = (modeOp != no_csv && modeOp != no_pipe);
+        genericDiskRead = genericDiskReads && canReadGenerically;
+        includeFormatCrc = ((modeOp != no_csv) || genericDiskRead) && (modeOp != no_pipe);
     }
 
     virtual void buildMembers(IHqlExpression * expr);
@@ -2887,6 +2895,15 @@ void DiskReadBuilderBase::buildMembers(IHqlExpression * expr)
             throwError1(HQLERR_ReadSpillBeforeWrite, spillName.str());
     }
 
+    if (genericDiskRead)
+    {
+        if ((modeOp != no_thor) && (modeOp != no_flat))
+        {
+            StringBuffer format;
+            format.append(getOpString(modeOp)).toLowerCase();
+            instance->startctx.addQuotedF("virtual const char * queryFormat() { return \"%s\"; }", format.str());
+        }
+    }
 
     //---- virtual bool canMatchAny() { return <value>; } ----
     LinkedHqlExpr guard = globalGuard.get();
@@ -2964,15 +2981,22 @@ void DiskReadBuilderBase::buildFlagsMember(IHqlExpression * expr)
     if (transformUsesVirtualFilePosition || transformUsesVirtualLogicalFilename)
         flags.append("|TDRtransformvirtual");
     if (requiresOrderedMerge) flags.append("|TDRorderedmerge");
+    if (hasDynamicOptions) flags.append("|TDRdynformatoptions");
 
     if (flags.length())
         translator.doBuildUnsignedFunction(instance->classctx, "getFlags", flags.str()+1);
+
+    //New activity doesn't currently support virtual callbacks from the transform.
+    //At a later date this error will be removed, and a new variant of the activity will be created
+    //that does not impose the overhead of tracking file positions on the general cases.
+    if (genericDiskRead && (transformUsesVirtualFilePosition || transformUsesVirtualLogicalFilename))
+        throwError(HQLERR_NoVirtualAndAlien);
 }
 
 
 void DiskReadBuilderBase::buildTransformFpos(BuildCtx & transformCtx)
 {
-    if (modeOp == no_csv)
+    if ((modeOp == no_csv) && !genericDiskRead)
         associateFilePositions(transformCtx, "fpp", "dataSrc[0]");
     else
         associateFilePositions(transformCtx, "fpp", "left");
@@ -3056,15 +3080,20 @@ class DiskReadBuilder : public DiskReadBuilderBase
 {
 public:
     DiskReadBuilder(HqlCppTranslator & _translator, IHqlExpression *_tableExpr, IHqlExpression *_nameExpr)
-        : DiskReadBuilderBase(_translator, _tableExpr, _nameExpr)
+        : DiskReadBuilderBase(_translator, _tableExpr, _nameExpr, (modeOp != no_pipe))
     {
-        extractCanMatch = (modeOp == no_thor) || (modeOp == no_flat);
+        extractCanMatch = (modeOp == no_thor) || (modeOp == no_flat) ||
+                          ((modeOp == no_csv) && genericDiskRead);
     }
 
 protected:
     virtual void buildTransform(IHqlExpression * expr) override;
     virtual void buildMembers(IHqlExpression * expr) override;
     virtual void analyseGraph(IHqlExpression * expr) override;
+
+    void buildFormatOption(BuildCtx & ctx, IHqlExpression * name, IHqlExpression * value);
+    void buildFormatOptions(BuildCtx & fixedCtx, BuildCtx & dynCtx, IHqlExpression * expr);
+    void buildFormatOptions(IHqlExpression * expr);
 };
 
 
@@ -3093,7 +3122,7 @@ void DiskReadBuilder::analyseGraph(IHqlExpression * expr)
 
 void DiskReadBuilder::buildMembers(IHqlExpression * expr)
 {
-    if (modeOp == no_csv)
+    if ((modeOp == no_csv) && !genericDiskRead)
         buildFilenameMember();
     else if (modeOp != no_pipe)
         buildReadMembers(expr);
@@ -3154,6 +3183,63 @@ void DiskReadBuilder::buildMembers(IHqlExpression * expr)
 }
 
 
+void DiskReadBuilder::buildFormatOption(BuildCtx & ctx, IHqlExpression * name, IHqlExpression * value)
+{
+    if (value->isAttribute())
+    {
+    }
+    else if (value->isList())
+    {
+        node_operator op = value->getOperator();
+        if ((op == no_list) && value->numChildren())
+        {
+            ForEachChild(i, value)
+                buildFormatOption(ctx, name, value->queryChild(i));
+        }
+        else if ((op == no_list) || (op == no_null))
+        {
+            //MORE: There should be a better way of doing this!
+            translator.buildXmlSerializeBeginNested(ctx, name, false);
+            translator.buildXmlSerializeEndNested(ctx, name);
+        }
+    }
+    else
+    {
+        translator.buildXmlSerializeScalar(ctx, value, name);
+    }
+}
+
+void DiskReadBuilder::buildFormatOptions(BuildCtx & fixedCtx, BuildCtx & dynCtx, IHqlExpression * expr)
+{
+    ForEachChild(i, expr)
+    {
+        IHqlExpression * cur = expr->queryChild(i);
+        if (cur->isAttribute())
+        {
+            OwnedHqlExpr name = createConstant(str(cur->queryName()));
+            if (cur->numChildren())
+            {
+                BuildCtx & ctx = cur->isConstant() ? fixedCtx : dynCtx;
+                ForEachChild(c, cur)
+                    buildFormatOption(ctx, name, cur->queryChild(c));
+            }
+            else
+                translator.buildXmlSerializeScalar(fixedCtx, queryBoolExpr(true), name);
+        }
+    }
+}
+
+void DiskReadBuilder::buildFormatOptions(IHqlExpression * expr)
+{
+    MemberFunction fixedFunc(translator, instance->createctx, "virtual void getFormatOptions(IXmlWriter & out) override", MFopt);
+    MemberFunction dynFunc(translator, instance->startctx, "virtual void getFormatDynOptions(IXmlWriter & out) override", MFopt);
+
+    buildFormatOptions(fixedFunc.ctx, dynFunc.ctx, expr);
+
+    if (!dynFunc.isEmpty())
+        hasDynamicOptions = true;
+}
+
 void DiskReadBuilder::buildTransform(IHqlExpression * expr)
 {
     if (modeOp == no_pipe)
@@ -3172,7 +3258,7 @@ void DiskReadBuilder::buildTransform(IHqlExpression * expr)
         return;
     }
 
-    if (modeOp == no_csv)
+    if ((modeOp == no_csv) && !genericDiskRead)
     {
         translator.buildCsvParameters(instance->nestedctx, mode, NULL, true);
 
@@ -3192,8 +3278,11 @@ void DiskReadBuilder::buildTransform(IHqlExpression * expr)
         return;
     }
 
+    if (genericDiskRead)
+        buildFormatOptions(mode);
+
     MemberFunction func(translator, instance->startctx);
-    if ((instance->kind == TAKdiskread) || (instance->kind == TAKspillread))
+    if ((instance->kind == TAKdiskread) || (instance->kind == TAKspillread) || (instance->kind == TAKnewdiskread))
         func.start("virtual size32_t transform(ARowBuilder & crSelf, const void * _left) override");
     else
         func.start("virtual size32_t transform(ARowBuilder & crSelf, const void * _left, IFilePositionProvider * fpp) override");
@@ -3218,7 +3307,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityDiskRead(BuildCtx & ctx, IHqlE
     info.deduceDiskRecords();
 
     unsigned optFlags = (options.foldOptimized ? HOOfold : 0);
-    if (info.newInputMapping && (modeOp != no_csv) && (modeOp != no_xml) && (modeOp != no_pipe))
+    if (info.newInputMapping && ((modeOp != no_csv) || options.genericDiskReads) && (modeOp != no_xml) && (modeOp != no_pipe))
     {
         //The projected disk information (which is passed to the transform) uses the in memory format IFF
         // - The disk read is a trivial slimming transform (so no transform needs calling on the projected disk format.
@@ -3274,6 +3363,8 @@ ABoundActivity * HqlCppTranslator::doBuildActivityDiskRead(BuildCtx & ctx, IHqlE
     if (isPiped)
         return info.buildActivity(ctx, expr, TAKpiperead, "PipeRead", NULL);
     ensureDiskAccessAllowed(tableExpr);
+    if (info.genericDiskRead)
+        return info.buildActivity(ctx, expr, TAKnewdiskread, "NewDiskRead", NULL);
     if (modeOp == no_csv)
         return info.buildActivity(ctx, expr, TAKcsvread, "CsvRead", NULL);
     return info.buildActivity(ctx, expr, TAKdiskread, "DiskRead", NULL);
@@ -3285,7 +3376,7 @@ class DiskNormalizeBuilder : public DiskReadBuilderBase
 {
 public:
     DiskNormalizeBuilder(HqlCppTranslator & _translator, IHqlExpression *_tableExpr, IHqlExpression *_nameExpr)
-        : DiskReadBuilderBase(_translator, _tableExpr, _nameExpr)
+        : DiskReadBuilderBase(_translator, _tableExpr, _nameExpr, false)
     { 
     }
 
@@ -3357,7 +3448,7 @@ class DiskAggregateBuilder : public DiskReadBuilderBase
 {
 public:
     DiskAggregateBuilder(HqlCppTranslator & _translator, IHqlExpression *_tableExpr, IHqlExpression *_nameExpr)
-        : DiskReadBuilderBase(_translator, _tableExpr, _nameExpr)
+        : DiskReadBuilderBase(_translator, _tableExpr, _nameExpr, false)
     { 
         failedFilterValue.clear();
     }
@@ -3425,7 +3516,7 @@ class DiskCountBuilder : public DiskReadBuilderBase
 {
 public:
     DiskCountBuilder(HqlCppTranslator & _translator, IHqlExpression *_tableExpr, IHqlExpression *_nameExpr, node_operator _aggOp)
-        : DiskReadBuilderBase(_translator, _tableExpr, _nameExpr)
+        : DiskReadBuilderBase(_translator, _tableExpr, _nameExpr, false)
     { 
         aggOp = _aggOp;
         isCompoundCount = true;
@@ -3524,7 +3615,7 @@ class DiskGroupAggregateBuilder : public DiskReadBuilderBase
 {
 public:
     DiskGroupAggregateBuilder(HqlCppTranslator & _translator, IHqlExpression *_tableExpr, IHqlExpression *_nameExpr)
-        : DiskReadBuilderBase(_translator, _tableExpr, _nameExpr)
+        : DiskReadBuilderBase(_translator, _tableExpr, _nameExpr, false)
     { 
         failedFilterValue.clear();
     }

+ 85 - 35
ecl/hthor/hthor.cpp

@@ -10429,20 +10429,22 @@ void CHThorExternalActivity::stop()
 
 //=====================================================================================================
 
-CHThorNewDiskReadBaseActivity::CHThorNewDiskReadBaseActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDiskReadBaseArg &_arg, IHThorCompoundBaseArg & _segHelper, ThorActivityKind _kind, IPropertyTree *_node)
+CHThorNewDiskReadBaseActivity::CHThorNewDiskReadBaseActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNewDiskReadBaseArg &_arg, IHThorCompoundBaseArg & _segHelper, ThorActivityKind _kind, IPropertyTree *_node)
 : CHThorActivityBase(_agent, _activityId, _subgraphId, _arg, _kind), helper(_arg), segHelper(_segHelper)
 {
     helper.setCallback(this);
     expectedDiskMeta = helper.queryDiskRecordSize();
     projectedDiskMeta = helper.queryProjectedDiskRecordSize();
-    readerOptions.setown(createPTree());
+    formatOptions.setown(createPTree());
     if (_node)
     {
         const char *recordTranslationModeHintText = _node->queryProp("hint[@name='layoutTranslation']/@value");
         if (recordTranslationModeHintText)
             recordTranslationModeHint = getTranslationMode(recordTranslationModeHintText);
     }
-    readerOptions->setPropInt("translationMode", (int)getLayoutTranslationMode());
+
+    PropertyTreeXmlWriter writer(formatOptions);
+    helper.getFormatOptions(writer);
 }
 
 CHThorNewDiskReadBaseActivity::~CHThorNewDiskReadBaseActivity()
@@ -10490,7 +10492,7 @@ void CHThorNewDiskReadBaseActivity::resolveFile()
 {
     //If in a child query, and the filenames haven't changed, the information about the resolved filenames will also not have changed
     //MORE: Is this ever untrue?
-    if (subfiles && !(helper.getFlags() & TDXvarfilename))
+    if (subfiles && !(helper.getFlags() & (TDXvarfilename|TDRdynformatoptions)))
         return;
 
     //Only clear these members if we are re-resolving the file - otherwise the previous entries are still valid
@@ -10499,6 +10501,16 @@ void CHThorNewDiskReadBaseActivity::resolveFile()
     dfsParts.clear();
     subfiles.kill();
 
+    Owned<IPropertyTree> curFormatOptions;
+    if (helper.getFlags() & TDRdynformatoptions)
+    {
+        curFormatOptions.setown(createPTreeFromIPT(formatOptions));
+        PropertyTreeXmlWriter writer(curFormatOptions);
+        helper.getFormatDynOptions(writer);
+    }
+    else
+        curFormatOptions.set(formatOptions);
+
     OwnedRoxieString fileName(helper.getFileName());
     mangleHelperFileName(mangledHelperFileName, fileName, agent.queryWuid(), helper.getFlags());
     if (helper.getFlags() & (TDXtemporary | TDXjobtemp))
@@ -10508,7 +10520,7 @@ void CHThorNewDiskReadBaseActivity::resolveFile()
         tempFileName.set(agent.queryTemporaryFile(mangledFilename.str()));
         logicalFileName = tempFileName.str();
         gatherInfo(NULL);
-        subfiles.append(*extractFileInformation(nullptr));
+        subfiles.append(*extractFileInformation(nullptr, curFormatOptions));
     }
     else
     {
@@ -10535,22 +10547,22 @@ void CHThorNewDiskReadBaseActivity::resolveFile()
                     for (; s<numsubs; s++)
                     {
                         IDistributedFile &subfile = super->querySubFile(s, true);
-                        subfiles.append(*extractFileInformation(&subfile));
+                        subfiles.append(*extractFileInformation(&subfile, curFormatOptions));
                     }
                     assertex(fdesc);
                     superfile.set(fdesc->querySuperFileDescriptor());
                 }
                 else
-                    subfiles.append(*extractFileInformation(dFile));
+                    subfiles.append(*extractFileInformation(dFile, curFormatOptions));
 
                 if((helper.getFlags() & (TDXtemporary | TDXjobtemp)) == 0)
                     agent.logFileAccess(dFile, "HThor", "READ");
             }
             else
-                subfiles.append(*extractFileInformation(nullptr));
+                subfiles.append(*extractFileInformation(nullptr, curFormatOptions));
         }
         else
-            subfiles.append(*extractFileInformation(nullptr));
+            subfiles.append(*extractFileInformation(nullptr, curFormatOptions));
 
         if (!ldFile)
         {
@@ -10586,19 +10598,46 @@ void CHThorNewDiskReadBaseActivity::gatherInfo(IFileDescriptor * fileDesc)
     }
 }
 
-CHThorNewDiskReadBaseActivity::InputFileInfo * CHThorNewDiskReadBaseActivity::extractFileInformation(IDistributedFile * distributedFile)
+static void queryInheritProp(IPropertyTree & target, const char * targetName, IPropertyTree & source, const char * sourceName)
+{
+    if (source.hasProp(sourceName) && !target.hasProp(targetName))
+        target.setProp(targetName, source.queryProp(sourceName));
+}
+
+static void queryInheritSeparatorProp(IPropertyTree & target, const char * targetName, IPropertyTree & source, const char * sourceName)
+{
+    //Legacy - commas are quoted if they occur in a separator list, so need to remove the leading backslashes
+    if (source.hasProp(sourceName) && !target.hasProp(targetName))
+    {
+        StringBuffer unquoted;
+        const char * text = source.queryProp(sourceName);
+        while (*text)
+        {
+            if ((text[0] == '\\') && (text[1] == ','))
+                text++;
+            unquoted.append(*text++);
+        }
+        target.setProp(targetName, unquoted);
+    }
+}
+
+CHThorNewDiskReadBaseActivity::InputFileInfo * CHThorNewDiskReadBaseActivity::extractFileInformation(IDistributedFile * distributedFile, const IPropertyTree * curFormatOptions)
 {
     Owned<IPropertyTree> meta = createPTree();
     unsigned actualCrc = helper.getDiskFormatCrc();
     Linked<IOutputMetaData> actualDiskMeta = expectedDiskMeta;
+    Linked<const IPropertyTree> fileFormatOptions = curFormatOptions;
     bool compressed = false;
     bool blockcompressed = false;
+    const char * readFormat = helper.queryFormat();
 
     if (distributedFile)
     {
         const char *kind = queryFileKind(distributedFile);
         //Do not use the field translation if the file was originally csv/xml - unless explicitly set
-        if (strisame(kind, "flat") || (RecordTranslationMode::AlwaysDisk == getLayoutTranslationMode()))
+        if ((strisame(kind, "flat") || (RecordTranslationMode::AlwaysDisk == getLayoutTranslationMode())) &&
+//            (strisame(readFormat, "flat") || strisame(kind, readFormat)))
+              (strisame(readFormat, "flat"))) // MORE: is this restriction correct? - only allow a fixed source format when the read format is flat
         {
             //Yuk this will be horrible - it needs to cache it for each distributed file
             //and also common them up if they are the same.
@@ -10626,6 +10665,18 @@ CHThorNewDiskReadBaseActivity::InputFileInfo * CHThorNewDiskReadBaseActivity::ex
             blockcompressed = true;
             compressed = true;
         }
+
+        //MORE: There should probably be a generic way of storing and extracting format options for a file
+        IPropertyTree & options = distributedFile->queryAttributes();
+        Linked<IPropertyTree> tempOptions = createPTreeFromIPT(fileFormatOptions);
+        queryInheritProp(*tempOptions, "quote", options, "@csvQuote");
+        queryInheritSeparatorProp(*tempOptions, "separator", options, "@csvSeparate");
+        queryInheritProp(*tempOptions, "terminator", options, "@csvTerminate");
+        queryInheritProp(*tempOptions, "escape", options, "@csvEscape");
+        dbglogXML(fileFormatOptions);
+        dbglogXML(tempOptions);
+        if (!areMatchingPTrees(fileFormatOptions, tempOptions))
+            fileFormatOptions.setown(tempOptions.getClear());
     }
 
     meta->setPropBool("grouped", grouped);
@@ -10635,7 +10686,8 @@ CHThorNewDiskReadBaseActivity::InputFileInfo * CHThorNewDiskReadBaseActivity::ex
 
     InputFileInfo & target = * new InputFileInfo;
     target.file = distributedFile;
-    target.meta.swap(meta);
+    target.formatOptions.swap(fileFormatOptions);
+    target.meta.setown(meta.getClear());
     target.actualCrc = actualCrc;
     target.actualMeta.swap(actualDiskMeta);
     return &target;
@@ -10759,10 +10811,7 @@ bool CHThorNewDiskReadBaseActivity::openNextPart(bool prevWasMissing)
 void CHThorNewDiskReadBaseActivity::initStream(IDiskRowReader * reader, const char * filename)
 {
     activeReader = reader;
-    if (useRawStream)
-        rawRowStream = reader->queryRawRowStream();
-    else
-        roxieRowStream = reader->queryAllocatedRowStream(rowAllocator);
+    inputRowStream = reader->queryAllocatedRowStream(rowAllocator);
 
     StringBuffer report("Reading file ");
     report.append(filename);
@@ -10771,34 +10820,33 @@ void CHThorNewDiskReadBaseActivity::initStream(IDiskRowReader * reader, const ch
 
 void CHThorNewDiskReadBaseActivity::setEmptyStream()
 {
-    if (useRawStream)
-        rawRowStream = queryNullRawRowStream();
-    else
-        roxieRowStream = queryNullAllocatedRowStream();
+    inputRowStream = queryNullDiskRowStream();
     finishedParts = true;
 }
 
-IDiskRowReader * CHThorNewDiskReadBaseActivity::ensureRowReader(const char * format, bool streamRemote, unsigned expectedCrc, IOutputMetaData & expected, unsigned projectedCrc, IOutputMetaData & projected, unsigned actualCrc, IOutputMetaData & actual, IPropertyTree * options)
+IDiskRowReader * CHThorNewDiskReadBaseActivity::ensureRowReader(const char * format, bool streamRemote, unsigned expectedCrc, IOutputMetaData & expected, unsigned projectedCrc, IOutputMetaData & projected, unsigned actualCrc, IOutputMetaData & actual, const IPropertyTree * options)
 {
+    Owned<IDiskReadMapping> mapping = createDiskReadMapping(getLayoutTranslationMode(), format, actualCrc, actual, expectedCrc, expected, projectedCrc, projected, options);
+
     ForEachItemIn(i, readers)
     {
         IDiskRowReader & cur = readers.item(i);
-        if (cur.matches(format, streamRemote, expectedCrc, expected, projectedCrc, projected, actualCrc, actual, options))
+        if (cur.matches(format, streamRemote, mapping))
             return &cur;
     }
-    IDiskRowReader * reader = createDiskReader(format, streamRemote, expectedCrc, expected, projectedCrc, projected, actualCrc, actual, options);
+    IDiskRowReader * reader = createDiskReader(format, streamRemote, mapping);
     readers.append(*reader);
     return reader;
 }
 
 bool CHThorNewDiskReadBaseActivity::openFilePart(const char * filename)
 {
-    const char * format = "thor";   // more - should extract from the current file (could even mix flat and csv...)
+    const char * format = helper.queryFormat();   // MORE: should be extracted from the current file (could even mix flat and csv...)
     InputFileInfo * fileInfo = &subfiles.item(0);
 
     unsigned expectedCrc = helper.getDiskFormatCrc();
     unsigned projectedCrc = helper.getProjectedFormatCrc();
-    IDiskRowReader * reader = ensureRowReader(format, false, expectedCrc, *expectedDiskMeta, projectedCrc, *projectedDiskMeta, expectedCrc, *expectedDiskMeta, readerOptions);
+    IDiskRowReader * reader = ensureRowReader(format, false, expectedCrc, *expectedDiskMeta, projectedCrc, *projectedDiskMeta, expectedCrc, *expectedDiskMeta, fileInfo->formatOptions);
     if (reader->setInputFile(filename, logicalFileName, 0, offsetOfPart, fileInfo->meta, fieldFilters))
     {
         initStream(reader, filename);
@@ -10809,6 +10857,7 @@ bool CHThorNewDiskReadBaseActivity::openFilePart(const char * filename)
 
 bool CHThorNewDiskReadBaseActivity::openFilePart(ILocalOrDistributedFile * localFile, IDistributedFilePart * filePart, unsigned whichPart)
 {
+    IDistributedFile * distributedFile = localFile->queryDistributedFile();
     InputFileInfo * fileInfo = &subfiles.item(0);
     if (superfile && filePart)
     {
@@ -10817,7 +10866,7 @@ bool CHThorNewDiskReadBaseActivity::openFilePart(ILocalOrDistributedFile * local
         if (superfile->mapSubPart(partNum, subfile, lnum))
         {
             fileInfo = &subfiles.item(subfile);
-            IDistributedFile * distributedFile = fileInfo->file;
+            distributedFile = fileInfo->file;
             logicalFileName = distributedFile->queryLogicalName();
         }
     }
@@ -10830,12 +10879,13 @@ bool CHThorNewDiskReadBaseActivity::openFilePart(ILocalOrDistributedFile * local
     bool tryRemoteStream = actualDiskMeta->queryTypeInfo()->canInterpret() && actualDiskMeta->queryTypeInfo()->canSerialize() &&
                            projectedDiskMeta->queryTypeInfo()->canInterpret() && projectedDiskMeta->queryTypeInfo()->canSerialize();
 
+
     /*
      * If a file part can be accessed local, then read it locally
      * If a file part supports a remote stream, then use that
      * Otherwise failover to the legacy remote access.
      */
-    const char * format = "thor";   // more - should extract from the current file (could even mix flat and csv...)
+    const char * format = helper.queryFormat();   // MORE: should be extracted from the current file (could even mix flat and csv...)
     Owned<IException> saveOpenExc;
     StringBuffer filename, filenamelist;
     std::vector<unsigned> remoteCandidates;
@@ -10851,8 +10901,8 @@ bool CHThorNewDiskReadBaseActivity::openFilePart(ILocalOrDistributedFile * local
         {
             StringBuffer path;
             rfn.getPath(path);
-            IDiskRowReader * reader = ensureRowReader(format, false, expectedCrc, *expectedDiskMeta, projectedCrc, *projectedDiskMeta, actualCrc, *actualDiskMeta, readerOptions);
-            if (reader->setInputFile(path.str(), logicalFileName, filePart->getPartIndex(), offsetOfPart, fileInfo->meta, fieldFilters))
+            IDiskRowReader * reader = ensureRowReader(format, false, expectedCrc, *expectedDiskMeta, projectedCrc, *projectedDiskMeta, actualCrc, *actualDiskMeta, fileInfo->formatOptions);
+            if (reader->setInputFile(path.str(), logicalFileName, whichPart, offsetOfPart, fileInfo->meta, fieldFilters))
             {
                 initStream(reader, path.str());
                 return true;
@@ -10874,8 +10924,8 @@ bool CHThorNewDiskReadBaseActivity::openFilePart(ILocalOrDistributedFile * local
             filenamelist.append('\n').append(filename);
             try
             {
-                IDiskRowReader * reader = ensureRowReader(format, tryRemoteStream, expectedCrc, *expectedDiskMeta, projectedCrc, *projectedDiskMeta, actualCrc, *actualDiskMeta, readerOptions);
-                if (reader->setInputFile(rfilename, logicalFileName, filePart->getPartIndex(), offsetOfPart, fileInfo->meta, fieldFilters))
+                IDiskRowReader * reader = ensureRowReader(format, tryRemoteStream, expectedCrc, *expectedDiskMeta, projectedCrc, *projectedDiskMeta, actualCrc, *actualDiskMeta, fileInfo->formatOptions);
+                if (reader->setInputFile(rfilename, logicalFileName, whichPart, offsetOfPart, fileInfo->meta, fieldFilters))
                 {
                     initStream(reader, filename);
                     return true;
@@ -10956,7 +11006,7 @@ void CHThorNewDiskReadBaseActivity::append(FFoption option, const IFieldFilter *
 
 //=====================================================================================================
 
-CHThorNewDiskReadActivity::CHThorNewDiskReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorDiskReadArg &_arg, ThorActivityKind _kind, IPropertyTree *_node)
+CHThorNewDiskReadActivity::CHThorNewDiskReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorNewDiskReadArg &_arg, ThorActivityKind _kind, IPropertyTree *_node)
 : CHThorNewDiskReadBaseActivity(_agent, _activityId, _subgraphId, _arg, _arg, _kind, _node), helper(_arg), outBuilder(NULL)
 {
     needTransform = false;
@@ -11011,13 +11061,13 @@ const void *CHThorNewDiskReadActivity::nextRow()
 
     try
     {
-        if (rawRowStream)
+        if (useRawStream)
         {
             for (;;)
             {
                 //Returns a row in the serialized form of the projected format
                 size32_t nextSize;
-                const byte * next = (const byte *)rawRowStream->nextRow(nextSize);
+                const byte * next = (const byte *)inputRowStream->nextRow(nextSize);
                 if (!isSpecialRow(next))
                 {
                     size32_t thisSize = 0;
@@ -11070,7 +11120,7 @@ const void *CHThorNewDiskReadActivity::nextRow()
             //whether there was a limit, a transform etc., but unlikely to save more than a couple of boolean tests.
             for (;;)
             {
-                const byte * next = (const byte *)roxieRowStream->nextRow();
+                const byte * next = (const byte *)inputRowStream->nextRow();
                 if (!isSpecialRow(next))
                 {
                     if (unlikely((processed - initialProcessed) >= limit))

+ 10 - 10
ecl/hthor/hthor.ipp

@@ -2929,16 +2929,16 @@ protected:
     {
         IDistributedFile * file;
         Owned<IOutputMetaData> actualMeta;
-        Owned<IPropertyTree> meta;
+        Owned<const IPropertyTree> formatOptions;
+        Owned<const IPropertyTree> meta;
         unsigned actualCrc;
     };
 
-    IHThorDiskReadBaseArg &helper;
+    IHThorNewDiskReadBaseArg &helper;
     IHThorCompoundBaseArg & segHelper;
     IDiskRowReader * activeReader = nullptr;
     IArrayOf<IDiskRowReader> readers;
-    IRawRowStream * rawRowStream = nullptr;
-    IAllocRowStream * roxieRowStream = nullptr;
+    IDiskRowStream * inputRowStream = nullptr;
     StringBuffer mangledHelperFileName;
     StringAttr tempFileName;
     const char * logicalFileName = "";
@@ -2949,7 +2949,7 @@ protected:
     IOutputMetaData *expectedDiskMeta = nullptr;
     IOutputMetaData *projectedDiskMeta = nullptr;
     IConstArrayOf<IFieldFilter> fieldFilters;  // These refer to the expected layout
-    Owned<IPropertyTree> readerOptions;
+    Owned<IPropertyTree> formatOptions;
     unsigned partNum = 0;
     RecordTranslationMode recordTranslationModeHint = RecordTranslationMode::Unspecified;
     bool useRawStream = false; // Constant for the lifetime of the activity
@@ -2978,7 +2978,7 @@ protected:
     }
 
 public:
-    CHThorNewDiskReadBaseActivity(IAgentContext &agent, unsigned _activityId, unsigned _subgraphId, IHThorDiskReadBaseArg &_arg, IHThorCompoundBaseArg & _segHelper, ThorActivityKind _kind, IPropertyTree *node);
+    CHThorNewDiskReadBaseActivity(IAgentContext &agent, unsigned _activityId, unsigned _subgraphId, IHThorNewDiskReadBaseArg &_arg, IHThorCompoundBaseArg & _segHelper, ThorActivityKind _kind, IPropertyTree *node);
     ~CHThorNewDiskReadBaseActivity();
     IMPLEMENT_IINTERFACE
 
@@ -3004,7 +3004,7 @@ public:
 protected:
     bool openFirstPart();
     void initStream(IDiskRowReader * reader, const char * filename);
-    InputFileInfo * extractFileInformation(IDistributedFile * fileDesc);
+    InputFileInfo * extractFileInformation(IDistributedFile * fileDesc, const IPropertyTree * curFormatOptions);
     bool openFilePart(const char * filename);
     bool openFilePart(ILocalOrDistributedFile * localFile, IDistributedFilePart * filePart, unsigned whichPart);
     void setEmptyStream();
@@ -3014,7 +3014,7 @@ protected:
     virtual void closepart();
 
     bool openNextPart(bool prevWasMissing);
-    IDiskRowReader * ensureRowReader(const char * format, bool streamRemote, unsigned expectedCrc, IOutputMetaData & expected, unsigned projectedCrc, IOutputMetaData & projected, unsigned actualCrc, IOutputMetaData & actual, IPropertyTree * options);
+    IDiskRowReader * ensureRowReader(const char * format, bool streamRemote, unsigned expectedCrc, IOutputMetaData & expected, unsigned projectedCrc, IOutputMetaData & projected, unsigned actualCrc, IOutputMetaData & actual, const IPropertyTree * options);
 };
 
 
@@ -3022,7 +3022,7 @@ class CHThorNewDiskReadActivity : public CHThorNewDiskReadBaseActivity
 {
     typedef CHThorNewDiskReadBaseActivity PARENT;
 protected:
-    IHThorDiskReadArg &helper;
+    IHThorNewDiskReadArg &helper;
     bool needTransform;
     bool hasMatchFilter;
     unsigned __int64 lastGroupProcessed;
@@ -3031,7 +3031,7 @@ protected:
     unsigned __int64 remoteLimit = 0;
 
 public:
-    CHThorNewDiskReadActivity(IAgentContext &agent, unsigned _activityId, unsigned _subgraphId, IHThorDiskReadArg &_arg, ThorActivityKind _kind, IPropertyTree *node);
+    CHThorNewDiskReadActivity(IAgentContext &agent, unsigned _activityId, unsigned _subgraphId, IHThorNewDiskReadArg &_arg, ThorActivityKind _kind, IPropertyTree *node);
 
     virtual void ready();
     virtual void stop();

+ 1 - 1
fs/dafsserver/dafsserver.cpp

@@ -1464,7 +1464,7 @@ public:
             else
                 outMeta.set(inMeta);
         }
-        translator.setown(createRecordTranslatorViaCallback(*outRecord, *record));
+        translator.setown(createRecordTranslatorViaCallback(*outRecord, *record, type_utf8));
     }
     virtual bool requiresPostProject() const override
     {

+ 33 - 0
rtl/eclrtl/eclhelper_base.cpp

@@ -876,6 +876,39 @@ bool CThorXmlReadArg::canMatch(const void * row)                 { return true;
 bool CThorXmlReadArg::hasMatchFilter()                           { return false; }
 void CThorXmlReadArg::getEncryptKey(size32_t & keyLen, void * & key) { keyLen = 0; key = 0; }
 
+//CThorNewDiskReadArg
+
+unsigned CThorNewDiskReadArg::getFlags() { return 0; }
+void CThorNewDiskReadArg::setCallback(IThorDiskCallback * _tc) { fpp = _tc; }
+bool CThorNewDiskReadArg::canMatchAny()                              { return true; }
+void CThorNewDiskReadArg::createSegmentMonitors(IIndexReadContext *ctx) {}
+bool CThorNewDiskReadArg::canMatch(const void * row)                 { return true; }
+bool CThorNewDiskReadArg::hasMatchFilter()                           { return false; }
+void CThorNewDiskReadArg::getEncryptKey(size32_t & keyLen, void * & key) { keyLen = 0; key = 0; }
+
+unsigned __int64 CThorNewDiskReadArg::getChooseNLimit()              { return I64C(0x7fffffffffffffff); }
+unsigned __int64 CThorNewDiskReadArg::getRowLimit()                  { return (unsigned __int64) -1; }
+void CThorNewDiskReadArg::onLimitExceeded()                          { }
+const char * CThorNewDiskReadArg::queryFormat()                      { return "flat"; }
+void CThorNewDiskReadArg::getFormatOptions(IXmlWriter & options)     { }
+void CThorNewDiskReadArg::getFormatDynOptions(IXmlWriter & options)  { }
+
+bool CThorNewDiskReadArg::needTransform() { return false; }
+bool CThorNewDiskReadArg::transformMayFilter() { return false; }
+unsigned __int64 CThorNewDiskReadArg::getKeyedLimit() { return (unsigned __int64) -1; }
+void CThorNewDiskReadArg::onKeyedLimitExceeded() { }
+ISteppingMeta * CThorNewDiskReadArg::queryRawSteppingMeta() { return NULL; }
+ISteppingMeta * CThorNewDiskReadArg::queryProjectedSteppingMeta() { return NULL; }
+void CThorNewDiskReadArg::mapOutputToInput(ARowBuilder & rowBuilder, const void * projectedRow, unsigned numFields) { }
+size32_t CThorNewDiskReadArg::transform(ARowBuilder & rowBuilder, const void * src)
+{
+    rtlFail(800, "transform() should not be called, input is deserialized");
+}
+
+size32_t CThorNewDiskReadArg::unfilteredTransform(ARowBuilder & rowBuilder, const void * src) { return 0; }
+size32_t CThorNewDiskReadArg::transformOnLimitExceeded(ARowBuilder & rowBuilder) { return 0; }
+size32_t CThorNewDiskReadArg::transformOnKeyedLimitExceeded(ARowBuilder & rowBuilder) { return 0; }
+
 //CThorChildGroupAggregateArg
 
 size32_t CThorChildGroupAggregateArg::mergeAggregate(ARowBuilder & rowBuilder, const void * src) { rtlFailUnexpected(); return 0; }

+ 1 - 0
rtl/eclrtl/eclhelper_dyn.cpp

@@ -159,6 +159,7 @@ public:
         filters.addFilter(in->queryRecordAccessor(true), filter);
         flags |= TDRkeyed;
     }
+
 private:
     StringAttr fileName;
     UnexpectedVirtualFieldCallback fieldCallback;

+ 6 - 0
rtl/eclrtl/rtlds.cpp

@@ -1893,6 +1893,12 @@ void MemoryBufferBuilder::finishRow(size32_t length)
     reserved = 0;
 }
 
+void MemoryBufferBuilder::appendBytes(size32_t len, const void * ptr)
+{
+    dbgassertex(buffer);
+    buffer->append(len, ptr);
+}
+
 void MemoryBufferBuilder::removeBytes(size32_t len)
 {
     dbgassertex(buffer);

+ 1 - 0
rtl/eclrtl/rtlds_imp.hpp

@@ -691,6 +691,7 @@ public:
         buffer = &_buffer;
     }
 
+    void appendBytes(size32_t len, const void * ptr);
     void removeBytes(size32_t len);
 
     virtual byte * ensureCapacity(size32_t required, const char * fieldName);

+ 54 - 6
rtl/eclrtl/rtldynfield.cpp

@@ -1025,8 +1025,8 @@ inline FieldMatchType &operator|=(FieldMatchType &a, FieldMatchType b) { return
 class GeneralRecordTranslator : public CInterfaceOf<IDynamicTransform>
 {
 public:
-    GeneralRecordTranslator(const RtlRecord &_destRecInfo, const RtlRecord &_srcRecInfo, bool _binarySource)
-        : destRecInfo(_destRecInfo), sourceRecInfo(_srcRecInfo), binarySource(_binarySource)
+    GeneralRecordTranslator(const RtlRecord &_destRecInfo, const RtlRecord &_srcRecInfo, bool _binarySource, type_vals _callbackRawType = type_any)
+        : destRecInfo(_destRecInfo), sourceRecInfo(_srcRecInfo), binarySource(_binarySource), callbackRawType(_callbackRawType)
     {
         matchInfo = new MatchInfo[destRecInfo.getNumFields()];
         createMatchInfo();
@@ -1065,7 +1065,7 @@ public:
     }
     virtual bool needsTranslate() const override
     {
-        return (matchFlags & ~(match_link|match_inifblock)) != 0;
+        return !binarySource || (matchFlags & ~(match_link|match_inifblock)) != 0;
     }
     virtual bool needsNonVirtualTranslate() const override
     {
@@ -1267,7 +1267,10 @@ private:
                     {
                         const IDynamicFieldValueFetcher &callbackRowHandler = *(const IDynamicFieldValueFetcher *)sourceRow;
                         source = callbackRowHandler.queryValue(matchField, copySize);
-                        offset = translateScalarFromUtf8(builder, offset, field, *type, *sourceType, (const char *)source, (size_t)copySize);
+                        if (callbackRawType == type_string)
+                            offset = translateScalarFromString(builder, offset, field, *type, *sourceType, (const char *)source, (size_t)copySize);
+                        else
+                            offset = translateScalarFromUtf8(builder, offset, field, *type, *sourceType, (const char *)source, (size_t)copySize);
                         break;
                     }
                     case match_link:
@@ -1457,6 +1460,7 @@ private:
     const RtlRecord &destRecInfo;
     const RtlRecord &sourceRecInfo;
     bool binarySource = true;
+    type_vals callbackRawType;
     unsigned fixedDelta = 0;  // total size of all fixed-size source fields that are not matched
     UnsignedArray allUnmatched;  // List of all source fields that are unmatched (so that we can trace them)
     UnsignedArray variableUnmatched;  // List of all variable-size source fields that are unmatched
@@ -1519,6 +1523,50 @@ private:
         }
         return offset;
     }
+    static size32_t translateScalarFromString(ARowBuilder &builder, size32_t offset, const RtlFieldInfo *field, const RtlTypeInfo &destType, const RtlTypeInfo &sourceType, const char *source, size_t srcSize)
+    {
+        switch(destType.getType())
+        {
+        case type_boolean:
+        case type_int:
+        case type_swapint:
+        case type_packedint:
+        case type_filepos:
+        case type_keyedint:
+        {
+            __int64 res = rtlStrToInt8(srcSize, source);
+            offset = destType.buildInt(builder, offset, field, res);
+            break;
+        }
+        case type_real:
+        {
+            double res = rtlStrToReal(srcSize, source);
+            offset = destType.buildReal(builder, offset, field, res);
+            break;
+        }
+        case type_data:
+        case type_string:
+        case type_decimal:  // Go via string - not common enough to special-case
+        case type_varstring:
+        case type_qstring:
+        case type_utf8:
+            //MORE: Could special case casting from utf8 to utf8 similar to strings above
+        case type_unicode:
+        case type_varunicode:
+        {
+            offset = destType.buildString(builder, offset, field, srcSize, source);
+            break;
+        }
+        case type_set:
+        {
+            UNIMPLEMENTED; // JCS->GH - but perhaps can/should translate using iterator too?
+            break;
+        }
+        default:
+            throwUnexpected();
+        }
+        return offset;
+    }
     static bool canTranslateNonScalar(const RtlTypeInfo * type, const RtlTypeInfo * sourceType)
     {
         auto target = type->getType();
@@ -1776,9 +1824,9 @@ extern ECLRTL_API const IDynamicTransform *createRecordTranslator(const RtlRecor
     return new GeneralRecordTranslator(destRecInfo, srcRecInfo, true);
 }
 
-extern ECLRTL_API const IDynamicTransform *createRecordTranslatorViaCallback(const RtlRecord &destRecInfo, const RtlRecord &srcRecInfo)
+extern ECLRTL_API const IDynamicTransform *createRecordTranslatorViaCallback(const RtlRecord &destRecInfo, const RtlRecord &srcRecInfo, type_vals rawType)
 {
-    return new GeneralRecordTranslator(destRecInfo, srcRecInfo, false);
+    return new GeneralRecordTranslator(destRecInfo, srcRecInfo, false, rawType);
 }
 
 extern ECLRTL_API void throwTranslationError(const RtlRecord & destRecInfo, const RtlRecord & srcRecInfo, const char * filename)

+ 1 - 1
rtl/eclrtl/rtldynfield.hpp

@@ -181,7 +181,7 @@ interface IDynamicTransformViaCallback : public IInterface
 };
 
 extern ECLRTL_API const IDynamicTransform *createRecordTranslator(const RtlRecord &_destRecInfo, const RtlRecord &_srcRecInfo);
-extern ECLRTL_API const IDynamicTransform *createRecordTranslatorViaCallback(const RtlRecord &_destRecInfo, const RtlRecord &_srcRecInfo);
+extern ECLRTL_API const IDynamicTransform *createRecordTranslatorViaCallback(const RtlRecord &_destRecInfo, const RtlRecord &_srcRecInfo, type_vals rawType);
 extern ECLRTL_API void throwTranslationError(const RtlRecord &_destRecInfo, const RtlRecord &_srcRecInfo, const char * filename);
 
 extern ECLRTL_API const IKeyTranslator *createKeyTranslator(const RtlRecord &_destRecInfo, const RtlRecord &_srcRecInfo);

+ 2 - 1
rtl/eclrtl/rtlnewkey.cpp

@@ -2287,10 +2287,11 @@ void RowFilter::remove(unsigned idx)
     filters.remove(idx);
 }
 
-void RowFilter::clear()
+RowFilter & RowFilter::clear()
 {
     filters.kill();
     numFieldsRequired = 0;
+    return *this;
 }
 
 void RowFilter::recalcFieldsRequired()

+ 1 - 1
rtl/eclrtl/rtlnewkey.hpp

@@ -72,7 +72,7 @@ public:
     void remapField(unsigned filterIdx, unsigned newFieldNum);
     void recalcFieldsRequired();
     void remove(unsigned idx);
-    void clear();
+    RowFilter & clear();
     void appendFilters(const IConstArrayOf<IFieldFilter> &_filters);
 protected:
     IConstArrayOf<IFieldFilter> filters;

+ 16 - 4
rtl/include/eclhelper.hpp

@@ -1125,6 +1125,7 @@ enum
     TDRunfilteredcount  = 0x00800000,       // count/aggregegate doesn't have an additional filter
     TDRfilenamecallback = 0x01000000,
     TDRtransformvirtual = 0x02000000,       // transform uses a virtual field.
+    TDRdynformatoptions = 0x04000000,
 
 //disk write flags
     TDWextend           = 0x0100,
@@ -2400,6 +2401,15 @@ struct IHThorDiskReadBaseArg : extends IHThorCompoundBaseArg
 };
 
 
+//New prototype interface for reading any format file through the same interface
+//liable to change at any point.
+struct IHThorNewDiskReadBaseArg : extends IHThorDiskReadBaseArg
+{
+    virtual const char * queryFormat() = 0;
+    virtual void getFormatOptions(IXmlWriter & options) = 0;
+    virtual void getFormatDynOptions(IXmlWriter & options) = 0;
+};
+
 //The following are mixin classes added to one of the activity base interfaces above.
 // common between Read, Normalize
 struct IHThorCompoundExtra : public IInterface
@@ -2540,6 +2550,12 @@ struct IHThorDiskGroupAggregateArg : extends IHThorDiskReadBaseArg, extends IHTh
 };
 
 
+struct IHThorNewDiskReadArg : extends IHThorNewDiskReadBaseArg, extends IHThorSourceLimitTransformExtra, extends IHThorCompoundReadExtra
+{
+    COMMON_NEWTHOR_FUNCTIONS
+};
+
+
 struct IHThorCsvReadArg: public IHThorDiskReadBaseArg
 {
     virtual unsigned getMaxColumns() = 0;
@@ -2559,10 +2575,6 @@ struct IHThorXmlReadArg: public IHThorDiskReadBaseArg
     virtual void onLimitExceeded() = 0;
 };
 
-struct IHThorNewDiskReadArg: public IHThorDiskReadArg
-{
-};
-
 typedef unsigned thor_loop_counter_t;
 struct IHThorLoopArg : public IHThorArg
 {

+ 36 - 0
rtl/include/eclhelper_base.hpp

@@ -1127,6 +1127,42 @@ public:
     IThorDiskCallback * fpp;
 };
 
+class ECLRTL_API CThorNewDiskReadArg : public CThorArgOf<IHThorNewDiskReadArg>
+{
+    virtual unsigned getFlags() override;
+    virtual void setCallback(IThorDiskCallback * _tc) override;
+
+    virtual bool canMatchAny() override;
+    virtual void createSegmentMonitors(IIndexReadContext *ctx) override;
+    virtual bool canMatch(const void * row) override;
+    virtual bool hasMatchFilter() override;
+    virtual void getEncryptKey(size32_t & keyLen, void * & key) override;
+
+    virtual unsigned __int64 getChooseNLimit() override;
+    virtual unsigned __int64 getRowLimit() override;
+    virtual void onLimitExceeded() override;
+
+    virtual bool needTransform() override;
+    virtual bool transformMayFilter() override;
+    virtual unsigned __int64 getKeyedLimit() override;
+    virtual void onKeyedLimitExceeded() override;
+    virtual ISteppingMeta * queryRawSteppingMeta() override;
+    virtual ISteppingMeta * queryProjectedSteppingMeta() override;
+    virtual void mapOutputToInput(ARowBuilder & rowBuilder, const void * projectedRow, unsigned numFields) override;
+    virtual size32_t transform(ARowBuilder & rowBuilder, const void * src) override;
+    virtual size32_t unfilteredTransform(ARowBuilder & rowBuilder, const void * src) override;
+
+    virtual size32_t transformOnLimitExceeded(ARowBuilder & rowBuilder) override;
+    virtual size32_t transformOnKeyedLimitExceeded(ARowBuilder & rowBuilder) override;
+    virtual const char * queryFormat() override;
+    virtual void getFormatOptions(IXmlWriter & options) override;
+    virtual void getFormatDynOptions(IXmlWriter & options) override;
+
+public:
+    IThorDiskCallback * fpp;
+};
+
+
 //Normalize
 class ECLRTL_API CThorChildNormalizeArg : public CThorArgOf<IHThorChildNormalizeArg>
 {

+ 1 - 1
system/jlib/jptree.cpp

@@ -6065,7 +6065,7 @@ bool validateXMLParseXPath(const char *xpath, StringBuffer *error)
     return true;
 }
 
-bool areMatchingPTrees(IPropertyTree * left, IPropertyTree * right)
+bool areMatchingPTrees(const IPropertyTree * left, const IPropertyTree * right)
 {
     if (left == right)
         return true;

+ 2 - 2
system/jlib/jptree.hpp

@@ -120,7 +120,7 @@ interface jlib_decl IPropertyTree : extends serializable
     virtual bool hasChildren() const = 0;
     virtual unsigned numUniq() = 0;
     virtual unsigned numChildren() = 0;
-    virtual bool isCaseInsensitive() = 0;
+    virtual bool isCaseInsensitive() const = 0;
     virtual bool IsShared() const = 0;
     virtual void localizeElements(const char *xpath, bool allTail=false) = 0;
     virtual unsigned getCount(const char *xpath) = 0;
@@ -209,7 +209,7 @@ jlib_decl IPullPTreeReader *createPullJSONBufferReader(const void *buf, size32_t
 jlib_decl void mergePTree(IPropertyTree *target, IPropertyTree *toMerge);
 jlib_decl void synchronizePTree(IPropertyTree *target, IPropertyTree *source, bool removeTargetsNotInSource=true, bool rootsMustMatch=true);
 jlib_decl IPropertyTree *ensurePTree(IPropertyTree *root, const char *xpath);
-jlib_decl bool areMatchingPTrees(IPropertyTree * left, IPropertyTree * right);
+jlib_decl bool areMatchingPTrees(const IPropertyTree * left, const IPropertyTree * right);
 
 jlib_decl IPropertyTree *createPTree(MemoryBuffer &src, byte flags=ipt_none);
 

+ 1 - 1
system/jlib/jptree.ipp

@@ -648,7 +648,7 @@ public:
     virtual bool hasChildren() const override { return children && children->count()?true:false; }
     virtual unsigned numUniq() override { return checkChildren()?children->count():0; }
     virtual unsigned numChildren() override;
-    virtual bool isCaseInsensitive() override { return isnocase(); }
+    virtual bool isCaseInsensitive() const override { return isnocase(); }
     virtual unsigned getCount(const char *xpath) override;
 // serializable impl.
     virtual void serialize(MemoryBuffer &tgt) override;

+ 14 - 26
system/jlib/jrowstream.cpp

@@ -20,49 +20,37 @@
 
 #include "jrowstream.hpp"
 
-class NullRawRowStream : public CInterfaceOf<IRawRowStream>
+//---------------------------------------------------------------------------------------------------------------------
+
+class NullDiskRowStream : public CInterfaceOf<IDiskRowStream>
 {
-    virtual bool getCursor(MemoryBuffer & cursor)
+    virtual bool getCursor(MemoryBuffer & cursor) override
     {
         return true;
     }
-    virtual void setCursor(MemoryBuffer & cursor)
+    virtual void setCursor(MemoryBuffer & cursor) override
     {
     }
     virtual void stop()
     {
     }
-    virtual const void *nextRow(size32_t & size)
+    virtual const void *nextRow(size32_t & size) override
     {
         size = 0;
         return eofRow;
     }
-};
-static NullRawRowStream nullRawStream;
-
-IRawRowStream * queryNullRawRowStream()
-{
-    return &nullRawStream;
-}
-
-IRawRowStream * createNullRawRowStream()
-{
-    return new NullRawRowStream;
-}
-
-
-//---------------------------------------------------------------------------------------------------------------------
-
-class NullAllocRowStream : public CInterfaceOf<IAllocRowStream>
-{
-    virtual const void *nextRow()
+    virtual const void *nextRow() override
+    {
+        return eofRow;
+    }
+    virtual const void *nextRow(MemoryBufferBuilder & builder) override
     {
         return eofRow;
     }
 };
-static NullAllocRowStream nullAllocStream;
 
-IAllocRowStream * queryNullAllocatedRowStream()
+static NullDiskRowStream nullDiskRowStream;
+IDiskRowStream * queryNullDiskRowStream()
 {
-    return &nullAllocStream;
+    return &nullDiskRowStream;
 }

+ 15 - 25
system/jlib/jrowstream.hpp

@@ -19,6 +19,7 @@
 #define JROWSTREAM_INCL
 
 #include "jiface.hpp"
+#include "jio.hpp"
 
 //The following values are used as values for special rows which are returned in the row stream
 enum class SpecialRow : memsize_t
@@ -44,17 +45,21 @@ inline SpecialRow getSpecialRowType(const void * row) { return (SpecialRow)(mems
 
 //Base interface for reading a stream of rows
 class MemoryBuffer;
-interface IRowStreamBase : extends IInterface
+class MemoryBufferBuilder;
+
+//An interface for reading rows - which can request the row in the most efficient way for the caller.
+interface IDiskRowStream : extends IRowStream
 {
+// Defined in IRowStream, here for documentation:
+// Request a row which is owned by the caller, and must be freed once it is finished with.
+    virtual const void *nextRow() override =0;
+    virtual void stop() override = 0;                              // after stop called NULL is returned
+
     virtual bool getCursor(MemoryBuffer & cursor) = 0;
     virtual void setCursor(MemoryBuffer & cursor) = 0;
-    virtual void stop() = 0;                              // after stop called NULL is returned
-};
 
-//An interface for reading rows from which are not cloned
-interface IRawRowStream : extends IRowStreamBase
-{
-    virtual const void *nextRow(size32_t & size)=0;       // rows returned are only valid until next call.  Size is the number of bytes in the row.
+// rows returned are only valid until next call.  Size is the number of bytes in the row.
+    virtual const void *nextRow(size32_t & size)=0;
 
     inline const void *ungroupedNextRow(size32_t & size)  // size will not include the size of the eog
     {
@@ -65,27 +70,12 @@ interface IRawRowStream : extends IRowStreamBase
                 return ret;
         }
     }
-};
-
-//An interface for reading rows which have been allocated
-interface IAllocRowStream : extends IInterface
-{
-    virtual const void *nextRow()=0;                      // rows returned must be freed
 
-    inline const void *ungroupedNextRow()
-    {
-        for (;;)
-        {
-            const void *ret = nextRow();
-            if (likely(!isEndOfGroup(ret)))
-                return ret;
-        }
-    }
+    virtual const void *nextRow(MemoryBufferBuilder & builder)=0;
+    // rows returned are created in the target buffer.  This should be generalized to an ARowBuilder
 };
 
 
-extern jlib_decl IRawRowStream * queryNullRawRowStream();
-extern jlib_decl IAllocRowStream * queryNullAllocatedRowStream();
-extern jlib_decl IRawRowStream * createNullRawRowStream();
+extern jlib_decl IDiskRowStream * queryNullDiskRowStream();
 
 #endif

+ 89 - 0
testing/regress/ecl/csvoptions.ecl

@@ -0,0 +1,89 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+//version format='ASCII'
+//version format='UNICODE'
+//version format='ASCII',optRemoteRead=true
+//version format='UNICODE',optRemoteRead=true
+//xxversion format='EBCDIC'     output doesn't seem to be supported currently, and not sure input is ever used
+
+import ^ as root;
+csvFormat := #IFDEFINED(root.format, 'UNICODE') + ',';
+optRemoteRead := #IFDEFINED(root.optRemoteRead, false);
+
+import $.setup;
+prefix := setup.Files(false, false).QueryFilePrefix;
+
+// Roxie needs this to resolve files at run time
+#option ('allowVariableRoxieFilenames', 1);
+#option('forceRemoteRead', optRemoteRead);
+
+VarString EmptyString := '' : STORED('dummy');
+
+rec3 := RECORD
+  string f1;
+  string f2;
+  string f3;
+END;
+
+rec5 := RECORD(rec3)
+  string f4;
+  string f5;
+END;
+
+textLines := DATASET([
+    '!abc!,dêf,gêll',
+    '!abc,"dêf!,hêllo"',
+    '"one " ,"two "," thrêê "'
+    ], { string line });
+
+OUTPUT(textLines, ,prefix + 'csv-options'+EmptyString, OVERWRITE, CSV(#EXPAND(csvFormat) MAXSIZE(9999)));
+
+
+generateOutput3(options) := MACRO
+    OUTPUT(DATASET(prefix + 'csv-options'+EmptyString, rec3, CSV(#EXPAND(csvFormat) #EXPAND(options) MAXSIZE(9999))))
+ENDMACRO;
+
+generateOutput5(options) := MACRO
+    OUTPUT(DATASET(prefix + 'csv-options'+EmptyString, rec5, CSV(#EXPAND(csvFormat) #EXPAND(options) MAXSIZE(9999))))
+ENDMACRO;
+
+output('Quote');
+generateOutput3('');
+generateOutput3('QUOTE(\'\'),');
+generateOutput3('QUOTE(\'!\'),');
+generateOutput3('QUOTE(\'"\'),');
+generateOutput3('QUOTE(\'\'),NOTRIM,');
+generateOutput3('QUOTE(\'"\'),NOTRIM,');
+
+output('Separator');
+generateOutput3('SEPARATOR(\'!\'),');
+generateOutput5('SEPARATOR(\'ê\'),');
+generateOutput5('SEPARATOR(\'ê\'),');  //The following currently ignores the second item generateOutput5('SEPARATOR(\'ê\'),SEPARATOR(\',\'),');
+generateOutput5('SEPARATOR([\'ê\',\',\']),');
+
+output('Terminator');
+generateOutput3('QUOTE(\'\'),TERMINATOR(\'!\'),');
+generateOutput3('QUOTE(\'\'),TERMINATOR(\'\\n\'),');            //You shouldn't really need to use \\n, but too likely to break things
+
+output('Heading');
+generateOutput5('SEPARATOR(\'ê\'),HEADING(0),');
+generateOutput5('SEPARATOR(\'ê\'),HEADING(1),');
+generateOutput5('SEPARATOR(\'ê\'),HEADING(2),');
+
+output('Escape');
+generateOutput5('ESCAPE(\'!\'),');

+ 89 - 0
testing/regress/ecl/csvoptions2.ecl

@@ -0,0 +1,89 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+//version format='ASCII'
+//version format='UNICODE'
+//version format='ASCII',optRemoteRead=true
+//version format='UNICODE',optRemoteRead=true
+//xxversion format='EBCDIC'     output doesn't seem to be supported currently, and not sure input is ever used
+
+import ^ as root;
+csvFormat := #IFDEFINED(root.format, 'UNICODE') + ',';
+optRemoteRead := #IFDEFINED(root.optRemoteRead, false);
+
+import $.setup;
+prefix := setup.Files(false, false).QueryFilePrefix;
+
+// Roxie needs this to resolve files at run time
+#option ('allowVariableRoxieFilenames', 1);
+#option('forceRemoteRead', optRemoteRead);
+
+VarString EmptyString := '' : STORED('dummy');
+
+rec3 := RECORD
+  string f1;
+  string f2;
+  string f3;
+END;
+
+rec5 := RECORD(rec3)
+  string f4;
+  string f5;
+END;
+
+textLines := DATASET([
+    {'!abc!','dêf','gêll\'s'},
+    {'!abc','"dêf!','hêllo"'},
+    {'"one " ','"two "','" thrêê "'}
+    ], rec3);
+
+OUTPUT(textLines, ,prefix + 'csv-options'+EmptyString, OVERWRITE, CSV(#EXPAND(csvFormat) QUOTE('\''),SEPARATOR('$$'),TERMINATOR(';'),MAXSIZE(9999)));
+
+
+generateOutput3(options) := MACRO
+    OUTPUT(DATASET(prefix + 'csv-options'+EmptyString, rec3, CSV(#EXPAND(csvFormat) #EXPAND(options) MAXSIZE(9999))))
+ENDMACRO;
+
+generateOutput5(options) := MACRO
+    OUTPUT(DATASET(prefix + 'csv-options'+EmptyString, rec5, CSV(#EXPAND(csvFormat) #EXPAND(options) MAXSIZE(9999))))
+ENDMACRO;
+
+output('Quote');
+generateOutput3('');
+generateOutput3('QUOTE(\'\'),');
+generateOutput3('QUOTE(\'!\'),');
+generateOutput3('QUOTE(\'"\'),');
+generateOutput3('QUOTE(\'\'),NOTRIM,');
+generateOutput3('QUOTE(\'"\'),NOTRIM,');
+
+output('Separator');
+generateOutput3('SEPARATOR(\'!\'),');
+generateOutput5('SEPARATOR(\'ê\'),');
+generateOutput5('SEPARATOR(\'ê\'),');  //The following currently ignores the second item generateOutput5('SEPARATOR(\'ê\'),SEPARATOR(\',\'),');
+generateOutput5('SEPARATOR([\'ê\',\',\']),');
+
+output('Terminator');
+generateOutput3('QUOTE(\'\'),TERMINATOR(\'!\'),');
+generateOutput3('QUOTE(\'\'),TERMINATOR(\'\\n\'),');            //You shouldn't really need to use \\n, but too likely to break things
+
+output('Heading');
+generateOutput5('SEPARATOR(\'ê\'),HEADING(0),');
+generateOutput5('SEPARATOR(\'ê\'),HEADING(1),');
+generateOutput5('SEPARATOR(\'ê\'),HEADING(2),');
+
+output('Escape');
+generateOutput5('ESCAPE(\'!\'),');

+ 80 - 0
testing/regress/ecl/csvvirtual.ecl

@@ -0,0 +1,80 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2019 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+//xversion format='ASCII'   // has different filepositions
+//version format='UNICODE'
+//xversion format='ASCII',optRemoteRead=true
+//xversion format='UNICODE',optRemoteRead=true
+
+//noroxie      - see HPCC-22629
+
+import ^ as root;
+csvFormat := #IFDEFINED(root.format, 'UNICODE') + ',';
+optRemoteRead := #IFDEFINED(root.optRemoteRead, false);
+
+import $.setup;
+prefix := setup.Files(false, false).QueryFilePrefix;
+
+// Roxie needs this to resolve files at run time
+#option ('allowVariableRoxieFilenames', 1);
+#option('forceRemoteRead', optRemoteRead);
+
+VarString EmptyString := '' : STORED('dummy');
+
+rec3 := RECORD
+  string f1;
+  string f2;
+  string f3;
+  unsigned8 filepos{virtual(fileposition)};
+  unsigned8 filepos2{virtual(localfileposition)};
+END;
+
+rec4 := RECORD//(rec3)  // inheritance currently doesn't work, need to investigate why not
+  string f1;
+  string f2;
+  string f3;
+  unsigned8 filepos{virtual(fileposition)};
+  unsigned8 filepos2{virtual(localfileposition)};
+  string filename{virtual(logicalfilename)};
+END;
+
+textLines := DATASET([
+    '!abc!,dêf,gêll',
+    '!abc,"dêf!,hêllo"',
+    '"one " ,"two "," thrêê "'
+    ], { string line });
+
+filename := prefix + 'csv-options'+EmptyString;
+
+inDs := DATASET('{' + filename + ',' + filename + '}', rec3, CSV(#EXPAND(csvFormat) MAXSIZE(9999)));
+inDs2 := DATASET('{' + filename + ',' + filename + '}', rec4, CSV(#EXPAND(csvFormat) MAXSIZE(9999)));
+
+sequential(
+    OUTPUT(textLines, ,filename, OVERWRITE, CSV(#EXPAND(csvFormat) MAXSIZE(9999)));
+    output('Quote');
+    output(inDs);
+    output(inDs, { f1 });
+    output(inDs, { f1, filepos, filepos2 });
+    output(inDs, { f1, filepos2, filepos });
+    output(inDs, { f1, DATA8 castlocal := (>DATA8<)(big_endian unsigned8)filepos2, filepos });
+    output(inDs, { filepos2, f3, filepos });
+    output(inDs, { f3, filepos2, f2, f1, filepos });
+    output(inDs, { f1, filepos, biaslocal := (unsigned8)filepos2-(unsigned8)0x8000000000000000});
+    output(inDs2, { f1, filepos, biaslocal := (unsigned8)filepos2-(unsigned8)0x8000000000000000});
+    //output(inDs2, { f1, filepos, biaslocal := (unsigned8)filepos2-(unsigned8)0x8000000000000000});
+    //output(inDs2, { f1, filepos, filepos2, filename[length(filename)-13..] });  Currently generates a compile error... JIRA #xxxx
+);

+ 103 - 0
testing/regress/ecl/key/csvoptions.xml

@@ -0,0 +1,103 @@
+<Dataset name='Result 1'>
+</Dataset>
+<Dataset name='Result 2'>
+ <Row><Result_2>Quote</Result_2></Row>
+</Dataset>
+<Dataset name='Result 3'>
+ <Row><f1>!abc!</f1><f2>d&#234;f</f2><f3>g&#234;ll</f3></Row>
+ <Row><f1>!abc</f1><f2>d&#234;f!,h&#234;llo</f2><f3></f3></Row>
+ <Row><f1>one </f1><f2>two </f2><f3> thr&#234;&#234; </f3></Row>
+</Dataset>
+<Dataset name='Result 4'>
+ <Row><f1>!abc!</f1><f2>d&#234;f</f2><f3>g&#234;ll</f3></Row>
+ <Row><f1>!abc</f1><f2>&quot;d&#234;f!</f2><f3>h&#234;llo&quot;</f3></Row>
+ <Row><f1>&quot;one &quot;</f1><f2>&quot;two &quot;</f2><f3>&quot; thr&#234;&#234; &quot;</f3></Row>
+</Dataset>
+<Dataset name='Result 5'>
+ <Row><f1>abc</f1><f2>d&#234;f</f2><f3>g&#234;ll</f3></Row>
+ <Row><f1>abc,&quot;d&#234;f</f1><f2>h&#234;llo&quot;</f2><f3></f3></Row>
+ <Row><f1>&quot;one &quot;</f1><f2>&quot;two &quot;</f2><f3>&quot; thr&#234;&#234; &quot;</f3></Row>
+</Dataset>
+<Dataset name='Result 6'>
+ <Row><f1>!abc!</f1><f2>d&#234;f</f2><f3>g&#234;ll</f3></Row>
+ <Row><f1>!abc</f1><f2>d&#234;f!,h&#234;llo</f2><f3></f3></Row>
+ <Row><f1>one </f1><f2>two </f2><f3> thr&#234;&#234; </f3></Row>
+</Dataset>
+<Dataset name='Result 7'>
+ <Row><f1>!abc!</f1><f2>d&#234;f</f2><f3>g&#234;ll</f3></Row>
+ <Row><f1>!abc</f1><f2>&quot;d&#234;f!</f2><f3>h&#234;llo&quot;</f3></Row>
+ <Row><f1>&quot;one &quot; </f1><f2>&quot;two &quot;</f2><f3>&quot; thr&#234;&#234; &quot;</f3></Row>
+</Dataset>
+<Dataset name='Result 8'>
+ <Row><f1>!abc!</f1><f2>d&#234;f</f2><f3>g&#234;ll</f3></Row>
+ <Row><f1>!abc</f1><f2>d&#234;f!,h&#234;llo</f2><f3></f3></Row>
+ <Row><f1>one &quot; </f1><f2>two </f2><f3> thr&#234;&#234; </f3></Row>
+</Dataset>
+<Dataset name='Result 9'>
+ <Row><Result_9>Separator</Result_9></Row>
+</Dataset>
+<Dataset name='Result 10'>
+ <Row><f1></f1><f2>abc</f2><f3>,d&#234;f,g&#234;ll</f3></Row>
+ <Row><f1></f1><f2>abc,&quot;d&#234;f</f2><f3>,h&#234;llo&quot;</f3></Row>
+ <Row><f1>one &quot; ,&quot;two &quot;,&quot; thr&#234;&#234; &quot;</f1><f2></f2><f3></f3></Row>
+</Dataset>
+<Dataset name='Result 11'>
+ <Row><f1>!abc!,d</f1><f2>f,g</f2><f3>ll</f3><f4></f4><f5></f5></Row>
+ <Row><f1>!abc,&quot;d</f1><f2>f!,h</f2><f3>llo&quot;</f3><f4></f4><f5></f5></Row>
+ <Row><f1>one &quot; ,&quot;two &quot;,&quot; thr</f1><f2></f2><f3>
+</f3><f4></f4><f5></f5></Row>
+</Dataset>
+<Dataset name='Result 12'>
+ <Row><f1>!abc!,d</f1><f2>f,g</f2><f3>ll</f3><f4></f4><f5></f5></Row>
+ <Row><f1>!abc,&quot;d</f1><f2>f!,h</f2><f3>llo&quot;</f3><f4></f4><f5></f5></Row>
+ <Row><f1>one &quot; ,&quot;two &quot;,&quot; thr</f1><f2></f2><f3>
+</f3><f4></f4><f5></f5></Row>
+</Dataset>
+<Dataset name='Result 13'>
+ <Row><f1>!abc!</f1><f2>d</f2><f3>f</f3><f4>g</f4><f5>ll</f5></Row>
+ <Row><f1>!abc</f1><f2>d&#234;f!,h&#234;llo</f2><f3></f3><f4></f4><f5></f5></Row>
+ <Row><f1>one </f1><f2>two </f2><f3> thr&#234;&#234; </f3><f4></f4><f5></f5></Row>
+</Dataset>
+<Dataset name='Result 14'>
+ <Row><Result_14>Terminator</Result_14></Row>
+</Dataset>
+<Dataset name='Result 15'>
+ <Row><f1></f1><f2></f2><f3></f3></Row>
+ <Row><f1>abc</f1><f2></f2><f3></f3></Row>
+ <Row><f1></f1><f2>d&#234;f</f2><f3>g&#234;ll
+</f3></Row>
+ <Row><f1>abc</f1><f2>&quot;d&#234;f</f2><f3></f3></Row>
+ <Row><f1></f1><f2>h&#234;llo&quot;
+&quot;one &quot;</f2><f3>&quot;two &quot;</f3></Row>
+</Dataset>
+<Dataset name='Result 16'>
+ <Row><f1>!abc!</f1><f2>d&#234;f</f2><f3>g&#234;ll</f3></Row>
+ <Row><f1>!abc</f1><f2>&quot;d&#234;f!</f2><f3>h&#234;llo&quot;</f3></Row>
+ <Row><f1>&quot;one &quot;</f1><f2>&quot;two &quot;</f2><f3>&quot; thr&#234;&#234; &quot;</f3></Row>
+</Dataset>
+<Dataset name='Result 17'>
+ <Row><Result_17>Heading</Result_17></Row>
+</Dataset>
+<Dataset name='Result 18'>
+ <Row><f1>!abc!,d</f1><f2>f,g</f2><f3>ll</f3><f4></f4><f5></f5></Row>
+ <Row><f1>!abc,&quot;d</f1><f2>f!,h</f2><f3>llo&quot;</f3><f4></f4><f5></f5></Row>
+ <Row><f1>one &quot; ,&quot;two &quot;,&quot; thr</f1><f2></f2><f3>
+</f3><f4></f4><f5></f5></Row>
+</Dataset>
+<Dataset name='Result 19'>
+ <Row><f1>!abc,&quot;d</f1><f2>f!,h</f2><f3>llo&quot;</f3><f4></f4><f5></f5></Row>
+ <Row><f1>one &quot; ,&quot;two &quot;,&quot; thr</f1><f2></f2><f3>
+</f3><f4></f4><f5></f5></Row>
+</Dataset>
+<Dataset name='Result 20'>
+ <Row><f1>one &quot; ,&quot;two &quot;,&quot; thr</f1><f2></f2><f3>
+</f3><f4></f4><f5></f5></Row>
+</Dataset>
+<Dataset name='Result 21'>
+ <Row><Result_21>Escape</Result_21></Row>
+</Dataset>
+<Dataset name='Result 22'>
+ <Row><f1>abc,d&#234;f</f1><f2>g&#234;ll</f2><f3></f3><f4></f4><f5></f5></Row>
+ <Row><f1>abc</f1><f2>d&#234;f,h&#234;llo</f2><f3></f3><f4></f4><f5></f5></Row>
+ <Row><f1>one </f1><f2>two </f2><f3> thr&#234;&#234; </f3><f4></f4><f5></f5></Row>
+</Dataset>

+ 94 - 0
testing/regress/ecl/key/csvoptions2.xml

@@ -0,0 +1,94 @@
+<Dataset name='Result 1'>
+</Dataset>
+<Dataset name='Result 2'>
+ <Row><Result_2>Quote</Result_2></Row>
+</Dataset>
+<Dataset name='Result 3'>
+ <Row><f1>!abc!</f1><f2>d&#234;f</f2><f3>g&#234;ll&apos;s</f3></Row>
+ <Row><f1>!abc</f1><f2>&quot;d&#234;f!</f2><f3>h&#234;llo&quot;</f3></Row>
+ <Row><f1>&quot;one &quot;</f1><f2>&quot;two &quot;</f2><f3>&quot; thr&#234;&#234; &quot;</f3></Row>
+</Dataset>
+<Dataset name='Result 4'>
+ <Row><f1>!abc!</f1><f2>d&#234;f</f2><f3>&apos;g&#234;ll&apos;&apos;s&apos;</f3></Row>
+ <Row><f1>!abc</f1><f2>&quot;d&#234;f!</f2><f3>h&#234;llo&quot;</f3></Row>
+ <Row><f1>&quot;one &quot;</f1><f2>&quot;two &quot;</f2><f3>&quot; thr&#234;&#234; &quot;</f3></Row>
+</Dataset>
+<Dataset name='Result 5'>
+ <Row><f1>abc</f1><f2>d&#234;f</f2><f3>&apos;g&#234;ll&apos;&apos;s&apos;</f3></Row>
+ <Row><f1>abc$$&quot;d&#234;f</f1><f2>h&#234;llo&quot;</f2><f3></f3></Row>
+ <Row><f1>&quot;one &quot;</f1><f2>&quot;two &quot;</f2><f3>&quot; thr&#234;&#234; &quot;</f3></Row>
+</Dataset>
+<Dataset name='Result 6'>
+ <Row><f1>!abc!</f1><f2>d&#234;f</f2><f3>&apos;g&#234;ll&apos;&apos;s&apos;</f3></Row>
+ <Row><f1>!abc</f1><f2>d&#234;f!$$h&#234;llo</f2><f3></f3></Row>
+ <Row><f1>one </f1><f2>two </f2><f3> thr&#234;&#234; </f3></Row>
+</Dataset>
+<Dataset name='Result 7'>
+ <Row><f1>!abc!</f1><f2>d&#234;f</f2><f3>&apos;g&#234;ll&apos;&apos;s&apos;</f3></Row>
+ <Row><f1>!abc</f1><f2>&quot;d&#234;f!</f2><f3>h&#234;llo&quot;</f3></Row>
+ <Row><f1>&quot;one &quot;</f1><f2>&quot;two &quot;</f2><f3>&quot; thr&#234;&#234; &quot;</f3></Row>
+</Dataset>
+<Dataset name='Result 8'>
+ <Row><f1>!abc!</f1><f2>d&#234;f</f2><f3>&apos;g&#234;ll&apos;&apos;s&apos;</f3></Row>
+ <Row><f1>!abc</f1><f2>d&#234;f!$$h&#234;llo</f2><f3></f3></Row>
+ <Row><f1>one </f1><f2>two </f2><f3> thr&#234;&#234; </f3></Row>
+</Dataset>
+<Dataset name='Result 9'>
+ <Row><Result_9>Separator</Result_9></Row>
+</Dataset>
+<Dataset name='Result 10'>
+ <Row><f1></f1><f2>abc</f2><f3>$$d&#234;f$$&apos;g&#234;ll&apos;&apos;s&apos;</f3></Row>
+ <Row><f1></f1><f2>abc$$&quot;d&#234;f</f2><f3>$$h&#234;llo&quot;</f3></Row>
+ <Row><f1>&quot;one &quot;$$&quot;two &quot;$$&quot; thr&#234;&#234; &quot;</f1><f2></f2><f3></f3></Row>
+</Dataset>
+<Dataset name='Result 11'>
+ <Row><f1>!abc!$$d</f1><f2>f$$&apos;g</f2><f3>ll&apos;&apos;s&apos;</f3><f4></f4><f5></f5></Row>
+ <Row><f1>!abc$$&quot;d</f1><f2>f!$$h</f2><f3>llo&quot;</f3><f4></f4><f5></f5></Row>
+ <Row><f1>&quot;one &quot;$$&quot;two &quot;$$&quot; thr</f1><f2></f2><f3>&quot;</f3><f4></f4><f5></f5></Row>
+</Dataset>
+<Dataset name='Result 12'>
+ <Row><f1>!abc!$$d</f1><f2>f$$&apos;g</f2><f3>ll&apos;&apos;s&apos;</f3><f4></f4><f5></f5></Row>
+ <Row><f1>!abc$$&quot;d</f1><f2>f!$$h</f2><f3>llo&quot;</f3><f4></f4><f5></f5></Row>
+ <Row><f1>&quot;one &quot;$$&quot;two &quot;$$&quot; thr</f1><f2></f2><f3>&quot;</f3><f4></f4><f5></f5></Row>
+</Dataset>
+<Dataset name='Result 13'>
+ <Row><f1>!abc!$$d</f1><f2>f$$&apos;g</f2><f3>ll&apos;&apos;s&apos;</f3><f4></f4><f5></f5></Row>
+ <Row><f1>!abc$$&quot;d</f1><f2>f!$$h</f2><f3>llo&quot;</f3><f4></f4><f5></f5></Row>
+ <Row><f1>&quot;one &quot;$$&quot;two &quot;$$&quot; thr</f1><f2></f2><f3>&quot;</f3><f4></f4><f5></f5></Row>
+</Dataset>
+<Dataset name='Result 14'>
+ <Row><Result_14>Terminator</Result_14></Row>
+</Dataset>
+<Dataset name='Result 15'>
+ <Row><f1></f1><f2></f2><f3></f3></Row>
+ <Row><f1>abc</f1><f2></f2><f3></f3></Row>
+ <Row><f1></f1><f2>d&#234;f</f2><f3>&apos;g&#234;ll&apos;&apos;s&apos;;</f3></Row>
+ <Row><f1>abc</f1><f2>&quot;d&#234;f</f2><f3></f3></Row>
+ <Row><f1></f1><f2>h&#234;llo&quot;;&quot;one &quot;</f2><f3>&quot;two &quot;</f3></Row>
+</Dataset>
+<Dataset name='Result 16'>
+ <Row><f1>!abc!</f1><f2>d&#234;f</f2><f3>&apos;g&#234;ll&apos;&apos;s&apos;;!abc</f3></Row>
+</Dataset>
+<Dataset name='Result 17'>
+ <Row><Result_17>Heading</Result_17></Row>
+</Dataset>
+<Dataset name='Result 18'>
+ <Row><f1>!abc!$$d</f1><f2>f$$&apos;g</f2><f3>ll&apos;&apos;s&apos;</f3><f4></f4><f5></f5></Row>
+ <Row><f1>!abc$$&quot;d</f1><f2>f!$$h</f2><f3>llo&quot;</f3><f4></f4><f5></f5></Row>
+ <Row><f1>&quot;one &quot;$$&quot;two &quot;$$&quot; thr</f1><f2></f2><f3>&quot;</f3><f4></f4><f5></f5></Row>
+</Dataset>
+<Dataset name='Result 19'>
+ <Row><f1>!abc$$&quot;d</f1><f2>f!$$h</f2><f3>llo&quot;</f3><f4></f4><f5></f5></Row>
+ <Row><f1>&quot;one &quot;$$&quot;two &quot;$$&quot; thr</f1><f2></f2><f3>&quot;</f3><f4></f4><f5></f5></Row>
+</Dataset>
+<Dataset name='Result 20'>
+ <Row><f1>&quot;one &quot;$$&quot;two &quot;$$&quot; thr</f1><f2></f2><f3>&quot;</f3><f4></f4><f5></f5></Row>
+</Dataset>
+<Dataset name='Result 21'>
+ <Row><Result_21>Escape</Result_21></Row>
+</Dataset>
+<Dataset name='Result 22'>
+ <Row><f1>abc$$d&#234;f</f1><f2>g&#234;ll&apos;s</f2><f3></f3><f4></f4><f5></f5></Row>
+ <Row><f1>abc</f1><f2>&quot;d&#234;f$$h&#234;llo&quot;</f2><f3></f3><f4></f4><f5></f5></Row>
+ <Row><f1>&quot;one &quot;</f1><f2>&quot;two &quot;</f2><f3>&quot; thr&#234;&#234; &quot;</f3><f4></f4><f5></f5></Row>
+</Dataset>

+ 77 - 0
testing/regress/ecl/key/csvvirtual.xml

@@ -0,0 +1,77 @@
+<Dataset name='Result 1'>
+</Dataset>
+<Dataset name='Result 2'>
+ <Row><Result_2>Quote</Result_2></Row>
+</Dataset>
+<Dataset name='Result 3'>
+ <Row><f1>!abc!</f1><f2>d&#234;f</f2><f3>g&#234;ll</f3><filepos>0</filepos><filepos2>9223372036854775808</filepos2></Row>
+ <Row><f1>!abc</f1><f2>d&#234;f!,h&#234;llo</f2><f3></f3><filepos>17</filepos><filepos2>9223372036854775825</filepos2></Row>
+ <Row><f1>one </f1><f2>two </f2><f3> thr&#234;&#234; </f3><filepos>37</filepos><filepos2>9223372036854775845</filepos2></Row>
+ <Row><f1>!abc!</f1><f2>d&#234;f</f2><f3>g&#234;ll</f3><filepos>64</filepos><filepos2>9223653511831486464</filepos2></Row>
+ <Row><f1>!abc</f1><f2>d&#234;f!,h&#234;llo</f2><f3></f3><filepos>81</filepos><filepos2>9223653511831486481</filepos2></Row>
+ <Row><f1>one </f1><f2>two </f2><f3> thr&#234;&#234; </f3><filepos>101</filepos><filepos2>9223653511831486501</filepos2></Row>
+</Dataset>
+<Dataset name='Result 4'>
+ <Row><f1>!abc!</f1></Row>
+ <Row><f1>!abc</f1></Row>
+ <Row><f1>one </f1></Row>
+ <Row><f1>!abc!</f1></Row>
+ <Row><f1>!abc</f1></Row>
+ <Row><f1>one </f1></Row>
+</Dataset>
+<Dataset name='Result 5'>
+ <Row><f1>!abc!</f1><filepos>0</filepos><filepos2>9223372036854775808</filepos2></Row>
+ <Row><f1>!abc</f1><filepos>17</filepos><filepos2>9223372036854775825</filepos2></Row>
+ <Row><f1>one </f1><filepos>37</filepos><filepos2>9223372036854775845</filepos2></Row>
+ <Row><f1>!abc!</f1><filepos>64</filepos><filepos2>9223653511831486464</filepos2></Row>
+ <Row><f1>!abc</f1><filepos>81</filepos><filepos2>9223653511831486481</filepos2></Row>
+ <Row><f1>one </f1><filepos>101</filepos><filepos2>9223653511831486501</filepos2></Row>
+</Dataset>
+<Dataset name='Result 6'>
+ <Row><f1>!abc!</f1><filepos2>9223372036854775808</filepos2><filepos>0</filepos></Row>
+ <Row><f1>!abc</f1><filepos2>9223372036854775825</filepos2><filepos>17</filepos></Row>
+ <Row><f1>one </f1><filepos2>9223372036854775845</filepos2><filepos>37</filepos></Row>
+ <Row><f1>!abc!</f1><filepos2>9223653511831486464</filepos2><filepos>64</filepos></Row>
+ <Row><f1>!abc</f1><filepos2>9223653511831486481</filepos2><filepos>81</filepos></Row>
+ <Row><f1>one </f1><filepos2>9223653511831486501</filepos2><filepos>101</filepos></Row>
+</Dataset>
+<Dataset name='Result 7'>
+ <Row><f1>!abc!</f1><castlocal>8000000000000000</castlocal><filepos>0</filepos></Row>
+ <Row><f1>!abc</f1><castlocal>8000000000000011</castlocal><filepos>17</filepos></Row>
+ <Row><f1>one </f1><castlocal>8000000000000025</castlocal><filepos>37</filepos></Row>
+ <Row><f1>!abc!</f1><castlocal>8001000000000000</castlocal><filepos>64</filepos></Row>
+ <Row><f1>!abc</f1><castlocal>8001000000000011</castlocal><filepos>81</filepos></Row>
+ <Row><f1>one </f1><castlocal>8001000000000025</castlocal><filepos>101</filepos></Row>
+</Dataset>
+<Dataset name='Result 8'>
+ <Row><filepos2>9223372036854775808</filepos2><f3>g&#234;ll</f3><filepos>0</filepos></Row>
+ <Row><filepos2>9223372036854775825</filepos2><f3></f3><filepos>17</filepos></Row>
+ <Row><filepos2>9223372036854775845</filepos2><f3> thr&#234;&#234; </f3><filepos>37</filepos></Row>
+ <Row><filepos2>9223653511831486464</filepos2><f3>g&#234;ll</f3><filepos>64</filepos></Row>
+ <Row><filepos2>9223653511831486481</filepos2><f3></f3><filepos>81</filepos></Row>
+ <Row><filepos2>9223653511831486501</filepos2><f3> thr&#234;&#234; </f3><filepos>101</filepos></Row>
+</Dataset>
+<Dataset name='Result 9'>
+ <Row><f3>g&#234;ll</f3><filepos2>9223372036854775808</filepos2><f2>d&#234;f</f2><f1>!abc!</f1><filepos>0</filepos></Row>
+ <Row><f3></f3><filepos2>9223372036854775825</filepos2><f2>d&#234;f!,h&#234;llo</f2><f1>!abc</f1><filepos>17</filepos></Row>
+ <Row><f3> thr&#234;&#234; </f3><filepos2>9223372036854775845</filepos2><f2>two </f2><f1>one </f1><filepos>37</filepos></Row>
+ <Row><f3>g&#234;ll</f3><filepos2>9223653511831486464</filepos2><f2>d&#234;f</f2><f1>!abc!</f1><filepos>64</filepos></Row>
+ <Row><f3></f3><filepos2>9223653511831486481</filepos2><f2>d&#234;f!,h&#234;llo</f2><f1>!abc</f1><filepos>81</filepos></Row>
+ <Row><f3> thr&#234;&#234; </f3><filepos2>9223653511831486501</filepos2><f2>two </f2><f1>one </f1><filepos>101</filepos></Row>
+</Dataset>
+<Dataset name='Result 10'>
+ <Row><f1>!abc!</f1><filepos>0</filepos><biaslocal>0</biaslocal></Row>
+ <Row><f1>!abc</f1><filepos>17</filepos><biaslocal>17</biaslocal></Row>
+ <Row><f1>one </f1><filepos>37</filepos><biaslocal>37</biaslocal></Row>
+ <Row><f1>!abc!</f1><filepos>64</filepos><biaslocal>281474976710656</biaslocal></Row>
+ <Row><f1>!abc</f1><filepos>81</filepos><biaslocal>281474976710673</biaslocal></Row>
+ <Row><f1>one </f1><filepos>101</filepos><biaslocal>281474976710693</biaslocal></Row>
+</Dataset>
+<Dataset name='Result 11'>
+ <Row><f1>!abc!</f1><filepos>0</filepos><biaslocal>0</biaslocal></Row>
+ <Row><f1>!abc</f1><filepos>17</filepos><biaslocal>17</biaslocal></Row>
+ <Row><f1>one </f1><filepos>37</filepos><biaslocal>37</biaslocal></Row>
+ <Row><f1>!abc!</f1><filepos>64</filepos><biaslocal>281474976710656</biaslocal></Row>
+ <Row><f1>!abc</f1><filepos>81</filepos><biaslocal>281474976710673</biaslocal></Row>
+ <Row><f1>one </f1><filepos>101</filepos><biaslocal>281474976710693</biaslocal></Row>
+</Dataset>