Browse Source

HPCC-3092 Add ESCAPE option to CSV dataset

Signed-off-by: Renato Golin <rengolin@hpccsystems.com>
Renato Golin 12 years ago
parent
commit
9cb86ffb93

+ 49 - 6
common/thorhelper/csvsplitter.cpp

@@ -62,6 +62,10 @@ void CSVSplitter::addTerminator(const char * text)
     matcher.addEntry(text, TERMINATOR);
 }
 
+void CSVSplitter::addEscape(const char * text)
+{
+    matcher.addEntry(text, ESCAPE);
+}
 
 void CSVSplitter::reset()
 {
@@ -76,7 +80,7 @@ void CSVSplitter::reset()
     maxCsvSize = 0;
 }
 
-void CSVSplitter::init(unsigned _maxColumns, ICsvParameters * csvInfo, const char * dfsQuotes, const char * dfsSeparators, const char * dfsTerminators)
+void CSVSplitter::init(unsigned _maxColumns, ICsvParameters * csvInfo, const char * dfsQuotes, const char * dfsSeparators, const char * dfsTerminators, const char * dfsEscapes)
 {
     reset();
     maxCsvSize = csvInfo->queryMaxSize();
@@ -127,6 +131,19 @@ void CSVSplitter::init(unsigned _maxColumns, ICsvParameters * csvInfo, const cha
         }
     }
 
+    if (dfsEscapes && (flags & ICsvParameters::defaultEscape))
+        addActionList(matcher, dfsEscapes, ESCAPE);
+    else
+    {
+        for (idx=0;;idx++)
+        {
+            const char * text = csvInfo->queryEscape(idx);
+            if (!text)
+                break;
+            addEscape(text);
+        }
+    }
+
     //MORE Should this be configurable??
     if (!(flags & ICsvParameters::preserveWhitespace))
     {
@@ -135,8 +152,7 @@ void CSVSplitter::init(unsigned _maxColumns, ICsvParameters * csvInfo, const cha
     }
 }
 
-
-void CSVSplitter::setFieldRange(const byte * start, const byte * end, unsigned curColumn, unsigned quoteToStrip)
+void CSVSplitter::setFieldRange(const byte * start, const byte * end, unsigned curColumn, unsigned quoteToStrip, bool unescape)
 {
     if (quoteToStrip)
     {
@@ -190,6 +206,25 @@ done:
         data[curColumn] = start;
         lengths[curColumn] = (size32_t)(end-start);
     }
+    // Un-escape string, if necessary.
+    if (unescape)
+    {
+        byte * cur = const_cast<byte*>(data[curColumn]);
+        byte * end = cur + lengths[curColumn];
+        for (; cur <= end; cur++)
+        {
+            unsigned matchLen;
+            unsigned match = matcher.getMatch((size32_t)(end-cur), (const char *)cur, matchLen);
+            if ((match & 255) == ESCAPE)
+            {
+                unsigned long restLen = end-cur+matchLen;
+                memmove(cur, cur+matchLen, restLen);
+                cur += matchLen;
+                end -= matchLen;
+                lengths[curColumn] -= matchLen;
+            }
+        }
+    }
 }
 
 size32_t CSVSplitter::splitLine(size32_t maxLength, const byte * start)
@@ -201,6 +236,7 @@ size32_t CSVSplitter::splitLine(size32_t maxLength, const byte * start)
     const byte * end = start + maxLength;
     const byte * firstGood = start;
     const byte * lastGood = start;
+    bool unescape = false;
     curUnquoted = unquotedBuffer;
 
     while (cur != end)
@@ -226,7 +262,8 @@ size32_t CSVSplitter::splitLine(size32_t maxLength, const byte * start)
         case SEPARATOR:
             if ((curColumn < maxColumns) && (quote == 0))
             {
-                setFieldRange(firstGood, lastGood, curColumn, quoteToStrip);
+                setFieldRange(firstGood, lastGood, curColumn, quoteToStrip, unescape);
+                unescape = false;
                 quoteToStrip = 0;
                 curColumn++;
                 firstGood = cur + matchLen;
@@ -236,7 +273,8 @@ size32_t CSVSplitter::splitLine(size32_t maxLength, const byte * start)
         case TERMINATOR:
             if (quote == 0) // Is this a good idea? Means a mismatched quote is not fixed by EOL
             {
-                setFieldRange(firstGood, lastGood, curColumn, quoteToStrip);
+                setFieldRange(firstGood, lastGood, curColumn, quoteToStrip, unescape);
+                unescape = false;
                 while (++curColumn < maxColumns)
                     lengths[curColumn] = 0;
                 return (size32_t)(cur + matchLen - start);
@@ -279,11 +317,15 @@ size32_t CSVSplitter::splitLine(size32_t maxLength, const byte * start)
                     lastGood = cur+matchLen;
             }
             break;
+        case ESCAPE:
+            unescape = true;
+            lastGood = cur+matchLen+1;
+            cur++; // MORE: This assumes next char is ASCii
         }
         cur += matchLen;
     }
 
-    setFieldRange(firstGood, lastGood, curColumn, quoteToStrip);
+    setFieldRange(firstGood, lastGood, curColumn, quoteToStrip, unescape);
     while (++curColumn < maxColumns)
         lengths[curColumn] = 0;
     return (size32_t)(end - start);
@@ -310,6 +352,7 @@ void CSVOutputStream::init(ICsvParameters * args, bool _oldOutputFormat)
     quote.set(args->queryQuote(0));
     separator.set(args->querySeparator(0));
     terminator.set(args->queryTerminator(0));
+    escape.set(args->queryEscape(0));
     oldOutputFormat = _oldOutputFormat||!quote.length();
 }
 

+ 5 - 3
common/thorhelper/csvsplitter.hpp

@@ -41,8 +41,9 @@ public:
     void addQuote(const char * text);
     void addSeparator(const char * text);
     void addTerminator(const char * text);
+    void addEscape(const char * text);
 
-    void init(unsigned maxColumns, ICsvParameters * csvInfo, const char * dfsQuotes, const char * dfsSeparators, const char * dfsTerminators);
+    void init(unsigned maxColumns, ICsvParameters * csvInfo, const char * dfsQuotes, const char * dfsSeparators, const char * dfsTerminators, const char * dfsEscapes);
     void reset();
     size32_t splitLine(size32_t maxLen, const byte * start);
 
@@ -50,10 +51,10 @@ public:
     inline const byte * * queryData() { return data; }
 
 protected:
-    void setFieldRange(const byte * start, const byte * end, unsigned curColumn, unsigned quoteToStrip);
+    void setFieldRange(const byte * start, const byte * end, unsigned curColumn, unsigned quoteToStrip, bool unescape);
 
 protected:
-    enum { NONE=0, SEPARATOR=1, TERMINATOR=2, WHITESPACE=3, QUOTE=4 };
+    enum { NONE=0, SEPARATOR=1, TERMINATOR=2, WHITESPACE=3, QUOTE=4, ESCAPE=5 };
     unsigned            maxColumns;
     StringMatcher       matcher;
     unsigned            numQuotes;
@@ -83,6 +84,7 @@ protected:
     StringAttr separator;
     StringAttr terminator;
     StringAttr quote;
+    StringAttr escape;
     const char * prefix;
     bool oldOutputFormat;
 };

+ 2 - 1
common/thorhelper/thorpipe.cpp

@@ -163,7 +163,8 @@ public:
         const char * quotes = NULL;
         const char * separators = NULL;
         const char * terminators = NULL;
-        csvSplitter.init(csvTransformer->getMaxColumns(), csvInfo, quotes, separators, terminators);
+        const char * escapes = NULL;
+        csvSplitter.init(csvTransformer->getMaxColumns(), csvInfo, quotes, separators, terminators, escapes);
     }
 
     virtual const void * next()

+ 1 - 1
common/thorhelper/thorxmlread.cpp

@@ -672,7 +672,7 @@ IDataVal & CCsvToRawTransformer::transform(IDataVal & result, size32_t len, cons
 {
     CSVSplitter csvSplitter;
 
-    csvSplitter.init(rowTransformer->getMaxColumns(), rowTransformer->queryCsvParameters(), NULL, NULL, NULL);
+    csvSplitter.init(rowTransformer->getMaxColumns(), rowTransformer->queryCsvParameters(), NULL, NULL, NULL, NULL);
 
     unsigned maxRecordSize = rowTransformer->queryRecordSize()->getRecordSize(NULL);
     const byte *finger = (const byte *) text;

+ 2 - 0
ecl/hql/hqlatoms.cpp

@@ -302,6 +302,7 @@ _ATOM _selectors_Atom;
 _ATOM _selectorSequence_Atom;
 _ATOM selfAtom;
 _ATOM separatorAtom;
+_ATOM escapeAtom;
 _ATOM sequenceAtom;
 _ATOM _sequence_Atom;
 _ATOM sequentialAtom;
@@ -715,6 +716,7 @@ MODULE_INIT(INIT_PRIORITY_HQLATOM)
     MAKEATOM(template);
     MAKEATOM(terminate);
     MAKEATOM(terminator);
+    MAKEATOM(escape);
     MAKEATOM(thor);
     MAKEATOM(threshold);
     MAKEATOM(timeout);

+ 1 - 0
ecl/hql/hqlatoms.hpp

@@ -341,6 +341,7 @@ extern HQL_API _ATOM tempAtom;
 extern HQL_API _ATOM templateAtom;
 extern HQL_API _ATOM terminateAtom;
 extern HQL_API _ATOM terminatorAtom;
+extern HQL_API _ATOM escapeAtom;
 extern HQL_API _ATOM thorAtom;
 extern HQL_API _ATOM thresholdAtom;
 extern HQL_API _ATOM timeoutAtom;

+ 7 - 0
ecl/hql/hqlgram.y

@@ -411,6 +411,7 @@ static void eclsyntaxerror(HqlGram * parser, const char * s, short yystate, int
   TAN
   TANH
   TERMINATOR
+  ESCAPE
   THEN
   THISNODE
   THOR
@@ -9513,6 +9514,12 @@ csvOption
                             $$.setExpr(createExprAttribute(terminatorAtom, createComma($3.getExpr(), createAttribute(quoteAtom))));
                             $$.setPosition($1);
                         }
+    | ESCAPE '(' expression ')'
+                        {
+                            parser->normalizeExpression($3);
+                            $$.setExpr(createExprAttribute(escapeAtom, $3.getExpr()));
+                            $$.setPosition($1);
+                        }
     | NOTRIM
                         {
                             $$.setExpr(createAttribute(noTrimAtom));

+ 1 - 0
ecl/hql/hqllex.l

@@ -903,6 +903,7 @@ TABLE               { RETURNSYM(TABLE); }
 TAN                 { RETURNSYM(TAN); }
 TANH                { RETURNSYM(TANH); }
 TERMINATOR          { RETURNSYM(TERMINATOR); }
+ESCAPE              { RETURNSYM(ESCAPE); }
 THEN                { RETURNSYM(THEN); }
 THISNODE            { RETURNSYM(THISNODE); }
 THOR                { RETURNSYM(THOR); }

+ 3 - 0
ecl/hqlcpp/hqlhtcpp.cpp

@@ -9129,6 +9129,7 @@ void HqlCppTranslator::buildCsvParameters(BuildCtx & subctx, IHqlExpression * cs
     IHqlExpression * headerAttr = queryProperty(headerAtom, attrs);
     IHqlExpression * terminator = queryProperty(terminatorAtom, attrs);
     IHqlExpression * separator = queryProperty(separatorAtom, attrs);
+    IHqlExpression * escape = queryProperty(escapeAtom, attrs);
     if (headerAttr)
     {
         IHqlExpression * header = queryRealChild(headerAttr, 0);
@@ -9187,11 +9188,13 @@ void HqlCppTranslator::buildCsvParameters(BuildCtx & subctx, IHqlExpression * cs
     buildCsvListFunc(classctx, "queryQuote", queryProperty(quoteAtom, attrs), isReading ? "'" : NULL);
     buildCsvListFunc(classctx, "querySeparator", separator, ",");
     buildCsvListFunc(classctx, "queryTerminator", terminator, isReading ? "\r\n|\n" : "\n");
+    buildCsvListFunc(classctx, "queryEscape", escape, NULL);
 
     StringBuffer flags;
     if (!queryProperty(quoteAtom, attrs))       flags.append("|defaultQuote");
     if (!queryProperty(separatorAtom, attrs))   flags.append("|defaultSeparate");
     if (!queryProperty(terminatorAtom, attrs))  flags.append("|defaultTerminate");
+    if (!queryProperty(escapeAtom, attrs))      flags.append("|defaultEscape");
     if (singleHeader)                           flags.append("|singleHeaderFooter");
     if (manyHeader)                             flags.append("|manyHeaderFooter");
     if (queryProperty(noTrimAtom, attrs))       flags.append("|preserveWhitespace");

+ 4 - 1
ecl/hthor/hthor.cpp

@@ -860,6 +860,7 @@ void CHThorCsvWriteActivity::setFormat(IFileDescriptor * desc)
     desc->queryProperties().setProp("@csvSeparate", separator.str());
     desc->queryProperties().setProp("@csvQuote", csvInfo->queryQuote(0));
     desc->queryProperties().setProp("@csvTerminate", csvInfo->queryTerminator(0));
+    desc->queryProperties().setProp("@csvEscape", csvInfo->queryEscape(0));
     desc->queryProperties().setProp("@format","utf8n");
 }
 
@@ -8489,6 +8490,7 @@ void CHThorCsvReadActivity::gatherInfo(IFileDescriptor * fd)
     const char * quotes = NULL;
     const char * separators = NULL;
     const char * terminators = NULL;
+    const char * escapes = NULL;
     IDistributedFile * dFile = ldFile?ldFile->queryDistributedFile():NULL;
     if (dFile)  //only makes sense for distributed (non local) files
     {
@@ -8496,8 +8498,9 @@ void CHThorCsvReadActivity::gatherInfo(IFileDescriptor * fd)
         quotes = options.queryProp("@csvQuote");
         separators = options.queryProp("@csvSeparate");
         terminators = options.queryProp("@csvTerminate");
+        escapes = options.queryProp("@csvEscape");
     }
-    csvSplitter.init(helper.getMaxColumns(), csvInfo, quotes, separators, terminators);
+    csvSplitter.init(helper.getMaxColumns(), csvInfo, quotes, separators, terminators, escapes);
 }
 
 void CHThorCsvReadActivity::calcFixedDiskRecordSize()

+ 3 - 1
ecl/hthor/hthorkey.cpp

@@ -2455,12 +2455,14 @@ public:
         const char * quotes = NULL;
         const char * separators = NULL;
         const char * terminators = NULL;
+        const char * escapes = NULL;
         if (dFile)
         {
             IPropertyTree & options = dFile->queryAttributes();
             quotes = options.queryProp("@csvQuote");
             separators = options.queryProp("@csvSeparate");
             terminators = options.queryProp("@csvTerminate");
+            escapes = options.queryProp("@csvEscape");
             agent.logFileAccess(dFile, "HThor", "READ");
         }
         else
@@ -2471,7 +2473,7 @@ public:
             agent.addWuException(buff.str(), 0, ExceptionSeverityWarning, "hthor");
         }
             
-        csvSplitter.init(_arg.getMaxColumns(), csvInfo, quotes, separators, terminators);
+        csvSplitter.init(_arg.getMaxColumns(), csvInfo, quotes, separators, terminators, escapes);
     }
 
     ~CHThorCsvFetchActivity()

+ 6 - 2
roxie/ccd/ccdactivities.cpp

@@ -1455,6 +1455,7 @@ public:
         const char *quotes = NULL;
         const char *separators = NULL;
         const char *terminators = NULL;
+        const char *escapes = NULL;
         CSVSplitter csvSplitter;
         if (datafile)
         {
@@ -1464,9 +1465,10 @@ public:
                 quotes = options->queryProp("@csvQuote");
                 separators = options->queryProp("@csvSeparate");
                 terminators = options->queryProp("@csvTerminate");
+                escapes = options->queryProp("@csvEscape");
             }
         }
-        csvSplitter.init(helper->getMaxColumns(), csvInfo, quotes, separators, terminators);
+        csvSplitter.init(helper->getMaxColumns(), csvInfo, quotes, separators, terminators, escapes);
         while (!aborted)
         {
             // MORE - there are rumours of a  csvSplitter that operates on a stream... if/when it exists, this should use it
@@ -4383,6 +4385,7 @@ public:
         const char * quotes = NULL;
         const char * separators = NULL;
         const char * terminators = NULL;
+        const char * escapes = NULL;
 
         const IResolvedFile *fileInfo = varFileInfo ? varFileInfo : factory->datafile;
         if (fileInfo)
@@ -4393,12 +4396,13 @@ public:
                 quotes = options->queryProp("@csvQuote");
                 separators = options->queryProp("@csvSeparate");
                 terminators = options->queryProp("@csvTerminate");
+                escapes = options->queryProp("@csvEscape");
             }
         }
 
         IHThorCsvFetchArg *h = (IHThorCsvFetchArg *) helper;
         ICsvParameters *csvInfo = h->queryCsvParameters();
-        csvSplitter.init(_maxColumns, csvInfo, quotes, separators, terminators);
+        csvSplitter.init(_maxColumns, csvInfo, quotes, separators, terminators, escapes);
     }
 
     virtual size32_t doFetch(ARowBuilder & rowBuilder, offset_t pos, offset_t rawpos, void *inputData)

+ 10 - 5
roxie/ccd/ccdserver.cpp

@@ -10921,6 +10921,7 @@ public:
         props.setProp("@csvSeparate", separator.str());
         props.setProp("@csvQuote", csvParameters->queryQuote(0));
         props.setProp("@csvTerminate", csvParameters->queryTerminator(0));
+        props.setProp("@csvEscape", csvParameters->queryEscape(0));
     }
 
     virtual bool isOutputTransformed() const { return true; }
@@ -20036,12 +20037,13 @@ class CRoxieServerCsvReadActivity : public CRoxieServerDiskReadBaseActivity
     const char *quotes;
     const char *separators;
     const char *terminators;
+    const char *escapes;
 public:
     CRoxieServerCsvReadActivity(const IRoxieServerActivityFactory *_factory, IProbeManager *_probeManager, const RemoteActivityId &_remoteId,
                                 unsigned _numParts, bool _isLocal, bool _sorted, bool _maySkip, IInMemoryIndexManager *_manager,
-                                const char *_quotes, const char *_separators, const char *_terminators)
+                                const char *_quotes, const char *_separators, const char *_terminators, const char *_escapes)
         : CRoxieServerDiskReadBaseActivity(_factory, _probeManager, _remoteId, _numParts, _isLocal, _sorted, _maySkip, _manager),
-          quotes(_quotes), separators(_separators), terminators(_terminators)
+          quotes(_quotes), separators(_separators), terminators(_terminators), escapes(_escapes)
     {
         compoundHelper = NULL;
         readHelper = (IHThorCsvReadArg *)&helper;
@@ -20073,9 +20075,10 @@ public:
                         quotes = options->queryProp("@csvQuote");
                         separators = options->queryProp("@csvSeparate");
                         terminators = options->queryProp("@csvTerminate");
+                        escapes = options->queryProp("@csvEscape");
                     }
                 }
-                csvSplitter.init(readHelper->getMaxColumns(), csvInfo, quotes, separators, terminators);
+                csvSplitter.init(readHelper->getMaxColumns(), csvInfo, quotes, separators, terminators, escapes);
             }
         }
     }
@@ -20593,6 +20596,7 @@ public:
     const char *quotes;
     const char *separators;
     const char *terminators;
+    const char *escapes;
 
     CRoxieServerDiskReadActivityFactory(unsigned _id, unsigned _subgraphId, IQueryFactory &_queryFactory, HelperFactory *_helperFactory, ThorActivityKind _kind, const RemoteActivityId &_remoteId, IPropertyTree &_graphNode)
         : CRoxieServerActivityFactory(_id, _subgraphId, _queryFactory, _helperFactory, _kind), remoteId(_remoteId)
@@ -20602,7 +20606,7 @@ public:
         sorted = (helper->getFlags() & TDRunsorted) == 0;
         variableFileName = (helper->getFlags() & (TDXvarfilename|TDXdynamicfilename)) != 0;
         maySkip = (helper->getFlags() & (TDRkeyedlimitskips|TDRkeyedlimitcreates|TDRlimitskips|TDRlimitcreates)) != 0;
-        quotes = separators = terminators = NULL;
+        quotes = separators = terminators = escapes = NULL;
         if (!variableFileName)
         {
             bool isOpt = (helper->getFlags() & TDRoptional) != 0;
@@ -20624,6 +20628,7 @@ public:
                         quotes = options->queryProp("@csvQuote");
                         separators = options->queryProp("@csvSeparate");
                         terminators = options->queryProp("@csvTerminate");
+                        escapes = options->queryProp("@csvEscape");
                     }
                 }
                 else
@@ -20639,7 +20644,7 @@ public:
         {
         case TAKcsvread:
             return new CRoxieServerCsvReadActivity(this, _probeManager, remoteId, numParts, isLocal, sorted, maySkip, manager,
-                                                   quotes, separators, terminators);
+                                                   quotes, separators, terminators, escapes);
         case TAKxmlread:
             return new CRoxieServerXmlReadActivity(this, _probeManager, remoteId, numParts, isLocal, sorted, maySkip, manager);
         case TAKdiskread:

+ 12 - 1
rtl/include/eclhelper.hpp

@@ -1912,7 +1912,17 @@ typedef IHThorLocalResultWriteArg IHThorLocalResultSpillArg;
 
 struct ICsvParameters
 {
-    enum { defaultQuote = 1, defaultSeparate = 2, defaultTerminate = 4, hasUnicode = 8, singleHeaderFooter = 16, preserveWhitespace = 32, manyHeaderFooter = 64, }; // flags values
+    enum
+    {
+        defaultQuote = 1,
+        defaultSeparate = 2,
+        defaultTerminate = 4,
+        hasUnicode = 8,
+        singleHeaderFooter = 16,
+        preserveWhitespace = 32,
+        manyHeaderFooter = 64,
+        defaultEscape = 128
+    }; // flags values
     virtual unsigned     getFlags() = 0;
     virtual bool         queryEBCDIC() = 0;
     virtual const char * queryHeader()              { return NULL; }
@@ -1921,6 +1931,7 @@ struct ICsvParameters
     virtual const char * queryQuote(unsigned idx) = 0;
     virtual const char * querySeparator(unsigned idx) = 0;
     virtual const char * queryTerminator(unsigned idx) = 0;
+    virtual const char * queryEscape(unsigned idx) = 0;
     virtual const char * queryFooter()              { return NULL; }
 };
 

+ 40 - 0
testing/ecl/csv-escaped.ecl

@@ -0,0 +1,40 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+rec := RECORD
+  string foo;
+  integer id;
+  string bar;
+END;
+
+// Default is no escape
+orig := DATASET([{'this is an \\\'escaped\\\' string', 10, 'while this is not'}], rec);
+OUTPUT(orig, , 'regress::csv-escaped', OVERWRITE, CSV);
+escaped := DATASET('regress::csv-escaped', rec, CSV);
+OUTPUT(escaped);
+
+// Standard escape
+orig2 := DATASET([{'this is an \\\'escaped\\\' string', 10, 'while this is not'}], rec);
+OUTPUT(orig2, , 'regress::csv-escaped', OVERWRITE, CSV);
+escaped2 := DATASET('regress::csv-escaped', rec, CSV(ESCAPE('\\')));
+OUTPUT(escaped2);
+
+// Multi-char escape
+orig3 := DATASET([{'this is an -=-\'escaped-=-\' string', 10, 'while this is not'}], rec);
+OUTPUT(orig3, , 'regress::csv-escaped', OVERWRITE, CSV);
+escaped3 := DATASET('regress::csv-escaped', rec, CSV(ESCAPE('-=-')));
+OUTPUT(escaped3);

+ 15 - 0
testing/ecl/key/csv-escaped.xml

@@ -0,0 +1,15 @@
+<Dataset name='Result 1'>
+</Dataset>
+<Dataset name='Result 2'>
+ <Row><foo>this is an \&apos;escaped\&apos; string</foo><id>10</id><bar>while this is not</bar></Row>
+</Dataset>
+<Dataset name='Result 3'>
+</Dataset>
+<Dataset name='Result 4'>
+ <Row><foo>this is an &apos;escaped&apos; string</foo><id>10</id><bar>while this is not</bar></Row>
+</Dataset>
+<Dataset name='Result 5'>
+</Dataset>
+<Dataset name='Result 6'>
+ <Row><foo>this is an &apos;escaped&apos; string</foo><id>0</id><bar>while this is not</bar></Row>
+</Dataset>

+ 4 - 2
thorlcr/activities/csvread/thcsvrslave.cpp

@@ -36,7 +36,7 @@
 class CCsvReadSlaveActivity : public CDiskReadSlaveActivityBase, public CThorDataLink
 {
     IHThorCsvReadArg *helper;
-    StringAttr csvQuote, csvSeparate, csvTerminate;
+    StringAttr csvQuote, csvSeparate, csvTerminate, csvEscape;
     Owned<IRowStream> out;
     rowcount_t limit;
     rowcount_t stopAfter;
@@ -85,7 +85,7 @@ class CCsvReadSlaveActivity : public CDiskReadSlaveActivityBase, public CThorDat
             readFinished = false;
             //Initialise information...
             ICsvParameters * csvInfo = activity.helper->queryCsvParameters();
-            csvSplitter.init(activity.helper->getMaxColumns(), csvInfo, activity.csvQuote, activity.csvSeparate, activity.csvTerminate);
+            csvSplitter.init(activity.helper->getMaxColumns(), csvInfo, activity.csvQuote, activity.csvSeparate, activity.csvTerminate, activity.csvEscape);
         }
         virtual void setPart(IPartDescriptor *partDesc, unsigned partNoSerialized)
         {
@@ -315,6 +315,8 @@ public:
             if (b) data.read(csvSeparate);
             data.read(b);
             if (b) data.read(csvTerminate);
+            data.read(b);
+            if (b) data.read(csvEscape);
         }
         if (headerLines)
         {

+ 1 - 0
thorlcr/activities/diskwrite/thdiskwrite.cpp

@@ -77,6 +77,7 @@ public:
         props.setProp("@csvSeparate", separator.str());
         props.setProp("@csvQuote", csvParameters->queryQuote(0));
         props.setProp("@csvTerminate", csvParameters->queryTerminator(0));
+        props.setProp("@csvEscape", csvParameters->queryEscape(0));
 
         CWriteMasterBase::done(); // will publish
     }

+ 2 - 1
thorlcr/activities/fetch/thfetchslave.cpp

@@ -542,7 +542,8 @@ public:
         const char * quotes = lFProps->hasProp("@csvQuote")?lFProps->queryProp("@csvQuote"):NULL;
         const char * separators = lFProps->hasProp("@csvSeparate")?lFProps->queryProp("@csvSeparate"):NULL;
         const char * terminators = lFProps->hasProp("@csvTerminate")?lFProps->queryProp("@csvTerminate"):NULL;      
-        csvSplitter.init(helper->getMaxColumns(), csvInfo, quotes, separators, terminators);
+        const char * escapes = lFProps->hasProp("@csvEscape")?lFProps->queryProp("@csvEscape"):NULL;
+        csvSplitter.init(helper->getMaxColumns(), csvInfo, quotes, separators, terminators, escapes);
     }
     virtual size32_t fetch(ARowBuilder & rowBuilder, const void *keyRow, unsigned filePartIndex, unsigned __int64 localFpos, unsigned __int64 fpos)
     {