Просмотр исходного кода

HPCC-10961 Add a new DFU server parameter called "quotedTerminator"
to improve the speed of CSV file sprays.

Add a Boolean "quotedTerminator" parameter to spray CSV files.

If this parameter is false, then we can use the quick partitioning algorithm
to spray the file with record structure exploration.

The default value is true.

Signed-off-by: Attila Vamos <attila.vamos@gmail.com>

Attila Vamos 11 лет назад
Родитель
Commit
693bf43639

+ 2 - 0
dali/dfu/dfurun.cpp

@@ -1219,6 +1219,8 @@ public:
                         if (options->getRecordStructurePresent())
                             opttree->setPropBool("@recordStructurePresent", true);
 
+                        opttree->setPropBool("@quotedTerminator", options->getQuotedTerminator());
+
                         Owned<IFileDescriptor> fdesc = destination->getFileDescriptor(iskey,options->getSuppressNonKeyRepeats()&&!iskey);
                         if (fdesc) {
                             if (options->getSubfileCopy()) {// need to set destination compressed or not

+ 11 - 0
dali/dfu/dfuwu.cpp

@@ -2044,6 +2044,17 @@ public:
     {
         queryRoot()->setPropBool("@recordStructurePresent",val);
     }
+
+    // Returns the boolean "@quotedTerminator" property of the tree returned
+    // by queryRoot().
+    // Falls back to true when the property is absent, matching the documented
+    // default of this option (setQuotedTerminator(bool val=true)); without an
+    // explicit default a workunit that never stored the flag would not get
+    // that default.
+    bool getQuotedTerminator() const
+    {
+        return queryRoot()->getPropBool("@quotedTerminator", true);
+    }
+
+    void setQuotedTerminator(bool val)  // Setter for the quotedTerminator spray option.
+    {
+        queryRoot()->setPropBool("@quotedTerminator",val);  // Stores val as the "@quotedTerminator" boolean property on the tree returned by queryRoot().
+    }
+
 };
 
 class CExceptionIterator: public CInterface, implements IExceptionIterator

+ 3 - 0
dali/dfu/dfuwu.hpp

@@ -170,6 +170,8 @@ interface IConstDFUoptions : extends IInterface
     virtual bool getFailIfNoSourceFile() const = 0;
 
     virtual bool getRecordStructurePresent() const = 0;
+
+    virtual bool getQuotedTerminator() const = 0;
 };
 
 interface IDFUoptions : extends IConstDFUoptions
@@ -206,6 +208,7 @@ interface IDFUoptions : extends IConstDFUoptions
     virtual void setEncDec(const char *enc,const char *dec) = 0;
     virtual void setFailIfNoSourceFile(bool val=false) = 0;
     virtual void setRecordStructurePresent(bool val=false) = 0;
+    virtual void setQuotedTerminator(bool val=true) = 0;
 };
 
 interface IConstDFUfileSpec: extends IInterface

+ 3 - 0
dali/dfuplus/dfuplus.cpp

@@ -436,6 +436,9 @@ bool CDfuPlusHelper::variableSpray(const char* srcxml,const char* srcip,const ch
     if(globals->hasProp("recordStructurePresent"))
         req->setRecordStructurePresent(globals->getPropBool("recordStructurePresent",false));
 
+    if(globals->hasProp("quotedTerminator"))
+            req->setQuotedTerminator(globals->getPropBool("quotedTerminator",true));
+
     if(srcxml == NULL)
         info("\nVariable spraying from %s on %s to %s\n", srcfile, srcip, dstname);
     else

+ 1 - 0
dali/dfuplus/main.cpp

@@ -76,6 +76,7 @@ void handleSyntax()
     out.append("            quote=<quote> -- optional, default is '\n");
     out.append("            escape=<escape> -- optional, no default value \n");
     out.append("            recordstructurepresent=0|1 -- optional, default is 0 (no field names in first row) \n");
+    out.append("            quotedTerminator=1|0 -- optional, default is 1 (quoted terminators in rows) \n");
     out.append("        options for xml:\n");
     out.append("            rowtag=rowTag -- required\n");
     out.append("            encoding=utf8|utf8n|utf16|utf16le|utf16be|utf32|utf32le|utf32be -- optional, default is utf8\n");

+ 2 - 2
dali/ft/daftformat.cpp

@@ -2081,7 +2081,7 @@ IFormatPartitioner * createFormatPartitioner(const SocketEndpoint & ep, const Fi
         case FFTblocked:
             return new CSimpleBlockedPartitioner(sameFormats);
         case FFTcsv:
-            if (srcFormat.hasQuote())
+            if (srcFormat.hasQuote() && srcFormat.hasQuotedTerminator())
                 return new CCsvPartitioner(srcFormat);
             else
                 return new CCsvQuickPartitioner(srcFormat, sameFormats);
@@ -2091,7 +2091,7 @@ IFormatPartitioner * createFormatPartitioner(const SocketEndpoint & ep, const Fi
                 return new CXmlQuickPartitioner(srcFormat, sameFormats);
             else
             {
-                if (srcFormat.hasQuote())
+                if (srcFormat.hasQuote() && srcFormat.hasQuotedTerminator())
                     return new CUtfPartitioner(srcFormat);
                 else
                     return new CUtfQuickPartitioner(srcFormat, sameFormats);

+ 5 - 5
dali/ft/daftformat.ipp

@@ -268,8 +268,8 @@ class DALIFT_API CCsvQuickPartitioner : public CCsvPartitioner
 {
 public:
     CCsvQuickPartitioner(const FileFormat & _format, bool _noTranslation) 
-        : CCsvPartitioner(_format) 
-    { 
+        : CCsvPartitioner(_format)
+    {
         noTranslation = _noTranslation;
     }
 
@@ -289,7 +289,7 @@ public:
     CUtfPartitioner(const FileFormat & _format);
 
     virtual void setTarget(IOutputProcessor * _target);
-    
+
     virtual void getRecordStructure(StringBuffer & _recordStructure) { _recordStructure = recordStructure; }
     virtual void setRecordStructurePresent( bool _isRecordStructurePresent) {isRecordStructurePresent = _isRecordStructurePresent;}
 
@@ -300,7 +300,7 @@ protected:
     {
         return getSplitRecordSize(record,maxToRead,processFullBuffer,true);
     }
-    
+
 private:
     void storeFieldName(const char * start, unsigned len);
 
@@ -311,7 +311,7 @@ protected:
     StringMatcher   matcher;
     unsigned        unitSize;
     UtfReader::UtfFormat utfFormat;
-    
+
     bool            isRecordStructurePresent;
     StringBuffer    recordStructure;
     unsigned        fieldCount;

+ 1 - 0
dali/ft/filecopy.cpp

@@ -1132,6 +1132,7 @@ void FileSprayer::calculateSprayPartition()
         FilePartInfo & cur = sources.item(idx);
         cur.filename.getRemotePath(remoteFilename.clear());
 
+        srcFormat.quotedTerminator = options->getPropBool("@quotedTerminator", true);
         LOG(MCdebugInfoDetail, job, "Partition %d(%s)", idx, remoteFilename.str());
         const SocketEndpoint & ep = cur.filename.queryEndpoint();
         IFormatPartitioner * partitioner = createFormatPartitioner(ep, srcFormat, tgtFormat, calcOutput, queryFixedSlave(), wuid);

+ 4 - 1
dali/ft/filecopy.hpp

@@ -43,7 +43,8 @@ enum { FTactionpull, FTactionpush, FTactionpartition, FTactiondirectory, FTactio
 class DALIFT_API FileFormat
 {
 public:
-    FileFormat(FileFormatType _type = FFTunknown, unsigned _recordSize = 0) { set(_type, _recordSize); maxRecordSize = 0;}
+    FileFormat(FileFormatType _type = FFTunknown, unsigned _recordSize = 0)
+            { set(_type, _recordSize); maxRecordSize = 0; quotedTerminator = true;}
 
     void deserialize(MemoryBuffer & in);
     void deserializeExtra(MemoryBuffer & in, unsigned version);
@@ -57,6 +58,7 @@ public:
     void set(FileFormatType _type, unsigned _recordSize = 0) { type = _type, recordSize = _recordSize; }
     void set(const FileFormat & src);
     bool hasQuote() const                           { return (quote == NULL) || (*quote != '\0'); }
+    bool hasQuotedTerminator() const                { return quotedTerminator; }
 
 public:
     FileFormatType      type;
@@ -67,6 +69,7 @@ public:
     StringAttr          terminate;
     StringAttr          escape;
     StringAttr          rowTag;
+    bool                quotedTerminator;
 };
 UtfReader::UtfFormat getUtfFormatType(FileFormatType type);
 bool sameEncoding(const FileFormat & src, const FileFormat & tgt);

Разница между файлами не показана из-за своего большого размера
+ 8 - 8
ecllibrary/std/File.ecl


+ 8 - 4
esp/scm/ws_fs.ecm

@@ -82,8 +82,10 @@ ESPStruct [nil_remove] DFUWorkunit
     string decrypt;
 
     [min_ver("1.08")] bool failIfNoSourceFile(false);
-    
+
     [min_ver("1.09")] bool recordStructurePresent(false);
+
+    [min_ver("1.10")] bool quotedTerminator(true);
 };
 
 ESPStruct DFUException
@@ -306,9 +308,10 @@ ESPrequest [nil_remove] SprayFixed
     bool   wrap(false);
 
     [min_ver("1.08")] bool failIfNoSourceFile(false);
-    
+
     [min_ver("1.09")] bool recordStructurePresent(false);
 
+    [min_ver("1.10")] bool quotedTerminator(true);
 };
 
 ESPresponse [exceptions_inline] 
@@ -353,9 +356,10 @@ ESPrequest [nil_remove] SprayVariable
     string decrypt;
 
     [min_ver("1.08")] bool failIfNoSourceFile(false);
-    
+
     [min_ver("1.09")] bool recordStructurePresent(false);
 
+    [min_ver("1.10")] bool quotedTerminator(true);
 };
 
 ESPresponse [exceptions_inline] 
@@ -606,7 +610,7 @@ ESPresponse [exceptions_inline] UploadFilesResponse
 };
 
 ESPservice [
-    version("1.09"), default_client_version("1.09"),
+    version("1.10"), default_client_version("1.10"),
     exceptions_inline("./smc_xslt/exceptions.xslt")] FileSpray
 {
     ESPuses ESPstruct DFUWorkunit;

+ 4 - 0
esp/services/ws_fs/ws_fsService.cpp

@@ -1956,6 +1956,8 @@ bool CFileSprayEx::onSprayFixed(IEspContext &context, IEspSprayFixed &req, IEspS
         if (req.getRecordStructurePresent())
             options->setRecordStructurePresent(true);
 
+        options->setQuotedTerminator(req.getQuotedTerminator());
+
         resp.setWuid(wu->queryId());
         resp.setRedirectUrl(StringBuffer("/FileSpray/GetDFUWorkunit?wuid=").append(wu->queryId()).str());
         submitDFUWorkUnit(wu.getClear());
@@ -2122,6 +2124,8 @@ bool CFileSprayEx::onSprayVariable(IEspContext &context, IEspSprayVariable &req,
         if (req.getRecordStructurePresent())
             options->setRecordStructurePresent(true);
 
+        options->setQuotedTerminator(req.getQuotedTerminator());
+
         resp.setWuid(wu->queryId());
         resp.setRedirectUrl(StringBuffer("/FileSpray/GetDFUWorkunit?wuid=").append(wu->queryId()).str());
         submitDFUWorkUnit(wu.getClear());

Разница между файлами не показана из-за своего большого размера
+ 21 - 9
plugins/fileservices/fileservices.cpp


Разница между файлами не показана из-за своего большого размера
+ 1 - 0
plugins/fileservices/fileservices.hpp