Parcourir la source

Merge pull request #7976 from jakesmith/hpcc-14504

HPCC-14504 Expose Thor crc read+write options

Reviewed-by: Gavin Halliday <ghalliday@hpccsystems.com>
Gavin Halliday il y a 9 ans
Parent
commit
e5a3544841

+ 9 - 2
initfiles/componentfiles/configxml/thor.xsd.in

@@ -160,10 +160,17 @@
                 </xs:appinfo>
               </xs:annotation>
             </xs:attribute>
-            <xs:attribute name="fileCrcDisabled" type="xs:boolean" use="optional" default="false">
+            <xs:attribute name="crcReadEnabled" type="xs:boolean" use="optional" default="true">
               <xs:annotation>
                 <xs:appinfo>
-                  <tooltip>Disables file CRC checking</tooltip>
+                  <tooltip>Verify that a read file's CRC matches the published meta data CRC</tooltip>
+                </xs:appinfo>
+              </xs:annotation>
+            </xs:attribute>
+            <xs:attribute name="crcWriteEnabled" type="xs:boolean" use="optional" default="true">
+              <xs:annotation>
+                <xs:appinfo>
+                  <tooltip>Calculate and publish a CRC per published disk output file</tooltip>
                 </xs:appinfo>
               </xs:annotation>
             </xs:attribute>

+ 11 - 5
thorlcr/activities/thdiskbaseslave.cpp

@@ -123,7 +123,7 @@ void CDiskPartHandlerBase::open()
         throw e;
     }
     filename.set(iFile->queryFilename());
-    ActPrintLog(&activity, "%s[part=%d]: reading physical file '%s' (logical file = %s)", kindStr, which, filePath.str(), activity.logicalFilename.get());
+    ActPrintLog(&activity, "%s[part=%d]: reading physical file '%s' (logical file = %s), checkFileCrc=%s", kindStr, which, filePath.str(), activity.logicalFilename.get(), checkFileCrc?"true":"false");
     if (checkFileCrc)
     {
         CDateTime createTime, modifiedTime, accessedTime;
@@ -202,9 +202,12 @@ CDiskReadSlaveActivityBase::CDiskReadSlaveActivityBase(CGraphElementBase *_conta
 {
     helper = (IHThorDiskReadBaseArg *)queryHelper();
     reInit = 0 != (helper->getFlags() & (TDXvarfilename|TDXdynamicfilename));
-    crcCheckCompressed = 0 != container.queryJob().getWorkUnitValueInt("crcCheckCompressed", 0);
+    crcCheckCompressed = getOptBool(THOROPT_READCOMPRESSED_CRC, false);
     markStart = gotMeta = false;
-    checkFileCrc = !globals->getPropBool("Debug/@fileCrcDisabled");
+    if (globals->hasProp("Debug/@fileCrcDisabled")) // legacy setting, when present, takes precedence over crcReadEnabled
+        checkFileCrc = !globals->getPropBool("Debug/@fileCrcDisabled"); // property means "disabled": negate it to get "check"
+    else
+        checkFileCrc = getOptBool(THOROPT_READ_CRC, true);
 }
 
 // IThorSlaveActivity
@@ -327,7 +330,10 @@ void CDiskWriteSlaveActivityBase::open()
             diskRowMinSz += 1;
     }
 
-    calcFileCrc = true;
+    if (compress)
+        calcFileCrc = getOptBool(THOROPT_WRITECOMPRESSED_CRC, false);
+    else
+        calcFileCrc = getOptBool(THOROPT_WRITE_CRC, true);
 
     bool external = dlfn.isExternal();
     bool query = dlfn.isQuery();
@@ -368,7 +374,7 @@ void CDiskWriteSlaveActivityBase::open()
     }
     if (extend || (external && !query))
         stream->seek(0,IFSend);
-    ActPrintLog("Created output stream for %s", fName.get());
+    ActPrintLog("Created output stream for %s, calcFileCrc=%s", fName.get(), calcFileCrc?"true":"false");
 }
 
 void CDiskWriteSlaveActivityBase::removeFiles()

+ 29 - 29
thorlcr/graph/thgraph.cpp

@@ -429,6 +429,35 @@ IThorGraphIterator *CGraphElementBase::getAssociatedChildGraphs() const
     return new CGraphArrayIterator(associatedChildGraphs);
 }
 
+StringBuffer &CGraphElementBase::getOpt(const char *prop, StringBuffer &out) const // string option: this element's hint if present, else the job-level option
+{
+    VStringBuffer path("hint[@name=\"%s\"]/@value", prop);
+    if (!queryXGMML().getProp(path.toLowerCase().str(), out)) // the lookup path (including the option name) is lower-cased
+        queryJob().getOpt(prop, out); // no hint on this element: fall back to the job's option
+    return out;
+}
+
+bool CGraphElementBase::getOptBool(const char *prop, bool defVal) const // boolean option; precedence: element hint, then job option, then defVal
+{
+    bool def = queryJob().getOptBool(prop, defVal); // job-level value (or defVal) becomes the default for the hint lookup
+    VStringBuffer path("hint[@name=\"%s\"]/@value", prop);
+    return queryXGMML().getPropBool(path.toLowerCase().str(), def); // the lookup path (including the option name) is lower-cased
+}
+
+int CGraphElementBase::getOptInt(const char *prop, int defVal) const // int option; precedence: element hint, then job option, then defVal
+{
+    int def = queryJob().getOptInt(prop, defVal); // job-level value (or defVal) becomes the default for the hint lookup
+    VStringBuffer path("hint[@name=\"%s\"]/@value", prop);
+    return queryXGMML().getPropInt(path.toLowerCase().str(), def); // the lookup path (including the option name) is lower-cased
+}
+
+__int64 CGraphElementBase::getOptInt64(const char *prop, __int64 defVal) const // 64-bit option; precedence: element hint, then job option, then defVal
+{
+    __int64 def = queryJob().getOptInt64(prop, defVal); // job-level value (or defVal) becomes the default for the hint lookup
+    VStringBuffer path("hint[@name=\"%s\"]/@value", prop);
+    return queryXGMML().getPropInt64(path.toLowerCase().str(), def); // the lookup path (including the option name) is lower-cased
+}
+
 IThorGraphDependencyIterator *CGraphElementBase::getDependsIterator() const
 {
     return new ArrayIIteratorOf<const CGraphDependencyArray, CGraphDependency, IThorGraphDependencyIterator>(dependsOn);
@@ -3105,32 +3134,3 @@ void CActivityBase::cancelReceiveMsg(const rank_t rank, const mptag_t mpTag)
     if (receiving)
         queryJobChannel().queryJobComm().cancel(rank, mpTag);
 }
-
-StringBuffer &CActivityBase::getOpt(const char *prop, StringBuffer &out) const
-{
-    VStringBuffer path("hint[@name=\"%s\"]/@value", prop);
-    if (!container.queryXGMML().getProp(path.toLowerCase().str(), out))
-        queryJob().getOpt(prop, out);
-    return out;
-}
-
-bool CActivityBase::getOptBool(const char *prop, bool defVal) const
-{
-    bool def = queryJob().getOptBool(prop, defVal);
-    VStringBuffer path("hint[@name=\"%s\"]/@value", prop);
-    return container.queryXGMML().getPropBool(path.toLowerCase().str(), def);
-}
-
-int CActivityBase::getOptInt(const char *prop, int defVal) const
-{
-    int def = queryJob().getOptInt(prop, defVal);
-    VStringBuffer path("hint[@name=\"%s\"]/@value", prop);
-    return container.queryXGMML().getPropInt(path.toLowerCase().str(), def);
-}
-
-__int64 CActivityBase::getOptInt64(const char *prop, __int64 defVal) const
-{
-    __int64 def = queryJob().getOptInt64(prop, defVal);
-    VStringBuffer path("hint[@name=\"%s\"]/@value", prop);
-    return container.queryXGMML().getPropInt64(path.toLowerCase().str(), def);
-}

+ 12 - 6
thorlcr/graph/thgraph.hpp

@@ -283,6 +283,12 @@ public:
     void releaseIOs();
     void addDependsOn(CGraphBase *graph, int controlId);
     IThorGraphDependencyIterator *getDependsIterator() const;
+    StringBuffer &getOpt(const char *prop, StringBuffer &out) const;
+    bool getOptBool(const char *prop, bool defVal=false) const;
+    int getOptInt(const char *prop, int defVal=0) const;
+    unsigned getOptUInt(const char *prop, unsigned defVal=0) const { return (unsigned)getOptInt(prop, defVal); }
+    __int64 getOptInt64(const char *prop, __int64 defVal=0) const;
+    unsigned __int64 getOptUInt64(const char *prop, unsigned __int64 defVal=0) const { return (unsigned __int64)getOptInt64(prop, defVal); }
     void ActPrintLog(const char *format, ...)  __attribute__((format(printf, 2, 3)));
     void ActPrintLog(IException *e, const char *format, ...) __attribute__((format(printf, 3, 4)));
     void ActPrintLog(IException *e);
@@ -1045,12 +1051,12 @@ public:
     virtual unsigned queryActivityId() { return (unsigned)queryId(); }
     virtual ICodeContext *queryCodeContext() { return container.queryCodeContext(); }
 
-    StringBuffer &getOpt(const char *prop, StringBuffer &out) const;
-    bool getOptBool(const char *prop, bool defVal=false) const;
-    int getOptInt(const char *prop, int defVal=0) const;
-    unsigned getOptUInt(const char *prop, unsigned defVal=0) const { return (unsigned)getOptInt(prop, defVal); }
-    __int64 getOptInt64(const char *prop, __int64 defVal=0) const;
-    unsigned __int64 getOptUInt64(const char *prop, unsigned __int64 defVal=0) const { return (unsigned __int64)getOptInt64(prop, defVal); }
+    StringBuffer &getOpt(const char *prop, StringBuffer &out) const { return container.getOpt(prop, out); }
+    bool getOptBool(const char *prop, bool defVal=false) const { return container.getOptBool(prop, defVal); }
+    int getOptInt(const char *prop, int defVal=0) const { return container.getOptInt(prop, defVal); }
+    unsigned getOptUInt(const char *prop, unsigned defVal=0) const { return container.getOptUInt(prop, defVal); }
+    __int64 getOptInt64(const char *prop, __int64 defVal=0) const { return container.getOptInt64(prop, defVal); }
+    unsigned __int64 getOptUInt64(const char *prop, unsigned __int64 defVal=0) const { return container.getOptUInt64(prop, defVal); }
 };
 
 interface IFileInProgressHandler : extends IInterface

+ 4 - 0
thorlcr/thorutil/thormisc.hpp

@@ -72,6 +72,10 @@
 #define THOROPT_COMP_FORCELZW         "forceLZW"                // Forces file compression to use LZW                                            (default = false)
 #define THOROPT_TRACE_ENABLED         "traceEnabled"            // Output from TRACE activity enabled                                            (default = false)
 #define THOROPT_TRACE_LIMIT           "traceLimit"              // Number of rows from TRACE activity                                            (default = 10)
+#define THOROPT_READ_CRC              "crcReadEnabled"          // Enable CRC validation on disk reads if file CRCs are available                (default = true)
+#define THOROPT_WRITE_CRC             "crcWriteEnabled"         // Calculate CRCs for disk outputs and store in file meta data                   (default = true)
+#define THOROPT_READCOMPRESSED_CRC    "crcReadCompressedEnabled"  // Enable CRC validation on compressed disk reads if file CRCs are available   (default = false)
+#define THOROPT_WRITECOMPRESSED_CRC   "crcWriteCompressedEnabled" // Calculate CRCs for compressed disk outputs and store in file meta data      (default = false)
 
 #define INITIAL_SELFJOIN_MATCH_WARNING_LEVEL 20000  // max of row matches before selfjoin emits warning