Explorar o código

Merge pull request #13072 from jakesmith/hpcc-22960-hthor-filemismatch

HPCC-22960 Check mismatched file type reads

Reviewed-by: Gavin Halliday <ghalliday@hpccsystems.com>
Gavin Halliday %!s(int64=5) %!d(string=hai) anos
pai
achega
4fe12ac827

+ 1 - 0
common/thorhelper/thorcommon.hpp

@@ -37,6 +37,7 @@ static unsigned const defaultMaxCsvRowSize = 10; // MB
 #define OPT_OUTPUTLIMIT_LEGACY    "outputLimit"             // OUTPUT Mb limit (legacy property name, renamed to outputLimitMb in 5.2)
 #define OPT_OUTPUTLIMIT           "outputLimitMb"           // OUTPUT Mb limit                                                               (default = 10 [MB])
 #define OPT_MAXCSVROWSIZE         "maxCsvRowSizeMb"         // Upper limit on csv read line size                                             (default = 10 [MB])
+#define OPT_VALIDATE_FILE_TYPE    "validateFileType"        // Validate that diskread file type matches                                      (default = true)
 
 
 class THORHELPER_API CSizingSerializer : implements IRowSerializerTarget

+ 43 - 1
ecl/hthor/hthor.cpp

@@ -8160,6 +8160,46 @@ void CHThorDiskReadBaseActivity::stop()
     CHThorActivityBase::stop();
 }
 
+#define TE_FileTypeMismatch 10138 // NB: duplicated from thorlcr/shared/thexception.hpp, but be moved to common header
+void CHThorDiskReadBaseActivity::checkFileType(IDistributedFile *file)
+{
+    if (rt_csv == readType)
+        return; // CSV read is permitted to read any type
+    if (!agent.queryWorkUnit()->getDebugValueInt(OPT_VALIDATE_FILE_TYPE, true))
+        return;
+    bool warningOnly = false;
+    const char *expectedType = nullptr;
+    switch (readType)
+    {
+        case rt_binary:
+            if (fixedDiskRecordSize) // we allow fixed width reads of other formats
+                return;
+            expectedType = "flat";
+            break;
+        case rt_xml:
+            expectedType = "xml";
+            warningOnly = true;
+            break;
+        case rt_json:
+            expectedType = "json";
+            warningOnly = true;
+            break;
+        default:
+            throwUnexpected();
+    }
+    const char *kind = queryFileKind(file);
+    if (isEmptyString(kind)) // file has no published kind, can't validate
+        return;
+    if (!strieq(kind, expectedType))
+    {        
+        Owned<IException> e = makeStringExceptionV(TE_FileTypeMismatch, "File format mismatch reading file: '%s'. Expected type '%s', but file is type '%s'", file->queryLogicalName(), expectedType, kind);
+        if (!warningOnly)
+            throw e.getClear();
+        StringBuffer tmp;
+        agent.addWuException(e->errorMessage(tmp), e->errorCode(), SeverityWarning, "eclagent");
+    }
+}
+
 void CHThorDiskReadBaseActivity::resolve()
 {
     OwnedRoxieString fileName(helper.getFileName());
@@ -8190,6 +8230,8 @@ void CHThorDiskReadBaseActivity::resolve()
             IDistributedFile *dFile = ldFile->queryDistributedFile();
             if (dFile)  //only makes sense for distributed (non local) files
             {
+                checkFileType(dFile); // throws an exception if file types mismatch
+
                 persistent = dFile->queryAttributes().getPropBool("@persistent");
                 dfsParts.setown(dFile->getIterator());
                 if (helper.getFlags() & TDRfilenamecallback)
@@ -9352,7 +9394,7 @@ void CHThorCsvReadActivity::checkOpenNext()
 
 CHThorXmlReadActivity::CHThorXmlReadActivity(IAgentContext &_agent, unsigned _activityId, unsigned _subgraphId, IHThorXmlReadArg &_arg, ThorActivityKind _kind, IPropertyTree *_node) : CHThorDiskReadBaseActivity(_agent, _activityId, _subgraphId, _arg, _kind, _node), helper(_arg)
 {
-    readType = rt_xml;
+    readType = (kind==TAKjsonread) ? rt_json : rt_xml;
 }
 
 void CHThorXmlReadActivity::ready()

+ 2 - 1
ecl/hthor/hthor.ipp

@@ -2255,7 +2255,7 @@ protected:
     bool persistent;
     bool grouped;
     bool isCodeSigned = false;
-    enum ReadType:byte { rt_unknown, rt_binary, rt_csv, rt_xml } readType = rt_unknown;
+    enum ReadType:byte { rt_unknown, rt_binary, rt_csv, rt_xml, rt_json } readType = rt_unknown;
     RecordTranslationMode recordTranslationModeHint = RecordTranslationMode::Unspecified;
     unsigned __int64 stopAfter = 0;
     unsigned __int64 remoteLimit = 0;
@@ -2272,6 +2272,7 @@ protected:
     IConstArrayOf<IFieldFilter> fieldFilters;  // These refer to the expected layout
     RowFilter actualFilter;               // This refers to the actual disk layout
 
+    void checkFileType(IDistributedFile *file);
     void close();
     virtual void open();
     void resolve();

+ 2 - 0
testing/regress/ecl/dfs.ecl

@@ -17,6 +17,8 @@
 
 //class=file
 
+#onwarning(10140, ignore);
+
 import $.setup;
 Files := setup.Files(false, false, false);
 

+ 2 - 0
testing/regress/ecl/dfsi.ecl

@@ -17,6 +17,8 @@
 
 //class=file
 
+#onwarning(10140, ignore);
+
 import $.setup;
 Files := setup.Files(false, false, false);
 

+ 2 - 0
testing/regress/ecl/dfsirecordof.ecl

@@ -17,6 +17,8 @@
 
 //class=file
 
+#onwarning(10140, ignore);
+
 import $.setup;
 Files := setup.Files(false, false, false);
 

+ 2 - 0
testing/regress/ecl/dfsj.ecl

@@ -17,6 +17,8 @@
 
 //class=file
 
+#onwarning(10140, ignore);
+
 import $.setup;
 Files := setup.Files(false, false, false);
 

+ 2 - 0
testing/regress/ecl/dfsrecordof.ecl

@@ -17,6 +17,8 @@
 
 //class=file
 
+#onwarning(10140, ignore);
+
 import $.setup;
 Files := setup.Files(false, false, false);
 

+ 1 - 0
testing/regress/ecl/dynamicoptflag.ecl

@@ -20,6 +20,7 @@ prefix := setup.Files(false, false).QueryFilePrefix;
 
 #onwarning (4522, ignore);
 #onwarning (4523, ignore);
+#onwarning(10140, ignore);
 
 d := dataset(DYNAMIC(prefix + 'no_such_file'), {string10 f}, FLAT, OPT);
 output(d);

+ 1 - 1
testing/regress/ecl/key/dfs.xml

@@ -1,4 +1,4 @@
-<Warning><Code>3147</Code><Filename>dfs.ecl</Filename><Line>53</Line><Source>eclcc</Source><Message>Field dg_lastname type mismatch: DFS reports string10 but ECL declared string3</Message></Warning>
+<Warning><Code>3147</Code><Filename>dfs.ecl</Filename><Line>55</Line><Source>eclcc</Source><Message>Field dg_lastname type mismatch: DFS reports string10 but ECL declared string3</Message></Warning>
 <Dataset name='Result 1'>
  <Row><dg_parentid>0</dg_parentid><dg_firstname>DAVID     </dg_firstname><dg_lastname>BAYLISS   </dg_lastname><dg_prange>1</dg_prange></Row>
  <Row><dg_parentid>1</dg_parentid><dg_firstname>DAVID     </dg_firstname><dg_lastname>BAYLISS   </dg_lastname><dg_prange>2</dg_prange></Row>

+ 3 - 0
testing/regress/ecl/keyed_join4.ecl

@@ -16,6 +16,9 @@
 ############################################################################## */
 
 #option('warnOnImplicitJoinLimit', false);
+
+#onwarning(10140, ignore);
+
 import $.setup;
 prefix := setup.Files(false, false).QueryFilePrefix;
 

+ 1 - 1
testing/regress/ecl/nestedtranslate.ecl

@@ -18,7 +18,7 @@
 //class=file
 //version multiPart=true
 
-//#onwarning(10138, ignore);
+#onwarning(10138, ignore);
 
 import ^ as root;
 multiPart := #IFDEFINED(root.multiPart, true);

+ 1 - 0
testing/regress/ecl/optflag.ecl

@@ -19,6 +19,7 @@ import $.setup;
 prefix := setup.Files(false, false).QueryFilePrefix;
 #onwarning(4523, ignore);
 #onwarning(4522, ignore);
+#onwarning(10140, ignore);
 
 d := dataset(prefix + 'no_such_file', {string10 f}, FLAT, OPT);
 output(d);

+ 2 - 0
testing/regress/ecl/optflag2.ecl

@@ -18,6 +18,8 @@
 import $.setup;
 import Std.File;
 
+#onwarning(10140, ignore);
+
 //version optRemoteRead=false
 //version optRemoteRead=true
 

+ 2 - 0
testing/regress/ecl/spray_test_json.ecl

@@ -19,6 +19,8 @@
 //nohthor
 //class=spray
 
+#onwarning(10138, ignore);
+
 //version isSmallFile=true,isUnBallanced=false
 //version isSmallFile=true,isUnBallanced=true
 //version isSmallFile=false

+ 1 - 1
thorlcr/activities/fetch/thfetch.cpp

@@ -55,7 +55,7 @@ public:
         if (fetchFile)
         {
             if (isFileKey(fetchFile))
-                throw MakeActivityException(this, 0, "Attempting to read index as a flat file: %s", fname.get());
+                throw MakeActivityException(this, TE_FileTypeMismatch, "Attempting to read index as a flat file: %s", fname.get());
             Owned<IFileDescriptor> fileDesc = getConfiguredFileDescriptor(*fetchFile);
             void *ekey;
             size32_t ekeylen;

+ 1 - 1
thorlcr/activities/hashdistrib/thhashdistrib.cpp

@@ -129,7 +129,7 @@ public:
         if (0 == file->numParts())
             throw MakeActivityException(this, 0, "KeyedDistribute: Can't distribute based on an empty key: %s", scoped.str());
         if (!isFileKey(file))
-            throw MakeActivityException(this, 0, "Attempting to read flat file as an index: %s", indexFileName.get());
+            throw MakeActivityException(this, TE_FileTypeMismatch, "Attempting to read flat file as an index: %s", indexFileName.get());
 
         checkFormatCrc(this, file, helper->getFormatCrc(), nullptr, helper->getFormatCrc(), nullptr, true);
         Owned<IFileDescriptor> fileDesc = file->getFileDescriptor();

+ 2 - 2
thorlcr/activities/keydiff/thkeydiff.cpp

@@ -58,10 +58,10 @@ public:
 
         originalIndexFile.setown(queryThorFileManager().lookup(container.queryJob(), originalHelperName,false, false, false, container.activityIsCodeSigned()));
         if (!isFileKey(originalIndexFile))
-            throw MakeActivityException(this, 0, "Attempting to read flat file as an index: %s", originalHelperName.get());
+            throw MakeActivityException(this, TE_FileTypeMismatch, "Attempting to read flat file as an index: %s", originalHelperName.get());
         newIndexFile.setown(queryThorFileManager().lookup(container.queryJob(), updatedHelperName, false, false, false, container.activityIsCodeSigned()));
         if (!isFileKey(newIndexFile))
-            throw MakeActivityException(this, 0, "Attempting to read flat file as an index: %s", updatedHelperName.get());
+            throw MakeActivityException(this, TE_FileTypeMismatch, "Attempting to read flat file as an index: %s", updatedHelperName.get());
         if (originalIndexFile->numParts() != newIndexFile->numParts())
             throw MakeActivityException(this, TE_KeyDiffIndexSizeMismatch, "Index %s and %s differ in width", originalName.get(), updatedName.get());
         if (originalIndexFile->querySuperFile() || newIndexFile->querySuperFile())

+ 2 - 2
thorlcr/activities/keyedjoin/thkeyedjoin-legacy.cpp

@@ -99,12 +99,12 @@ public:
         if (indexFile)
         {
             if (!isFileKey(indexFile))
-                throw MakeActivityException(this, 0, "Attempting to read flat file as an index: %s", indexFileName.get());
+                throw MakeActivityException(this, TE_FileTypeMismatch, "Attempting to read flat file as an index: %s", indexFileName.get());
             unsigned numParts = 0;
             localKey = indexFile->queryAttributes().getPropBool("@local");
 
             if (container.queryLocalData() && !localKey)
-                throw MakeActivityException(this, 0, "Keyed Join cannot be LOCAL unless supplied index is local");
+                throw MakeActivityException(this, TE_FileTypeMismatch, "Keyed Join cannot be LOCAL unless supplied index is local");
 
             checkFormatCrc(this, indexFile, helper->getIndexFormatCrc(), helper->queryIndexRecordSize(), helper->getProjectedIndexFormatCrc(), helper->queryProjectedIndexRecordSize(), true);
             Owned<IFileDescriptor> indexFileDesc = indexFile->getFileDescriptor();

+ 2 - 2
thorlcr/activities/keyedjoin/thkeyedjoin.cpp

@@ -280,7 +280,7 @@ public:
         if (indexFile)
         {
             if (!isFileKey(indexFile))
-                throw MakeActivityException(this, 0, "Attempting to read flat file as an index: %s", indexFileName.get());
+                throw MakeActivityException(this, TE_FileTypeMismatch, "Attempting to read flat file as an index: %s", indexFileName.get());
             IDistributedSuperFile *superIndex = indexFile->querySuperFile();
             if (helper->diskAccessRequired())
             {
@@ -291,7 +291,7 @@ public:
                     if (dataFile)
                     {
                         if (isFileKey(dataFile))
-                            throw MakeActivityException(this, 0, "Attempting to read index as a flat file: %s", fetchFilename.get());
+                            throw MakeActivityException(this, TE_FileTypeMismatch, "Attempting to read index as a flat file: %s", fetchFilename.get());
                         if (superIndex)
                             throw MakeActivityException(this, 0, "Superkeys and full keyed joins are not supported");
 

+ 2 - 2
thorlcr/activities/keypatch/thkeypatch.cpp

@@ -57,10 +57,10 @@ public:
 
         originalIndexFile.setown(queryThorFileManager().lookup(container.queryJob(), originalHelperName, false, false, false, container.activityIsCodeSigned()));
         if (!isFileKey(originalIndexFile))
-            throw MakeActivityException(this, 0, "Attempting to read flat file as an index: %s", originalHelperName.get());
+            throw MakeActivityException(this, TE_FileTypeMismatch, "Attempting to read flat file as an index: %s", originalHelperName.get());
         patchFile.setown(queryThorFileManager().lookup(container.queryJob(), patchHelperName, false, false, false, container.activityIsCodeSigned()));
         if (isFileKey(patchFile))
-            throw MakeActivityException(this, 0, "Attempting to read index as a patch file: %s", patchHelperName.get());
+            throw MakeActivityException(this, TE_FileTypeMismatch, "Attempting to read index as a patch file: %s", patchHelperName.get());
         
         if (originalIndexFile->numParts() != patchFile->numParts())
             throw MakeActivityException(this, TE_KeyPatchIndexSizeMismatch, "Index %s and patch %s differ in width", originalName.get(), patchName.get());

+ 5 - 5
thorlcr/activities/msort/thmsort.cpp

@@ -64,8 +64,8 @@ public:
         unsigned flags = helper->getAlgorithmFlags();
         if (algoname && (0 != stricmp(algoname, "quicksort")))
         {
-            Owned<IException> e = MakeActivityException(this, 0, "Ignoring, unsupported sort order algorithm '%s'", algoname.get());
-            reportExceptionToWorkunit(container.queryJob().queryWorkUnit(), e);
+            Owned<IException> e = MakeActivityException(this, TE_UnsupportedSortOrder, "Ignoring, unsupported sort order algorithm '%s'", algoname.get());
+            reportExceptionToWorkunitCheckIgnore(container.queryJob().queryWorkUnit(), e);
         }
     }
 };
@@ -100,15 +100,15 @@ protected:
         unsigned flags = helper->getAlgorithmFlags();
         if (algoname && (0 != stricmp(algoname, "quicksort")))
         {
-            Owned<IException> e = MakeActivityException(this, 0, "Ignoring, unsupported sort order algorithm '%s'", algoname.get());
-            reportExceptionToWorkunit(container.queryJob().queryWorkUnit(), e);
+            Owned<IException> e = MakeActivityException(this, TE_UnsupportedSortOrder, "Ignoring, unsupported sort order algorithm '%s'", algoname.get());
+            reportExceptionToWorkunitCheckIgnore(container.queryJob().queryWorkUnit(), e);
         }
         OwnedRoxieString cosortlogname(helper->getSortedFilename());
         if (cosortlogname&&*cosortlogname)
         {
             Owned<IDistributedFile> coSortFile = queryThorFileManager().lookup(container.queryJob(), cosortlogname, false, false, false, container.activityIsCodeSigned());
             if (isFileKey(coSortFile))
-                throw MakeActivityException(this, 0, "Attempting to read index as a flat file: %s", cosortlogname.get());
+                throw MakeActivityException(this, TE_FileTypeMismatch, "Attempting to read index as a flat file: %s", cosortlogname.get());
             addReadFile(coSortFile);
             Owned<IFileDescriptor> fileDesc = coSortFile->getFileDescriptor();
             unsigned o;

+ 0 - 1
thorlcr/activities/msort/thsortu.cpp

@@ -1195,7 +1195,6 @@ retry:
                     }
                     if (curgroup.ordinality() > INITIAL_SELFJOIN_MATCH_WARNING_LEVEL) {
                         Owned<IThorException> e = MakeActivityWarning(&activity, TE_SelfJoinMatchWarning, "Exceeded initial match limit");
-                        e->setAction(tea_warning);
                         e->queryData().append((unsigned)curgroup.ordinality());
                         activity.fireException(e);
                     }

+ 2 - 6
thorlcr/graph/thgraphmaster.cpp

@@ -1941,9 +1941,7 @@ bool CJobMaster::fireException(IException *e)
         case tea_warning:
         {
             LOG(MCwarning, thorJob, e);
-            ErrorSeverity mappedSeverity = workunit->getWarningSeverity(e->errorCode(), SeverityWarning);
-            if (mappedSeverity != SeverityIgnore)
-                reportExceptionToWorkunit(*workunit, e);
+            reportExceptionToWorkunitCheckIgnore(*workunit, e);
             break;
         }
         default:
@@ -2191,9 +2189,7 @@ bool CMasterGraph::fireException(IException *e)
         case tea_warning:
         {
             LOG(MCwarning, thorJob, e);
-            ErrorSeverity mappedSeverity = job.queryWorkUnit().getWarningSeverity(e->errorCode(), SeverityWarning);
-            if (mappedSeverity != SeverityIgnore)
-                reportExceptionToWorkunit(job.queryWorkUnit(), e);
+            reportExceptionToWorkunitCheckIgnore(job.queryWorkUnit(), e);
             break;
         }
         default:

+ 1 - 0
thorlcr/master/thactivitymaster.cpp

@@ -650,6 +650,7 @@ void checkFormatCrc(CActivityBase *activity, IDistributedFile *file, unsigned ex
                     {
                         // propagate as warning to workunit
                         e->setAction(tea_warning);
+                        e->setSeverity(SeverityWarning);
                         activity->fireException(e);
                     }
                     else

+ 3 - 2
thorlcr/mfilemanager/thmfilemanager.cpp

@@ -360,9 +360,10 @@ public:
                 throw MakeStringException(TE_MachineOrderNotFound, "Missing logical file %s\n", scopedName.str());
             if (reportOptional)
             {
-                Owned<IThorException> e = MakeThorException(0, "Input file '%s' was missing but declared optional", scopedName.str());
+                Owned<IThorException> e = MakeThorException(TE_MissingOptionalFile, "Input file '%s' was missing but declared optional", scopedName.str());
                 e->setAction(tea_warning);
-                reportExceptionToWorkunit(job.queryWorkUnit(), e);
+                e->setSeverity(SeverityWarning);
+                reportExceptionToWorkunitCheckIgnore(job.queryWorkUnit(), e);
             }
             return NULL;
         }

+ 4 - 2
thorlcr/shared/thexception.hpp

@@ -158,9 +158,11 @@
 #define TE_KERN                                 TE_Base + 135
 #define TE_WorkUnitAbortingDumpInfo             TE_Base + 136
 #define TE_RowLeaksDetected                     TE_Base + 137
-#define TE_FileFormatMismatch                   TE_Base + 138
+#define TE_FileTypeMismatch                     TE_Base + 138
 #define TE_RemoteReadFailure                    TE_Base + 139
-#define TE_Final                                TE_Base + 140       // keep this last
+#define TE_MissingOptionalFile                  TE_Base + 140
+#define TE_UnsupportedSortOrder                 TE_Base + 141
+#define TE_Final                                TE_Base + 142       // keep this last
 #define ISTHOREXCEPTION(n) (n > TE_Base && n < TE_Final)
 
 #endif

+ 11 - 2
thorlcr/thorutil/thormisc.cpp

@@ -805,7 +805,15 @@ void reportExceptionToWorkunit(IConstWorkUnit &workunit,IException *e, ErrorSeve
         else
             we->setSeverity(severity);
     }
-} 
+}
+
+void reportExceptionToWorkunitCheckIgnore(IConstWorkUnit &workunit, IException *e, ErrorSeverity severity)
+{
+    ErrorSeverity mappedSeverity = workunit.getWarningSeverity(e->errorCode(), severity);
+    if (SeverityIgnore == mappedSeverity)
+        return;
+    reportExceptionToWorkunit(workunit, e, mappedSeverity);
+}
 
 StringBuffer &getCompoundQueryName(StringBuffer &compoundName, const char *queryName, unsigned version)
 {
@@ -1528,10 +1536,11 @@ void checkFileType(CActivityBase *activity, IDistributedFile *file, const char *
             return;
         if (!strieq(kind, expectedType))
         {
-            Owned<IThorException> e = MakeActivityException(activity, TE_FileFormatMismatch, "File format mismatch reading file: '%s'. Expected type '%s', but file is type '%s'", file->queryLogicalName(), expectedType, kind);
+            Owned<IThorException> e = MakeActivityException(activity, TE_FileTypeMismatch, "File format mismatch reading file: '%s'. Expected type '%s', but file is type '%s'", file->queryLogicalName(), expectedType, kind);
             if (throwException)
                 throw e.getClear();
             e->setAction(tea_warning);
+            e->setSeverity(SeverityWarning);
             activity->fireException(e); // will propagate to workunit warning
         }
     }

+ 2 - 0
thorlcr/thorutil/thormisc.hpp

@@ -466,6 +466,8 @@ extern graph_decl void loadCmdProp(IPropertyTree *tree, const char *cmdProp);
 
 extern graph_decl void ensureDirectoryForFile(const char *fName);
 extern graph_decl void reportExceptionToWorkunit(IConstWorkUnit &workunit,IException *e, ErrorSeverity severity=SeverityWarning);
+extern graph_decl void reportExceptionToWorkunitCheckIgnore(IConstWorkUnit &workunit, IException *e, ErrorSeverity severity=SeverityWarning);
+
 
 extern graph_decl IPropertyTree *globals;
 extern graph_decl mptag_t masterSlaveMpTag;