Переглянути джерело

HPCC-16708 Retrieve correct number of thor slaves to get slave logs

The existing WsWorkunits code retrieves the number of thor slaves using
getSize(). That is incorrect when channelsPerSlave > 1, because getSize()
returns nodes * slavesPerNode * channelsPerSlave. In this fix, a new
method getNumberOfSlaveLogs() is added to IConstWUClusterInfo. It returns
nodes * slavesPerNode and is used to retrieve the number of thor slaves
for reading slave logs.

Signed-off-by: wangkx <kevin.wang@lexisnexis.com>
wangkx 8 роки тому
батько
коміт
24de47d2f3

+ 7 - 1
common/workunit/workunit.cpp

@@ -4393,12 +4393,13 @@ class CEnvironmentClusterInfo: implements IConstWUClusterInfo, public CInterface
     unsigned clusterWidth;
     unsigned roxieRedundancy;
     unsigned channelsPerNode;
+    unsigned numberOfSlaveLogs;
     int roxieReplicateOffset;
 
 public:
     IMPLEMENT_IINTERFACE;
     CEnvironmentClusterInfo(const char *_name, const char *_prefix, const char *_alias, IPropertyTree *agent, IArrayOf<IPropertyTree> &thors, IPropertyTree *roxie)
-        : name(_name), prefix(_prefix), alias(_alias), roxieRedundancy(0), channelsPerNode(0), roxieReplicateOffset(1)
+        : name(_name), prefix(_prefix), alias(_alias), roxieRedundancy(0), channelsPerNode(0), numberOfSlaveLogs(0), roxieReplicateOffset(1)
     {
         StringBuffer queue;
         if (thors.ordinality())
@@ -4425,6 +4426,7 @@ public:
                 unsigned slavesPerNode = thor.getPropInt("@slavesPerNode", 1);
                 unsigned channelsPerSlave = thor.getPropInt("@channelsPerSlave", 1);
                 unsigned ts = nodes * slavesPerNode * channelsPerSlave;
+                numberOfSlaveLogs = nodes * slavesPerNode;
                 if (clusterWidth && (ts!=clusterWidth)) 
                     throw MakeStringException(WUERR_MismatchClusterSize,"CEnvironmentClusterInfo: mismatched thor sizes in cluster");
                 clusterWidth = ts;
@@ -4513,6 +4515,10 @@ public:
     {
         return clusterWidth;
     }
+    unsigned getNumberOfSlaveLogs() const // accessor for the slave-log count kept separately from clusterWidth
+    {
+        return numberOfSlaveLogs; // initialized to 0; the constructor sets it to nodes * slavesPerNode (channelsPerSlave is not a factor)
+    }
     virtual ClusterType getPlatform() const
     {
         return platform;

+ 1 - 0
common/workunit/workunit.hpp

@@ -543,6 +543,7 @@ interface IConstWUClusterInfo : extends IInterface
     virtual IStringVal & getScope(IStringVal & str) const = 0;
     virtual IStringVal & getThorQueue(IStringVal & str) const = 0;
     virtual unsigned getSize() const = 0;
+    virtual unsigned getNumberOfSlaveLogs() const = 0;
     virtual ClusterType getPlatform() const = 0;
     virtual IStringVal & getAgentQueue(IStringVal & str) const = 0;
     virtual IStringVal & getServerQueue(IStringVal & str) const = 0;

+ 2 - 2
esp/services/ws_workunits/ws_workunitsHelpers.cpp

@@ -1059,7 +1059,7 @@ unsigned WsWuInfo::getWorkunitThorLogInfo(IArrayOf<IEspECLHelpFile>& helpers, IE
             return countThorLog;
         }
 
-        unsigned numberOfSlaves = clusterInfo->getSize();
+        unsigned numberOfSlaveLogs = clusterInfo->getNumberOfSlaveLogs();
 
         BoolHash uniqueProcesses;
         Owned<IStringIterator> thorInstances = cw->getProcesses("Thor");
@@ -1125,7 +1125,7 @@ unsigned WsWuInfo::getWorkunitThorLogInfo(IArrayOf<IEspECLHelpFile>& helpers, IE
                 thorLog->setProcessName(processName.str());
                 thorLog->setClusterGroup(groupName.str());
                 thorLog->setLogDate(logDate.str());
-                thorLog->setNumberSlaves(numberOfSlaves);
+                thorLog->setNumberSlaves(numberOfSlaveLogs);
                 thorLogList.append(*thorLog.getLink());
             }
         }

+ 2 - 2
esp/services/ws_workunits/ws_workunitsService.cpp

@@ -4419,7 +4419,7 @@ void CWsWorkunitsEx::addThorSlaveLogfile(Owned<IConstWorkUnit>& cwu, WsWuInfo& w
         return;
     }
 
-    unsigned numberOfSlaves = clusterInfo->getSize();
+    unsigned numberOfSlaveLogs = clusterInfo->getNumberOfSlaveLogs();
     BoolHash uniqueProcesses;
     Owned<IStringIterator> thorInstances = cwu->getProcesses("Thor");
     ForEach (*thorInstances)
@@ -4456,7 +4456,7 @@ void CWsWorkunitsEx::addThorSlaveLogfile(Owned<IConstWorkUnit>& cwu, WsWuInfo& w
             StringBuffer logDate = ppStr;
             logDate.setLength(10);
 
-            for (unsigned i = 0; i < numberOfSlaves; i++)
+            for (unsigned i = 0; i < numberOfSlaveLogs; i++)
             {
                 MemoryBuffer mb;
                 winfo.getWorkunitThorSlaveLog(groupName.str(), NULL, logDate.str(), logDir.str(), i+1, mb, false);