Browse Source

Fix Preflight not working for multinode Thor slaves

In a multinode system (2 slaves, with 1 slave per node), the
preflight was showing certain thorslaves as down when they
were not down. The problem was that an incorrect slave number
was used to generate the PID file names given to the preflight
script. The slave number should be counted per node group, not
per node (identified by its IP address). In this test
environment, the slave numbers are 1 and 2: the slave number
is 1 for the first IP read from the node group and 2 for the
second IP. Their PID files are mythor_slave_1.pid and
mythor_slave_2.pid.

Signed-off-by: Kevin Wang <kevin.wang@lexisnexis.com>
Kevin Wang 13 years ago
parent
commit
4a4217d9b3
2 changed files with 8 additions and 19 deletions
  1. 4 10
      esp/services/ws_machine/ws_machineService.cpp
  2. 4 9
      esp/smc/SMCLib/TpWrapper.cpp

+ 4 - 10
esp/services/ws_machine/ws_machineService.cpp

@@ -2160,7 +2160,7 @@ void Cws_machineEx::getThorMachineList(IConstEnvironment* constEnv, IPropertyTre
     if (!nodeGroup || (nodeGroup->ordinality() == 0))
         return;
 
-    StringArray netAddresses;
+    unsigned processNumber = 0;
     Owned<INodeIterator> gi = nodeGroup->getIterator();
     ForEach(*gi)
     {
@@ -2172,6 +2172,8 @@ void Cws_machineEx::getThorMachineList(IConstEnvironment* constEnv, IPropertyTre
             continue;
         }
 
+        processNumber++;
+
         StringBuffer netAddress;
         const char* ip = addressRead.str();
         if (!streq(ip, "."))
@@ -2190,20 +2192,12 @@ void Cws_machineEx::getThorMachineList(IConstEnvironment* constEnv, IPropertyTre
             continue;
         }
 
-        unsigned countSameAddress = 1;
-        ForEachItemIn(i, netAddresses)
-        {
-            if (streq(netAddresses.item(i), netAddress.str()))
-                countSameAddress++;
-        }
-        netAddresses.append(netAddress.str());
-
         Owned<IConstMachineInfo> pMachineInfo =  constEnv->getMachineByAddress(addressRead.str());
         if (pMachineInfo.get())
         {
             StringBuffer os, processAddress;
             os.append(pMachineInfo->getOS());
-            processAddress.appendf("%s|%s:%s:%s:%s:%s:%d", netAddress.str(), addressRead.str(), machineType, machineName, os.str(), directory, countSameAddress);
+            processAddress.appendf("%s|%s:%s:%s:%s:%s:%d", netAddress.str(), addressRead.str(), machineType, machineName, os.str(), directory, processNumber);
             processAddresses.append(processAddress);
         }
         else

+ 4 - 9
esp/smc/SMCLib/TpWrapper.cpp

@@ -1574,7 +1574,7 @@ void CTpWrapper::getMachineList(double clientVersion,
         if (!nodeGroup || (nodeGroup->ordinality() == 0))
             return;
 
-        StringArray netAddresses;
+        unsigned processNumber = 0;
         INodeIterator &gi = *nodeGroup->getIterator();
         ForEach(gi)
         {
@@ -1586,6 +1586,8 @@ void CTpWrapper::getMachineList(double clientVersion,
                 continue;
             }
 
+            processNumber++;
+
             IEspTpMachine & machineInfo = *(createTpMachine("",""));
             machineInfo.setType(MachineType);
             machineInfo.setNetaddress(netAddress.str());
@@ -1621,14 +1623,7 @@ void CTpWrapper::getMachineList(double clientVersion,
 
                 if (clientVersion > 1.17)
                 {
-                    unsigned countSameAddress = 1;
-                    ForEachItemIn(i, netAddresses)
-                    {
-                        if (streq(netAddresses.item(i), netAddress.str()))
-                            countSameAddress++;
-                    }
-                    netAddresses.append(netAddress.str());
-                    machineInfo.setProcessNumber(countSameAddress);
+                    machineInfo.setProcessNumber(processNumber);
                 }
             }
             else