Browse Source

HPCC-23517 Switch Thor to use new config system

Signed-off-by: Jake Smith <jake.smith@lexisnexisrisk.com>
Jake Smith 5 years ago
parent
commit
4fa73a54ae

+ 3 - 3
initfiles/bin/init_thor.in

@@ -149,10 +149,10 @@ while [[ 1 ]]; do
     fi
 
     if [ -z ${valgrindOptions} ]; then
-        log "thormaster cmd : $instancedir/thormaster_$THORNAME MASTER=$THORMASTER:$THORMASTERPORT"
-        nohup $instancedir/thormaster_$THORNAME MASTER=$THORMASTER:$THORMASTERPORT 2> /dev/null 1>/dev/null &
+        log "thormaster cmd : $instancedir/thormaster_$THORNAME --master=$THORMASTER:$THORMASTERPORT"
+        nohup $instancedir/thormaster_$THORNAME --master=$THORMASTER:$THORMASTERPORT 2> /dev/null 1>/dev/null &
     else
-        cmd="valgrind ${valgrindOptions} --log-file=$LOG_DIR/$THORNAME/valgrind.thormaster.log $instancedir/thormaster_$THORNAME MASTER=$THORMASTER:$THORMASTERPORT 2>/dev/null 1>/dev/null &"
+        cmd="valgrind ${valgrindOptions} --log-file=$LOG_DIR/$THORNAME/valgrind.thormaster.log $instancedir/thormaster_$THORNAME --master=$THORMASTER:$THORMASTERPORT 2>/dev/null 1>/dev/null &"
         log "${cmd}"
         eval ${cmd}
     fi

+ 1 - 1
thorlcr/graph/thgraphslave.cpp

@@ -1562,7 +1562,7 @@ public:
         if (_foreignNode && !_foreignNode->isNull())
             foreignNode.set(*_foreignNode);
         else
-            foreignNode.set(globals->queryProp("@DALISERVERS"));
+            foreignNode.set(globals->queryProp("@daliServers"));
         return ::getGlobalUniqueIds(num, &foreignNode);
     }
     virtual bool allowDaliAccess() const

+ 28 - 13
thorlcr/master/thmastermain.cpp

@@ -551,8 +551,18 @@ bool ControlHandler(ahType type)
 }
 
 
+static constexpr const char * defaultJson = R"!!({
+"version": "1.0",
+"Thor": {
+    "daliServers": "dali",
+    "watchdogEnabled": "true",
+    "watchdogProgressEnabled": "true"
+}
+})!!";
+
+
 #include "thactivitymaster.hpp"
-int main( int argc, char *argv[]  )
+int main( int argc, const char *argv[]  )
 {
     for (unsigned i=0;i<(unsigned)argc;i++) {
         if (streq(argv[i],"--daemon") || streq(argv[i],"-d")) {
@@ -573,26 +583,22 @@ int main( int argc, char *argv[]  )
     InitModuleObjects();
     NoQuickEditSection xxx;
     {
-        Owned<IFile> iFile = createIFile("thor.xml");
-        globals = iFile->exists() ? createPTree(*iFile, ipt_caseInsensitive) : createPTree("Thor", ipt_caseInsensitive);
+        globals.setown(loadConfiguration(defaultJson, argv, "Thor", "THOR", "thor.xml", nullptr));
     }
     setStatisticsComponentName(SCTthor, globals->queryProp("@name"), true);
 
     globals->setProp("@masterBuildTag", BUILD_TAG);
-    char **pp = argv+1;
-    while (*pp)
-        loadCmdProp(globals, *pp++);
 
     setIORetryCount(globals->getPropInt("Debug/@ioRetries")); // default == 0 == off
     StringBuffer daliServer;
-    if (!globals->getProp("@DALISERVERS", daliServer)) 
+    if (!globals->getProp("@daliServers", daliServer)) 
     {
-        LOG(MCerror, thorJob, "No Dali server list specified in THOR.XML (DALISERVERS=iport,iport...)\n");
+        LOG(MCerror, thorJob, "No Dali server list specified in THOR.XML (daliServers=iport,iport...)\n");
         return 0; // no recycle
     }
 
     SocketEndpoint thorEp;
-    const char *master = globals->queryProp("@MASTER");
+    const char *master = globals->queryProp("@master");
     if (master)
     {
         thorEp.set(master);
@@ -617,7 +623,6 @@ int main( int argc, char *argv[]  )
 #endif
     const char *thorname = NULL;
     StringBuffer nodeGroup, logUrl;
-    unsigned numSlaves = globals->getPropInt("@numSlaves", 0); // >0 in container world, 0 in bare metal
     unsigned slavesPerNode = globals->getPropInt("@slavesPerNode", 1);
     unsigned channelsPerSlave = globals->getPropInt("@channelsPerSlave", 1);
 
@@ -848,7 +853,19 @@ int main( int argc, char *argv[]  )
         masterSlaveMpTag = allocateClusterMPTag();
         kjServiceMpTag = allocateClusterMPTag();
 
-        if (0 == numSlaves) // bare metal
+        unsigned numSlaves = 0;
+        if (isCloud())
+        {
+            if (!globals->hasProp("@numSlaves"))
+                throw makeStringException(0, "Number of slaves not defined (numSlaves)");
+            else
+            {
+                numSlaves = globals->getPropInt("@numSlaves", 0);
+                if (0 == numSlaves)
+                    throw makeStringException(0, "Number of slaves must be > 0 (numSlaves)");
+            }
+        }
+        else
         {
             unsigned localThorPortInc = globals->getPropInt("@localThorPortInc", DEFAULT_SLAVEPORTINC);
             unsigned slaveBasePort = globals->getPropInt("@slaveport", DEFAULT_THORSLAVEPORT);
@@ -912,8 +929,6 @@ int main( int argc, char *argv[]  )
     stopPerformanceMonitor();
     disconnectLogMsgManagerFromDali();
     closeThorServerStatus();
-    if (globals)
-        globals->Release();
     PROGLOG("Thor closing down 4");
     closeDllServer();
     PROGLOG("Thor closing down 3");

+ 1 - 1
thorlcr/slave/slavmain.cpp

@@ -62,7 +62,7 @@ void enableThorSlaveAsDaliClient()
 {
 #ifdef ISDALICLIENT
     PROGLOG("Slave activated as a Dali client");
-    const char *daliServers = globals->queryProp("@DALISERVERS");
+    const char *daliServers = globals->queryProp("@daliServers");
     if (!daliServers)
         throw MakeStringException(0, "No Dali server list specified");
     Owned<IGroup> serverGroup = createIGroup(daliServers, DALI_SERVER_PORT);

+ 7 - 15
thorlcr/slave/thslavemain.cpp

@@ -118,12 +118,11 @@ static bool RegisterSelf(SocketEndpoint &masterEp)
         mySlaveNum = (unsigned)processGroup->rank(queryMyNode());
         assertex(NotFound != mySlaveNum);
         mySlaveNum++; // 1 based;
-        unsigned configSlaveNum = globals->getPropInt("@SLAVENUM", NotFound);
+        unsigned configSlaveNum = globals->getPropInt("@slavenum", NotFound);
         if (NotFound != configSlaveNum)
             assertex(mySlaveNum == configSlaveNum);
 
-        globals->Release();
-        globals = createPTree(msg);
+        globals.setown(createPTree(msg));
         mergeCmdParams(globals); // cmd line
 
         unsigned channelsPerSlave = globals->getPropInt("@channelsPerSlave", 1);
@@ -338,13 +337,7 @@ int main( int argc, char *argv[]  )
     Owned<CReleaseMutex> globalNamedMutex;
 #endif 
 
-    if (globals)
-        globals->Release();
-
-    {
-        Owned<IFile> iFile = createIFile("thor.xml");
-        globals = iFile->exists() ? createPTree(*iFile, ipt_caseInsensitive) : createPTree("Thor", ipt_caseInsensitive);
-    }
+    globals.setown(createPTree("Thor"));
     unsigned multiThorMemoryThreshold = 0;
 
     Owned<IException> unregisterException;
@@ -359,11 +352,11 @@ int main( int argc, char *argv[]  )
         mergeCmdParams(globals);
         cmdArgs = argv+1;
 
-        const char *master = globals->queryProp("@MASTER");
+        const char *master = globals->queryProp("@master");
         if (!master)
             usage();
 
-        mySlaveNum = globals->getPropInt("@SLAVENUM", NotFound);
+        mySlaveNum = globals->getPropInt("@slavenum", NotFound);
         /* NB: in cloud/non-local storage mode, slave number is not known until after registration with the master
         * For the time being log file names are based on their slave number, so can only start when known.
         */
@@ -375,7 +368,7 @@ int main( int argc, char *argv[]  )
         }
 
         // In container world, SLAVE= will not be used
-        const char *slave = globals->queryProp("@SLAVE");
+        const char *slave = globals->queryProp("@slave");
         if (slave)
         {
             slfEp.set(slave);
@@ -389,7 +382,7 @@ int main( int argc, char *argv[]  )
             slfEp.port = globals->getPropInt("@slaveport", THOR_BASESLAVE_PORT);
         setMachinePortBase(slfEp.port);
 
-        setSlaveAffinity(globals->getPropInt("@SLAVEPROCESSNUM"));
+        setSlaveAffinity(globals->getPropInt("@slaveprocessnum"));
 
         startMPServer(DCR_ThorSlave, getFixedPort(TPORT_mp), false);
 
@@ -544,7 +537,6 @@ int main( int argc, char *argv[]  )
     stopLogMsgReceivers();
 #endif
     stopMPServer();
-    ::Release(globals);
     releaseAtoms(); // don't know why we can't use a module_exit to destruct these...
 
     ExitModuleObjects(); // not necessary, atexit will call, but good for leak checking

+ 1 - 1
thorlcr/thorutil/thormisc.cpp

@@ -67,7 +67,7 @@ static Owned<ICommunicator> nodeComm; // communicator based on nodeGroup (master
 
 mptag_t masterSlaveMpTag;
 mptag_t kjServiceMpTag;
-IPropertyTree *globals;
+Owned<IPropertyTree> globals;
 static Owned<IMPtagAllocator> ClusterMPAllocator;
 
 MODULE_INIT(INIT_PRIORITY_STANDARD)

+ 1 - 1
thorlcr/thorutil/thormisc.hpp

@@ -469,7 +469,7 @@ extern graph_decl void reportExceptionToWorkunit(IConstWorkUnit &workunit,IExcep
 extern graph_decl void reportExceptionToWorkunitCheckIgnore(IConstWorkUnit &workunit, IException *e, ErrorSeverity severity=SeverityWarning);
 
 
-extern graph_decl IPropertyTree *globals;
+extern graph_decl Owned<IPropertyTree> globals;
 extern graph_decl mptag_t masterSlaveMpTag;
 extern graph_decl mptag_t kjServiceMpTag;
 enum SlaveMsgTypes:unsigned { smt_errorMsg=1, smt_initGraphReq, smt_initActDataReq, smt_dataReq, smt_getPhysicalName, smt_getFileOffset, smt_actMsg, smt_getresult };