Browse Source

Merge pull request #14202 from jakesmith/hpcc-24775-helm-thorworker

HPCC-24775 Change helm master/slave terminology -> manager/worker

Reviewed-By: Gavin Halliday <gavin.halliday@lexisnexis.com>
Reviewed-By: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 4 years ago
parent
commit
7394566b41

+ 1 - 1
common/workunit/workunit.cpp

@@ -13667,7 +13667,7 @@ void executeThorGraph(const char * graphName, IConstWorkUnit &workunit, const IP
     else
     {
         VStringBuffer job("%s-%s", wuid.str(), graphName);
-        runK8sJob("thormaster", wuid, job, queryComponentConfig().getPropBool("@deleteJobs", true), { { "graphName", graphName} });
+        runK8sJob("thormanager", wuid, job, queryComponentConfig().getPropBool("@deleteJobs", true), { { "graphName", graphName} });
     }
 
     /* In k8s, Thor feeds back the terminating exception via the workunit.

+ 1 - 1
ecl/agentexec/agentexec.cpp

@@ -234,7 +234,7 @@ public:
                 graphName.set(sArray.item(1));
 
                 // JCSMORE - ideally apptype, image and executable name would all be same.
-                jobSpecName.set("thormaster");
+                jobSpecName.set("thormanager");
                 processName.set("thormaster_lcr");
             }
             if (!queryComponentConfig().getPropBool("@useChildProcesses", false))

+ 1 - 1
helm/hpcc/templates/_helpers.tpl

@@ -550,7 +550,7 @@ Generate instance queue names
 - name: {{ .name }}
   type: thor
   prefix: {{ .prefix | default "null" }}
-  width: {{ mul (.numSlaves | default 1) ( .channelsPerSlave | default 1) }}
+  width: {{ mul (.numWorkers | default 1) ( .channelsPerWorker | default 1) }}
  {{- end }}
 {{- end -}}
 {{- end -}}

+ 11 - 11
helm/hpcc/templates/thor.yaml

@@ -4,7 +4,7 @@
 {{- $hthorName := printf "%s-hthor" .name }}
 {{- $eclAgentName := printf "%s-agent" .name }}
 {{- $thorAgentName := printf "%s-thoragent" .name }}
-{{- $slaveName := printf "%s-slave" .name }}
+{{- $workerName := printf "%s-worker" .name }}
 {{- $serviceName := printf "%s-svc" .name }}
 {{- $eclAgentDefaults := dict "name" $eclAgentName "useChildProcesses" true "replicas" 1 }}
 {{- $eclAgentScope := .eclagent | mergeOverwrite $eclAgentDefaults | default $eclAgentDefaults }}
@@ -111,7 +111,7 @@ data:
 {{ toYaml (omit $hthorScope "logging") | indent 6 }}
       platform:
         type: "thor"
-        width: {{ $thor.numSlaves }}
+        width: {{ mul (.numWorkers | default 1) ( .channelsPerWorker | default 1) }}
 {{- include "hpcc.generateLoggingConfig" (dict "root" $ "me" $hthorScope) | indent 6 }}
 {{ include "hpcc.generateVaultConfig" (dict "root" $ "categories" (list "storage" "ecl" "ecl-user" ) ) | indent 6 }}
     eclagent: # main agent Q handler
@@ -185,11 +185,11 @@ data:
 {{- end }}
 {{- if not $thorAgentScope.useChildProcesses }}
 
-  thormaster-jobspec.yaml: |
+  thormanager-jobspec.yaml: |
     apiVersion: batch/v1
     kind: Job
     metadata:
-      name: thormaster-%jobname
+      name: thormanager-%jobname
     spec:
       ttlSecondsAfterFinished: 100
       template:
@@ -204,7 +204,7 @@ data:
           initContainers:
             {{- include "hpcc.checkDataMount" (dict "root" $) | indent 10 }}
           containers:
-          - name: thormaster-%jobname
+          - name: thormanager-%jobname
 {{- include "hpcc.addSecurityContext" (dict "root" $ "me" .) | indent 12 }}
 {{ include "hpcc.addImageAttrs" (dict "root" $ "me" .) | indent 12 }}
             resources:
@@ -234,13 +234,13 @@ data:
           restartPolicy: Never
       backoffLimit: 0
 
-  thorslave-jobspec.yaml: |
+  thorworker-jobspec.yaml: |
     apiVersion: batch/v1
     kind: Job
     metadata:
-      name: thorslave-%jobname
+      name: thorworker-%jobname
     spec:
-      parallelism: %numSlaves
+      parallelism: %numWorkers
       ttlSecondsAfterFinished: 100
       template:
         metadata:
@@ -251,7 +251,7 @@ data:
         spec:
           serviceAccountName: hpcc-default
           containers:
-          - name: thorslave-%jobname
+          - name: thorworker-%jobname
 {{- include "hpcc.addSecurityContext" (dict "root" $ "me" .) | indent 12 }}
 {{ include "hpcc.addImageAttrs" (dict "root" $ "me" .) | indent 12 }}
             resources:
@@ -281,11 +281,11 @@ data:
           restartPolicy: Never
       backoffLimit: 0
 
-  thormaster-networkspec.yaml: |
+  thormanager-networkspec.yaml: |
     apiVersion: networking.k8s.io/v1
     kind: NetworkPolicy
     metadata:
-      name: thormaster-%jobname
+      name: thormanager-%jobname
     spec:
       podSelector:
         matchLabels:

+ 2 - 2
helm/hpcc/values.schema.json

@@ -584,9 +584,9 @@
           "type": "string",
           "description": "The (optional) file prefix to add to relative filenames"
         },
-        "numSlaves": {
+        "numWorkers": {
           "type": "integer",
-          "description": "The number of slave pods",
+          "description": "The number of worker pods",
           "minimum": 1
         },
         "lingerPeriod": {

+ 1 - 1
helm/hpcc/values.yaml

@@ -228,7 +228,7 @@ roxie:
 
 thor:
 - name: thor
-  numSlaves: 2
+  numWorkers: 2
   globalMemorySize: 4096
   prefix: thor
   eclagent:

+ 26 - 19
thorlcr/master/thmastermain.cpp

@@ -313,7 +313,7 @@ public:
 
         unsigned localThorPortInc = globals->getPropInt("@localThorPortInc", DEFAULT_SLAVEPORTINC);
         unsigned slaveBasePort = globals->getPropInt("@slaveport", DEFAULT_THORSLAVEPORT);
-        unsigned channelsPerSlave = globals->getPropInt("@channelsPerSlave", 1);
+        unsigned channelsPerWorker = globals->getPropInt("@channelsPerWorker", 1);
 
         Owned<IGroup> processGroup;
 
@@ -323,7 +323,7 @@ public:
         else
         {
             processGroup.setown(createIGroup(connectedSlaves.ordinality(), connectedSlaves.getArray()));
-            setupCluster(queryMyNode(), processGroup, channelsPerSlave, slaveBasePort, localThorPortInc);
+            setupCluster(queryMyNode(), processGroup, channelsPerWorker, slaveBasePort, localThorPortInc);
         }
 
         PROGLOG("Slaves connected, initializing..");
@@ -640,7 +640,14 @@ int main( int argc, const char *argv[]  )
     const char *thorname = NULL;
     StringBuffer nodeGroup, logUrl;
     unsigned slavesPerNode = globals->getPropInt("@slavesPerNode", 1);
-    unsigned channelsPerSlave = globals->getPropInt("@channelsPerSlave", 1);
+    unsigned channelsPerWorker;
+    if (globals->hasProp("@channelsPerWorker"))
+        channelsPerWorker = globals->getPropInt("@channelsPerWorker", 1);
+    else
     {   // for backward compatibility only
+        channelsPerWorker = globals->getPropInt("@channelsPerSlave", 1);
+        globals->setPropInt("@channelsPerWorker", channelsPerWorker);
+    }
 
     installDefaultFileHooks(globals);
     ILogMsgHandler *logHandler;
@@ -892,7 +899,7 @@ int main( int argc, const char *argv[]  )
         masterSlaveMpTag = allocateClusterMPTag();
         kjServiceMpTag = allocateClusterMPTag();
 
-        unsigned numSlaves = 0;
+        unsigned numWorkers = 0;
         StringBuffer cloudJobName;
         const char *workunit = nullptr;
         const char *graphName = nullptr;
@@ -904,21 +911,21 @@ int main( int argc, const char *argv[]  )
         if (isEmptyString(graphName))
             throw makeStringException(0, "missing --graphName");
 
-        if (!globals->hasProp("@numSlaves"))
-            throw makeStringException(0, "Default number of slaves not defined (numSlaves)");
+        if (!globals->hasProp("@numWorkers"))
+            throw makeStringException(0, "Default number of workers not defined (numWorkers)");
         else
         {
-            // check 'numSlaves' workunit option.
+            // check 'numWorkers' workunit option.
             Owned<IWorkUnitFactory> factory = getWorkUnitFactory();
             Owned<IConstWorkUnit> wuRead = factory->openWorkUnit(workunit);
             if (!wuRead)
                 throw makeStringExceptionV(0, "Cannot open workunit: %s", workunit);
-            if (wuRead->hasDebugValue("numSlaves"))
-                numSlaves = wuRead->getDebugValueInt("numSlaves", 0);
+            if (wuRead->hasDebugValue("numWorkers"))
+                numWorkers = wuRead->getDebugValueInt("numWorkers", 0);
             else
-                numSlaves = globals->getPropInt("@numSlaves", 0);
-            if (0 == numSlaves)
-                throw makeStringException(0, "Number of slaves must be > 0 (numSlaves)");
+                numWorkers = globals->getPropInt("@numWorkers", 0);
+            if (0 == numWorkers)
+                throw makeStringException(0, "Number of workers must be > 0 (numWorkers)");
         }
 
         cloudJobName.appendf("%s-%s", workunit, graphName);
@@ -926,16 +933,16 @@ int main( int argc, const char *argv[]  )
         StringBuffer myEp;
         queryMyNode()->endpoint().getUrlStr(myEp);
 
-        applyK8sYaml("thorslave", workunit, cloudJobName, "jobspec", { { "graphName", graphName}, { "master", myEp.str() }, { "%numSlaves", std::to_string(numSlaves)} }, false);
+        applyK8sYaml("thorworker", workunit, cloudJobName, "jobspec", { { "graphName", graphName}, { "master", myEp.str() }, { "%numWorkers", std::to_string(numWorkers)} }, false);
 #else
         unsigned localThorPortInc = globals->getPropInt("@localThorPortInc", DEFAULT_SLAVEPORTINC);
         unsigned slaveBasePort = globals->getPropInt("@slaveport", DEFAULT_THORSLAVEPORT);
         Owned<IGroup> rawGroup = getClusterNodeGroup(thorname, "ThorCluster");
-        setClusterGroup(queryMyNode(), rawGroup, slavesPerNode, channelsPerSlave, slaveBasePort, localThorPortInc);
-        numSlaves = queryNodeClusterWidth();
+        setClusterGroup(queryMyNode(), rawGroup, slavesPerNode, channelsPerWorker, slaveBasePort, localThorPortInc);
+        numWorkers = queryNodeClusterWidth();
 #endif
 
-        if (registry->connect(numSlaves))
+        if (registry->connect(numWorkers))
         {
             if (globals->getPropBool("@replicateOutputs")&&globals->getPropBool("@validateDAFS",true)&&!checkClusterRelicateDAFS(queryNodeGroup()))
             {
@@ -947,7 +954,7 @@ int main( int argc, const char *argv[]  )
             for (unsigned s=0; s<totSlaveProcs; s++)
             {
                 StringBuffer slaveStr;
-                for (unsigned c=0; c<channelsPerSlave; c++)
+                for (unsigned c=0; c<channelsPerWorker; c++)
                 {
                     unsigned o = s + (c * totSlaveProcs);
                     if (c)
@@ -955,7 +962,7 @@ int main( int argc, const char *argv[]  )
                     slaveStr.append(o+1);
                 }
                 StringBuffer virtStr;
-                if (channelsPerSlave>1)
+                if (channelsPerWorker>1)
                     virtStr.append("virtual slaves:");
                 else
                     virtStr.append("slave:");
@@ -1013,7 +1020,7 @@ int main( int argc, const char *argv[]  )
 #ifdef _CONTAINERIZED
         registry.clear();
         if (globals->getPropBool("@deleteJobs", true))
-            deleteK8sResource("thorslave", cloudJobName, "job");
+            deleteK8sResource("thorworker", cloudJobName, "job");
         setExitCode(0);
 #endif
         LOG(MCdebugProgress, thorJob, "ThorMaster terminated OK");