
HPCC-24635 Add NetworkPolicy to helm charts for ingress control

Allow access to eclservices

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 4 years ago
parent
commit
1492d8ba0b

+ 28 - 11
common/workunit/workunit.cpp

@@ -14044,13 +14044,13 @@ static void setResources(StringBuffer &jobYaml, const IConstWorkUnit *workunit,
 }
 
 
-void deleteK8sJob(const char *componentName, const char *job)
+void deleteK8sResource(const char *componentName, const char *job, const char *resource)
 {
     VStringBuffer jobname("%s-%s", componentName, job);
     jobname.toLowerCase();
-    VStringBuffer deleteJob("kubectl delete job/%s", jobname.str());
+    VStringBuffer deleteResource("kubectl delete %s/%s", resource, jobname.str());
     StringBuffer output, error;
-    bool ret = runExternalCommand(componentName, output, error, deleteJob.str(), nullptr);
+    bool ret = runExternalCommand(componentName, output, error, deleteResource.str(), nullptr);
     DBGLOG("kubectl delete output: %s", output.str());
     if (error.length())
         DBGLOG("kubectl delete error: %s", error.str());
@@ -14107,13 +14107,23 @@ void waitK8sJob(const char *componentName, const char *job)
     }
 }
 
-void launchK8sJob(const char *componentName, const char *wuid, const char *job, const std::list<std::pair<std::string, std::string>> &extraParams)
+bool applyK8sYaml(const char *componentName, const char *wuid, const char *job, const char *suffix, const std::list<std::pair<std::string, std::string>> &extraParams, bool optional)
 {
-    VStringBuffer jobname("%s-%s", componentName, job);
+    StringBuffer jobname(job);
     jobname.toLowerCase();
-    VStringBuffer jobSpecFilename("/etc/config/%s-jobspec.yaml", componentName);
+    VStringBuffer jobSpecFilename("/etc/config/%s-%s.yaml", componentName, suffix);
     StringBuffer jobYaml;
-    jobYaml.loadFile(jobSpecFilename, false);
+    try
+    {
+        jobYaml.loadFile(jobSpecFilename, false);
+    }
+    catch (IException *E)
+    {
+        if (!optional)
+            throw;
+        E->Release();
+        return false;
+    }
     jobYaml.replaceString("%jobname", jobname.str());
 
     VStringBuffer args("\"--workunit=%s\"", wuid);
@@ -14141,17 +14151,24 @@ void launchK8sJob(const char *componentName, const char *wuid, const char *job,
         DBGLOG("kubectl error: %s", error.str());
     if (ret)
     {
-        DBGLOG("Using job yaml %s", jobYaml.str());
-        throw makeStringException(0, "Failed to start kubectl job");
+        DBGLOG("Using yaml %s", jobYaml.str());
+        throw makeStringException(0, "Failed to replace k8s resource");
     }
+    return true;
 }
 
 void runK8sJob(const char *componentName, const char *wuid, const char *job, bool del, const std::list<std::pair<std::string, std::string>> &extraParams)
 {
-    launchK8sJob(componentName, wuid, job, extraParams);
+    bool removeNetwork = applyK8sYaml(componentName, wuid, job, "networkspec", extraParams, true);
+    applyK8sYaml(componentName, wuid, job, "jobspec", extraParams, false);
     waitK8sJob(componentName, job);
     if (del)
-        deleteK8sJob(componentName, job);
+    {
+        deleteK8sResource(componentName, job, "job");
+        if (removeNetwork)
+            deleteK8sResource(componentName, job, "networkpolicy");
+    }
+    // MORE - this will not remove the network if the job fails.
 }
 
 #endif
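
For context, a minimal hypothetical caller of the reworked API (the WUID and job names are illustrative; judging from the call sites, keys beginning with '%' are substituted directly into the YAML — as %numSlaves is in thorslave-jobspec.yaml — while other pairs are passed to the job as command-line arguments):

    // Sketch only - runK8sJob() now applies the optional
    // <component>-networkspec.yaml first, then the mandatory
    // <component>-jobspec.yaml, waits for the job to finish, and
    // deletes both resources when del is true.
    #include "workunit.hpp"

    void launchExample(const char *wuid)
    {
        std::list<std::pair<std::string, std::string>> extraParams = {
            { "graphName", "graph1" }   // hypothetical graph name
        };
        runK8sJob("eclagent", wuid, "myjob", /*del=*/true, extraParams);
    }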

+ 2 - 2
common/workunit/workunit.hpp

@@ -1749,9 +1749,9 @@ extern WORKUNIT_API void executeThorGraph(const char * graphName, IConstWorkUnit
 
 #ifdef _CONTAINERIZED
 extern WORKUNIT_API bool executeGraphOnLingeringThor(IConstWorkUnit &workunit, const char *graphName, const char *multiJobLingerQueueName);
-extern WORKUNIT_API void deleteK8sJob(const char *componentName, const char *job);
+extern WORKUNIT_API void deleteK8sResource(const char *componentName, const char *job, const char *resource);
 extern WORKUNIT_API void waitK8sJob(const char *componentName, const char *job);
-extern WORKUNIT_API void launchK8sJob(const char *componentName, const char *wuid, const char *job, const std::list<std::pair<std::string, std::string>> &extraParams={});
+extern WORKUNIT_API bool applyK8sYaml(const char *componentName, const char *wuid, const char *job, const char *suffix, const std::list<std::pair<std::string, std::string>> &extraParams, bool optional);
 extern WORKUNIT_API void runK8sJob(const char *componentName, const char *wuid, const char *job, bool del=true, const std::list<std::pair<std::string, std::string>> &extraParams={});
 #endif
 

+ 1 - 0
dockerfiles/stopall.sh

@@ -22,3 +22,4 @@
 helm uninstall mycluster
 helm uninstall localfile
 kubectl delete jobs --all 
+kubectl delete networkpolicy --all 

+ 2 - 0
helm/hpcc/templates/dali.yaml

@@ -9,10 +9,12 @@ spec:
   selector:
     matchLabels:
       run: {{ .name | quote }}
+      app: dali
   template:
     metadata:
       labels:
         run: {{ .name | quote }}
+        app: dali
     spec:
       serviceAccountName: "hpcc-default"
       initContainers: 

+ 8 - 2
helm/hpcc/templates/eclagent.yaml

@@ -15,6 +15,8 @@ spec:
     metadata:
       labels:
         run: {{ .name | quote }}
+        accessDali: "true"
+        accessEsp: {{ .useChildProcesses | default false | quote }}
     spec:
       serviceAccountName: {{ .useChildProcesses | default false | ternary "hpcc-default" "hpcc-agent" }}
       initContainers:
@@ -60,14 +62,18 @@ data:
     apiVersion: batch/v1
     kind: Job
     metadata:
-      name: %jobname
+      name: {{ $apptype }}-%jobname
     spec:
       ttlSecondsAfterFinished: 100
       template:
+        metadata:
+          labels:
+            accessDali: "true"
+            accessEsp: "true"
         spec:
           serviceAccountName: "hpcc-default"
           containers:
-          - name: %jobname
+          - name: {{ $apptype }}-%jobname
 {{- include "hpcc.addSecurityContext" (dict "root" $ "me" .) | indent 12 }}
 {{ include "hpcc.addImageAttrs" (dict "root" $ "me" .) | indent 12 }}
             resources:

+ 8 - 2
helm/hpcc/templates/eclccserver.yaml

@@ -13,6 +13,8 @@ spec:
     metadata:
       labels:
         run: {{ .name | quote }}
+        accessDali: "true"
+        accessEsp: {{ .useChildProcesses | default false | quote }}
     spec:
       serviceAccountName: {{ .useChildProcesses | default false | ternary "hpcc-default" "hpcc-agent" }}
       terminationGracePeriodSeconds: {{ .terminationGracePeriodSeconds | default 600 }}
@@ -63,14 +65,18 @@ data:
     apiVersion: batch/v1
     kind: Job
     metadata:
-      name: %jobname
+      name: compile-%jobname
     spec:
       ttlSecondsAfterFinished: 100
       template:
+        metadata:
+          labels:
+            accessDali: "true"
+            accessEsp: "true"
         spec:
           serviceAccountName: "hpcc-default"
           containers:
-          - name: %jobname
+          - name: compile-%jobname
 {{- include "hpcc.addSecurityContext" (dict "root" $ "me" .) | indent 12 }}
 {{ include "hpcc.addImageAttrs" (dict "root" $ "me" .) | indent 12 }}
             resources:

+ 2 - 0
helm/hpcc/templates/esp.yaml

@@ -14,6 +14,8 @@ spec:
     metadata:
       labels:
         run: {{ .name | quote }}
+        accessDali: "true"
+        app: {{ $application }}
     spec:
       serviceAccountName: "hpcc-default"
       containers:

+ 2 - 0
helm/hpcc/templates/localroxie.yaml

@@ -19,6 +19,8 @@ spec:
       labels:
         run: {{ $roxie.name | quote }}
         roxie-server: {{ $servername | quote }}
+        accessDali: "true"
+        accessEsp: "true" 
     spec:
       serviceAccountName: "hpcc-default"
       initContainers:

+ 135 - 0
helm/hpcc/templates/network.yaml

@@ -0,0 +1,135 @@
+{{- $egress := (.Values.global.egress | default dict) -}}
+{{- $egressRestricted := $egress.restricted | default false -}}
+---
+##
+## Default to no ingress, and egress only to dns servers
+## 
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: default-deny-all-but-dns
+spec:
+  podSelector: {}
+  policyTypes:
+  - Ingress
+{{- if $egressRestricted }}
+  - Egress
+  egress:
+  - to:
+ {{- if hasKey $egress "kubeSystemLabel" }}
+    - namespaceSelector:
+        matchLabels:
+          name: {{ $egress.kubeSystemLabel }}
+ {{- end }}
+    ports:
+    - protocol: TCP
+      port: 53
+    - protocol: UDP
+      port: 53
+---
+##
+## Pods that launch k8s jobs need access to k8s API server
+## 
+kind: NetworkPolicy
+apiVersion: networking.k8s.io/v1 
+metadata:
+  name: allow-apiserver
+spec:
+  policyTypes:
+  - Egress
+  podSelector: {}
+  egress:
+  - to:
+ {{- if hasKey $egress "kubeApiCidr" }}
+    - ipBlock:
+        cidr: {{ $egress.kubeApiCidr | quote }}
+ {{- end }}
+    ports:
+    - protocol: TCP
+ {{- if hasKey $egress "kubeApiPort" }}
+      port: {{ $egress.kubeApiPort }}
+ {{- end }}
+{{- end }}
+---
+##
+## Many pods need access to dali
+## 
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: dali-server
+spec:
+  podSelector:
+    matchLabels:
+      app: dali
+  ingress:
+  - from:
+    - podSelector:
+        matchLabels:
+          accessDali: "true"
+  egress:
+  - to:
+    - podSelector:
+        matchLabels:
+          accessDali: "true"
+---
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: dali-client
+spec:
+  podSelector:
+    matchLabels:
+      accessDali: "true"
+  ingress:
+  - from:
+    - podSelector:
+        matchLabels:
+          app: dali
+  egress:
+  - to:
+    - podSelector:
+        matchLabels:
+          app: dali
+---
+##
+## Pods that run workunits, or which need to do DFU lookups, are allowed to access espservices
+## 
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: esp-server
+spec:
+  podSelector:
+    matchLabels:
+      app: eclservices
+  ingress:
+  - from:
+    - podSelector:
+        matchLabels:
+          accessEsp: "true"
+  egress:
+  - to:
+    - podSelector:
+        matchLabels:
+          accessEsp: "true"
+---
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: esp-client
+spec:
+  podSelector:
+    matchLabels:
+      accessEsp: "true"
+  ingress:
+  - from:
+    - podSelector:
+        matchLabels:
+          app: eclservices
+  egress:
+  - to:
+    - podSelector:
+        matchLabels:
+          app: eclservices
+          
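
To make the effect of the template above concrete, here is an illustrative rendering of the first policy, assuming the default egress.restricted=true and kubeSystemLabel set to kube-system (the namespace label itself must be applied manually, as noted in values.yaml below):

    apiVersion: networking.k8s.io/v1
    kind: NetworkPolicy
    metadata:
      name: default-deny-all-but-dns
    spec:
      podSelector: {}        # selects every pod in the namespace
      policyTypes:
      - Ingress              # no ingress rules listed => all ingress denied
      - Egress
      egress:
      - to:
        - namespaceSelector:
            matchLabels:
              name: kube-system
        ports:               # DNS only
        - protocol: TCP
          port: 53
        - protocol: UDP
          port: 53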

+ 31 - 0
helm/hpcc/templates/roxie.yaml

@@ -20,6 +20,7 @@ spec:
     metadata:
       labels:
         run: {{ $toponame | quote }}
+        roxie-cluster: {{ $roxie.name | quote }}
     spec:
       serviceAccountName: "hpcc-default"
       containers:
@@ -73,6 +74,30 @@ spec:
   clusterIP: None # Headless service
 
 ---
+
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: {{ $roxie.name | quote }}
+spec:
+  podSelector:
+    matchLabels:
+      roxie-cluster: {{ $roxie.name | quote }}
+  policyTypes:
+  - Ingress
+  - Egress
+  ingress:
+  - from:
+    - podSelector:
+        matchLabels:
+          roxie-cluster: {{ $roxie.name | quote }}
+  egress:
+  - to:
+    - podSelector:
+        matchLabels:
+          roxie-cluster: {{ $roxie.name | quote }}
+
+---
 kind: ConfigMap 
 apiVersion: v1 
 metadata:
@@ -105,6 +130,9 @@ spec:
       labels:
         run: {{ $servername | quote }}
         roxie-server: {{ $servername | quote }}
+        roxie-cluster: {{ $roxie.name | quote }}
+        accessDali: "true"
+        accessEsp: "true" 
     spec:
       serviceAccountName: "hpcc-default"
       initContainers:
@@ -165,6 +193,9 @@ spec:
 {{- if not $roxie.serverReplicas }}        
         roxie-server: {{ $servername | quote }}
 {{- end }}
+        roxie-cluster: {{ $roxie.name | quote }}
+        accessDali: "true"
+        accessEsp: "true" 
     spec:
       serviceAccountName: "hpcc-default"
       initContainers:
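
Since the per-cluster roxie policy selects on the roxie-cluster label, one way to check exactly which pods it will confine (the cluster name here is illustrative):

    kubectl get pods -l roxie-cluster=myroxie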

+ 35 - 0
helm/hpcc/templates/service-account.yaml

@@ -34,3 +34,38 @@ subjects:
   - kind: ServiceAccount
     name: hpcc-agent
     namespace: {{ .Release.Namespace }}
+---
+# The hpcc-thoragent service account is used by thor agent, which needs not only to launch child jobs but also to set new NetworkPolicies
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: hpcc-thoragent
+---
+# hpcc-thoragent needs to be able to manipulate jobs AND give the resulting pods the ability to see each other
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: hpcc-thoragent
+rules:
+  - apiGroups: [ "" ] # core API group
+    resources: [ "pods" ]
+    verbs: [ "get", "list", "create" ]
+  - apiGroups: [ "networking.k8s.io" ]
+    resources: [ "networkpolicies" ]
+    verbs: [ "get", "create", "delete" ]
+  - apiGroups: [ "batch" ]
+    resources: [ "jobs" ]
+    verbs: [ "get", "create", "list", "delete", "watch" ]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: hpcc-thoragent
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: hpcc-thoragent
+subjects:
+  - kind: ServiceAccount
+    name: hpcc-thoragent
+    namespace: {{ .Release.Namespace }}
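
A quick way to sanity-check the new binding after deployment (the namespace is illustrative):

    # confirm the thoragent service account may create NetworkPolicies
    kubectl auth can-i create networkpolicies.networking.k8s.io \
      --as=system:serviceaccount:default:hpcc-thoragent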

+ 51 - 9
helm/hpcc/templates/thor.yaml

@@ -28,6 +28,8 @@ spec:
     metadata:
       labels:
         run: {{ $eclAgentName | quote }}
+        accessDali: "true"
+        accessEsp: {{ .useChildProcesses | default false | quote }}
     spec:
       serviceAccountName: {{ $eclAgentScope.useChildProcesses | default false | ternary "hpcc-default" "hpcc-agent" }}
       containers:
@@ -65,8 +67,10 @@ spec:
     metadata:
       labels:
         run: {{ $thorAgentName | quote }}
+        accessDali: "true"
+        accessEsp: {{ .useChildProcesses | default false | quote }}
     spec:
-      serviceAccountName: {{ $thorAgentScope.useChildProcesses | default false | ternary "hpcc-default" "hpcc-agent" }}
+      serviceAccountName: {{ $thorAgentScope.useChildProcesses | default false | ternary "hpcc-default" "hpcc-thoragent" }}
       containers:
       - name: {{ $thorAgentName | quote }}
         workingDir: /var/lib/HPCCSystems
@@ -133,16 +137,20 @@ data:
     apiVersion: batch/v1
     kind: Job
     metadata:
-      name: %jobname
+      name: {{ $agentAppType }}-%jobname
     spec:
       ttlSecondsAfterFinished: 100
       template:
+        metadata:
+          labels:
+            accessDali: "true"
+            accessEsp: "true"
         spec:
           serviceAccountName: {{ $thorAgentScope.useChildProcesses | default false | ternary "hpcc-default" "hpcc-agent" }}
           initContainers: 
             {{- include "hpcc.checkDataMount" (dict "root" $) | indent 10 }}
           containers:
-          - name: %jobname
+          - name: {{ $agentAppType }}-%jobname
 {{- include "hpcc.addSecurityContext" (dict "root" $ "me" .) | indent 12 }}
 {{ include "hpcc.addImageAttrs" (dict "root" $ "me" .) | indent 12 }}
             resources:
@@ -178,16 +186,22 @@ data:
     apiVersion: batch/v1
     kind: Job
     metadata:
-      name: %jobname
+      name: thormaster-%jobname
     spec:
       ttlSecondsAfterFinished: 100
       template:
+        metadata:
+          labels:
+            app: thor
+            accessDali: "true"
+            accessEsp: "true"
+            job: %jobname
         spec:
           serviceAccountName: hpcc-agent
           initContainers:
             {{- include "hpcc.checkDataMount" (dict "root" $) | indent 10 }}
           containers:
-          - name: %jobname
+          - name: thormaster-%jobname
 {{- include "hpcc.addSecurityContext" (dict "root" $ "me" .) | indent 12 }}
 {{ include "hpcc.addImageAttrs" (dict "root" $ "me" .) | indent 12 }}
             resources:
@@ -216,21 +230,25 @@ data:
 {{ include "hpcc.addSecretVolumes" (dict "root" $ "categories" (list "all" "ecl" "storage" ) ) | indent 10 }}
           restartPolicy: Never
       backoffLimit: 0
-{{- end }}
 
   thorslave-jobspec.yaml: |
     apiVersion: batch/v1
     kind: Job
     metadata:
-      name: %jobname
+      name: thorslave-%jobname
     spec:
       parallelism: %numSlaves
       ttlSecondsAfterFinished: 100
       template:
+        metadata:
+          labels:
+            app: thor
+            accessEsp: "true"
+            job: %jobname
         spec:
           serviceAccountName: hpcc-default
           containers:
-          - name: %jobname
+          - name: thorslave-%jobname
 {{- include "hpcc.addSecurityContext" (dict "root" $ "me" .) | indent 12 }}
 {{ include "hpcc.addImageAttrs" (dict "root" $ "me" .) | indent 12 }}
             resources:
@@ -259,6 +277,30 @@ data:
 {{ include "hpcc.addSecretVolumes" (dict "root" $ "categories" (list "all" "ecl" "storage" ) ) | indent 10 }}
           restartPolicy: Never
       backoffLimit: 0
----
+
+  thormaster-networkspec.yaml: |
+    apiVersion: networking.k8s.io/v1
+    kind: NetworkPolicy
+    metadata:
+      name: thormaster-%jobname
+    spec:
+      podSelector:
+        matchLabels:
+          app: thor
+          job: %jobname
+      ingress:
+      - from:
+        - podSelector:
+            matchLabels:
+              app: thor
+              job: %jobname
+      egress:
+      - to:
+        - podSelector:
+            matchLabels:
+              app: thor
+              job: %jobname
+               
+{{- end }}
 {{- end }}
 {{- end }}
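
The same %jobname placeholder ties the job specs and the network spec together: applyK8sYaml substitutes it into both, so for a hypothetical job named "mythor-w1" the rendered policy becomes, in part:

    metadata:
      name: thormaster-mythor-w1
    spec:
      podSelector:
        matchLabels:
          app: thor
          job: mythor-w1

and the thormaster/thorslave pods carry the matching app: thor / job: mythor-w1 labels, confining traffic to pods of the same Thor job.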

+ 25 - 0
helm/hpcc/values.schema.json

@@ -132,6 +132,9 @@
         "logging": {
           "$ref": "#/definitions/logging"
         },
+        "egress": {
+          "$ref": "#/definitions/egress"
+        },
         "defaultEsp": {
           "type": "string"
         },
@@ -318,6 +321,28 @@
       },
       "additionalProperties": { "type": ["integer", "string", "boolean"] }
     },
+    "egress": {
+      "type": "object",
+      "properties": {
+        "restricted": {
+          "description": "Are any egress controls applied",
+          "type": "boolean"
+        },
+        "kubeApiCidr": {
+          "description": "IP range for kubectl API service",
+          "type": "string"
+        },
+        "kubeApiPort": {
+          "description": "Port used for connections to kubectl API",
+          "type": "integer"
+        },
+        "kubeSystemLabel": {
+          "type": "string",
+          "description": "Label that has been applied to the kube-system namespace, used to restrict DNS service calls on port 53 to pods in the kube-system namespace"
+        }
+      },
+      "additionalProperties": false
+    },
     "dali": {
       "type": "object",
       "required": [ "name" ],

+ 16 - 0
helm/hpcc/values.yaml

@@ -17,6 +17,22 @@ global:
   # If not specified, the first esp component that exposes eclservices application is assumed.
   # Can also be overridden locally in individual components
   ## defaultEsp: eclservices
+  
+  egress:
+    ## If restricted is set, NetworkPolicies will include egress restrictions to allow connections from pods only to the minimum required by the system
+    ## Set to false to disable all egress policy restrictions (not recommended)
+    restricted: true
+    
+    ## The kube-system namespace is not generally labelled by default - to enable more restrictive egress control for dns lookups we need to be told the label
+    ## If not provided, DNS lookups on port 53 will be allowed to connect anywhere
+    ## The namespace may be labelled using a command such as "kubectl label namespace kube-system name=kube-system"
+    # kubeSystemLabel: "kube-system"
+
+    ## To properly allow access to the kubectl API from pods that need it, the cidr of the kubectl endpoint needs to be supplied
+    ## This may be obtained via "kubectl get endpoints --namespace default kubernetes"
+    ## If these are not supplied, egress controls will allow access to any IPs/ports from any pod where API access is needed
+    # kubeApiCidr: 172.17.0.3/32  
+    # kubeApiPort: 7443
 
 ## storage:
 ##
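
For reference, the endpoint lookup suggested in the egress comments above produces output along these lines (values illustrative), from which kubeApiCidr and kubeApiPort follow directly:

    $ kubectl get endpoints --namespace default kubernetes
    NAME         ENDPOINTS         AGE
    kubernetes   172.17.0.3:7443   10d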

+ 2 - 2
thorlcr/master/thmastermain.cpp

@@ -926,7 +926,7 @@ int main( int argc, const char *argv[]  )
         StringBuffer myEp;
         queryMyNode()->endpoint().getUrlStr(myEp);
 
-        launchK8sJob("thorslave", workunit, cloudJobName, { { "graphName", graphName}, { "master", myEp.str() }, { "%numSlaves", std::to_string(numSlaves)} });
+        applyK8sYaml("thorslave", workunit, cloudJobName, "jobspec", { { "graphName", graphName}, { "master", myEp.str() }, { "%numSlaves", std::to_string(numSlaves)} }, false);
 #else
         unsigned localThorPortInc = globals->getPropInt("@localThorPortInc", DEFAULT_SLAVEPORTINC);
         unsigned slaveBasePort = globals->getPropInt("@slaveport", DEFAULT_THORSLAVEPORT);
@@ -985,7 +985,7 @@ int main( int argc, const char *argv[]  )
 #ifdef _CONTAINERIZED
         registry.clear();
         if (globals->getPropBool("@deleteJobs", true))
-            deleteK8sJob("thorslave", cloudJobName);
+            deleteK8sResource("thorslave", cloudJobName, "job");
         setExitCode(0);
 #endif
         LOG(MCdebugProgress, thorJob, "ThorMaster terminated OK");