Browse Source

Merge pull request #405 from blesson-james/devel

Issue #404: Added support for k8s version 1.19
John Lockman 3 years ago
parent
commit
a44572a959

+ 8 - 0
control_plane/roles/control_plane_common/tasks/verify_omnia_params.yml

@@ -55,6 +55,7 @@
   register: input_config_check
   when:
     - mariadb_password | length < 1 or
+      k8s_version | length < 1 or
       k8s_cni | length < 1
 
 - name: Assert mariadb_password
@@ -69,6 +70,12 @@
     success_msg: "{{ success_msg_mariadb_password }}"
     fail_msg: "{{ fail_msg_mariadb_password }}"
 
+# Validate k8s_version by exact membership in the supported list. A substring
+# test ('1.16.7' in k8s_version) would also accept values such as "1.16.70".
+- name: Assert kubernetes version
+  assert:
+    that: "k8s_version in ['1.16.7', '1.19.3']"
+    success_msg: "{{ success_msg_k8s_version }}"
+    fail_msg: "{{ fail_msg_k8s_version }}"
+
 - name: Assert kubernetes cni
   assert:
     that: "('calico' in k8s_cni) or ('flannel' in k8s_cni)"
@@ -78,6 +85,7 @@
 - name: Save input variables from file
   set_fact:
     db_password: "{{ mariadb_password }}"
+    k8s_version: "{{ k8s_version }}"
     k8s_cni: "{{ k8s_cni }}"
   no_log: True
 

+ 2 - 0
control_plane/roles/control_plane_common/vars/main.yml

@@ -91,6 +91,8 @@ config_vaultname: .omnia_vault_key
 input_omnia_failure_msg: "Please provide all the required parameters in omnia_config.yml"
 fail_msg_mariadb_password: "Failed. Incorrect mariadb_password format provided in omnia_config.yml file"
 success_msg_mariadb_password: "mariadb_password validated"
+success_msg_k8s_version: "Kubernetes Version Validated"
+fail_msg_k8s_version: "Failed. Kubernetes Version is unsupported or incorrect in omnia_config.yml"
 success_msg_k8s_cni: "Kubernetes CNI Validated"
 fail_msg_k8s_cni: "Failed. Kubernetes CNI is incorrect in omnia_config.yml"
 

+ 1 - 1
control_plane/roles/control_plane_k8s/tasks/k8s_init.yml

@@ -49,7 +49,7 @@
       changed_when: true
 
     - name: Initialize kubeadm
-      command: "/bin/kubeadm init --pod-network-cidr='{{ k8s_pod_network_cidr }}' \
+      command: "/bin/kubeadm init --pod-network-cidr='{{ appliance_k8s_pod_net_cidr }}' \
           --apiserver-advertise-address='{{ ansible_default_ipv4.address }}'"
       changed_when: true
       register: init_output

+ 5 - 0
omnia_config.yml

@@ -18,6 +18,11 @@
 # The password must not contain -,\, ',"
 mariadb_password: "password"
 
+# Kubernetes version.
+# Supported Versions: "1.16.7" or "1.19.3".
+# Default k8s version is "1.16.7".
+k8s_version: "1.16.7"
+
 # Kubernetes SDN network.
 # It can either be "calico" or "flannel".
 # Default value assigned is "calico".

+ 8 - 0
roles/cluster_validation/tasks/fetch_password.yml

@@ -54,6 +54,7 @@
   register: input_config_check
   when:
     - mariadb_password | length < 1 or
+      k8s_version | length < 1 or
       k8s_cni | length < 1 or
       k8s_pod_network_cidr | length < 1 or
       ansible_config_file_path | length < 1
@@ -70,6 +71,12 @@
     success_msg: "{{ success_msg_mariadb_password }}"
     fail_msg: "{{ fail_msg_mariadb_password }}"
 
+# Validate k8s_version by exact membership in the supported list. A substring
+# test ('1.16.7' in k8s_version) would also accept values such as "1.16.70".
+- name: Assert kubernetes version
+  assert:
+    that: "k8s_version in ['1.16.7', '1.19.3']"
+    success_msg: "{{ success_msg_k8s_version }}"
+    fail_msg: "{{ fail_msg_k8s_version }}"
+
 - name: Assert kubernetes cni
   assert:
     that: "('calico' in k8s_cni) or ('flannel' in k8s_cni)"
@@ -87,6 +94,7 @@
 - name: Save input variables from file
   set_fact:
     db_password: "{{ mariadb_password }}"
+    k8s_version: "{{ k8s_version }}"
     k8s_cni: "{{ k8s_cni }}"
     k8s_pod_network_cidr: "{{ k8s_pod_network_cidr }}"
     ansible_conf_file_path: "{{ ansible_config_file_path }}"

+ 2 - 0
roles/cluster_validation/vars/main.yml

@@ -19,6 +19,8 @@ min_length: 8
 max_length: 30
 fail_msg_mariadb_password: "maria_db password not given in correct format."
 success_msg_mariadb_password: "mariadb_password validated"
+success_msg_k8s_version: "Kubernetes Version Validated"
+fail_msg_k8s_version: "Failed. Kubernetes Version is unsupported or incorrect in omnia_config.yml"
 success_msg_k8s_cni: "Kubernetes CNI Validated"
 fail_msg_k8s_cni: "Kubernetes CNI not correct."
 success_msg_k8s_pod_network_cidr: "Kubernetes pod network cidr validated"

+ 13 - 1
roles/common/tasks/main.yml

@@ -37,18 +37,30 @@
   rpm_key:
     state: present
     key: "{{ elrepo_gpg_key_url }}"
+  register: elrepo_gpg_key
+  until: elrepo_gpg_key is not failed
+  retries: 20
+  delay: 10
   tags: install
 
 - name: Add elrepo (nvidia kmod drivers)
   package:
     name: "{{ elrepo_rpm_url }}"
     state: present
+  register: elrepo
+  until: elrepo is not failed
+  retries: 20
+  delay: 10
   tags: install
 
 - name: Add docker community edition repository
   get_url:
     url: "{{ docker_repo_url }}"
     dest: "{{ docker_repo_dest }}"
+  register: docker_repo
+  until: docker_repo is not failed
+  retries: 20
+  delay: 10
   tags: install
 
 - name: Permanently Disable swap
@@ -138,4 +150,4 @@
     mode: "{{ hosts_file_mode }}"
   with_items:
     - "{{ groups['manager'] }}"
-  when: "'compute' in group_names"
+  when: "'compute' in group_names"

+ 4 - 4
roles/k8s_common/vars/main.yml

@@ -14,9 +14,9 @@
 ---
 
 k8s_packages:
-  - kubelet-1.16.7
-  - kubeadm-1.16.7
-  - kubectl-1.16.7
+  - "kubelet-{{ hostvars['127.0.0.1']['k8s_version'] }}"
+  - "kubeadm-{{ hostvars['127.0.0.1']['k8s_version'] }}"
+  - "kubectl-{{ hostvars['127.0.0.1']['k8s_version'] }}"
 
 k8s_repo_dest: /etc/yum.repos.d/
 
@@ -24,4 +24,4 @@ k8s_conf_dest: /etc/sysctl.d/
 
 k8s_repo_file_mode: 0644
 
-k8s_conf_file_mode: 0644
+k8s_conf_file_mode: 0644

+ 7 - 0
roles/k8s_manager/tasks/main.yml

@@ -24,9 +24,16 @@
     url: "{{ helm_installer_url }}"
     dest: "{{ helm_installer_file_dest }}"
     mode: "{{ helm_installer_file_mode }}"
+  register: helm_installer
+  until: helm_installer is not failed
+  retries: 20
+  delay: 10
   tags: manager
 
 - name: Install helm
   command: "/bin/bash {{ helm_installer_file_dest }}"
   changed_when: true
+  # Re-run the installer script until it succeeds, up to 20 attempts,
+  # waiting 10 seconds between attempts like the other retried tasks.
+  register: install_helm
+  until: install_helm is not failed
+  retries: 20
+  delay: 10
   tags: manager

+ 26 - 6
roles/k8s_start_manager/tasks/main.yml

@@ -13,11 +13,17 @@
 #  limitations under the License.
 ---
 
-- name: Disable swap
+- name: Disable SWAP (1/2)
   command: /usr/sbin/swapoff -a
   changed_when: true
   tags: init
 
+- name: Disable SWAP in fstab (2/2)
+  replace:
+    path: /etc/fstab
+    # Prefix every non-commented fstab line that has a "swap" field with "# ".
+    regexp: '^([^#].*?\sswap\s+.*)$'
+    replace: '# \1'
+  # Same tag as step (1/2), so a tag-filtered run performs both halves.
+  tags: init
+
 - name: Get netaddr
   setup:
     filter: ansible_default_ipv4.address
@@ -44,12 +50,25 @@
   tags: init
 
 - name: Initialize kubeadm
-  command: "/bin/kubeadm init --pod-network-cidr='{{ hostvars['127.0.0.1']['k8s_pod_network_cidr'] }}' \
-    --apiserver-advertise-address='{{ ansible_default_ipv4.address }}'"
-  changed_when: true
+  block:
+    - name: Initialize kubeadm
+      command: "/bin/kubeadm init --pod-network-cidr='{{ hostvars['127.0.0.1']['k8s_pod_network_cidr'] }}' \
+        --apiserver-advertise-address='{{ ansible_default_ipv4.address }}'"
+      changed_when: true
+      register: init_output
+      tags: init
+  rescue:
+    - name: Reset kubeadm
+      command: "kubeadm reset -f"
+      changed_when: true
+      # Tagged like the kubeadm init tasks so that a `--tags init` run does
+      # not skip the reset before the rescue path retries `kubeadm init`.
+      tags: init
+
+    - name: Initialize kubeadm
+      command: "/bin/kubeadm init --pod-network-cidr='{{ hostvars['127.0.0.1']['k8s_pod_network_cidr'] }}' \
+        --apiserver-advertise-address='{{ ansible_default_ipv4.address }}'"
+      changed_when: true
+      register: init_output
+      tags: init
   when: "'master' not in k8s_nodes.stdout"
-  register: init_output
-  tags: init
 
 - name: Setup directory for Kubernetes environment for root
   file:
@@ -183,4 +202,5 @@
 - name: Edge / Workstation Install allows pods to scheudle on manager
   command: kubectl taint nodes --all node-role.kubernetes.io/master-
   when: groups['manager'][0] == groups['compute'][0] and groups['compute']|length == 1
+  ignore_errors: True
   tags: init

+ 18 - 0
roles/k8s_start_services/files/k8s_dashboard_admin.yaml

@@ -0,0 +1,18 @@
+# Service account named admin-user in the kubernetes-dashboard namespace.
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: admin-user
+  namespace: kubernetes-dashboard
+---
+# Binds the cluster-admin ClusterRole to the admin-user service account above.
+# NOTE(review): cluster-admin is presumably the built-in full-access role;
+# confirm that the dashboard login really needs cluster-wide admin rights.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: admin-user
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: cluster-admin
+subjects:
+- kind: ServiceAccount
+  name: admin-user
+  namespace: kubernetes-dashboard

+ 17 - 3
roles/k8s_start_services/tasks/main.yml

@@ -67,6 +67,18 @@
   when: "'kubernetes-dashboard' not in k8s_pods.stdout"
   tags: init
 
+# Place the dashboard admin manifest on the host, then apply it with kubectl.
+# Both tasks carry the `init` tag like the neighbouring tasks, so a
+# tag-filtered run does not skip them.
+- name: Copy k8s_dashboard_admin.yml file
+  copy:
+    src: k8s_dashboard_admin.yaml
+    dest: "{{ k8s_dashboard_admin_file_dest }}"
+    owner: root
+    group: root
+    mode: "{{ k8s_dashboard_admin_file_mode }}"
+  tags: init
+
+- name: Create admin user for K8s dashboard
+  command: "kubectl apply -f {{ k8s_dashboard_admin_file_dest }}"
+  changed_when: true
+  tags: init
+
 - name: Helm - add stable repo
   command: "helm repo add stable '{{ helm_stable_repo_url }}'"
   changed_when: true
@@ -182,16 +194,18 @@
   tags: init
 
 - name: Install Spark Operator
-  command: "helm repo add spark-operator https://googlecloudplatform.github.io/spark-on-k8s-operator"
+  command: "helm repo add spark-operator '{{ spark_operator_repo }}'"
   changed_when: true
   tags: init
 
 - name: Install Spark Operator Namespace
   command: "helm install my-release spark-operator/spark-operator --namespace spark-operator --create-namespace"
   changed_when: true
+  when: "'spark-operator' not in k8s_pods.stdout"
   tags: init
 
 - name: Deploy Volcano Scheduling
-  command: "kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/master/installer/volcano-development.yaml"
+  command: "kubectl apply -f '{{ volcano_scheduling_yaml_url }}'"
   changed_when: true
-  tags: init
+  when: "'volcano-system' not in k8s_pods.stdout"
+  tags: init

+ 11 - 3
roles/k8s_start_services/vars/main.yml

@@ -23,7 +23,11 @@ metallb_deployment_file_mode: 0655
 
 metallb_yaml_url: https://raw.githubusercontent.com/google/metallb/v0.8.1/manifests/metallb.yaml
 
-k8s_dashboard_yaml_url: https://raw.githubusercontent.com/kubernetes/dashboard/v2.0.0/aio/deploy/recommended.yaml
+k8s_dashboard_admin_file_dest: /root/k8s/k8s_dashboard_admin.yaml
+
+k8s_dashboard_admin_file_mode: 0655
+
+k8s_dashboard_yaml_url: https://raw.githubusercontent.com/kubernetes/dashboard/v2.0.5/aio/deploy/recommended.yaml
 
 helm_stable_repo_url: https://charts.helm.sh/stable
 
@@ -45,7 +49,7 @@ gpu_feature_discovery_version: 0.2.0
 
 fpga_device_plugin_yaml_url: https://raw.githubusercontent.com/Xilinx/FPGA_as_a_Service/master/k8s-fpga-device-plugin/fpga-device-plugin.yml
 
-rocm_device_plugin_yaml_url: https://raw.githubusercontent.com/RadeonOpenCompute/k8s-device-plugin/master/k8s-ds-amdgpu-dp.yaml 
+rocm_device_plugin_yaml_url: https://raw.githubusercontent.com/RadeonOpenCompute/k8s-device-plugin/master/k8s-ds-amdgpu-dp.yaml
 
 slurm_exporter_config_file: extraScrapeConfigs.yaml
 
@@ -53,4 +57,8 @@ slurm_exporter_config_file_path: /var/lib/
 
 slurm_exporter_file_mode: 0655
 
-prometheus_path_on_host: /var/lib/prometheus-2.23.0.linux-amd64/
+prometheus_path_on_host: /var/lib/prometheus-2.23.0.linux-amd64/
+
+spark_operator_repo: https://googlecloudplatform.github.io/spark-on-k8s-operator
+
+volcano_scheduling_yaml_url: https://raw.githubusercontent.com/volcano-sh/volcano/master/installer/volcano-development.yaml