
Merge pull request #201 from blesson-james/devel

Issue #200: Fixed k8s_nfs_client_setup bug and other minor bugs
Lucas A. Wilson 4 years ago
parent commit 27f829fa7d

+ 4 - 4
omnia.yml

@@ -22,7 +22,7 @@
 
 - name: Gather facts from all the nodes
   hosts: all
-    
+
 - name: Apply common installation and config
   hosts: manager, compute
   gather_facts: false
@@ -56,7 +56,7 @@
   gather_facts: false
   roles:
     - k8s_nfs_server_setup
-  tags: 
+  tags:
     - kubernetes
     - nfs
 
@@ -65,7 +65,7 @@
   gather_facts: false
   roles:
     - k8s_nfs_client_setup
-  tags: 
+  tags:
     - kubernetes
     - nfs
 
@@ -143,4 +143,4 @@
       set_fact:
         ssh_to: "{{ groups['manager'] }}"
   roles:
-    - cluster_preperation
+    - cluster_preperation

+ 1 - 13
platforms/roles/kubeflow/tasks/main.yml

@@ -114,20 +114,8 @@
     regexp: 'NodePort'
     replace: 'LoadBalancer'
 
-- name: Remove cert-manager application block
-  replace:
-    path: "{{ kubeflow_config_file }}"
-    regexp: "{{ cert_manager_block }}"
-    replace: "\n"
-
-- name: Remove seldon-core-operator application block
-  replace:
-    path: "{{ kubeflow_config_file }}"
-    regexp: "{{ seldon_core_operator_block }}"
-    replace: "\n"
-
 - name: Apply kubeflow configuration
   command:
     cmd: "/usr/bin/kfctl apply -V -f '{{ kubeflow_config_file }}'"
     chdir: "{{ omnia_kubeflow_dir_path }}"
-  changed_when: true
+  changed_when: true
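
With the two "Remove ... application block" tasks dropped, kfctl now applies the configuration with the cert-manager and seldon-core-operator applications left in place. A minimal follow-up check one could run on the manager after the apply (the tasks below are hypothetical, not part of this role):

  - name: List pods after kfctl apply
    command: kubectl get pods --all-namespaces
    changed_when: false
    register: kubeflow_pods

  - name: Report whether the no-longer-stripped applications deployed
    debug:
      msg:
        - "cert-manager present: {{ 'cert-manager' in kubeflow_pods.stdout }}"
        - "seldon-core-operator present: {{ 'seldon' in kubeflow_pods.stdout }}"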

+ 0 - 22
platforms/roles/kubeflow/vars/main.yml

@@ -32,25 +32,3 @@ kfserving_gateway_yaml_file_path: "{{ omnia_kubeflow_dir_path }}/kustomize/kfser
 argo_yaml_file_path: "{{ omnia_kubeflow_dir_path }}/kustomize/argo/base/service.yaml"
 
 kubeflow_config_file: "{{ omnia_kubeflow_dir_path }}/kfctl_k8s_istio.v1.0.2.yaml"
-
-cert_manager_block: >
-    - kustomizeConfig:
-          overlays:
-          - self-signed
-          - application
-          parameters:
-          - name: namespace
-            value: cert-manager
-          repoRef:
-            name: manifests
-            path: cert-manager/cert-manager
-        name: cert-manager
-
-seldon_core_operator_block: >
-    - kustomizeConfig:
-          overlays:
-          - application
-          repoRef:
-            name: manifests
-            path: seldon/seldon-core-operator
-        name: seldon-core-operator

+ 2 - 2
roles/common/tasks/nvidia.yml

@@ -26,7 +26,7 @@
     enabled: yes
   tags: install
 
-- name: Add nvidia-container-runtime Repo 
+- name: Add nvidia-container-runtime Repo
   yum_repository:
     name: nvidia-container-runtime
     description:  nvidia-container-runtime
@@ -39,7 +39,7 @@
     enabled: yes
   tags: install
 
-- name: Add nvidia-docker Repo 
+- name: Add nvidia-docker Repo
   yum_repository:
     name: nvidia-docker
     description:  nvidia-docker

+ 1 - 0
roles/common/vars/main.yml

@@ -24,6 +24,7 @@ common_packages:
   - chrony
   - pciutils
   - docker-ce
+  - openssl
 
 custom_fact_dir: /etc/ansible/facts.d
 

+ 2 - 2
roles/k8s_common/tasks/main.yml

@@ -21,8 +21,8 @@
     enabled: yes
     gpgcheck: no
     repo_gpgcheck: no
-    gpgkey: 
-      - https://packages.cloud.google.com/yum/doc/yum-key.gpg 
+    gpgkey:
+      - https://packages.cloud.google.com/yum/doc/yum-key.gpg
       - https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
   tags: install
 

+ 4 - 4
roles/k8s_firewalld/tasks/main.yml

@@ -50,7 +50,7 @@
     permanent: yes
     state: enabled
   with_items: "{{ flannel_udp_ports }}"
-  when: k8s_cni == "flannel"
+  when: hostvars['127.0.0.1']['k8s_cni'] == "flannel"
   tags: firewalld
 
 - name: Open calico UDP ports on the firewall
@@ -59,7 +59,7 @@
     permanent: yes
     state: enabled
   with_items: "{{ calico_udp_ports }}"
-  when: k8s_cni == "calico"
+  when: hostvars['127.0.0.1']['k8s_cni'] == "calico"
   tags: firewalld
 
 - name: Open calico TCP ports on the firewall
@@ -68,7 +68,7 @@
     permanent: yes
     state: enabled
   with_items: "{{ calico_tcp_ports }}"
-  when: k8s_cni == "calico"
+  when: hostvars['127.0.0.1']['k8s_cni'] == "calico"
   tags: firewalld
 
 - name: Reload firewalld
@@ -81,4 +81,4 @@
     name: firewalld
     state: stopped
     enabled: no
-  tags: firewalld
+  tags: firewalld
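
The role no longer defines k8s_cni itself (see the vars change below); the conditions now read it from the control node via hostvars['127.0.0.1']['k8s_cni'], so every host tests the same value. A minimal sketch of how such a fact could be published on localhost by an earlier play (this play is an assumption for illustration; the actual source of the variable is outside this diff):

  - name: Gather cluster configuration
    hosts: localhost
    connection: local
    gather_facts: false
    tasks:
      - name: Publish the chosen CNI so later plays can read it from hostvars['127.0.0.1']
        set_fact:
          k8s_cni: calico    # hypothetical value; flannel is the other option these conditions test for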

+ 1 - 4
roles/k8s_firewalld/vars/main.yml

@@ -13,9 +13,6 @@
 #  limitations under the License.
 ---
 
-# Kubernetes SDN network
-k8s_cni: calico
-
 # Master nodes firewall ports
 k8s_master_ports:
   - 6443
@@ -39,4 +36,4 @@ calico_tcp_ports:
 # Flannel CNI firewall ports
 flannel_udp_ports:
   - 8285
-  - 8472
+  - 8472

+ 14 - 3
roles/k8s_nfs_client_setup/tasks/main.yml

@@ -19,6 +19,15 @@
     state: present
   tags: nfs_client
 
+- name: Check mounted share
+  shell: mount | grep nfs
+  changed_when: false
+  args:
+    warn: false
+  register: mounted_share
+  ignore_errors: True
+  tags: nfs_client
+
 - name: Creating directory to mount NFS Share
   file:
     path: "{{ nfs_mnt_dir }}"
@@ -27,14 +36,16 @@
   tags: nfs_client
 
 - name: Mounting NFS Share
-  command: "mount {{ groups['manager'] }}:{{ nfs_mnt_dir }} {{ nfs_mnt_dir }}"
+  command: "mount {{ groups['manager'][0] }}:{{ nfs_mnt_dir }} {{ nfs_mnt_dir }}"
   changed_when: true
   args:
     warn: false
+  when: groups['manager'][0] not in mounted_share.stdout
   tags: nfs_client
 
 - name: Configuring Automount NFS Shares on reboot
   lineinfile:
     path: "{{ fstab_file_path }}"
-    line: "{{ groups['manager'] }}:{{ nfs_mnt_dir }}     {{ nfs_mnt_dir }}  nfs     nosuid,rw,sync,hard,intr 0 0"
-  tags: nfs_client
+    line: "{{ groups['manager'][0] }}:{{ nfs_mnt_dir }}     {{ nfs_mnt_dir }}  nfs     nosuid,rw,sync,hard,intr 0 0"
+  when: groups['manager'][0] not in mounted_share.stdout
+  tags: nfs_client
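
This is the fix named in the commit message: groups['manager'] is a list, so the old command rendered its source as a bracketed list rather than a hostname, which mount rejects, and the fstab line was equally malformed; groups['manager'][0] yields the first manager's hostname. The new grep-based guard also keeps re-runs from mounting the share twice. A minimal sketch showing how the two expressions render (hypothetical play, assuming an inventory with manager and compute groups):

  - hosts: compute
    gather_facts: false
    tasks:
      - name: Show how the two manager expressions render
        debug:
          msg:
            - "groups['manager']    -> {{ groups['manager'] }}"       # e.g. ['node001'] - not a valid mount source
            - "groups['manager'][0] -> {{ groups['manager'][0] }}"    # e.g. node001 - usable in mount and /etc/fstab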

+ 22 - 5
roles/k8s_start_manager/tasks/main.yml

@@ -22,9 +22,17 @@
   setup:
     filter: ansible_default_ipv4.address
 
+- name: Check K8s nodes status
+  command: kubectl get nodes
+  changed_when: false
+  ignore_errors: True
+  register: k8s_nodes
+  tags: init
+
 - name: Initialize kubeadm
   command: "/bin/kubeadm init --pod-network-cidr='{{ pod_network_cidr_ip }}' --apiserver-advertise-address='{{ ansible_default_ipv4.address }}'"
   changed_when: true
+  when: "'master' not in k8s_nodes.stdout"
   register: init_output
   tags: init
 
@@ -74,6 +82,7 @@
     token:  "{{ K8S_TOKEN.stdout }}"
     hash:   "{{ K8S_MANAGER_CA_HASH.stdout }}"
     ip:     "{{ ansible_default_ipv4.address }}"
+    k8s_nodes:  "{{ k8s_nodes.stdout }}"
   tags: init
 
 - name: Print k8s token
@@ -96,12 +105,12 @@
 
 - name: Setup Calico SDN network
   command: "kubectl apply -f '{{ calico_yml_url }}'"
-  when: k8s_cni == "calico"
+  when: hostvars['127.0.0.1']['k8s_cni'] == "calico"
   tags: init
 
 - name: Setup Flannel SDN network
   command: "kubectl apply -f '{{ flannel_yml_url }}'"
-  when: k8s_cni == "flannel"
+  when: hostvars['127.0.0.1']['k8s_cni'] == "flannel"
   tags: init
 
 - name: Create yaml repo for setup
@@ -120,9 +129,16 @@
     mode: "{{ k8s_service_account_file_mode }}"
   tags: init
 
+- name: Check K8s service accounts status
+  command: "kubectl get serviceaccounts"
+  changed_when: false
+  register: k8s_service_accounts
+  tags: init
+
 - name: Create service account (K8s dashboard)
   command: "kubectl create -f '{{ k8s_service_account_file_dest }}'"
   changed_when: true
+  when: "'default' not in k8s_service_accounts.stdout"
   tags: init
 
 - name: Create clusterRoleBinding (K8s dashboard) files
@@ -137,6 +153,7 @@
 - name: Create clusterRoleBinding (K8s dashboard)
   command: "kubectl create -f '{{ k8s_clusterRoleBinding_file_dest }}'"
   changed_when: true
+  ignore_errors: True
   tags: init
 
 - name: Dump bearer token for K8s dashboard login
@@ -146,7 +163,7 @@
   changed_when: true
   tags: init
 
-- name: Edge / Workstation Install allows pods to schedule on manager
+- name: Edge / Workstation Install allows pods to schedule on manager
   command: kubectl taint nodes --all node-role.kubernetes.io/master-
-  when: single_node
-  tags: init
+  when: groups['manager'][0] == groups['compute'][0] and groups['compute']|length == 1
+  tags: init
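
The taint removal no longer depends on a single_node variable; it now fires only when the first manager is also the sole compute host, and the new kubectl get nodes check keeps kubeadm init from running twice. A hypothetical inventory (hostname invented for illustration) on which the new condition is true, so pods are allowed to schedule on the manager:

  [manager]
  node001

  [compute]
  node001

On a cluster with separate compute hosts the condition is false and the master taint stays in place.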

+ 1 - 5
roles/k8s_start_manager/vars/main.yml

@@ -13,10 +13,6 @@
 #  limitations under the License.
 ---
 
-single_node: false
-
-k8s_cni: calico
-
 pod_network_cidr_ip: 10.244.0.0/16
 
 k8s_root_directory: /root/.kube
@@ -47,4 +43,4 @@ k8s_clusterRoleBinding_file_mode: 0655
 
 calico_yml_url: https://docs.projectcalico.org/manifests/calico.yaml
 
-flannel_yml_url: https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
+flannel_yml_url: https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml

+ 17 - 3
roles/k8s_start_services/tasks/main.yml

@@ -19,9 +19,16 @@
   ignore_errors: True
   tags: init
 
+- name: Get K8s pods
+  command: kubectl get pods --all-namespaces
+  changed_when: false
+  register: k8s_pods
+  tags: init
+
 - name: Deploy MetalLB
   command: "kubectl apply -f '{{ metallb_yaml_url }}'"
   changed_when: true
+  when: "'metallb' not in k8s_pods.stdout"
   tags: init
 
 - name: Create MetalLB Setup Config Files
@@ -45,17 +52,19 @@
 - name: Deploy MetalLB
   command: "kubectl apply -f '{{ metallb_deployment_file_dest }}'"
   changed_when: true
+  when: "'metallb' not in k8s_pods.stdout"
   tags: init
 
 - name: Create default setup for MetalLB
   command: "kubectl apply -f '{{ metallb_config_file_dest }}'"
   changed_when: true
+  when: "'metallb' not in k8s_pods.stdout"
   tags: init
 
 - name: Start k8s dashboard
   command: "kubectl create -f '{{ k8s_dashboard_yaml_url }}'"
   changed_when: true
-  register: result
+  when: "'kubernetes-dashboard' not in k8s_pods.stdout"
   tags: init
 
 - name: Helm - add stable repo
@@ -81,7 +90,7 @@
 - name: Start NFS Client Provisioner
   command: "helm install stable/nfs-client-provisioner --set nfs.server='{{ nfs_server }}' --set nfs.path='{{ nfs_path }}' --generate-name"
   changed_when: true
-  register: result
+  when: "'nfs-client-provisioner' not in k8s_pods.stdout"
   tags: init
 
 - name: Set NFS-Client Provisioner as DEFAULT StorageClass
@@ -97,25 +106,30 @@
     --set alertmanager.persistentVolume.storageClass=nfs-client,server.persistentVolume.storageClass=nfs-client,server.service.type=LoadBalancer \
     --generate-name
   changed_when: true
+  when: "'prometheus' not in k8s_pods.stdout"
   tags: init
 
 - name: Install MPI Operator
   command: "kubectl create -f '{{ mpi_operator_yaml_url }}'"
   changed_when: true
+  when: "'mpi-operator' not in k8s_pods.stdout"
   tags: init
 
 - name: Install nvidia-device-plugin
   command: "helm install --version='{{ nvidia_device_plugin_version }}' --generate-name --set migStrategy='{{ mig_strategy }}' nvdp/nvidia-device-plugin"
   changed_when: true
+  when: "'nvidia-device-plugin' not in k8s_pods.stdout"
   tags: init
 
 - name: Install GPU Feature Discovery
   command: "helm install --version='{{ gpu_feature_discovery_version }}' --generate-name --set migStrategy='{{ mig_strategy }}' nvgfd/gpu-feature-discovery"
   changed_when: true
+  when: "'node-feature-discovery' not in k8s_pods.stdout"
   tags: init
 
 - name: Deploy Xilinx Device plugin
   command: "kubectl create -f '{{ fpga_device_plugin_yaml_url }}'"
   changed_when: true
   register: fpga_enable
-  tags: init
+  when: "'fpga-device-plugin' not in k8s_pods.stdout"
+  tags: init
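
Every deployment in this file is now guarded by a substring check against a single registered kubectl get pods --all-namespaces call, so re-running the playbook skips components that already exist instead of failing on duplicate creates. A minimal sketch of the pattern with a hypothetical component and manifest name:

  - name: Get K8s pods
    command: kubectl get pods --all-namespaces
    changed_when: false
    register: k8s_pods

  - name: Deploy example-component only when it is not already present
    command: "kubectl apply -f example-component.yaml"    # hypothetical manifest
    changed_when: true
    when: "'example-component' not in k8s_pods.stdout"

Note that this is a plain substring match on the pod listing, so any pod whose name happens to contain the string will also cause the deploy to be skipped.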

+ 8 - 2
roles/k8s_start_workers/tasks/main.yml

@@ -18,10 +18,16 @@
   changed_when: true
   tags: init
 
+- name: Get hostname
+  command: hostname
+  changed_when: true
+  register: node_hostname
+  tags: init
+
 - name: Execute kubeadm join command
   shell: >
     kubeadm join --token={{ hostvars['K8S_TOKEN_HOLDER']['token'] }}
     --discovery-token-ca-cert-hash sha256:{{ hostvars['K8S_TOKEN_HOLDER']['hash'] }}
     {{ hostvars['K8S_TOKEN_HOLDER']['ip'] }}:{{ apiserver_bind_port }}
-  when: not single_node
-  tags: init
+  when: groups['manager'][0] != groups['compute'][0] and groups['compute']|length >= 1 and node_hostname.stdout not in hostvars['K8S_TOKEN_HOLDER']['k8s_nodes']
+  tags: init
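
Workers now skip the join when the cluster is a single node or when their own hostname already appears in the node list the manager published through the K8S_TOKEN_HOLDER dummy host, which makes repeated playbook runs safe. A hypothetical inventory (hostnames invented) on which both workers join on the first pass and are skipped on later passes:

  [manager]
  node001

  [compute]
  node002
  node003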

+ 1 - 3
roles/k8s_start_workers/vars/main.yml

@@ -13,6 +13,4 @@
 #  limitations under the License.
 ---
 
-single_node: false
-
-apiserver_bind_port: 6443
+apiserver_bind_port: 6443