Kaynağa Gözat

Issue #154: Update Kubernetes Playbook

Signed-off-by: blesson-james <blesson-james@dellteam.com>
John Lockman 4 yıl önce
ebeveyn
işleme
ecb402501e
29 değiştirilmiş dosya ile 749 ekleme ve 308 silme
  1. 3 4
      kubernetes/jupyterhub.yml
  2. 3 5
      kubernetes/kubeflow.yml
  3. 22 17
      kubernetes/kubernetes.yml
  4. 44 42
      kubernetes/roles/common/tasks/main.yml
  5. 40 5
      kubernetes/roles/common/vars/main.yml
  6. 0 3
      kubernetes/roles/computeGPU/files/nvidia
  7. 0 21
      kubernetes/roles/computeGPU/handlers/main.yml
  8. 0 10
      kubernetes/roles/computeGPU/vars/main.yml
  9. 0 0
      kubernetes/roles/compute_gpu/files/daemon.json
  10. 0 0
      kubernetes/roles/compute_gpu/files/k8s.conf
  11. 0 0
      kubernetes/roles/compute_gpu/files/kubernetes.repo
  12. 19 22
      kubernetes/roles/computeGPU/tasks/main.yml
  13. 26 0
      kubernetes/roles/compute_gpu/vars/main.yml
  14. 77 0
      kubernetes/roles/firewalld/tasks/main.yml
  15. 43 0
      kubernetes/roles/firewalld/vars/main.yml
  16. 45 9
      kubernetes/roles/jupyterhub/tasks/main.yml
  17. 26 0
      kubernetes/roles/jupyterhub/vars/main.yml
  18. 50 39
      kubernetes/roles/kubeflow/tasks/main.yml
  19. 56 0
      kubernetes/roles/kubeflow/vars/main.yml
  20. 0 3
      kubernetes/roles/manager/files/nvidia
  21. 12 25
      kubernetes/roles/manager/tasks/main.yml
  22. 24 0
      kubernetes/roles/manager/vars/main.yml
  23. 0 1
      kubernetes/roles/startmanager/files/enable_gpu_k8s.sh
  24. 77 50
      kubernetes/roles/startmanager/tasks/main.yml
  25. 52 0
      kubernetes/roles/startmanager/vars/main.yml
  26. 61 26
      kubernetes/roles/startservices/tasks/main.yml
  27. 46 0
      kubernetes/roles/startservices/vars/main.yml
  28. 5 26
      kubernetes/roles/startworkers/tasks/main.yml
  29. 18 0
      kubernetes/roles/startworkers/vars/main.yml

+ 3 - 4
kubernetes/jupyterhub.yml

@@ -13,9 +13,8 @@
 # limitations under the License.
 ---
 
-#Playbook for installing JupyterHub v1.1.0 in Omnia
-# Start K8s worker servers
-- hosts: manager
+- name: Installing JupyterHub
+  hosts: manager
   gather_facts: false
   roles:
-    - jupyterhub
+    - jupyterhub

+ 3 - 5
kubernetes/kubeflow.yml

@@ -11,12 +11,10 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
-
 ---
-#Playbook for installing Kubeflow v1.0 on Omnia
 
-# Start K8s worker servers
-- hosts: manager
+- name: Installing Kubeflow
+  hosts: manager
   gather_facts: false
   roles:
-    - kubeflow
+    - kubeflow

+ 22 - 17
kubernetes/kubernetes.yml

@@ -11,45 +11,50 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
-
 ---
 #Playbook for kubernetes cluster
 
-#collect info from everything
-- hosts: all
+- name: Gather facts from all the nodes
+  hosts: all
 
-# Apply Common Installation and Config
-- hosts: cluster
+- name: Apply common installation and config
+  hosts: manager, compute
   gather_facts: false
   roles:
     - common
 
-# Apply GPU Node Config
-- hosts: gpus
+- name: Apply GPU node config
+  hosts: compute
   gather_facts: false
   roles:
-    - computeGPU
+    - compute_gpu
 
-# Apply Manager Config
-- hosts: manager
+- name: Apply manager config
+  hosts: manager
   gather_facts: false
   roles:
     - manager
 
-# Start K8s on manager server
-- hosts: manager
+- name: Apply firewalld config on manager and compute nodes
+  hosts: manager, compute
+  gather_facts: false
+  roles:
+    - firewalld
+
+- name: Start K8s on manager server
+  hosts: manager
   gather_facts: false
   roles:
     - startmanager
 
-# Start K8s worker servers
-- hosts: compute,gpus
+- name: Start K8s worker servers on compute nodes
+  hosts: compute
   gather_facts: false
   roles:
     - startworkers
 
-# Start K8s worker servers
-- hosts: manager
+- name: Start K8s services on manager nodes
+  hosts: manager
   gather_facts: false
   roles:
-    - startservices
+    - startservices

+ 44 - 42
kubernetes/roles/common/tasks/main.yml

@@ -11,91 +11,93 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
-
 ---
 
-- name: add kubernetes repo
-  copy: src=kubernetes.repo dest=/etc/yum.repos.d/ owner=root group=root mode=644
+- name: Add kubernetes repo
+  copy:
+    src: kubernetes.repo
+    dest: "{{ k8s_repo_dest }}"
+    owner: root
+    group: root
+    mode: "{{ k8s_repo_file_mode }}"
   tags: install
 
-# add ElRepo GPG Key
-- name: add ElRepo GPG Key
+- name: Add elrepo GPG key
   rpm_key:
     state: present
-    key: https://www.elrepo.org/RPM-GPG-KEY-elrepo.org
+    key: "{{ elrepo_gpg_key_url }}"
   tags: install
 
-- name: add ElRepo (Nvidia kmod drivers)
-  yum:
-    name: http://www.elrepo.org/elrepo-release-7.0-3.el7.elrepo.noarch.rpm
+- name: Add elrepo (nvidia kmod drivers)
+  package:
+    name: "{{ elrepo_rpm_url }}"
     state: present
   tags: install
 
-- name: Add Docker Community Edition Repo
+- name: Add docker community edition repository
   get_url:
-    url: https://download.docker.com/linux/centos/docker-ce.repo
-    dest: /etc/yum.repos.d/docker-ce.repo
-
-- name: update sysctl to handle incorrectly routed traffic when iptables is bypassed
-  copy: src=k8s.conf dest=/etc/sysctl.d/ owner=root group=root mode=644
+    url: "{{ docker_repo_url }}"
+    dest: "{{ docker_repo_dest }}"
   tags: install
 
-- name: update sysctl
-  command: /sbin/sysctl --system
+- name: Update sysctl to handle incorrectly routed traffic when iptables is bypassed
+  copy:
+    src: k8s.conf
+    dest: "{{ k8s_conf_dest }}"
+    owner: root
+    group: root
+    mode: "{{ k8s_conf_file_mode }}"
   tags: install
 
-- name: Install EPEL Repository
-  yum: name=epel-release state=present
+- name: Update sysctl
+  command: /sbin/sysctl --system
+  changed_when: true
   tags: install
 
-- name: disable swap
+- name: Disable swap
   command: /sbin/swapoff -a
+  changed_when: true
   tags: install
 
-- name: Disable SELinux
+- name: Disable selinux
   selinux:
     state: disabled
   tags: install
 
-- name: install common packages
+- name: Install common packages
   package:
-    name:
-      - yum-plugin-versionlock
-      - gcc
-      - nfs-utils
-      - python-pip
-      - docker-ce
-      - bash-completion
-      - kubelet-1.16.7
-      - kubeadm-1.16.7
-      - kubectl-1.16.7
-      - nvidia-detect
+    name: "{{ item }}"
     state: present
+  with_items:
+    - "{{ common_packages }}"
+    - "{{ k8s_packages }}"
   tags: install
 
-- name: versionlock kubernetes
-  command: yum versionlock kubelet-1.16.7 kubectl-1.16.7 kubeadm-1.16.7
+- name: Versionlock kubernetes
+  command: "dnf versionlock '{{ item }}'"
+  args:
+    warn: false
+  with_items:
+    - "{{ k8s_packages }}"
+  changed_when: true
   tags: install
 
-- name: install InfiniBand Support
+- name: Install infiniBand support
   package:
     name: "@Infiniband Support"
     state: present
-
-- name: upgrade pip
-  command: /bin/pip install --upgrade pip
   tags: install
 
-- name: Start and Enable docker service
+- name: Start and enable docker service
   service:
     name: docker
     state: restarted
     enabled: yes
   tags: install
 
-- name: Start and Enable Kubernetes - kubelet
+- name: Start and enable kubernetes - kubelet
   service:
     name: kubelet
     state: restarted
     enabled: yes
-  tags: install
+  tags: install

+ 40 - 5
kubernetes/roles/common/vars/main.yml

@@ -1,10 +1,45 @@
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
 ---
 
 common_packages:
   - epel-release
-  - python-pip
-  - docker
+  - yum-plugin-versionlock
+  - gcc
+  - nfs-utils
+  - python3-pip
+  - docker-ce
   - bash-completion
-  - kubelet 
-  - kubeadm
-  - kubectl
+  - nvidia-detect
+
+k8s_packages:
+  - kubelet-1.16.7
+  - kubeadm-1.16.7
+  - kubectl-1.16.7
+
+k8s_repo_dest: /etc/yum.repos.d/
+
+elrepo_gpg_key_url: https://www.elrepo.org/RPM-GPG-KEY-elrepo.org
+
+elrepo_rpm_url: https://www.elrepo.org/elrepo-release-8.el8.elrepo.noarch.rpm
+
+docker_repo_url: https://download.docker.com/linux/centos/docker-ce.repo
+
+docker_repo_dest: /etc/yum.repos.d/docker-ce.repo
+
+k8s_conf_dest: /etc/sysctl.d/
+
+k8s_repo_file_mode: "0644"  # quoted: stays an octal string when templated into `mode`
+
+k8s_conf_file_mode: "0644"  # quoted: stays an octal string when templated into `mode`

+ 0 - 3
kubernetes/roles/computeGPU/files/nvidia

@@ -1,3 +0,0 @@
-#!/bin/sh
-PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" exec nvidia-container-runtime-hook "$@"
-

+ 0 - 21
kubernetes/roles/computeGPU/handlers/main.yml

@@ -1,21 +0,0 @@
----
-
-#- name: Enable docker service
-  #service:
-    #name: docker
-    #enabled: yes
-#
-- name: Start and Enable docker service
-  service:
-    name: docker
-    state: restarted
-    enabled: yes
-  #tags: install
-
-- name: Start and Enable Kubernetes - kubelet
-  service:
-    name: kubelet
-    state: started
-    enabled: yes
-  #tags: install
-

+ 0 - 10
kubernetes/roles/computeGPU/vars/main.yml

@@ -1,10 +0,0 @@
----
-
-common_packages:
-  - epel-release
-  - python-pip
-  - docker
-  - bash-completion
-  - kubelet 
-  - kubeadm
-  - kubectl

kubernetes/roles/computeGPU/files/daemon.json → kubernetes/roles/compute_gpu/files/daemon.json


kubernetes/roles/computeGPU/files/k8s.conf → kubernetes/roles/compute_gpu/files/k8s.conf


kubernetes/roles/computeGPU/files/kubernetes.repo → kubernetes/roles/compute_gpu/files/kubernetes.repo


+ 19 - 22
kubernetes/roles/computeGPU/tasks/main.yml

@@ -11,45 +11,42 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
-
 ---
-- name: add Nvidia container runtime support
+
+- name: Add nvidia-docker2 Repo
   get_url:
-    url: https://nvidia.github.io/nvidia-container-runtime/centos7/nvidia-container-runtime.repo
-    dest: /etc/yum.repos.d/nvidia-container-runtime.repo
+    url: "{{ nvidia_docker_repo_url }}"
+    dest: "{{ nvidia_docker_repo_dest }}"
   tags: install, testing
 
-- name:  disable gpg key for nvidia-container-runtime
-  replace:
-    path: /etc/yum.repos.d/nvidia-container-runtime.repo
-    regexp: 'repo_gpgcheck=1'
-    replace: 'repo_gpgcheck=0'
-    backup: yes
-  tags: install
-
-- name: install Nvidia-container-runtime-hook
+- name: Install nvidia driver and nvidia-docker2
   package:
-    name:
-      - kmod-nvidia
-      - nvidia-container-runtime-hook
-      - nvidia-docker2
+    name: "{{ item }}"
     state: present
+  with_items:
+    - "{{ nvidia_packages }}"
   tags: install
 
-- name: Set nvidia as default runtime 
-  copy: src=daemon.json dest=/etc/docker/ owner=root group=root mode=644
+- name: Set nvidia as default runtime
+  copy:
+    src: daemon.json
+    dest: "{{ daemon_file_dest }}"
+    owner: root
+    group: root
+    mode: "{{ daemon_file_mode }}"
   tags: install
 
-- name: Restart and Enable docker service
+- name: Restart and enable docker service
   service:
     name: docker
     state: restarted
     enabled: yes
+    daemon_reload: yes
   tags: install
 
-- name: Restart and Enable Kubernetes - kubelet
+- name: Restart and enable kubernetes - kubelet
   service:
     name: kubelet
     state: restarted
     enabled: yes
-  tags: install
+  tags: install

+ 26 - 0
kubernetes/roles/compute_gpu/vars/main.yml

@@ -0,0 +1,26 @@
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+nvidia_docker_repo_url: https://nvidia.github.io/nvidia-docker/centos7/nvidia-docker.repo
+
+nvidia_docker_repo_dest: /etc/yum.repos.d/nvidia-docker.repo
+
+nvidia_packages:
+  - kmod-nvidia
+  - nvidia-docker2
+
+daemon_file_dest: /etc/docker/
+
+daemon_file_mode: "0644"  # quoted: stays an octal string when templated into `mode`

+ 77 - 0
kubernetes/roles/firewalld/tasks/main.yml

@@ -0,0 +1,77 @@
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+- name: Install firewalld
+  package:
+    name: firewalld
+    state: present
+  tags: firewalld
+
+- name: Start and enable firewalld
+  service:
+    name: firewalld
+    state: started
+    enabled: yes
+  tags: firewalld
+
+- name: Configure firewalld on master nodes
+  firewalld:
+    port: "{{ item }}/tcp"
+    permanent: yes
+    state: enabled
+  with_items: '{{ k8s_master_ports }}'
+  when: "'manager' in group_names"
+  tags: firewalld
+
+- name: Configure firewalld on compute nodes
+  firewalld:
+    port: "{{ item }}/tcp"
+    permanent: yes
+    state: enabled
+  with_items: '{{ k8s_worker_ports }}'
+  when: "'compute' in group_names"
+  tags: firewalld
+
+- name: Open flannel ports on the firewall
+  firewalld:
+    port: "{{ item }}/udp"
+    permanent: yes
+    state: enabled
+  with_items: "{{ flannel_udp_ports }}"
+  when: k8s_cni == "flannel"
+  tags: firewalld
+
+- name: Open calico UDP ports on the firewall
+  firewalld:
+    port: "{{ item }}/udp"
+    permanent: yes
+    state: enabled
+  with_items: "{{ calico_udp_ports }}"
+  when: k8s_cni == "calico"
+  tags: firewalld
+
+- name: Open calico TCP ports on the firewall
+  firewalld:
+    port: "{{ item }}/tcp"
+    permanent: yes
+    state: enabled
+  with_items: "{{ calico_tcp_ports }}"
+  when: k8s_cni == "calico"
+  tags: firewalld
+
+- name: Reload firewalld
+  command: firewall-cmd --reload
+  changed_when: true
+  tags: firewalld

+ 43 - 0
kubernetes/roles/firewalld/vars/main.yml

@@ -0,0 +1,43 @@
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+# Kubernetes SDN network
+k8s_cni: calico
+
+# Master nodes firewall ports
+k8s_master_ports:
+  - 6443
+  - 2379-2380
+  - 10250
+  - 10251
+  - 10252
+
+# Worker nodes firewall ports
+k8s_worker_ports:
+  - 10250
+  - 30000-32767
+
+# Calico CNI firewall ports
+calico_udp_ports:
+  - 4789
+calico_tcp_ports:
+  # 5473: Calico Typha; 179: Calico BGP peering
+  - 5473
+  - 179
+
+# Flannel CNI firewall ports
+flannel_udp_ports:
+  - 8285
+  - 8472

+ 45 - 9
kubernetes/roles/jupyterhub/tasks/main.yml

@@ -11,16 +11,52 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
-
 ---
-- name: Helm - Add JupyterHub Repo
-  shell: helm repo add jupyterhub https://jupyterhub.github.io/helm-chart/
 
-- name: Helm - Update Repo
-  shell: helm repo update
+- name: Helm - add JupyterHub repo
+  command: "helm repo add jupyterhub '{{ jupyterhub_helm_chart_repo }}'"
+  changed_when: true
+
+- name: Helm - update repo
+  command: helm repo update
+  changed_when: true
+
+- name: Copy JupyterHub custom config file
+  copy:
+   src: jupyter_config.yaml
+   dest: "{{ jupyter_config_file_dest }}"
+   owner: root
+   group: root
+   mode: "{{ jupyter_config_file_mode }}"
+
+- name: JupyterHub deploy
+  block:
+    - name: JupyterHub deploy
+      command: >
+        helm upgrade --cleanup-on-fail \
+        --install {{ jupyterhub_namespace }} jupyterhub/jupyterhub \
+        --namespace {{ jupyterhub_namespace }} \
+        --create-namespace \
+        --version {{ helm_chart_version }} \
+        --values {{ jupyter_config_file_dest }} \
+        --timeout {{ timeout_min_sec }}
+      register: deployment_output
+
+  rescue:
+    - name: JupyterHub deployment error
+      debug:
+        msg: "Previous JupyterHub deployment is in progress"
+      when: "'another operation (install/upgrade/rollback) is in progress' in deployment_output.stderr"
 
-- name: JupyterHub Custom Config (files)
-  copy: src=jupyter_config.yaml dest=/root/k8s/jupyter_config.yaml owner=root group=root mode=0655
+    - name: Delete existing release
+      command: helm delete '{{ jupyterhub_namespace }}'
 
-- name: jupyterHub deploy
-  shell: helm install jupyterhub/jupyterhub  --namespace default --version 0.9.0 --values /root/k8s/jupyter_config.yaml --generate-name --wait --timeout 60m
+    - name: JupyterHub deploy
+      command: >
+        helm upgrade --cleanup-on-fail \
+        --install {{ jupyterhub_namespace }} jupyterhub/jupyterhub \
+        --namespace {{ jupyterhub_namespace }} \
+        --create-namespace \
+        --version {{ helm_chart_version }} \
+        --values {{ jupyter_config_file_dest }} \
+        --timeout {{ timeout_min_sec }}

+ 26 - 0
kubernetes/roles/jupyterhub/vars/main.yml

@@ -0,0 +1,26 @@
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+jupyterhub_helm_chart_repo: https://jupyterhub.github.io/helm-chart/
+
+jupyter_config_file_dest: /root/k8s/jupyter_config.yaml
+
+jupyter_config_file_mode: "0655"  # quoted: an unquoted 0655 loads as integer 429, not an octal mode
+
+helm_chart_version: 0.9.0
+
+timeout_min_sec: 60m
+
+jupyterhub_namespace: default

+ 50 - 39
kubernetes/roles/kubeflow/tasks/main.yml

@@ -11,112 +11,123 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
-
 ---
 
-#Configure build and deploy kubeflow v1.0
-
-- name: Download kfctl v1.0.2 release from the Kubeflow releases page.
+- name: Download kfctl release from the Kubeflow releases page
   unarchive:
-    src: https://github.com/kubeflow/kfctl/releases/download/v1.0.2/kfctl_v1.0.2-0-ga476281_linux.tar.gz
-    dest: /usr/bin/
+    src: "{{ kfctl_download_url }}"
+    dest: "{{ kfctl_download_dest_path }}"
+    mode: "{{ kfctl_download_file_mode }}"
     remote_src: yes
 
-- name: Delete Omnia Kubeflow Directory if exists
+- name: Delete omnia kubeflow directory if exists
   file:
-    path: /root/k8s/omnia-kubeflow
+    path: "{{ omnia_kubeflow_dir_path }}"
     state: absent
 
-- name: Create Kubeflow Directory
+- name: Create omnia kubeflow directory
   file:
-    path: /root/k8s/omnia-kubeflow
+    path: "{{ omnia_kubeflow_dir_path }}"
     state: directory
+    mode: "{{ omnia_kubeflow_dir_mode }}"
     recurse: yes
 
-- name: Build Kubeflow Configuration
-  shell:
-    cmd: /usr/bin/kfctl build -V -f https://raw.githubusercontent.com/kubeflow/manifests/v1.0-branch/kfdef/kfctl_k8s_istio.v1.0.2.yaml
-    chdir: /root/k8s/omnia-kubeflow
+- name: Build kubeflow configuration
+  command:
+    cmd: /usr/bin/kfctl build -V -f "{{ kubeflow_config_yaml_url }}"
+    chdir: "{{ omnia_kubeflow_dir_path }}"
+  changed_when: true
 
-- name: Modify Cpu Limit for istio-ingressgateway-service-account
+- name: Modify CPU limit for istio-ingressgateway-service-account
   replace:
-    path: /root/k8s/omnia-kubeflow/kustomize/istio-install/base/istio-noauth.yaml
+    path: "{{ istio_noauth_yaml_file_path }}"
     after: 'serviceAccountName: istio-ingressgateway-service-account'
     before: '---'
     regexp: 'cpu: 100m'
     replace: 'cpu: 2'
 
-- name: Modify Mem Limit for istio-ingressgateway-service-account
+- name: Modify memory limit for istio-ingressgateway-service-account
   replace:
-    path: /root/k8s/omnia-kubeflow/kustomize/istio-install/base/istio-noauth.yaml
+    path: "{{ istio_noauth_yaml_file_path }}"
     after: 'serviceAccountName: istio-ingressgateway-service-account'
     before: '---'
     regexp: 'memory: 128Mi'
     replace: 'memory: 512Mi'
 
-- name: Modify Cpu Request for istio-ingressgateway-service-account
+- name: Modify CPU request for istio-ingressgateway-service-account
   replace:
-    path: /root/k8s/omnia-kubeflow/kustomize/istio-install/base/istio-noauth.yaml
+    path: "{{ istio_noauth_yaml_file_path }}"
     after: 'serviceAccountName: istio-ingressgateway-service-account'
     before: '---'
     regexp: 'cpu: 10m'
     replace: 'cpu: 1'
 
-- name: Modify Mem Request for istio-ingressgateway-service-account
+- name: Modify memory request for istio-ingressgateway-service-account
   replace:
-    path: /root/k8s/omnia-kubeflow/kustomize/istio-install/base/istio-noauth.yaml
+    path: "{{ istio_noauth_yaml_file_path }}"
     after: 'serviceAccountName: istio-ingressgateway-service-account'
     before: '---'
     regexp: 'memory: 40Mi'
     replace: 'memory: 256Mi'
 
-
-- name: Modify Cpu Limit for kfserving-gateway
+- name: Modify CPU limit for kfserving-gateway
   replace:
-    path: /root/k8s/omnia-kubeflow/kustomize/kfserving-gateway/base/deployment.yaml
+    path: "{{ kfserving_gateway_yaml_file_path }}"
     after: 'serviceAccountName: istio-ingressgateway-service-account'
     before: 'env:'
     regexp: 'cpu: 100m'
     replace: 'cpu: 2'
 
-- name: Modify Mem Limit for kfserving-gateway
+- name: Modify memory limit for kfserving-gateway
   replace:
-    path: /root/k8s/omnia-kubeflow/kustomize/kfserving-gateway/base/deployment.yaml
+    path: "{{ kfserving_gateway_yaml_file_path }}"
     after: 'serviceAccountName: istio-ingressgateway-service-account'
     before: 'env:'
     regexp: 'memory: 128Mi'
     replace: 'memory: 512Mi'
 
-- name: Modify Cpu Request for kfserving-gateway
+- name: Modify CPU request for kfserving-gateway
   replace:
-    path: /root/k8s/omnia-kubeflow/kustomize/kfserving-gateway/base/deployment.yaml
+    path: "{{ kfserving_gateway_yaml_file_path }}"
     after: 'serviceAccountName: istio-ingressgateway-service-account'
     before: 'env:'
     regexp: 'cpu: 10m'
     replace: 'cpu: 1'
 
-- name: Modify Mem Request for kfserving-gateway
+- name: Modify memory request for kfserving-gateway
   replace:
-    path: /root/k8s/omnia-kubeflow/kustomize/kfserving-gateway/base/deployment.yaml
+    path: "{{ kfserving_gateway_yaml_file_path }}"
     after: 'serviceAccountName: istio-ingressgateway-service-account'
     before: 'env:'
     regexp: 'memory: 40Mi'
     replace: 'memory: 256Mi'
 
-
-- name: Change Argo base service from NodePort to LoadBalancer
+- name: Change argo base service from NodePort to LoadBalancer
   replace:
-    path: /root/k8s/omnia-kubeflow/kustomize/argo/base/service.yaml
+    path: "{{ argo_yaml_file_path }}"
     regexp: 'NodePort'
     replace: 'LoadBalancer'
 
 - name: Change istio-install base istio-noauth service from NodePort to LoadBalancer
   replace:
-    path: /root/k8s/omnia-kubeflow/kustomize/istio-install/base/istio-noauth.yaml
+    path: "{{ istio_noauth_yaml_file_path }}"
     regexp: 'NodePort'
     replace: 'LoadBalancer'
 
-- name: Apply Kubeflow Configuration
-  shell:
-    cmd: /usr/bin/kfctl apply -V -f /root/k8s/omnia-kubeflow/kfctl_k8s_istio.v1.0.2.yaml
-    chdir: /root/k8s/omnia-kubeflow
+- name: Remove cert-manager application block
+  replace:
+    path: "{{ kubeflow_config_file }}"
+    regexp: "{{ cert_manager_block }}"
+    replace: "\n"
+
+- name: Remove seldon-core-operator application block
+  replace:
+    path: "{{ kubeflow_config_file }}"
+    regexp: "{{ seldon_core_operator_block }}"
+    replace: "\n"
+
+- name: Apply kubeflow configuration
+  command:
+    cmd: "/usr/bin/kfctl apply -V -f '{{ kubeflow_config_file }}'"
+    chdir: "{{ omnia_kubeflow_dir_path }}"
+  changed_when: true

+ 56 - 0
kubernetes/roles/kubeflow/vars/main.yml

@@ -0,0 +1,56 @@
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+kfctl_download_url: https://github.com/kubeflow/kfctl/releases/download/v1.0.2/kfctl_v1.0.2-0-ga476281_linux.tar.gz
+
+kfctl_download_dest_path: /usr/bin/
+
+kfctl_download_file_mode: "0755"  # quoted: stays an octal string when templated into `mode`
+
+omnia_kubeflow_dir_path: /root/k8s/omnia-kubeflow
+
+omnia_kubeflow_dir_mode: "0755"  # quoted: stays an octal string when templated into `mode`
+
+kubeflow_config_yaml_url: https://raw.githubusercontent.com/kubeflow/manifests/v1.0-branch/kfdef/kfctl_k8s_istio.v1.0.2.yaml
+
+istio_noauth_yaml_file_path: "{{ omnia_kubeflow_dir_path }}/kustomize/istio-install/base/istio-noauth.yaml"
+
+kfserving_gateway_yaml_file_path: "{{ omnia_kubeflow_dir_path }}/kustomize/kfserving-gateway/base/deployment.yaml"
+
+argo_yaml_file_path: "{{ omnia_kubeflow_dir_path }}/kustomize/argo/base/service.yaml"
+
+kubeflow_config_file: "{{ omnia_kubeflow_dir_path }}/kfctl_k8s_istio.v1.0.2.yaml"
+
+cert_manager_block: >
+    - kustomizeConfig:
+          overlays:
+          - self-signed
+          - application
+          parameters:
+          - name: namespace
+            value: cert-manager
+          repoRef:
+            name: manifests
+            path: cert-manager/cert-manager
+        name: cert-manager
+
+seldon_core_operator_block: >
+    - kustomizeConfig:
+          overlays:
+          - application
+          repoRef:
+            name: manifests
+            path: seldon/seldon-core-operator
+        name: seldon-core-operator

+ 0 - 3
kubernetes/roles/manager/files/nvidia

@@ -1,3 +0,0 @@
-#!/bin/sh
-PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" exec nvidia-container-runtime-hook "$@"
-

+ 12 - 25
kubernetes/roles/manager/tasks/main.yml

@@ -11,35 +11,22 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
-
 ---
-#- name: Firewall Rule K8s:6443/tcp
-  #command: firewall-cmd  --zone=internal --add-port=6443/tcp --permanent
-  #tags: manager
-#
-#- name: Firewall Rule K8s:10250/tcp
-  #command: firewall-cmd  --zone=internal --add-port=10250/tcp --permanent
-  #tags: manager
-##
-#- name: Firewall Reload
-  #command: firewall-cmd  --reload
-  #tags: manager
-#
-- name: Create /root/bin (if it doesn't exist)
+
+- name: Create directory for helm installer file
   file:
-    path: /root/bin
+    path: "{{ helm_installer_file_directory }}"
     state: directory
-    mode: 0755
+    mode: "{{ helm_installer_file_directory_mode }}"
 
-- name: Get Helm Installer
+- name: Get helm installer
   get_url:
-    url: https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3
-    dest: /root/bin/get_helm.sh
-    mode: 700
-  tags: manager
-
-- name: Install Helm
-  command: /root/bin/get_helm.sh
+    url: "{{ helm_installer_url }}"
+    dest: "{{ helm_installer_file_dest }}"
+    mode: "{{ helm_installer_file_mode }}"
   tags: manager
 
-# install and start up OpenSM -  III
+- name: Install helm
+  command: "/bin/bash {{ helm_installer_file_dest }}"
+  changed_when: true
+  tags: manager

+ 24 - 0
kubernetes/roles/manager/vars/main.yml

@@ -0,0 +1,24 @@
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+helm_installer_file_directory: /root/bin
+
+helm_installer_file_directory_mode: "0755"  # quoted: stays an octal string when templated into `mode`
+
+helm_installer_url: https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3
+
+helm_installer_file_dest: /root/bin/get_helm.sh
+
+helm_installer_file_mode: "0700"  # quoted: stays an octal string when templated into `mode`

+ 0 - 1
kubernetes/roles/startmanager/files/enable_gpu_k8s.sh

@@ -1 +0,0 @@
-kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v1.11/nvidia-device-plugin.yml

+ 77 - 50
kubernetes/roles/startmanager/tasks/main.yml

@@ -11,39 +11,64 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
-
 ---
-- name: Turn Swap OFF (if not already disabled)
+
+- name: Disable swap (if not already disabled)
   command: /usr/sbin/swapoff -a
+  changed_when: true
   tags: init
 
+- name: Start and enable docker service
+  systemd:
+    name: docker
+    state: started
+    enabled: yes
+    daemon_reload: yes
+  tags: docker
+
 - name: Initialize kubeadm
-  command: /bin/kubeadm init --pod-network-cidr=10.244.0.0/16 --apiserver-advertise-address={{ manager_ip }}
-  #command: /bin/kubeadm init
+  command: "/bin/kubeadm init --pod-network-cidr='{{ pod_network_cidr_ip }}' --apiserver-advertise-address='{{ manager_ip }}'"
+  changed_when: true
   register: init_output
   tags: init
 
-- name: Setup Directory for Kubernetes environment for root
-  file: path=/root/.kube state=directory
+- name: Setup directory for Kubernetes environment for root
+  file:
+    path: "{{ k8s_root_directory }}"
+    state: directory
+    mode: "{{ k8s_root_directory_mode }}"
   tags: init
 
-- name: Copy Kubernetes Config for root #do this for other users too?
+- name: Copy Kubernetes config for root
   copy:
-    src: /etc/kubernetes/admin.conf
-    dest: /root/.kube/config
+    src: "{{ k8s_config_src }}"
+    dest: "{{ k8s_config_dest }}"
     owner: root
     group: root
-    mode: 0644
+    mode: "{{ k8s_config_file_mode }}"
     remote_src: yes
   tags: init
 
+- name: Update the kubernetes config file permissions
+  shell: "chown $(id -u):$(id -g) '{{ k8s_config_dest }}'"
+  args:
+    warn: false
+  changed_when: true
+  tags: init
+
 - name: Cluster token
-  shell: kubeadm token list | cut -d ' ' -f1 | sed -n '2p'
+  shell: >
+    set -o pipefail && \
+      kubeadm token list | cut -d ' ' -f1 | sed -n '2p'
+  changed_when: false
   register: K8S_TOKEN
   tags: init
 
 - name: CA Hash
-  shell: openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | sed 's/^.* //'
+  shell: >
+    set -o pipefail && \
+      openssl x509 -pubkey -in {{ k8s_cert_path }} | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | sed 's/^.* //'
+  changed_when: false
   register: K8S_MANAGER_CA_HASH
   tags: init
 
@@ -55,75 +80,77 @@
     ip:     "{{ manager_ip }}"
   tags: init
 
-- name:
+- name: Print k8s token
   debug:
     msg: "[Manager] K8S_TOKEN_HOLDER K8S token is {{ hostvars['K8S_TOKEN_HOLDER']['token'] }}"
+    verbosity: 2
   tags: init
 
-- name:
+- name: Print k8s hash
   debug:
     msg: "[Manager] K8S_TOKEN_HOLDER K8S Hash is  {{ hostvars['K8S_TOKEN_HOLDER']['hash'] }}"
+    verbosity: 2
   tags: init
 
-- name:
+- name: Print k8s manager_ip
   debug:
     msg: "[Manager] K8S_MANAGER_IP is  {{ manager_ip }}"
+    verbosity: 2
   tags: init
 
 - name: Setup Calico SDN network
-  shell: kubectl apply -f https://docs.projectcalico.org/manifests/calico.yaml
+  command: "kubectl apply -f '{{ calico_yml_url }}'"
+  when: k8s_cni == "calico"
   tags: init
 
-#- name: Setup Flannel SDN network
-  #shell: kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
-  #tags: init
-
-- name: Enabled GPU support in Kubernetes
-  shell: kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/1.0.0-beta4/nvidia-device-plugin.yml
-                           #https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v1.11/nvidia-device-plugin.yml
-  register: gpu_enable
-  tags: init
-
-- name: Deploy Xilinx Device Plugin
-  shell: kubectl create -f https://raw.githubusercontent.com/Xilinx/FPGA_as_a_Service/master/k8s-fpga-device-plugin/fpga-device-plugin.yml
-  register: fpga_enable
+- name: Setup Flannel SDN network
+  command: "kubectl apply -f '{{ flannel_yml_url }}'"
+  when: k8s_cni == "flannel"
   tags: init
 
 - name: Create yaml repo for setup
   file:
-    path: /root/k8s
-    owner: root
-    group: root
-    mode: 0755
+    path: "{{ yaml_repo_dir_path }}"
     state: directory
+    mode: "{{ yaml_repo_dir_mode }}"
   tags: init
 
-- name: Create Service Account (K8S Dashboard) Files
-  copy: src=create_admin_user.yaml dest=/root/k8s/create_admin_user.yaml owner=root group=root mode=655
+- name: Create service account (K8s dashboard) files
+  copy:
+    src: create_admin_user.yaml
+    dest: "{{ k8s_service_account_file_dest }}"
+    owner: root
+    group: root
+    mode: "{{ k8s_service_account_file_mode }}"
   tags: init
 
-- name: Create Service Account (K8S Dashboard) - Create
-  shell: kubectl create -f /root/k8s/create_admin_user.yaml
+- name: Create service account (K8s dashboard)
+  command: "kubectl create -f '{{ k8s_service_account_file_dest }}'"
+  changed_when: true
   tags: init
 
-- name: Create ClusterRoleBinding (K8S Dashboard) Files
-  copy: src=create_clusterRoleBinding.yaml dest=/root/k8s/create_clusterRoleBinding.yaml owner=root group=root mode=655
+- name: Create clusterRoleBinding (K8s dashboard) files
+  copy:
+    src: create_clusterRoleBinding.yaml
+    dest: "{{ k8s_clusterRoleBinding_file_dest }}"
+    owner: root
+    group: root
+    mode: "{{ k8s_clusterRoleBinding_file_mode }}"
   tags: init
 
-- name: Create ClusterRoleBinding (K8S Dashboard) - Apply
-  shell: kubectl create -f /root/k8s/create_clusterRoleBinding.yaml
+- name: Create clusterRoleBinding (K8s dashboard)
+  command: "kubectl create -f '{{ k8s_clusterRoleBinding_file_dest }}'"
+  changed_when: true
   tags: init
 
-- name: Dump Bearer Token for K8S Dashboard Login
-  shell: kubectl -n kube-system describe secret $(kubectl -n kube-system get secret | grep admin-user | awk '{print $1}') > /root/k8s/token
+- name: Dump bearer token for K8s dashboard login
+  shell: >
+    set -o pipefail && \
+      kubectl -n kube-system describe secret $(kubectl -n kube-system get secret | grep admin-user | awk '{print $1}') > '{{ yaml_repo_dir_path }}/token'
+  changed_when: true
   tags: init
 
 - name: Edge / Workstation Install allows pods to scheudle on manager
-  shell: kubectl taint nodes --all node-role.kubernetes.io/master-
+  command: kubectl taint nodes --all node-role.kubernetes.io/master-
   when: single_node
-  tags: init
-
-
-# If more debug information is needed during init uncomment the following 2 lines
-#- debug: var=init_output.stdout_lines
-  #tags: init
+  tags: init

+ 52 - 0
kubernetes/roles/startmanager/vars/main.yml

@@ -0,0 +1,52 @@
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+single_node: false
+
+manager_ip: "{{ ansible_host }}"
+
+k8s_cni: calico  # selects the SDN task to run: "calico" or "flannel"
+
+pod_network_cidr_ip: 10.244.0.0/16
+
+k8s_root_directory: /root/.kube
+
+k8s_root_directory_mode: "0755"  # quoted: octal string, not a YAML integer
+
+k8s_config_src: /etc/kubernetes/admin.conf
+
+k8s_config_dest: /root/.kube/config
+
+k8s_config_file_mode: "0644"
+
+k8s_cert_path: /etc/kubernetes/pki/ca.crt
+
+k8s_dummy_hostname: K8S_TOKEN_HOLDER
+
+yaml_repo_dir_path: /root/k8s
+
+yaml_repo_dir_mode: "0755"
+
+k8s_service_account_file_dest: /root/k8s/create_admin_user.yaml
+
+k8s_service_account_file_mode: "0655"  # NOTE(review): rw-r-xr-x is unusual for a manifest; confirm 0644 was not intended
+
+k8s_clusterRoleBinding_file_dest: /root/k8s/create_clusterRoleBinding.yaml
+
+k8s_clusterRoleBinding_file_mode: "0655"
+
+calico_yml_url: https://docs.projectcalico.org/manifests/calico.yaml
+
+flannel_yml_url: https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml

+ 61 - 26
kubernetes/roles/startservices/tasks/main.yml

@@ -11,76 +11,111 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
-
 ---
-#- name: Kick CoreDNS (this is a hack that needs to be fixed)
-  #shell:  kubectl get pods -n kube-system --no-headers=true | awk '/coredns/{print $1}'|xargs kubectl delete -n kube-system pod
-  #tags: init
 
 - name: Wait for CoreDNS to restart
-  shell: kubectl rollout status deployment/coredns -n kube-system
+  command: kubectl rollout status deployment/coredns -n kube-system
+  changed_when: false
+  ignore_errors: true  # best-effort wait: a failed status check does not abort the play
   tags: init
 
 - name: Deploy MetalLB
-  shell: kubectl apply -f https://raw.githubusercontent.com/google/metallb/v0.8.1/manifests/metallb.yaml
+  command: "kubectl apply -f '{{ metallb_yaml_url }}'"
+  changed_when: true
   tags: init
 
 - name: Create MetalLB Setup Config Files
-  copy: src=metal-config.yaml dest=/root/k8s/metal-config.yaml owner=root group=root mode=655
+  copy:
+    src: metal-config.yaml
+    dest: "{{ metallb_config_file_dest }}"
+    owner: root
+    group: root
+    mode: "{{ metallb_config_file_mode }}"
   tags: init
 
 - name: Create MetalLB Setup Deployment Files
-  copy: src=metallb.yaml dest=/root/k8s/metallb.yaml owner=root group=root mode=655
+  copy:
+    src: metallb.yaml
+    dest: "{{ metallb_deployment_file_dest }}"
+    owner: root
+    group: root
+    mode: "{{ metallb_deployment_file_mode }}"
   tags: init
 
 - name: Deploy MetalLB
-  shell: kubectl apply -f /root/k8s/metallb.yaml
+  command: "kubectl apply -f '{{ metallb_deployment_file_dest }}'"
+  changed_when: true
   tags: init
 
 - name: Create default setup for MetalLB
-  shell: kubectl apply -f /root/k8s/metal-config.yaml
+  command: "kubectl apply -f '{{ metallb_config_file_dest }}'"
+  changed_when: true
   tags: init
 
-- name: Start K8S Dashboard
-  shell: kubectl create -f https://raw.githubusercontent.com/kubernetes/dashboard/v2.0.0/aio/deploy/recommended.yaml
+- name: Start k8s dashboard
+  command: "kubectl create -f '{{ k8s_dashboard_yaml_url }}'"
+  changed_when: true
+  register: result
   tags: init
 
-- name: Helm - Add Stable Repo
-  shell: helm repo add stable https://charts.helm.sh/stable
+- name: Helm - add stable repo
+  command: "helm repo add stable '{{ helm_stable_repo_url }}'"
+  changed_when: true
   tags: init
 
-- name: Helm - Add Nvidia k8s-device-plugin (nvdp) Repo
-  shell: helm repo add nvdp https://nvidia.github.io/k8s-device-plugin
+- name: Helm - add Nvidia k8s-device-plugin (nvdp) repo
+  command: "helm repo add nvdp '{{ nvidia_k8s_device_plugin_repo_url }}'"
+  changed_when: true
   tags: init
 
-- name: Helm - Add Nvidia GPU Discovery (nvgfd) Repo
-  shell: helm repo add nvgfd https://nvidia.github.io/gpu-feature-discovery
+- name: Helm - add Nvidia GPU discovery (nvgfd) repo
+  command: "helm repo add nvgfd '{{ nvidia_gpu_discovery_repo_url }}'"
+  changed_when: true
   tags: init
 
-- name: Helm - Update Repo
-  shell: helm repo update
+- name: Helm - update repo
+  command: helm repo update
+  changed_when: true
   tags: init
 
 - name: Start NFS Client Provisioner
-  shell: helm install stable/nfs-client-provisioner --set nfs.server={{ nfs_server }}  --set nfs.path={{ nfs_path }} --generate-name
+  command: "helm install stable/nfs-client-provisioner --set nfs.server='{{ nfs_server }}' --set nfs.path='{{ nfs_path }}' --generate-name"
+  changed_when: true
+  register: result
   tags: init
 
 - name: Set NFS-Client Provisioner as DEFAULT StorageClass
-  shell: "kubectl patch storageclasses.storage.k8s.io nfs-client -p '{\"metadata\": {\"annotations\":{\"storageclass.kubernetes.io/is-default-class\":\"true\"}}}'"
+  shell: >
+    kubectl patch storageclasses.storage.k8s.io nfs-client \
+    -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'
+  changed_when: true
   tags: init
 
 - name: Prometheus deployment
-  shell: helm install stable/prometheus --set alertmanager.persistentVolume.storageClass=nfs-client,server.persistentVolume.storageClass=nfs-client,server.service.type=LoadBalancer --generate-name
+  command: >
+    helm install stable/prometheus \
+    --set alertmanager.persistentVolume.storageClass=nfs-client,server.persistentVolume.storageClass=nfs-client,server.service.type=LoadBalancer \
+    --generate-name
+  changed_when: true
   tags: init
 
 - name: Install MPI Operator
-  shell: kubectl create -f https://raw.githubusercontent.com/kubeflow/mpi-operator/master/deploy/v1alpha2/mpi-operator.yaml
+  command: "kubectl create -f '{{ mpi_operator_yaml_url }}'"
+  changed_when: true
   tags: init
 
 - name: Install nvidia-device-plugin
-  shell: helm install --version=0.7.0  --generate-name  --set migStrategy={{ MIG_STRATEGY }}  nvdp/nvidia-device-plugin 
+  command: "helm install --version='{{ nvidia_device_plugin_version }}' --generate-name --set migStrategy='{{ mig_strategy }}' nvdp/nvidia-device-plugin"
+  changed_when: true
   tags: init
 
 - name: Install GPU Feature Discovery
-  shell: helm install  --version=0.2.0  --generate-name  --set migStrategy={{ MIG_STRATEGY }}  nvgfd/gpu-feature-discovery
+  command: "helm install --version='{{ gpu_feature_discovery_version }}' --generate-name --set migStrategy='{{ mig_strategy }}' nvgfd/gpu-feature-discovery"
+  changed_when: true
   tags: init
+
+- name: Deploy Xilinx Device plugin
+  command: "kubectl create -f '{{ fpga_device_plugin_yaml_url }}'"
+  changed_when: true
+  register: fpga_enable
+  tags: init

+ 46 - 0
kubernetes/roles/startservices/vars/main.yml

@@ -0,0 +1,46 @@
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+metallb_config_file_dest: /root/k8s/metal-config.yaml
+
+metallb_config_file_mode: "0655"  # quoted: octal string, not a YAML integer; NOTE(review): confirm 0644 was not intended
+
+metallb_deployment_file_dest: /root/k8s/metallb.yaml
+
+metallb_deployment_file_mode: "0655"
+
+metallb_yaml_url: https://raw.githubusercontent.com/google/metallb/v0.8.1/manifests/metallb.yaml
+
+k8s_dashboard_yaml_url: https://raw.githubusercontent.com/kubernetes/dashboard/v2.0.0-beta6/aio/deploy/recommended.yaml
+
+helm_stable_repo_url: https://charts.helm.sh/stable
+
+nfs_server: "{{ ansible_host }}"
+
+nfs_path: /work
+
+mpi_operator_yaml_url: https://raw.githubusercontent.com/kubeflow/mpi-operator/master/deploy/v1alpha2/mpi-operator.yaml
+
+nvidia_k8s_device_plugin_repo_url: https://nvidia.github.io/k8s-device-plugin
+
+nvidia_gpu_discovery_repo_url: https://nvidia.github.io/gpu-feature-discovery
+
+nvidia_device_plugin_version: "0.7.0"  # quoted so a future two-part version cannot parse as a float
+
+mig_strategy: none
+
+gpu_feature_discovery_version: "0.2.0"
+
+fpga_device_plugin_yaml_url: https://raw.githubusercontent.com/Xilinx/FPGA_as_a_Service/master/k8s-fpga-device-plugin/fpga-device-plugin.yml

+ 5 - 26
kubernetes/roles/startworkers/tasks/main.yml

@@ -11,38 +11,17 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
-
 ---
 
-- name: Turn Swap OFF (if not already disabled)
+- name: Disable swap (if not already disabled)
   command: /usr/sbin/swapoff -a
+  changed_when: true
   tags: init
 
-#- name:
-  #debug:
-    #msg: "[Worker] K8S_TOKEN_HOLDER K8S token is {{ hostvars['K8S_TOKEN_HOLDER']['token'] }}"
-  #tags: init
-
-#- name:
-  #debug:
-    #msg: "[Worker] K8S_TOKEN_HOLDER K8S Hash is  {{ hostvars['K8S_TOKEN_HOLDER']['hash'] }}"
-  #tags: init
-
-#- name:
-  #debug:
-    #msg: "[Worker] K8S_MANGER_IP is  {{ hostvars['K8S_TOKEN_HOLDER']['ip'] }}"
-  #tags: init
-
-- name: "Kubeadmn join"
+- name: Execute kubeadm join command
   shell: >
     kubeadm join --token={{ hostvars['K8S_TOKEN_HOLDER']['token'] }}
     --discovery-token-ca-cert-hash sha256:{{ hostvars['K8S_TOKEN_HOLDER']['hash'] }}
-    {{ hostvars['K8S_TOKEN_HOLDER']['ip'] }}:6443
+    {{ hostvars['K8S_TOKEN_HOLDER']['ip'] }}:{{ apiserver_bind_port }}
   when: not single_node
-  tags: init
-
-
-#- name: Join Computes to pool
-#   command: "{{ kubeJoinCommand }}"
-# tags: init
-
+  tags: init

+ 18 - 0
kubernetes/roles/startworkers/vars/main.yml

@@ -0,0 +1,18 @@
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+single_node: false
+
+apiserver_bind_port: 6443