Przeglądaj źródła

fix nvidia-docker2 install

Fix the nvidia-docker2 implementation.
Remove old files that are no longer used.
Move the Xilinx FPGA device plugin from `startmanager` to `startservices`.

Signed-off-by: John lockman <john.lockman@friday.local>
John lockman 4 lat temu
rodzic
commit
0539642f26

+ 0 - 3
kubernetes/roles/computeGPU/files/nvidia

@@ -1,3 +0,0 @@
-#!/bin/sh
-PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" exec nvidia-container-runtime-hook "$@"
-

+ 4 - 13
kubernetes/roles/computeGPU/tasks/main.yml

@@ -13,25 +13,16 @@
 #  limitations under the License.
 
 ---
-- name: add Nvidia container runtime support
+- name: add nvidia-docker2 Repo
   get_url:
-    url: https://nvidia.github.io/nvidia-container-runtime/centos7/nvidia-container-runtime.repo
-    dest: /etc/yum.repos.d/nvidia-container-runtime.repo
+    url: https://nvidia.github.io/nvidia-docker/centos7/nvidia-docker.repo
+    dest: /etc/yum.repos.d/nvidia-docker.repo
   tags: install, testing
 
-- name:  disable gpg key for nvidia-container-runtime
-  replace:
-    path: /etc/yum.repos.d/nvidia-container-runtime.repo
-    regexp: 'repo_gpgcheck=1'
-    replace: 'repo_gpgcheck=0'
-    backup: yes
-  tags: install
-
-- name: install Nvidia-container-runtime-hook
+- name: install Nvidia driver and nvidia-docker2
   package:
     name:
       - kmod-nvidia
-      - nvidia-container-runtime-hook
       - nvidia-docker2
     state: present
   tags: install

+ 0 - 3
kubernetes/roles/manager/files/nvidia

@@ -1,3 +0,0 @@
-#!/bin/sh
-PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" exec nvidia-container-runtime-hook "$@"
-

+ 0 - 1
kubernetes/roles/startmanager/files/enable_gpu_k8s.sh

@@ -1 +0,0 @@
-kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v1.11/nvidia-device-plugin.yml

+ 0 - 10
kubernetes/roles/startmanager/tasks/main.yml

@@ -78,16 +78,6 @@
   #shell: kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
   #tags: init
 
-- name: Enabled GPU support in Kubernetes
-  shell: kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/1.0.0-beta4/nvidia-device-plugin.yml
-                           #https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v1.11/nvidia-device-plugin.yml
-  register: gpu_enable
-  tags: init
-
-- name: Deploy Xilinx Device Plugin
-  shell: kubectl create -f https://raw.githubusercontent.com/Xilinx/FPGA_as_a_Service/master/k8s-fpga-device-plugin/fpga-device-plugin.yml
-  register: fpga_enable
-  tags: init
 
 - name: Create yaml repo for setup
   file:

+ 5 - 4
kubernetes/roles/startservices/tasks/main.yml

@@ -13,10 +13,6 @@
 #  limitations under the License.
 
 ---
-#- name: Kick CoreDNS (this is a hack that needs to be fixed)
-  #shell:  kubectl get pods -n kube-system --no-headers=true | awk '/coredns/{print $1}'|xargs kubectl delete -n kube-system pod
-  #tags: init
-
 - name: Wait for CoreDNS to restart
   shell: kubectl rollout status deployment/coredns -n kube-system
   tags: init
@@ -84,3 +80,8 @@
 - name: Install GPU Feature Discovery
   shell: helm install  --version=0.2.0  --generate-name  --set migStrategy={{ MIG_STRATEGY }}  nvgfd/gpu-feature-discovery
   tags: init
+
+- name: Deploy Xilinx Device Plugin
+  shell: kubectl create -f https://raw.githubusercontent.com/Xilinx/FPGA_as_a_Service/master/k8s-fpga-device-plugin/fpga-device-plugin.yml
+  register: fpga_enable
+  tags: init