瀏覽代碼

Merge pull request #199 from lwilson/issue-148

Issue 148: AMD GPU detect/deploy
John Lockman 4 年之前
父節點
當前提交
12ce0480d8

+ 3 - 1
roles/common/files/inventory.fact

@@ -6,13 +6,15 @@ lspci > $INVENTORY
 NVIDIA_GPU=$(cat $INVENTORY | grep -i nvidia | wc -l)
 XILINX_FPGA=$(cat $INVENTORY | grep "Processing accelerators: Xilinx Corporation Device" | wc -l)
 INTEL_A10_FPGA=$(cat $INVENTORY | grep "Processing accelerators: Intel Corporation Device" | wc -l)
+AMD_GPU=$(cat $INVENTORY | grep "Display controller: Advanced Micro Devices, Inc. \[AMD/ATI\]" | wc -l)
 
 cat << EOF
 {
 	"xilinx_fpga" : $XILINX_FPGA,
 	"nvidia_gpu" : $NVIDIA_GPU,
+	"amd_gpu" : $AMD_GPU,
 	"intel_a10_fpga" : $INTEL_A10_FPGA
 }
 EOF
 
-rm -f $INVENTORY
+rm -f $INVENTORY

+ 42 - 0
roles/common/tasks/amd.yml

@@ -0,0 +1,42 @@
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+# Registers the AMD ROCm yum repository, installs the rocm-dkms package from
+# it, and reboots the node when that package was newly installed or changed.
+
+- name: Add AMD ROCm repository
+  yum_repository:
+    name: ROCm
+    description: AMD GPU ROCm Repository
+    baseurl: https://repo.radeon.com/rocm/yum/rpm
+    gpgcheck: true
+    gpgkey: https://repo.radeon.com/rocm/rocm.gpg.key
+    enabled: true
+  tags: install
+
+- name: Install AMD ROCm drivers
+  package:
+    name: rocm-dkms
+    enablerepo: ROCm
+    state: present
+  register: rocm_driver_install
+  tags: install
+
+# Reboot only when the install task reported a change; an unconditional
+# reboot would restart the node on every run of this task file.
+- name: Reboot after installing GPU drivers
+  reboot:
+  when: rocm_driver_install is changed
+  tags: install

+ 5 - 0
roles/common/tasks/main.yml

@@ -78,3 +78,8 @@
   include_tasks: nvidia.yml
   when: ansible_local.inventory.nvidia_gpu > 0
   tags: install
+
+- name: Install AMD GPU drivers and software components
+  when: ansible_local['inventory']['amd_gpu'] > 0
+  include_tasks: amd.yml
+  tags: [install]

+ 7 - 0
roles/k8s_start_services/tasks/main.yml

@@ -133,3 +133,10 @@
   register: fpga_enable
   when: "'fpga-device-plugin' not in k8s_pods.stdout"
   tags: init
+
+- name: Deploy ROCm Device plugin
+  when: not ('amdgpu-device-plugin' in k8s_pods.stdout)
+  command: kubectl create -f '{{ rocm_device_plugin_yaml_url }}'
+  register: amd_gpu_enable
+  changed_when: true
+  tags: [init]

+ 3 - 1
roles/k8s_start_services/vars/main.yml

@@ -43,4 +43,6 @@ mig_strategy: none
 
 gpu_feature_discovery_version: 0.2.0
 
-fpga_device_plugin_yaml_url: https://raw.githubusercontent.com/Xilinx/FPGA_as_a_Service/master/k8s-fpga-device-plugin/fpga-device-plugin.yml
+fpga_device_plugin_yaml_url: https://raw.githubusercontent.com/Xilinx/FPGA_as_a_Service/master/k8s-fpga-device-plugin/fpga-device-plugin.yml
+
+rocm_device_plugin_yaml_url: https://raw.githubusercontent.com/RadeonOpenCompute/k8s-device-plugin/master/k8s-ds-amdgpu-dp.yaml