Sfoglia il codice sorgente

Merge pull request #832 from Lakshmi-Patneedi/devel

Common role changes for omnia.yml
Sujit Jadhav 3 anni fa
parent
commit
5f0eace103

+ 1 - 0
roles/common/files/module.conf

@@ -0,0 +1 @@
+allow_unsupported_modules 1

+ 85 - 33
roles/common/tasks/amd.yml

@@ -1,4 +1,4 @@
-#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -13,35 +13,87 @@
 #  limitations under the License.
 ---
 
-- name: Add AMD ROCm repository for CentOS 7.x
-  yum_repository:
-    name: ROCm
-    description: AMD GPU ROCm Repository
-    baseurl: https://repo.radeon.com/rocm/yum/rpm
-    gpgcheck: yes
-    gpgkey: https://repo.radeon.com/rocm/rocm.gpg.key
-    enabled: yes
-  tags: install
-  when: ansible_facts['distribution_major_version'] == "7"
-
-- name: Add AMD ROCm repository for CentOS/RockyLinux 8.x
-  yum_repository:
-    name: ROCm
-    description: AMD GPU ROCm Repository
-    baseurl: https://repo.radeon.com/rocm/centos8/rpm
-    gpgcheck: yes
-    gpgkey: https://repo.radeon.com/rocm/rocm.gpg.key
-    enabled: yes
-  tags: install
-  when: ansible_facts['distribution_major_version'] == "8"
-
-- name: Install AMD ROCm drivers
-  package:
-    name: rocm-dkms
-    enablerepo: ROCm
-    state: present
-  tags: install
-
-- name: Reboot after installing GPU drivers
-  reboot:
-  tags: install
+# RHEL-family path: runs only when compute_os does not contain os_supported_leap.
+- when: os_supported_leap not in compute_os
+  block:
+    - name: Add AMD ROCm repository for CentOS 7.x
+      when: ansible_facts.distribution_major_version == '7'
+      tags: install
+      yum_repository:
+        name: ROCm
+        description: AMD GPU ROCm Repository
+        baseurl: https://repo.radeon.com/rocm/yum/rpm
+        gpgkey: https://repo.radeon.com/rocm/rocm.gpg.key
+        gpgcheck: true
+        enabled: true
+
+    - name: Add AMD ROCm repository for CentOS/RockyLinux 8.x
+      when: ansible_facts.distribution_major_version == '8'
+      tags: install
+      yum_repository:
+        name: ROCm
+        description: AMD GPU ROCm Repository
+        baseurl: https://repo.radeon.com/rocm/centos8/rpm
+        gpgkey: https://repo.radeon.com/rocm/rocm.gpg.key
+        gpgcheck: true
+        enabled: true
+
+    # Installed from the ROCm repository defined by one of the two tasks above.
+    - name: Install AMD ROCm drivers
+      tags: install
+      package:
+        name: rocm-dkms
+        enablerepo: ROCm
+        state: present
+
+    - name: Reboot after installing GPU drivers
+      tags: install
+      reboot:
+
+# openSUSE Leap path: runs when compute_os contains os_supported_leap and the
+# distribution version is at least os_supported_leap_version.
+- block:
+    - name: Installing dkms
+      package:
+        name: dkms
+        state: present
+      # NOTE(review): forces a "changed" result on every run - confirm intended.
+      changed_when: true
+      tags: install
+
+    - name: Add AMD ROCm repository for leap
+      zypper_repository:
+        name: rocm
+        repo: "{{ amd_repo }}"
+        state: present
+      # A failure while adding the repository is never reported as a failure.
+      failed_when: false
+      tags: install
+
+    - name: Import gpg-key for installing AMD ROCm
+      rpm_key:
+        key: "{{ gpg_key_amd }}"
+        state: present
+      tags: install
+
+    - name: Install AMD ROCm drivers
+      ansible.builtin.expect:
+        command: zypper install rocm-dkms
+        # Keys are regular expressions matched against the command's prompts;
+        # values are the answers sent in response.
+        responses:
+          (.*) [1/2/c/d/?](.): '2'
+          (.*)(y): 'y'
+      tags: install
+
+    - name: Allowing modules
+      copy:
+        src: module.conf
+        dest: "{{ amd_gpu_dest }}"
+        owner: root
+        group: root
+        mode: "{{ conf_file_mode }}"
+      tags: install
+
+    - name: Enable the modules amdgpu
+      modprobe:
+        name: amdgpu
+        state: present
+      tags: install
+
+    - name: Reboot after installing GPU drivers
+      reboot:
+      tags: install
+  # Use the version test: a plain string ">=" compares lexicographically and
+  # would rank "15.10" below "15.3". List entries are ANDed together.
+  when:
+    - os_supported_leap in compute_os
+    - ansible_distribution_version is version(os_supported_leap_version, '>=')

+ 138 - 66
roles/common/tasks/main.yml

@@ -1,4 +1,4 @@
-#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -12,6 +12,9 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 ---
+# Stores the lower-cased distribution name; later conditions test it for os_supported_leap.
+- name: Saving distribution of os
+  set_fact:
+    compute_os: "{{ ansible_facts.distribution | lower }}"
 
 - name: Create a custom fact directory on each host
   file:
@@ -27,76 +30,144 @@
     group: root
     mode: "{{ accelerator_discovery_script_mode }}"
 
-- name: Add epel-release repo
-  package:
-    name: epel-release
-    state: present
-  tags: install
+- block:
+    - name: Add epel-release repo
+      package:
+        name: epel-release
+        state: present
+      tags: install
+    
+    - name: Add elrepo GPG key
+      rpm_key:
+        state: present
+        key: "{{ elrepo_gpg_key_url }}"
+      register: elrepo_gpg_key
+      until: elrepo_gpg_key is not failed
+      retries: "{{ max_retries }}"
+      delay: "{{ max_delay }}"
+      tags: install
 
-- name: Add elrepo GPG key
-  rpm_key:
-    state: present
-    key: "{{ elrepo_gpg_key_url }}"
-  register: elrepo_gpg_key
-  until: elrepo_gpg_key is not failed
-  retries: 20
-  delay: 10
-  tags: install
+    - name: Add elrepo (nvidia kmod drivers)
+      package:
+        name: "{{ elrepo_rpm_url }}"
+        state: present
+      register: elrepo
+      until: elrepo is not failed
+      retries: "{{ max_retries }}"
+      delay: "{{ max_delay }}"
+      tags: install
+    
+    - name: Add docker community edition repository
+      get_url:
+        url: "{{ docker_repo_url }}"
+        dest: "{{ docker_repo_dest }}"
+      register: docker_repo
+      until: docker_repo is not failed
+      retries: "{{ max_retries }}"
+      delay: "{{ max_delay }}"
+      tags: install
 
-- name: Add elrepo (nvidia kmod drivers)
-  package:
-    name: "{{ elrepo_rpm_url }}"
-    state: present
-  register: elrepo
-  until: elrepo is not failed
-  retries: 20
-  delay: 10
-  tags: install
+    - name: Permanently Disable swap
+      mount:
+        name: "swap"
+        fstype: swap
+        state: absent
 
-- name: Add docker community edition repository
-  get_url:
-    url: "{{ docker_repo_url }}"
-    dest: "{{ docker_repo_dest }}"
-  register: docker_repo
-  until: docker_repo is not failed
-  retries: 20
-  delay: 10
-  tags: install
+    - name: Disable selinux
+      selinux:
+        state: disabled
+      tags: install
 
-- name: Permanently Disable swap
-  mount:
-    name: "swap"
-    fstype: swap
-    state: absent
+    - name: Install common packages
+      package:
+        name: "{{ common_packages }}"
+        state: present
+      tags: install
 
-- name: Disable selinux
-  selinux:
-    state: disabled
-  tags: install
+    # Packages listed in common_packages_for_non_leap; named distinctly from the
+    # preceding "Install common packages" task so the two can be told apart.
+    - name: Install common packages for non-leap
+      package:
+        name: "{{ common_packages_for_non_leap }}"
+        state: present
+      tags: install
 
-- name: Install common packages
-  package:
-    name: "{{ common_packages }}"
-    state: present
-  tags: install
+    - name: Versionlock docker
+      command: "yum versionlock '{{ item }}'"
+      args:
+        warn: false
+      with_items:
+        - "{{ docker_packages }}"
+      changed_when: true
+      tags: install
 
-- name: Versionlock docker
-  command: "yum versionlock '{{ item }}'"
-  args:
-    warn: false
-  with_items:
-    - "{{ docker_packages }}"
-  changed_when: true
-  tags: install
+    - name: Collect host facts (including acclerator information)
+      setup: ~
 
-- name: Collect host facts (including acclerator information)
-  setup: ~
+    - name: Install infiniBand support
+      package:
+        name: "@Infiniband Support"
+        state: present
+      tags: install
+  when: ( os_supported_leap not in compute_os )
 
-- name: Install infiniBand support
-  package:
-    name: "@Infiniband Support"
-    state: present
-  tags: install
+# openSUSE Leap path: repository and package setup for hosts whose compute_os
+# contains os_supported_leap and whose version is at least
+# os_supported_leap_version.
+- block:
+    - name: Installing python-xml
+      package:
+        name: python-xml
+        state: present
+      tags: install
+
+    - name: Add nvidia repo
+      zypper_repository:
+        name: NVIDIA
+        repo: "{{ nvidia_repo }}"
+        state: present
+        autorefresh: yes
+      tags: install
+
+    - name: Install nvidia
+      command: zypper --gpg-auto-import-keys install -l -y x11-video-nvidiaG06
+      # Never reported as changed, whatever zypper did.
+      changed_when: false
+      tags: install
+
+    - name: Add docker community edition repository
+      get_url:
+        url: "{{ docker_repo_url_leap }}"
+        dest: "{{ docker_repo_dest_leap }}"
+      register: docker_repo
+      until: docker_repo is not failed
+      retries: "{{ max_retries }}"
+      delay: "{{ max_delay }}"
+      tags: install
+
+    - name: Permanently Disable swap
+      mount:
+        name: "swap"
+        fstype: swap
+        state: absent
+      tags: install
+
+    - name: Install common packages
+      package:
+        name: "{{ common_packages }}"
+        state: present
+      tags: install
+
+    - name: Install docker-compose
+      package:
+        name: docker-compose
+        state: present
+      tags: install
+
+    - name: Collect host facts (including acclerator information)
+      setup: ~
+      tags: install
+
+    - name: Install infiniBand support
+      package:
+        name: infiniband-diags
+        state: present
+      tags: install
+  # Use the version test: a plain string ">=" compares lexicographically and
+  # would rank "15.10" below "15.3". List entries are ANDed together.
+  when:
+    - os_supported_leap in compute_os
+    - ansible_distribution_version is version(os_supported_leap_version, '>=')
 
 - name: Deploy time ntp/chrony
   include_tasks: ntp.yml
@@ -106,15 +177,16 @@
   include_tasks: nvidia.yml
   when:
     - ansible_local.inventory.nvidia_gpu > 0
-    - ansible_facts['distribution'] == os_name
-    - ansible_facts['distribution_major_version'] == os_version
+    - ( ansible_facts['distribution'] == os_name and ansible_facts['distribution_major_version'] == os_version) or 
+      ( os_supported_leap in compute_os ) and ( ansible_distribution_version >= os_supported_leap_version )
   tags: install
 
 - name: Install AMD GPU drivers and software components
   include_tasks: amd.yml
   when:
     - ansible_local.inventory.amd_gpu > 0
-    - ansible_facts['distribution'] == os_name
+    # Leap release is checked with the version test; a plain string ">=" is
+    # lexicographic and would rank "15.10" below "15.3".
+    - ansible_facts['distribution'] == os_name or
+      ( os_supported_leap in compute_os and ansible_distribution_version is version(os_supported_leap_version, '>=') )
   tags: install
 
 - name: Get the hostname
@@ -194,4 +266,4 @@
     - "{{ groups['compute'] }}"
   when:
     - hostvars["127.0.0.1"]["login_node_required"]
-    - '"login_node" in group_names'
+    - '"login_node" in group_names'

+ 3 - 2
roles/common/tasks/ntp.yml

@@ -1,4 +1,4 @@
-#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -53,4 +53,5 @@
         notify:
           - Restart chrony
           - Sync chrony sources
-    when:  ( ansible_distribution == "CentOS" or   ansible_distribution == "RedHat" ) and ansible_distribution_major_version  > os_version
+    when:  (( ansible_distribution == "CentOS" or   ansible_distribution == "RedHat" ) and ansible_distribution_major_version  > os_version) or
+           ( os_supported_leap in compute_os ) and ( ansible_distribution_version >= os_supported_leap_version )

+ 89 - 47
roles/common/tasks/nvidia.yml

@@ -1,4 +1,4 @@
-#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -12,56 +12,98 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 ---
+- block:
+    - name: Add libnvidia container Repo
+      yum_repository:
+        name: libnvidia-container
+        description:  libnvidia-container
+        baseurl: https://nvidia.github.io/libnvidia-container/stable/centos7/$basearch
+        repo_gpgcheck: no
+        gpgcheck: no
+        gpgkey: https://nvidia.github.io/libnvidia-container/gpgkey
+        sslverify: yes
+        sslcacert: /etc/pki/tls/certs/ca-bundle.crt
+        enabled: yes
+      tags: install
 
-- name: Add libnvidia container Repo
-  yum_repository:
-    name: libnvidia-container
-    description:  libnvidia-container
-    baseurl: https://nvidia.github.io/libnvidia-container/stable/centos7/$basearch
-    repo_gpgcheck: no
-    gpgcheck: no
-    gpgkey: https://nvidia.github.io/libnvidia-container/gpgkey
-    sslverify: yes
-    sslcacert: /etc/pki/tls/certs/ca-bundle.crt
-    enabled: yes
-  tags: install
+    - name: Add nvidia-container-runtime Repo
+      yum_repository:
+        name: nvidia-container-runtime
+        description:  nvidia-container-runtime
+        baseurl: https://nvidia.github.io/nvidia-container-runtime/stable/centos7/$basearch
+        repo_gpgcheck: no
+        gpgcheck: no
+        gpgkey: https://nvidia.github.io/nvidia-container-runtime/gpgkey
+        sslverify: yes
+        sslcacert: /etc/pki/tls/certs/ca-bundle.crt
+        enabled: yes
+      tags: install
 
-- name: Add nvidia-container-runtime Repo
-  yum_repository:
-    name: nvidia-container-runtime
-    description:  nvidia-container-runtime
-    baseurl: https://nvidia.github.io/nvidia-container-runtime/stable/centos7/$basearch
-    repo_gpgcheck: no
-    gpgcheck: no
-    gpgkey: https://nvidia.github.io/nvidia-container-runtime/gpgkey
-    sslverify: yes
-    sslcacert: /etc/pki/tls/certs/ca-bundle.crt
-    enabled: yes
-  tags: install
+    - name: Add nvidia-docker Repo
+      yum_repository:
+        name: nvidia-docker
+        description:  nvidia-docker
+        baseurl: https://nvidia.github.io/nvidia-docker/centos7/$basearch
+        repo_gpgcheck: no
+        gpgcheck: no
+        gpgkey: https://nvidia.github.io/nvidia-docker/gpgkey
+        enabled: yes
+        sslverify: yes
+        sslcacert: /etc/pki/tls/certs/ca-bundle.crt
+      tags: install
 
-- name: Add nvidia-docker Repo
-  yum_repository:
-    name: nvidia-docker
-    description:  nvidia-docker
-    baseurl: https://nvidia.github.io/nvidia-docker/centos7/$basearch
-    repo_gpgcheck: no
-    gpgcheck: no
-    gpgkey: https://nvidia.github.io/nvidia-docker/gpgkey
-    enabled: yes
-    sslverify: yes
-    sslcacert: /etc/pki/tls/certs/ca-bundle.crt
-  tags: install
+    - name: Install nvidia driver and nvidia-docker2
+      package:
+        name: "{{ nvidia_packages }}"
+        enablerepo: libnvidia-container,nvidia-docker
+        state: present
+      tags: install
+    
+    - name: Reboot after installing GPU drivers
+      reboot:
+      tags: install
+  when:  ( os_supported_leap not in compute_os )
 
-- name: Install nvidia driver and nvidia-docker2
-  package:
-    name: "{{ nvidia_packages }}"
-    enablerepo: libnvidia-container,nvidia-docker
-    state: present
-  tags: install
+# openSUSE Leap path: runs when compute_os contains os_supported_leap and the
+# distribution version is at least os_supported_leap_version.
+- block:
+    - name: Fetching list of repos from zypper
+      command: /usr/bin/zypper lr --uri
+      changed_when: false
+      register: zypper_repos
+      tags: install
+
+    # Skipped when the repo listing above already mentions "nvidia-docker".
+    - name: Add nvidia repository for leap
+      command: zypper --gpg-auto-import-keys addrepo {{ nvidia_docker_repo }}
+      failed_when: false
+      changed_when: false
+      when: '"nvidia-docker" not in zypper_repos.stdout'
+      tags: install
+
+    - name: Install libnvidia container repo
+      zypper:
+        name: libnvidia-container1
+        disable_gpg_check: yes
+        state: present
+      tags: install
+
+    - name: Install nvidia-container-runtime repo
+      zypper:
+        name: nvidia-container-runtime
+        disable_gpg_check: yes
+        state: present
+      tags: install
+
+    # Installs nvidia-docker2; this task previously repeated
+    # nvidia-container-runtime, which the task above already installs.
+    - name: Install nvidia-docker
+      zypper:
+        name: nvidia-docker2
+        disable_gpg_check: yes
+        replacefiles: true
+        state: present
+      tags: install
+
+    - name: Reboot after installing GPU drivers
+      reboot:
+      tags: install
+  # Use the version test: a plain string ">=" compares lexicographically and
+  # would rank "15.10" below "15.3". List entries are ANDed together.
+  when:
+    - os_supported_leap in compute_os
+    - ansible_distribution_version is version(os_supported_leap_version, '>=')
 
 - name: Set nvidia as default runtime
   copy:
@@ -78,4 +120,4 @@
     state: restarted
     enabled: yes
     daemon_reload: yes
-  tags: install
+  tags: install

+ 23 - 6
roles/common/vars/main.yml

@@ -1,4 +1,4 @@
-#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -13,19 +13,36 @@
 #  limitations under the License.
 ---
 
+# Settings used by the openSUSE Leap task blocks.
+nvidia_repo: 'https://download.nvidia.com/opensuse/leap/15.3/'
+docker_repo_url_leap: 'https://download.docker.com/linux/sles/docker-ce.repo'
+# NOTE(review): destination of the downloaded docker-ce.repo file - confirm zypper reads repo files from this directory.
+docker_repo_dest_leap: '/etc/YaST2/docker-ce.repo'
+# Substring looked for in the lower-cased distribution name (compute_os).
+os_supported_leap: 'leap'
+# Minimum Leap release accepted by the Leap-only task blocks.
+os_supported_leap_version: '15.3'
 common_packages:
-  - yum-plugin-versionlock
   - gcc
   - nfs-utils
   - python3-pip
   - bash-completion
-  - nvidia-detect
   - chrony
   - pciutils
-  - docker-ce-cli-20.10.2
-  - docker-ce-20.10.2
   - openssl
   - singularity
+  - python3-pexpect
+# Retry count and delay shared by the repository/key download tasks.
+# NOTE(review): those tasks previously hard-coded delay: 10 - confirm 20 is intended.
+max_retries: 20
+max_delay: 20
+
+# AMD ROCm settings for the Leap path.
+amd_gpu_dest: /etc/modprobe.d/10-unsupported-modules.conf
+gpg_key_amd: 'https://repo.radeon.com/rocm/rocm.gpg.key'
+amd_repo: 'https://repo.radeon.com/rocm/zyp/zypper/'
+
+nvidia_docker_repo: 'https://nvidia.github.io/nvidia-docker/opensuse-leap15.1/nvidia-docker.repo'
+# Quoted so the value stays the string "0644": an unquoted 0644 is read as the
+# integer 420 by YAML 1.1 loaders and would then be templated into mode as "420".
+conf_file_mode: "0644"
+
+# Packages installed only on the non-Leap path.
+common_packages_for_non_leap:
+  - yum-plugin-versionlock
+  - nvidia-detect
+  - docker-ce-cli-20.10.2
+  - docker-ce-20.10.2
 
 docker_packages:
   - docker-ce-cli-20.10.2
@@ -78,4 +95,4 @@ daemon_file_dest: /etc/docker/
 daemon_file_mode: 0644
 
 hosts_file_dest: "/etc/hosts"
-hosts_file_mode: "0644"
+hosts_file_mode: "0644"