Explorar o código

Merge branch 'devel' into devel

Sujit Jadhav hai 3 anos
pai
achega
200285028a
Modificáronse 39 ficheiros con 1255 adicións e 769 borrados
  1. 33 10
      control_plane/input_params/base_vars.yml
  2. 2 2
      control_plane/input_params/login_vars.yml
  3. 96 0
      control_plane/roles/control_plane_common/tasks/device_config_validations.yml
  4. 84 40
      control_plane/roles/control_plane_common/tasks/fetch_base_inputs.yml
  5. 20 4
      control_plane/roles/control_plane_common/tasks/nfs_server_setup.yml
  6. 40 0
      control_plane/roles/control_plane_common/tasks/validate_device_ip_file.yml
  7. 51 50
      control_plane/roles/control_plane_common/tasks/validate_device_mapping_file.yml
  8. 29 28
      control_plane/roles/control_plane_common/tasks/validate_idrac_vars.yml
  9. 104 97
      control_plane/roles/control_plane_common/tasks/validate_nic_vars.yml
  10. 11 6
      control_plane/roles/control_plane_common/tasks/verify_login_inputs.yml
  11. 19 17
      control_plane/roles/control_plane_common/vars/main.yml
  12. 9 0
      control_plane/roles/control_plane_customiso/tasks/check_prerequisites.yml
  13. 15 13
      control_plane/roles/control_plane_customiso/tasks/main.yml
  14. 1 7
      control_plane/roles/control_plane_device/tasks/check_prerequisites.yml
  15. 48 48
      control_plane/roles/control_plane_device/tasks/main.yml
  16. 1 5
      control_plane/roles/control_plane_ib/tasks/configure_infiniband_container.yml
  17. 5 2
      control_plane/roles/control_plane_ib/tasks/main.yml
  18. 1 1
      control_plane/roles/control_plane_ib/vars/main.yml
  19. 3 1
      control_plane/roles/control_plane_repo/tasks/main.yml
  20. 221 218
      control_plane/roles/deploy_job_templates/tasks/main.yml
  21. 2 3
      control_plane/roles/network_ethernet/tasks/main.yml
  22. 2 3
      control_plane/roles/network_ib/tasks/main.yml
  23. 3 2
      control_plane/roles/webui_awx/tasks/awx_configuration.yml
  24. 10 10
      control_plane/roles/webui_awx/vars/main.yml
  25. 19 8
      docs/INSTALL_OMNIA.md
  26. 8 11
      docs/INSTALL_OMNIA_CONTROL_PLANE.md
  27. 12 7
      docs/README.md
  28. 15 0
      docs/Security/Enable_Security_LoginNode.md
  29. 26 14
      docs/Security/Enable_Security_ManagementStation.md
  30. 0 0
      docs/Security/login_user_creation.md
  31. 24 8
      docs/Telemetry_Visualization/Visualization.md
  32. BIN=BIN
      docs/images/omnia-logo-transparent.png
  33. 2 0
      examples/device_ip_list.yml
  34. 1 0
      roles/common/files/module.conf
  35. 85 33
      roles/common/tasks/amd.yml
  36. 138 66
      roles/common/tasks/main.yml
  37. 3 2
      roles/common/tasks/ntp.yml
  38. 89 47
      roles/common/tasks/nvidia.yml
  39. 23 6
      roles/common/vars/main.yml

+ 33 - 10
control_plane/input_params/base_vars.yml

@@ -19,20 +19,43 @@
 # If ansible is installed using pip, this path should be set
 ansible_conf_file_path: /etc/ansible
 
+# This variable is used to support the management network container.
+# This container configures IP for all the different devices like idrac, switches and powervaults.
+# Accepted value: "true" or "false"
+# Default value: "false"
+device_config_support: false
+
+# This variable is used to enable idrac support
+# Enable this support
+# Accepted values:  "true" or "false".
+# Default value: "false".
+# If idrac support is needed set this to "true"
+idrac_support: false
+
+# List of all the configured IPs for different types of supported devices.
+# Accepted values: path of the file that lists the IPs of the different devices.
+# This value is used only when idrac_support is true.
+# Format: one IP address per line
+# e.g:
+# 172.19.0.1
+# 172.19.0.5
+# A template for this file exists in omnia/examples and is named as device_ip_list.yml
+device_ip_list_path: ""
+
 # This variable is used to enable ethernet switch configuration
-# Accepted values:  "true" or "false". 
-# Default value: "true".
+# Accepted values:  "true" or "false".
+# Default value: "false".
 # If ethernet switch support is needed set this to "true"
-ethernet_switch_support: true
+ethernet_switch_support: false
 
 # This variable is used to enable infiniband switch configuration
-# Accepted values:  "true" or "false". 
-# Default value: "true".
+# Accepted values:  "true" or "false".
+# Default value: "false".
 # If infiniband configuration is needed set this to "true"
-ib_switch_support: true
+ib_switch_support: false
 
 # This variable is used to enable powervault configuration
-# Accepted values:  "true" or "false". 
+# Accepted values:  "true" or "false".
 # Default value: "false".
 # If powervault configuration is needed, set this to "true"
 powervault_support: false
@@ -77,10 +100,10 @@ mount_location: /opt/omnia/
 
 # This variable is used to set node provisioning method
 # Accepted values: idrac, PXE
-# Default value: "idrac"
+# Default value: "PXE"
 # If provisioning needs to be done through cobbler, set it to "PXE"
 # If idrac license is not present, provisioning mode will be set to "PXE"
-provision_method: "idrac"
+provision_method: "PXE"
 
 # This variable is used to set provisioning type
 # Accepted values: stateful
@@ -121,7 +144,7 @@ default_lease_time: "86400"
 
 ### Usage: control_plane_device ###
 
-# The nic/ethernet card that needs to be connected to provision 
+# The nic/ethernet card that needs to be connected to provision
 # the fabric, idrac and powervault.
 # This nic will be configured by Omnia for the DHCP server.
 # Default value: "eno1"

+ 2 - 2
control_plane/input_params/login_vars.yml

@@ -33,12 +33,12 @@ cobbler_password: ""
 
 # The username for idrac
 # The username must not contain -,\, ',"
-# Required field
+# Required only if idrac_support: true
 idrac_username: ""
 
 # Password used for idrac
 # The password must not contain -,\, ',"
-# Required field
+# Required only if idrac_support: true
 idrac_password: ""
 
 ### Usage: webui_awx ###

+ 96 - 0
control_plane/roles/control_plane_common/tasks/device_config_validations.yml

@@ -0,0 +1,96 @@
+# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+- name: Initialize variables
+  set_fact:
+    mngmnt_network_container_status: false
+
+- name: Check mngmnt_network_container status on the machine
+  command: kubectl get pods -n network-config
+  register: mngmnt_network_container_result
+  failed_when: false
+  changed_when: false
+
+# Flip the status flag when the kubectl output lists the container pod.
+- name: Set mngmnt_network_container_status if the container is present
+  set_fact:
+    mngmnt_network_container_status: true
+  when: "'mngmnt-network-container' in mngmnt_network_container_result.stdout"
+
+- name: Validate if mngmnt_network is present if device_config is false
+  assert:
+    that: device_config_support != false
+    success_msg: "{{ mgmnt_device_fail }}"
+    fail_msg: "{{ mgmnt_device_fail }}"
+  when: mngmnt_network_container_status
+
+# device_config_support requires idrac_support to be enabled as well.
+- name: Assert value of idrac_support if mngmnt_network container needed
+  assert:
+    that: idrac_support == true
+    success_msg: "{{ idrac_support_valid }}"
+    fail_msg: "{{ failed_idrac_support }}"
+  when: device_config_support
+
+- block:
+    - name: Assert ethernet_switch_support
+      assert:
+        that: ethernet_switch_support == true or ethernet_switch_support == false
+        success_msg: "{{ ethernet_switch_support_success_msg }}"
+        fail_msg: "{{ ethernet_switch_support_fail_msg }}"
+
+    - name: Assert ib_switch_support
+      assert:
+        that:  ib_switch_support == true or ib_switch_support == false
+        success_msg: "{{ ib_switch_support_success_msg }}"
+        fail_msg: "{{ ib_switch_support_fail_msg }}"
+
+    - name: Assert powervault_support
+      assert:
+        that: powervault_support == true or powervault_support == false
+        success_msg: "{{ powervault_support_success_msg }}"
+        fail_msg: "{{ powervault_support_fail_msg }}"
+
+  when: device_support_status
+
+- block:
+  - name: Check value of ethernet_switch_support when not device_config_support
+    assert:
+      that: ethernet_switch_support == true
+      success_msg: "{{ ethernet_device_config }}"
+    failed_when: false
+
+  - name: Set ethernet_switch_support when not device_config_support
+    set_fact:
+      ethernet_switch_support: false
+
+  - name: Check value of ib_switch_support when not device_config_support
+    assert:
+      that: ib_switch_support == true
+      success_msg: "{{ ib_device_config }}"
+    failed_when: false
+
+  - name: Set ib_switch_support when not device_config_support
+    set_fact:
+      ib_switch_support: false
+
+  - name: Check value of powervault_support when not device_config_support
+    assert:
+      that: powervault_support == true
+      success_msg: "{{ pv_device_config }}"
+    failed_when: false
+
+  - name: Set powervault_support when not device_config_support
+    set_fact:
+      powervault_support: false
+  when: not device_support_status

+ 84 - 40
control_plane/roles/control_plane_common/tasks/fetch_base_inputs.yml

@@ -31,9 +31,6 @@
       timezone | length < 1 or
       language | length < 1 or
       iso_file_path | length < 1 or
-      mngmnt_network_nic | length < 1 or
-      mngmnt_network_dhcp_start_range | length < 1 or
-      mngmnt_network_dhcp_end_range | length < 1 or
       host_network_nic | length < 1 or
       host_network_dhcp_start_range | length < 1 or
       host_network_dhcp_end_range | length < 1 or
@@ -43,6 +40,66 @@
       provision_state | length < 1 or
       mount_location | length < 1
 
+# The management network NIC is only mandatory when device_config_support
+# is enabled; the assertion must hold when a NIC name IS provided.
+- name: Validate management network nic
+  assert:
+    that: mngmnt_network_nic | length > 0
+    fail_msg: "{{ input_base_failure_msg }}"
+  when: device_config_support
+  
+- name: Validate the value of device config support
+  assert:
+    that:
+      - device_config_support | lower == "true" or device_config_support | lower == "false"
+    success_msg: "{{ success_device_config }}"
+    fail_msg: "{{ fail_device_config }}"
+  tags: init
+
+- name: Set fact for device_ip_list_path
+  set_fact:
+    device_config_ip_file: false
+  tags: init
+
+- name: Set the mapping file value for device network
+  set_fact:
+    device_config_ip_file: true
+  when:
+    - device_ip_list_path | length > 0
+    - '"/" in device_ip_list_path'
+    - idrac_support
+  tags: init
+
+- name: Warning msg if idrac_support is false and device_ip_list is given
+  assert:
+    that:
+      - '"/"  in device_ip_list_path'
+    success_msg: "{{ device_ip_list_not_supported  }}"
+  when: not idrac_support
+  failed_when: false
+
+# device_support_status defaults to false and is switched on below.
+- name: Set status for device_config_support
+  set_fact:
+    device_support_status: false
+  tags: init
+
+# Enabled when the management container is requested, or when idrac
+# support is requested together with a device IP list file.
+- name: Enable device_support_status when device configuration is requested
+  set_fact:
+    device_support_status: true
+  when: (device_config_support) or (idrac_support and device_config_ip_file)
+  tags: init
+
+# Only stat the file when a device IP list was accepted (device_config_ip_file).
+- name: Check that device_ip_list_path exists
+  stat:
+    path: "{{ device_ip_list_path }}"
+  when: device_config_ip_file
+  register: result_device_ip_file
+  tags: init
+
+- name: Fail if device_ip_list_path does not exist
+  fail:
+    msg: "{{ invalid_mapping_file_path }} for configurations"
+  when: device_config_ip_file and not result_device_ip_file.stat.exists
+  tags: init
+
 - name: Validate default lease time
   assert:
     that:
@@ -58,21 +115,9 @@
     max_lease_time: "{{ default_lease_time|int + 10000 }}"
   tags: init
 
-- name: Validate infiniband base_vars are not empty
-  assert:
-    that:
-      - ib_network_nic | length > 2
-      - ib_network_dhcp_start_range | length > 6
-      - ib_network_dhcp_end_range | length > 6
-    success_msg: "{{ success_msg_ib }}"
-    fail_msg: "{{ fail_msg_ib }}"
-  register: ib_check
-  when: ib_switch_support
-  tags: [ validate, network-ib ]
-
 - name: Set facts to validate snmp support
   set_fact:
-    snmp_enabled: false   
+    snmp_enabled: false
     mngmnt_mapping_file: false
     host_mapping_file: false
   tags: init
@@ -80,9 +125,11 @@
 - name: Verify snmp_trap_destination IP address
   set_fact:
     snmp_enabled: true
-  when: snmp_trap_destination | length > 1
+  when:
+    - device_support_status
+    - snmp_trap_destination | length > 1
   tags: init
-  
+
 - name: Assert snmp trap destination address
   assert:
     that:
@@ -129,29 +176,17 @@
   when: not ansible_conf_exists.stat.exists
   tags: init
 
-- name: Assert ethernet_switch_support
-  assert:
-    that:
-      - ethernet_switch_support == true or ethernet_switch_support == false
-    success_msg: "{{ ethernet_switch_support_success_msg }}"
-    fail_msg: "{{ ethernet_switch_support_fail_msg }}"
-  tags: [ validate, init ]
-
-- name: Assert ib_switch_support
-  assert:
-    that:
-      - ib_switch_support == true or ib_switch_support == false
-    success_msg: "{{ ib_switch_support_success_msg }}"
-    fail_msg: "{{ ib_switch_support_fail_msg }}"
-  tags: [ validate, init ]
-
-- name: Assert powervault_support
+# register/when/tags are task keywords and must sit at task level,
+# not inside the assert module arguments.
+- name: Validate infiniband base_vars are not empty
   assert:
     that:
-      - powervault_support == true or powervault_support == false
-    success_msg: "{{ powervault_support_success_msg }}"
-    fail_msg: "{{ powervault_support_fail_msg }}"
-  tags: [ validate, init ]
+      - ib_network_nic | length > 2
+      - ib_network_dhcp_start_range | length > 6
+      - ib_network_dhcp_end_range | length > 6
+    success_msg: "{{ success_msg_ib }}"
+    fail_msg: "{{ fail_msg_ib }}"
+  register: ib_check
+  when: ib_switch_support
+  tags: [ validate, network-ib ]
 
 - name: Assert enable_security_support
   assert:
@@ -160,7 +195,7 @@
     success_msg: "{{ enable_security_support_success_msg }}"
     fail_msg: "{{ enable_security_support_fail_msg }}"
   tags: [ validate, init ]
-  
+
 - name: Assert kubernetes pod network CIDR
   assert:
     that:
@@ -197,5 +232,14 @@
   import_tasks: validate_provision_vars.yml
   tags: [ validate, idrac, pxe ]
 
+- name: Validate device_config_support
+  import_tasks: device_config_validations.yml
+  tags: [ validate, init ]
+
+# Imported tasks run only when a device IP list file was accepted.
+- name: Validate device_ip_list file
+  import_tasks: validate_device_ip_file.yml
+  when: device_config_ip_file
+  tags: [ validate, init ]
+
 - name: Validate NIC parameters
   import_tasks: validate_nic_vars.yml

+ 20 - 4
control_plane/roles/control_plane_common/tasks/nfs_server_setup.yml

@@ -1,4 +1,4 @@
-#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -52,11 +52,27 @@
     path: "{{ exports_file_path }}"
     line: "{{ item.path }} {{ item.ip }}(rw,sync,no_root_squash)"
   with_items:
-    - { path: "{{ nfs_share_offline_repo }}", ip: "{{ mngmnt_network_ip }}" }
     - { path: "{{ nfs_share_offline_repo }}", ip: "{{ public_ip }}" }
-    - { path: "{{ nfs_share_awx }}", ip: "{{ mngmnt_network_ip }}" }
     - { path: "{{ nfs_share_awx }}", ip: "{{ public_ip }}" }
+
+- name: Adding NFS share entries in /etc/exports when device_config_support
+  lineinfile:
+    path: "{{ exports_file_path }}"
+    line: "{{ item.path }} {{ item.ip }}(rw,sync,no_root_squash)"
+  with_items:
+    - { path: "{{ nfs_share_offline_repo }}", ip: "{{ mngmnt_network_ip }}" }
+    - { path: "{{ nfs_share_awx }}", ip: "{{ mngmnt_network_ip }}" }
     - { path: "{{ nfs_share_offline_repo }}", ip: "{{ mngmnt_network_subnet }}/{{ mngmnt_network_netmask }}" }
+  when: device_config_support
+
+# One export line of the offline repo share per IP in device_ip.
+- name: Adding NFS share entries in /etc/exports for the device IP list
+  lineinfile:
+    path: "{{ exports_file_path }}"
+    line: "{{ nfs_share_offline_repo }} {{ item }}(rw,sync,no_root_squash)"
+  with_items: "{{ device_ip }}"
+  when:
+    - idrac_support
+    - device_config_ip_file
 
 - name: Exporting the shared directories
   command: exportfs -r
@@ -78,4 +94,4 @@
   service:
     name: firewalld
     state: stopped
-    enabled: no
+    enabled: no

+ 40 - 0
control_plane/roles/control_plane_common/tasks/validate_device_ip_file.yml

@@ -0,0 +1,40 @@
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+# Read the file at device_ip_list_path into a list, one entry per line.
+# The variable is passed to lookup() directly; nesting '{{ }}' inside an
+# expression is not needed.
+- name: Read the entries of device_ip_list_path
+  set_fact:
+    device_ip: "{{ lookup('file', device_ip_list_path).splitlines() | list }}"
+
+# len1 is the number of entries read; device_ip_temp keeps only the
+# entries accepted by the ipv4('address') filter.
+- name: Filter all the IP present
+  set_fact:
+    len1: "{{ device_ip | length }}"
+    device_ip_temp: "{{ device_ip | ipv4('address') | list }}"
+
+- name: Count the valid IPv4 addresses
+  set_fact:
+    len2: "{{ device_ip_temp | length }}"
+
+# Every entry must be a valid IPv4 address, i.e. nothing was filtered out.
+- name: Assert if valid device_ip_file
+  assert:
+    that:
+      - len1 == len2
+    fail_msg: "{{ fail_device_ip_format }}"
+    success_msg: "{{ success_device_ip_format }}"
+
+- name: Copy the Ips to file
+  copy:
+    src: "{{ device_ip_list_path }}"
+    dest: "{{ mgmnt_ip_path }}"
+    mode: "{{ file_perm }}"

+ 51 - 50
control_plane/roles/control_plane_common/tasks/validate_device_mapping_file.yml

@@ -1,4 +1,4 @@
-# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,62 +11,63 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # limitations under the License.
 ---
+- block:
+  - name: Check that device mapping file exists at mentioned path
+    stat:
+      path: "{{ mngmnt_mapping_file_path }}"
+    register: stat_result
 
-- name: Check that device mapping file exists at mentioned path
-  stat:
-    path: "{{ mngmnt_mapping_file_path }}"
-  register: stat_result
+  - name: Fail if config file doesn't exist
+    fail:
+      msg: "{{ fail_msg_mapping_file + mngmnt_mapping_file_path }}"
+    when: not stat_result.stat.exists
 
-- name: Fail if config file doesn't exist
-  fail:
-    msg: "{{ fail_msg_mapping_file + mngmnt_mapping_file_path }}"
-  when: not stat_result.stat.exists
+  - name: Read device mapping file from CSV file and return a dictionary
+    read_csv:
+      path: "{{ mngmnt_mapping_file_path }}"
+      key: "{{ mapping_file_key }}"
+    register: device_mapping_file
+    delegate_to: localhost
 
-- name: Read device mapping file from CSV file and return a dictionary
-  read_csv:
-    path: "{{ mngmnt_mapping_file_path }}"
-    key: "{{ mapping_file_key }}"
-  register: device_mapping_file
-  delegate_to: localhost
+  - name: Check if header is present in mapping file
+    shell:  set -o pipefail && awk 'NR==1 { print $1}' "{{ mngmnt_mapping_file_path }}"
+    register: mngmnt_header
+    changed_when: false
 
-- name: Check if header is present in mapping file
-  shell:  set -o pipefail && awk 'NR==1 { print $1}' "{{ mngmnt_mapping_file_path }}"
-  register: mngmnt_header
-  changed_when: false
+  - name: Fail if header not in correct format
+    fail:
+      msg: "{{ fail_device_mapping_file_header }}"
+    when: mngmnt_header.stdout !=  device_mapping_header_format
 
-- name: Fail if header not in correct format
-  fail:
-    msg: "{{ fail_device_mapping_file_header }}"
-  when: mngmnt_header.stdout !=  device_mapping_header_format
+  - name: Check if mapping file is comma seperated
+    shell: awk -F\, '{print NF-1}' "{{ mngmnt_mapping_file_path }}"
+    register: mngmnt_comma_seperated
+    changed_when: false
 
-- name: Check if mapping file is comma seperated
-  shell: awk -F\, '{print NF-1}' "{{ mngmnt_mapping_file_path }}"
-  register: mngmnt_comma_seperated
-  changed_when: false
+  - name: Fail if not comma seperated or if all fields are not given
+    fail:
+      msg: "{{ fail_mapping_file_field_seperation }}"
+    when: not(item =="1")
+    with_items: "{{ mngmnt_comma_seperated.stdout_lines }}"
 
-- name: Fail if not comma seperated or if all fields are not given
-  fail:
-    msg: "{{ fail_mapping_file_field_seperation }}"
-  when: not(item =="1")
-  with_items: "{{ mngmnt_comma_seperated.stdout_lines }}"
+  - name: Initialize count variables
+    set_fact:
+      list_of_ips: []
+      count_total_items: "{{ device_mapping_file.dict |length }}"
 
-- name: Initialize count variables
-  set_fact:
-    list_of_ips: []
-    count_total_items: "{{ device_mapping_file.dict |length }}"
+  - name: Create list of IPs in mapping file
+    set_fact:
+      list_of_ips: "{{ [ item.value.IP ] + list_of_ips }}"
+    loop: "{{ device_mapping_file.dict | dict2items }}"
+    loop_control:
+      label: "{{ item.value.MAC }}"
 
-- name: Create list of IPs in mapping file
-  set_fact:
-    list_of_ips: "{{ [ item.value.IP ] + list_of_ips }}"
-  loop: "{{ device_mapping_file.dict | dict2items }}"
-  loop_control:
-    label: "{{ item.value.MAC }}"
+  - name: Find count of unique IPs
+    set_fact:
+      count_of_unique_ip : "{{ list_of_ips| unique| length }}"
 
-- name: Find count of unique IPs
-  set_fact:
-    count_of_unique_ip : "{{ list_of_ips| unique| length }}"
-
-- name: Validation to check if unique IPs are provided for each node
-  fail:
-    msg: "{{ fail_mapping_file_duplicate_ip + mngmnt_mapping_file_path }}"
-  when: not(count_of_unique_ip|int == count_total_items|int)
+  - name: Validation to check if unique IPs are provided for each node
+    fail:
+      msg: "{{ fail_mapping_file_duplicate_ip + mngmnt_mapping_file_path }}"
+    when: not(count_of_unique_ip|int == count_total_items|int)
+  when: device_config_support

+ 29 - 28
control_plane/roles/control_plane_common/tasks/validate_idrac_vars.yml

@@ -12,34 +12,35 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 ---
+- block:
+  - name: Include variable file idrac_vars.yml
+    include_vars: "{{ idrac_input_filename }}"
+    run_once: true
+    tags: init
 
-- name: Include variable file idrac_vars.yml
-  include_vars: "{{ idrac_input_filename }}"
-  run_once: true
-  tags: init
+  - name: Assert firmware_update_required value
+    assert:
+      that:
+        - firmware_update_required == true or firmware_update_required == false
+      success_msg: "{{ firmware_update_success_msg }}"
+      fail_msg: "{{ firmware_update_fail_msg }}"
+    tags: [ validate, firmware-repo ]
 
-- name: Assert firmware_update_required value
-  assert:
-    that:
-      - firmware_update_required == true or firmware_update_required == false
-    success_msg: "{{ firmware_update_success_msg }}"
-    fail_msg: "{{ firmware_update_fail_msg }}"
-  tags: [ validate, firmware-repo ]
+  - name: Read poweredge_model file
+    command: cat {{ role_path }}/files/poweredge_models.txt
+    failed_when: false
+    register: poweredge_models_file_output
+    changed_when: false
+    tags: [ validate, firmware-repo ]
 
-- name: Read poweredge_model file
-  command: cat {{ role_path }}/files/poweredge_models.txt
-  failed_when: false
-  register: poweredge_models_file_output
-  changed_when: false
-  tags: [ validate, firmware-repo ]
-
-- name: Assert poweredge_model value
-  assert:
-    that:
-      - item | length > 1
-      - item in poweredge_models_file_output.stdout
-    success_msg: "{{ poweredge_model_success_msg }}"
-    fail_msg: "{{ poweredge_model_fail_msg }}"
-  when: firmware_update_required
-  with_items: "{{ poweredge_model.split(',') | map('trim') }}"
-  tags: [ validate, firmware-repo ]
+  - name: Assert poweredge_model value
+    assert:
+      that:
+        - item | length > 1
+        - item in poweredge_models_file_output.stdout
+      success_msg: "{{ poweredge_model_success_msg }}"
+      fail_msg: "{{ poweredge_model_fail_msg }}"
+    when: firmware_update_required
+    with_items: "{{ poweredge_model.split(',') | map('trim') }}"
+    tags: [ validate, firmware-repo ]
+  when: idrac_support

+ 104 - 97
control_plane/roles/control_plane_common/tasks/validate_nic_vars.yml

@@ -35,97 +35,97 @@
   tags: init
 
 ### management network
-
-- name: Assert management network nic
-  assert:
-    that:
-      - mngmnt_network_nic in nic_addr_up.stdout
-    success_msg: "{{ success_msg_mngmnt_network_nic }}"
-    fail_msg: "{{ fail_msg_mngmnt_network_nic }}"
-  tags: [ validate, network-device ]
-
-- name: Fetch the management network ip, netmask and subnet
-  set_fact:
-    mngmnt_network_ip: "{{ lookup('vars','ansible_'+mngmnt_network_nic).ipv4.address }}"
-    mngmnt_network_netmask: "{{ lookup('vars','ansible_'+mngmnt_network_nic).ipv4.netmask }}"
-    mngmnt_network_subnet: "{{ lookup('vars','ansible_'+mngmnt_network_nic).ipv4.network }}"
-  tags: init
-
-- name: Check the subnet of management network dhcp start range
-  shell: |
-    IFS=. read -r i1 i2 i3 i4 <<< "{{ mngmnt_network_dhcp_start_range }}"
-    IFS=. read -r m1 m2 m3 m4 <<< "{{ mngmnt_network_netmask }}"
-    printf "%d.%d.%d.%d\n" "$((i1 & m1))" "$((i2 & m2))" "$((i3 & m3))" "$((i4 & m4))"
-  args:
-    warn: no
-  register: dhcp_start_mgmnt_result
-  changed_when: false
-  tags: init
-
-- name: Set the start dhcp subnet for management network
-  set_fact:
-    dhcp_start_mgmnt: "{{ dhcp_start_mgmnt_result.stdout }}"
-  tags: init
-
-- name: Check the subnet of dhcp end range for management network
-  shell: |
-    IFS=. read -r i1 i2 i3 i4 <<< "{{ mngmnt_network_dhcp_end_range }}"
-    IFS=. read -r m1 m2 m3 m4 <<< "{{ mngmnt_network_netmask }}"
-    printf "%d.%d.%d.%d\n" "$((i1 & m1))" "$((i2 & m2))" "$((i3 & m3))" "$((i4 & m4))"
-  register: dhcp_end_mgmnt_result
-  changed_when: false
-  tags: init
-
-- name: Set the end dhcp subnet for management network
-  set_fact:
-    dhcp_end_mgmnt: "{{ dhcp_end_mgmnt_result.stdout }}"
-  tags: init
-
-- name: Assert management_net_dhcp_start_range
-  assert:
-    that:
-      - mngmnt_network_dhcp_start_range |  length > 1
-      - mngmnt_network_dhcp_start_range | ipv4
-      - mngmnt_network_dhcp_start_range != mngmnt_network_ip
-      - mngmnt_network_dhcp_start_range != mngmnt_network_dhcp_end_range
-      - dhcp_start_mgmnt == mngmnt_network_subnet
-      - dhcp_start_mgmnt == dhcp_end_mgmnt
-    success_msg: "{{ success_dhcp_range }} for management network"
-    fail_msg: "{{ fail_dhcp_range }} for management network"
-  tags: [ validate, network-device ]
-
-- name: Assert management_net_dhcp_end_range
-  assert:
-    that:
-      - mngmnt_network_dhcp_end_range |  length > 1
-      - mngmnt_network_dhcp_end_range | ipv4
-      - mngmnt_network_dhcp_end_range != mngmnt_network_ip
-      - mngmnt_network_dhcp_start_range != mngmnt_network_dhcp_end_range
-      - dhcp_end_mgmnt == mngmnt_network_subnet
-      - dhcp_start_mgmnt == dhcp_end_mgmnt
-    success_msg: "{{ success_dhcp_range }} for management network"
-    fail_msg: "{{ fail_dhcp_range }} for management network"
-  tags: [ validate, network-device ]
-
-- name: Set the mapping file value for management network
-  set_fact:
-    mngmnt_mapping_file: true
-  when: mngmnt_mapping_file_path | length > 0
-  tags: init
-
-- name: Assert valid mngmnt_mapping_file_path
-  stat:
-    path: "{{ mngmnt_mapping_file_path }}"
-  when: mngmnt_mapping_file
-  register: result_mngmnt_mapping_file
-  tags: init
-
-- name : Valid mngmnt_mapping_file_path
-  fail:
-    msg: "{{ invalid_mapping_file_path }} for management network"
-  when: mngmnt_mapping_file and not result_mngmnt_mapping_file.stat.exists
-  tags: init
-
+- block:
+  - name: Assert management network nic
+    assert:
+      that:
+        - mngmnt_network_nic in nic_addr_up.stdout
+      success_msg: "{{ success_msg_mngmnt_network_nic }}"
+      fail_msg: "{{ fail_msg_mngmnt_network_nic }}"
+    tags: [ validate, network-device ]
+
+  - name: Fetch the management network ip, netmask and subnet
+    set_fact:
+      mngmnt_network_ip: "{{ lookup('vars','ansible_'+mngmnt_network_nic).ipv4.address }}"
+      mngmnt_network_netmask: "{{ lookup('vars','ansible_'+mngmnt_network_nic).ipv4.netmask }}"
+      mngmnt_network_subnet: "{{ lookup('vars','ansible_'+mngmnt_network_nic).ipv4.network }}"
+    tags: init
+
+  - name: Check the subnet of management network dhcp start range
+    shell: |
+      IFS=. read -r i1 i2 i3 i4 <<< "{{ mngmnt_network_dhcp_start_range }}"
+      IFS=. read -r m1 m2 m3 m4 <<< "{{ mngmnt_network_netmask }}"
+      printf "%d.%d.%d.%d\n" "$((i1 & m1))" "$((i2 & m2))" "$((i3 & m3))" "$((i4 & m4))"
+    args:
+      warn: no
+    register: dhcp_start_mgmnt_result
+    changed_when: false
+    tags: init
+
+  - name: Set the start dhcp subnet for management network
+    set_fact:
+      dhcp_start_mgmnt: "{{ dhcp_start_mgmnt_result.stdout }}"
+    tags: init
+
+  - name: Check the subnet of dhcp end range for management network
+    shell: |
+      IFS=. read -r i1 i2 i3 i4 <<< "{{ mngmnt_network_dhcp_end_range }}"
+      IFS=. read -r m1 m2 m3 m4 <<< "{{ mngmnt_network_netmask }}"
+      printf "%d.%d.%d.%d\n" "$((i1 & m1))" "$((i2 & m2))" "$((i3 & m3))" "$((i4 & m4))"
+    register: dhcp_end_mgmnt_result
+    changed_when: false
+    tags: init
+
+  - name: Set the end dhcp subnet for management network
+    set_fact:
+      dhcp_end_mgmnt: "{{ dhcp_end_mgmnt_result.stdout }}"
+    tags: init
+
+  - name: Assert management_net_dhcp_start_range
+    assert:
+      that:
+        - mngmnt_network_dhcp_start_range |  length > 1
+        - mngmnt_network_dhcp_start_range | ipv4
+        - mngmnt_network_dhcp_start_range != mngmnt_network_ip
+        - mngmnt_network_dhcp_start_range != mngmnt_network_dhcp_end_range
+        - dhcp_start_mgmnt == mngmnt_network_subnet
+        - dhcp_start_mgmnt == dhcp_end_mgmnt
+      success_msg: "{{ success_dhcp_range }} for management network"
+      fail_msg: "{{ fail_dhcp_range }} for management network"
+    tags: [ validate, network-device ]
+
+  - name: Assert management_net_dhcp_end_range
+    assert:
+      that:
+        - mngmnt_network_dhcp_end_range |  length > 1
+        - mngmnt_network_dhcp_end_range | ipv4
+        - mngmnt_network_dhcp_end_range != mngmnt_network_ip
+        - mngmnt_network_dhcp_start_range != mngmnt_network_dhcp_end_range
+        - dhcp_end_mgmnt == mngmnt_network_subnet
+        - dhcp_start_mgmnt == dhcp_end_mgmnt
+      success_msg: "{{ success_dhcp_range }} for management network"
+      fail_msg: "{{ fail_dhcp_range }} for management network"
+    tags: [ validate, network-device ]
+
+  - name: Set the mapping file value for management network
+    set_fact:
+      mngmnt_mapping_file: true
+    when: mngmnt_mapping_file_path | length > 0
+    tags: init
+
+  - name: Assert valid mngmnt_mapping_file_path
+    stat:
+      path: "{{ mngmnt_mapping_file_path }}"
+    when: mngmnt_mapping_file
+    register: result_mngmnt_mapping_file
+    tags: init
+
+  - name : Valid mngmnt_mapping_file_path
+    fail:
+      msg: "{{ invalid_mapping_file_path }} for management network"
+    when: mngmnt_mapping_file and not result_mngmnt_mapping_file.stat.exists
+    tags: init
+  when: device_config_support
 ### host network
 
 - name: Fetch the host network ip, netmask and subnet
@@ -196,14 +196,14 @@
     host_mapping_file: true
   when: host_mapping_file_path | length > 0
   tags: init
-  
+
 - name: Assert valid mapping_file_path
-  stat: 
+  stat:
     path: "{{ host_mapping_file_path }}"
   when: host_mapping_file
   register: result_host_mapping_file
   tags: init
-  
+
 - name: Valid mapping_file_path
   fail:
     msg: "{{ invalid_mapping_file_path }} for host_network"
@@ -218,12 +218,19 @@
       - public_nic != host_network_nic
     success_msg: "{{ success_msg_different_nics }}"
     fail_msg: "{{ fail_msg_different_nics }}"
+  when: device_config_support
   tags: [ validate, pxe, network-device ]
 
-### ib network
+# NIC check for runs without device configuration. The preceding assertion,
+# guarded by device_config_support, already includes
+# public_nic != host_network_nic, so this one only needs to run when that
+# guard is false.
+- name: Verify different nics
+  assert:
+    that:
+      - public_nic != host_network_nic
+    success_msg: "{{ success_msg_different_nics }}"
+    fail_msg: "{{ fail_msg_different_nics }}"
+  when: not device_config_support
+  tags: [ validate, pxe, network-device ]
 
-- name: Validate ib network vars
-  block:
+### ib network
+- block:
     - name: Fetch the infiniband network ip, netmask and subnet
       set_fact:
         ib_ip: "{{ lookup('vars','ansible_'+ib_network_nic).ipv4.address }}"

+ 11 - 6
control_plane/roles/control_plane_common/tasks/verify_login_inputs.yml

@@ -39,9 +39,7 @@
   register: input_config_check
   when:
     - provision_password | length < 1 or
-      cobbler_password | length < 1 or      
-      idrac_username | length < 1 or
-      idrac_password | length < 1
+      cobbler_password | length < 1
   tags: [ validate, pxe, idrac ]
 
 - name: Validate security parameters when enable_security_support is set to true
@@ -111,6 +109,7 @@
     - name: idrac credentials validation check
       fail:
         msg: "{{ fail_msg_idrac_credentials }}"
+  when: device_support_status
   tags: [ validate, idrac ]
 
 - name: Assert grafana credentials
@@ -160,7 +159,9 @@
     - name: ethernet switch credentials validation check
       fail:
         msg: "{{ fail_msg_ethernet_credentials }}"
-  when: ethernet_switch_support
+  when:
+    - device_support_status
+    - ethernet_switch_support
   tags: [ validate, network-device ]
 
 - name: Assert username and password for IB switches
@@ -185,7 +186,9 @@
     - name: IB switch credentials validation check
       fail:
         msg: "{{ fail_msg_ib_credentials }}"
-  when: ib_switch_support
+  when:
+    - device_support_status
+    - ib_switch_support
   tags: [ validate, network-ib ]
 
 - name: Assert username and password for powervault me4
@@ -217,7 +220,9 @@
     - name: Powervault me4 credentials validation check
       fail:
         msg: "{{ fail_msg_me4_credentials }}"
-  when: powervault_support
+  when:
+    - device_support_status
+    - powervault_support
   tags: [ validate, network-device ]
 
 - name: Assert ms_directory_manager_password

+ 19 - 17
control_plane/roles/control_plane_common/vars/main.yml

@@ -103,10 +103,8 @@ fail_msg_directory_manager_password: "Failed. Incorrect format provided for dire
 success_msg_ipa_admin_password: "ipa_admin_password successfully validated"
 fail_msg_ipa_admin_password: "Failed. Incorrect format provided for ipa_admin_password"
 omnia_input_config_failure_msg: "Failed. Please provide all the required parameters in omnia_config.yml for for login_node"
-login_node_required_success_msg: "login_node_required successfully validated"
-login_node_required_fail_msg: "Failed. login_node_required should be either true or false"
-secure_login_node_success_msg: "enable_secure_login_node successfully validated"
-secure_login_node_fail_msg: "Failed. enable_secure_login_node should be either true or false"
+login_node_required_success_msg: "Login_node_required successfully validated"
+login_node_required_fail_msg: "Failed. login_node_required can be either true or false"
 
 # Usage: fetch_base_inputs.yml
 base_vars_filename: "input_params/base_vars.yml"
@@ -148,8 +146,6 @@ success_msg_different_nics: "The nics of different containers and public nic are
 fail_msg_different_nics: "Failed. Incorrect nic information. public nic, management network nic and host network nic should not be the same"
 success_msg_different_nics_ib: "The nics of different containers and public nic are not the same as infiniband nic- Validated"
 fail_msg_different_nics_ib: "Failed. Infiniband nic cannot be the same as other nics"
-success_msg_ib: "Infiniband variables validated"
-fail_msg_ib: "Failed. Please provide all the InfiniBand related parameters in base_vars.yml"
 success_msg_lease_time: "Default lease time validated"
 fail_msg_lease_time: "Failed. Please provide a valid default lease time"
 provision_os_success_msg: "provision_os validated"
@@ -158,23 +154,24 @@ provision_state_success_msg: "provision_state validated"
 provision_state_fail_msg: "Failed. Incorrect provision_state selected. Supported only stateful"
 enable_security_support_success_msg: "enable_security_support validated"
 enable_security_support_fail_msg: "Failed. enable_security_support only accepts boolean values true or false"
-dns_empty_warning_msg: "[WARNING] primary_dns and secondary_dns is empty. DHCP routing in compute nodes for internet access won't be configured. Stop and re-run control_plane.yml, if DHCP routing is required."
-primary_dns_empty_msg: "primary_dns is empty and secondary_dns provided. If one dns entry present, provide primary_dns only."
-primary_dns_success_msg: "primary_dns successfully validated"
-primary_dns_fail_msg: "Failed. Incorrect primary_dns provided in base_vars.yml"
-primary_dns_not_reachable_msg: "Failed. primary_dns is not reachable. Provide valid dns"
-secondary_dns_success_msg: "secondary_dns successfully validated"
-secondary_dns_fail_msg: "Failed. Incorrect secondary_dns provided in base_vars.yml"
-secondary_dns_not_reachable_msg: "[WARNING] secondary_dns is not reachable"
-ping_search_key: "100% packet loss"
+success_device_config: " Success. Device_config_support has valid values"
+fail_device_config: "Failed. Give a valid value in device_config_support"
+device_ip_list_not_supported: "[Warning] Device_ip_list is invalid as minimum idrac_support should be true"
+mgmnt_device_fail: "Failed. Container already present. Either delete the container or make device_config_support as true"
+# Usage: device_config_validations.yml
+success_msg_ib: "Infiniband variables validated"
+fail_msg_ib: "Failed. Please provide all the InfiniBand related parameters in base_vars.yml"
+ethernet_device_config: " Ethernet_switch_support will be set to false only since device_config_support is set to false"
+ib_device_config: " Ib_switch_support will be set to false only since device_config_support is set to false"
+pv_device_config: " Powervault_support will be set to false only since device_config_support is set to false"
+idrac_support_valid: "Idrac support is initiated."
+failed_idrac_support: " Failed. At least idrac_support should be true."
 
 # Usage: fetch_sm_inputs.yml
 ib_config_file: "{{ role_path }}/../../input_params/ib_vars.yml"
 opensm_conf_file: "{{ role_path }}/../../input_params/opensm.conf"
-
 fail_msg_config_file: ib_vars.yml file doesn't exist.
 fail_msg_opensm_config_file: opensm.conf file doesn't exist.
-
 fail_msg_ib_input_definition: Infiniband config directories must be defined.
 fail_msg_ib_input: Infiniband config directories can't be left empty.
 
@@ -265,3 +262,8 @@ firmware_update_success_msg: "firmware_update_required validated"
 firmware_update_fail_msg: "Failed. firmware_update_required accepts only true or false in idrac_vars.yml"
 poweredge_model_success_msg: "poweredge_model validated"
 poweredge_model_fail_msg: "Failed. poweredge_model is incorrect or unsupported. Please update the list with the supported models in the correct format"
+
+# Usage: validate_device_ip_file.yml
+fail_device_ip_format: "Failed. Incorrect file format. File should only contain IPs"
+success_device_ip_format: "File is correct"
+mgmnt_ip_path: "{{ role_path }}/../collect_device_info/files/mgmt_provisioned_hosts.yml"

+ 9 - 0
control_plane/roles/control_plane_customiso/tasks/check_prerequisites.yml

@@ -70,3 +70,12 @@
     line: "{{ mngmnt_network_ip }}"
     mode: "{{ file_permission }}"
     create: yes
+  when: device_config_support
+
+# When device_config_support is false, write public_ip into the management
+# station IP file under provision_idrac/files, creating the file if needed.
+- name: Copy management station ip to {{ management_station_ip_file }}
+  when: not device_config_support
+  lineinfile:
+    create: yes
+    mode: "{{ file_permission }}"
+    path: "{{ role_path }}/../provision_idrac/files/{{ management_station_ip_file }}"
+    line: "{{ public_ip }}"

+ 15 - 13
control_plane/roles/control_plane_customiso/tasks/main.yml

@@ -15,20 +15,22 @@
 
 # tasks file for control_plane_customiso
 
-- name: Check iso mount path
-  include_tasks: check_prerequisites.yml
+- block:
+  - name: Check iso mount path
+    include_tasks: check_prerequisites.yml
 
-- name: Edit iso config files
-  include_tasks: edit_iso_config.yml
+  - name: Edit iso config files
+    include_tasks: edit_iso_config.yml
 
-- name: Create unattended iso file rocky
-  include_tasks: create_unattended_iso_rocky.yml
-  when: provision_os == os_supported_rocky
+  - name: Create unattended iso file rocky
+    include_tasks: create_unattended_iso_rocky.yml
+    when: provision_os == os_supported_rocky
 
-- name: Create unattended iso file leap
-  include_tasks: create_unattended_iso_leap.yml
-  when: provision_os == os_supported_leap
+  - name: Create unattended iso file leap
+    include_tasks: create_unattended_iso_leap.yml
+    when: provision_os == os_supported_leap
 
-- name: Create unattended iso file centos
-  include_tasks: create_unattended_iso_centos.yml
-  when: provision_os == os_supported_centos
+  - name: Create unattended iso file centos
+    include_tasks: create_unattended_iso_centos.yml
+    when: provision_os == os_supported_centos
+  when: device_support_status

+ 1 - 7
control_plane/roles/control_plane_device/tasks/check_prerequisites.yml

@@ -1,4 +1,4 @@
-# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,11 +15,9 @@
 
 - name: Initialize variables
   set_fact:
-    mngmnt_network_container_status: false
     mngmnt_network_container_image_status: false
     backup_mngmnt_map_status: false
     new_mngmnt_node_status: false
-  tags: install
 
 - name: Check if any backup file exists
   block:
@@ -43,23 +41,19 @@
   register: mngmnt_network_container_image_result
   failed_when: false
   changed_when: false
-  tags: install
 
 - name: Check mngmnt_network_container status on the machine
   command: kubectl get pods -n network-config
   register: mngmnt_network_container_result
   failed_when: false
   changed_when: false
-  tags: install
 
 - name: Update mngmnt_network_container image status
   set_fact:
     mngmnt_network_container_image_status: true
   when: mngmnt_network_image_name in mngmnt_network_container_image_result.stdout
-  tags: install
 
 - name: Update mngmnt_network_container container status
   set_fact:
     mngmnt_network_container_status: true
   when: "'mngmnt-network-container' in mngmnt_network_container_result.stdout"
-  tags: install

+ 48 - 48
control_plane/roles/control_plane_device/tasks/main.yml

@@ -1,4 +1,4 @@
-#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -14,50 +14,50 @@
 ---
 
 # Tasks file for mngmnt_network
-
-- name: Check mngmnt_network_container status on machine
-  include_tasks: check_prerequisites.yml
-
-- name: Modify firewall settings for mngmnt_network_container
-  import_tasks: firewall_settings.yml
-  when: not mngmnt_network_container_status
-
-- name: Include common variables
-  include_vars:  ../../control_plane_common/vars/main.yml
-  when: not mngmnt_network_container_status
-
-- name: Internet validation
-  include_tasks: ../../control_plane_common/tasks/internet_validation.yml
-  when: not mngmnt_network_container_status
-
-- name: Include variable file base_vars.yml
-  include_vars: "{{ base_mngmnt_file }}"
-
-- name: Dhcp Configuration
-  import_tasks: dhcp_configure.yml
-  when: (not mngmnt_network_container_image_status) or ( backup_mngmnt_map_status)
-
-- name: Mapping file validation
-  import_tasks: mapping_file.yml
-  when: (not mngmnt_network_container_image_status) and (mngmnt_mapping_file_path) or ( backup_mngmnt_map_status)
-
-- name: mngmnt_network_container image creation
-  import_tasks: mngmnt_network_container_image.yml
-  when: not mngmnt_network_container_status
-
-- name: mngmnt_network_container configuration
-  import_tasks: configure_mngmnt_network_container.yml
-
-- name: mngmnt_network_container container status message
-  block:
-    - name: management network container running
-      debug:
-        msg: "{{ message_skipped }}"
-        verbosity: 2
-      when: mngmnt_network_container_status
-    - name: management network container not running
-      debug:
-        msg: "{{ message_installed }}"
-        verbosity: 2
-      when: not mngmnt_network_container_status
-  tags: install
+# Every task below is skipped unless device_config_support is truthy.
+# Most steps are further gated on the status facts
+# (mngmnt_network_container_status, mngmnt_network_container_image_status,
+# backup_mngmnt_map_status).
+- when: device_config_support
+  block:
+  - name: Check mngmnt_network_container status on machine
+    include_tasks: check_prerequisites.yml
+
+  - name: Modify firewall settings for mngmnt_network_container
+    when: not mngmnt_network_container_status
+    include_tasks: firewall_settings.yml
+
+  - name: Include common variables
+    when: not mngmnt_network_container_status
+    include_vars: ../../control_plane_common/vars/main.yml
+
+  - name: Internet validation
+    when: not mngmnt_network_container_status
+    include_tasks: ../../control_plane_common/tasks/internet_validation.yml
+
+  - name: Include variable file base_vars.yml
+    include_vars: "{{ base_mngmnt_file }}"
+
+  - name: Dhcp Configuration
+    when: (not mngmnt_network_container_image_status) or backup_mngmnt_map_status
+    include_tasks: dhcp_configure.yml
+
+  # `and` binds tighter than `or`; the grouping is spelled out here.
+  - name: Mapping file validation
+    when: ((not mngmnt_network_container_image_status) and mngmnt_mapping_file_path) or backup_mngmnt_map_status
+    include_tasks: mapping_file.yml
+
+  - name: mngmnt_network_container image creation
+    when: not mngmnt_network_container_status
+    include_tasks: mngmnt_network_container_image.yml
+
+  - name: mngmnt_network_container configuration
+    include_tasks: configure_mngmnt_network_container.yml
+
+  # Exactly one of the two debug messages is emitted (at verbosity 2).
+  - name: mngmnt_network_container container status message
+    block:
+      - name: management network container running
+        when: mngmnt_network_container_status
+        debug:
+          msg: "{{ message_skipped }}"
+          verbosity: 2
+      - name: management network container not running
+        when: not mngmnt_network_container_status
+        debug:
+          msg: "{{ message_installed }}"
+          verbosity: 2

+ 1 - 5
control_plane/roles/control_plane_ib/tasks/configure_infiniband_container.yml

@@ -22,22 +22,18 @@
 - name: Deploy infiniband pod
   command: "kubectl apply -f {{ role_path }}/files/k8s_infiniband.yml"
   changed_when: true
-  tags: install
   when: infiniband_container_status and  (not infiniband_container_config_status)
 
 - name: Wait for infiniband pod to come to ready state
   command: kubectl wait --for=condition=ready -n network-config pod -l app=infiniband
   changed_when: false
-  tags: install
 
 - name: Get infiniband pod name
   command: 'kubectl get pod -n network-config -l app=infiniband -o jsonpath="{.items[0].metadata.name}"'
   changed_when: false
   register: infiniband_pod_name
-  tags: install
 
 - name: Configuring infiniband container
   command: 'kubectl exec --stdin --tty -n network-config {{ infiniband_pod_name.stdout }} \
-    -- ansible-playbook /root/omnia/control_plane/roles/control_plane_ib/files/infiniband_container_configure.yml -e ib_nic= "{{ ib_network_nic }}"'
+    -- ansible-playbook /root/omnia/control_plane/roles/control_plane_ib/files/infiniband_container_configure.yml -e ib_nic="{{ ib_network_nic }}"'
   changed_when: false
-  tags: install

+ 5 - 2
control_plane/roles/control_plane_ib/tasks/main.yml

@@ -44,14 +44,17 @@
 
     - name: infiniband_container container status message
       block:
-        - debug:
+        - name: Infiniband container skipped
+          debug:
             msg: "{{ infiniband_message_skipped }}"
             verbosity: 2
           when: infiniband_container_status
-        - debug:
+        - name: Infiniband container installed
+          debug:
             msg: "{{ infiniband_message_installed }}"
             verbosity: 2
           when: not infiniband_container_status
   when:
+    - device_support_status
     - ib_switch_support
     - mgmt_os in os_supported_rocky

+ 1 - 1
control_plane/roles/control_plane_ib/vars/main.yml

@@ -25,5 +25,5 @@ mount_path: /root/omnia
 infiniband_message_skipped: "The container is already present"
 infiniband_message_installed: "The container is installed"
 ib_kube_config_file: "{{ role_path }}/files/k8s_infiniband.yml"
-ib_container_name: inifiniband-container"
+ib_container_name: infiniband-container
 infiniband_message_installed: "The container is installed"

+ 3 - 1
control_plane/roles/control_plane_repo/tasks/main.yml

@@ -22,4 +22,6 @@
 
     - name: Download iDRAC firmware updates
       include_tasks: download_fmw_updates.yml
-  when: firmware_update_required
+  when:
+    - device_support_status
+    - firmware_update_required

+ 221 - 218
control_plane/roles/deploy_job_templates/tasks/main.yml

@@ -1,4 +1,4 @@
-# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,220 +12,223 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 ---
-- name: Check if {{ tower_config_file }} file is encrypted
-  command: cat {{ tower_config_file }}
-  changed_when: false
-  no_log: true
-  register: config_content
-
-- name: Decrpyt {{ tower_config_file }}
-  command: >-
-    ansible-vault decrypt {{ tower_config_file }}
-    --vault-password-file {{ tower_vault_file }}
-  when: "'$ANSIBLE_VAULT;' in config_content.stdout"
-  changed_when: false
-
-- name: Change file permissions
-  file:
-    path: "{{ tower_config_file }}"
-    mode: "{{ file_perm }}"
-
-- name: Fetch awx host
-  command: grep "host:" "{{ tower_config_file }}"
-  changed_when: false
-  register: fetch_awx_host
-
-- name: Fetch awx password
-  command: grep "password:" "{{ tower_config_file }}"
-  changed_when: false
-  no_log: true
-  register: fetch_awx_password
-
-- name: Set awx variables
-  set_fact:
-    awx_host: "{{ fetch_awx_host.stdout | regex_replace('host: ','') }}"
-    awx_password: "{{ fetch_awx_password.stdout | regex_replace('password: ','') }}"
-  no_log: true
-
-- name: Launch dynamic inventory
-  block:
-    - name: Launch device inventory job template
-      awx.awx.tower_job_launch:
-        job_template: "{{ device_inventory_template }}"
-        tower_config_file: "{{ tower_config_file }}"
-        wait: yes
-        timeout: "{{ awx_max_wait_time }}"
-      register: inventory_job_status
-  rescue:
-    - name: Restart awx pod
-      command: kubectl rollout restart deployment awx -n awx
-      changed_when: false
-      when:
-        - inventory_job_status.status is defined
-        - '"pending" in inventory_job_status.status'
-
-    - name: Wait for the awx pod to be up and running
-      wait_for:
-        timeout: "{{ pod_restart_time }}"
-      when:
-        - inventory_job_status.status is defined
-        - '"pending" in inventory_job_status.status'
-
-    - name: Launch device inventory job template
-      awx.awx.tower_job_launch:
-        job_template: "{{ device_inventory_template }}"
-        tower_config_file: "{{ tower_config_file }}"
-        wait: yes
-        timeout: "{{ awx_max_wait_time }}"
-      when:
-        - inventory_job_status.status is defined
-        - '"pending" in inventory_job_status.status'
-
-    - name: Warning message for device inventory template
-      debug:
-        msg: "{{ device_inventory_template_warn_msg }}"
-      when:
-        - inventory_job_status.status is defined
-        - '"pending" not in inventory_job_status.status'
-
-- name: Configure TOR Switches
-  block:
-    - name: Launch ethernet job template for TOR switches
-      awx.awx.tower_job_launch:
-        job_template: "{{ ethernet_job_template }}"
-        tower_config_file: "{{ tower_config_file }}"
-        wait: yes
-        timeout: "{{ awx_max_wait_time }}"
-      register: ethernet_job_status
-  rescue:
-    - name: Warning message for ethernet template
-      debug:
-        msg: "{{ ethernet_template_warn_msg }}"
-  when: ethernet_switch_support
-
-- name: Wait for 15 mins for DHCP to assign IP to devices
-  wait_for:
-    timeout: "{{ dhcp_wait_time }}"
-
-- name: Launch device inventory job template
-  awx.awx.tower_job_launch:
-    job_template: "{{ device_inventory_template }}"
-    tower_config_file: "{{ tower_config_file }}"
-    wait: yes
-    timeout: "{{ awx_max_wait_time }}"
-  register: inventory_job_status
-
-- name: Execute ethernet template
-  block:
-    - name: Launch ethernet job template for all switches
-      awx.awx.tower_job_launch:
-        job_template: "{{ ethernet_job_template }}"
-        tower_config_file: "{{ tower_config_file }}"
-        wait: yes
-        timeout: "{{ awx_max_wait_time }}"
-      register: ethernet_job_status
-  rescue:
-    - name: Warning message for ethernet template
-      debug:
-        msg: "{{ ethernet_template_warn_msg }}"
-  when: ethernet_switch_support
-
-- name: Execute infiniband template
-  block:
-    - name: Launch infiniband job template
-      awx.awx.tower_job_launch:
-        job_template: "{{ infiniband_job_template }}"
-        tower_config_file: "{{ tower_config_file }}"
-        wait: yes
-        timeout: "{{ awx_max_wait_time }}"
-      register: ib_job_status
-  rescue:
-    - name: Warning message for infiniband template
-      debug:
-        msg: "{{ infiniband_template_warn_msg }}"
-  when: ib_switch_support
-
-- name: Execute powervault_me4 template
-  block:
-    - name: Launch powervault_me4 job template
-      awx.awx.tower_job_launch:
-        job_template: "{{ powervault_me4_job_template }}"
-        tower_config_file: "{{ tower_config_file }}"
-        wait: yes
-        timeout: "{{ awx_max_wait_time }}"
-      register: powervault_job_status
-  rescue:
-    - name: Warning message for powervault_me4 template
-      debug:
-        msg: "{{ powervault_template_warn_msg }}"
-  when: powervault_support
-
-- name: Execute idrac template
-  block:
-    - name: Launch idrac job template
-      awx.awx.tower_job_launch:
-        job_template: "{{ idrac_job_template }}"
-        tower_config_file: "{{ tower_config_file }}"
-        wait: yes
-      register: idrac_job_status
-  rescue:
-    - name: Warning message for idrac template
-      debug:
-        msg: "{{ idrac_template_warn_msg }}"
-
-- name: Wait for 30 mins for idrac provisioning to be completed and inventory to be updated in AWX
-  wait_for:
-    timeout: "{{ provisioning_wait_time }}"
-  when: host_mapping_file
-
-- name: Check the host_mapping_file_path output
-  command: cat {{ host_mapping_file_path }}
-  changed_when: false
-  register: mapping_file
-  when: host_mapping_file
-
-- name: Group the hosts in node_inventory when mapping file is present
-  include_tasks: "{{ role_path }}/tasks/group_inventory.yml"
-  when: host_mapping_file and component_role_support
-
-- name: Launch deploy_omnia job template
-  awx.awx.tower_job_launch:
-    job_template: "{{ component_role_job_template }}"
-    tower_config_file: "{{ tower_config_file }}"
-    wait: yes
-  register: component_role_job_status
-  when: host_mapping_file and component_role_support
-
-- name: Create awx job template for configuring new devices
-  awx.awx.tower_job_template:
-    name: "{{ item.name }}"
-    job_type: "run"
-    organization: "{{ awx_organization }}"
-    inventory: "{{ item.inventory }}"
-    project: "{{ project_name }}"
-    playbook: "{{ item.playbook }}"
-    credentials:
-     - "{{ item.credential }}"
-    state: present
-    tower_config_file: "{{ tower_config_file }}"
-  loop: "{{ job_template_details }}"
-
-- name: Build a schedule for configure new devices
-  awx.awx.tower_schedule:
-    name: "{{ item.name }}"
-    unified_job_template: "{{ item.template }}"
-    rrule: "{{ item.rrule }}"
-    state: present
-    tower_config_file: "{{ tower_config_file }}"
-  loop: "{{ scheduled_template }}"
-
-- name: Encrypt {{ tower_config_file }}
-  command: >-
-    ansible-vault encrypt {{ tower_config_file }}
-    --vault-password-file {{ tower_vault_file }}
-  changed_when: false
-
-- name: Change file permissions
-  file:
-    path: "{{ tower_config_file }}"
-    mode: "{{ file_perm }}"
+# Launches the AWX job templates (device inventory, ethernet, infiniband,
+# powervault_me4, idrac, deploy_omnia) and creates the job templates and
+# schedules for configuring new devices. The whole block is skipped unless
+# device_support_status is truthy.
+#
+# The tower config file is decrypted near the top so later tasks can read it.
+# Re-encryption and the permission reset live in `always`, so a failure in any
+# task of the block cannot leave the decrypted file on disk.
+- block:
+  - name: Check if {{ tower_config_file }} file is encrypted
+    command: cat {{ tower_config_file }}
+    changed_when: false
+    no_log: true
+    register: config_content
+
+  - name: Decrypt {{ tower_config_file }}
+    command: >-
+      ansible-vault decrypt {{ tower_config_file }}
+      --vault-password-file {{ tower_vault_file }}
+    when: "'$ANSIBLE_VAULT;' in config_content.stdout"
+    changed_when: false
+
+  - name: Change file permissions
+    file:
+      path: "{{ tower_config_file }}"
+      mode: "{{ file_perm }}"
+
+  - name: Fetch awx host
+    command: grep "host:" "{{ tower_config_file }}"
+    changed_when: false
+    register: fetch_awx_host
+
+  - name: Fetch awx password
+    command: grep "password:" "{{ tower_config_file }}"
+    changed_when: false
+    no_log: true
+    register: fetch_awx_password
+
+  - name: Set awx variables
+    set_fact:
+      awx_host: "{{ fetch_awx_host.stdout | regex_replace('host: ','') }}"
+      awx_password: "{{ fetch_awx_password.stdout | regex_replace('password: ','') }}"
+    no_log: true
+
+  # If the first launch fails with a "pending" status, the AWX deployment is
+  # restarted and the launch retried once; any other registered status only
+  # produces a warning.
+  - name: Launch dynamic inventory
+    block:
+      - name: Launch device inventory job template
+        awx.awx.tower_job_launch:
+          job_template: "{{ device_inventory_template }}"
+          tower_config_file: "{{ tower_config_file }}"
+          wait: yes
+          timeout: "{{ awx_max_wait_time }}"
+        register: inventory_job_status
+    rescue:
+      - name: Restart awx pod
+        command: kubectl rollout restart deployment awx -n awx
+        changed_when: false
+        when:
+          - inventory_job_status.status is defined
+          - '"pending" in inventory_job_status.status'
+
+      - name: Wait for the awx pod to be up and running
+        wait_for:
+          timeout: "{{ pod_restart_time }}"
+        when:
+          - inventory_job_status.status is defined
+          - '"pending" in inventory_job_status.status'
+
+      - name: Launch device inventory job template
+        awx.awx.tower_job_launch:
+          job_template: "{{ device_inventory_template }}"
+          tower_config_file: "{{ tower_config_file }}"
+          wait: yes
+          timeout: "{{ awx_max_wait_time }}"
+        when:
+          - inventory_job_status.status is defined
+          - '"pending" in inventory_job_status.status'
+
+      - name: Warning message for device inventory template
+        debug:
+          msg: "{{ device_inventory_template_warn_msg }}"
+        when:
+          - inventory_job_status.status is defined
+          - '"pending" not in inventory_job_status.status'
+
+  - name: Configure TOR Switches
+    block:
+      - name: Launch ethernet job template for TOR switches
+        awx.awx.tower_job_launch:
+          job_template: "{{ ethernet_job_template }}"
+          tower_config_file: "{{ tower_config_file }}"
+          wait: yes
+          timeout: "{{ awx_max_wait_time }}"
+        register: ethernet_job_status
+    rescue:
+      - name: Warning message for ethernet template
+        debug:
+          msg: "{{ ethernet_template_warn_msg }}"
+    when: ethernet_switch_support
+
+  - name: Wait for 15 mins for DHCP to assign IP to devices
+    wait_for:
+      timeout: "{{ dhcp_wait_time }}"
+    when: device_config_support
+
+  - name: Launch device inventory job template
+    awx.awx.tower_job_launch:
+      job_template: "{{ device_inventory_template }}"
+      tower_config_file: "{{ tower_config_file }}"
+      wait: yes
+      timeout: "{{ awx_max_wait_time }}"
+    register: inventory_job_status
+
+  - name: Execute ethernet template
+    block:
+      - name: Launch ethernet job template for all switches
+        awx.awx.tower_job_launch:
+          job_template: "{{ ethernet_job_template }}"
+          tower_config_file: "{{ tower_config_file }}"
+          wait: yes
+          timeout: "{{ awx_max_wait_time }}"
+        register: ethernet_job_status
+    rescue:
+      - name: Warning message for ethernet template
+        debug:
+          msg: "{{ ethernet_template_warn_msg }}"
+    when: ethernet_switch_support
+
+  - name: Execute infiniband template
+    block:
+      - name: Launch infiniband job template
+        awx.awx.tower_job_launch:
+          job_template: "{{ infiniband_job_template }}"
+          tower_config_file: "{{ tower_config_file }}"
+          wait: yes
+          timeout: "{{ awx_max_wait_time }}"
+        register: ib_job_status
+    rescue:
+      - name: Warning message for infiniband template
+        debug:
+          msg: "{{ infiniband_template_warn_msg }}"
+    when: ib_switch_support
+
+  - name: Execute powervault_me4 template
+    block:
+      - name: Launch powervault_me4 job template
+        awx.awx.tower_job_launch:
+          job_template: "{{ powervault_me4_job_template }}"
+          tower_config_file: "{{ tower_config_file }}"
+          wait: yes
+          timeout: "{{ awx_max_wait_time }}"
+        register: powervault_job_status
+    rescue:
+      - name: Warning message for powervault_me4 template
+        debug:
+          msg: "{{ powervault_template_warn_msg }}"
+    when: powervault_support
+
+  - name: Execute idrac template
+    block:
+      - name: Launch idrac job template
+        awx.awx.tower_job_launch:
+          job_template: "{{ idrac_job_template }}"
+          tower_config_file: "{{ tower_config_file }}"
+          wait: yes
+        register: idrac_job_status
+    rescue:
+      - name: Warning message for idrac template
+        debug:
+          msg: "{{ idrac_template_warn_msg }}"
+
+  - name: Wait for 30 mins for idrac provisioning to be completed and inventory to be updated in AWX
+    wait_for:
+      timeout: "{{ provisioning_wait_time }}"
+    when: host_mapping_file
+
+  - name: Check the host_mapping_file_path output
+    command: cat {{ host_mapping_file_path }}
+    changed_when: false
+    register: mapping_file
+    when: host_mapping_file
+
+  - name: Group the hosts in node_inventory when mapping file is present
+    include_tasks: "{{ role_path }}/tasks/group_inventory.yml"
+    when: host_mapping_file and component_role_support
+
+  - name: Launch deploy_omnia job template
+    awx.awx.tower_job_launch:
+      job_template: "{{ component_role_job_template }}"
+      tower_config_file: "{{ tower_config_file }}"
+      wait: yes
+    register: component_role_job_status
+    when: host_mapping_file and component_role_support
+
+  - name: Create awx job template for configuring new devices
+    awx.awx.tower_job_template:
+      name: "{{ item.name }}"
+      job_type: "run"
+      organization: "{{ awx_organization }}"
+      inventory: "{{ item.inventory }}"
+      project: "{{ project_name }}"
+      playbook: "{{ item.playbook }}"
+      credentials:
+       - "{{ item.credential }}"
+      state: present
+      tower_config_file: "{{ tower_config_file }}"
+    loop: "{{ job_template_details }}"
+
+  - name: Build a schedule for configure new devices
+    awx.awx.tower_schedule:
+      name: "{{ item.name }}"
+      unified_job_template: "{{ item.template }}"
+      rrule: "{{ item.rrule }}"
+      state: present
+      tower_config_file: "{{ tower_config_file }}"
+    loop: "{{ scheduled_template }}"
+
+  # Runs whether or not the tasks above succeeded.
+  always:
+  - name: Encrypt {{ tower_config_file }}
+    command: >-
+      ansible-vault encrypt {{ tower_config_file }}
+      --vault-password-file {{ tower_vault_file }}
+    changed_when: false
+
+  - name: Change file permissions
+    file:
+      path: "{{ tower_config_file }}"
+      mode: "{{ file_perm }}"
+  when: device_support_status

+ 2 - 3
control_plane/roles/network_ethernet/tasks/main.yml

@@ -1,4 +1,4 @@
-# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -32,5 +32,4 @@
       dellos10_command:
         commands: "copy running-configuration startup-configuration"
       when: save_changes_to_startup
-
-  when: ethernet_switch_support
+  when: ethernet_switch_support

+ 2 - 3
control_plane/roles/network_ib/tasks/main.yml

@@ -1,4 +1,4 @@
-# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -36,5 +36,4 @@
 
     - name: Save running-config to startup-config
       include_tasks: save_config.yml
-
-  when: ib_switch_support
+  when: ib_switch_support

+ 3 - 2
control_plane/roles/webui_awx/tasks/awx_configuration.yml

@@ -149,7 +149,7 @@
     tower_config_file: "{{ tower_config_file }}"
   loop: "{{ omnia_job_template_details }}"
 
-- name: Build a schedule for idrac job template
+- name: Build a schedule for node inventory and device inventory
   awx.awx.tower_schedule:
     name: "{{ item.name }}"
     unified_job_template: "{{ item.template }}"
@@ -158,6 +158,7 @@
     tower_config_file: "{{ tower_config_file }}"
   register: result
   loop: "{{ scheduled_templates }}"
+  when: item.flag
 
 - name: Encrypt {{ tower_config_file }}
   command: >-
@@ -168,4 +169,4 @@
 - name: Change file permissions
   file:
     path: "{{ tower_config_file }}"
-    mode: "{{ file_perm }}"
+    mode: "{{ file_perm }}"

+ 10 - 10
control_plane/roles/webui_awx/vars/main.yml

@@ -75,31 +75,31 @@ organization_name: 'DellEMC'
 project_name: 'omnia'
 project_description: "Directory which contains configuration playbooks"
 inventory_names:
-  - { name: idrac_inventory, description: "Inventory to store IPs of idrac servers", flag: true }
+  - { name: node_inventory, description: "Inventory to store host IPs of servers", flag: true }
+  - { name: idrac_inventory, description: "Inventory to store IPs of idrac servers", flag: "{{ idrac_support }}" }
   - { name: ethernet_inventory, description: "Inventory to store IPs of ethernet switches", flag: "{{ ethernet_switch_support }}" }
   - { name: infiniband_inventory, description: "Inventory to store IPs of infiniband switches", flag: "{{ ib_switch_support }}" }
   - { name: powervault_me4_inventory, description: "Inventory to store IPs of ME4 servers", flag: "{{ powervault_support }}" }
-  - { name: node_inventory, description: "Inventory to store host IPs of servers", flag: true }
 group_names:
   - { name: manager, description: "Group to store IP of head node" }
   - { name: compute, description: "Group to store IPs of compute nodes" }
   - { name: login_node, description: "Group to store IP of login node" }
   - { name: nfs_node, description: "Group to store IP of NFS node" }
 credential_details:
-  - { name: idrac_credential, type: Network, username: "{{ idrac_username }}", password: "{{ idrac_password }}", flag: true }
+  - { name: node_credential, type: Machine, username: root, password: "{{ provision_password }}", flag: true }
+  - { name: idrac_credential, type: Network, username: "{{ idrac_username }}", password: "{{ idrac_password }}", flag: "{{ idrac_support }}" }
   - { name: ethernet_credential, type: Machine, username: "{{ ethernet_switch_username }}", password: "{{ ethernet_switch_password }}", flag: "{{ ethernet_switch_support }}" }
   - { name: infiniband_credential, type: Network, username: "{{ ib_username }}", password: "{{ ib_password }}", flag: "{{ ib_switch_support }}" }
   - { name: powervault_me4_credential, type: Network, username: "{{ powervault_me4_username }}", password: "{{ powervault_me4_password }}", flag: "{{ powervault_support }}" }
-  - { name: node_credential, type: Machine, username: root, password: "{{ provision_password }}", flag: true }
 job_template_details:
-  - { name: idrac_template, inventory: idrac_inventory, playbook: control_plane/idrac.yml, credential: idrac_credential, flag: true }
+  - { name: node_inventory_job, inventory: node_inventory, playbook: control_plane/collect_node_info.yml, credential: node_credential, flag: true }
+  - { name: device_inventory_job, inventory: node_inventory, playbook: control_plane/collect_device_info.yml, credential: node_credential, flag: "{{ device_support_status }}" }
+  - { name: idrac_template, inventory: idrac_inventory, playbook: control_plane/idrac.yml, credential: idrac_credential, flag: "{{ idrac_support }}" }
   - { name: ethernet_template, inventory: ethernet_inventory, playbook: control_plane/ethernet.yml, credential: ethernet_credential, flag: "{{ ethernet_switch_support }}" }
   - { name: infiniband_template, inventory: infiniband_inventory, playbook: control_plane/infiniband.yml, credential: infiniband_credential, flag: "{{ ib_switch_support }}" }
   - { name: powervault_me4_template, inventory: powervault_me4_inventory, playbook: control_plane/powervault_me4.yml, credential: powervault_me4_credential, flag: "{{ powervault_support }}" }
-  - { name: node_inventory_job, inventory: node_inventory, playbook: control_plane/collect_node_info.yml, credential: node_credential, flag: true }
-  - { name: device_inventory_job, inventory: node_inventory, playbook: control_plane/collect_device_info.yml, credential: node_credential, flag: true }
 omnia_job_template_details:
-  - { name: deploy_omnia_template, inventory: node_inventory, playbook: omnia.yml, credential: node_credential }  
+  - { name: deploy_omnia_template, inventory: node_inventory, playbook: omnia.yml, credential: node_credential }
 scheduled_templates:
-  - { name: NodeInventorySchedule, template: node_inventory_job, schedule_rule: "DTSTART:20210815T120000Z RRULE:FREQ=MINUTELY;INTERVAL=10" }
-  - { name: DeviceInventorySchedule, template: device_inventory_job, schedule_rule: "DTSTART:20210815T060000Z RRULE:FREQ=DAILY;INTERVAL=1"}
+  - { name: NodeInventorySchedule, template: node_inventory_job, schedule_rule: "DTSTART:20210815T120000Z RRULE:FREQ=MINUTELY;INTERVAL=10", flag: true }
+  - { name: DeviceInventorySchedule, template: device_inventory_job, schedule_rule: "DTSTART:20210815T060000Z RRULE:FREQ=DAILY;INTERVAL=1", flag: "{{ device_support_status }}"}

+ 19 - 8
docs/INSTALL_OMNIA.md

@@ -75,15 +75,26 @@ __Note:__ After the Omnia repository is cloned, a folder named __omnia__ is crea
 
 2. Change the directory to __omnia__: `cd omnia`
 
-3. In the `omnia_config.yml` file, provide the following details.  
-	a. The **k8s_version** variable specifies the Kubernetes version which will be installed on the manager and compute nodes. By default, it is set to **1.16.7**. Edit this variable to change the version. Supported versions are 1.16.7 and 1.19.3.  
-	b. The variable `login_node_required` is set to "true" by default to configure the login node. To configure the login node, edit the following variables:
-	* domain_name: Domain name you intend to configure.
-	* realm_name: A realm name is often, but not always, the upper case version of the name of the DNS domain over which it presides.
-	* directory_manager_password: Password of the Directory Manager with full access to the directory for system management tasks.
-	* ipa_admin_password: "admin" user password for the IPA server.  
+3. In the `omnia_config.yml` file, provide the following details:  
+
+| Parameter Name             | Default Value | Additional Information                                                                                                                                                                                                                               |
+|----------------------------|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| mariadb_password           | password      | Password used to access the Slurm database. <br> Required Length: 8   characters <br> The password must not contain -,\, ',"                                                                                                                         |
+| k8s_version                | 1.16.7        | Kubernetes Version <br> Accepted Values: "1.16.7" or   "1.19.3"                                                                                                                                                                                      |
+| k8s_cni                    | calico        | CNI type used by Kubernetes. <br> Accepted values: calico, flannel                                                                                                                                                                                   |
+| k8s_pod_network_cidr       | 10.244.0.0/16 | Kubernetes pod network CIDR                                                                                                                                                                                                                          |
+| docker_username            |               | Username to login to Docker. A kubernetes secret will be created and   patched to the service account in default namespace. <br> This value is   optional but suggested to avoid docker pull limit issues                                            |
+| docker_password            |               | Password to login to Docker <br> This value is mandatory if a   docker_username is provided                                                                                                                                                          |
+| ansible_config_file_path   | /etc/ansible  | Path where the ansible.cfg file can be found. <br> If `dnf` is   used, the default value is valid. If `pip` is used, the variable must be set   manually                                                                                             |
+| login_node_required        | TRUE          | Boolean indicating whether the login node is required or not                                                                                                                                                                                         |
+| domain_name                | omnia.test    | Sets the intended domain name                                                                                                                                                                                                                        |
+| realm_name                 | OMNIA.TEST    | Sets the intended realm name                                                                                                                                                                                                                         |
+| directory_manager_password |               | Password authenticating admin level access to the Directory for system   management tasks. It will be added to the instance of directory server   created for IPA. <br> Required Length: 8 characters. <br> The   password must not contain -,\, '," |
+| ipa_admin_password         |               | IPA server admin password                                                                                                                                                                                                                            |
+| enable_secure_login_node   |  **false**, true             | Boolean value deciding whether security features are enabled on the Login Node. For more information, see [here](docs/Security/Enable_Security_LoginNode.md).                                                                                                                                                                                                                           |
 	
-	If you do not want to configure the login node, then you can set the `login_node_required` variable to "false". Without the login node, Slurm jobs can be scheduled only through the manager node.
+	
+>> __NOTE:__  Without the login node, Slurm jobs can be scheduled only through the manager node.
 
 4. Create an inventory file in the *omnia* folder. Add login node IP address under the *[login_node]* group, manager node IP address under the *[manager]* group, compute node IP addresses under the *[compute]* group, and NFS node IP address under the *[nfs_node]* group. A template file named INVENTORY is provided in the *omnia\docs* folder.  
 	**NOTE**: Ensure that all the four groups (login_node, manager, compute, nfs_node) are present in the template, even if the IP addresses are not updated under login_node and nfs_node groups. 

A diferenza do arquivo foi suprimida porque é demasiado grande
+ 8 - 11
docs/INSTALL_OMNIA_CONTROL_PLANE.md


+ 12 - 7
docs/README.md

@@ -4,12 +4,14 @@
 1.2
 
 #### Previous release version
-1.1.1
+1.1.2
 
 ## Blogs about Omnia
 - [Introduction to Omnia](https://infohub.delltechnologies.com/p/omnia-open-source-deployment-of-high-performance-clusters-to-run-simulation-ai-and-data-analytics-workloads/)
 - [Taming the Accelerator Cambrian Explosion with Omnia](https://infohub.delltechnologies.com/p/taming-the-accelerator-cambrian-explosion-with-omnia/)
 - [Containerized HPC Workloads Made Easy with Omnia and Singularity](https://infohub.delltechnologies.com/p/containerized-hpc-workloads-made-easy-with-omnia-and-singularity/)
+- [Solution Overview: Dell EMC Omnia Software](https://infohub.delltechnologies.com/section-assets/omnia-solution-overview)
+- [Solution Brief: Omnia Software](https://infohub.delltechnologies.com/section-assets/omnia-solution-brief)
 
 ## What Omnia does
 Omnia can build clusters that use Slurm or Kubernetes (or both!) for workload management. Omnia will install software from a variety of sources, including:
@@ -49,7 +51,7 @@ The following table lists the software and operating system requirements on the
 
 Requirements  |   Version
 ----------------------------------  |   -------
-OS pre-installed on the management station  |  CentOS 8.4/ Rocky 8.5/ Leap 15.3
+OS pre-installed on the management station  |  Rocky 8.5/ Leap 15.3
 OS deployed by Omnia on bare-metal Dell EMC PowerEdge Servers | Rocky 8.5 Minimal Edition/ Leap 15.3
 Cobbler  |  3.2.2
 Ansible AWX  |  19.4.0
@@ -81,7 +83,6 @@ Software	|	License	|	Compatible Version	|	Description
 LeapOS 15.3	|	-	|	15.3|	Operating system on entire cluster
 CentOS Linux release 7.9.2009 (Core)	|	-	|	7.9	|	Operating system on entire cluster except for management station
 Rocky 8.5	|	-	|	8.5	|	Operating system on entire cluster except for management station
-CentOS Linux release 8.4.2105	|	-	|	8.4	|	Operating system on the management station	
 Rocky 8.5	|	-	|	8.5	|	Operating system on the management station
 MariaDB	|	GPL 2.0	|	5.5.68	|	Relational database used by Slurm
 Slurm	|	GNU General Public	|	20.11.7	|	HPC Workload Manager
@@ -113,11 +114,15 @@ Buildah	|	Apache-2.0	|	1.21.4	|	Tool to build and run container
 PostgreSQL	|	Copyright (c) 1996-2020, PostgreSQL Global Development Group	|	10.15	|	Database Management System
 Redis	|	BSD-3-Clause License	|	6.0.10	|	In-memory database
 NGINX	|	BSD-2-Clause License	|	1.14	|	-
-dellemc.openmanage	|	GNU-General Public License v3.0	|	3.5.0	|	It is a systems management and monitoring application that provides a comprehensive view of the Dell EMC servers, chassis, storage, and network switches on the enterprise network
 dellemc.os10	|	GNU-General Public License v3.1	|	1.1.1	|	It provides networking hardware abstraction through a common set of APIs
-Genisoimage-dnf	|	GPL v3	|	1.1.11	|	Genisoimage is a pre-mastering program for creating ISO-9660 CD-ROM  filesystem images
-OMSDK	|	Apache-2.0	|	1.2.456	|	Dell EMC OpenManage Python SDK (OMSDK) is a python library that helps developers and customers to automate the lifecycle management of PowerEdge Servers
-
+OMSDK	|	Apache-2.0	|	1.2.488	|	Dell EMC OpenManage Python SDK (OMSDK) is a python library that helps developers and customers to automate the lifecycle management of PowerEdge Servers
+| Loki                                  | Apache License 2.0               | 2.4.1  | Loki is a log aggregation system   designed to store and query logs from all your applications and   infrastructure                            |
+| Promtail                              | Apache License 2.0               | 2.4.1  | Promtail is an agent which ships the contents of local logs to   a private Grafana Loki instance or Grafana Cloud.                             |
+| kube-prometheus-stack                 | Apache License 2.0               | 25.0.0 | Kube Prometheus Stack is a collection of Kubernetes manifests,   Grafana dashboards, and Prometheus rules.                                     |
+| mailx                                 | MIT License                      | 12.5   | mailx is a Unix utility program for sending and receiving   mail.                                                                              |
+| postfix                               | IBM Public License               | 3.5.8  | Mail Transfer Agent (MTA) designed to determine routes and   send emails                                                                       |
+| xorriso                               | GPL version 3                    | 1.4.8  | xorriso copies file objects from POSIX compliant filesystems   into Rock Ridge enhanced ISO 9660 filesystems.                                  |
+| Dell EMC   OpenManage Ansible Modules | GNU- General Public License v3.0 | 5.0.0  | OpenManage Ansible Modules simplifies and automates   provisioning, deployment, and updates of PowerEdge servers and modular   infrastructure. |
 
 # Known issues  
 * **Issue**: Hosts are not displayed on the AWX UI.  

+ 15 - 0
docs/Security/Enable_Security_LoginNode.md

@@ -0,0 +1,15 @@
+# Enabling Security on the Login Node (RockyOS)
+
+* Ensure that `enable_secure_login_node` is set to **true** in `omnia_config.yml`
+* Set the following parameters in `omnia_security_config.yml`
+
+|  Parameter Name        |  Default Value  |  Additional Information                                                                                                                                          |
+|------------------------|-----------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| max_failures           | 3               | Failures allowed before lockout. <br> This value cannot currently   be changed.                                                                                  |
+| failure_reset_interval | 60              | Period (in seconds) after which the number of failed login attempts is   reset <br> Accepted Values: 30-60                                                       |
+| lockout_duration       | 10              | Period (in seconds) for which users are locked out. <br> Accepted   Values: 5-10                                                                                 |
+| session_timeout        | 180             | Period (in seconds) after which idle users get logged out automatically   <br> Accepted Values: 30-90                                                            |
+| alert_email_address    |                 | Email address used for sending alerts in case of authentication failure   <br> If this variable is left blank, authentication failure alerts will   be disabled. |
+| allow_deny             | Allow           | This variable sets whether the user list is Allowed or Denied. <br>   Accepted Values: Allow, Deny                                                               |
+| user                   |                 | Array of users that are allowed or denied based on the `allow_deny`   value. Multiple users must be separated by a space.                                        |
+

+ 26 - 14
docs/Security/Enable_Security_ManagementStation.md

@@ -1,25 +1,37 @@
-# Enabling Security on the Management Station and Login Node
+# Enabling Security on the Management Station
 
-Omnia uses FreeIPA to enable security features like authorisation and access control.
+Omnia uses FreeIPA on RockyOS to enable security features like authorisation and access control.
 
 ## Enabling Authentication on the Management Station:
 
 Set the parameter 'enable_security_support' to true in `base_vars.yml`
 
-## Prerequisites Before Enabling FreeIPA:
-* Enter the relevant values in `security_vars.yml`:
-
-| Parameter Name | Default Value | Additional Information                                                                                           |
-|----------------|---------------|------------------------------------------------------------------------------------------------------------------|
-| domain_name    | omnia.test    | The domain name should not contain an underscore ( _ )                                                           |
-| realm_name     | omnia.test    | The realm name should follow the following rules per https://www.freeipa.org/page/Deployment_Recommendations <br> * The realm name must not conflict with any other existing Kerberos realm name (e.g. name used by Active Directory). <br> * The realm name should be upper-case (EXAMPLE.COM) version of primary DNS domain name (example.com).  |
+## Prerequisites Before Enabling Security:
 
 * Enter the relevant values in `login_vars.yml`:
 
 | Parameter Name             | Default Value | Additional Information                                                                           |
 |----------------------------|---------------|--------------------------------------------------------------------------------------------------|
-| directory_manager_password |               | Password of the Directory Manager with full access to the directory for system management tasks. |
-| ipa_admin_password         |               | "admin" user password for the IPA server                                                         |
+| ms_directory_manager_password |               | Password of the Directory Manager with full access to the directory for system management tasks. |
+| ms_kerberos_admin_password         |               | "admin" user password for the IPA server on RockyOS. If LeapOS is in use, it is used as the "kerberos admin" user password for 389-ds <br> This field is not relevant to Management Stations running `LeapOS`                                                         |
+
+
+
+* Enter the relevant values in `security_vars.yml`:
+
+If `RockyOS` is in use on the Management Station:
+
+|  Parameter Name        |  Default Value  |  Additional Information                                                                                                                                                                                                                                                                                                                                      |
+|------------------------|-----------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+|  domain_name           |  omnia.test     |  The domain name should not contain   an underscore ( _ )                                                                                                                                                                                                                                                                                                    |
+|  realm_name            |  OMNIA.TEST     |  The realm name should follow the   following rules per https://www.freeipa.org/page/Deployment_Recommendations   <br> * The realm name must not conflict with any other existing   Kerberos realm name (e.g. name used by Active Directory). <br> * The   realm name should be upper-case (EXAMPLE.COM) version of primary DNS domain   name (example.com). |
+| max_failures           | 3               | Failures allowed before lockout. <br> This value cannot currently   be changed.                                                                                                                                                                                                                                                                              |
+| failure_reset_interval | 60              | Period (in seconds) after which the number of failed login attempts is   reset <br> Accepted Values: 30-60                                                                                                                                                                                                                                                   |
+| lockout_duration       | 10              | Period (in seconds) for which users are locked out. <br> Accepted   Values: 5-10                                                                                                                                                                                                                                                                             |
+| session_timeout        | 180             | Period (in seconds) after which idle users get logged out automatically   <br> Accepted Values: 30-90                                                                                                                                                                                                                                                        |
+| alert_email_address    |                 | Email address used for sending alerts in case of authentication failure. Currently, only one email address is supported in this field.   <br> If this variable is left blank, authentication failure alerts will   be disabled.                                                                                                                                                                                             |
+| allow_deny             | Allow           | This variable sets whether the user list is Allowed or Denied. <br>   Accepted Values: Allow, Deny                                                                                                                                                                                                                                                           |
+| user                   |                 | Array of users that are allowed or denied based on the `allow_deny`   value. Multiple users must be separated by a space.                                                                                                                                                                                                                                    |
 
 
 ## Log Aggregation via Grafana
@@ -34,12 +46,12 @@ Set the parameter 'enable_security_support' to true in `base_vars.yml`
 
 Loki uses basic regex based syntax to filter for specific jobs, dates or timestamps.
 
-* Select the Explore ![Explore Icon](Telemetry_Visualization/Images/ExploreIcon.PNG) tab to select control-plane-loki from the drop down.
+* Select the Explore ![Explore Icon](../Telemetry_Visualization/Images/ExploreIcon.PNG) tab to select control-plane-loki from the drop down.
 * Using [LogQL queries](https://grafana.com/docs/loki/latest/logql/log_queries/), all logs in `/var/log` can be accessed using filters (Eg: `{job="Omnia"}` )
 
 ## Viewing Logs on the Dashboard
 
-All log files can be viewed via the Dashboard tab (![Dashboard Icon](Telemetry_Visualization/Images/DashBoardIcon.PNG)). The Default Dashboard displays `omnia.log` and `syslog`. Custom dashboards can be created per user requirements.
+All log files can be viewed via the Dashboard tab (![Dashboard Icon](../Telemetry_Visualization/Images/DashBoardIcon.PNG)). The Default Dashboard displays `omnia.log` and `syslog`. Custom dashboards can be created per user requirements.
 
 Below is a list of all logs available to Loki and can be accessed on the dashboard:
 
@@ -49,7 +61,7 @@ Below is a list of all logs available to Loki and can be accessed on the dashboa
 | syslogs            | /var/log/messages                         | System Logging               | This log is configured by Default                                                                  |
 | Audit Logs         | /var/log/audit/audit.log                  | All Login Attempts           | This log is configured by Default                                                                  |
 | CRON logs          | /var/log/cron                             | CRON Job Logging             | This log is configured by Default                                                                  |
-| Pods logs          | /var/log/pods/*/*/*log                    | k8s pods                     | This log is configured by Default                                                                  |
+| Pods logs          | /var/log/pods/ * / * / * log                    | k8s pods                     | This log is configured by Default                                                                  |
 | Access Logs        | /var/log/dirsrv/slapd-<Realm Name>/access | Directory Server Utilization | This log is available when FreeIPA is set up ( ie when   enable_security_support is set to 'true') |
 | Error Log          | /var/log/dirsrv/slapd-<Realm Name>/errors | Directory Server Errors      | This log is available when FreeIPA is set up ( ie when   enable_security_support is set to 'true') |
 | CA Transaction Log | /var/log/pki/pki-tomcat/ca/transactions   | FreeIPA PKI Transactions     | This log is available when FreeIPA is set up ( ie when   enable_security_support is set to 'true') |

docs/login_node/login_user_creation.md → docs/Security/login_user_creation.md


+ 24 - 8
docs/Telemetry_Visualization/Visualization.md

@@ -2,7 +2,7 @@
 
 Using Grafana, users can poll multiple devices and create graphs/visualizations of key system metrics such as temperature, System power consumption, Memory Usage, IO Usage, CPU Usage, Total Memory Power, System Output Power, Total Fan Power, Total Storage Power, System Input Power, Total CPU Power, RPM Readings, Total Heat Dissipation, Power to Cool ratio, System Air Flow Efficiency etc.
 
-A lot of these metrics are collected using iDRAC telemetry. iDRAC telemetry allows you to stream telemetry data from your servers to a centralized log/metrics servers. For more information on iDRAC telemetry, click [here](https://github.com/dell/iDRAC-Telemetry-Scripting).
+A lot of these metrics are collected using iDRAC telemetry. iDRAC telemetry allows you to stream telemetry data from your servers to a centralized log/metrics servers. For more information on iDRAC telemetry, click [here](https://github.com/dell/iDRAC-Telemetry-Reference-Tools).
 
 ## Prerequisites
 
@@ -11,21 +11,21 @@ A lot of these metrics are collected using iDRAC telemetry. iDRAC telemetry allo
 
 | Parameter Name        | Default Value | Information |
 |-----------------------|---------------|-------------|
-| timescaledb_user      | postgres      |  Username used for connecting to timescale db. Minimum Legth: 2 characters.          |
-| timescaledb_password  | postgres      |  Password used for connecting to timescale db. Minimum Legth: 2 characters.           |
-| mysqldb_user          | mysql         |  Username used for connecting to mysql db. Minimum Legth: 2 characters.         |
-| mysqldb_password      | mysql         |  Password used for connecting to mysql db. Minimum Legth: 2 characters.            |
-| mysqldb_root_password | mysql         |  Password used for connecting to mysql db for root user. Minimum Legth: 2 characters.         |
+| timescaledb_user      |               |  Username used for connecting to timescale db. Minimum Length: 2 characters.          |
+| timescaledb_password  |               |  Password used for connecting to timescale db. Minimum Length: 2 characters.           |
+| mysqldb_user          |               |  Username used for connecting to mysql db. Minimum Length: 2 characters.         |
+| mysqldb_password      |               |  Password used for connecting to mysql db. Minimum Length: 2 characters.            |
+| mysqldb_root_password |               |  Password used for connecting to mysql db for root user. Minimum Length: 2 characters.         |
 
 3. All parameters in `telemetry/input_params/base_vars.yml` need to be filled in:
 
 | Parameter Name          | Default Value     | Information |
 |-------------------------|-------------------|-------------|
-| mount_location          | /mnt/omnia        | Sets the location all telemetry related files will be stored and both timescale and mysql databases will be mounted.            |
+| mount_location          | idrac_telemetrysource_services_db | Sets the location all telemetry related files will be stored and both timescale and mysql databases will be mounted.            |
 | idrac_telemetry_support | true              | This variable is used to enable iDRAC telemetry support and visualizations. Accepted Values: true/false            |
 | slurm_telemetry_support | true              | This variable is used to enable slurm telemetry support and visualizations. Slurm Telemetry support can only be activated when idrac_telemetry_support is set to true. Accepted Values: True/False.        |
 | timescaledb_name        | telemetry_metrics | Postgres DB with timescale extension is used for storing iDRAC and slurm telemetry metrics.            |
-| myscaledb_name          | mysql             | MySQL DB is used to store IPs and credentials of iDRACs having datacenter license           |
+| mysqldb_name            | idrac_telemetrysource_services_db | MySQL DB is used to store IPs and credentials of iDRACs having datacenter license           |
 
 3. Find the IP of the Grafana UI using:
  
@@ -48,6 +48,22 @@ Use any one of the following browsers to access the Grafana UI (https://< Grafan
 * The slurm manager and compute nodes are fetched at run time from node_inventory.
 * Slurm should be installed on the nodes, if not there is no point in executing slurm telemetry.
 
+## Initiating Telemetry
+
+1. Once `control_plane.yml` and `telemetry.yml` are executed, run the following commands from `omnia/telemetry`:
+
+`ansible-playbook telemetry.yml`
+
+>> __Note:__ Telemetry Collection is only initiated on iDRACs on AWX that have a datacenter license and are running a firmware version of 4 or higher.
+
+## Adding a New Node to Telemetry
+After initiation, new nodes can be added to telemetry by running the following commands from `omnia/telemetry`:
+		
+`ansible-playbook add_idrac_node.yml`
+		
+
+
+
 
 
 

BIN=BIN
docs/images/omnia-logo-transparent.png


+ 2 - 0
examples/device_ip_list.yml

@@ -0,0 +1,2 @@
+172.19.0.100
+172.19.0.200

+ 1 - 0
roles/common/files/module.conf

@@ -0,0 +1 @@
+allow_unsupported_modules 1

+ 85 - 33
roles/common/tasks/amd.yml

@@ -1,4 +1,4 @@
-#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -13,35 +13,87 @@
 #  limitations under the License.
 ---
 
-- name: Add AMD ROCm repository for CentOS 7.x
-  yum_repository:
-    name: ROCm
-    description: AMD GPU ROCm Repository
-    baseurl: https://repo.radeon.com/rocm/yum/rpm
-    gpgcheck: yes
-    gpgkey: https://repo.radeon.com/rocm/rocm.gpg.key
-    enabled: yes
-  tags: install
-  when: ansible_facts['distribution_major_version'] == "7"
-
-- name: Add AMD ROCm repository for CentOS/RockyLinux 8.x
-  yum_repository:
-    name: ROCm
-    description: AMD GPU ROCm Repository
-    baseurl: https://repo.radeon.com/rocm/centos8/rpm
-    gpgcheck: yes
-    gpgkey: https://repo.radeon.com/rocm/rocm.gpg.key
-    enabled: yes
-  tags: install
-  when: ansible_facts['distribution_major_version'] == "8"
-
-- name: Install AMD ROCm drivers
-  package:
-    name: rocm-dkms
-    enablerepo: ROCm
-    state: present
-  tags: install
-
-- name: Reboot after installing GPU drivers
-  reboot:
-  tags: install
+# CentOS/Rocky path: every task below is skipped when compute_os contains os_supported_leap.
+- when: os_supported_leap not in compute_os
+  tags: install
+  block:
+    # Repository URL differs per major release; only the matching task runs.
+    - name: Add AMD ROCm repository for CentOS 7.x
+      when: ansible_facts['distribution_major_version'] == "7"
+      yum_repository:
+        name: ROCm
+        description: AMD GPU ROCm Repository
+        baseurl: https://repo.radeon.com/rocm/yum/rpm
+        gpgkey: https://repo.radeon.com/rocm/rocm.gpg.key
+        gpgcheck: true
+        enabled: true
+
+    - name: Add AMD ROCm repository for CentOS/RockyLinux 8.x
+      when: ansible_facts['distribution_major_version'] == "8"
+      yum_repository:
+        name: ROCm
+        description: AMD GPU ROCm Repository
+        baseurl: https://repo.radeon.com/rocm/centos8/rpm
+        gpgkey: https://repo.radeon.com/rocm/rocm.gpg.key
+        gpgcheck: true
+        enabled: true
+
+    # Pulls rocm-dkms with the ROCm repository enabled for this transaction.
+    - name: Install AMD ROCm drivers
+      package:
+        name: rocm-dkms
+        enablerepo: ROCm
+        state: present
+
+    # No arguments: the reboot module runs with its defaults.
+    - name: Reboot after installing GPU drivers
+      reboot:
+
+# openSUSE Leap path: runs only when compute_os contains os_supported_leap and the
+# host's distribution version is at least os_supported_leap_version.
+- block:
+    # The package module reports its own changed state, so it is not overridden here.
+    - name: Installing dkms
+      package:
+        name: dkms
+        state: present
+      tags: install
+
+    # failed_when: false lets the play continue even if adding the repository fails.
+    - name: Add AMD ROCm repository for leap
+      zypper_repository:
+        name: rocm
+        repo: "{{ amd_repo }}"
+        state: present
+      failed_when: false
+      tags: install
+
+    - name: Import gpg-key for installing AMD ROCm
+      rpm_key:
+        key: "{{ gpg_key_amd }}"
+        state: present
+      tags: install
+
+    # zypper is driven through expect: '2' answers the [1/2/c/d/?] prompt and
+    # 'y' answers the (y) confirmation prompt.
+    - name: Install AMD ROCm drivers
+      ansible.builtin.expect:
+        command: zypper install rocm-dkms
+        responses:
+            (.*) [1/2/c/d/?](.): '2'
+            (.*)(y): 'y'
+      tags: install
+
+    # Copies the role's module.conf file to amd_gpu_dest.
+    - name: Allowing modules
+      copy:
+        src: module.conf
+        dest: "{{ amd_gpu_dest }}"
+        owner: root
+        group: root
+        mode: "{{ conf_file_mode }}"
+      tags: install
+
+    - name: Enable the modules amdgpu
+      modprobe:
+        name: amdgpu
+        state: present
+      tags: install
+
+    - name: Reboot after installing GPU drivers
+      reboot:
+      tags: install
+  # The version test compares release components numerically; a plain string ">="
+  # would sort "15.10" below "15.3".
+  when: ( os_supported_leap in compute_os ) and ( ansible_distribution_version is version(os_supported_leap_version, '>=') )

+ 138 - 66
roles/common/tasks/main.yml

@@ -1,4 +1,4 @@
-#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -12,6 +12,9 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 ---
+# Store the lowercased distribution fact as compute_os for this host.
+- name: Saving distribution of os
+  set_fact:
+    compute_os: "{{ ansible_facts['distribution'] | lower }}"
 
 - name: Create a custom fact directory on each host
   file:
@@ -27,76 +30,144 @@
     group: root
     mode: "{{ accelerator_discovery_script_mode }}"
 
-- name: Add epel-release repo
-  package:
-    name: epel-release
-    state: present
-  tags: install
+- block:
+    - name: Add epel-release repo
+      package:
+        name: epel-release
+        state: present
+      tags: install
+    
+    - name: Add elrepo GPG key
+      rpm_key:
+        state: present
+        key: "{{ elrepo_gpg_key_url }}"
+      register: elrepo_gpg_key
+      until: elrepo_gpg_key is not failed
+      retries: "{{ max_retries }}"
+      delay: "{{ max_delay }}"
+      tags: install
 
-- name: Add elrepo GPG key
-  rpm_key:
-    state: present
-    key: "{{ elrepo_gpg_key_url }}"
-  register: elrepo_gpg_key
-  until: elrepo_gpg_key is not failed
-  retries: 20
-  delay: 10
-  tags: install
+    - name: Add elrepo (nvidia kmod drivers)
+      package:
+        name: "{{ elrepo_rpm_url }}"
+        state: present
+      register: elrepo
+      until: elrepo is not failed
+      retries: "{{ max_retries }}"
+      delay: "{{ max_delay }}"
+      tags: install
+    
+    - name: Add docker community edition repository
+      get_url:
+        url: "{{ docker_repo_url }}"
+        dest: "{{ docker_repo_dest }}"
+      register: docker_repo
+      until: docker_repo is not failed
+      retries: "{{ max_retries }}"
+      delay: "{{ max_delay }}"
+      tags: install
 
-- name: Add elrepo (nvidia kmod drivers)
-  package:
-    name: "{{ elrepo_rpm_url }}"
-    state: present
-  register: elrepo
-  until: elrepo is not failed
-  retries: 20
-  delay: 10
-  tags: install
+    - name: Permanently Disable swap
+      mount:
+        name: "swap"
+        fstype: swap
+        state: absent
 
-- name: Add docker community edition repository
-  get_url:
-    url: "{{ docker_repo_url }}"
-    dest: "{{ docker_repo_dest }}"
-  register: docker_repo
-  until: docker_repo is not failed
-  retries: 20
-  delay: 10
-  tags: install
+    - name: Disable selinux
+      selinux:
+        state: disabled
+      tags: install
 
-- name: Permanently Disable swap
-  mount:
-    name: "swap"
-    fstype: swap
-    state: absent
+    - name: Install common packages
+      package:
+        name: "{{ common_packages }}"
+        state: present
+      tags: install
 
-- name: Disable selinux
-  selinux:
-    state: disabled
-  tags: install
+    # Installs the packages listed in common_packages_for_non_leap; named distinctly
+    # from the preceding common_packages task so play output is unambiguous.
+    - name: Install common packages for non-leap OS
+      package:
+        name: "{{ common_packages_for_non_leap }}"
+        state: present
+      tags: install
 
-- name: Install common packages
-  package:
-    name: "{{ common_packages }}"
-    state: present
-  tags: install
+    - name: Versionlock docker
+      command: "yum versionlock '{{ item }}'"
+      args:
+        warn: false
+      with_items:
+        - "{{ docker_packages }}"
+      changed_when: true
+      tags: install
 
-- name: Versionlock docker
-  command: "yum versionlock '{{ item }}'"
-  args:
-    warn: false
-  with_items:
-    - "{{ docker_packages }}"
-  changed_when: true
-  tags: install
+    - name: Collect host facts (including acclerator information)
+      setup: ~
 
-- name: Collect host facts (including acclerator information)
-  setup: ~
+    - name: Install infiniBand support
+      package:
+        name: "@Infiniband Support"
+        state: present
+      tags: install
+  when: ( os_supported_leap not in compute_os )
 
-- name: Install infiniBand support
-  package:
-    name: "@Infiniband Support"
-    state: present
-  tags: install
+# openSUSE Leap path: runs only when compute_os contains os_supported_leap and the
+# host's distribution version is at least os_supported_leap_version.
+- block:
+    - name: Installing python-xml
+      package:
+        name: python-xml
+        state: present
+      tags: install
+
+    - name: Add nvidia repo
+      zypper_repository:
+        name: NVIDIA
+        repo: "{{ nvidia_repo }}"
+        state: present
+        autorefresh: yes
+      tags: install
+
+    # Runs zypper directly so repository GPG keys are auto-imported (-l -y also
+    # passed for a non-interactive install); the task never reports "changed".
+    - name: Install nvidia
+      command: zypper --gpg-auto-import-keys install -l -y x11-video-nvidiaG06
+      changed_when: false
+      tags: install
+
+    # Retries the download up to max_retries times, waiting max_delay seconds between attempts.
+    - name: Add docker community edition repository
+      get_url:
+        url: "{{ docker_repo_url_leap }}"
+        dest: "{{ docker_repo_dest_leap }}"
+      register: docker_repo
+      until: docker_repo is not failed
+      retries: "{{ max_retries }}"
+      delay: "{{ max_delay }}"
+      tags: install
+
+    - name: Permanently Disable swap
+      mount:
+        name: "swap"
+        fstype: swap
+        state: absent
+      tags: install
+
+    - name: Install common packages
+      package:
+        name: "{{ common_packages }}"
+        state: present
+      tags: install
+
+    - name: Install docker-compose
+      package:
+        name: docker-compose
+        state: present
+      tags: install
+
+    # Re-gathers facts after the installs above.
+    - name: Collect host facts (including acclerator information)
+      setup: ~
+      tags: install
+
+    - name: Install infiniBand support
+      package:
+        name: infiniband-diags
+        state: present
+      tags: install
+  # The version test compares release components numerically; a plain string ">="
+  # would sort "15.10" below "15.3".
+  when: ( os_supported_leap in compute_os ) and ( ansible_distribution_version is version(os_supported_leap_version, '>=') )
 
 - name: Deploy time ntp/chrony
   include_tasks: ntp.yml
@@ -106,15 +177,16 @@
   include_tasks: nvidia.yml
   when:
     - ansible_local.inventory.nvidia_gpu > 0
-    - ansible_facts['distribution'] == os_name
-    - ansible_facts['distribution_major_version'] == os_version
+    - ( ansible_facts['distribution'] == os_name and ansible_facts['distribution_major_version'] == os_version) or 
+      ( os_supported_leap in compute_os ) and ( ansible_distribution_version >= os_supported_leap_version )
   tags: install
 
 - name: Install AMD GPU drivers and software components
   include_tasks: amd.yml
   when:
     - ansible_local.inventory.amd_gpu > 0
-    - ansible_facts['distribution'] == os_name
+    # Either the distribution equals os_name, or the host is a supported Leap release
+    # (version-aware compare; a string ">=" would sort "15.10" below "15.3").
+    - ( ansible_facts['distribution'] == os_name ) or
+      (( os_supported_leap in compute_os ) and ( ansible_distribution_version is version(os_supported_leap_version, '>=') ))
   tags: install
 
 - name: Get the hostname
@@ -194,4 +266,4 @@
     - "{{ groups['compute'] }}"
   when:
     - hostvars["127.0.0.1"]["login_node_required"]
-    - '"login_node" in group_names'
+    - '"login_node" in group_names'

+ 3 - 2
roles/common/tasks/ntp.yml

@@ -1,4 +1,4 @@
-#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -53,4 +53,5 @@
         notify:
           - Restart chrony
           - Sync chrony sources
-    when:  ( ansible_distribution == "CentOS" or   ansible_distribution == "RedHat" ) and ansible_distribution_major_version  > os_version
+    when:  (( ansible_distribution == "CentOS" or   ansible_distribution == "RedHat" ) and ansible_distribution_major_version  > os_version) or
+           ( os_supported_leap in compute_os ) and ( ansible_distribution_version >= os_supported_leap_version )

+ 89 - 47
roles/common/tasks/nvidia.yml

@@ -1,4 +1,4 @@
-#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -12,56 +12,98 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 ---
+- block:
+    - name: Add libnvidia container Repo
+      yum_repository:
+        name: libnvidia-container
+        description:  libnvidia-container
+        baseurl: https://nvidia.github.io/libnvidia-container/stable/centos7/$basearch
+        repo_gpgcheck: no
+        gpgcheck: no
+        gpgkey: https://nvidia.github.io/libnvidia-container/gpgkey
+        sslverify: yes
+        sslcacert: /etc/pki/tls/certs/ca-bundle.crt
+        enabled: yes
+      tags: install
 
-- name: Add libnvidia container Repo
-  yum_repository:
-    name: libnvidia-container
-    description:  libnvidia-container
-    baseurl: https://nvidia.github.io/libnvidia-container/stable/centos7/$basearch
-    repo_gpgcheck: no
-    gpgcheck: no
-    gpgkey: https://nvidia.github.io/libnvidia-container/gpgkey
-    sslverify: yes
-    sslcacert: /etc/pki/tls/certs/ca-bundle.crt
-    enabled: yes
-  tags: install
+    - name: Add nvidia-container-runtime Repo
+      yum_repository:
+        name: nvidia-container-runtime
+        description:  nvidia-container-runtime
+        baseurl: https://nvidia.github.io/nvidia-container-runtime/stable/centos7/$basearch
+        repo_gpgcheck: no
+        gpgcheck: no
+        gpgkey: https://nvidia.github.io/nvidia-container-runtime/gpgkey
+        sslverify: yes
+        sslcacert: /etc/pki/tls/certs/ca-bundle.crt
+        enabled: yes
+      tags: install
 
-- name: Add nvidia-container-runtime Repo
-  yum_repository:
-    name: nvidia-container-runtime
-    description:  nvidia-container-runtime
-    baseurl: https://nvidia.github.io/nvidia-container-runtime/stable/centos7/$basearch
-    repo_gpgcheck: no
-    gpgcheck: no
-    gpgkey: https://nvidia.github.io/nvidia-container-runtime/gpgkey
-    sslverify: yes
-    sslcacert: /etc/pki/tls/certs/ca-bundle.crt
-    enabled: yes
-  tags: install
+    - name: Add nvidia-docker Repo
+      yum_repository:
+        name: nvidia-docker
+        description:  nvidia-docker
+        baseurl: https://nvidia.github.io/nvidia-docker/centos7/$basearch
+        repo_gpgcheck: no
+        gpgcheck: no
+        gpgkey: https://nvidia.github.io/nvidia-docker/gpgkey
+        enabled: yes
+        sslverify: yes
+        sslcacert: /etc/pki/tls/certs/ca-bundle.crt
+      tags: install
 
-- name: Add nvidia-docker Repo
-  yum_repository:
-    name: nvidia-docker
-    description:  nvidia-docker
-    baseurl: https://nvidia.github.io/nvidia-docker/centos7/$basearch
-    repo_gpgcheck: no
-    gpgcheck: no
-    gpgkey: https://nvidia.github.io/nvidia-docker/gpgkey
-    enabled: yes
-    sslverify: yes
-    sslcacert: /etc/pki/tls/certs/ca-bundle.crt
-  tags: install
+    - name: Install nvidia driver and nvidia-docker2
+      package:
+        name: "{{ nvidia_packages }}"
+        enablerepo: libnvidia-container,nvidia-docker
+        state: present
+      tags: install
+    
+    - name: Reboot after installing GPU drivers
+      reboot:
+      tags: install
+  when:  ( os_supported_leap not in compute_os )
 
-- name: Install nvidia driver and nvidia-docker2
-  package:
-    name: "{{ nvidia_packages }}"
-    enablerepo: libnvidia-container,nvidia-docker
-    state: present
-  tags: install
+# openSUSE Leap path: runs only when compute_os contains os_supported_leap and the
+# host's distribution version is at least os_supported_leap_version.
+- block:
+    # Read-only listing; its stdout is checked by the next task.
+    - name: Fetching list of repos from zypper
+      command: /usr/bin/zypper lr --uri
+      changed_when: false
+      register: zypper_repos
+      tags: install
+
+    # Skipped when the listing already mentions "nvidia-docker"; failures are ignored.
+    - name: Add nvidia repository for leap
+      command: zypper --gpg-auto-import-keys addrepo {{ nvidia_docker_repo }}
+      failed_when: false
+      changed_when: false
+      when: '"nvidia-docker" not in zypper_repos.stdout'
+      tags: install
+
+    - name: Install libnvidia container repo
+      zypper:
+        name: libnvidia-container1
+        disable_gpg_check: yes
+        state: present
+      tags: install
+
+    - name: Install nvidia-container-runtime repo
+      zypper:
+        name: nvidia-container-runtime
+        disable_gpg_check: yes
+        state: present
+      tags: install
+
+    # Installs nvidia-docker2 itself; previously this task repeated the
+    # nvidia-container-runtime install from the task above.
+    - name: Install nvidia-docker
+      zypper:
+        name: nvidia-docker2
+        disable_gpg_check: yes
+        replacefiles: true
+        state: present
+      tags: install
+
+    - name: Reboot after installing GPU drivers
+      reboot:
+      tags: install
+  # The version test compares release components numerically; a plain string ">="
+  # would sort "15.10" below "15.3".
+  when: ( os_supported_leap in compute_os ) and ( ansible_distribution_version is version(os_supported_leap_version, '>=') )
 
 - name: Set nvidia as default runtime
   copy:
@@ -78,4 +120,4 @@
     state: restarted
     enabled: yes
     daemon_reload: yes
-  tags: install
+  tags: install

+ 23 - 6
roles/common/vars/main.yml

@@ -1,4 +1,4 @@
-#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -13,19 +13,36 @@
 #  limitations under the License.
 ---
 
+nvidia_repo: https://download.nvidia.com/opensuse/leap/15.3/
+docker_repo_url_leap: https://download.docker.com/linux/sles/docker-ce.repo
+docker_repo_dest_leap: /etc/YaST2/docker-ce.repo
+os_supported_leap: "leap"
+os_supported_leap_version: "15.3"
 common_packages:
-  - yum-plugin-versionlock
   - gcc
   - nfs-utils
   - python3-pip
   - bash-completion
-  - nvidia-detect
   - chrony
   - pciutils
-  - docker-ce-cli-20.10.2
-  - docker-ce-20.10.2
   - openssl
   - singularity
+  - python3-pexpect
+max_retries: 20
+max_delay: 20
+
+amd_gpu_dest: /etc/modprobe.d/10-unsupported-modules.conf
+gpg_key_amd: 'https://repo.radeon.com/rocm/rocm.gpg.key'
+amd_repo: 'https://repo.radeon.com/rocm/zyp/zypper/'
+
+nvidia_docker_repo: 'https://nvidia.github.io/nvidia-docker/opensuse-leap15.1/nvidia-docker.repo'
+# Quoted so the value stays the literal octal string instead of being implicitly
+# typed as an integer by the YAML loader (same form as hosts_file_mode).
+conf_file_mode: "0644"
+
+common_packages_for_non_leap:
+  - yum-plugin-versionlock
+  - nvidia-detect
+  - docker-ce-cli-20.10.2
+  - docker-ce-20.10.2
 
 docker_packages:
   - docker-ce-cli-20.10.2
@@ -78,4 +95,4 @@ daemon_file_dest: /etc/docker/
 daemon_file_mode: 0644
 
 hosts_file_dest: "/etc/hosts"
-hosts_file_mode: "0644"
+hosts_file_mode: "0644"