
Issue 448: Uploading latest documents

Signed-off-by: avinashvishwanath <avinash_vishwanath@dell.com>
John Lockman 3 years ago
commit 0d0cefe033

+ 0 - 4
control_plane/input_params/base_vars.yml

@@ -118,10 +118,6 @@ host_network_nic: "eno3"
 host_network_dhcp_start_range: ""
 host_network_dhcp_end_range: ""
 
-dhcp_gateway: ""
-dhcp_dns1: ""
-dhcp_dns2: ""
-
 # The mapping file consists of the MAC address and its respective IP address and hostname.
 # The format of mapping file should be MAC,hostname,IP and must be a CSV file.
 # Eg: xx:yy:zz:aa:bb,server,172.17.0.5

+ 15 - 17
control_plane/input_params/login_vars.yml

@@ -29,6 +29,18 @@ provision_password: ""
 # Mandatory value required
 cobbler_password: ""
 
+### Usage: provision_idrac ###
+
+# The username for idrac
+# The username must not contain -,\, ',"
+# Mandatory value required
+idrac_username: ""
+
+# Password used for idrac
+# The password must not contain -,\, ',"
+# Mandatory value required
+idrac_password: ""
+
 ### Usage: webui_awx ###
 
 # Password used for awx UI
@@ -43,7 +55,6 @@ cobbler_password: ""
 ethernet_switch_username: ""
 
 # Password used for ethernet switch
-# The Length of the password should be at least 8.
 # The password must not contain -,\, ',"
 ethernet_switch_password: ""
 
@@ -54,23 +65,9 @@ ethernet_switch_password: ""
 ib_username: ""
 
 # Password used for infiniband switch
-# The Length of the password should be at least 8.
 # The password must not contain -,\, ',"
 ib_password: ""
 
-### Usage: provision_idrac ###
-
-# The username for idrac
-# The username must not contain -,\, ',"
-# Mandatory value required
-idrac_username: ""
-
-# Password used for idrac
-# The Length of the password should be at least 8.
-# The password must not contain -,\, ',"
-# Mandatory value required
-idrac_password: ""
-
 ### Usage: powervault_me4 ###
 
 # The username for powervault_me4
@@ -78,6 +75,7 @@ idrac_password: ""
 powervault_me4_username: ""
 
 # Password used for powervault_me4
-# The Length of the password should be at least 8.
-# The password must not contain -,\, ',"
+# The password must have at least one uppercase character, one lowercase character,
+# one numeric character, and one non-alphanumeric character.
+# The password must not contain the following characters: -, \, ', ", . (period), < , and , (comma)
 powervault_me4_password: ""

+ 43 - 35
control_plane/input_params/powervault_me4_vars.yml

@@ -15,27 +15,18 @@
 
 ### Usage: powervault_me4 ###
 
-# User type of powervault_me4
-# The vaules supported are "standard" and "SNMPv3"
-# The default value is "standard"
-powervault_me4_usertype: "standard"
-
-# The user roles for managing and monitoring powervault_me4
-# The values supported are "monitor" and "manage"
-# The default value is "manage"
-# Only "manage" role allows - Addition of disk groups and creation of volumes 
-powervault_me4_roles: "manage"
-
 # This variable indicates the language selection
 # Currently only "English" is supported
 locale: "English"
 
 # Specify the system name to identify the system
-# By default it is set to "Uninitialized Name"
-powervault_me4_system_name: "Unintialized Name"
+# By default it is set to "Uninitialized_Name"
+# The length should be less than 30 characters and the name should not contain spaces.
+# This value is optional.
+powervault_me4_system_name: "Uninitialized_Name"
 
 # Specify the snmp notification level
-# critical: Sends notifications for Critical events only.
+# crit: Sends notifications for Critical events only.
 # error: Sends notifications for Error and Critical events.
 # warn: Sends notifications for Warning, Error, and Critical events.
 # resolved: Sends notifications for Resolved, Warning, Error, and Critical events.
@@ -43,26 +34,19 @@ powervault_me4_system_name: "Unintialized Name"
 # none: All events are excluded from trap notification and traps are disabled. 
 # However, Critical events and managed-logs events 400–402 are sent regardless of the notification setting.
 # Default value is "none"
+# Compulsory
 powervault_me4_snmp_notify_level: "none"
 
-# Specify the disk group name
-# If left blank, system automatically assigns the name
-powervault_me4_disk_group_name: ""
-
-# Specify the disk type
-# Values supported are "Virtual" and "Read Cache"
-powervault_me4_disk_type: "Virtual"
-
 # Specify the required RAID Level
 # The different RAID levels and the min and max number of disks supported for each RAID are
-# RAID1: 2
-# RAID5: 3-16
-# RAID6: 4-16
-# RAID10: 4-16
-# ADAPT: 12-128
-# Default value is "RAID1"
-# If Type "Read Cache" is selected, then RAID levels are not required
-powervault_me4_raid_levels: "RAID1"
+# r1/raid1: 2
+# r5/raid5: 3-16
+# r6/raid6: 4-16
+# r10/raid10: 4-16
+# adapt: 12-128
+# Default value is "raid1"
+# Compulsory
+powervault_me4_raid_levels: "raid1"
 
 # Specify the range of disks
 # Select a range of disks within an enclosure by entering a comma-separated list that contains 
@@ -70,13 +54,37 @@ powervault_me4_raid_levels: "RAID1"
 # Use the format enclosure-number.disk-range,enclosure-number.disk-range. 
 # For example, to select disks 3-12 in enclosure 1 and 5-23 in enclosure 2, enter 1.3-12,2.5-23.
 # For ME4012 - 0.0-0.11,1.0-1.11 are the allowed values
-powervault_me4_disk_range: ""
+# Default value is 0.1-2
+# Compulsory
+powervault_me4_disk_range: "0.1-2"
 
-# Specify the volume name
+# Specify the volume names
 # Cannot be left blank
-# the default value is "pv_omnia"
-powervault_me4_volume_name: "pv_omnia"
+# The default values are "k8s_volume" and "slurm_volume"
+# Compulsory
+powervault_me4_k8s_volume_name: "k8s_volume"
+powervault_me4_slurm_volume_name: "slurm_volume"
+
+# Specify the disk group name
+# If left blank, the system automatically assigns a name
+powervault_me4_disk_group_name: "omnia"
+
+# Specify the percentage of the disk space used for the partition
+# Default value is "60" (that is, 60%)
+# Compulsory
+powervault_me4_disk_partition_size: "60"
 
 # Specify the volume size
 # Format: 100GB <SizeGB>
-powervault_me4_volume_size: "100GB"
+# Compulsory
+powervault_me4_volume_size: "100GB"
+
+# Specify the pool for the volume
+# The pool can either be a/A or b/B.
+# Compulsory
+powervault_me4_pool: "a"
+
+# Specify the NIC on the server to which the PowerVault is connected.
+# Default value is eno1.
+# Compulsory
+powervault_me4_server_nic: "eno1"

+ 130 - 157
control_plane/roles/control_plane_common/tasks/password_config.yml

@@ -40,167 +40,140 @@
       idrac_username | length < 1 or
       idrac_password | length < 1
 
-- name: Assert provision_password
-  assert:
-    that:
-      - provision_password | length > min_length | int - 1
-      - provision_password | length < max_length | int + 1
-      - '"-" not in provision_password '
-      - '"\\" not in provision_password '
-      - '"\"" not in provision_password '
-      - " \"'\" not in provision_password "
-    success_msg: "{{ success_msg_provision_password }}"
-    fail_msg: "{{ fail_msg_provision_password }}"
-  register: provision_password_check
-
-- name: Assert cobbler_password
-  assert:
-    that:
-      - cobbler_password | length > min_length | int - 1
-      - cobbler_password | length < max_length | int + 1
-      - '"-" not in cobbler_password '
-      - '"\\" not in cobbler_password '
-      - '"\"" not in cobbler_password '
-      - " \"'\" not in cobbler_password "
-    success_msg: "{{ success_msg_cobbler_password }}"
-    fail_msg: "{{ fail_msg_cobbler_password }}"
-  register: cobbler_password_check
-
-- name: Assert idrac_username
-  assert:
-    that:
-      - idrac_username | length >= min_username_length
-      - idrac_username | length < max_length
-      - '"-" not in idrac_username '
-      - '"\\" not in idrac_username '
-      - '"\"" not in idrac_username '
-      - " \"'\" not in idrac_username "
-    success_msg: "{{ success_idrac_username }}"
-    fail_msg: "{{ fail_idrac_username }}"
-
-- name: Assert idrac_password
-  assert:
-    that:
-      - idrac_password | length > min_username_length | int - 1
-      - idrac_password | length < max_length | int + 1
-      - '"-" not in idrac_password '
-      - '"\\" not in idrac_password '
-      - '"\"" not in idrac_password '
-      - " \"'\" not in idrac_password "
-    success_msg: "{{ success_msg_idrac_password }}"
-    fail_msg: "{{ fail_msg_idrac_password }}"
-  register: idrac_password_check
-
-- name: Verify ethernet_switch_username and ethernet_switch_password are not empty
-  assert:
-    that:
-      - ethernet_switch_username | length > 0
-      - ethernet_switch_password | length > 0
-    success_msg: "{{ ethernet_params_success_msg }}"
-    fail_msg: "{{ ethernet_params_empty_fail_msg }}"
-  when: ethernet_switch_support
-
-- name: Assert ethernet_switch_username
-  assert:
-    that:
-      - ethernet_switch_username | length >= min_username_length
-      - ethernet_switch_username | length < max_length
-      - '"-" not in ethernet_switch_username '
-      - '"\\" not in ethernet_switch_username '
-      - '"\"" not in ethernet_switch_username '
-      - " \"'\" not in ethernet_switch_username "
-    success_msg: "{{ success_ethernet_switch_username }}"
-    fail_msg: "{{ fail_ethernet_switch_username }}"
-  when: ethernet_switch_support
-
-- name: Assert ethernet_switch_password
-  assert:
-    that:
-      - ethernet_switch_password | length > min_username_length | int - 1
-      - ethernet_switch_password | length < max_length | int + 1
-      - '"-" not in ethernet_switch_password '
-      - '"\\" not in ethernet_switch_password '
-      - '"\"" not in ethernet_switch_password '
-      - " \"'\" not in ethernet_switch_password "
-    success_msg: "{{ success_msg_ethernet_switch_password }}"
-    fail_msg: "{{ fail_msg_ethernet_switch_password }}"
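+# Each credential check below runs inside a block with no_log enabled on the assert
+# task; the rescue task reports a generic failure message so that secret values are
+# never printed in the playbook output.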
+- name: Assert provision credentials
+  block:
+    - name: Assert provision_password
+      assert:
+        that:
+          - provision_password | length > min_length | int - 1
+          - provision_password | length < max_length | int + 1
+          - '"-" not in provision_password '
+          - '"\\" not in provision_password '
+          - '"\"" not in provision_password '
+          - " \"'\" not in provision_password "
+      no_log: true
+  rescue:
+    - name: Provision password validation check
+      fail:
+        msg: "{{ fail_msg_provision_password }}"
+
+- name: Assert cobbler credentials
+  block:
+    - name: Assert cobbler_password
+      assert:
+        that:
+          - cobbler_password | length > min_length | int - 1
+          - cobbler_password | length < max_length | int + 1
+          - '"-" not in cobbler_password '
+          - '"\\" not in cobbler_password '
+          - '"\"" not in cobbler_password '
+          - " \"'\" not in cobbler_password "
+      no_log: true
+  rescue:
+    - name: Cobbler password validation check
+      fail:
+        msg: "{{ fail_msg_cobbler_password }}"
+
+- name: Assert idrac credentials
+  block:
+    - name: Assert idrac_username and idrac_password
+      assert:
+        that:
+          - idrac_username | length >= min_username_length
+          - idrac_username | length < max_length
+          - '"-" not in idrac_username '
+          - '"\\" not in idrac_username '
+          - '"\"" not in idrac_username '
+          - " \"'\" not in idrac_username "
+          - idrac_password | length > min_username_length | int - 1
+          - idrac_password | length < max_length | int + 1
+          - '"-" not in idrac_password '
+          - '"\\" not in idrac_password '
+          - '"\"" not in idrac_password '
+          - " \"'\" not in idrac_password "
+      no_log: true
+  rescue:
+    - name: iDRAC credentials validation check
+      fail:
+        msg: "{{ fail_msg_idrac_credentials }}"
+
+- name: Assert username and password for ethernet switches
+  block:
+    - name: Assert ethernet_switch_username and ethernet_switch_password
+      assert:
+        that:
+          - ethernet_switch_username | length >= min_username_length
+          - ethernet_switch_username | length < max_length
+          - '"-" not in ethernet_switch_username '
+          - '"\\" not in ethernet_switch_username '
+          - '"\"" not in ethernet_switch_username '
+          - " \"'\" not in ethernet_switch_username "
+          - ethernet_switch_password | length > min_username_length | int - 1
+          - ethernet_switch_password | length < max_length | int + 1
+          - '"-" not in ethernet_switch_password '
+          - '"\\" not in ethernet_switch_password '
+          - '"\"" not in ethernet_switch_password '
+          - " \"'\" not in ethernet_switch_password "
+      no_log: true
+  rescue:
+    - name: Ethernet switch credentials validation check
+      fail:
+        msg: "{{ fail_msg_ethernet_credentials }}"
   when: ethernet_switch_support
 
-- name: Verify ib_username and ib_password are not empty
-  assert:
-    that:
-      - ib_username | length > 0
-      - ib_password | length > 0
-    success_msg: "{{ ib_params_success_msg }}"
-    fail_msg: "{{ ib_params_empty_fail_msg }}"
+- name: Assert username and password for IB switches
+  block:
+    - name: Assert ib_username and ib_password
+      assert:
+        that:
+          - ib_username | length >= min_username_length
+          - ib_username | length < max_length
+          - '"-" not in ib_username '
+          - '"\\" not in ib_username '
+          - '"\"" not in ib_username '
+          - " \"'\" not in ib_username "
+          - ib_password | length > min_username_length | int - 1
+          - ib_password | length < max_length | int + 1
+          - '"-" not in ib_password '
+          - '"\\" not in ib_password '
+          - '"\"" not in ib_password '
+          - " \"'\" not in ib_password "
+      no_log: true
+  rescue:
+    - name: IB switch credentials validation check
+      fail:
+        msg: "{{ fail_msg_ib_credentials }}"
   when: ib_switch_support
 
-- name: Assert ib_username
-  assert:
-    that:
-      - ib_username | length >= min_username_length
-      - ib_username | length < max_length
-      - '"-" not in ib_username '
-      - '"\\" not in ib_username '
-      - '"\"" not in ib_username '
-      - " \"'\" not in ib_username "
-    success_msg: "{{ success_ib_username }}"
-    fail_msg: "{{ fail_ib_username }}"
-  when: ib_switch_support
-
-- name: Assert ib_password
-  assert:
-    that:
-      - ib_password | length > min_username_length | int - 1
-      - ib_password | length < max_length | int + 1
-      - '"-" not in ib_password '
-      - '"\\" not in ib_password '
-      - '"\"" not in ib_password '
-      - " \"'\" not in ib_password "
-    success_msg: "{{ success_msg_ib_password }}"
-    fail_msg: "{{ fail_msg_ib_password }}"
-  when: ib_switch_support
-
-- name: Verify powervault_me4_username and powervault_me4_password are not empty
-  assert:
-    that:
-      - powervault_me4_username | length > 0
-      - powervault_me4_password | length > 0
-    success_msg: "{{ pv_params_success_msg }}"
-    fail_msg: "{{ pv_params_empty_fail_msg }}"
-  when: powervault_support
-
-- name: Assert powervault_me4_username
-  assert:
-    that:
-      - powervault_me4_username | length >= min_username_length
-      - powervault_me4_username | length < max_length
-      - '"-" not in powervault_me4_username '
-      - '"\\" not in powervault_me4_username '
-      - '"\"" not in powervault_me4_username '
-      - " \"'\" not in powervault_me4_username "
-    success_msg: "{{ success_powervault_me4_username }}"
-    fail_msg: "{{ fail_powervault_me4_username }}"
-  when: powervault_support
-
-- name: Assert powervault_me4_password
-  assert:
-    that:
-      - powervault_me4_password | length > min_length | int - 1
-      - powervault_me4_password | length < max_length | int + 1
-      - '"-" not in powervault_me4_password '
-      - '"," not in powervault_me4_password '
-      - '"." not in powervault_me4_password '
-      - '"<" not in powervault_me4_password '
-      - '"\\" not in powervault_me4_password '
-      - '"\"" not in powervault_me4_password '
-      - " \"'\" not in powervault_me4_password "
-      - powervault_me4_password | regex_search('^(?=.*[a-z]).+$')
-      - powervault_me4_password | regex_search('^(?=.*[A-Z]).+$')
-      - powervault_me4_password | regex_search('^(?=.*\\d).+$')
-      - powervault_me4_password | regex_search('^(?=.*[!#$%&()*+/:;=>?@^_`{} ~]).+$')
-    success_msg: "{{ success_msg_powervault_me4_password }}"
-    fail_msg: "{{ fail_msg_powervault_me4_password }}"
+- name: Assert username and password for powervault me4
+  block:
+    - name: Assert powervault_me4_username and powervault_me4_password
+      assert:
+        that:
+          - powervault_me4_username | length >= min_username_length
+          - powervault_me4_username | length < max_length
+          - '"-" not in powervault_me4_username '
+          - '"\\" not in powervault_me4_username '
+          - '"\"" not in powervault_me4_username '
+          - " \"'\" not in powervault_me4_username "
+          - powervault_me4_password | length > min_length | int - 1
+          - powervault_me4_password | length < max_length | int + 1
+          - '"-" not in powervault_me4_password '
+          - '"," not in powervault_me4_password '
+          - '"." not in powervault_me4_password '
+          - '"<" not in powervault_me4_password '
+          - '"\\" not in powervault_me4_password '
+          - '"\"" not in powervault_me4_password '
+          - " \"'\" not in powervault_me4_password "
+          - powervault_me4_password | regex_search('^(?=.*[a-z]).+$')
+          - powervault_me4_password | regex_search('^(?=.*[A-Z]).+$')
+          - powervault_me4_password | regex_search('^(?=.*\\d).+$')
+          - powervault_me4_password | regex_search('^(?=.*[!#$%&()*+/:;=>?@^_`{} ~]).+$')
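+          # The four regex checks above require at least one lowercase letter,
+          # one uppercase letter, one digit, and one character from the allowed
+          # special-character set in the password.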
+      no_log: true
+  rescue:
+    - name: Powervault me4 credentials validation check
+      fail:
+        msg: "{{ fail_msg_me4_credentials }}"
   when: powervault_support
 
 - name: Create ansible vault key

+ 8 - 0
control_plane/roles/control_plane_common/tasks/verify_omnia_params.yml

@@ -91,6 +91,14 @@
     docker_password: "{{ docker_password }}"
   no_log: True
 
+- name: Validate the domain name
+  assert:
+    that:
+      - domain_name is regex("^(?!-)[A-Za-z0-9-]+([\\-\\.]{1}[a-z0-9]+)*\\.[A-Za-z]{2,6}$")
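+      # For example, "example.com" satisfies this pattern, while a value that starts
+      # with "-" or that has no top-level domain (e.g., "example") does not.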
+    success_msg: "{{ domain_name_success_msg }}"
+    fail_msg: "{{ domain_name_fail_msg }}"
+  when: domain_name | length > 0
+
 - name: Encrypt input config file
   command: >-
     ansible-vault encrypt {{ role_path }}/../../../{{ config_filename }}

+ 5 - 25
control_plane/roles/control_plane_common/vars/main.yml

@@ -59,31 +59,11 @@ vault_file_perm: '0644'
 nic_min_length: 3
 input_config_failure_msg: "Please provide all the required parameters in login_vars.yml"
 fail_msg_provision_password: "Failed. Incorrect provision_password format provided in login_vars.yml"
-success_msg_provision_password: "provision_password validated"
 fail_msg_cobbler_password: "Failed. Incorrect cobbler_password format provided in login_vars.yml file"
-success_msg_cobbler_password: "cobbler_password validated"
-success_idrac_username: "idrac username validated"
-fail_idrac_username: "Failed. Incorrect idrac_username format provided in base_vars.yml"
-success_msg_idrac_password: "idrac password validated"
-fail_msg_idrac_password: "Failed. Incorrect idrac_password format provided in base_vars.yml"
-ethernet_params_success_msg: "Ethernet switch username and password are not blank"
-ethernet_params_empty_fail_msg: "Failed. ethernet switch username or password cannot be empty when ethernet_switch_support is true"
-success_ethernet_switch_username: "Ethernet switch username validated"
-fail_ethernet_switch_username: "Failed. Incorrect ethernet_switch_username format provided in base_vars.yml"
-success_msg_ethernet_switch_password: "Ethernet password validated"
-fail_msg_ethernet_switch_password: "Failed. Incorrect ethernet_switch_password format provided in base_vars.yml"
-ib_params_success_msg: "InfiniBand switch username and password are not blank"
-ib_params_empty_fail_msg: "Failed. InfiniBand username or password cannot be empty when ib_switch_support is true"
-success_ib_username: "ib username validated"
-fail_ib_username: "Failed. Incorrect ib_username format provided in base_vars.yml"
-success_msg_ib_password: "ib password validated"
-fail_msg_ib_password: "Failed. Incorrect ib_password format provided in base_vars.yml"
-pv_params_success_msg: "Powervault switch username and password are not blank"
-pv_params_empty_fail_msg: "Failed. Powervault username or password cannot be empty when powervault_support is true"
-success_powervault_me4_username: "powervault username validated"
-fail_powervault_me4_username: "Failed. Incorrect powervault_username format provided in base_vars.yml"
-success_msg_powervault_me4_password: "powervault password validated"
-fail_msg_powervault_me4_password: "Failed. Incorrect powervault_password format provided in base_vars.yml"
+fail_msg_idrac_credentials: "Failed. Incorrect idrac_username or idrac_password format provided in login_vars.yml"
+fail_msg_ethernet_credentials: "Failed. Incorrect ethernet_switch_username or ethernet_switch_password format provided in login_vars.yml"
+fail_msg_ib_credentials: "Failed. Incorrect ib_username or ib_password format provided in login_vars.yml"
+fail_msg_me4_credentials: "Failed. Incorrect powervault_me4_username or powervault_me4_password format provided in login_vars.yml"
 
 # Usage: verify_omnia_params.yml
 config_filename: "omnia_config.yml"
@@ -152,7 +132,7 @@ idrac_tools_vars_filename: input_params/idrac_tools_vars.yml
 # Usage: nfs_server_setup.yml
 nfs_share_offline_repo: /var/nfs_repo
 nfs_share_awx: /var/nfs_awx
-nfs_share_dir_mode: 0644
+nfs_share_dir_mode: 0777
 exports_file_path: /etc/exports
 nfs_services:
   - mountd

+ 37 - 37
control_plane/roles/control_plane_ib/tasks/main.yml

@@ -15,47 +15,47 @@
 
 # Tasks file for infiniband
 
-- name: Check infiniband_container status on machine
-  include_tasks: check_prerequisites.yml
-
-- name: Include common variables
-  include_vars:  ../../control_plane_common/vars/main.yml
-  when: not infiniband_container_status
-
-- name: Internet validation
-  include_tasks:  ../../control_plane_common/tasks/internet_validation.yml
-  when: not infiniband_container_status
-
-#- name: Fetch base inputs
-#  include_tasks: ../../control_plane_common/tasks/fetch_base_inputs.yml
-#  when: not infiniband_container_status
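+# All InfiniBand tasks are grouped under the block below so that the entire role is
+# skipped when ib_switch_support is false.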
+- name: Check if IB switch is supported
+  block:
+    - name: Check infiniband_container status on machine
+      include_tasks: check_prerequisites.yml
 
-- name: Include variable file base_vars.yml
-  include_vars: "{{ ib_base_file }}"
+    - name: Include common variables
+      include_vars:  ../../control_plane_common/vars/main.yml
+      when: not infiniband_container_status
 
-- name: Dhcp Configuration
-  import_tasks: dhcp_configure.yml
-  when: (not infiniband_container_image_status) or ( infiniband_backup_map_status == true)
+    - name: Internet validation
+      include_tasks:  ../../control_plane_common/tasks/internet_validation.yml
+      when: not infiniband_container_status
 
-#- name: Mapping file validation
-#  import_tasks: mapping_file.yml
-#  when: (not infiniband_container_image_status) and (mapping_file == true) or ( backup_map_status == true)
+    - name: Include variable file base_vars.yml
+      include_vars: "{{ base_file }}"
 
-- name: infiniband_container image creation
-  import_tasks: infiniband_container_image.yml
-  when: not infiniband_container_status
+    - name: Dhcp Configuration
+      import_tasks: dhcp_configure.yml
+      when: (not infiniband_container_image_status) or ( infiniband_backup_map_status == true)
 
-- name: infiniband_container configuration
-  import_tasks: configure_infiniband_container.yml
+    #- name: Mapping file validation
+    #  import_tasks: mapping_file.yml
+    #  when: (not infiniband_container_image_status) and (mapping_file == true) or ( backup_map_status == true)
 
-- name: infiniband_container container status message
-  block:
-    - debug:
-        msg: "{{ infiniband_message_skipped }}"
-        verbosity: 2
-      when: infiniband_container_status
-    - debug:
-        msg: "{{ infiniband_message_installed }}"
-        verbosity: 2
+    - name: infiniband_container image creation
+      import_tasks: infiniband_container_image.yml
       when: not infiniband_container_status
-  tags: install
+
+    - name: infiniband_container configuration
+      import_tasks: configure_infiniband_container.yml
+
+    - name: infiniband_container status message
+      block:
+        - debug:
+            msg: "{{ infiniband_message_skipped }}"
+            verbosity: 2
+          when: infiniband_container_status
+        - debug:
+            msg: "{{ infiniband_message_installed }}"
+            verbosity: 2
+          when: not infiniband_container_status
+      tags: install
+
+  when: ib_switch_support

+ 0 - 13
docs/PREINSTALL_OMNIA.md

@@ -1,16 +1,3 @@
-# Preparation to install Omnia
-
-## Assumptions
-Ensure that the following prerequisites are met:
-* The manager and compute nodes must be running CentOS 7.9 2009 OS.
-* All nodes are connected to the network and have access to Internet.
-* SSH Keys for root have been installed on all nodes to allow for password-less SSH.
-* On the manager node, install Ansible and Git using the following commands:
-	* `yum install epel-release -y`
-	* `yum install ansible-2.9.18 git -y`  
-__Note:__ Ansible must be installed using __yum__. If Ansible is installed using __pip3__, re-install it using the __yum__ command again.
-
-
 ## Example system designs
 Omnia can configure systems which use Ethernet or Infiniband-based fabric to connect the compute servers.
 

+ 46 - 16
docs/FAQ.md

@@ -1,20 +1,17 @@
 # Frequently Asked Questions
 
-* TOC
-{:toc}
-
-## Why is the error "Wait for AWX UI to be up" displayed when `appliance.yaml` fails?  
+## Why is the error "Wait for AWX UI to be up" displayed when `appliance.yml` fails?  
 Cause: 
 1. When AWX is not accessible even after five minutes of wait time. 
 2. When __isMigrating__ or __isInstalling__ is seen in the failure message.
 	
 Resolution:  
+Wait for AWX UI to be accessible at http://\<management-station-IP>:8081, and then run the `appliance.yml` file again, where __management-station-IP__ is the IP address of the management station.
+Wait for AWX UI to be accessible at http://\<management-station-IP>:8081, and then run the `appliance.yml` file again, where __management-station-IP__ is the IP address of the management node.
 
-## What are the next steps after the nodes in a Kubernetes cluster reboots?  
+## What are the next steps after the nodes in a Kubernetes cluster reboot?  
 Resolution: 
-Wait for upto 15 minutes after the Kubernetes cluster reboots. Next, verify status of the cluster using the following services:
-* `kubectl get nodes` on the manager node provides correct k8s cluster status.  
+Wait for 15 minutes after the Kubernetes cluster reboots. Next, verify the status of the cluster using the following commands:
+* `kubectl get nodes` on the manager node provides the correct k8s cluster status.  
 * `kubectl get pods --all-namespaces` on the manager node displays all the pods in the **Running** state.
 * `kubectl cluster-info` on the manager node displays both k8s master and kubeDNS are in the **Running** state.
 
@@ -24,9 +21,9 @@ Resolution:
 2. If the pods are not in the **Running** state, delete the pods using the command:`kubectl delete pods <name of pod>`
 3. Run the corresponding playbook that was used to install Kubernetes: `omnia.yml`, `jupyterhub.yml`, or `kubeflow.yml`.
 
-## What to do when the JupyterHub or Prometheus UI are not accessible?  
+## What to do when the JupyterHub or Prometheus UI is not accessible?  
 Resolution:
+Run the command `kubectl get pods --namespace default` to ensure that the **nfs-client** pod and all Prometheus server pods are in the **Running** state. 
+Run the command `kubectl get pods --namespace default` to ensure **nfs-client** pod and all Prometheus server pods are in the **Running** state. 
 
 ## While configuring the Cobbler, why does the `appliance.yml` fail with an error during the Run import command?  
 Cause:
@@ -67,7 +64,7 @@ slurmctld -Dvvv
 ```
 2. Verify `/var/lib/log/slurmctld.log` file.
 
-## What to do when when the error "ports are unavailable" is displayed?
+## How to resolve the "Ports are unavailable" error?
 Cause: Slurm database connection fails.  
 Resolution:
 1. Run the following commands:
@@ -86,15 +83,48 @@ systemctl restart slurmd on compute node
 ```
 		
 ## What to do if Kubernetes Pods are unable to communicate with the servers when the DNS servers are not responding?  
-Cause: With the host network which is DNS issue.  
+Cause: The Kubernetes Pod Network CIDR may be overlapping with the host network, which causes a DNS issue.  
 Resolution:
 1. In your Kubernetes cluster, run `kubeadm reset -f` on the nodes.
-2. In the management node, edit the `omnia_config.yml` file to change the Kubernetes Pod Network CIDR. Suggested IP range is 192.168.0.0/16 and ensure you provide an IP which is not in use in your host network.
+2. In the management node, edit the `omnia_config.yml` file to change the Kubernetes Pod Network CIDR. The suggested IP range is 192.168.0.0/16 and ensure that you provide an IP that is not in use in your host network.
 3. Execute omnia.yml and skip slurm using __skip_ tag __slurm__.
 
-## What to do if time taken to pull the images to create the Kubeflow containers exceeds the limit and the Apply Kubeflow configurations task fails?  
+## What to do if the time taken to pull the images to create the Kubeflow containers exceeds the limit and the Apply Kubeflow configurations task fails?  
 Cause: Unstable or slow Internet connectivity.  
 Resolution:
-1. Complete the PXE booting/ format the OS on manager and compute nodes.
+1. Complete the PXE booting/ format the OS on the manager and compute nodes.
 2. In the omnia_config.yml file, change the k8s_cni variable value from calico to flannel.
-3. Run the Kubernetes and Kubeflow playbooks.
+3. Run the Kubernetes and Kubeflow playbooks.  
+
+## How to resolve the "Permission denied" error while executing the `idrac.yml` file or other .yml files from AWX?
+Cause: The "PermissionError: [Errno 13] Permission denied" error is displayed if you have used the ansible-vault decrypt or encrypt commands.  
+Resolution:
+* Provide chmod 644 permission to the .yml files that are missing the required permission. 
+
+It is suggested that you use the ansible-vault view or edit commands and that you do not use the ansible-vault decrypt or encrypt commands.
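+
+For example, assuming `omnia_config.yml` is the file that is missing the required permission:
+```
+chmod 644 omnia_config.yml
+```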
+
+## What to do if the LC (Lifecycle Controller) is not ready?
+Resolution:
+* Ensure that the LC is in a ready state for all the servers.
+* Launch the iDRAC template.
+
+## What to do if the network CIDR entry of iDRAC IP in /etc/exports file is missing?
+Resolution:
+* Add the network CIDR range of the iDRAC IPs to the */etc/exports* file if the iDRAC IPs are not in the management network range provided in base_vars.yml.
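+
+A minimal sketch, assuming the iDRAC IPs fall in the 172.19.0.0/16 subnet and that */var/nfs_repo* is the exported directory (adjust both to match your environment):
+```
+echo "/var/nfs_repo 172.19.0.0/16(rw,sync,no_root_squash)" >> /etc/exports
+exportfs -r
+```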
+
+## What to do if a custom ISO file is not present in the device?
+Resolution:
+* Re-run the *control_plane.yml* file.
+
+## What to do if the *management_station_ip.txt* file under *provision_idrac/files* folder is missing?
+Resolution:
+* Re-run the *control_plane.yml* file.
+
+## Is disabling 2FA supported by Omnia?
+Resolution:
+* Omnia does not support disabling 2FA; you must disable it manually.
+
+## The provisioning of PowerEdge servers failed. How to resolve the issue and reprovision the servers?
+Resolution:
+1. Delete the respective iDRAC IP addresses from the *provisioned_idrac_inventory* on the AWX UI or delete the *provisioned_idrac_inventory* to delete the iDRAC IP addresses of all the servers in the cluster.
+2. Launch the iDRAC template from the AWX UI.

+ 120 - 32
docs/INSTALL_OMNIA.md

@@ -1,47 +1,121 @@
 # Install Omnia using CLI
 
-The following sections provide details on installing Omnia using CLI. If you want to install the Omnia appliance and manage workloads using the Omnia appliance, see [Install the Omnia appliance](INSTALL_OMNIA_APPLIANCE.md) and [Monitor Kubernetes and Slurm](MONITOR_CLUSTERS.md) for more information.
+The following sections provide details on installing Omnia using CLI.  
+
+To install the Omnia control plane and manage workloads on your cluster using the Omnia control plane, see [Install the Omnia Control Plane](INSTALL_OMNIA_CONTROL_PLANE.md) and [Monitor Kubernetes and Slurm](MONITOR_CLUSTERS.md) for more information.
 
 ## Prerequisites
-* Ensure that all the prerequisites listed in the [Preparation to install Omnia](PREINSTALL_OMNIA.md) are met before installing Omnia.
-* If there are errors when any of the following Ansible playbook commands are run, re-run the commands again. 
+* The login, manager, and compute nodes must be running CentOS 7.9 2009 OS.
+* If you have configured the `omnia_config.yml` file to enable the login node, the login node must be part of the cluster. 
+* All nodes must be connected to the network and must have access to the Internet.
+* Set the hostnames of all the nodes in the cluster.
+	* If the login node is enabled, then set the hostnames in the format: __hostname.domainname__. For example, "manager.example.com" is a valid hostname.
+	* Include the hostnames under /etc/hosts in the format: </br>*ipaddress hostname.domainname*. For example, "192.168.12.1 manager.example.com" is a valid entry.
+* SSH Keys for root are installed on all nodes to allow for password-less SSH.
 * The user should have root privileges to perform installations and configurations.
- 
-## Install Omnia using CLI
+* On the management station, ensure that you install Python 3.6 and Ansible.  
+	* Run the following commands to install Python 3.6:  
+		```
+		dnf install epel-release -y
+		dnf install python3 -y
+		```
+	* Run the following commands to install Ansible:
+		 ```
+		 pip3.6 install --upgrade pip
+		 python3.6 -m pip install ansible
+		 ```
+	After the installation is complete, run `ansible --version` to verify if the installation is successful. In the output, ensure that the executable location path is present in the PATH variable by running `echo $PATH`.
+	If the executable location path is not available, update the path by running `export PATH=$PATH:<executable location>`.  
+	
+	For example,  
+	```
+	ansible --version
+    ansible 2.10.9
+    config file = None
+    configured module search path = ['/root/.ansible/plugins/modules', '/usr/share/ansible/plugins/modules']
+    ansible python module location = /usr/local/lib/python3.6/site-packages/ansible
+    executable location = /usr/local/bin/ansible
+    python version = 3.6.8 (default, Aug 24 2020, 17:57:11) [GCC 8.3.1 20191121 (Red Hat 8.3.1-5)]
+    ```
+	The executable location is `/usr/local/bin/ansible`. Update the path by running the following command:
+    ```
+	export PATH=$PATH:/usr/local/bin
+	```  
+	
+**Note**: Omnia requires Python 3.6 because it provides bindings to system tools such as RPM, DNF, and SELinux. As Python versions later than 3.6 do not provide these bindings, ensure that you install Python 3.6 with dnf.  
+
+**Note**: If Ansible 2.9 or later is already installed, uninstall it before installing a newer version of Ansible by running the following commands:  
+1. `pip uninstall ansible`
+2. `pip uninstall ansible-base` (if Ansible 2.9 is installed)
+3. `pip uninstall ansible-core` (if Ansible 2.10 or later is installed)
+
+	 
+* On the management station, run the following commands to install Git:
+	```
+	dnf install epel-release -y
+	dnf install git -y
+	```
+
+**Note**: If there are errors while executing the Ansible playbook commands, then re-run the commands.  
+
+## Steps to install Omnia using CLI
 
 1. Clone the Omnia repository:
 ``` 
+git clone https://github.com/dellhpc/omnia.git 
+```  
+
+<!---
+From release branch: 
+``` 
 git clone -b release https://github.com/dellhpc/omnia.git 
-```
+```-->  
+
 __Note:__ After the Omnia repository is cloned, a folder named __omnia__ is created. Ensure that you do not rename this folder.
 
 2. Change the directory to __omnia__: `cd omnia`
 
-3. An inventory file must be created in the __omnia__ folder. Add compute node IPs under **[compute]** group and the manager node IP under **[manager]** group. See the INVENTORY template file under `omnia\docs` folder.
+3. In the `omnia_config.yml` file, provide the following details.  
+	a. The **k8s_version** variable specifies the Kubernetes version which will be installed on the manager and compute nodes. By default, it is set to **1.16.7**. Edit this variable to change the version. Supported versions are 1.16.7 and 1.19.3.  
+	b. The variable `login_node_required` is set to "true" by default to configure the login node. To configure the login node, edit the following variables:
+	* domain_name: Domain name you intend to configure.
+	* realm_name: A realm name is often, but not always, the upper case version of the name of the DNS domain over which it presides.
+	* directory_manager_password: Password of the Directory Manager with full access to the directory for system management tasks.
+	* ipa_admin_password: "admin" user password for the IPA server.  
+	
+	If you do not want to configure the login node, then you can set the `login_node_required` variable to "false". Without the login node, Slurm jobs can be scheduled only through the manager node.
 
-4. To install Omnia:
+4. Create an inventory file in the *omnia* folder. Add login node IP address under the *[login_node]* group, manager node IP address under the *[manager]* group, compute node IP addresses under the *[compute]* group, and NFS node IP address under the *[nfs_node]* group. A template file named INVENTORY is provided in the *omnia\docs* folder.  
+	**NOTE**: Ensure that all the four groups (login_node, manager, compute, nfs_node) are present in the template, even if the IP addresses are not updated under login_node and nfs_node groups. 
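+
+	A minimal sketch of such an inventory file (the IP addresses are placeholders; follow the INVENTORY template in *omnia\docs*):
+	```
+	[manager]
+	172.17.0.10
+	[compute]
+	172.17.0.11
+	172.17.0.12
+	[login_node]
+	172.17.0.13
+	[nfs_node]
+	```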
+
+5. To install Omnia:
 ```
-ansible-playbook omnia.yml -i inventory -e "ansible_python_interpreter=/usr/bin/python2" 
+ansible-playbook omnia.yml -i inventory 
 ```
 
-5. By default, no skip tags are selected, and both Kubernetes and Slurm will be deployed.
+6. By default, no skip tags are selected, and both Kubernetes and Slurm will be deployed.  
 
-To skip the installation of Kubernetes, enter:  
-`ansible-playbook omnia.yml -i inventory -e "ansible_python_interpreter=/usr/bin/python2"  --skip-tags "kubernetes"` 
+	To skip the installation of Kubernetes, enter:  
+	`ansible-playbook omnia.yml -i inventory --skip-tags "kubernetes"` 
+	
+	To skip the installation of Slurm, enter:  
+	`ansible-playbook omnia.yml -i inventory --skip-tags "slurm"`  
 
-To skip the installation of Slurm, enter:  
-`ansible-playbook omnia.yml -i inventory -e "ansible_python_interpreter=/usr/bin/python2"  --skip-tags "slurm"`  
+	To skip the NFS client setup, enter the following command to skip the k8s_nfs_client_setup role of Kubernetes:  
+	`ansible-playbook omnia.yml -i inventory --skip-tags "nfs_client"`
 
-To skip the NFS client setup, enter the following command to skip the k8s_nfs_client_setup role of Kubernetes:  
-`ansible-playbook omnia.yml -i inventory -e "ansible_python_interpreter=/usr/bin/python2"  --skip-tags "nfs_client"`
+	The default path of the Ansible configuration file is `/etc/ansible/`. If the file is not present in the default path, then edit the `ansible_config_file_path` variable to update the configuration path.
 
-6. To provide passwords for mariaDB Database (for Slurm accounting), Kubernetes Pod Network CIDR, and Kubernetes CNI, edit the `omnia_config.yml` file.  
+7. To provide passwords for mariaDB Database (for Slurm accounting), Kubernetes Pod Network CIDR, and Kubernetes CNI, edit the `omnia_config.yml` file.  
 __Note:__ 
 * Supported values for Kubernetes CNI are calico and flannel. The default value of CNI considered by Omnia is calico. 
 * The default value of Kubernetes Pod Network CIDR is 10.244.0.0/16. If 10.244.0.0/16 is already in use within your network, select a different Pod Network CIDR. For more information, see __https://docs.projectcalico.org/getting-started/kubernetes/quickstart__.
 
-To view the set passwords of omnia_config.yml at a later time:  
-`ansible-vault view omnia_config.yml --vault-password-file .omnia_vault_key`
+**NOTE**: If you want to view or edit the `omnia_config.yml` file, run the following commands:
+1. `cd input_params`
+2. `ansible-vault view omnia_config.yml --vault-password-file .vault_key` or `ansible-vault edit omnia_config.yml --vault-password-file .vault_key`.  
+
+**NOTE**: It is suggested that you use the ansible-vault view or edit commands and that you do not use the ansible-vault decrypt or encrypt commands. If you have used the ansible-vault decrypt or encrypt commands, provide 644 permission to `omnia_config.yml`.  
 
 Omnia considers `slurm` as the default username for MariaDB.  
 
@@ -77,10 +151,12 @@ The following __kubernetes__ roles are provided by Omnia when __omnia.yml__ file
 
 __Note:__ 
 * After Kubernetes is installed and configured, few Kubernetes and calico/flannel related ports are opened in the manager and compute nodes. This is required for Kubernetes Pod-to-Pod and Pod-to-Service communications. Calico/flannel provides a full networking stack for Kubernetes pods.
-* If Kubernetes Pods are unable to communicate with the servers when the DNS servers are not responding, then the Kubernetes Pod Network CIDR may be overlapping with the host network which is DNS issue. To resolve this issue follow the below steps:
-1. In your Kubernetes cluster, run `kubeadm reset -f` on the nodes.
-2. In the management node, edit the `omnia_config.yml` file to change the Kubernetes Pod Network CIDR. Suggested IP range is 192.168.0.0/16 and ensure you provide an IP which is not in use in your host network.
-3. Execute omnia.yml and skip slurm using --skip-tags slurm.
+* If Kubernetes Pods are unable to communicate with the servers (i.e., unable to access the Internet) when the DNS servers are not responding, then the Kubernetes Pod Network CIDR may be overlapping with the host network, which causes a DNS issue. To resolve this issue:
+	1. Disable firewalld.service.
+	2. If the issue persists, then perform the following actions:  
+		a. In your Kubernetes cluster, run `kubeadm reset -f` on the nodes.  
+		b. In the management station, edit the *omnia_config.yml* file to change the Kubernetes Pod Network CIDR or CNI value. Suggested IP range is 192.168.0.0/16 and ensure you provide an IP which is not in use in your host network.  
+		c. Execute `omnia.yml` and skip slurm using `--skip-tags slurm`.
 
 ## Slurm roles
 
@@ -98,20 +174,32 @@ The following __Slurm__ roles are provided by Omnia when __omnia.yml__ file is r
 - **slurm_start_services** role: 
 	- Starting the Slurm services so that compute node communicates with manager node.
 - **slurm_exporter** role: 
-	- Slurm exporter is a package for exporting metrics collected from Slurm resource scheduling system to prometheus.
-	- Slurm exporter is installed on the host like Slurm, and Slurm exporter will be successfully installed only if Slurm is installed.
+	- Slurm exporter is a package for exporting metrics collected from Slurm resource scheduling system to Prometheus.
+	- Slurm exporter is installed on the host like Slurm, and Slurm exporter will be successfully installed only if Slurm is installed.  
+
+## Login node roles
+To enable the login node, the *login_node_required* variable must be set to "true" in the *omnia_config.yml* file.  
+- **login_common** role: The firewall ports are opened on the manager and login nodes.  
+- **login_server** role: FreeIPA server is installed and configured on the manager node to provide authentication using LDAP and Kerberos principles.  
+- **login_node** role: FreeIPA client is installed and configured on the login node and is integrated with the server running on the manager node.  
 
-**Note:** If you want to install JupyterHub and Kubeflow playbooks, you have to first install the JupyterHub playbook and then install the Kubeflow playbook.
+**NOTE**: To skip the installation of:
+* The login node: In the `omnia_config.yml` file, set the *login_node_required* variable to "false".  
+* The FreeIPA server and client: Use `--skip-tags freeipa` while executing the *omnia.yml* file. 
+
+### Installing JupyterHub and Kubeflow playbooks  
+If you want to install JupyterHub and Kubeflow playbooks, you have to first install the JupyterHub playbook and then install the Kubeflow playbook.
 
 Commands to install JupyterHub and Kubeflow:
-* `ansible-playbook platforms/jupyterhub.yml -i inventory -e "ansible_python_interpreter=/usr/bin/python2"`
-* `ansible-playbook platforms/kubeflow.yml -i inventory -e "ansible_python_interpreter=/usr/bin/python2" `
+* `ansible-playbook platforms/jupyterhub.yml -i inventory`
+* `ansible-playbook platforms/kubeflow.yml -i inventory`
 
-__Note:__ When the Internet connectivity is unstable or slow, it may take more time to pull the images to create the Kubeflow containers. If the time limit is exceeded, the Apply Kubeflow configurations task may fail. To resolve this issue, you must redeploy Kubernetes cluster and reinstall Kubeflow by completing the following steps:
+__Note:__ When the Internet connectivity is unstable or slow, it may take more time to pull the images to create the Kubeflow containers. If the time limit is exceeded, the **Apply Kubeflow configurations** task may fail. To resolve this issue, you must redeploy Kubernetes cluster and reinstall Kubeflow by completing the following steps:
 * Format the OS on manager and compute nodes.
-* In the omnia_config.yml file, change the k8s_cni variable value from calico to flannel.
-* Run the Kubernetes and Kubeflow playbooks.
+* In the `omnia_config.yml` file, change the k8s_cni variable value from calico to flannel.
+* Run the Kubernetes and Kubeflow playbooks. 
 
 ## Add a new compute node to the cluster
 
-To update the INVENTORY file present in `omnia` directory with the new node IP address under the compute group. Ensure the other nodes which are already a part of the cluster are also present in the compute group along with the new node. Then, run`omnia.yml` to add the new node to the cluster and update the configurations of the manager node.
+Update the INVENTORY file present in the `omnia` directory with the new node IP address under the compute group. Ensure that the other nodes which are already a part of the cluster are also present in the compute group along with the new node. Then, run `omnia.yml` to add the new node to the cluster and update the configurations of the manager node.
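+
+For example, after appending the new node IP address under the *[compute]* group in the INVENTORY file, run:
+```
+ansible-playbook omnia.yml -i inventory
+```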
+

+ 0 - 189
docs/INSTALL_OMNIA_APPLIANCE.md

@@ -1,189 +0,0 @@
-# Install the Omnia appliance
-
-## Prerequisites
-* Ensure that all the prerequisites listed in the [Prerequisites to install the Omnia appliance](PREINSTALL_OMNIA_APPLIANCE.md) file are met before installing the Omnia appliance.
-* After the installation of the Omnia appliance, changing the manager node is not supported. If you need to change the manager node, you must redeploy the entire cluster.  
-* You must have root privileges to perform installations and configurations using the Omnia appliance.
-* If there are errors when any of the following Ansible playbook commands are run, re-run the commands again.
-
-## Steps to install the Omnia appliance
-
-1. On the management node, change the working directory to the directory where you want to clone the Omnia Git repository.
-2. Clone the Omnia repository:
-``` 
-git clone -b release https://github.com/dellhpc/omnia.git 
-```
-3. Change the directory to __omnia__: `cd omnia`
-4. Edit the `omnia_config.yml` file to:
-* Provide passwords for mariaDB Database (for Slurm accounting), Kubernetes Pod Network CIDR, Kubernetes CNI under `mariadb_password` and `k8s_cni` respectively.  
-__Note:__ 
-* Supported values for Kubernetes CNI are calico and flannel. The default value of CNI considered by Omnia is calico.	
-* The default value of Kubernetes Pod Network CIDR is 10.244.0.0/16. If 10.244.0.0/16 is already in use within your network, select a different Pod Network CIDR. For more information, see __https://docs.projectcalico.org/getting-started/kubernetes/quickstart__.
-
-5. Change the directory to __omnia__->__appliance__: `cd omnia/appliance`
-6. Edit the `appliance_config.yml` file to:  
-	a. Provide passwords for Cobbler and AWX under `provision_password` and `awx_password` respectively.  
-	__Note:__ Minimum length of the password must be at least eight characters and a maximum of 30 characters. Do not use these characters while entering a password: -, \\, "", and \'  
-	
-	b. Change the NIC for the DHCP server under `hpc_nic`, and the NIC used to connect to the Internet under `public_nic`. The default values of **hpc_nic** and **public_nic** are set to em1 and em2 respectively.  
-	
-	c. Provide the CentOS-7-x86_64-Minimal-2009 ISO file path under `iso_file_path`. This ISO file is used by Cobbler to provision the OS on the compute nodes.  
-	__Note:__ It is recommended that the ISO image file is not renamed. And, you **must not** change the path of this ISO image file as the provisioning of the OS on the compute nodes may be impacted.
-	
-	d. Provide a mapping file for DHCP configuration under `mapping_file_path`. The **mapping_file.csv** template file is present under `omnia/examples`. Enter the details in the order: `MAC, Hostname, IP`. The header in the template file must not be deleted before saving the file.  
-	If you want to continue without providing a mapping file, leave the `mapping_file_path` value as blank.  
-	__Note:__ Ensure that duplicate values are not provided for MAC, Hostname, and IP in the mapping file. The Hostname should not contain the following characters: , (comma), \. (period), and _ (underscore).
-	
-	e. Provide valid DHCP range for HPC cluster under the variables `dhcp_start_ip_range` and `dhcp_end_ip_range`. 
-	
-	f. **GMT** is the default configured time zone set during the provisioning of OS on compute nodes. To change the time zone, edit the `timezone` variable and enter a time zone. You can set the time zone to **EST**, **CET**, **MST**, **CST6CDT**, or **PST8PDT**. For a list of available time zones, see the `appliance/roles/common/files/timezone.txt` file. 
-	
-Omnia considers the following usernames as default:  
-* `cobbler` for Cobbler Server
-* `admin` for AWX
-* `slurm` for MariaDB
-
-7. Run `ansible-playbook appliance.yml` to install the Omnia appliance.  
-
-Omnia creates a log file which is available at: `/var/log/omnia.log`.
-
-**Note**: If you want to view the Cobbler and AWX passwords provided in the **appliance_config.yml** file, run `ansible-vault view appliance_config.yml --vault-password-file .vault_key`.  
-
-## Provision operating system on the target nodes 
-Omnia role used: *provision*  
-Ports used by Cobbler:  
-* TCP ports: 80,443,69
-* UDP ports: 69,4011
-
-To create the Cobbler image, Omnia configures the following:
-* Firewall settings.
-* The kickstart file of Cobbler which will enable the UEFI PXE boot.
-
-To access the Cobbler dashboard, enter `https://<IP>/cobbler_web` where `<IP>` is the Global IP address of the management node. For example, enter
-`https://100.98.24.225/cobbler_web` to access the Cobbler dashboard.
-
-__Note__: After the Cobbler Server provisions the operating system on the nodes, IP addresses and host names are assigned by the DHCP service.  
-* If a mapping file is not provided, the hostname to the server is provided based on the following format: **computexxx-xxx** where "xxx-xxx" is the last two octets of Host IP address. For example, if the Host IP address is 172.17.0.11 then the assigned hostname by Omnia is compute0-11.  
-* If a mapping file is provided, the hostnames follow the format provided in the mapping file.  
-
-__Note__: If you want to add more nodes, append the new nodes in the existing mapping file. However, do not modify the previous nodes in the mapping file as it may impact the existing cluster.  
-
-## Install and configure Ansible AWX 
-Omnia role used: *web_ui*  
-The port used by AWX is __8081__.  
-The AWX repository is cloned from the GitHub path: https://github.com/ansible/awx.git 
-
-Omnia performs the following configurations on AWX:
-* The default organization name is set to **Dell EMC**.
-* The default project name is set to **omnia**.
-* The credentials are stored in the **omnia_credential**.
-* Two groups, namely compute and manager groups, are provided under **omnia_inventory**. You can add hosts to these groups using the AWX UI. 
-* Pre-defined templates are provided: **DeployOmnia** and **DynamicInventory**
-* **DynamicInventorySchedule** which is scheduled to run every 10 minutes updates the inventory details dynamically. 
-
-To access the AWX dashboard, enter `http://<IP>:8081` where **\<IP>** is the Global IP address of the management node. For example, enter `http://100.98.24.225:8081` to access the AWX dashboard.
-
-**Note**: The AWX configurations are automatically performed Omnia and Dell Technologies recommends that you do not change the default configurations provided by Omnia as the functionality may be impacted.
-
-__Note__: Although AWX UI is accessible, hosts will be shown only after few nodes have been provisioned by Cobbler. It takes approximately 10 to 15 minutes to display the host details after the provisioning by Cobbler. If a server is provisioned but you are unable to view the host details on the AWX UI, then you can run the following command from __omnia__ -> __appliance__ ->__tools__ folder to view the hosts which are reachable.
-```
-ansible-playbook -i ../roles/inventory/provisioned_hosts.yml provision_report.yml
-```
-
-## Install Kubernetes and Slurm using AWX UI
-Kubernetes and Slurm are installed by deploying the **DeployOmnia** template on the AWX dashboard.
-
-1. On the AWX dashboard, under __RESOURCES__ __->__ __Inventories__, select **omnia_inventory**.
-2. Select __GROUPS__, and then select either __compute__ or __manager__ group.
-3. Select the __HOSTS__ tab.
-4. To add the hosts provisioned by Cobbler, click **+**, and then select **Existing Host**. 
-5. Select the hosts from the list and click __SAVE__.
-6. To deploy Omnia, under __RESOURCES__ -> __Templates__, select __DeployOmnia__, and then click __LAUNCH__.
-7. By default, no skip tags are selected and both Kubernetes and Slurm will be deployed. 
-8. To install only Kubernetes, enter `slurm` and select **slurm**. 
-9. To install only Slurm, select and add `kubernetes` skip tag. 
-
-__Note:__
-*	If you would like to skip the NFS client setup, enter `nfs_client` in the skip tag section to skip the **k8s_nfs_client_setup** role of Kubernetes.
-
-10. Click **NEXT**.
-11. Review the details in the **PREVIEW** window, and click **LAUNCH** to run the DeployOmnia template. 
-
-__Note:__ If you want to install __JupyterHub__ and __Kubeflow__ playbooks, you have to first install the __JupyterHub__ playbook and then install the __Kubeflow__ playbook.
-
-__Note:__ To install __JupyterHub__ and __Kubeflow__ playbooks:
-*	From AWX UI, under __RESOURCES__ -> __Templates__, select __DeployOmnia__ template.
-*	From __PLAYBOOK__ dropdown menu, select __platforms/jupyterhub.yml__ and launch the template to install JupyterHub playbook.
-*	From __PLAYBOOK__ dropdown menu, select __platforms/kubeflow.yml__ and launch the template to install Kubeflow playbook.
-
-__Note:__ When the Internet connectivity is unstable or slow, it may take more time to pull the images to create the Kubeflow containers. If the time limit is exceeded, the **Apply Kubeflow configurations** task may fail. To resolve this issue, you must redeploy Kubernetes cluster and reinstall Kubeflow by completing the following steps:
-* Complete the PXE booting of the manager and compute nodes.
-* In the `omnia_config.yml` file, change the k8s_cni variable value from calico to flannel.
-* Run the Kubernetes and Kubeflow playbooks.
-
-The DeployOmnia template may not run successfully if:
-- The Manager group contains more than one host.
-- The Compute group does not contain a host. Ensure that the Compute group is assigned with at least one host node.
-- Under Skip Tags, when both kubernetes and slurm tags are selected.
-
-After **DeployOmnia** template is run from the AWX UI, the **omnia.yml** file installs Kubernetes and Slurm, or either Kubernetes or slurm, as per the selection in the template on the management node. Additionally, appropriate roles are assigned to the compute and manager groups.
-
-## Kubernetes roles
-
-The following __Kubernetes__ roles are provided by Omnia when the __omnia.yml__ file is run:
-- __common__ role:
-	- Installs common packages on the manager and compute nodes
-	- Installs Docker
-	- Deploys NTP/chrony for time synchronization
-	- Installs NVIDIA drivers and software components
-- **k8s_common** role: 
-	- Installs the required Kubernetes packages
-	- Starts the Docker and Kubernetes services.
-- **k8s_manager** role: 
-	- Installs the __helm__ package for Kubernetes.
-- **k8s_firewalld** role: This role is used to enable the required ports to be used by Kubernetes. 
-	- For __head-node-ports__: 6443, 2379-2380,10251,10250,10252
-	- For __compute-node-ports__: 10250,30000-32767
-	- For __calico-udp-ports__: 4789
-	- For __calico-tcp-ports__: 5473,179
-	- For __flannel-udp-ports__: 8285,8472
-- **k8s_nfs_server_setup** role: 
-	- An __nfs-share__ directory, `/home/k8snfs`, is created. Compute nodes use this directory to share common files.
-- **k8s_nfs_client_setup** role
-- **k8s_start_manager** role: 
-	- Runs the __/bin/kubeadm init__ command to initialize the Kubernetes services on the manager node.
-	- Creates a service account for the Kubernetes Dashboard.
-- **k8s_start_workers** role: 
-	- The compute nodes are initialized and joined to the Kubernetes cluster with the manager node. 
-- **k8s_start_services** role
-	- Deploys Kubernetes services such as the Kubernetes Dashboard, Prometheus, MetalLB, and the NFS client provisioner
-
-__Note:__ 
-* After Kubernetes is installed and configured, a few Kubernetes and calico/flannel related ports are opened on the manager and compute nodes. This is required for Kubernetes Pod-to-Pod and Pod-to-Service communications. Calico/flannel provides a full networking stack for Kubernetes pods.
-* If Kubernetes Pods are unable to communicate with the servers when the DNS servers are not responding, the Kubernetes Pod Network CIDR may be overlapping with the host network, which causes a DNS issue. To resolve this issue:
-1. In your Kubernetes cluster, run `kubeadm reset -f` on the nodes.
-2. On the management node, edit the `omnia_config.yml` file to change the Kubernetes Pod Network CIDR. The suggested IP range is 192.168.0.0/16; ensure that the range you provide is not in use in your host network.
-3. Execute omnia.yml and skip slurm using --skip-tags slurm
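As a minimal sketch of these three steps (the repository location and inventory path are illustrative, not prescriptive):

```
# Run on every node of the existing cluster to reset Kubernetes.
kubeadm reset -f

# On the management node, set a non-overlapping Pod Network CIDR
# (for example, 192.168.0.0/16) in omnia_config.yml, then re-run omnia.yml
# and skip Slurm. The inventory path below is illustrative.
ansible-playbook omnia.yml -i inventory --skip-tags slurm
```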
- 
-## Slurm roles
-
-The following __Slurm__ roles are provided by Omnia when the __omnia.yml__ file is run:
-- **slurm_common** role:
-	- Installs the common packages on the manager and compute nodes.
-- **slurm_manager** role:
-	- Installs the packages related only to the manager node
-	- This role also enables the required ports to be used by Slurm.  
-	    **tcp_ports**: 6817,6818,6819  
-		**udp_ports**: 6817,6818,6819
-	- Creates and updates the Slurm configuration files based on the manager node requirements.
-- **slurm_workers** role:
-	- Installs the Slurm packages on all compute nodes as per the compute node requirements.
-- **slurm_start_services** role: 
-	- Starts the Slurm services so that the compute nodes communicate with the manager node.
-- **slurm_exporter** role: 
-	- Slurm exporter is a package for exporting metrics collected from the Slurm resource scheduling system to Prometheus.
-	- Slurm exporter is installed on the host, like Slurm, and is installed successfully only if Slurm is installed.
-
-## Add a new compute node to the cluster
-
-If a new node is provisioned through Cobbler, the node address is automatically displayed on the AWX dashboard. The node is not assigned to any group. You can add the node to the compute group along with the existing nodes and run `omnia.yml` to add the new node to the cluster and update the configurations in the manager node.

File diff suppressed because it is too large
+ 293 - 0
docs/INSTALL_OMNIA_CONTROL_PLANE.md


+ 6 - 0
docs/INVENTORY

@@ -4,3 +4,9 @@ compute-02
 
 [manager]
 manager-01
+
+[nfs_node]
+nfs-node-01
+
+[login_node]
+login-node-01

+ 80 - 78
docs/MONITOR_CLUSTERS.md

@@ -1,93 +1,95 @@
-# Monitor Kuberentes and Slurm
+# Monitor Kubernetes and Slurm
 Omnia provides playbooks to configure additional software components for Kubernetes such as JupyterHub and Kubeflow. For workload management (submitting, controlling, and managing jobs) of HPC, AI, and Data Analytics clusters, you can access Kubernetes and Slurm dashboards and other supported applications. 
 
-To access any of the dashboards login to the manager node and open the installed web browser.
-
-If you are connecting remotely ensure your putty or any X11 based clients and you are using mobaxterm version 8 and above, follow the below mentioned steps:
-
-1. To provide __ssh__ to the manager node.
-   `ssh -x root@<ip>` (where IP is the private IP of manager node)
-2. `yum install firefox -y`
-3. `yum install xorg-x11-xauth`
-4. `export DISPLAY=:10.0`
-5. `logout and login back`
-6. To launch firefox from terminal use the following command: 
-   `firefox&`
-
-__Note:__ When the putty/mobaxterm session ends, you must run __export DISPLAY=:10.0__ command each time, else Firefox cannot be launched again.
-
-## Setup user account in manager node
-1. Login to head node as root user and run `adduser __<username>__`.
-2. Run `passwd __<username>__` to set password.
-3. Run `usermod -a -G wheel __<username>__` to give sudo permission.
-
-__Note:__ Kuberenetes and Slurm job can be scheduled only for users with __sudo__ privileges.
+## Before accessing the dashboards
+To access any of the dashboards, ensure that a compatible web browser is installed. If you are connecting remotely to your Linux server by using MobaXterm version later than 8 or other X11 clients through *ssh*, follow the below mentioned steps to launch the Firefox Browser:  
+* On the management station:
+	1. Connect using *ssh*. Run `ssh <user>@<IP-address>`, where *IP-address* is the private IP of the management station.
+	2. `dnf install mesa-libGL-devel -y`
+	3. `dnf install firefox -y`
+	4. `dnf install xorg-x11-xauth`
+	5. `export DISPLAY=:10.0`
+	6. Log out and log back in.
+	7. To launch Firefox from terminal, run `firefox&`.  
+	
+* On the manager node:
+	1. Connect using *ssh*. Run `ssh <user>@<IP-address>`, where *IP-address* is the private IP of the manager node.
+	2. `yum install firefox -y`
+	3. `yum install xorg-x11-xauth`
+	4. `export DISPLAY=:10.0`
+	5. Log out and log back in.
+	6. To launch Firefox from terminal, run `firefox&`
+
+**NOTE**: When the PuTTY or MobaXterm session ends, you must run the **export DISPLAY=:10.0** command each time; otherwise, Firefox cannot be launched again.  
+
+## Access FreeIPA Dashboard  
+The FreeIPA Dashboard can be accessed from the management station, manager, and login nodes. To access the dashboard:
+1.	Install the Firefox Browser.
+2.	Open the Firefox Browser and enter the URL `https://<hostname>`. For example, enter `https://manager.example.com`.
+3.	Enter the username and password. If the admin or user has obtained a Kerberos ticket, then the credentials need not be provided.  
+
+**Note**: To obtain a Kerberos ticket, perform the following actions:
+1. Enter `kinit <username>`
+2. When prompted, enter the password.
+
+An administrator can create users on the login node using FreeIPA. The users will be prompted to change their passwords upon first login.
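For illustration only, an administrator could create a user with FreeIPA and the user could then obtain a Kerberos ticket as follows (the user name and name fields below are placeholders):

```
# On the login node, as the FreeIPA admin, create a user (placeholder values).
ipa user-add user01 --first=User --last=One --password

# The user obtains and verifies a Kerberos ticket.
kinit user01
klist
```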
 
 ## Access Kubernetes Dashboard
-1. To verify if the __Kubernetes-dashboard service__ is __running__, run `kubectl get pods --namespace kubernetes-dashboard`.
+1. To verify if the **Kubernetes-dashboard** service is in the Running state, run `kubectl get pods --namespace kubernetes-dashboard`.
 2. To start the Kubernetes dashboard, run `kubectl proxy`.
-3. From the CLI, run `kubectl get secrets` to see the generated tokens.
-4. Copy the token with the name __prometheus-__-kube-state-metrics__ of the type __kubernetes.io/service-account-token__.
-5. Run `kubectl describe secret __<copied token name>__`
-6. Copy the encrypted token value.
-7. On a web browser(installed on the manager node), enter http://localhost:8001/api/v1/namespaces/kubernetes-dashboard/services/https:kubernetes-dashboard:/proxy/ to access the Kubernetes Dashboard.
-8. Select the authentication method as __Token__.
-9. On the Kuberenetes Dashboard, paste the copied encrypted token and click __Sign in__.
+3. To retrieve the encrypted token, run `kubectl get secret -n kubernetes-dashboard $(kubectl get serviceaccount admin-user -n kubernetes-dashboard -o jsonpath="{.secrets[0].name}") -o jsonpath="{.data.token}" | base64 --decode`.
+4. Copy the encrypted token value.
+5. On a web browser on the management station (for control_plane.yml) or manager node (for omnia.yml) enter http://localhost:8001/api/v1/namespaces/kubernetes-dashboard/services/https:kubernetes-dashboard:/proxy/.
+6. Select the authentication method as __Token__.
+7. On the Kubernetes Dashboard, paste the copied encrypted token and click **Sign in** to access the Kubernetes Dashboard.
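The steps above can also be collected into one terminal session; this sketch only reuses the commands already listed:

```
# Verify the dashboard pods, print the admin-user token, then start the proxy.
kubectl get pods --namespace kubernetes-dashboard
kubectl get secret -n kubernetes-dashboard \
  $(kubectl get serviceaccount admin-user -n kubernetes-dashboard -o jsonpath="{.secrets[0].name}") \
  -o jsonpath="{.data.token}" | base64 --decode
kubectl proxy
```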
 
 ## Access Kubeflow Dashboard
-
-It is recommended that you use port numbers between __8000-8999__ and the suggested port number is __8085__.
-
-1. To view the ports which are in use, run the following command:
-   `netstat -an`
-2. Select a port number between __8000-8999__ which is not in use.
-3. To run the **Kubeflow Dashboard** at selected port number, run one of the following commands:  
-	`kubectl port-forward -n kubeflow service/centraldashboard __selected_port_number__:80`  
-	(Or)  
-	`kubectl port-forward -n istio-system svc/istio-ingressgateway __selected_port_number__:80`
-4. On a web browser installed on the manager node, go to http://localhost:selected-port-number/ to launch the Kubeflow Central Dashboard.  
+1. Before accessing the Kubeflow Dashboard, run `kubectl -n kubeflow get applications -o yaml profiles`. Wait till **profiles-deployment** enters the Ready state.
+2. To retrieve the **External IP** or **CLUSTER IP**, run `kubectl get services istio-ingressgateway --namespace istio-system`.
+3. On a web browser installed on the manager node, enter the **External IP** or **Cluster IP** to open the Kubeflow Central Dashboard.  
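If the istio-ingressgateway service is of type LoadBalancer and already has an address assigned, one way to print just the external IP is sketched below; the jsonpath expression is an addition, not part of the original steps:

```
# Print the external IP (if assigned) of the istio-ingressgateway service.
kubectl get services istio-ingressgateway --namespace istio-system \
  -o jsonpath='{.status.loadBalancer.ingress[0].ip}'
```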
 
 For more information about the Kubeflow Central Dashboard, see https://www.kubeflow.org/docs/components/central-dash/overview/.
 
 ## Access JupyterHub Dashboard
 
 1. To verify if the JupyterHub services are running, run `kubectl get pods --namespace jupyterhub`.
-2. Ensure that the pod names starting with __hub__ and __proxy__ are in __Running__ status.
-3. Run `kubectl get services --namespace jupyterhub`.
-4. Copy the **External IP** of __proxy-public__ service.
-5. On a web browser installed on the __manager node__, use the External IP address to access the JupyterHub Dashboard.
-6. Enter any __username__ and __password__ combination to enter the Jupyterhub. The __username__ and __password__ can be later configured from the JupyterHub dashboard.
-
-## Prometheus
-
-Prometheus is installed in two different ways:
-  * It is installed on the host when Slurm is installed without installing Kubernetes.
-  * It is installed as a Kubernetes role, if you install both Slurm and Kubernetes.
-
-If Prometheus is installed as part of kubernetes role, run the following commands before starting the Prometheus UI:
-1. `export POD_NAME=$(kubectl get pods --namespace default -l "app=prometheus,component=server" -o jsonpath="{.items[0].metadata.name}")`
-2. `echo $POD_NAME`
-3. `kubectl --namespace default port-forward $POD_NAME 9090`
-
-If Prometheus is installed on the host, start the Prometheus web server by run the following command:
-1. Navigate to Prometheus folder. The default path is __/var/lib/prometheus-2.23.0.linux-amd64/__.
-2. Start the web server, 
-  `./prometheus`
-
-Go to http://localhost:9090 to launch the Prometheus UI in the browser.
+2. Ensure that the pod names starting with __hub__ and __proxy__ are in the **Running** state.
+3. To retrieve the **External IP** or **CLUSTER IP**, run `kubectl get services proxy-public --namespace jupyterhub`.
+4. On a web browser installed on the manager node, enter the **External IP** or **Cluster IP** to open the JupyterHub Dashboard.
+5. JupyterHub is running with a default dummy authenticator. Enter any username and password combination to access the dashboard.
+
+For more information about configuring username and password, and to access the JupyterHub Dashboard, see https://zero-to-jupyterhub.readthedocs.io/en/stable/jupyterhub/customization.html.
+
+## Access Prometheus UI
+
+Prometheus is installed:
+  * As a Kubernetes role (**A**), when both Slurm and Kubernetes are installed.
+  * On the host when only Slurm is installed (**B**).
+
+**A**. When Prometheus is installed as a Kubernetes role.  
+* Access Prometheus with local host:  
+    1. Run the following commands:  
+       `export POD_NAME=$(kubectl get pods --namespace default -l "app=prometheus,component=server" -o jsonpath="{.items[0].metadata.name}")`  
+       `echo $POD_NAME`  
+       `kubectl --namespace default port-forward $POD_NAME 9090`  
+    2. To launch the Prometheus UI, in the web browser, enter `http://localhost:9090`.
+  
+* Access Prometheus with a private IP address:
+    1. Run `kubectl get services --all-namespaces`.
+    2. From the list of services, find  the **prometheus-xxxx-server** service under the **Name** column, and copy the **EXTERNAL-IP** address.  
+   For example, in the below list of services, `192.168.2.150` is the external IP address for the service `prometheus-1619158141-server`.  
+		NAMESPACE	|	NAME	|	TYPE	|	CLUSTER-IP	|	EXTERNAL-IP	|	PORT(S)	|	AGE  
+		---------	|	----	|	----	|	----------	|	-----------	|	-------	|	----  
+		default	|	kubernetes	|	ClusterIP	|	10.96.0.1	|	none	|	443/TCP	|	107m  
+		default	|	**prometheus-1619158141-server**	|	LoadBalancer	|	10.97.40.140	|	**192.168.2.150**	|	80:31687/TCP	|	106m  
+    3. To open Firefox, run `firefox&`.
+    4. Enter the copied External IP address to access Prometheus. For example, enter `192.168.2.150` to access Prometheus UI.
+
+**B**. When Prometheus is installed on the host.
+1. Navigate to Prometheus folder. The default path is `/var/lib/prometheus-2.23.0.linux-amd64/`.
+2. Start the web server: `./prometheus`.  
+3. To launch the Prometheus UI, in the web browser, enter `http://localhost:9090`. 
 
 __Note:__ 
-* If Prometheus was installed through slurm without Kubernetes then it will be removed when Kubernetes is installed as Prometheus would be running as a pod. 
-* You can use a single instance of Prometheus when both kubernetes and slurm are installed.
-
-
-
-
-
- 
-
-
-
-
-
-
+* If Prometheus is installed through Slurm without installing Kubernetes, then it will be removed when Kubernetes is installed because Prometheus would be running as a pod. 
+* Only a single instance of Prometheus is installed when both Kubernetes and Slurm are installed.

+ 0 - 36
docs/PREINSTALL_OMNIA_APPLIANCE.md

@@ -1,36 +0,0 @@
-# Prerequisites to install the Omnia appliance
-
-Ensure that the following prequisites are met before installing the Omnia appliance:
-* On the management node, install Ansible and Git using the following commands:
-	* `yum install epel-release -y`
-	* `yum install ansible-2.9.18 git -y`  
-	__Note:__ Ansible must be installed using __yum__. If Ansible is installed using __pip3__, re-install it using the __yum__ command again.
-* Ensure a stable Internet connection is available on management node and target nodes. 
-* CentOS 7.9 2009 is installed on the management node.
-* To provision the bare metal servers, go to http://isoredirect.centos.org/centos/7/isos/x86_64/ and download the **CentOS-7-x86_64-Minimal-2009** ISO file.
-* For DHCP configuration, you can provide a mapping file. The provided details must be in the format: MAC, Hostname, IP. For example, `xx:xx:4B:C4:xx:44,validation01,172.17.0.81` and  `xx:xx:4B:C5:xx:52,validation02,172.17.0.82` are valid entries.  
-__Note:__ A template for mapping file is present in the `omnia/examples`, named `mapping_file.csv`. The header in the template file must not be deleted before saving the file.  
-__Note:__ Ensure that duplicate values are not provided for MAC, Hostname, and IP in the mapping file. The Hostname should not contain the following characters: , (comma), \. (period), and _ (underscore).
-* Connect one of the Ethernet cards on the management node to the HPC switch and the other ethernet card connected to the global network.
-* If SELinux is not disabled on the management node, disable it from `/etc/sysconfig/selinux` and restart the management node.
-* The default mode of PXE is __UEFI__, and the BIOS Legacy Mode is not supported.
-* The default boot order for the bare metal servers must be __PXE__.
-* Configuration of __RAID__ is not part of Omnia. If bare metal servers have __RAID__ controller installed then it is mandatory to create **VIRTUAL DISK**.
-
-## Assumptions
-
-## Example system designs
-Omnia can configure systems which use Ethernet or Infiniband-based fabric to connect the compute servers.
-
-![Example system configuration with Ethernet fabric](images/example-system-ethernet.png)
-
-![Example system configuration with Infiniband fabric](images/example-system-infiniband.png)
-
-## Network Setup
-Omnia assumes that servers are already connected to the network and have access to the internet.
-### Network Topology
-Possible network configurations include:
-* A flat topology where all nodes are connected to a switch which includes an uplink to the internet. This requires multiple externally-facing IP addresses
-* A hierarchical topology where compute nodes are connected to a common switch, but the manager node contains a second network connection which is connected to the internet. All outbound/inbound traffic would be routed through the manager node. This requires setting up firewall rules for IP masquerade, see [here](https://www.server-world.info/en/note?os=CentOS_7&p=firewalld&f=2) for an example.
-### IP and Hostname Assignment
-The recommended setup is to assign IP addresses to individual servers. This can be done manually by logging onto each node, or via DHCP.

+ 131 - 46
docs/README.md

@@ -1,12 +1,13 @@
-**Omnia** (Latin: all or everything) is a deployment tool to configure Dell EMC PowerEdge servers running standard RPM-based Linux OS images into clusters capable of supporting HPC, AI, and data analytics workloads. It uses Slurm, Kubernetes, and other packages to manage jobs and run diverse workloads on the same converged solution. It is a collection of [Ansible](https://ansible.org) playbooks, is open source, and is constantly being extended to enable comprehensive workloads.
+**Omnia** (Latin: all or everything) is a deployment tool to configure Dell EMC PowerEdge servers running standard RPM-based Linux OS images into clusters capable of supporting HPC, AI, and data analytics workloads. It uses Slurm, Kubernetes, and other packages to manage jobs and run diverse workloads on the same converged solution. It is a collection of [Ansible](https://ansible.com) playbooks, is open source, and is constantly being extended to enable comprehensive workloads.
 
-## Blogs about Omnia
-- [Introduction to Omnia](https://infohub.delltechnologies.com/p/omnia-open-source-deployment-of-high-performance-clusters-to-run-simulation-ai-and-data-analytics-workloads/)
-- [Taming the Accelerator Cambrian Explosion with Omnia](https://infohub.delltechnologies.com/p/taming-the-accelerator-cambrian-explosion-with-omnia/)
-- [Containerized HPC Workloads Made Easy with Omnia and Singularity](https://infohub.delltechnologies.com/p/containerized-hpc-workloads-made-easy-with-omnia-and-singularity/)
+#### Current release version
+1.1
+
+#### Previous release version
+1.0
 
 ## What Omnia does
-Omnia can build clusters which use Slurm or Kubernetes (or both!) for workload management. Omnia will install software from a variety of sources, including:
+Omnia can build clusters that use Slurm or Kubernetes (or both!) for workload management. Omnia will install software from a variety of sources, including:
 - Standard CentOS and [ELRepo](http://elrepo.org) repositories
 - Helm repositories
 - Source code compilation
@@ -21,50 +22,69 @@ Whenever possible, Omnia will leverage existing projects rather than reinvent th
 Omnia can install Kubernetes or Slurm (or both), along with additional drivers, services, libraries, and user applications.
 ![Omnia Kubernetes Stack](images/omnia-k8s.png)
 
-![Omnia Slurm Stack](images/omnia-slurm.png) 
-
-## Deploying clusters using the Omnia Appliance
-The Omnia Appliance will automate the entire cluster deployment process, starting with provisioning the operating system to servers.
+![Omnia Slurm Stack](images/omnia-slurm.png)  
 
-Ensure all the prerequisites listed in [preparation to install Omnia Appliance](PREINSTALL_OMNIA_APPLIANCE.md) are met before installing the Omnia appliance.
+## What's new in this release
+* Provisioning of CentOS 7.9 custom ISO on supported PowerEdge servers using iDRAC.
+* Configuring Dell EMC networking switches, Mellanox InfiniBand switches, and PowerVault storage devices in the cluster. 
+* An option to configure a login node with the same configurations as the compute nodes in the cluster. With appropriate user privileges provided by the cluster administrator, users can log in to the login node and schedule Slurm jobs. The authentication mechanism in the login node uses the FreeIPA solution.
+* Options to enable the security settings on the iDRAC such as system lockdown mode, secure boot mode, 2-factor authentication (2FA), and LDAP directory services.
 
-For detailed instructions on installing the Omnia appliance, see [Install Omnia Appliance](INSTALL_OMNIA_APPLIANCE.md).
+## Deploying clusters using the Omnia control plane
+The Omnia Control Plane will automate the entire cluster deployment process, starting with provisioning the operating system on the supported devices and updating the firmware versions of PowerEdge Servers. 
+For detailed instructions, see [Install the Omnia Control Plane](INSTALL_OMNIA_CONTROL_PLANE.md).  
 
 ## Installing Omnia to servers with a pre-provisioned OS
-Omnia can be deploy clusters to servers that already have an RPM-based Linux OS running on them, and are all connected to the Internet. Currently all Omnia testing is done on [CentOS](https://centos.org). Please see [Preparation to install Omnia](PREINSTALL_OMNIA.md) for instructions on network setup.
+Omnia can be deployed on clusters that already have an RPM-based Linux OS running on them and are all connected to the Internet. Currently, all Omnia testing is done on [CentOS](https://centos.org). Please see [Example system designs](EXAMPLE_SYSTEM_DESIGNS.md) for instructions on the network setup.
 
-Once servers have functioning OS and networking, you can use Omnia to install and start Slurm and/or Kubernetes. Please see [Install Omnia using CLI](INSTALL_OMNIA.md) for detailed instructions.  
+Once servers have functioning OS and networking, you can use Omnia to install and start Slurm and/or Kubernetes. For detailed instructions, see [Install Omnia using CLI](INSTALL_OMNIA.md). 
 
 # System requirements  
-Ensure the supported version of all the software are installed as per the following table and other versions than those listed are not supported by Omnia. This is to ensure that there is no impact to the functionality of Omnia.
+The following table lists the software and operating system requirements on the management station, manager, and compute nodes. To avoid any impact on the proper functioning of Omnia, versions other than those listed are not supported.  
 
-Software and hardware requirements  |   Version
+Requirements  |   Version
 ----------------------------------  |   -------
-OS installed on the management node  |  CentOS 7.9 2009
-OS deployed by Omnia on bare-metal servers | CentOS 7.9 2009 Minimal Edition
+OS pre-installed on the management station  |  CentOS 8.3
+OS deployed by Omnia on bare-metal Dell EMC PowerEdge Servers | CentOS 7.9 2009 Minimal Edition
 Cobbler  |  2.8.5
-Ansible AWX  |  15.0.0
+Ansible AWX  |  19.1.0
 Slurm Workload Manager  |  20.11.2
-Kubernetes Controllers  |  1.16.7
+Kubernetes on the management station  |  1.21.0
+Kubernetes on the manager and compute nodes	|	1.16.7 or 1.19.3
 Kubeflow  |  1
 Prometheus  |  2.23.0
-Supported PowerEdge servers  |  R640, R740, R7525, C4140, DSS8440, and C6420
+
+## Hardware managed by Omnia
+The following table lists the supported devices managed by Omnia. Devices other than those listed in the following table will be discovered by Omnia, but the features offered by Omnia will not be applicable to them.
+
+Device type	|	Supported models	
+-----------	|	-------	
+Dell EMC PowerEdge Servers	|	PowerEdge C4140, C6420, C6520, R240, R340, R440, R540, R640, R650, R740, R740xd, R740xd2, R750, R750xa, R840, R940, R940xa
+Dell EMC PowerVault Storage	|	PowerVault ME4084, ME4024, and ME4012 Storage Arrays
+Dell EMC Networking Switches	|	PowerSwitch S3048-ON and PowerSwitch S5232F-ON
+Mellanox InfiniBand Switches	|	NVIDIA MQM8700-HS2F Quantum HDR InfiniBand Switch 40 QSFP56
+
 
 ## Software managed by Omnia
-Ensure the supported version of all the software are installed as per the following table and other versions than those listed are not supported by Omnia. This is to ensure that there is no impact to the functionality of Omnia.
+The following table lists the software and its compatible version managed by Omnia. To avoid any impact on the proper functioning of Omnia, versions other than those listed are not supported.
 
-Software	|	Licence	|	Compatible Version	|	Description
+Software	|	License	|	Compatible Version	|	Description
 -----------	|	-------	|	----------------	|	-----------------
+CentOS Linux release 7.9.2009 (Core)	|	-	|	7.9	|	Operating system on entire cluster except for management station
+CentOS Linux release 8.3.2011	|	-	|	8.3	|	Operating system on the management station	
 MariaDB	|	GPL 2.0	|	5.5.68	|	Relational database used by Slurm
-Slurm	|	GNU General Public	|	20.11.2	|	HPC Workload Manager
+Slurm	|	GNU General Public	|	20.11.7	|	HPC Workload Manager
 Docker CE	|	Apache-2.0	|	20.10.2	|	Docker Service
+FreeIPA	|	GNU General Public License v3	|	4.6.8	|	Authentication system used in the login node
+OpenSM	|	GNU General Public License 2	|	3.3.21	|	-
 NVIDIA container runtime	|	Apache-2.0	|	3.4.2	|	Nvidia container runtime library
-Python PIP	|	MIT Licence	|	3.2.1	|	Python Package
-Python2	|	-	|	2.7.5	|	-
-Kubelet	|	Apache-2.0	|	1.16.7	|	Provides external, versioned ComponentConfig API types for configuring the kubelet
-Kubeadm	|	Apache-2.0	|	1.16.7	|	"fast paths" for creating Kubernetes clusters
-Kubectl	|	Apache-2.0	|	1.16.7	|	Command line tool for Kubernetes
-JupyterHub	|	Modified BSD Licence	|	1.1.0	|	Multi-user hub
+Python PIP	|	MIT License	|	21.1.2	|	Python Package
+Python3	|	-	|	3.6.8	|	-
+Kubelet	|	Apache-2.0	|	1.16.7,1.19,1.21	|	Provides external, versioned ComponentConfig API types for configuring the kubelet
+Kubeadm	|	Apache-2.0	|	1.16.7,1.19,1.21	|	"fast paths" for creating Kubernetes clusters
+Kubectl	|	Apache-2.0	|	1.16.7,1.19,1.21	|	Command line tool for Kubernetes
+JupyterHub	|	Modified BSD License	|	1.1.0	|	Multi-user hub
+kubernetes Controllers	|	Apache-2.0	|	1.16.7,1.19,1.21	|	Orchestration tool	
 Kfctl	|	Apache-2.0	|	1.0.2	|	CLI for deploying and managing Kubeflow
 Kubeflow	|	Apache-2.0	|	1	|	Cloud Native platform for machine learning
 Helm	|	Apache-2.0	|	3.5.0	|	Kubernetes Package Manager
@@ -74,29 +94,94 @@ Horovod	|	Apache-2.0	|	0.21.1	|	Distributed deep learning training framework for
 MPI	|	Copyright (c) 2018-2019 Triad National Security,LLC. All rights reserved.	|	0.2.3	|	HPC library
 CoreDNS	|	Apache-2.0	|	1.6.2	|	DNS server that chains plugins
 CNI	|	Apache-2.0	|	0.3.1	|	Networking for Linux containers
-AWX	|	Apache-2.0	|	15.0.0	|	Web-based User Interface
+AWX	|	Apache-2.0	|	19.1.0	|	Web-based User Interface
+AWX.AWX	|	Apache-2.0	|	19.1.0	|	Galaxy collection to perform awx configuration
+AWXkit	|	Apache-2.0	|	to be updated	|	To perform configuration through CLI commands
+Cri-o	|	Apache-2.0	|	1.21	|	Container Service
+Buildah	|	Apache-2.0	|	1.19.8	|	Tool to build and run container
 PostgreSQL	|	Copyright (c) 1996-2020, PostgreSQL Global Development Group	|	10.15	|	Database Management System
-Redis	|	BSD-3-Clause Licence	|	6.0.10	|	In-memory database
-NGINX	|	BSD-2-Clause Licence	|	1.14	|	-
-
-# Known issue  
-Issue: Hosts do not display on the AWX UI.  
+Redis	|	BSD-3-Clause License	|	6.0.10	|	In-memory database
+NGINX	|	BSD-2-Clause License	|	1.14	|	-
+dellemc.openmanage	|	GNU-General Public License v3.0	|	3.5.0	|	It is a systems management and monitoring application that provides a comprehensive view of the Dell EMC servers, chassis, storage, and network switches on the enterprise network
+dellemc.os10	|	GNU-General Public License v3.1	|	1.1.1	|	It provides networking hardware abstraction through a common set of APIs
+Genisoimage-dnf	|	GPL v3	|	1.1.11	|	Genisoimage is a pre-mastering program for creating ISO-9660 CD-ROM  filesystem images
+OMSDK	|	Apache-2.0	|	1.2.456	|	Dell EMC OpenManage Python SDK (OMSDK) is a python library that helps developers and customers to automate the lifecycle management of PowerEdge Servers
+
+# Supported interface keys of PowerSwitch S3048-ON (ToR Switch)
+The following table provides details about the interface keys supported by the S3048-ON ToR Switch. Dell EMC Networking OS10 Enterprise Edition is the supported operating system.
+
+Interface key name	|	Type	|	Description
+---------	|   ----	|	-----------
+desc	|	string	|	Configures a single line interface description
+portmode	|	string	|	Configures port mode according to the device type
+switchport	|	boolean: true, false*	|	Configures an interface in L2 mode
+admin	|	string: up, down*	|	Configures the administrative state for the interface; configuring the value as administratively "up" enables the interface; configuring the value as administratively "down" disables the interface
+mtu	|	integer	|	Configures the MTU size for L2 and L3 interfaces (1280 to 65535)
+speed	|	string: auto, 1000, 10000, 25000, ...	|	Configures the speed of the interface
+fanout	|	string: dual, single; string:10g-4x, 40g-1x, 25g-4x, 100g-1x, 50g-2x (os10)	|	Configures fanout to the appropriate value
+suppress_ra	|	string: present, absent	|	Configures IPv6 router advertisements if set to present
+ip_type_dynamic	|	boolean: true, false	|	Configures IP address DHCP if set to true (ip_and_mask is ignored if set to true)
+ipv6_type_dynamic	|	boolean: true, false	|	Configures an IPv6 address for DHCP if set to true (ipv6_and_mask is ignored if set to true)
+ipv6_autoconfig	|	boolean: true, false	|	Configures stateless configuration of IPv6 addresses if set to true (ipv6_and_mask is ignored if set to true)
+vrf	|	string	|	Configures the specified VRF to be associated to the interface
+min_ra	|	string	|	Configures RA minimum interval time period
+max_ra	|	string	|	Configures RA maximum interval time period
+ip_and_mask	|	string	|	Configures the specified IP address to the interface
+ipv6_and_mask	|	string	|	Configures a specified IPv6 address to the interface
+virtual_gateway_ip	|	string	|	Configures an anycast gateway IP address for a VXLAN virtual network as well as VLAN interfaces
+virtual_gateway_ipv6	|	string	|	Configures an anycast gateway IPv6 address for VLAN interfaces
+state_ipv6	|	string: absent, present*	|	Deletes the IPV6 address if set to absent
+ip_helper	|	list	|	Configures DHCP server address objects (see ip_helper.*)
+ip_helper.ip	|	string (required)	|	Configures the IPv4 address of the DHCP server (A.B.C.D format)
+ip_helper.state	|	string: absent, present*	|	Deletes the IP helper address if set to absent
+flowcontrol	|	dictionary	|	Configures the flowcontrol attribute (see flowcontrol.*)
+flowcontrol.mode	|	string: receive, transmit	|	Configures the flowcontrol mode
+flowcontrol.enable	|	string: on, off	|	Configures the flowcontrol mode on
+flowcontrol.state	|	string: absent, present	|	Deletes the flowcontrol if set to absent
+ipv6_bgp_unnum	|	dictionary	|	Configures the IPv6 BGP unnum attributes (see ipv6_bgp_unnum.*) below
+ipv6_bgp_unnum.state	|	string: absent, present*	|	Disables auto discovery of BGP unnumbered peer if set to absent
+ipv6_bgp_unnum.peergroup_type	|	string: ebgp, ibgp	|	Specifies the type of template to inherit from
+stp_rpvst_default_behaviour	|	boolean: false, true	|	Configures the RPVST default behavior of BPDUs when set to true, which is the default
+
+* *(Asterisk) denotes the default value.
+
+# Known issues  
+* **Issue**: Hosts are not displayed on the AWX UI.  
+	**Resolution**:  
+	* Verify if the *provisioned_hosts.yml* file is present in the *omnia/appliance/roles/inventory/files* folder.
+	* Verify whether the hosts are listed in the *provisioned_hosts.yml* file.  
+		* If hosts are not listed, then servers are not PXE booted yet.
+		* If hosts are listed, then an IP address has been assigned to them by DHCP. However, hosts are not displayed on the AWX UI as the PXE boot is still in process or is not initiated.
+	* Check for the reachable and unreachable hosts using the **provisioned_report.yml** tool present in the *omnia/appliance/tools* folder. To run provisioned_report.yml, in the omnia/appliance directory, run `ansible-playbook -i roles/inventory/files/provisioned_hosts.yml tools/provisioned_report.yml`.
+
+* **Issue**: There are **ImagePullBack** or **ErrPullImage** errors in the status of Kubernetes pods.  
+	**Cause**: The errors occur when the Docker pull limit is exceeded.  
+	**Resolution**:
+	* For **omnia.yml** and **control_plane.yml**: Provide the docker username and password for the Docker Hub account in the *omnia_config.yml* file and execute the playbook. 
+	* **Note**: If the playbook has already been executed and the pods are in the __ImagePullBack__ error state, then run `kubeadm reset -f` on all the nodes before re-executing the playbook with the Docker credentials.
+
+* **Issue**: The `kubectl` command stops working after a reboot and displays the following error message: *The connection to the server head_node_ip:port was refused - did you specify the right host or port?*  
+	**Resolution**:
+	On the management station or the manager node, run the following commands:  
+	* `swapoff -a`
+	* `systemctl restart kubelet`  
 	
-Resolution:  
-* Verify if `provisioned_hosts.yml` is present in the `omnia/appliance/roles/inventory/files` folder.
-* Verify if hosts are not listed in the `provisioned_hosts.yml` file. If hosts are not listed, then servers are not PXE booted yet.
-* If hosts are listed in the `provisioned_hosts.yml` file, then an IP address has been assigned to them by DHCP. However, hosts are not displayed on the AWX UI as the PXE boot is still in process or is not initiated.
-* Check for the reachable and unreachable hosts using the `provisioned_report.yml` tool present in the `omnia/appliance/tools` folder. To run provisioned_report.yml, in the omnia/appliance directory, run `playbook -i roles/inventory/files/provisioned_hosts.yml tools/provisioned_report.yml`.
+* **Issue**: If control_plane.yml fails at the webui_awx role, then the previous IP address and password are not cleared when control_plane.yml is re-run.   
+	**Resolution**: In the *webui_awx/files* directory, delete the *.tower_cli.cfg* and *.tower_vault_key* files, and then re-run `control_plane.yml`.
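	A small sketch of that cleanup; the path to the webui_awx role below is an assumption based on the repository layout and may differ in your checkout:

```
# Remove the stale AWX credential files, then re-run the control plane playbook.
cd omnia/control_plane/roles/webui_awx/files    # path is an assumption
rm -f .tower_cli.cfg .tower_vault_key
cd ../../../
ansible-playbook control_plane.yml
```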
 
 # [Frequently asked questions](FAQ.md)
 
 # Limitations
-1. Removal of Slurm and Kubernetes component roles are not supported. However, skip tags can be provided at the start of installation to select the component roles.​
-2. After the installation of the Omnia appliance, changing the manager node is not supported. If you need to change the manager node, you must redeploy the entire cluster.  
-3. Dell Technologies provides support to the Dell developed modules of Omnia. All the other third-party tools deployed by Omnia are outside the support scope.​
-4. To change the Kubernetes single node cluster to a multi-node cluster or to change a multi-node cluster to a single node cluster, you must either redeploy the entire cluster or run `kubeadm reset -f` on all the nodes of the cluster. You then need to run `omnia.yml` file and skip the installation of Slurm using the skip tags.
+* Removal of Slurm and Kubernetes component roles is not supported. However, skip tags can be provided at the start of installation to select the component roles.  
+* After installing the Omnia control plane, changing the manager node is not supported. If you need to change the manager node, you must redeploy the entire cluster.  
+* Dell Technologies provides support to the Dell-developed modules of Omnia. All the other third-party tools deployed by Omnia are outside the support scope.​
+* To change the Kubernetes single node cluster to a multi-node cluster or change a multi-node cluster to a single node cluster, you must either redeploy the entire cluster or run `kubeadm reset -f` on all the nodes of the cluster. You then need to run the *omnia.yml* file and skip the installation of Slurm using the skip tags.  
+* In a single node cluster, the login node and Slurm functionalities are not applicable. However, Omnia installs FreeIPA Server and Slurm on the single node.  
+* To change the Kubernetes version from 1.16 to 1.19 or 1.19 to 1.16, you must redeploy the entire cluster.  
+* The Kubernetes pods will not be able to access the Internet or start when firewalld is enabled on the node. This is a limitation of Kubernetes. Therefore, the firewalld daemon is disabled on all the nodes as part of the omnia.yml execution.
+
 # Contributing to Omnia
-The Omnia project was started to give members of the [Dell Technologies HPC Community](https://dellhpc.org) a way to easily setup clusters of Dell EMC servers, and to contribute useful tools, fixes, and functionality back to the HPC Community.
+The Omnia project was started to give members of the [Dell Technologies HPC Community](https://dellhpc.org) a way to easily set up clusters of Dell EMC servers, and to contribute useful tools, fixes, and functionality back to the HPC Community.
 
 # Open to All
 While we started Omnia within the Dell Technologies HPC Community, that doesn't mean that it's limited to Dell EMC servers, networking, and storage. This is an open project, and we want to encourage *everyone* to use and contribute to Omnia!

File diff suppressed because it is too large
+ 40 - 0
docs/control_plane/device_templates/CONFIGURE_INFINIBAND_SWITCHES.md


+ 32 - 0
docs/control_plane/device_templates/CONFIGURE_NETWORK_SWITCHES.md

@@ -0,0 +1,32 @@
+# Configuring Dell EMC Networking Switches  
+
+## Update the input parameters 
+Under the `control_plane/input_params` directory, edit the following files:
+1. `base_vars.yml` file: Update the following variable to enable or disable Ethernet switch configurations in the cluster.  
+
+	Variable	|	Default, choices	|	Description
+	-------	|	----------------	|	-----------------
+	ethernet_switch_support	|	<ul><li>**false**</li><li>true</li></ul>	|	Set the variable to "true" to enable Ethernet switch configurations.  
+
+2. `login_vars.yml` file:  Enter the following details to configure Ethernet switches.  
+	a. `ethernet_switch_username`- username for Ethernet switches.  
+	**NOTE**: The username must not contain the following characters: -, \\, "", and \'  
+	b. `ethernet_switch_password`- password for Ethernet switches.   
+	**NOTE**: Minimum length of the password must be eight characters and the maximum limit is 30 characters. Do not use these characters while entering a password: -, \\, "", and \'  
+
+3. `ethernet_vars.yml` file: If **ethernet_switch_support** is set to "true" in the *base_vars.yml* file, then update the following variables.
+
+	Variables	|	Default, choices	|	Description
+	----------------	|	-----------------	|	-----------------
+	os10_config	|	<ul><li>"interface vlan1"</li><li>"exit"</li></ul>	|	Global configurations for the switch.
+	os10_interface	|	By default: <ul><li>Port description is provided.</li> <li>Each interface is set to "up" state.</li>	|	Update the individual interfaces of the PowerSwitch S3048-ON (ToR Switch). </br>The interfaces are from **ethernet 1/1/1** to **ethernet 1/1/30**. For more information about the interfaces, see the *Supported interface keys of PowerSwitch S3048-ON (ToR Switch)* section in the README file. </br>**NOTE**: The playbooks will fail if any invalid configurations are entered.
+	save_changes_to_startup	|	<ul><li>**false**</li><li>true</li></ul>	|	Change it to "true" only when you are certain that the updated configurations and commands are valid. </br>**WARNING**: When set to "true", the startup configuration file is updated. If incorrect configurations or commands are entered, the Ethernet switches may not operate as expected.   
+	
+## Configuring Dell EMC Networking Switches
+
+### Run ethernet_template on the AWX UI.
+1. Run `kubectl get svc -n awx`.
+2. Copy the Cluster-IP address of the awx-service. 
+3. To retrieve the AWX UI password, run `kubectl get secret awx-admin-password -n awx -o jsonpath="{.data.password}" | base64 --decode`.
+4. Open the default web browser on the management station and enter the awx-service IP address. Log in to the AWX UI using `admin` as the username and the retrieved password.
+5. Under __RESOURCES__ -> __Templates__, launch the **ethernet_template**.
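Steps 1 through 3 can also be scripted; a short sketch using the service and secret names from the steps above:

```
# Print the awx-service Cluster-IP and the AWX admin password.
kubectl get svc awx-service -n awx -o jsonpath='{.spec.clusterIP}'
kubectl get secret awx-admin-password -n awx -o jsonpath="{.data.password}" | base64 --decode
```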

File diff suppressed because it is too large
+ 41 - 0
docs/control_plane/device_templates/CONFIGURE_POWERVAULT_STORAGE.md


+ 138 - 0
docs/control_plane/device_templates/PROVISION_SERVERS.md

@@ -0,0 +1,138 @@
+# Custom ISO provisioning on Dell EMC PowerEdge Servers
+
+## Update the input parameters
+
+Edit the following files under the `control_plane/input_params` directory to provide the required input parameters.
+1. Edit the `login_vars.yml` file to enter the following details:  
+	a. `provision_password`- password used while provisioning OS on bare metal servers.  
+	b. `cobbler_password`- password for Cobbler.    
+	c. `idrac_username` and `idrac_password`- iDRAC username and password.   
+	**NOTE**: The password must have a minimum length of eight characters and a maximum of 30 characters. Do not use these characters while entering a password: -, \\, "", and \'
+2. Edit the following variables in the `idrac_vars.yml` file.  
+
+	File name	|	Variables</br> [Required/ Optional]	|	Default, choices	|	Description
+	-------	|	----------------	|	-----------------	|	-----------------
+	idrac_vars.yml	|	idrac_system_profile</br> [Required]	|	<ul><li>**Performance**</li> <li>PerformancePerWatt(DAPC)</li> <li>PerformancePerWatt(OS)</li> <li>WorkstationPerformance</li></ul>	|	The system profile used for BIOS configuration. 
+	<br>	|	firmware_update_required</br> [Required]	|	<ul><li>**true**</li> <li>false</li></ul>	|	By default, Omnia updates the firmware on the servers. To disable the firmware update, set the variable to "false".
+	<br>	|	poweredge_model</br> [Required if "firmware_update_required" is set to "true"]	|	<ul><li>**C6420**</li> <li>R640</li><li>R740</li><li>C4140</li> <li>And other supported PowerEdge servers</li></ul>	|	Enter the required PowerEdge server models to update the firmware. For example, enter `R640,R740,C4140` to update firmware on these models of PowerEdge servers. For a complete list of supported PowerEdge servers, see the *Hardware managed by Omnia* section in the Readme file.
+	<br>	|	uefi_secure_boot</br> [Optional]	|	<ul><li>**disabled**</li> <li>enabled</li></ul>	|	Option to enable or disable the secure boot mode.
+	<br>	|	system_lockdown</br> [Optional]	|	<ul><li>**disabled**</li> <li>enabled</li></ul>	|	Option to enable or disable system lockdown.
+	<br>	|	two_factor_authentication</br> [Optional]	|	<ul><li>**disabled**</li> <li>enabled</li></ul>	|	Option to enable or disable the 2FA on iDRAC.</br> If enabled, update the required variables in the `idrac_tools_vars.yml` file.</br> **[WARNING]**: For the other iDRAC playbooks to run, you must manually disable 2FA by setting the *Easy 2FA State* to "Disabled" in the iDRAC settings.
+	<br>	|	ldap_directory_services</br> [Optional]	|	<ul><li>**disabled**</li> <li>enabled</li></ul>	|	Option to enable or disable the LDAP directory services on iDRAC.</br> If enabled, update the required variables in the `idrac_tools_vars.yml` file.
+
+## Custom ISO file creation for Out-of-band server management
+Omnia role used to create the custom ISO: *control_plane_customiso*  
+Based on the inputs provided in the `login_vars.yml` and `base_vars.yml` files, the Kickstart file is configured and added to the custom ISO file. The *unattended_centos7.iso* file is copied to an NFS share on the management station to provision the PowerEdge servers using iDRAC.  
+
+## Provisioning of PowerEdge Servers using iDRAC (Out-of-band server management)
+
+### Run idrac_template on the AWX UI.
+1. Run `kubectl get svc -n awx`.
+2. Copy the Cluster-IP address of the awx-service. 
+3. To retrieve the AWX UI password, run `kubectl get secret awx-admin-password -n awx -o jsonpath="{.data.password}" | base64 --decode`.
+4. Open the default web browser on the management station and enter the awx-service IP address. Log in to the AWX UI using `admin` as the username and the retrieved password.
+5. Under __RESOURCES__ -> __Templates__, launch the **idrac_template**.
+
+Omnia role used to provision custom ISO on PowerEdge Servers using iDRAC: *provision_idrac*  
+
+For the `idrac.yml` file to successfully provision the custom ISO on the PowerEdge Servers, ensure that the following prerequisites are met:
+* The **idrac_inventory** file is updated with the iDRAC IP addresses.
+* Required input parameters are updated in the **idrac_vars.yml** file under **omnia/control_plane/input_params** directory.
+* An *unattended_centos7.iso* file is available in an NFS path.
+* The Lifecycle Controller Remote Services of PowerEdge Servers is in the 'ready' state.
+* The Redfish services are enabled in the iDRAC settings under **Services**.
+* The PowerEdge Servers have the iDRAC Enterprise or Datacenter license. If the license is not found, servers will be PXE booted and provisioned using Cobbler.  
+
+The **provision_idrac** file configures and validates the following:
+* Required input parameters and prerequisites.
+* BIOS and SNMP settings.
+* The latest available version of the iDRAC firmware is updated.
+* If bare metal servers have a RAID controller installed, Virtual disks are created for RAID configuration.
+* Availability of iDRAC Enterprise or Datacenter License on iDRAC.  
+
+After the configurations are validated, the **provision_idrac** file provisions the custom ISO on the PowerEdge Servers. After the OS is provisioned successfully, iDRAC IP addresses are updated in the *provisioned_idrac_inventory* in AWX.
+
+**NOTE**: The `idrac.yml` file initiates the provisioning of custom ISO on the PowerEdge servers. Wait for some time for the node inventory to be updated on the AWX UI. 
+
+### Provisioning newly added PowerEdge servers in the cluster
+To provision newly added servers, wait till the iDRAC IP addresses are automatically added to the *idrac_inventory*. After the iDRAC IP addresses are added, launch the iDRAC template on the AWX UI to provision CentOS custom OS on the servers.  
+
+If you want to reprovision all the servers in the cluster or any of the faulty servers, you must remove the respective iDRAC IP addresses from *provisioned_idrac_inventory* on AWX UI and then launch the iDRAC template. If required, you can delete the *provisioned_idrac_inventory* from the AWX UI to remove the IP addresses of provisioned servers. After the servers are provisioned, *provisioned_idrac_inventory* is created and updated on the AWX UI.
+
+## OS provisioning on PowerEdge Servers using Cobbler on the host network  
+
+Omnia role used: *provision_cobbler*  
+Ports used by Cobbler:  
+* TCP ports: 80,443,69
+* UDP ports: 69,4011
+
+To create the Cobbler image, Omnia configures the following:
+* Firewall settings.
+* The kickstart file of Cobbler to enable the UEFI PXE boot.
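For reference only, opening the Cobbler ports listed above by hand would look roughly like the following; the provision_cobbler role performs the firewall configuration itself, so this is just an illustration:

```
# Illustrative only: provision_cobbler opens these ports as part of its firewall settings.
firewall-cmd --permanent --add-port=80/tcp --add-port=443/tcp --add-port=69/tcp
firewall-cmd --permanent --add-port=69/udp --add-port=4011/udp
firewall-cmd --reload
```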
+
+To access the Cobbler dashboard, enter `https://<IP>/cobbler_web` where `<IP>` is the Global IP address of the management station. For example, enter
+`https://100.98.24.225/cobbler_web` to access the Cobbler dashboard.
+
+__Note__: After the Cobbler Server provisions the operating system on the servers, IP addresses and hostnames are assigned by the DHCP service.  
+* If a mapping file is not provided, the hostname assigned to the server is based on the following format: **computexxx-xxx**, where "xxx-xxx" are the last two octets of the host IP address. For example, if the host IP address is 172.17.0.11, then the hostname assigned by Omnia is compute0-11.  
+* If a mapping file is provided, the hostnames follow the format provided in the mapping file.  
+
+__Note__: If you want to add more nodes, append the new nodes to the existing mapping file. However, do not modify the previous entries in the mapping file, as it may impact the existing cluster.
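For example, a newly added node could be appended to the existing mapping file in the MAC,Hostname,IP format; the MAC address, hostname, IP, and file name below are placeholders:

```
# Append only; leave the existing entries in the mapping file untouched.
echo "xx:yy:zz:aa:bb:cc,compute-new-01,172.17.0.20" >> mapping_file.csv
```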
+
+## Security enhancements  
+Omnia provides the following options to enhance security on the provisioned PowerEdge servers:
+* **System lockdown mode**: To enable the system lockdown mode on iDRAC, set the *system_lockdown* variable to "enabled" in the `idrac_vars.yml` file.
+* **Secure boot mode**: To enable the secure boot mode on iDRAC, set the *uefi_secure_boot* variable to "enabled" in the `idrac_vars.yml` file.
+* **2-factor authentication (2FA)**: To enable the 2FA on iDRAC, set the *two_factor_authentication* variable to "enabled" in the `idrac_vars.yml` file.  
+	
+	**WARNING**: If 2FA is enabled on iDRAC, you must manually disable 2FA on iDRAC by setting the *Easy 2FA State* to "Disabled" for the user specified in the `login_vars.yml` file to run other iDRAC playbooks. 
+	
+	Before executing the **idrac_2fa.yml**, you must edit the `idrac_tools_vars.yml` by running the following command: `ansible-vault edit idrac_tools_vars.yml --vault-password-file .idrac_vault_key`.   
+	
+	Provide the following details in the **idrac_2fa.yml** file.  
+	
+	File name	|	Variables</br> [Required if two_factor_authentication is enabled/ Optional]	|	Default, choices	|	Description
+	-------	|	----------------	|	-----------------	|	-----------------
+	idrac_2fa.yml	|	dns_domain_name</br> [Required]	|		|	DNS domain name to be set for iDRAC. 
+	<br>	|	ipv4_static_dns1, ipv4_static_dns2</br> [Required] 	|		|	DNS1 and DNS2 static IPv4 addresses.
+	<br>	|	smtp_server_ip</br> [Required]	|		|	Server IP address used for SMTP.
+	<br>	|	smtp_username</br> [Required]	|		|	Username for SMTP.
+	<br>	|	smtp_password</br> [Required]	|		|	Password for SMTP.
+	<br>	|	use_email_address_2fa</br> [Required]	|		|	Email address used for enabling 2FA. After 2FA is enabled, an authentication code is sent to the provided email address. 
+
+	**NOTE**: 2FA will be enabled on the iDRAC only if SMTP server details are valid and a test email notification is working using SMTP.  
+* **LDAP Directory Services**: To enable or disable the LDAP directory services, set the *ldap_directory_services* variable to "enabled" in the `idrac_vars.yml` file.  
+
+	Before executing the **idrac_ldap.yml** file, you must edit `idrac_tools_vars.yml` by running the following command: `ansible-vault edit idrac_tools_vars.yml --vault-password-file .idrac_vault_key`.  
+	
+	Provide the following values in the **idrac_ldap.yml** file.  
+
+	File name	|	Variables</br> [Required if ldap_directory_services is enabled/ Optional]	|	Default, choices	|	Description
+	-------	|	----------------	|	-----------------	|	-----------------
+	idrac_ldap.yml	|	cert_validation_enable</br> [Required]	|	<ul><li>**disabled**</li></ul>	|	This option will be disabled by default. If required, you must manually upload the CA certificate.
+	<br>	|	ldap_server_address</br> [Required] 	|		|	Server address used for LDAP.
+	<br>	|	ldap_port</br> [Required]	|	<ul><li>636</li></ul>	|	TCP port at which the LDAP server is listening for connections.
+	<br>	|	bind_dn</br> [Optional]	|		|	Distinguished Name of the node in your directory tree from which records are searched.
+	<br>	|	bind_password</br> [Optional]	|		|	Password used for "bind_dn".
+	<br>	|	base_dn</br> [Required]	|		|	Distinguished Name of the search base.
+	<br>	|	user_attribute</br> [Optional]	|		|	User attribute used for searching in LDAP server.
+	<br>	|	group_attribute</br> [Optional]	|		|	Group attribute used for searching in LDAP server.
+	<br>	|	group_attribute_is_dn</br> [Required]	|	<ul><li>**enabled**</li> <li>disabled</li></ul>	|	Specify whether the group attribute type is DN or not.
+	<br>	|	search_filter</br> [Optional]	|		|	Search scope is related to the Base DN. 
+	<br>	|	role_group1_dn</br> [Required]	|		|	DN of LDAP group to be added.
+	<br>	|	role_group1_privilege</br> [Required]	|	<ul><li>**Administrator**</li><li>Operator</li><li>ReadOnly</li></ul>	|	Privilege to LDAP role group 1.  
+	
+	To view the `idrac_tools_vars.yml` file, run the following command: `ansible-vault view idrac_tools_vars.yml --vault-password-file .idrac_vault_key`  
+	
+	**NOTE**: It is suggested that you use the ansible-vault view or edit commands and that you do not use the ansible-vault decrypt or encrypt commands. If you have used the ansible-vault decrypt or encrypt commands, provide 644 permission to `idrac_tools_vars.yml`.  
+
+On the AWX Dashboard, select the respective security requirement playbook and launch the iDRAC template by performing the following steps.
+1. On the AWX Dashboard, under __RESOURCES__ -> __Templates__, select the **idrac_template**.
+2. Under the **Details** tab, click **Edit**.
+3. In the **Edit Details** page, click the **Playbook** drop-down menu and select **tools/idrac_system_lockdown.yml**, **tools/idrac_secure_boot.yml**, **tools/idrac_2fa.yml**, or **tools/idrac_ldap.yml**.
+4. Click **Save**.
+5. To launch the iDRAC template with the respective playbook selected, click **Launch**.  
+
+ 
+
+

File diff suppressed because it is too large
+ 34 - 0
docs/control_plane/input_parameters/INFINIBAND_SWITCHES.md


+ 26 - 0
docs/control_plane/input_parameters/NETWORK_SWITCHES.md

@@ -0,0 +1,26 @@
+# Dell EMC Networking Switches  
+
+## Update the input parameters 
+Under the `control_plane/input_params` directory, edit the following files:
+1. `base_vars.yml` file: Update the following variable to enable or disable Ethernet switch configurations in the cluster.  
+
+	Variable	|	Default, choices	|	Description
+	-------	|	----------------	|	-----------------
+	ethernet_switch_support	|	<ul><li>**false**</li><li>true</li></ul>	|	Set the variable to "true" to enable Ethernet switch configurations.  
+
+2. `login_vars.yml` file:  Enter the following details to configure Ethernet switches.  
+	a. `ethernet_switch_username`- username for Ethernet switches.  
+	**NOTE**: The username must not contain the following characters: -, \\, "", and \'  
+	b. `ethernet_switch_password`- password for Ethernet switches.   
+	**NOTE**: Minimum length of the password must be eight characters and the maximum limit is 30 characters. Do not use these characters while entering a password: -, \\, "", and \'  
+
+3. `ethernet_vars.yml` file: If **ethernet_switch_support** is set to "true" in the *base_vars.yml* file, then update the following variables.
+
+	Variables	|	Default, choices	|	Description
+	----------------	|	-----------------	|	-----------------
+	os10_config	|	<ul><li>"interface vlan1"</li><li>"exit"</li></ul>	|	Global configurations for the switch.
+	os10_interface	|	By default: <ul><li>Port description is provided.</li> <li>Each interface is set to "up" state.</li>	|	Update the individual interfaces of the PowerSwitch S3048-ON (ToR Switch). </br>The interfaces are from **ethernet 1/1/1** to **ethernet 1/1/30**. For more information about the interfaces, see the *Supported interface keys of PowerSwitch S3048-ON (ToR Switch)* section in the README file. </br>**NOTE**: The playbooks will fail if any invalid configurations are entered.
+	save_changes_to_startup	|	<ul><li>**false**</li><li>true</li></ul>	|	Change it to "true" only when you are certain that the updated configurations and commands are valid. </br>**WARNING**: When set to "true", the startup configuration file is updated. If incorrect configurations or commands are entered, the Ethernet switches may not operate as expected.   
+	
+## Deploy Omnia Control Plane
+Before you configure the Dell EMC Networking Switches, you must complete the deployment of Omnia control plane. Go to Step 8 in the [Steps to install the Omnia Control Plane](../../INSTALL_OMNIA_CONTROL_PLANE.md#steps-to-deploy-the-omnia-control-plane) file to run the `ansible-playbook control_plane.yml` file.  

File diff suppressed because it is too large
+ 35 - 0
docs/control_plane/input_parameters/POWERVAULT_STORAGE.md


+ 25 - 0
docs/control_plane/input_parameters/PROVISION_SERVERS.md

@@ -0,0 +1,25 @@
+# Dell EMC PowerEdge Servers
+
+## Update the input parameters
+
+Edit the following files under the `control_plane/input_params` directory to provide the required input parameters.
+1. Edit the `login_vars.yml` file to enter the following details:  
+	a. `provision_password`- password used while provisioning OS on bare metal servers.  
+	b. `cobbler_password`- password for Cobbler.    
+	c. `idrac_username` and `idrac_password`- iDRAC username and password.   
+	**NOTE**: The password must have a minimum length of eight characters and a maximum of 30 characters. Do not use these characters while entering a password: -, \\, "", and \'
+2. Edit the following variables in the `idrac_vars.yml` file.  
+
+	File name	|	Variables</br> [Required/ Optional]	|	Default, choices	|	Description
+	-------	|	----------------	|	-----------------	|	-----------------
+	idrac_vars.yml	|	idrac_system_profile</br> [Required]	|	<ul><li>**Performance**</li> <li>PerformancePerWatt(DAPC)</li> <li>PerformancePerWatt(OS)</li> <li>WorkstationPerformance</li></ul>	|	The system profile used for BIOS configuration. 
+	<br>	|	firmware_update_required</br> [Required]	|	<ul><li>**true**</li> <li>false</li></ul>	|	By default, Omnia updates the firmware on the servers. To disable the firmware update, set the variable to "false".
+	<br>	|	poweredge_model</br> [Required if "firmware_update_required" is set to "true"]	|	<ul><li>**C6420**</li> <li>R640</li><li>R740</li><li>C4140</li> <li>And other supported PowerEdge servers</li></ul>	|	Enter the required PowerEdge server models to update the firmware. For example, enter `R640,R740,C4140` to update firmware on these models of PowerEdge servers. For a complete list of supported PowerEdge servers, see the *Hardware managed by Omnia* section in the Readme file.
+	<br>	|	uefi_secure_boot</br> [Optional]	|	<ul><li>**disabled**</li> <li>enabled</li></ul>	|	Option to enable or disable the secure boot mode.
+	<br>	|	system_lockdown</br> [Optional]	|	<ul><li>**disabled**</li> <li>enabled</li></ul>	|	Option to enable or disable system lockdown.
+	<br>	|	two_factor_authentication</br> [Optional]	|	<ul><li>**disabled**</li> <li>enabled</li></ul>	|	Option to enable or disable the 2FA on iDRAC.</br> If enabled, update the required variables in the `idrac_tools_vars.yml` file.</br> **[WARNING]**: For the other iDRAC playbooks to run, you must manually disable 2FA by setting the *Easy 2FA State* to "Disabled" in the iDRAC settings.
+	<br>	|	ldap_directory_services</br> [Optional]	|	<ul><li>**disabled**</li> <li>enabled</li></ul>	|	Option to enable or disable the LDAP directory services on iDRAC.</br> If enabled, update the required variables in the `idrac_tools_vars.yml` file.
+
+## Deploy Omnia Control Plane
+Before you provision the Dell EMC PowerEdge Servers, you must complete the deployment of Omnia control plane. Go to Step 8 in the [Steps to install the Omnia Control Plane](../../INSTALL_OMNIA_CONTROL_PLANE.md#steps-to-deploy-the-omnia-control-plane) file to run the `ansible-playbook control_plane.yml` file.
+

BIN
docs/images/typical_layout_hpc_clsuter_passthrough_network.jpg


BIN
docs/images/typical_layout_hpc_cluster.jpg