Browse Source

Issue#550: Syncing GitHub and GitLab

Signed-off-by: cgoveas <cassandra.goveas@dell.com>
cgoveas 3 years ago
parent
commit
1db504b572
25 changed files with 331 additions and 81 deletions
  1. 3 1
      control_plane/input_params/powervault_me4_vars.yml
  2. 15 1
      control_plane/roles/control_plane_common/tasks/fetch_base_inputs.yml
  3. 2 0
      control_plane/roles/control_plane_common/vars/main.yml
  4. 3 3
      control_plane/roles/control_plane_device/files/temp_dhcp.template
  5. 12 0
      control_plane/roles/control_plane_device/tasks/dhcp_configure.yml
  6. 2 2
      control_plane/roles/control_plane_ib/files/temp_dhcp.template
  7. 13 1
      control_plane/roles/control_plane_ib/tasks/dhcp_configure.yml
  8. 3 4
      control_plane/roles/provision_cobbler/files/menu.yml
  9. 3 3
      control_plane/roles/provision_cobbler/files/temp_dhcp.template
  10. 13 1
      control_plane/roles/provision_cobbler/tasks/dhcp_configure.yml
  11. 43 6
      docs/FAQ.md
  12. 5 4
      docs/INSTALL_OMNIA.md
  13. 33 29
      docs/INSTALL_OMNIA_CONTROL_PLANE.md
  14. 22 10
      docs/README.md
  15. 5 3
      docs/control_plane/device_templates/CONFIGURE_INFINIBAND_SWITCHES.md
  16. 2 2
      docs/control_plane/device_templates/CONFIGURE_POWERSWITCHES.md
  17. 3 2
      docs/control_plane/device_templates/CONFIGURE_POWERVAULT_STORAGE.md
  18. 4 3
      docs/control_plane/device_templates/PROVISION_SERVERS.md
  19. 4 3
      docs/control_plane/input_parameters/INFINIBAND_SWITCHES.md
  20. 1 1
      docs/control_plane/input_parameters/POWERSWITCHES.md
  21. 3 2
      docs/control_plane/input_parameters/POWERVAULT_STORAGE.md
  22. 59 0
      docs/login_node/login_user_creation.md
  23. 20 0
      examples/login_node_example/implement_login_node.yml
  24. 30 0
      examples/login_node_example/roles/login_user/tasks/main.yml
  25. 28 0
      examples/login_node_example/roles/login_user/vars/main.yml

+ 3 - 1
control_plane/input_params/powervault_me4_vars.yml

@@ -84,6 +84,7 @@ powervault_me4_disk_partition_size: "60"
 
 # Specify the volume size
 # Format: 100GB <SizeGB>
+# Default: 100GB
 # Compulsory
 powervault_me4_volume_size: "100GB"
 
@@ -93,6 +94,7 @@ powervault_me4_volume_size: "100GB"
 powervault_me4_pool: "a"
 
 # Specify the nic of the server with which Powervault is connected.
+# Make sure nfs server also has 3 nics (for internet, OS provision and powervault connection)
 # Default value is em1.
 # Compulsory
-powervault_me4_server_nic: "em1"
+powervault_me4_server_nic: "em1"

+ 15 - 1
control_plane/roles/control_plane_common/tasks/fetch_base_inputs.yml

@@ -35,7 +35,21 @@
       host_network_nic | length < 1 or
       host_network_dhcp_start_range | length < 1 or
       host_network_dhcp_end_range | length < 1 or
-      provision_method | length < 1
+      provision_method | length < 1 or
+      default_lease_time | length < 1
+      
+- name: Validate default lease time
+  assert:
+    that:
+      - default_lease_time|int
+      - default_lease_time|int <= 31536000
+      - default_lease_time|int >= 21600
+    success_msg: "{{ success_msg_lease_time }}"
+    fail_msg: "{{ fail_msg_lease_time }}"
+
+- name: Calculate max lease time
+  set_fact:
+    max_lease_time: "{{ default_lease_time|int + 10000 }}"
 
 - name: Validate infiniband base_vars are not empty
   assert:

+ 2 - 0
control_plane/roles/control_plane_common/vars/main.yml

@@ -127,6 +127,8 @@ success_msg_different_nics_ib: "The nics of different containers and public nic
 fail_msg_different_nics_ib: "Failed. Infiniband nic cannot be the same as other nics"
 success_msg_ib: "Infiniband variables validated"
 fail_msg_ib: "Failed. Please provide all the InfiniBand related parameters in base_vars.yml"
+success_msg_lease_time: "Default lease time validated"
+fail_msg_lease_time: "Failed. Please provide a valid default lease time"
 
 # Usage: fetch_sm_inputs.yml
 ib_config_file: "{{ role_path }}/../../input_params/ib_vars.yml"

+ 3 - 3
control_plane/roles/control_plane_device/files/temp_dhcp.template

@@ -21,8 +21,8 @@ option pxe-system-type code 93 = unsigned integer 16;
 subnet subnet_mask netmask net_mask {
 option subnet-mask net_mask;
 range dynamic-bootp start end;
-default-lease-time  21600;
-max-lease-time  43200;
+default-lease-time default;
+max-lease-time max;
 next-server next_server;
 #insert the static DHCP leases for configuration here
 
@@ -44,4 +44,4 @@ next-server next_server;
 
 }
 
-#end for
+#end for

+ 12 - 0
control_plane/roles/control_plane_device/tasks/dhcp_configure.yml

@@ -39,6 +39,18 @@
     regexp: '^range dynamic-bootp start end;'
     replace: 'range dynamic-bootp {{ mngmnt_network_dhcp_start_range }} {{ mngmnt_network_dhcp_end_range }};'
 
+- name: Assign default lease time
+  replace:
+    path: "{{ role_path }}/files/dhcpd.conf"
+    regexp: '^default-lease-time default;'
+    replace: 'default-lease-time {{ default_lease_time }};'
+    
+- name: Assign max lease time
+  replace:
+    path: "{{ role_path }}/files/dhcpd.conf"
+    regexp: '^max-lease-time max;'
+    replace: 'max-lease-time {{ max_lease_time }};'
+
 - name: Assign next server range
   replace:
     path: "{{ role_path }}/files/dhcpd.conf"

+ 2 - 2
control_plane/roles/control_plane_ib/files/temp_dhcp.template

@@ -22,8 +22,8 @@ option pxe-system-type code 93 = unsigned integer 16;
 subnet subnet_mask netmask net_mask {
 option subnet-mask net_mask;
 range dynamic-bootp start end;
-default-lease-time 26100;
-max-lease-time 43200;
+default-lease-time default;
+max-lease-time max;
 next-server next_server;
 #insert the static DHCP leases for configuration here
 

+ 13 - 1
control_plane/roles/control_plane_ib/tasks/dhcp_configure.yml

@@ -39,8 +39,20 @@
     regexp: '^range dynamic-bootp start end;'
     replace: 'range dynamic-bootp {{ ib_network_dhcp_start_range }} {{ ib_network_dhcp_end_range }};'
 
+- name: Assign default lease time
+  replace:
+    path: "{{ role_path }}/files/dhcpd.conf"
+    regexp: '^default-lease-time default;'
+    replace: 'default-lease-time {{ default_lease_time }};'
+
+- name: Assign max lease time
+  replace:
+    path: "{{ role_path }}/files/dhcpd.conf"
+    regexp: '^max-lease-time max;'
+    replace: 'max-lease-time {{ max_lease_time }};'
+
 - name: Assign next server range
   replace:
     path: "{{ role_path }}/files/dhcpd.conf"
     regexp: '^next-server next_server;'
-    replace: 'next-server {{ ib_ip }};'
+    replace: 'next-server {{ ib_ip }};'

+ 3 - 4
control_plane/roles/provision_cobbler/files/menu.yml

@@ -1,8 +1,7 @@
 DEFAULT menu
 PROMPT 0
-MENU TITLE Cobbler | https://cobbler.github.io
-TIMEOUT 2
+MENU TITLE Cobbler OS provisioning | https://cobbler.github.io
+TIMEOUT 3
 TOTALTIMEOUT 5
 $pxe_menu_items
-
-MENU end
+MENU end

+ 3 - 3
control_plane/roles/provision_cobbler/files/temp_dhcp.template

@@ -23,8 +23,8 @@ option system-arch code 93 = unsigned integer 16;
 subnet subnet_mask netmask net_mask {
 option subnet-mask net_mask;
 range dynamic-bootp start end;
-default-lease-time 2160000;
-max-lease-time 4320000;
+default-lease-time default;
+max-lease-time max;
 next-server $next_server;
 #insert the static DHCP leases for configuration here
 
@@ -144,4 +144,4 @@ next-server $next_server;
         }
     #end for
 }
-#end for
+#end for

+ 13 - 1
control_plane/roles/provision_cobbler/tasks/dhcp_configure.yml

@@ -32,6 +32,18 @@
     path: "{{ role_path }}/files/dhcp.template"
     regexp: '^option subnet-mask net_mask;'
     replace: 'option subnet-mask {{ netmask }};'
+    
+- name: Assign default lease time
+  replace:
+    path: "{{ role_path }}/files/dhcp.template"
+    regexp: '^default-lease-time default;'
+    replace: 'default-lease-time {{ default_lease_time }};'
+
+- name: Assign max lease time
+  replace:
+    path: "{{ role_path }}/files/dhcp.template"
+    regexp: '^max-lease-time max;'
+    replace: 'max-lease-time {{ max_lease_time }};'
 
 - name: Assign DHCP range
   replace:
@@ -56,4 +68,4 @@
   replace:
     path: "{{ role_path }}/files/settings.yaml"
     regexp: '^next_server: 127.0.0.1'
-    replace: 'next_server: {{ hpc_ip }}'
+    replace: 'next_server: {{ hpc_ip }}'

+ 43 - 6
docs/FAQ.md

@@ -112,7 +112,7 @@ Resolution:
 It is recommended that the ansible-vault view or edit commands are used and not the ansible-vault decrypt or encrypt commands.
 
 ## What to do if the LC is not ready?
-* Verify the state of the LC in all servers by running `racadm getremoteservicesstatus`
+* Verify that the LC is in a ready state for all servers: `racadm getremoteservicesstatus`
 * Launch iDRAC template.
 
 ## What to do if the network CIDR entry of iDRAC IP in /etc/exports file is missing?
@@ -127,15 +127,52 @@ It is recommended that the ansible-vault view or edit commands are used and not
 ## Is Disabling 2FA supported by Omnia?
 * Disabling 2FA is not supported by Omnia and must be manually disabled.
 
-## Is provisioning server using BOSS controller supported by Omnia?
-* Provisioning server using BOSS controller is not supported by Omnia. It will be supported in upcoming releases.
-
 ## The provisioning of PowerEdge servers failed. How do I clean up before starting over?
 1. Delete the respective iDRAC IP addresses from the *provisioned_idrac_inventory* on the AWX UI or delete the *provisioned_idrac_inventory* to delete the iDRAC IP addresses of all the servers in the cluster.
 2. Launch the iDRAC template from the AWX UI.
 
-## What to do when WARNING message regarding older firmware displayed during idrac_template execution and idrac_template task failed?
+## What to do if PowerVault throws the error: `Error: The specified disk is not available. - Unavailable disk (0.x) in disk range '0.x-x'`
+1. Verify that the disk in question is not part of any pool: `show disks`
+2. If the disk is part of a pool, remove it and try again.
+
+## Why does PowerVault throw the error: `You cannot create a linear disk group when a virtual disk group exists on the system.`?
+At any given time only one type of disk group can be created on the system. That is, all disk groups on the system have to exclusively be linear or virtual. To fix the issue, either delete the existing disk group or change the type of pool you are creating.
+
+## Is provisioning server using BOSS controller supported by Omnia?
+* Provisioning server using BOSS controller is not supported by Omnia. It will be supported in upcoming releases.
+
+
+## What to do when iDRAC template execution throws a warning regarding older firmware versions?
 Potential Cause: Older firmware version in PowerEdge servers. Omnia supports only iDRAC 8 based Dell EMC PowerEdge Servers with firmware versions 2.75.75.75 and above and iDRAC 9 based Dell EMC PowerEdge Servers with Firmware versions 4.40.40.00 and above.
 
-1. Update idrac firmware version in PowerEdge servers manually to the supported version.
+1. Update iDRAC firmware version in PowerEdge servers manually to the supported version.
 2. Re-run idrac_template.
+
+## What steps have to be taken to re-run control_plane.yml after a Kubernetes reset?
+1. Delete the folder: `/var/nfs_awx`
+2. Delete the file:  `/<project name>/control_plane/roles/webui_awx/files/.tower_cli.cfg`
+
+Once complete, it's safe to re-run control_plane.yml.
+
+## Why does the Initialize Kubeadm task fail with 'nnode.Registration.name: Invalid value: \"<Host name>\"'?
+
+Potential Cause: The control_plane playbook does not support hostnames with an underscore in it such as 'mgmt_station'.
+
+As defined in RFC 822, the only legal characters are the following:
+1. Alphanumeric (a-z and 0-9): Both uppercase and lowercase letters are acceptable, and the hostname is case insensitive. In other words, dvader.empire.gov is identical to DVADER.EMPIRE.GOV and Dvader.Empire.Gov.
+
+2. Hyphen (-): Neither the first nor the last character in a hostname field should be a hyphen.
+
+3. Period (.): The period should be used only to delimit fields in a hostname (e.g., dvader.empire.gov)
+
+## What to do when JupyterHub pods are in 'ImagePullBackOff' or 'ErrImagePull' status after executing jupyterhub.yml?
+Potential Cause: Your Docker pull limit has been exceeded. For more information, click [here](https://www.docker.com/increase-rate-limits)
+1. Delete Jupyterhub deployment by executing the following command in manager node: `helm delete jupyterhub -n jupyterhub`
+2. Re-execute jupyterhub.yml after 8-9 hours.
+
+## What to do when Kubeflow pods are in 'ImagePullBackOff' or 'ErrImagePull' status after executing kubeflow.yml?
+Potential Cause: Your Docker pull limit has been exceeded. For more information, click [here](https://www.docker.com/increase-rate-limits)
+1. Delete Kubeflow deployment by executing the following command in manager node: `kfctl delete -V -f /root/k8s/omnia-kubeflow/kfctl_k8s_istio.v1.0.2.yaml`
+2. Re-execute kubeflow.yml after 8-9 hours
+
+

+ 5 - 4
docs/INSTALL_OMNIA.md

@@ -9,7 +9,7 @@ To install the Omnia control plane and manage workloads on your cluster using th
 * If you have configured the `omnia_config.yml` file to enable the login node, the login node must be part of the cluster. 
 * All nodes must be connected to the network and must have access to the Internet.
 * Set the hostnames of all the nodes in the cluster.
-	* If the login node is enabled, then set the hostnames in the format: __hostname.domainname__. For example, "manager.omnia.test" is a valid hostname.
+	* If the login node is enabled, then set the hostnames in the format: __hostname.domainname__. For example, "manager.omnia.test" is a valid hostname. **Do not** use underscores ( _ ) in the host names.
 	* Include the hostnames under /etc/hosts in the format: </br>*ipaddress hostname.domainname*. For example, "192.168.12.1 manager.example.com" is a valid entry.
 * SSH Keys for root are installed on all nodes to allow for password-less SSH.
 * The user should have root privileges to perform installations and configurations.
@@ -112,9 +112,8 @@ __Note:__
 * The default value of Kubernetes Pod Network CIDR is 10.244.0.0/16. If 10.244.0.0/16 is already in use within your network, select a different Pod Network CIDR. For more information, see __https://docs.projectcalico.org/getting-started/kubernetes/quickstart__.
 
 **NOTE**: If you want to view or edit the `omnia_config.yml` file, run the following command:  
-					
 - `ansible-vault view omnia_config.yml --vault-password-file .omnia_vault_key` -- To view the file. 
-- `ansible-vault edit omnia_config.yml --vault-password-file .omnia_vault_key` -- To edit the file.  
+- `ansible-vault edit omnia_config.yml --vault-password-file .omnia_vault_key` -- To edit the file.
 
 **NOTE**: It is suggested that you use the ansible-vault view or edit commands and that you do not use the ansible-vault decrypt or encrypt commands. If you have used the ansible-vault decrypt or encrypt commands, provide 644 permission to `omnia_config.yml`.  
 
@@ -151,11 +150,13 @@ The following __kubernetes__ roles are provided by Omnia when __omnia.yml__ file
 	- Kubernetes services are deployed such as Kubernetes Dashboard, Prometheus, MetalLB and NFS client provisioner
 
 __Note:__ 
+
+* Whenever k8s_version, k8s_cni or k8s_pod_network_cidr needs to be modified after the HPC cluster is setup, the OS in the manager and compute nodes in the cluster must be re-flashed before executing omnia.yml again.
 * After Kubernetes is installed and configured, few Kubernetes and calico/flannel related ports are opened in the manager and compute nodes. This is required for Kubernetes Pod-to-Pod and Pod-to-Service communications. Calico/flannel provides a full networking stack for Kubernetes pods.
 * If Kubernetes Pods are unable to communicate with the servers (i.e., unable to access the Internet) when the DNS servers are not responding, then the Kubernetes Pod Network CIDR may be overlapping with the host network which is DNS issue. To resolve this issue:
 	1. Disable firewalld.service.
 	2. If the issue persists, then perform the following actions:  
-		a. In your Kubernetes cluster, run `kubeadm reset -f` on the nodes.  
+		a. Format the OS on manager and compute nodes.  
 		b. In the management station, edit the *omnia_config.yml* file to change the Kubernetes Pod Network CIDR or CNI value. Suggested IP range is 192.168.0.0/16 and ensure you provide an IP which is not in use in your host network.  
 		c. Execute `omnia.yml` and skip slurm using `--skip-tags slurm`.
 

File diff suppressed because it is too large
+ 33 - 29
docs/INSTALL_OMNIA_CONTROL_PLANE.md


+ 22 - 10
docs/README.md

@@ -51,7 +51,7 @@ Requirements  |   Version
 ----------------------------------  |   -------
 OS pre-installed on the management station  |  CentOS 8.4
 OS deployed by Omnia on bare-metal Dell EMC PowerEdge Servers | CentOS 7.9 2009 Minimal Edition
-Cobbler  |  2.8.5
+Cobbler  |  3.2.1
 Ansible AWX  |  19.1.0
 Slurm Workload Manager  |  20.11.2
 Kubernetes on the management station  |  1.21.0
@@ -64,7 +64,7 @@ The following table lists the supported devices managed by Omnia. Other devices
 
 Device type	|	Supported models	
 -----------	|	-------	
-Dell EMC PowerEdge Servers	|	PowerEdge C4140, C6420, C6520, R240, R340, R440, R540, R640, R650, R740, R740xd, R740xd2, R750, R750xa, R840, R940, R940xa
+Dell EMC PowerEdge Servers	|	PowerEdge C4140, C6420, R240, R340, R440, R540, R640, R740, R740xd, R740xd2, R840, R940, R940xa
 Dell EMC PowerVault Storage	|	PowerVault ME4084, ME4024, and ME4012 Storage Arrays
 Dell EMC Networking Switches	|	PowerSwitch S3048-ON and PowerSwitch S5232F-ON
 Mellanox InfiniBand Switches	|	NVIDIA MQM8700-HS2F Quantum HDR InfiniBand Switch 40 QSFP56
@@ -153,16 +153,17 @@ stp_rpvst_default_behaviour	|	boolean: false, true	|	Configures RPVST default be
 # Known issues  
 * **Issue**: Hosts are not displayed on the AWX UI.  
 	**Resolution**:  
-	* Verify if the *provisioned_hosts.yml* file is present in the *omnia/appliance/roles/inventory/files* folder.
-	* Verify whether the hosts are listed in the *provisioned_hosts.yml* file.  
-		* If hosts are not listed, then servers are not PXE booted yet.
-		* If hosts are listed, then an IP address has been assigned to them by DHCP. However, hosts are not displayed on the AWX UI as the PXE boot is still in process or is not initiated.
-	* Check for the reachable and unreachable hosts using the **provisioned_report.yml** tool present in the *omnia/appliance/tools* folder. To run provisioned_report.yml, in the omnia/appliance directory, run `playbook -i roles/inventory/files/provisioned_hosts.yml tools/provisioned_report.yml`.
+	* Verify if the provisioned_hosts.yml file is present in the omnia/control_plane/roles/collect_node_info/files/ folder.
+	* Verify whether the hosts are listed in the provisioned_hosts.yml file.
+	* If hosts are not listed, then servers are not PXE booted yet.
+If hosts are listed, then an IP address has been assigned to them by DHCP. However, hosts are not displayed on the AWX UI as the PXE boot is still in process or is not initiated.
+	* Check for the reachable and unreachable hosts using the provision_report.yml tool present in the omnia/control_plane/tools folder. To run provision_report.yml, in the omnia/control_plane/ directory, run playbook -i roles/collect_node_info/files/provisioned_hosts.yml tools/provision_report.yml.
 
 * **Issue**: There are **ImagePullBack** or **ErrPullImage** errors in the status of Kubernetes pods.  
 	**Cause**: The errors occur when the Docker pull limit is exceeded.  
 	**Resolution**:
 	* For **omnia.yml** and **control_plane.yml**: Provide the docker username and password for the Docker Hub account in the *omnia_config.yml* file and execute the playbook. 
+	* For HPC cluster, during omnia.yml execution, a kubernetes secret 'dockerregcred' will be created in default namespace and patched to service account. User needs to patch this secret in their respective namespace while deploying custom applications and use the secret as imagePullSecrets in yaml file to avoid ErrImagePull. [Click here for more info](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/)
 	* **Note**: If the playbook is already executed and the pods are in __ImagePullBack__ error, then run `kubeadm reset -f` in all the nodes before re-executing the playbook with the docker credentials.
 
 * **Issue**: The `kubectl` command stops working after a reboot and displays the following error message: *The connection to the server head_node_ip:port was refused - did you specify the right host or port?*  
@@ -172,15 +173,26 @@ stp_rpvst_default_behaviour	|	boolean: false, true	|	Configures RPVST default be
 	* `systemctl restart kubelet`  
 	
 * **Issue**: If control_plane.yml fails at the webui_awx role, then the previous IP address and password are not cleared when control_plane.yml is re-run.   
-	**Resolution**: In the *webui_awx/files* directory, delete the *.tower_cli.cfg* and *.tower_vault_key* files, and then re-run `control_plane.yml`.  
+	**Resolution**: In the *webui_awx/files* directory, delete the *.tower_cli.cfg* and *.tower_vault_key* files, and then re-run `control_plane.yml`.
 
 * **Issue**: The FreeIPA server and client installation fails.  
 	**Cause**: The hostnames of the manager and login nodes are not set in the correct format.  
 	**Resolution**: If you have enabled the option to install the login node in the cluster, set the hostnames of the nodes in the format: *hostname.domainname*. For example, *manager.omnia.test* is a valid hostname for the login node. **Note**: To find the cause for the failure of the FreeIPA server and client installation, see *ipaserver-install.log* in the manager node or */var/log/ipaclient-install.log* in the login node.  
 	
-* **Issue**: The inventoy details are not updated in AWX when device or host credentials are invalid.  
-	**Resolution**: Provide valid credentials of the devices and hosts in the cluster.  
+* **Issue**: The inventory details are not updated in AWX when device or host credentials are invalid.  
+	**Resolution**: Provide valid credentials of the devices and hosts in the cluster. 
+
+* **Issue**: The Host list is empty after executing the control_plane playbook.  
+	**Resolution**: Ensure that all devices used are in DHCP enabled mode.
+	
+* **Issue**: The task 'Install Packages' fails on the NFS node with the message: `Failure in talking to yum: Cannot find a valid baseurl for repo: base/7/x86_64.`  
+	**Cause**: There are connections missing on the NFS node.  
+	**Resolution**: Ensure that there are 3 nics being used on the NFS node:
+	1. For provisioning the OS
+	2. For connecting to the internet (Management purposes)
+	3. For connecting to PowerVault (Data Connection)  
 	
+
 # [Frequently asked questions](FAQ.md)
 
 # Limitations

+ 5 - 3
docs/control_plane/device_templates/CONFIGURE_INFINIBAND_SWITCHES.md

@@ -3,10 +3,12 @@ In your HPC cluster, connect the Mellanox InfiniBand switches using the Fat-Tree
 
 Omnia uses the server-based Subnet Manager (SM). SM runs as a Kubernetes pod on the management station. To enable the SM, Omnia configures the required parameters in the `opensm.conf` file. Based on the requirement, the parameters can be edited.  
 
-**NOTE**: Install the InfiniBand hardware drivers by running the command: `yum groupinstall "Infiniband Support" -y`.   
+**NOTE**: Install the InfiniBand hardware drivers by running the command: `yum groupinstall "Infiniband Support" -y`.  
+
 ## Edit the "input_params" file 
-Under the `control_plane/input_params` directory, edit the following files:
-1. `base_vars.yml` file
+Under the `control_plane/input_params` directory, edit the following files:  
+
+1. `base_vars.yml` file    
 
 	File name	|	Variables	|	Default, choices	|	Description
 	-----------	|	-------	|	----------------	|	-----------------

File diff suppressed because it is too large
+ 2 - 2
docs/control_plane/device_templates/CONFIGURE_POWERSWITCHES.md


+ 3 - 2
docs/control_plane/device_templates/CONFIGURE_POWERVAULT_STORAGE.md

@@ -29,7 +29,8 @@ Under the `control_plane/input_params` directory, edit the following files:
 	powervault_me4_disk_partition_size [Required] |	<ul><li>**5**</li><li>Any value between 5-99</li></ul> |	Enter the partition size which would be used as an NFS share.  
 	powervault_me4_volume_size [Required] |	<ul><li>**100GB**</li><li>Custom value</li></ul> |	Enter the volume size in the format: *SizeTB*, *SizeGB*, *SizeMB*, or *SizeB*.  
 	powervault_me4_pool [Required] |	<ul><li>**a** (or A)</li><li>b (or B)</li></ul> |	Enter the pool for the volume.  
-	powervault_me4_server_nic [Required] |	<ul><li>**em1**</li></ul> |	Enter the NIC of the server to which the PowerVault Storage is connected.    
+	powervault_me4_server_nic [Required] |	<ul><li>**em1**</li></ul> |	Enter the NIC of the server to which the PowerVault Storage is connected.
+	powervault_me4_pool_type [Required] |	<ul><li>**Virtual**</li><li>Linear </li></ul> |	Select the type of pool to be deployed on PowerVault. Ensure that all pools on the device are exclusively virtual or linear.
 	
 ## Configuring PowerVault Storage
 
@@ -38,4 +39,4 @@ Under the `control_plane/input_params` directory, edit the following files:
 2. Copy the Cluster-IP address of the awx-ui. 
 3. To retrieve the AWX UI password, run `kubectl get secret awx-admin-password -n awx -o jsonpath="{.data.password}" | base64 --decode`.
 4. Open the default web browser on the management station and enter `http://<IP>:8052`, where IP is the awx-ui IP address and 8052 is the awx-ui port number. Log in to the AWX UI using the username as `admin` and the retrieved password.  
-5. Under __RESOURCES__ -> __Templates__, launch the **powervault_me4_template**.
+5. Under __RESOURCES__ -> __Templates__, launch the **powervault_me4_template**.

+ 4 - 3
docs/control_plane/device_templates/PROVISION_SERVERS.md

@@ -31,7 +31,7 @@ Based on the inputs provided in the `login_vars.yml` and `base_vars.yml` files,
 2. Copy the Cluster-IP address of the awx-ui. 
 3. To retrieve the AWX UI password, run `kubectl get secret awx-admin-password -n awx -o jsonpath="{.data.password}" | base64 --decode`.
 4. Open the default web browser on the management station and enter `http://<IP>:8052`, where IP is the awx-ui IP address and 8052 is the awx-ui port number. Log in to the AWX UI using the username as `admin` and the retrieved password.  
-5. Under __RESOURCES__ -> __Templates__, launch the **idrac_template**.  
+5. Under __RESOURCES__ -> __Templates__, launch the **idrac_template**.
 
 Omnia role used to provision custom ISO on PowerEdge Servers using iDRAC: *provision_idrac*  
 
@@ -96,9 +96,10 @@ Omnia provides the following options to enhance security on the provisioned Powe
 	idrac_2fa.yml	|	dns_domain_name</br> [Required]	|		|	DNS domain name to be set for iDRAC. 
 	<br>	|	ipv4_static_dns1, ipv4_static_dns2</br> [Required] 	|		|	DNS1 and DNS2 static IPv4 addresses.
 	<br>	|	smtp_server_ip</br> [Required]	|		|	Server IP address used for SMTP.
-	<br>	|	smtp_username</br> [Required]	|		|	Username for SMTP.
-	<br>	|	smtp_password</br> [Required]	|		|	Password for SMTP.
 	<br>	|	use_email_address_2fa</br> [Required]	|		|	Email address used for enabling 2FA. After 2FA is enabled, an authentication code is sent to the provided email address. 
+	<br>	| smtp_authentication [Required]	| <ul> <li>__Disabled__</li> <li>Enabled </li> </ul> | Enable SMTP authentication 
+	<br>	|	smtp_username</br> [Optional]	|		|	Username for SMTP.
+	<br>	|	smtp_password</br> [Optional]	|		|	Password for SMTP.
 
 	**NOTE**: 2FA will be enabled on the iDRAC only if SMTP server details are valid and a test email notification is working using SMTP.  
 * **LDAP Directory Services**: To enable or disable the LDAP directory services, set the *ldap_directory_services* variable to "enabled" in the `idrac_vars.yml` file.  

File diff suppressed because it is too large
+ 4 - 3
docs/control_plane/input_parameters/INFINIBAND_SWITCHES.md


+ 1 - 1
docs/control_plane/input_parameters/POWERSWITCHES.md

@@ -12,7 +12,7 @@ Under the `control_plane/input_params` directory, edit the following files:
 	a. `ethernet_switch_username`- username for Ethernet switches.  
 	**NOTE**: The username must not contain the following characters: -, \\, "", and \'  
 	b. `ethernet_switch_password`- password for Ethernet switches.   
-	**NOTE**: Minimum length of the password must be eight characters and the maximum limit is 30 characters. Do not use these characters while entering a password: -, \\, "", and \'    
+	**NOTE**: Minimum length of the password must be eight characters and the maximum limit is 30 characters. Do not use these characters while entering a password: -, \\, "", and \'  
 
 3. `ethernet_tor_vars.yml` or `ethernet_vars.yml` file: If **ethernet_switch_support** is set to "true" in the *base_vars.yml* file, then update the following variables.  
 

+ 3 - 2
docs/control_plane/input_parameters/POWERVAULT_STORAGE.md

@@ -28,8 +28,9 @@ Under the `control_plane/input_params` directory, edit the following files:
 	powervault_me4_disk_group_name |	<ul><li>**omnia**</li><li>User-defined name</li></ul> |	Enter the group name of the disk.
 	powervault_me4_disk_partition_size [Required] |	<ul><li>**5**</li><li>Any value between 5-99</li></ul> |	Enter the partition size which would be used as an NFS share.  
 	powervault_me4_volume_size [Required] |	<ul><li>**100GB**</li><li>Custom value</li></ul> |	Enter the volume size in the format: *SizeTB*, *SizeGB*, *SizeMB*, or *SizeB*.  
-	powervault_me4_pool [Required] |	<ul><li>**a** (or A)</li><li>b (or B)</li></ul> |	Enter the pool for the volume.  
-	powervault_me4_server_nic [Required] |	<ul><li>**em1**</li></ul> |	Enter the NIC of the server to which the PowerVault Storage is connected.   
+	powervault_me4_pool [Required] |	<ul><li>**a** (or A)</li><li>b (or B)</li></ul> |	Enter the pool for the volume.
+	powervault_me4_pool_type [Required] |	<ul><li>Virtual</li><li>**Linear** </li></ul> |	Select the type of pool to be deployed on PowerVault. Ensure that all pools on the device are exclusively virtual or linear.
+		
 	
 ## Deploy Omnia Control Plane
 Before you configure the PowerVault Storage devices, you must complete the deployment of Omnia control plane. Go to Step 8 in the [Steps to install the Omnia Control Plane](../../INSTALL_OMNIA_CONTROL_PLANE.md#steps-to-deploy-the-omnia-control-plane) file to run the `ansible-playbook control_plane.yml` file.  

+ 59 - 0
docs/login_node/login_user_creation.md

@@ -0,0 +1,59 @@
+# How to Create a User Using FreeIPA
+
+## Prerequisites:
+1. Make sure the server and client are installed
+2. The admin user has to be initialized using kerberos authentication.
+
+   `kinit admin` (When prompted provide the password)
+   
+
+## Adding the New User
+1. ssh to manager node
+
+`ssh xxxxx@192.168.1.5`
+
+2. Use the command below to create a user:
+
+`ipa user-add '<new username>' --first='<user first name>'
+    --last='<user last name>' --homedir='<home directory path (optional)>' 
+    --random`
+
+3. The output will display the random password set. 
+```
+ "----------------------",
+            "Added user \"omniauser\"",
+            "----------------------",
+            "  User login: omniauser",
+            "  First name: omnia",
+            "  Last name: user",
+            "  Full name: omnia user",
+            "  Display name: omnia user",
+            "  Initials: ou",
+            "  Home directory: /home/omniauser",
+            "  GECOS: omnia user",
+            "  Login shell: /bin/sh",
+            "  Principal name: omniauser@MYIPA.TEST",
+            "  Principal alias: omniauser@MYIPA.TEST",
+            "  User password expiration: 20210804180355Z",
+            "  Email address: omniauser@myipa.test",
+            "  Random password: 0Qr:Ir;:q_vFKP+*b|0)0D",
+            "  UID: 893800014",
+            "  GID: 893800014",
+            "  Password: True",
+            "  Member of groups: ipausers",
+            "  Kerberos keys available: True"			
+			
+```
+			
+4. The random password displayed can be used to login to the login node using the newly created user.
+
+` ssh omniauser@192.168.1.6`
+
+5. Change the password on first login and then login with the new password.
+
+6. To assign permissions to the newly created user to execute slurm jobs, run the command:
+
+   `usermod -a -G slurm 'new_login_user'`
+7. Once the user has been assigned the appropriate permissions, slurm jobs can be executed:
+
+` srun --nodes 1 --ntasks-per-node 1 --partition normal hostname`

+ 20 - 0
examples/login_node_example/implement_login_node.yml

@@ -0,0 +1,20 @@
+# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+- name: Create user and assign slurm permission
+  hosts: manager
+  gather_facts: false
+  roles:
+    - login_user

+ 30 - 0
examples/login_node_example/roles/login_user/tasks/main.yml

@@ -0,0 +1,30 @@
+# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+- name: Create a user
+  command: >-
+    ipa user-add '{{ new_login_user }}' --first='{{ user_first_name }}'
+    --last='{{ user_last_name }}' --homedir='{{ user_home_dir }}' 
+    --random
+  register: output
+  changed_when: false
+
+- name: Debug
+  debug:
+    msg: "{{ output }}"
+
+- name: Add the user to slurm group
+  command: usermod -a -G slurm '{{ new_login_user }}'
+  changed_when: false

+ 28 - 0
examples/login_node_example/roles/login_user/vars/main.yml

@@ -0,0 +1,28 @@
+# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+# New User to be created on Login node
+# Make sure the username is in small case
+# For more details, check the FreeIPA website
+new_login_user: "omniauser"
+
+# User home directory path
+user_home_dir: "/home/omniauser"
+
+# User's first name
+user_first_name: "omnia"
+
+# User's last name
+user_last_name: "user"