
Merge branch 'devel' into issue-148

Lucas A. Wilson, 4 years ago
parent
commit
d34b81ec49
79 files changed, with 1722 additions and 564 deletions
  1. appliance/input_config.yml (+13, -10)
  2. appliance/roles/common/tasks/main.yml (+4, -4)
  3. appliance/roles/common/tasks/password_config.yml (+205, -18)
  4. appliance/roles/common/vars/main.yml (+16, -4)
  5. appliance/roles/inventory/files/add_host.yml (+10, -6)
  6. appliance/roles/inventory/files/create_inventory.yml (+57, -15)
  7. appliance/roles/inventory/files/inventory (+0, -3)
  8. appliance/roles/inventory/tasks/main.yml (+47, -25)
  9. appliance/roles/provision/files/Dockerfile (+1, -4)
  10. appliance/roles/provision/files/settings (+2, -2)
  11. appliance/roles/provision/files/dnsmasq.template (+0, -20)
  12. appliance/roles/provision/files/ifcfg-em1 (+0, -18)
  13. appliance/roles/provision/files/inventory_creation.yml (+12, -3)
  14. appliance/roles/provision/files/kickstart.yml (+19, -2)
  15. appliance/roles/provision/files/temp_centos7.ks (+1, -0)
  16. appliance/roles/provision/files/dhcp.template (+9, -8)
  17. appliance/roles/provision/tasks/check_prerequisites.yml (+28, -3)
  18. appliance/roles/provision/tasks/configure_cobbler.yml (+26, -3)
  19. appliance/roles/provision/tasks/dhcp_configure.yml (+60, -0)
  20. appliance/roles/provision/tasks/main.yml (+14, -8)
  21. appliance/roles/provision/tasks/mapping_file.yml (+84, -0)
  22. appliance/roles/provision/tasks/provision_password.yml (+9, -26)
  23. appliance/roles/provision/vars/main.yml (+6, -1)
  24. appliance/roles/web_ui/tasks/awx_configuration.yml (+243, -146)
  25. appliance/roles/web_ui/tasks/clone_awx.yml (+1, -1)
  26. appliance/roles/web_ui/tasks/install_awx.yml (+3, -1)
  27. appliance/roles/web_ui/tasks/main.yml (+11, -42)
  28. appliance/roles/web_ui/tasks/ui_accessibility.yml (+85, -0)
  29. appliance/roles/web_ui/vars/main.yml (+2, -1)
  30. appliance/roles/provision/tasks/configure_nic.yml (+24, -15)
  31. appliance/tools/roles/cluster_preperation/tasks/main.yml (+0, -0)
  32. roles/cluster_preperation/tasks/passwordless_ssh.yml (+27, -14)
  33. appliance/tools/roles/cluster_preperation/vars/main.yml (+0, -0)
  34. appliance/tools/roles/fetch_password/tasks/main.yml (+42, -0)
  35. docs/INSTALL_OMNIA.md (+186, -0)
  36. docs/MONITOR_CLUSTERS.md (+75, -0)
  37. docs/README.md (+51, -7)
  38. omnia.yml (+9, -29)
  39. omnia_config.yml (+24, -0)
  40. platforms/roles/kubeflow/tasks/main.yml (+1, -13)
  41. platforms/roles/kubeflow/vars/main.yml (+0, -22)
  42. roles/cluster_validation/tasks/fetch_password.yml (+61, -9)
  43. roles/cluster_validation/tasks/main.yml (+1, -4)
  44. roles/cluster_validation/tasks/validations.yml (+1, -7)
  45. roles/cluster_validation/vars/main.yml (+10, -0)
  46. roles/common/tasks/nvidia.yml (+2, -2)
  47. roles/common/vars/main.yml (+1, -0)
  48. roles/k8s_common/tasks/main.yml (+2, -2)
  49. roles/k8s_firewalld/tasks/main.yml (+4, -4)
  50. roles/k8s_firewalld/vars/main.yml (+1, -4)
  51. roles/k8s_nfs_client_setup/tasks/main.yml (+15, -2)
  52. roles/k8s_nfs_client_setup/vars/main.yml (+4, -0)
  53. roles/k8s_start_manager/tasks/main.yml (+22, -5)
  54. roles/k8s_start_manager/vars/main.yml (+1, -5)
  55. roles/k8s_start_services/tasks/main.yml (+17, -3)
  56. roles/k8s_start_workers/tasks/main.yml (+8, -2)
  57. roles/k8s_start_workers/vars/main.yml (+1, -3)
  58. roles/slurm_common/tasks/main.yml (+11, -2)
  59. roles/slurm_common/vars/main.yml (+1, -2)
  60. roles/slurm_manager/tasks/main.yml (+14, -10)
  61. roles/slurm_manager/vars/main.yml (+1, -1)
  62. roles/slurm_start_services/tasks/main.yml (+6, -8)
  63. roles/slurm_workers/tasks/main.yml (+3, -3)
  64. site/CONTRIBUTORS.md (+6, -0)
  65. docs/INSTALL.md (+17, -12)
  66. site/PREINSTALL.md (+27, -0)
  67. site/README.md (+43, -0)
  68. site/_config.yml (+4, -0)
  69. site/images/delltech.jpg (binary)
  70. site/images/example-system-ethernet.png (binary)
  71. site/images/example-system-infiniband.png (binary)
  72. site/images/omnia-branch-structure.png (binary)
  73. site/images/omnia-k8s.png (binary)
  74. site/images/omnia-logo.png (binary)
  75. site/images/omnia-overview.png (binary)
  76. site/images/omnia-slurm.png (binary)
  77. site/images/pisa.png (binary)
  78. site/metalLB/README.md (+10, -0)
  79. site/metalLB/metal-config.yaml (+21, -0)

+ 13 - 10
appliance/input_config.yml

@@ -1,4 +1,4 @@
-#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -23,17 +23,20 @@ provision_password: ""
 # The password must not contain -,\, ',"
 awx_password: ""
 
-# Password used for Slurm database.
-# The Length of the password should be atleast 8.
-# The password must not contain -,\, ',"
-mariadb_password: ""
-
 # The nic/ethernet card that needs to be connected to the HPC switch.
 # This nic will be configured by Omnia for the DHCP server.
 # Default value of nic is em1.
 hpc_nic: "em1"
 
-# The nic card that needs to be connected to the public internet.
-# The public_nic should be em2, em1 or em3
-# Default value of nic is em2.
-public_nic: "em2"
+# The nic/ethernet card that will be connected to the public internet.
+# Default value of nic is em2
+public_nic: "em2"
+
+# The mapping file consists of the MAC address and its respective IP address and hostname.
+# If user wants to provide a mapping file, set this value to "true"
+# The format of mapping file should be MAC,hostname,IP and must be a CSV file.
+mapping_file_exists: ""
+
+# The dhcp range for assigning the IP address to the baremetal nodes.
+dhcp_start_ip_range: ""
+dhcp_end_ip_range: ""
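
For reference, when mapping_file_exists is set to "true" the provision role expects a CSV in the MAC,hostname,IP order described above, with a header row (column names here are illustrative) that is skipped during processing. A minimal, purely hypothetical file could look like:

    MAC,Hostname,IP
    00:1a:2b:3c:4d:5e,node001,172.17.0.10
    00:1a:2b:3c:4d:5f,node002,172.17.0.11

Hostnames must avoid "_", ".", and spaces (the mapping_file.yml checks added below fail otherwise), and the dhcp_start_ip_range/dhcp_end_ip_range values are validated against the subnet of hpc_nic.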

+ 4 - 4
appliance/roles/common/tasks/main.yml

@@ -25,11 +25,11 @@
 - name: Common packages installation
   import_tasks: package_installation.yml
 
+- name: Basic Configuration
+  import_tasks: password_config.yml
+
 - name: Docker installation and configuration
   import_tasks: docker_installation.yml
 
 - name: Docker volume creation
-  import_tasks: docker_volume.yml
-
-- name: Basic Configuration
-  import_tasks: password_config.yml
+  import_tasks: docker_volume.yml

+ 205 - 18
appliance/roles/common/tasks/password_config.yml

@@ -18,27 +18,75 @@
   changed_when: false
   register: config_content
 
-- name: Decrpyt input_config.yml
-  command: ansible-vault decrypt {{ input_config_filename }} --vault-password-file {{ role_path }}/files/{{ vault_filename }}
+- name: Decrpyt appliance_config.yml
+  command: >-
+    ansible-vault decrypt {{ input_config_filename }}
+    --vault-password-file {{ vault_filename }}
   changed_when: false
   when: "'$ANSIBLE_VAULT;' in config_content.stdout"
 
-- name: Include variable file input_config.yml
+- name: Include variable file appliance_config.yml
   include_vars: "{{ input_config_filename }}"
+  no_log: true
 
 - name: Validate input parameters are not empty
   fail:
     msg: "{{ input_config_failure_msg }}"
   register: input_config_check
-  when: (provision_password | length < 1) or (awx_password | length < 1) or (mariadb_password | length < 1) or (hpc_nic | length < 1) or (public_nic | length < 1)
+  when:
+    - provision_password | length < 1 or
+      awx_password | length < 1 or
+      hpc_nic | length < 1 or
+      public_nic | length < 1 or
+      dhcp_start_ip_range | length < 1 or
+      dhcp_end_ip_range | length < 1
 
 - name: Save input variables from file
   set_fact:
     cobbler_password: "{{ provision_password }}"
     admin_password: "{{ awx_password }}"
-    input_mariadb_password: "{{ mariadb_password }}"
     nic:  "{{ hpc_nic }}"
     internet_nic: "{{ public_nic }}"
+    dhcp_start_ip: "{{ dhcp_start_ip_range | ipv4 }}"
+    dhcp_end_ip: "{{ dhcp_end_ip_range | ipv4 }}"
+    mapping_file: "{{ mapping_file_exists }}"
+  no_log: true
+
+- name: Get the system hpc ip
+  shell:  "ifconfig {{ hpc_nic }} | grep 'inet' |cut -d: -f2 |  awk '{ print $2}'"
+  register: ip
+  changed_when: false
+
+- name: Get the system public ip
+  shell:  "ifconfig {{ internet_nic }} | grep 'inet' |cut -d: -f2 |  awk '{ print $2}'"
+  register: internet_ip
+  changed_when: false
+
+- name: Get the system netmask
+  shell:  "ifconfig {{ hpc_nic }} | grep 'inet' |cut -d: -f2 |  awk '{ print $4}'"
+  register: net
+  changed_when: false
+
+- name: HPC nic IP
+  set_fact:
+    hpc_ip: "{{ ip.stdout }}"
+    public_ip: "{{ internet_ip.stdout }}"
+
+- name:  Netmask
+  set_fact:
+    netmask: "{{ net.stdout }}"
+
+- name: shell try
+  shell: |
+    IFS=. read -r i1 i2 i3 i4 <<< "{{ hpc_ip }}"
+    IFS=. read -r m1 m2 m3 m4 <<< "{{ netmask }}"
+    printf "%d.%d.%d.%d\n" "$((i1 & m1))" "$((i2 & m2))" "$((i3 & m3))" "$((i4 & m4))"
+  register: sub_result
+  changed_when: false
+
+- name: Subnet
+  set_fact:
+    subnet: "{{ sub_result.stdout }}"
 
 - name: Assert provision_password
   assert:
@@ -51,6 +99,7 @@
       - " \"'\" not in cobbler_password "
     success_msg: "{{ success_msg_provision_password }}"
     fail_msg: "{{ fail_msg_provision_password }}"
+  no_log: true
   register: cobbler_password_check
 
 - name: Assert awx_password
@@ -64,20 +113,24 @@
         - " \"'\" not in admin_password "
     success_msg: "{{ success_msg_awx_password }}"
     fail_msg: "{{ fail_msg_awx_password }}"
+  no_log: true
   register: awx_password_check
 
-- name: Assert mariadb_password
+- name: Assert hpc_ip
   assert:
     that:
-        - input_mariadb_password | length > min_length | int - 1
-        - input_mariadb_password | length < max_length | int + 1
-        - '"-" not in input_mariadb_password '
-        - '"\\" not in input_mariadb_password '
-        - '"\"" not in input_mariadb_password '
-        - " \"'\" not in input_mariadb_password "
-    success_msg: "{{ success_msg_mariadb_password }}"
-    fail_msg: "{{ fail_msg_mariadb_password }}"
-  register: mariadb_password_check
+      - hpc_ip | length > 7
+    success_msg: "{{ success_hpc_ip }}"
+    fail_msg: "{{ fail_hpc_ip }}"
+  register: hpc_ip_check
+
+- name: Assert public_ip
+  assert:
+    that:
+      - public_ip | length > 7
+    success_msg: "{{ success_hpc_ip }}"
+    fail_msg: "{{ fail_hpc_ip }}"
+  register: public_ip_check
 
 - name: Assert hpc_nic
   assert:
@@ -93,11 +146,70 @@
     that:
       - internet_nic | length > nic_min_length | int - 1
       - nic != internet_nic
-      - "('em1' in internet_nic) or ('em2' in internet_nic) or ('em3' in internet_nic)"
     success_msg: "{{ success_msg_public_nic }}"
     fail_msg: "{{ fail_msg_public_nic }}"
   register: public_nic_check
 
+- name: Assert mapping_file_exists
+  assert:
+    that:
+      - "( mapping_file == true) or ( mapping_file == false)"
+    success_msg: "{{ success_mapping_file }}"
+    fail_msg: "{{ fail_mapping_file }}"
+  register: mapping_file_check
+
+- name: Check the subnet of dhcp start range
+  shell: |
+    IFS=. read -r i1 i2 i3 i4 <<< "{{ dhcp_start_ip }}"
+    IFS=. read -r m1 m2 m3 m4 <<< "{{ netmask }}"
+    printf "%d.%d.%d.%d\n" "$((i1 & m1))" "$((i2 & m2))" "$((i3 & m3))" "$((i4 & m4))"
+  args:
+    warn: no
+  register: dhcp_start_sub_result
+  changed_when: false
+  when: dhcp_start_ip != "false"
+
+- name: Set the start dhcp subnet
+  set_fact:
+    dhcp_start_sub: "{{ dhcp_start_sub_result.stdout }}"
+  when: dhcp_start_ip != "false"
+
+- name: Check the subnet of dhcp end range
+  shell: |
+    IFS=. read -r i1 i2 i3 i4 <<< "{{ dhcp_end_ip }}"
+    IFS=. read -r m1 m2 m3 m4 <<< "{{ netmask }}"
+    printf "%d.%d.%d.%d\n" "$((i1 & m1))" "$((i2 & m2))" "$((i3 & m3))" "$((i4 & m4))"
+  register: dhcp_end_sub_result
+  when: dhcp_end_ip != "false"
+  changed_when: false
+
+- name: Set the end dhcp subnet
+  set_fact:
+    dhcp_end_sub: "{{ dhcp_end_sub_result.stdout }}"
+  when: dhcp_end_ip != "false"
+
+- name: Assert dhcp_start_ip_range
+  assert:
+    that:
+      - dhcp_start_ip != "false"
+      - dhcp_start_ip != dhcp_end_ip
+      - dhcp_start_sub == subnet
+      - dhcp_start_sub == dhcp_end_sub
+    success_msg: "{{ success_dhcp_range }}"
+    fail_msg: "{{ fail_dhcp_range }}"
+  register: dhcp_start_ip_check
+
+- name: Assert dhcp_end_ip_range
+  assert:
+    that:
+      - dhcp_end_ip != "false"
+      - dhcp_start_ip != dhcp_end_ip
+      - dhcp_end_sub == subnet
+      - dhcp_start_sub == dhcp_end_sub
+    success_msg: "{{ success_dhcp_range }}"
+    fail_msg: "{{ fail_dhcp_range }}"
+  register: dhcp_end_ip_check
+
 - name: Create ansible vault key
   set_fact:
     vault_key: "{{ lookup('password', '/dev/null chars=ascii_letters') }}"
@@ -105,7 +217,7 @@
 
 - name: Save vault key
   copy:
-    dest: "{{ role_path }}/files/{{ vault_filename }}"
+    dest: "{{ vault_filename }}"
     content: |
       {{ vault_key }}
     owner: root
@@ -113,5 +225,80 @@
   when: "'$ANSIBLE_VAULT;' not in config_content.stdout"
 
 - name: Encrypt input config file
-  command: ansible-vault encrypt {{ input_config_filename }} --vault-password-file {{ role_path }}/files/{{ vault_filename }}
+  command: >-
+    ansible-vault encrypt {{ input_config_filename }}
+    --vault-password-file {{ vault_filename }}
+  changed_when: false
+
+- name: Check if omnia_vault_key exists
+  stat:
+    path: "{{ role_path }}/../../../{{ config_vaultname }}"
+  register: vault_key_result
+
+- name: Create ansible vault key if it does not exist
+  set_fact:
+    vault_key: "{{ lookup('password', '/dev/null chars=ascii_letters') }}"
+  when: not vault_key_result.stat.exists
+
+- name: Save vault key
+  copy:
+    dest: "{{ role_path }}/../../../{{ config_vaultname }}"
+    content: |
+      {{ vault_key }}
+    owner: root
+    force: yes
+  when: not vault_key_result.stat.exists
+
+- name: Check if omnia config file is encrypted
+  command: cat {{ role_path }}/../../../{{ config_filename }}
+  changed_when: false
+  register: config_content
+  no_log: True
+
+- name: Decrpyt omnia_config.yml
+  command: >-
+    ansible-vault decrypt {{ role_path }}/../../../{{ config_filename }}
+    --vault-password-file {{ role_path }}/../../../{{ config_vaultname }}
+  when: "'$ANSIBLE_VAULT;' in config_content.stdout"
+
+- name: Include variable file omnia_config.yml
+  include_vars: "{{ role_path }}/../../../{{ config_filename }}"
+  no_log: True
+
+- name: Validate input parameters are not empty
+  fail:
+    msg: "{{ input_config_failure_msg }}"
+  register: input_config_check
+  when:
+    - mariadb_password | length < 1 or
+      k8s_cni | length < 1
+
+- name: Assert mariadb_password
+  assert:
+    that:
+        - mariadb_password | length > min_length | int - 1
+        - mariadb_password | length < max_length | int + 1
+        - '"-" not in mariadb_password '
+        - '"\\" not in mariadb_password '
+        - '"\"" not in mariadb_password '
+        - " \"'\" not in mariadb_password "
+    success_msg: "{{ success_msg_mariadb_password }}"
+    fail_msg: "{{ fail_msg_mariadb_password }}"
+
+- name: Assert kubernetes cni
+  assert:
+    that: "('calico' in k8s_cni) or ('flannel' in k8s_cni)"
+    success_msg: "{{ success_msg_k8s_cni }}"
+    fail_msg: "{{ fail_msg_k8s_cni }}"
+
+- name: Save input variables from file
+  set_fact:
+    db_password: "{{ mariadb_password }}"
+    k8s_cni: "{{ k8s_cni }}"
+  no_log: True
+
+- name: Encrypt input config file
+  command: >-
+    ansible-vault encrypt {{ role_path }}/../../../{{ config_filename }}
+    --vault-password-file {{ role_path }}/../../../{{ config_vaultname }}
   changed_when: false
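
The subnet derivation added in this file is a plain bitwise AND of each IP octet with the matching netmask octet. As a quick illustration using the 172.17.0.0/16 values the old static templates hard-coded (a real deployment uses whatever ifconfig reports for hpc_nic):

    IFS=. read -r i1 i2 i3 i4 <<< "172.17.0.1"
    IFS=. read -r m1 m2 m3 m4 <<< "255.255.0.0"
    printf "%d.%d.%d.%d\n" "$((i1 & m1))" "$((i2 & m2))" "$((i3 & m3))" "$((i4 & m4))"
    # prints 172.17.0.0

The same computation is repeated for dhcp_start_ip and dhcp_end_ip, and the DHCP range asserts then require both results to match the hpc_nic subnet.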

+ 16 - 4
appliance/roles/common/vars/main.yml

@@ -31,6 +31,8 @@ common_packages:
   - lvm2
   - gettext
   - python-docker
+  - net-tools
+  - python-netaddr
 
 # Usage: pre_requisite.yml
 internet_delay: 0
@@ -58,19 +60,29 @@ daemon_dest: /etc/docker/
 docker_volume_name: omnia-storage
 
 # Usage: password_config.yml
-input_config_filename: "input_config.yml"
+input_config_filename: "appliance_config.yml"
 fail_msg_provision_password: "Failed. Incorrect provision_password format provided in input_config.yml file"
 success_msg_provision_password: "provision_password validated"
 fail_msg_awx_password: "Failed. Incorrect awx_password format provided in input_config.yml file"
 success_msg_awx_password: "awx_password validated"
-fail_msg_mariadb_password: "Failed. Incorrect mariadb_password format provided in input_config.yml file"
-success_msg_mariadb_password: "mariadb_password validated"
 fail_msg_hpc_nic: "Failed. Incorrect hpc_nic format provided in input_config.yml file"
 success_msg_hpc_nic: "hpc_nic validated"
 fail_msg_public_nic: "Failed. Incorrect public_nic format provided in input_config.yml file"
 success_msg_public_nic: "public_nic validated"
+success_mapping_file: "mapping_file_exists validated"
+fail_mapping_file: "Failed. Incorrect mapping_file_exists value in input_config.yml. It should be either true or false"
 input_config_failure_msg: "Please provide all the required parameters in input_config.yml"
+success_dhcp_range: "Dhcp_range validated"
+fail_dhcp_range: "Failed: Incorrect range assigned for dhcp"
+success_hpc_ip: "IP validated"
+fail_hpc_ip: "Failed: Nic should be configured"
 min_length: 8
 max_length: 30
 nic_min_length: 3
-vault_filename: .vault_key
+vault_filename: .vault_key
+config_filename: "omnia_config.yml"
+config_vaultname: .omnia_vault_key
+fail_msg_mariadb_password: "maria_db password not given in correct format."
+success_msg_mariadb_password: "mariadb_password validated"
+success_msg_k8s_cni: "Kubernetes CNI Validated"
+fail_msg_k8s_cni: "Kubernetes CNI not correct."

+ 10 - 6
appliance/roles/inventory/files/add_host.yml

@@ -14,7 +14,7 @@
 ---
 
 - name: Check if host already exists
-  command: awk "{{ '/'+ item + '/' }}" inventory
+  command: awk "{{ '/'+ item + '/' }}" /root/inventory
   register: check_host
   changed_when: no
 
@@ -27,17 +27,21 @@
     host_description: "CPU:{{ hostvars[item]['ansible_processor_count'] }}
     Cores:{{ hostvars[item]['ansible_processor_cores'] }}
     Memory:{{ hostvars[item]['ansible_memtotal_mb'] }}MB
-    BIOS:{{ hostvars[item]['ansible_bios_version']}}"
-  changed_when: no
+    BIOS:{{ hostvars[item]['ansible_bios_version'] }}"
+  when: not check_host.stdout | regex_search(item)
   ignore_errors: yes
 
 - name: Add host
   lineinfile:
-    path:  "inventory"
+    path:  "/root/inventory"
     line: "    {{ item }}:\n      _awx_description: {{ host_description }}"
-  when: not check_host.stdout | regex_search(item)
+  when:
+    - not check_host.stdout | regex_search(item)
+    - host_description != "Description Unavailable"
 
 - name: Host added msg
   debug:
     msg: "{{ host_added_msg + item }}"
-  when: not check_host.stdout | regex_search(item)
+  when:
+    - not check_host.stdout | regex_search(item)
+    - host_description != "Description Unavailable"
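
Taken together with the inventory skeleton created in appliance/roles/inventory/tasks/main.yml, each reachable host with a usable description ends up as an entry in /root/inventory. With hypothetical facts, an added entry would look roughly like:

    ---
    all:
      hosts:
        172.17.0.54:
          _awx_description: CPU:2 Cores:8 Memory:63941MB BIOS:2.8.2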

+ 57 - 15
appliance/roles/inventory/files/create_inventory.yml

@@ -26,6 +26,11 @@
       ignore_errors: yes
       changed_when: false
 
+    - name: Refresh ssh keys
+      command: ssh-keygen -R {{ inventory_hostname }}
+      delegate_to: localhost
+      changed_when: false
+
     - name: Group reachable hosts
       group_by:
         key: "reachable"
@@ -42,38 +47,75 @@
 - name: Set hostname on reachable nodes and gather facts
   hosts: reachable
   gather_facts: False
+  ignore_unreachable: true
   remote_user: "{{ cobbler_username }}"
   vars:
     ansible_password: "{{ cobbler_password }}"
     ansible_become_pass: "{{ cobbler_password }}"
+    ansible_ssh_common_args: '-o StrictHostKeyChecking=no'
+    mapping_file_present: ""
   tasks:
     - name: Setup
       setup:
        filter: ansible_*
 
+    - name: Check hostname of server
+      command: hostname
+      register: hostname_check
+      changed_when: false
+      ignore_errors: true
+
+    - name: Check if IP present in mapping file
+      command: grep "{{ inventory_hostname }}" ../../provision/files/new_mapping_file.csv
+      delegate_to: localhost
+      register: file_present
+      when: mapping_file | bool == true
+      ignore_errors: true
+
+    - name: Set fact if mapping file present
+      set_fact:
+        mapping_file_present: "{{ file_present.stdout }}"
+      when: mapping_file | bool == true
+      ignore_errors: true
+
+    - name: Get the static hostname from mapping file
+      shell: awk -F',' '$3 == "{{ inventory_hostname }}" { print $2 }' ../../provision/files/new_mapping_file.csv
+      delegate_to: localhost
+      when: ('localhost' in hostname_check.stdout) and (mapping_file_present != "" ) and ( mapping_file | bool == true )
+      register: host_name
+      ignore_errors: true
+
+    - name: Set the hostname from mapping file
+      hostname:
+        name: "{{ host_name.stdout }}"
+      register: result_host_name
+      when: ('localhost' in hostname_check.stdout) and (mapping_file_present != "" ) and  (mapping_file | bool == true )
+      ignore_errors: true
+
     - name: Set the system hostname
       hostname:
-        name: "compute{{ inventory_hostname.split('.')[-2] + '.' + inventory_hostname.split('.')[-1] }}"
+        name: "compute{{ inventory_hostname.split('.')[-2] + '-' + inventory_hostname.split('.')[-1] }}"
       register: result_name
+      when: ('localhost' in hostname_check.stdout) and (mapping_file | bool == false)
+      ignore_errors: true
 
-    - name: Add new hostname to /etc/hosts
+    - name: Add new hostname to /etc/hosts from mapping file
       lineinfile:
         dest: /etc/hosts
         regexp: '^127\.0\.0\.1[ \t]+localhost'
-        line: "127.0.0.1 localhost 'compute{{ inventory_hostname.split('.')[-1] }}'"
+        line: "127.0.0.1 localhost {{ host_name.stdout }}"
         state: present
+      when: ('localhost' in hostname_check.stdout) and ( mapping_file_present != "" ) and ( mapping_file | bool == true )
+      ignore_errors: true
 
-    - name: Ensure networking connection
-      command: nmcli networking off
-      changed_when: false
-
-    - name: Ensure networking connection
-      command: nmcli networking on
-      changed_when: false
-
-    - name: Ensure networking connection
-      command: nmcli networking on
-      changed_when: false
+    - name: Add new hostname to /etc/hosts
+      lineinfile:
+        dest: /etc/hosts
+        regexp: '^127\.0\.0\.1[ \t]+localhost'
+        line: "127.0.0.1 localhost 'compute{{ inventory_hostname.split('.')[-2] + '-' + inventory_hostname.split('.')[-1] }}'"
+        state: present
+      when: ('localhost' in hostname_check.stdout) and (mapping_file | bool == false )
+      ignore_errors: true
 
 - name: Update inventory
   hosts: localhost
@@ -90,4 +132,4 @@
     - name: Show unreachable hosts
       debug:
         msg: "{{ host_unreachable_msg }} + {{ groups['ungrouped'] }}"
-      when: "'ungrouped' in groups"
+      when: "'ungrouped' in groups"
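
A concrete reading of the new hostname logic: when no mapping file is used and the node still reports the default "localhost" hostname, the name is derived from the last two octets of the node's IP. For a hypothetical inventory_hostname of 172.17.0.54, inventory_hostname.split('.')[-2] + '-' + inventory_hostname.split('.')[-1] yields "0-54", so the node is renamed compute0-54 and /etc/hosts gets:

    127.0.0.1 localhost 'compute0-54'

Switching the separator from "." to "-" keeps the generated names free of dots, which the mapping-file validation elsewhere in this change also forbids because of slurm and K8s.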

+ 0 - 3
appliance/roles/inventory/files/inventory

@@ -1,3 +0,0 @@
----
-all:
-  hosts:

+ 47 - 25
appliance/roles/inventory/tasks/main.yml

@@ -16,18 +16,6 @@
   set_fact:
     ansible_ssh_common_args: '-o StrictHostKeyChecking=no'
 
-- name: Disable key host checking
-  replace:
-    path: /etc/ansible/ansible.cfg
-    regexp: '#host_key_checking = False'
-    replace: 'host_key_checking = False'
-
-- name: Disable host key checking
-  replace:
-    path: /etc/ssh/ssh_config
-    regexp: '#   StrictHostKeyChecking ask'
-    replace: 'StrictHostKeyChecking no'
-
 - name: Check if provisioned host file exists
   stat:
     path: "{{ role_path }}/files/provisioned_hosts.yml"
@@ -35,39 +23,73 @@
 
 - name: Include vars file of common role
   include_vars: "{{ role_path }}/../common/vars/main.yml"
+  no_log: True
 
 - name: Include vars file of web_ui role
   include_vars: "{{ role_path }}/../web_ui/vars/main.yml"
+  no_log: True
 
 - name: Update inventory file
   block:
-    - name: Decrpyt input_config.yml
+    - name: Check if input config file is encrypted
+      command: cat {{ input_config_filename }}
+      changed_when: false
+      register: config_content
+
+    - name: Decrpyt appliance_config.yml
       command: >-
         ansible-vault decrypt {{ input_config_filename }}
-        --vault-password-file roles/common/files/{{ vault_filename }}
-      changed_when: false
+        --vault-password-file {{ vault_filename }}
+      when: "'$ANSIBLE_VAULT;' in config_content.stdout"
 
-    - name: Include variable file input_config.yml
+    - name: Include variable file appliance_config.yml
       include_vars: "{{ input_config_filename }}"
+      no_log: True
 
     - name: Save input variables from file
       set_fact:
         cobbler_password: "{{ provision_password }}"
+        mapping_file: "{{ mapping_file_exists }}"
+      no_log: True
 
     - name: Encrypt input config file
       command: >-
         ansible-vault encrypt {{ input_config_filename }}
-        --vault-password-file roles/common/files/{{ vault_filename }}
+        --vault-password-file {{ vault_filename }}
+      changed_when: false
 
-    - name: add hosts with description to inventory file
-      command: >-
-        ansible-playbook -i {{ role_path }}/files/provisioned_hosts.yml
-        {{ role_path }}/files/create_inventory.yml
-        --extra-vars "cobbler_username={{ cobbler_username }} cobbler_password={{ cobbler_password }}"
-      ignore_errors: yes
+    - name: Check if inventory file already exists
+      stat:
+        path: "/root/inventory"
+      register: stat_result
+
+    - name: Create inventory file if doesnt exist
+      copy:
+        dest:  "/root/inventory"
+        content: |
+          ---
+          all:
+            hosts:
+        owner: root
+        mode: 0775
+      when: not stat_result.stat.exists
+
+    - name: Add inventory playbook
+      block:
+        - name: add hosts with description to inventory file
+          command: >-
+            ansible-playbook -i {{ role_path }}/files/provisioned_hosts.yml
+            {{ role_path }}/files/create_inventory.yml
+            --extra-vars "cobbler_username={{ cobbler_username }} cobbler_password={{ cobbler_password }} mapping_file={{ mapping_file | bool }}"
+          no_log: True
+          register: register_error
+      rescue:
+        - name: Fail if host addition was not successful
+          fail:
+            msg: "{{ register_error.stderr + register_error.stdout | regex_replace(cobbler_username) | regex_replace(cobbler_password) }}"
 
   when: provisioned_file_result.stat.exists
 
 - name: push inventory to AWX
-  command: awx-manage inventory_import --inventory-name {{ omnia_inventory_name }} --source {{ role_path }}/files/inventory
-  changed_when: no
+  command: awx-manage inventory_import --inventory-name {{ omnia_inventory_name }} --source /root/inventory
+  when: provisioned_file_result.stat.exists

+ 1 - 4
appliance/roles/provision/files/Dockerfile

@@ -21,7 +21,6 @@ RUN yum install -y \
   rsync \
   httpd\
   dhcp \
-  dnsmasq\
   xinetd \
   net-tools \
   memtest86+ \
@@ -33,7 +32,6 @@ RUN mkdir /root/omnia
 #Copy Configuration files
 COPY settings /etc/cobbler/settings
 COPY dhcp.template  /etc/cobbler/dhcp.template
-COPY dnsmasq.template /etc/cobbler/dnsmasq.template
 COPY modules.conf  /etc/cobbler/modules.conf
 COPY tftp /etc/xinetd.d/tftp
 COPY .users.digest /etc/cobbler/users.digest
@@ -50,6 +48,5 @@ VOLUME [ "/var/www/cobbler", "/var/lib/cobbler/backup", "/mnt" ]
 RUN systemctl enable cobblerd
 RUN systemctl enable httpd
 RUN systemctl enable rsyncd
-RUN systemctl enable dnsmasq
 
-CMD ["sbin/init"]
+CMD ["sbin/init"]

+ 2 - 2
appliance/roles/provision/files/settings

@@ -275,7 +275,7 @@ manage_reverse_zones: ['172.17']
 # if using cobbler with manage_dhcp, put the IP address
 # of the cobbler server here so that PXE booting guests can find it
 # if you do not set this correctly, this will be manifested in TFTP open timeouts.
-next_server: 172.17.0.1
+next_server: ip
 
 # settings for power management features.  optional.
 # see https://github.com/cobbler/cobbler/wiki/Power-management to learn more
@@ -387,7 +387,7 @@ scm_track_mode: "git"
 # if you have a server that appears differently to different subnets
 # (dual homed, etc), you need to read the --server-override section
 # of the manpage for how that works.
-server: 172.17.0.1
+server: ip
 
 # If set to 1, all commands will be forced to use the localhost address
 # instead of using the above value which can force commands like

+ 0 - 20
appliance/roles/provision/files/dnsmasq.template

@@ -1,20 +0,0 @@
-# Cobbler generated configuration file for dnsmasq
-# $date
-#
-
-# resolve.conf .. ?
-#no-poll
-#enable-dbus
-read-ethers
-addn-hosts = /var/lib/cobbler/cobbler_hosts
-
-dhcp-range=172.17.0.10 172.17.0.254
-dhcp-option=66,$next_server
-dhcp-lease-max=1000
-dhcp-authoritative
-dhcp-boot=pxelinux.0
-dhcp-boot=net:normalarch,pxelinux.0
-dhcp-boot=net:ia64,$elilo
-
-$insert_cobbler_system_definitions
-

+ 0 - 18
appliance/roles/provision/files/ifcfg-em1

@@ -1,18 +0,0 @@
-TYPE=Ethernet
-PROXY_METHOD=none
-BROWSER_ONLY=no
-BOOTPROTO=none
-DEFROUTE=yes
-IPV4_FAILURE_FATAL=no
-IPV6INIT=yes
-IPV6_AUTOCONF=yes
-IPV6_DEFROUTE=yes
-IPV6_FAILURE_FATAL=no
-IPV6_ADDR_GEN_MODE=stable-privacy
-NAME=em1
-UUID=485d7133-2c49-462d-bbb4-b854fe98e0fe
-DEVICE=em1
-ONBOOT=yes
-IPV6_PRIVACY=no
-IPADDR=172.17.0.1
-NETMASK=255.255.0.0

+ 12 - 3
appliance/roles/provision/files/inventory_creation.yml

@@ -25,10 +25,19 @@
       set_fact:
         vars_new: "{{ var| ipv4('address')| to_nice_yaml}}"
 
-    - name: Create the inventory
+    - name: Create the static ip
+      shell: awk -F',' 'NR >1{print $3}' omnia/appliance/roles/provision/files/new_mapping_file.csv > static_hosts.yml
+      changed_when: false
+      ignore_errors: true
+
+    - name: Create the dynamic inventory
       shell: |
-        echo "[all]" > omnia/appliance/roles/inventory/files/provisioned_hosts.yml
+        echo "[all]" >  omnia/appliance/roles/inventory/files/provisioned_hosts.yml
         echo "{{ vars_new }}" > temp.txt
-        egrep -o '[1-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}' temp.txt >>omnia/appliance/roles/inventory/files/provisioned_hosts.yml
+        egrep -o '[1-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}' temp.txt >>dynamic_hosts.yml
       changed_when: false
+      ignore_errors: true
 
+    - name: Final inventory
+      shell: cat dynamic_hosts.yml static_hosts.yml| sort -ur  >> omnia/appliance/roles/inventory/files/provisioned_hosts.yml
+      changed_when: false     

+ 19 - 2
appliance/roles/provision/files/kickstart.yml

@@ -58,7 +58,6 @@
 
   - name: Adding curl
     shell: export PATH="/usr/bin/curl:$PATH"
-    changed_when: true
 
   - name: Run import command
     command: cobbler import --arch=x86_64 --path=/mnt --name="{{ name_iso }}"
@@ -75,11 +74,29 @@
   - name: Syncing of cobbler
     command: cobbler sync
     changed_when: false
+  
+  - name: Disable default apache webpage
+    blockinfile:
+      state: present
+      insertafter: '^#insert the content here for disabling the default apache webpage'
+      dest: /etc/httpd/conf/httpd.conf
+      block: |
+        <Directory />
+           Order Deny,Allow
+           Deny from all
+           Options None
+           AllowOverride None
+         </Directory>
 
   - name: Restart cobbler
     service:
       name: cobblerd
       state: restarted
+ 
+  - name: Restart httpdd
+    service:
+      name: httpd
+      state: restarted
 
   - name: Restart xinetd
     service:
@@ -101,4 +118,4 @@
     cron:
       name: Create inventory
       minute: "*/5"
-      job: "ansible-playbook /root/inventory_creation.yml"
+      job: "ansible-playbook /root/inventory_creation.yml"

+ 1 - 0
appliance/roles/provision/files/temp_centos7.ks

@@ -59,5 +59,6 @@ reboot
 
 %packages
 @core
+net-tools
 %end
 

+ 9 - 8
appliance/roles/provision/files/dhcp.template

@@ -18,14 +18,15 @@ set vendorclass = option vendor-class-identifier;
 
 option pxe-system-type code 93 = unsigned integer 16;
 
-subnet 172.17.0.0 netmask 255.255.0.0 {
-     option routers             172.17.0.1;
-     option domain-name-servers 172.17.0.1;
-     option subnet-mask         255.255.0.0;
-     range dynamic-bootp        172.17.0.10 172.17.0.254;
-     default-lease-time         21600;
-     max-lease-time             43200;
-     next-server                $next_server;
+subnet subnet_mask netmask net_mask {
+option subnet-mask net_mask;
+range dynamic-bootp start end;
+default-lease-time  21600;
+max-lease-time  43200;
+next-server $next_server;
+#insert the static DHCP leases for configuration here
+
+
      class "pxeclients" {
           match if substring (option vendor-class-identifier, 0, 9) = "PXEClient";
           if option pxe-system-type = 00:02 {

+ 28 - 3
appliance/roles/provision/tasks/check_prerequisites.yml

@@ -17,17 +17,20 @@
   stat:
     path: "{{ role_path }}/files/{{ iso_name }}"
   register: iso_status
+  tags: install
 
 - name: Iso file not present
   fail:
     msg: "{{ iso_fail }}"
   when: iso_status.stat.exists == false
   register: iso_file_check
+  tags: install
 
 - name: Initialize variables
   set_fact:
-    cobbler_status: false
+    cobbler_container_status: false
     cobbler_image_status: false
+    cobbler_config_status: false
   tags: install
 
 - name: Inspect the cobbler image
@@ -48,8 +51,30 @@
   when: cobbler_image_result.images| length==1
   tags: install
 
-- name: Update cobbler status
+- name: Update cobbler container status
   set_fact:
-    cobbler_status: true
+    cobbler_container_status: true
   when: cobbler_result.exists
   tags: install
+
+- name: Fetch cobbler profile list
+  command: docker exec cobbler cobbler profile list
+  changed_when: false
+  register: cobbler_profile_list
+  when: cobbler_container_status == true
+
+- name: Check crontab list
+  command: docker exec cobbler crontab -l
+  changed_when: false
+  register: crontab_list
+  ignore_errors: true
+  when: cobbler_container_status == true
+
+- name: Update cobbler container status
+  set_fact:
+    cobbler_config_status: true
+  when:
+    - cobbler_container_status == true
+    - "'CentOS' in cobbler_profile_list.stdout"
+    - "'* * * * * ansible-playbook /root/tftp.yml' in crontab_list.stdout"
+    - "'5 * * * * ansible-playbook /root/inventory_creation.yml' in crontab_list.stdout"

+ 26 - 3
appliance/roles/provision/tasks/configure_cobbler.yml

@@ -12,12 +12,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 ---
+- name: Delete the cobbler container if exits
+  docker_container:
+    name: cobbler
+    state: absent
+  tags: install
+  when: cobbler_container_status == true and cobbler_config_status == false
+
+- name: Run cobbler container
+  command: "{{ cobbler_run_command }}"
+  changed_when: false
+  tags: install
+  when: cobbler_container_status == true and cobbler_config_status == false
 
 - name: Configuring cobbler inside container (It may take 5-10 mins)
   command: docker exec cobbler ansible-playbook /root/kickstart.yml
   changed_when: false
   tags: install
-  when: not cobbler_status
+  when: cobbler_config_status == false
 
 - name: Schedule task
   cron:
@@ -25,9 +37,20 @@
     special_time: reboot
     job: "ansible-playbook {{ role_path }}/files/start_cobbler.yml"
   tags: install
-  when: not cobbler_status
+  when: cobbler_config_status == false
 
 - name: Execute cobbler sync in cobbler container
   command: docker exec cobbler cobbler sync
   changed_when: true
-  when: cobbler_status == true
+  when: cobbler_config_status == true
+
+- name: Remove the files
+  file:
+    path: "{{ item }}"
+    state: absent
+  with_items:
+    - "{{ role_path }}/files/.users.digest"
+    - "{{ role_path }}/files/dhcp.template"
+    - "{{ role_path }}/files/settings"
+    - "{{ role_path }}/files/centos7.ks"
+    - "{{ role_path }}/files/new_mapping_file.csv.bak"

+ 60 - 0
appliance/roles/provision/tasks/dhcp_configure.yml

@@ -0,0 +1,60 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+- name: Create the dhcp template
+  copy:
+    src: "{{ role_path }}/files/temp_dhcp.template"
+    dest: "{{ role_path }}/files/dhcp.template"
+    mode: 0775
+  tags: install
+
+- name: Assign subnet and netmask
+  replace:
+    path: "{{ role_path }}/files/dhcp.template"
+    regexp: '^subnet subnet_mask netmask net_mask {'
+    replace: 'subnet {{ subnet }} netmask {{ netmask }} {'
+  tags: install
+
+- name: Assign netmask
+  replace:
+    path: "{{ role_path }}/files/dhcp.template"
+    regexp: '^option subnet-mask net_mask;'
+    replace: 'option subnet-mask {{ netmask }};'
+
+- name: Assign DHCP range
+  replace:
+    path: "{{ role_path }}/files/dhcp.template"
+    regexp: '^range dynamic-bootp start end;'
+    replace: 'range dynamic-bootp {{ dhcp_start_ip }} {{ dhcp_end_ip }};'
+
+- name: Create the cobbler settings file
+  copy:
+    src: "{{ role_path }}/files/cobbler_settings"
+    dest: "{{ role_path }}/files/settings"
+    mode: 0775
+  tags: install
+
+- name: Assign server ip
+  replace:
+    path: "{{ role_path }}/files/settings"
+    regexp: '^server: ip'
+    replace: 'server: {{ hpc_ip }}'
+
+- name: Assign next server ip
+  replace:
+    path: "{{ role_path }}/files/settings"
+    regexp: '^next_server: ip'
+    replace: 'next_server: {{ hpc_ip }}'
+

+ 14 - 8
appliance/roles/provision/tasks/main.yml

@@ -14,8 +14,6 @@
 ---
 
 #Tasks for Deploying cobbler on the system
-- name: Configure nic
-  import_tasks: configure_nic.yml
 
 - name: Check cobbler status on machine
   include_tasks: check_prerequisites.yml
@@ -26,23 +24,31 @@
 
 - name: Modify firewall settings for Cobbler
   import_tasks: firewall_settings.yml
-  when: not cobbler_status
+  when: not cobbler_container_status
 
 - name: Include common variables
   include_vars: ../../common/vars/main.yml
-  when: not cobbler_status
+  when: not cobbler_container_status
 
 - name: Internet validation
   include_tasks: ../../common/tasks/internet_validation.yml
-  when: not cobbler_status
+  when: not cobbler_container_status
 
 - name: Provision password validation
   import_tasks: provision_password.yml
   when: not cobbler_image_status
 
+- name: Dhcp Configuration
+  import_tasks: dhcp_configure.yml
+  when: not cobbler_image_status
+
+- name: Mapping file validation
+  import_tasks: mapping_file.yml
+  when: (not cobbler_image_status) and (mapping_file == true)
+
 - name: Cobbler image creation
   import_tasks: cobbler_image.yml
-  when: not cobbler_status
+  when: not cobbler_container_status
 
 - name: Cobbler configuration
   import_tasks: configure_cobbler.yml
@@ -52,9 +58,9 @@
     - debug:
         msg: "{{ message_skipped }}"
         verbosity: 2
-      when: cobbler_status
+      when: cobbler_container_status
     - debug:
         msg: "{{ message_installed }}"
         verbosity: 2
-      when: not cobbler_status
+      when: not cobbler_container_status
   tags: install

+ 84 - 0
appliance/roles/provision/tasks/mapping_file.yml

@@ -0,0 +1,84 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+- name: Check availability of mapping file
+  stat:
+    path: "{{ role_path }}/files/{{ mapping_file_name }}"
+  register: mapping_file_status
+  tags: install
+
+- name: Mapping file not present
+  fail:
+    msg: "{{ mapping_file_fail }}"
+  when: mapping_file_status.stat.exists == false
+  register: mapping_file_check
+  tags: install
+
+- name: Remove blank lines
+  shell:  awk -F, 'length>NF+1' {{ role_path }}/files/{{ mapping_file_name }} > {{ role_path }}/files/new_mapping_file.csv
+  changed_when: false
+  tags: install
+
+- name: Remove blank spaces
+  shell:  sed -i.bak -E 's/(^|,)[[:blank:]]+/\1/g; s/[[:blank:]]+(,|$)/\1/g'  {{ role_path }}/files/new_mapping_file.csv
+  args:
+    warn: no
+  changed_when: false
+  tags: install
+
+- name: Count the rows
+  shell: awk -F',' '{print $2}' {{ role_path }}/files/new_mapping_file.csv | wc -l
+  register: total_count
+  changed_when: false
+  tags: install
+
+- name: Check for duplicate hostname
+  shell: awk -F',' '{print $2}' {{ role_path }}/files/new_mapping_file.csv | uniq | wc -l
+  register: count_host
+  changed_when: false
+  tags: install
+
+- name: Fail if duplicate hosts exist
+  fail:
+    msg: "{{ fail_hostname_duplicate }}"
+  when:  total_count.stdout >  count_host.stdout
+  tags: install
+
+- name: Check if _ or . or space present in hostname
+  shell: awk -F',' '{print $2}' {{ role_path }}/files/new_mapping_file.csv |grep -E -- '_|\.| '
+  register: hostname_result
+  ignore_errors: true
+  changed_when: false
+  tags: install
+
+- name: Fail if  _ or . or space present in hostname
+  fail:
+    msg: "{{ hostname_result.stdout + ' :Hostname should not contain _ or . as it will cause error with slurm and K8s'}}"
+  when: hostname_result.stdout != ""
+  tags: install
+
+- name: Fetch input
+  blockinfile:
+    path: "{{ role_path }}/files/dhcp.template"
+    insertafter: '^#insert the static DHCP leases for configuration here'
+    block: |
+      host {{ item.split(',')[1] }} {
+        hardware ethernet {{ item.split(',')[0] }};
+        fixed-address {{ item.split(',')[2] }};
+      }
+    marker: "# {mark} DHCP BLOCK OF {{ item.split(',')[0] }}"
+  with_lines: "{{ remove_header }}"
+  ignore_errors: true
+  tags: install
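
For a single, hypothetical mapping row 00:1a:2b:3c:4d:5e,node001,172.17.0.10, the blockinfile task above appends a managed lease entry of this shape under the "#insert the static DHCP leases" marker in dhcp.template:

    # BEGIN DHCP BLOCK OF 00:1a:2b:3c:4d:5e
    host node001 {
      hardware ethernet 00:1a:2b:3c:4d:5e;
      fixed-address 172.17.0.10;
    }
    # END DHCP BLOCK OF 00:1a:2b:3c:4d:5e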

+ 9 - 26
appliance/roles/provision/tasks/provision_password.yml

@@ -27,14 +27,16 @@
   tags: install
 
 - name: Encrypt cobbler password
-  shell: printf "%s:%s:%s" {{ username }} "Cobbler" {{ cobbler_password }} | md5sum | awk '{print $1}'
+  shell: printf "%s:%s:%s" {{ username }} "Cobbler" "{{ cobbler_password }}" | md5sum | awk '{print $1}'
   changed_when: false
   register: encrypt_password
+  no_log: true
   tags: install
 
 - name: Copy cobbler password to cobbler config file
   shell: printf "%s:%s:%s\n" "{{ username }}" "Cobbler" "{{ encrypt_password.stdout }}" > "{{ role_path }}/files/.users.digest"
   changed_when: false
+  no_log: true
   tags: install
 
 - name: Create the kickstart file
@@ -44,28 +46,11 @@
     mode: 0775
   tags: install
 
-- name: Configure kickstart file
+- name: Configure kickstart file- IP
   replace:
     path: "{{ role_path }}/files/centos7.ks"
     regexp: '^url --url http://ip/cblr/links/CentOS7-x86_64/'
-    replace: url --url http://{{ ansible_em1.ipv4.address }}/cblr/links/CentOS7-x86_64/
-  when: internet_nic == "em1"
-  tags: install
-
-- name: Configure kickstart file
-  replace:
-    path: "{{ role_path }}/files/centos7.ks"
-    regexp: '^url --url http://ip/cblr/links/CentOS7-x86_64/'
-    replace: url --url http://{{ ansible_em2.ipv4.address }}/cblr/links/CentOS7-x86_64/
-  when: internet_nic == "em2"
-  tags: install
-
-- name: Configure kickstart file
-  replace:
-    path: "{{ role_path }}/files/centos7.ks"
-    regexp: '^url --url http://ip/cblr/links/CentOS7-x86_64/'
-    replace: url --url http://{{ ansible_em3.ipv4.address }}/cblr/links/CentOS7-x86_64/
-  when: internet_nic == "em3"
+    replace: url --url http://{{ public_ip }}/cblr/links/CentOS7-x86_64/
   tags: install
 
 - name: Random phrase generation
@@ -81,24 +66,22 @@
 
 - name: Login password
   command: openssl passwd -1 -salt {{ random_phrase }} {{ cobbler_password }}
+  no_log: true
   changed_when: false
   register: login_pass
   tags: install
 
-- name: Configure kickstart file
+- name: Configure kickstart file- Password
   replace:
     path: "{{ role_path }}/files/centos7.ks"
     regexp: '^rootpw --iscrypted password'
     replace: 'rootpw --iscrypted {{ login_pass.stdout }}'
+  no_log: true
   tags: install
 
-- name: Configure kickstart file
+- name: Configure kickstart file- nic
   replace:
     path: "{{ role_path }}/files/centos7.ks"
     regexp: '^network  --bootproto=dhcp --device=nic --onboot=on'
     replace: 'network  --bootproto=dhcp --device={{ nic }} --onboot=on'
   tags: install
-
-- name: Configure default password in settings
-  local_action: copy content="{{ login_pass.stdout }}" dest="{{ role_path }}/files/.node_login.digest"
-  tags: install

+ 6 - 1
appliance/roles/provision/vars/main.yml

@@ -15,6 +15,12 @@
 
 # vars file for provision
 
+#Usage: mapping_file.yml
+mapping_file_name: mapping_file.csv
+mapping_file_fail: "Mapping file absent: Copy the mapping file in omnia/appliance/roles/provision/files"
+fail_hostname_duplicate:  "Duplicate hostname exists. Please check"
+remove_header: awk 'NR > 1 { print }' {{ role_path }}/files/new_mapping_file.csv
+
 #Usage: check_prerequisite.yml
 iso_name: CentOS-7-x86_64-Minimal-2009.iso
 iso_fail: "Iso file absent: Download and copy the iso file in omnia/appliance/roles/provision/files"
@@ -28,7 +34,6 @@ docker_image_name: cobbler
 docker_image_tag: latest
 cobbler_run_command: docker run -itd --privileged --net=host --restart=always -v {{ mount_path }}:/root/omnia  -v cobbler_www:/var/www/cobbler:Z -v cobbler_backup:/var/lib/cobbler/backup:Z -v /mnt/iso:/mnt:Z -p 69:69/udp -p 81:80 -p 443:443 -p 25151:25151 --name cobbler  cobbler:latest  /sbin/init
 
-
 # Usage: main.yml
 message_skipped: "Installation Skipped: Cobbler instance is already running on your system"
 message_installed: "Installation Successful"

+ 243 - 146
appliance/roles/web_ui/tasks/awx_configuration.yml

@@ -14,174 +14,271 @@
 ---
 
 # Get Current AWX configuration
-- name: Get organization list
-  command: >-
-    awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-    organizations list -f human
-  register: organizations_list
-  changed_when: no
-
-- name: Get project list
-  command: >-
-    awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-    projects list -f human
-  register: projects_list
-  changed_when: no
-
-- name: Get inventory list
-  command: >-
-    awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-    inventory list -f human
-  register: inventory_list
-  changed_when: no
-
-- name: Get credentials list
-  command: >-
-    awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-    credentials list -f human
-  register: credentials_list
-  changed_when: no
-
-- name: Get template list
-  command: >-
-    awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-    job_templates list -f human
-  register: job_templates_list
-  changed_when: no
-
-- name: If omnia-inventory exists, fetch group names in the inventory
-  command: >-
-    awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-    groups list --inventory "{{ omnia_inventory_name }}" -f human
-  register: groups_list
-  when: omnia_inventory_name in inventory_list.stdout
-
-- name: Get schedules list
-  command: >-
-    awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-    schedules list -f human
-  register: schedules_list
-  changed_when: no
+- name: Waiting for 30 seconds for UI components to be accessible
+  wait_for:
+    timeout: 30
 
-# Delete Default Configurations
-- name: Delete default organization
-  command: >-
-    awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-    organizations delete "{{ default_org }}"
-  when: default_org in organizations_list.stdout
-
-- name: Delete default job template
-  command: >-
-    awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-    job_templates delete "{{ default_template }}"
-  when: default_template in job_templates_list.stdout
-
-- name: Delete default project
-  command: >-
-    awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-    projects delete "{{ default_projects }}"
-  when: default_projects in projects_list.stdout
-
-- name: Delete default credential
-  command: >-
-    awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-    credentials delete "{{ default_credentials }}"
-  when: default_credentials in credentials_list.stdout
+- name: Organization list
+  block:
+    - name: Get organization list
+      command: >-
+        awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
+        organizations list -f human
+      register: organizations_list
+      changed_when: no
+      no_log: True
+  rescue:
+    - name: Message
+      fail:
+        msg: "{{ organizations_list.stderr | regex_replace(awx_user) | regex_replace(admin_password) }}"
 
-# Create required configuration if not present
-- name: Create organisation
-  command: >-
-    awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-    organizations create --name "{{ organization_name }}"
-  when: organization_name not in organizations_list.stdout
-
-- name: Create new project
-  command: >-
-    awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-    projects create --name "{{ project_name }}" --organization "{{ organization_name }}"
-    --local_path "{{ dir_name }}"
-  when: project_name not in projects_list.stdout
-
-- name: Create new omnia inventory
-  command: >-
-    awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-    inventory create --name "{{ omnia_inventory_name }}" --organization "{{ organization_name }}"
-  when: omnia_inventory_name not in inventory_list.stdout
-
-- name: Create groups in omnia inventory
-  command: >-
-    awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-    groups create --name "{{ item }}" --inventory "{{ omnia_inventory_name }}"
-  when: omnia_inventory_name not in inventory_list.stdout or item not in groups_list.stdout
-  loop: "{{ group_names }}"
-
-- name: Create credentials for omnia
-  command: >-
-    awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-    credentials create --name "{{ credential_name }}" --organization "{{ organization_name }}"
-    --credential_type "{{ credential_type }}"
-    --inputs '{"username": "{{ cobbler_username }}", "password": "{{ cobbler_password }}"}'
-  when: credential_name not in credentials_list.stdout
-
-- name: DeployOmnia Template
+- name: Project list
+  block:
+    - name: Get project list
+      command: >-
+        awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
+        projects list -f human
+      register: projects_list
+      changed_when: no
+      no_log: True
+  rescue:
+    - name: Message
+      fail:
+        msg: "{{ projects_list.stderr | regex_replace(awx_user) | regex_replace(admin_password) }}"
+
+- name: Inventory list
+  block:
+    - name: Get inventory list
+      command: >-
+        awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
+        inventory list -f human
+      register: inventory_list
+      changed_when: no
+      no_log: True
+  rescue:
+    - name: Message
+      fail:
+        msg: "{{ inventory_list.stderr | regex_replace(awx_user) | regex_replace(admin_password) }}"
+
+- name: Credential list
+  block:
+    - name: Get credentials list
+      command: >-
+        awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
+        credentials list -f human
+      register: credentials_list
+      changed_when: no
+      no_log: True
+  rescue:
+    - name: Message
+      fail:
+        msg: "{{ credentials_list.stderr | regex_replace(awx_user) | regex_replace(admin_password) }}"
+
+- name: Template List
   block:
-    - name: Create template to deploy omnia
+    - name: Get template list
       command: >-
         awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-        job_templates create
-        --name "{{ omnia_template_name }}"
-        --job_type run
-        --inventory "{{ omnia_inventory_name }}"
-        --project "{{ project_name }}"
-        --playbook "{{ omnia_playbook }}"
-        --verbosity "{{ playbooks_verbosity }}"
-        --ask_skip_tags_on_launch true
+        job_templates list -f human
+      register: job_templates_list
+      changed_when: no
+      no_log: True
+  rescue:
+    - name: Message
+      fail:
+        msg: "{{ job_templates_list.stderr | regex_replace(awx_user) | regex_replace(admin_password) }}"
 
-    - name: Associate credential
+- name: Group names
+  block:
+    - name: If omnia-inventory exists, fetch group names in the inventory
       command: >-
         awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-        job_templates associate "{{ omnia_template_name }}"
-        --credential ""{{ credential_name }}""
+        groups list --inventory "{{ omnia_inventory_name }}" -f human
+      register: groups_list
+      when: omnia_inventory_name in inventory_list.stdout
+      no_log: True
+  rescue:
+    - name: Message
+      fail:
+        msg: "{{ groups_list.stderr | regex_replace(awx_user) | regex_replace(admin_password) }}"
 
-  when: omnia_template_name not in job_templates_list.stdout
+- name: Schedules list
+  block:
+    - name: Get schedules list
+      command: >-
+        awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
+        schedules list -f human
+      register: schedules_list
+      changed_when: no
+      no_log: True
+  rescue:
+    - name: Message
+      fail:
+        msg: "{{ schedules_list.stderr | regex_replace(awx_user) | regex_replace(admin_password) }}"
 
-- name: DynamicInventory template
+# Delete Default Configurations
+- name: Delete default configurations
   block:
-    - name: Create template to fetch dynamic inventory
+    - name: Delete default organization
+      command: >-
+        awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
+        organizations delete "{{ default_org }}"
+      when: default_org in organizations_list.stdout
+      register: register_error
+      no_log: True
+
+    - name: Delete default job template
+      command: >-
+        awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
+        job_templates delete "{{ default_template }}"
+      when: default_template in job_templates_list.stdout
+      register: register_error
+      no_log: True
+
+    - name: Delete default project
       command: >-
         awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-        job_templates create
-        --name "{{ inventory_template_name }}"
-        --job_type run
-        --inventory "{{ omnia_inventory_name }}"
-        --project "{{ project_name }}"
-        --playbook "{{ inventory_playbook }}"
-        --verbosity "{{ playbooks_verbosity }}"
-        --use_fact_cache true
+        projects delete "{{ default_projects }}"
+      when: default_projects in projects_list.stdout
+      register: register_error
+      no_log: True
 
-    - name: Associate credential
+    - name: Delete default credential
       command: >-
         awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-        job_templates associate "{{ inventory_template_name }}"
-        --credential ""{{ credential_name }}""
-  when: inventory_template_name not in job_templates_list.stdout
+        credentials delete "{{ default_credentials }}"
+      when: default_credentials in credentials_list.stdout
+      register: register_error
+      no_log: True
+
+  rescue:
+    - name: Message
+      fail:
+        msg: "{{ register_error.stderr | regex_replace(awx_user) | regex_replace(admin_password) }}"
 
-- name: Schedule dynamic inventory template
+# Create required configuration if not present
+- name: Create required configurations
   block:
-    - name: Get unified job template list
+    - name: Create organisation
+      command: >-
+        awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
+        organizations create --name "{{ organization_name }}"
+      when: organization_name not in organizations_list.stdout
+      register: register_error
+      no_log: True
+
+    - name: Create new project
+      command: >-
+        awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
+        projects create --name "{{ project_name }}" --organization "{{ organization_name }}"
+        --local_path "{{ dir_name }}"
+      when: project_name not in projects_list.stdout
+      register: register_error
+      no_log: True
+
+    - name: Create new omnia inventory
       command: >-
         awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-        unified_job_templates list --name "{{ inventory_template_name }}" -f human
-      register: unified_job_template_list
+        inventory create --name "{{ omnia_inventory_name }}" --organization "{{ organization_name }}"
+      when: omnia_inventory_name not in inventory_list.stdout
+      register: register_error
+      no_log: True
 
-    - name: Get job ID
-      set_fact:
-        job_id: "{{ unified_job_template_list.stdout | regex_search('[0-9]+') }}"
+    - name: Create groups in omnia inventory
+      command: >-
+        awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
+        groups create --name "{{ item }}" --inventory "{{ omnia_inventory_name }}"
+      when: omnia_inventory_name not in inventory_list.stdout or item not in groups_list.stdout
+      register: register_error
+      no_log: True
+      loop: "{{ group_names }}"
 
-    - name: Schedule dynamic inventory job
+    - name: Create credentials for omnia
       command: >-
         awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-        schedules create --name "{{ schedule_name }}"
-        --unified_job_template="{{ job_id }}" --rrule="{{ schedule_rule }}"
+        credentials create --name "{{ credential_name }}" --organization "{{ organization_name }}"
+        --credential_type "{{ credential_type }}"
+        --inputs '{"username": "{{ cobbler_username }}", "password": "{{ cobbler_password }}"}'
+      when: credential_name not in credentials_list.stdout
+      register: register_error
+      no_log: True
+
+    - name: DeployOmnia Template
+      block:
+        - name: Create template to deploy omnia
+          command: >-
+            awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
+            job_templates create
+            --name "{{ omnia_template_name }}"
+            --job_type run
+            --inventory "{{ omnia_inventory_name }}"
+            --project "{{ project_name }}"
+            --playbook "{{ omnia_playbook }}"
+            --verbosity "{{ playbooks_verbosity }}"
+            --ask_skip_tags_on_launch true
+          register: register_error
+          no_log: True
+
+        - name: Associate credential
+          command: >-
+            awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
+            job_templates associate "{{ omnia_template_name }}"
+            --credential ""{{ credential_name }}""
+          register: register_error
+          no_log: True
+
+      when: omnia_template_name not in job_templates_list.stdout
+
+    - name: DynamicInventory template
+      block:
+        - name: Create template to fetch dynamic inventory
+          command: >-
+            awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
+            job_templates create
+            --name "{{ inventory_template_name }}"
+            --job_type run
+            --inventory "{{ omnia_inventory_name }}"
+            --project "{{ project_name }}"
+            --playbook "{{ inventory_playbook }}"
+            --verbosity "{{ playbooks_verbosity }}"
+            --use_fact_cache true
+          register: register_error
+          no_log: True
+
+        - name: Associate credential
+          command: >-
+            awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
+            job_templates associate "{{ inventory_template_name }}"
+            --credential ""{{ credential_name }}""
+          register: register_error
+          no_log: True
+
+      when: inventory_template_name not in job_templates_list.stdout
+
+    - name: Schedule dynamic inventory template
+      block:
+        - name: Get unified job template list
+          command: >-
+            awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
+            unified_job_templates list --name "{{ inventory_template_name }}" -f human
+          no_log: True
+          register: unified_job_template_list
+
+        - name: Get job ID
+          set_fact:
+            job_id: "{{ unified_job_template_list.stdout | regex_search('[0-9]+') }}"
+
+        - name: Schedule dynamic inventory job
+          command: >-
+            awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
+            schedules create --name "{{ schedule_name }}"
+            --unified_job_template="{{ job_id }}" --rrule="{{ schedule_rule }}"
+          register: register_error
+          no_log: True
+
+      when: schedule_name not in schedules_list.stdout
 
-  when: schedule_name not in schedules_list.stdout
+  rescue:
+    - name: Message
+      fail:
+        msg: "{{ register_error.stderr | regex_replace(awx_user) | regex_replace(admin_password) }}"
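The tasks above share a common pattern: each `awx` CLI call runs with `no_log: True` so credentials never appear in the log output, its result is registered, and the rescue section re-raises the error only after `regex_replace` strips the username and password. A minimal standalone sketch of that pattern (assuming `awx_ip`, `awx_user`, and `admin_password` are defined as in this role; it is not part of the role file itself):

```yaml
# Sketch: the credential-scrubbing error pattern used throughout awx_configuration.yml.
- name: Example AWX CLI call with sanitized failure output
  block:
    - name: List AWX organizations
      command: >-
        awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}"
        --conf.password "{{ admin_password }}" organizations list -f human
      register: awx_output
      changed_when: no
      no_log: True
  rescue:
    - name: Fail with the username and password removed from the error text
      fail:
        msg: "{{ awx_output.stderr | regex_replace(awx_user) | regex_replace(admin_password) }}"
```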

+ 1 - 1
appliance/roles/web_ui/tasks/clone_awx.yml

@@ -18,5 +18,5 @@
     repo: "{{ awx_git_repo }}"
     dest: "{{ awx_repo_path }}"
     force: yes
-    version: devel
+    version: 15.0.0
   tags: install

+ 3 - 1
appliance/roles/web_ui/tasks/install_awx.yml

@@ -49,6 +49,7 @@
       args:
         chdir: "{{ awx_installer_path }}"
       register: awx_installation
+      no_log: True
 
   rescue:
     - name: Check AWX status on machine
@@ -56,7 +57,8 @@
 
     - name: Fail if container are not running
       fail:
-        msg: "AWX installation failed."
+        msg: "AWX installation failed with error msg:
+        {{ awx_installation.stdout | regex_replace(admin_password) }}."
       when: not awx_status
 
   tags: install

+ 11 - 42
appliance/roles/web_ui/tasks/main.yml

@@ -27,7 +27,7 @@
   when: not awx_status
   tags: install
 
-- name: Get and encrypt AWX password
+- name: Clone AWX repo
   include_tasks: clone_awx.yml
   when: not awx_status
   tags: install
@@ -58,50 +58,19 @@
   include_tasks: ../../common/tasks/internet_validation.yml
   tags: install
 
-- name: Waiting for AWX UI to be accessible
-  wait_for:
-    timeout: 300
-  delegate_to: localhost
-  tags: install
-
-- name: Re-install if in migrating state
-  block:
-    - name: Check if AWX UI is accessible
-      command: >-
-        awx --conf.host "{{ awx_ip }}" --conf.username "{{ awx_user }}" --conf.password "{{ admin_password }}"
-        organizations list -f human
-      changed_when: no
-
-  rescue:
-    - name: Remove old containers
-      docker_container:
-        name: "{{ item }}"
-        state: absent
-      loop:
-        - awx_task
-        - awx_web
-
-    - name: Restart docker
-      service:
-        name: docker
-        state: restarted
-
-    - name: Run AWX install.yml file
-      command: ansible-playbook -i inventory install.yml --extra-vars "admin_password={{ admin_password }}"
-      args:
-        chdir: "{{ awx_installer_path }}"
-      ignore_errors: yes
-
-    - name: Waiting for AWX UI to be accessible
-      wait_for:
-        timeout: 150
-      delegate_to: localhost
-  tags: install
-
 - name: Install AWX-CLI
   include_tasks: install_awx_cli.yml
   tags: install
 
+- name: Check if AWX-UI is accessible
+  include_tasks: ui_accessibility.yml
+  tags: install
+
 - name: Configure AWX
-  include_tasks: awx_configuration.yml
+  block:
+    - include_tasks: awx_configuration.yml
+  rescue:
+    - name: Display msg
+      debug:
+        msg: "{{ conf_fail_msg }}"
   tags: install

+ 85 - 0
appliance/roles/web_ui/tasks/ui_accessibility.yml

@@ -0,0 +1,85 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# Check accessibility of AWX-UI
+- name: Re-install if in migrating state
+  block:
+    - name: Wait for AWX UI to be up
+      uri:
+        url: "{{ awx_ip }}"
+        status_code: "{{ return_status }}"
+        return_content: yes
+      register: register_error
+      until: awx_ui_msg in register_error.content
+      retries: 20
+      delay: 15
+      changed_when: no
+      no_log: True
+
+  rescue:
+    - name: Starting rescue
+      debug:
+        msg: "Attempting to re-install AWX"
+
+    - name: Remove old containers
+      docker_container:
+        name: "{{ item }}"
+        state: absent
+      loop:
+        - awx_task
+        - awx_web
+
+    - name: Restart docker
+      service:
+        name: docker
+        state: restarted
+
+    - name: Re-install AWX
+      block:
+        - name: Run AWX install.yml file
+          command: ansible-playbook -i inventory install.yml --extra-vars "admin_password={{ admin_password }}"
+          args:
+            chdir: "{{ awx_installer_path }}"
+          register: awx_installation
+          no_log: True
+
+      rescue:
+        - name: Check AWX status on machine
+          include_tasks: check_awx_status.yml
+
+        - name: Fail if containers are not running
+          fail:
+            msg: "AWX installation failed with error msg:
+             {{ awx_installation.stdout | regex_replace(admin_password) }}."
+          when: not awx_status
+
+    - name: Check if AWX UI is up
+      block:
+        - name: Wait for AWX UI to be up
+          uri:
+            url: "{{ awx_ip }}"
+            status_code: "{{ return_status }}"
+            return_content: yes
+          register: register_error
+          until: awx_ui_msg in register_error.content
+          retries: 30
+          delay: 10
+          changed_when: no
+          no_log: True
+      rescue:
+        - name: Message
+          fail:
+            msg: "{{ register_error | regex_replace(awx_user) | regex_replace(admin_password) }}"
+  tags: install

+ 2 - 1
appliance/roles/web_ui/vars/main.yml

@@ -38,7 +38,8 @@ message_skipped: "Installation Skipped: AWX instance is already running on your
 message_installed: "Installation Successful"
 awx_ip: http://localhost:8081
 return_status: 200
-migrating_msg: "IsMigrating"
+awx_ui_msg: "Password Dialog"
+conf_fail_msg: "AWX configuration failed at the last executed task."
 
 # Usage: install_awx_cli.yml
 awx_cli_repo: "https://releases.ansible.com/ansible-tower/cli/ansible-tower-cli-centos7.repo"

+ 24 - 15
appliance/roles/provision/tasks/configure_nic.yml

@@ -12,20 +12,29 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 ---
+- name: Fetch provision_password
+  hosts: localhost
+  connection: local
+  gather_facts: no
+  roles:
+    - fetch_password
 
-- name: Configure NIC-1
-  copy:
-    src: "ifcfg-{{ nic }}"
-    dest: "/etc/sysconfig/network-scripts/ifcfg-{{ nic }}"
-    mode: 0644
-  tags: install
+- name: Prepare the cluster with passwordless ssh from manager to compute
+  hosts: manager
+  gather_facts: false
+  pre_tasks:
+    - name: Set Fact
+      set_fact:
+        ssh_to: "{{ groups['compute'] }}"
+  roles:
+    - cluster_preperation
 
-- name: Restart NIC
-  command: ifdown {{ nic }}
-  changed_when: false
-  tags: install
-
-- name: Restart NIC
-  command: ifup {{ nic }}
-  changed_when: false
-  tags: install
+- name: Prepare the cluster with passwordless ssh from compute to manager
+  hosts: compute
+  gather_facts: false
+  pre_tasks:
+    - name: Set Fact
+      set_fact:
+        ssh_to: "{{ groups['manager'] }}"
+  roles:
+    - cluster_preperation

roles/cluster_preperation/tasks/main.yml → appliance/tools/roles/cluster_preperation/tasks/main.yml


+ 27 - 14
roles/cluster_preperation/tasks/passwordless_ssh.yml

@@ -47,19 +47,32 @@
     ssh-add "{{ rsa_id_file }}"
   when: not ssh_status
 
-- name: Create .ssh directory
-  command: >-
-    sshpass -p "{{ hostvars['127.0.0.1']['cobbler_password'] }}"
-    ssh root@"{{ current_host }}" mkdir -p /root/.ssh
-  when: not ssh_status
+- name: Post public key
+  block:
+    - name: Create .ssh directory
+      command: >-
+        sshpass -p "{{ hostvars['127.0.0.1']['cobbler_password'] }}"
+        ssh root@"{{ current_host }}" mkdir -p /root/.ssh
+      when: not ssh_status
+      no_log: True
+      register: register_error
 
-- name: Copy the public key to remote host
-  shell: >-
-    set -o pipefail && cat "{{ rsa_id_file }}".pub
-    | sshpass -p "{{ hostvars['127.0.0.1']['cobbler_password'] }}"
-    ssh root@"{{ current_host }}" 'cat >> "{{ auth_key_path }}"'
-  when: not ssh_status
+    - name: Copy the public key to remote host
+      shell: >-
+        set -o pipefail && cat "{{ rsa_id_file }}".pub
+        | sshpass -p "{{ hostvars['127.0.0.1']['cobbler_password'] }}"
+        ssh root@"{{ current_host }}" 'cat >> "{{ auth_key_path }}"'
+      when: not ssh_status
+      no_log: True
+      register: register_error
+
+    - name: Change permissions on the remote host
+      shell: sshpass -p "{{ hostvars['127.0.0.1']['cobbler_password'] }}" ssh root@"{{ current_host }}" 'chmod 700 .ssh; chmod 640 "{{ auth_key_path }}"'
+      when: not ssh_status
+      no_log: True
+      register: register_error
 
-- name: Change permissions on the remote host
-  shell: sshpass -p "{{ hostvars['127.0.0.1']['cobbler_password'] }}" ssh root@"{{ current_host }}" 'chmod 700 .ssh; chmod 640 "{{ auth_key_path }}"'
-  when: not ssh_status
+  rescue:
+    - name: Passwordless ssh failed
+      fail:
+        msg: "{{ register_error.stderr | regex_replace(hostvars['127.0.0.1']['cobbler_password']) | regex_replace(auth_key_path) }}"

roles/cluster_preperation/vars/main.yml → appliance/tools/roles/cluster_preperation/vars/main.yml


+ 42 - 0
appliance/tools/roles/fetch_password/tasks/main.yml

@@ -0,0 +1,42 @@
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+- name: Include variables from common role
+  include_vars: "{{ role_path }}/../../../roles/common/vars/main.yml"
+  no_log: True
+
+- name: Check input config file is encrypted
+  command: cat {{ role_path }}/../../../{{ input_config_filename }}
+  changed_when: false
+  register: config_content
+
+- name: Decrypt appliance_config.yml
+  command: >-
+    ansible-vault decrypt {{ role_path }}/../../../{{ input_config_filename }}
+    --vault-password-file {{ role_path }}/../../../{{ vault_filename }}
+  changed_when: false
+  when: "'$ANSIBLE_VAULT;' in config_content.stdout"
+
+- name: Include variable file appliance_config.yml
+  include_vars: "{{ role_path }}/../../../{{ input_config_filename }}"
+
+- name: Save input variables from file
+  set_fact:
+    cobbler_password: "{{ provision_password }}"
+
+- name: Encrypt input config file
+  command: >-
+    ansible-vault encrypt {{ role_path }}/../../../{{ input_config_filename }}
+    --vault-password-file {{ role_path }}/../../../{{ vault_filename }}
+  changed_when: false

+ 186 - 0
docs/INSTALL_OMNIA.md

@@ -0,0 +1,186 @@
+# Install Omnia
+
+## Prerequisites
+Perform the following tasks before installing Omnia:
+* On the management node, install Ansible and Git using the following commands:
+	* `yum install epel-release -y`
+	* `yum install ansible git -y`
+
+__Note:__ Ansible must be installed using __yum__ only.
+
+__Note:__ If Ansible was installed using __pip3__, reinstall it using __yum__.
+
+* Ensure a stable Internet connection is available on the management node and on the target nodes.
+* CentOS 7.9 2009 is installed on the management node.
+* To provision the bare metal servers,
+	* Go to http://isoredirect.centos.org/centos/7/isos/x86_64/ and download the **CentOS-7-x86_64-Minimal-2009** ISO file to the following directory on the management node: `omnia/appliance/roles/provision/files`.
+	* Rename the downloaded ISO file to `CentOS-7-x86_64-Minimal-2009.iso`.
+* For DHCP configuration, you can provide a mapping file named mapping_file.csv under __omnia/appliance/roles/provision/files__. Each line of the CSV file must be in the format MAC,Hostname,IP (see the sample after this list).
+__Note:__ Do not provide duplicate hostnames in the mapping file, and the hostnames must not contain these characters: "_" and "."
+* Connect one of the Ethernet cards on the management node to the HPC switch and the other Ethernet card to the __global_network__.
+* If SELinux is not disabled on the management node, disable it from /etc/sysconfig/selinux and restart the management node.
+* The default mode of PXE is __UEFI__ and the __BIOS legacy__ mode is not supported.
+* The default boot order for the bare metal server should be __PXE__.
+* Configuration of __RAID__ is not part of Omnia. If the bare metal server has a __RAID__ controller installed, a __VIRTUAL DISK__ must be created.
+
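For reference, a sample __mapping_file.csv__ built from the placeholder values in the note above (the MAC addresses are illustrative; replace every field with the real values for your nodes):

```
xx:xx:4B:C4:xx:44,validation01,172.17.0.81
xx:xx:4B:C5:xx:52,validation02,172.17.0.82
```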
+## Steps to install Omnia
+1. On the management node, change the working directory to the directory where you want to clone the Omnia Git repository.
+2. Clone the Omnia repository.
+``` 
+$ git clone https://github.com/dellhpc/omnia.git 
+```
+__Note:__ After the Omnia repository is cloned, a folder named __omnia__ is created. It is recommended that you do not rename this folder.
+
+3. Change the directory to `omnia/appliance`
+4. To provide passwords for Cobbler and AWX, edit the __`appliance_config.yml`__ file.
+* To provide a mapping file for DHCP configuration, set the variable __mapping_file_exits__ in the __appliance_config.yml__ file to __true__; otherwise, set it to __false__.
+
+Omnia considers the following usernames as default:  
+* `cobbler` for Cobbler Server
+* `admin` for AWX
+* `slurm` for Slurm
+
+**Note**: 
+* The password must be a minimum of eight characters and a maximum of 30 characters in length.
+* Do not use these characters while entering a password: -, \\, "", and \'
+
+5. Using the `appliance_config.yml` file, you can also change the NIC for the DHCP server under *hpc_nic* and the NIC used to connect to the Internet under *public_nic*. The default values of __hpc_nic__ and __public_nic__ are set to em1 and em2, respectively.
+6. The valid DHCP range for the HPC cluster is set using the variables __Dhcp_start_ip_range__ and __Dhcp_end_ip_range__ in the __appliance_config.yml__ file (an illustrative sketch of these values appears after the commands below).
+7. To provide the password for the Slurm database and to select the Kubernetes CNI, edit the __`omnia_config.yml`__ file.
+
+**Note**:
+* Supported Kubernetes CNIs: calico and flannel; the default is __calico__.
+
+To view the passwords set in __`appliance_config.yml`__ at a later time, run the following command from the omnia/appliance directory:
+```
+ansible-vault view appliance_config.yml --vault-password-file .vault_key
+```
+
+To view the passwords set in __`omnia_config.yml`__ at a later time, run the following command:
+```
+ansible-vault view omnia_config.yml --vault-password-file .omnia_vault_key
+```
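For orientation, a minimal sketch of the kind of values that steps 4 to 6 set (the variable names are the ones referenced in this guide, the values are placeholders, and the actual `appliance_config.yml` may contain additional variables):

```yaml
# Illustrative placeholders only; edit appliance_config.yml itself for a real deployment.
provision_password: "ChangeMe123"      # password used for Cobbler provisioning
mapping_file_exits: "true"             # "false" if no mapping file is provided
hpc_nic: "em1"                         # NIC used by the DHCP server
public_nic: "em2"                      # NIC connected to the Internet
Dhcp_start_ip_range: "172.17.0.10"     # start of the DHCP range for the HPC cluster
Dhcp_end_ip_range: "172.17.0.100"      # end of the DHCP range for the HPC cluster
```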
+
+  
+8. To install Omnia, run the following command:
+```
+ansible-playbook appliance.yml -e "ansible_python_interpreter=/usr/bin/python2"
+```
+   
+Omnia creates a log file which is available at: `/var/log/omnia.log`.
+
+**Provision operating system on the target nodes**  
+Omnia role used: *provision*
+
+To create the Cobbler image, Omnia configures the following:
+* Firewall settings are configured.
+* The kickstart file of Cobbler will enable the UEFI PXE boot.
+
+To access the Cobbler dashboard, enter `https://<IP>/cobbler_web` where `<IP>` is the Global IP address of the management node. For example, enter
+`https://100.98.24.225/cobbler_web` to access the Cobbler dashboard.
+
+__Note__: If a mapping file is not provided, hostnames are assigned automatically in the format __compute\<xxx>-xxx__.
+After the Cobbler server provisions the operating system on the nodes, IP addresses and hostnames are assigned by the DHCP service. The hostnames are assigned based on the following format: **compute\<xxx>-xxx**, where **xxx** is the Host ID (the last two octets) of the host IP address. For example, if the host IP address is 172.17.0.11, the assigned hostname will be compute0-11.
+__Note__: If a mapping file is provided, the hostnames follow the format provided in the mapping file.
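To make the default naming concrete, a small sketch (an illustration, not the actual provisioning code) of how the hostname described above can be derived from a host IP address, using the example from the text:

```yaml
# Sketch: derive the default hostname from the last two octets of the host IP.
- name: Derive default hostname from host IP
  set_fact:
    derived_hostname: "compute{{ host_ip.split('.')[2] }}-{{ host_ip.split('.')[3] }}"
  vars:
    host_ip: "172.17.0.11"   # example from the text; yields "compute0-11"
```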
+
+**Install and configure Ansible AWX**  
+Omnia role used: *web_ui*  
+AWX repository is cloned from the GitHub path: https://github.com/ansible/awx.git 
+
+
+Omnia performs the following configuration on AWX:
+* The default organization name is set to **Dell EMC**.
+* The default project name is set to **omnia**.
+* Credential: omnia_credential
+* Inventory: omnia_inventory with compute and manager groups
+* Template: DeployOmnia and Dynamic Inventory
+* Schedules: DynamicInventorySchedule which is scheduled for every 10 mins
+
+To access the AWX dashboard, enter `http://<IP>:8081` where **\<IP>** is the Global IP address of the management node. For example, enter `http://100.98.24.225:8081` to access the AWX dashboard.
+
+**Note**: The AWX configurations are performed automatically by Omnia. Dell Technologies recommends that you do not change the default configurations provided by Omnia, as the functionality may be impacted.
+
+__Note__: Although the AWX UI is accessible, hosts are shown only after a few nodes have been provisioned by Cobbler, which takes approximately 10-15 minutes. If a server has been provisioned but no host is visible on the AWX UI, run the __provision_report.yml__ playbook from the __omnia__ -> __appliance__ -> __tools__ folder to see which hosts are reachable.
+
+
+## Install Kubernetes and Slurm using AWX UI
+Kubernetes and Slurm are installed by deploying the **DeployOmnia** template on the AWX dashboard.
+
+1. On the AWX dashboard, under __RESOURCES__ __->__ __Inventories__, select __Groups__.
+2. Select either __compute__ or __manager__ group.
+3. Select the __Hosts__ tab.
+4. To add the hosts provisioned by Cobbler, select __Add__ __->__ __Add__ __existing__ __host__, and then select the hosts from the list and click __Save__.
+5. To deploy Omnia, under __RESOURCES__ -> __Templates__, select __DeployOmnia__ and click __LAUNCH__.
+6. By default, no skip tags are selected and both Kubernetes and Slurm will be deployed. To install only Kubernetes, enter `slurm` and select **Create "slurm"**. Similarly, to install only Slurm, add the `kubernetes` skip tag.
+
+__Note:__
+*	To skip the NFS client setup, enter `nfs_client` in the skip tag section to skip the __k8s_nfs_client_setup__ role of Kubernetes.
+
+7. Click **Next**.
+8. Review the details in the **Preview** window, and click **Launch** to run the DeployOmnia template. 
+
+To establish passwordless communication between the compute nodes and the manager node:
+1. In AWX UI, under __RESOURCES__ -> __Templates__, select __DeployOmnia__ template.
+2. From __Playbook dropdown__ menu, select __appliance/tools/passwordless_ssh.yml__ and __Launch__ the template.
+
+__Note:__ To install both the __jupyterhub__ and __kubeflow__ playbooks, install the __jupyterhub__ playbook first and then install the __kubeflow__ playbook.
+
+__Note:__ To install __jupyterhub__ and __kubeflow__ playbook:
+*	From __AWX UI__, under __RESOURCES__ -> __Templates__, select __DeployOmnia__ template.
+*	From __Playbook dropdown__ menu, select __platforms/jupyterhub.yml__ option and __Launch__ the template to install jupyterhub playbook.
+*	From __Playbook dropdown__ menu, select __platforms/kubeflow.yml__ option and __Launch__ the template to install kubeflow playbook.
+
+
+The DeployOmnia template may not run successfully if:
+- The Manager group contains more than one host.
+- The Compute group does not contain a host. Ensure that the Compute group is assigned a minimum of one host node.
+- Under Skip Tags, when both kubernetes and slurm tags are selected.
+
+After the **DeployOmnia** template is executed from the AWX UI, the **omnia.yml** file installs Kubernetes and Slurm, or either of them, depending on the selection made in the template. Additionally, appropriate roles are assigned to the compute and manager groups.
+
+The following __kubernetes__ roles are provided by Omnia when __omnia.yml__ file is executed:
+- __common__ role:
+	- Install common packages on master and compute nodes
+	- Docker is installed
+	- Deploy time ntp/chrony
+	- Install Nvidia drivers and software components
+- __k8s_common__ role: 
+	- Required Kubernetes packages are installed
+	- Starts the docker and kubernetes services.
+- __k8s_manager__ role: 
+	- __helm__ package for Kubernetes is installed.
+- __k8s_firewalld__ role: This role is used to enable the required ports to be used by Kubernetes. 
+	- For __head-node-ports__: 6443, 2379-2380,10251,10252
+	- For __compute-node-ports__: 10250,30000-32767
+	- For __calico-udp-ports__: 4789
+	- For __calico-tcp-ports__: 5473,179
+	- For __flannel-udp-ports__: 8285,8472
+- __k8s_nfs_server_setup__ role: 
+	- An __nfs-share__ directory, __/home/k8snfs__, is created. Using this directory, compute nodes share the common files.
+- __k8s_nfs_client_setup__ role
+- __k8s_start_manager__ role: 
+	- Runs the __/bin/kubeadm init__ command to initialize the Kubernetes services on manager node.
+	- Initialize the Kubernetes services in the manager node and create service account for Kubernetes Dashboard
+- __k8s_start_workers__ role: 
+	- The compute nodes are initialized and joined to the Kubernetes cluster with the manager node. 
+- __k8s_start_services__ role
+	- Kubernetes services are deployed such as Kubernetes Dashboard, Prometheus, MetalLB and NFS client provisioner
+
+__Note:__ Once Kubernetes is installed and configured, a few Kubernetes and calico/flannel related ports are opened on the manager and compute nodes. This is required for Kubernetes Pod-to-Pod and Pod-to-Service communication. Calico/flannel provides a full networking stack for Kubernetes pods.
+
+The following __Slurm__ roles are provided by Omnia when __omnia.yml__ file is executed:
+- __slurm_common__ role:
+	- Install the common packages on manager/head node and compute node.
+- __slurm_manager__ role:
+	- Install the packages only related to manager node
+	- This role also enables the required ports to be used by Slurm (see the sketch after this list).
+		__tcp_ports__: 6817,6818,6819
+		__udp_ports__: 6817,6818,6819
+	- Creating and updating the slurm configuration files based on the manager node requirements.
+- __slurm_workers__ role:
+	- Install the slurm packages into all compute nodes as per the compute node requirements.
+- __slurm_start_services__ role: 
+	- Starts the Slurm services so that the compute nodes can communicate with the manager node.
+- __slurm_exporter__ role: 
+	- Slurm exporter is a package for exporting metrics collected from the Slurm resource scheduling system to Prometheus.
+	- Slurm exporter is installed on the host, just like Slurm, and is installed successfully only if Slurm is installed.
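As an illustration of the port handling mentioned for the __slurm_manager__ role, a minimal sketch (an assumption for illustration, not the actual role content) of how the listed Slurm TCP ports could be opened with the firewalld module that this commit uses elsewhere:

```yaml
# Sketch: open the Slurm TCP ports listed above; a firewalld reload would follow,
# as in the k8s_firewalld role.
- name: Open Slurm TCP ports on the firewall
  firewalld:
    port: "{{ item }}/tcp"
    permanent: yes
    state: enabled
  with_items:
    - 6817
    - 6818
    - 6819
```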

+ 75 - 0
docs/MONITOR_CLUSTERS.md

@@ -0,0 +1,75 @@
+# Monitor Kubernetes and Slurm
+Omnia provides playbooks to configure additional software components for Kubernetes such as JupyterHub and Kubeflow. For workload management (submitting, controlling, and managing jobs) of HPC, AI, and Data Analytics clusters, you can access Kubernetes and Slurm dashboards and other supported applications.
+
+__Note:__ To access the dashboards below, log in to the manager node and open the installed web browser.
+
+__Note:__ If you are connecting remotely, ensure that PuTTY or any similar client you use supports X11 forwarding. If you are using MobaXterm version 8 or later, follow these steps:
+1. `yum install firefox -y`
+2. `yum install xorg-x11-xauth`
+3. Log out and log in again.
+4. To launch firefox from terminal use the following command: 
+   `firefox&`
+
+## Access Kubernetes Dashboard
+1. To verify if the __Kubernetes-dashboard service__ is __running__, run the following command:
+  `kubectl get pods --all-namespaces`
+2. To start the Kubernetes dashboard, run the following command:
+  `kubectl proxy`
+3. From the CLI, run the following command to see the generated tokens: `kubectl get secrets`
+4. Copy the token with a name of the form __prometheus-...-kube-state-metrics__ and of the type __kubernetes.io/service-account-token__.
+5. Run the following command: `kubectl describe secret <copied token name>`
+6. Copy the encrypted token value.
+7. On a web browser(installed on the manager node), enter http://localhost:8001/api/v1/namespaces/kubernetes-dashboard/services/https:kubernetes-dashboard:/proxy/ to access the Kubernetes Dashboard.
+8. Select the authentication method as __Token__.
+9. On the Kuberenetes Dashboard, paste the copied encrypted token and click __Sign in__.
+
+## Access Kubeflow Dashboard
+
+__Note:__ Use only a port number between __8000__ and __8999__.
+
+1. To see which ports are in use, run the following command:
+   `netstat -an`
+2. Choose a port number between __8000__ and __8999__ that is not in use.
+3. To run the __kubeflow__ dashboard at selected port number, run the following command:
+   `kubectl port-forward -n istio-system svc/istio-ingressgateway <selected-port-number>:80`
+4. On a web browser installed on the __manager node__, go to http://localhost:<selected-port-number>/ to launch the Kubeflow central navigation dashboard.
+
+## Access JupyterHub Dashboard
+If you have installed the JupyterHub application for Kubernetes, you can access the dashboard by following these actions:
+1. To verify if the JupyterHub services are running, run the following command: 
+   `kubectl get pods --namespace default`
+2. Ensure that the pod names starting with __hub__ and __proxy__ are in __running__ status.
+3. Run the following command:
+   `kubectl get services`
+4. Copy the **External IP** of __proxy-public__ service.
+5. On a web browser installed on the __manager node__, use the External IP address to access the JupyterHub Dashboard.
+6. Enter any __username__ and __password__ combination to log in to JupyterHub. The __username__ and __password__ can be configured later from the JupyterHub dashboard.
+
+## Prometheus:
+
+* Prometheus is installed in two different ways:
+  * Prometheus is installed on the host when Slurm is installed without installing kubernetes.
+  * Prometheus is installed as a Kubernetes role, if you install both Slurm and Kubernetes.
+
+If Prometheus is installed as part of k8s role, run the following commands before starting the Prometheus UI:
+1. `export POD_NAME=$(kubectl get pods --namespace default -l "app=prometheus,component=server" -o jsonpath="{.items[0].metadata.name}")`
+2. `echo $POD_NAME`
+3. `kubectl --namespace default port-forward $POD_NAME 9090`
+
+__Note:__ If Prometheus is installed on the host, start the Prometheus web server as follows:
+* Navigate to the Prometheus folder. The default path is __/var/lib/prometheus-2.23.0.linux-amd64/__.
+* Start the web server:
+  `./prometheus --config.file=prometheus.yml`
+
+Go to http://localhost:9090 to launch the Prometheus UI in the browser.
+
+
+
+
+ 
+
+
+
+
+
+

The diff of this file was suppressed because it is too large
+ 51 - 7
docs/README.md


+ 9 - 29
omnia.yml

@@ -13,36 +13,16 @@
 # limitations under the License.
 ---
 
-#- name: Validate the cluster
-# hosts: localhost
-# connection: local
-# gather_facts: no
-# roles:
-#   - cluster_validation
+- name: Validate the cluster
+  hosts: localhost
+  connection: local
+  gather_facts: no
+  roles:
+    - cluster_validation
 
 - name: Gather facts from all the nodes
   hosts: all
 
-- name: Prepare the cluster with passwordless ssh from manager to compute
-  hosts: manager
-  gather_facts: false
-  pre_tasks:
-    - name: Set Fact
-      set_fact:
-        ssh_to: "{{ groups['compute'] }}"
-  roles:
-    - cluster_preperation
-
-- name: Prepare the cluster with passwordless ssh from compute to manager
-  hosts: compute
-  gather_facts: false
-  pre_tasks:
-    - name: Set Fact
-      set_fact:
-        ssh_to: "{{ groups['manager'] }}"
-  roles:
-    - cluster_preperation
-    
 - name: Apply common installation and config
   hosts: manager, compute
   gather_facts: false
@@ -76,7 +56,7 @@
   gather_facts: false
   roles:
     - k8s_nfs_server_setup
-  tags: 
+  tags:
     - kubernetes
     - nfs
 
@@ -85,7 +65,7 @@
   gather_facts: false
   roles:
     - k8s_nfs_client_setup
-  tags: 
+  tags:
     - kubernetes
     - nfs
 
@@ -143,4 +123,4 @@
 # gather_facts: false
 # roles:
 #   - slurm_exporter
-# tags: slurm
+# tags: slurm

+ 24 - 0
omnia_config.yml

@@ -0,0 +1,24 @@
+# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+# Password used for Slurm database.
+# The length of the password should be at least 8 characters.
+# The password must not contain -,\, ',"
+mariadb_password: "password"
+
+# Kubernetes SDN network.
+# It can either be "calico" or "flannel".
+# Default value assigned is "calico".
+k8s_cni: "calico"

+ 1 - 13
platforms/roles/kubeflow/tasks/main.yml

@@ -114,20 +114,8 @@
     regexp: 'NodePort'
     replace: 'LoadBalancer'
 
-- name: Remove cert-manager application block
-  replace:
-    path: "{{ kubeflow_config_file }}"
-    regexp: "{{ cert_manager_block }}"
-    replace: "\n"
-
-- name: Remove seldon-core-operator application block
-  replace:
-    path: "{{ kubeflow_config_file }}"
-    regexp: "{{ seldon_core_operator_block }}"
-    replace: "\n"
-
 - name: Apply kubeflow configuration
   command:
     cmd: "/usr/bin/kfctl apply -V -f '{{ kubeflow_config_file }}'"
     chdir: "{{ omnia_kubeflow_dir_path }}"
-  changed_when: true
+  changed_when: true

+ 0 - 22
platforms/roles/kubeflow/vars/main.yml

@@ -32,25 +32,3 @@ kfserving_gateway_yaml_file_path: "{{ omnia_kubeflow_dir_path }}/kustomize/kfser
 argo_yaml_file_path: "{{ omnia_kubeflow_dir_path }}/kustomize/argo/base/service.yaml"
 
 kubeflow_config_file: "{{ omnia_kubeflow_dir_path }}/kfctl_k8s_istio.v1.0.2.yaml"
-
-cert_manager_block: >
-    - kustomizeConfig:
-          overlays:
-          - self-signed
-          - application
-          parameters:
-          - name: namespace
-            value: cert-manager
-          repoRef:
-            name: manifests
-            path: cert-manager/cert-manager
-        name: cert-manager
-
-seldon_core_operator_block: >
-    - kustomizeConfig:
-          overlays:
-          - application
-          repoRef:
-            name: manifests
-            path: seldon/seldon-core-operator
-        name: seldon-core-operator

+ 61 - 9
roles/cluster_validation/tasks/fetch_password.yml

@@ -12,23 +12,75 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 ---
+- name: Check if omnia_vault_key exists
+  stat:
+    path: "{{ role_path }}/../../{{ config_vaultname }}"
+  register: vault_key_result
 
-- name: Decrpyt input_config.yml
-  command: >-
-    ansible-vault decrypt {{ role_path }}/../../appliance/{{ input_config_filename }}
-    --vault-password-file {{ role_path }}/../../appliance/roles/common/files/{{ vault_filename }}
+- name: Create ansible vault key if it does not exist
+  set_fact:
+    vault_key: "{{ lookup('password', '/dev/null chars=ascii_letters') }}"
+  when: not vault_key_result.stat.exists
+
+- name: Save vault key
+  copy:
+    dest: "{{ role_path }}/../../{{ config_vaultname }}"
+    content: |
+      {{ vault_key }}
+    owner: root
+    force: yes
+  when: not vault_key_result.stat.exists
+
+- name: Check if omnia config file is encrypted
+  command: cat {{ role_path }}/../../{{ config_filename }}
   changed_when: false
+  register: config_content
+  no_log: True
+
+- name: Decrypt omnia_config.yml
+  command: >-
+    ansible-vault decrypt {{ role_path }}/../../{{ config_filename }}
+    --vault-password-file {{ role_path }}/../../{{ config_vaultname }}
+  when: "'$ANSIBLE_VAULT;' in config_content.stdout"
+
+- name: Include variable file omnia_config.yml
+  include_vars: "{{ role_path }}/../../{{ config_filename }}"
+  no_log: True
+
+- name: Validate input parameters are not empty
+  fail:
+    msg: "{{ input_config_failure_msg }}"
+  register: input_config_check
+  when:
+    - mariadb_password | length < 1 or
+      k8s_cni | length < 1
+
+- name: Assert mariadb_password
+  assert:
+    that:
+        - mariadb_password | length > min_length | int - 1
+        - mariadb_password | length < max_length | int + 1
+        - '"-" not in mariadb_password '
+        - '"\\" not in mariadb_password '
+        - '"\"" not in mariadb_password '
+        - " \"'\" not in mariadb_password "
+    success_msg: "{{ success_msg_mariadb_password }}"
+    fail_msg: "{{ fail_msg_mariadb_password }}"
 
-- name: Include variable file input_config.yml
-  include_vars: "{{ role_path }}/../../appliance/{{ input_config_filename }}"
+- name: Assert kubernetes cni
+  assert:
+    that: "('calico' in k8s_cni) or ('flannel' in k8s_cni)"
+    success_msg: "{{ success_msg_k8s_cni }}"
+    fail_msg: "{{ fail_msg_k8s_cni }}"
 
 - name: Save input variables from file
   set_fact:
-    cobbler_password: "{{ provision_password }}"
     db_password: "{{ mariadb_password }}"
+    k8s_cni: "{{ k8s_cni }}"
+  no_log: True
 
 - name: Encrypt input config file
   command: >-
-    ansible-vault encrypt {{ role_path }}/../../appliance/{{ input_config_filename }}
-    --vault-password-file {{ role_path }}/../../appliance/roles/common/files/{{ vault_filename }}
+    ansible-vault encrypt {{ role_path }}/../../{{ config_filename }}
+    --vault-password-file {{ role_path }}/../../{{ config_vaultname }}
   changed_when: false

+ 1 - 4
roles/cluster_validation/tasks/main.yml

@@ -12,11 +12,8 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 ---
-- name: Include vars file of common role
-  include_vars: "{{ role_path }}/../../appliance/roles/common/vars/main.yml"
-
 - name: Perform validations
   include_tasks: validations.yml
 
-- name: Fetch cobbler password
+- name: Fetch passwords
   include_tasks: fetch_password.yml

+ 1 - 7
roles/cluster_validation/tasks/validations.yml

@@ -27,10 +27,4 @@
   assert:
     that: "groups['compute'] | length | int >= 1"
     fail_msg: "{{ compute_group_fail_msg }}"
-    success_msg: "{{ compute_group_success_msg }}"
-
-- name: Manager and compute groups should be disjoint
-  assert:
-    that: "groups['manager'][0] not in groups['compute']"
-    fail_msg: "{{ disjoint_fail_msg }}"
-    success_msg: "{{ disjoint_success_msg }}"
+    success_msg: "{{ compute_group_success_msg }}"

+ 10 - 0
roles/cluster_validation/vars/main.yml

@@ -12,7 +12,17 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 ---
+#Usage: fetch_password.yml
+config_filename: "omnia_config.yml"
+config_vaultname: .omnia_vault_key
+min_length: 8
+max_length: 30
+fail_msg_mariadb_password: "maria_db password not given in correct format."
+success_msg_mariadb_password: "mariadb_password validated"
+success_msg_k8s_cni: "Kubernetes CNI Validated"
+fail_msg_k8s_cni: "Kubernetes CNI not correct."
 
+#Usage: validations.yml
 skip_tag_fail_msg: "Can't skip both slurm and kubernetes"
 manager_group_fail_msg: "manager group should contain exactly 1 node"
 manager_group_success_msg: "manager group check passed"

+ 2 - 2
roles/common/tasks/nvidia.yml

@@ -26,7 +26,7 @@
     enabled: yes
   tags: install
 
-- name: Add nvidia-container-runtime Repo 
+- name: Add nvidia-container-runtime Repo
   yum_repository:
     name: nvidia-container-runtime
     description:  nvidia-container-runtime
@@ -39,7 +39,7 @@
     enabled: yes
   tags: install
 
-- name: Add nvidia-docker Repo 
+- name: Add nvidia-docker Repo
   yum_repository:
     name: nvidia-docker
     description:  nvidia-docker

+ 1 - 0
roles/common/vars/main.yml

@@ -24,6 +24,7 @@ common_packages:
   - chrony
   - pciutils
   - docker-ce
+  - openssl
 
 custom_fact_dir: /etc/ansible/facts.d
 

+ 2 - 2
roles/k8s_common/tasks/main.yml

@@ -21,8 +21,8 @@
     enabled: yes
     gpgcheck: no
     repo_gpgcheck: no
-    gpgkey: 
-      - https://packages.cloud.google.com/yum/doc/yum-key.gpg 
+    gpgkey:
+      - https://packages.cloud.google.com/yum/doc/yum-key.gpg
       - https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
   tags: install
 

+ 4 - 4
roles/k8s_firewalld/tasks/main.yml

@@ -41,7 +41,7 @@
     permanent: yes
     state: enabled
   with_items: '{{ k8s_compute_ports }}'
-  when: "'compute' in group_names"
+  when: "'compute' in group_names and groups['manager'][0] != groups['compute'][0] and groups['compute']|length >= 1"
   tags: firewalld
 
 - name: Open flannel ports on the firewall
@@ -50,7 +50,7 @@
     permanent: yes
     state: enabled
   with_items: "{{ flannel_udp_ports }}"
-  when: k8s_cni == "flannel"
+  when: hostvars['127.0.0.1']['k8s_cni'] == "flannel"
   tags: firewalld
 
 - name: Open calico UDP ports on the firewall
@@ -59,7 +59,7 @@
     permanent: yes
     state: enabled
   with_items: "{{ calico_udp_ports }}"
-  when: k8s_cni == "calico"
+  when: hostvars['127.0.0.1']['k8s_cni'] == "calico"
   tags: firewalld
 
 - name: Open calico TCP ports on the firewall
@@ -68,7 +68,7 @@
     permanent: yes
     state: enabled
   with_items: "{{ calico_tcp_ports }}"
-  when: k8s_cni == "calico"
+  when: hostvars['127.0.0.1']['k8s_cni'] == "calico"
   tags: firewalld
 
 - name: Reload firewalld

+ 1 - 4
roles/k8s_firewalld/vars/main.yml

@@ -13,9 +13,6 @@
 #  limitations under the License.
 ---
 
-# Kubernetes SDN network
-k8s_cni: calico
-
 # Master nodes firewall ports
 k8s_master_ports:
   - 6443
@@ -39,4 +36,4 @@ calico_tcp_ports:
 # Flannel CNI firewall ports
 flannel_udp_ports:
   - 8285
-  - 8472
+  - 8472

+ 15 - 2
roles/k8s_nfs_client_setup/tasks/main.yml

@@ -17,6 +17,16 @@
   package:
     name: nfs-utils
     state: present
+  when: groups['manager'][0] != groups['compute'][0] and groups['compute']|length >= 1
+  tags: nfs_client
+
+- name: Check mounted share
+  shell: mount | grep nfs
+  changed_when: false
+  args:
+    warn: false
+  register: mounted_share
+  ignore_errors: True
   tags: nfs_client
 
 - name: Creating directory to mount NFS Share
@@ -24,17 +34,20 @@
     path: "{{ nfs_mnt_dir }}"
     state: directory
     mode: "{{ nfs_mnt_dir_mode }}"
+  when: groups['manager'][0] != groups['compute'][0] and groups['compute']|length >= 1
   tags: nfs_client
 
 - name: Mounting NFS Share
-  command: "mount {{ groups['manager'] }}:{{ nfs_mnt_dir }} {{ nfs_mnt_dir }}"
+  command: "mount {{ mounthost }}:{{ nfs_share_dir }} {{ nfs_mnt_dir }}"
   changed_when: true
   args:
     warn: false
+  when: groups['manager'][0] not in mounted_share.stdout and groups['manager'][0] != groups['compute'][0] and groups['compute']|length >= 1
   tags: nfs_client
 
 - name: Configuring Automount NFS Shares on reboot
   lineinfile:
     path: "{{ fstab_file_path }}"
-    line: "{{ groups['manager'] }}:{{ nfs_mnt_dir }}     {{ nfs_mnt_dir }}  nfs     nosuid,rw,sync,hard,intr 0 0"
+    line: "{{ mounthost }}:{{ nfs_share_dir }}     {{ nfs_mnt_dir }}  nfs     nosuid,rw,sync,hard,intr 0 0"
+  when: groups['manager'][0] not in mounted_share.stdout and groups['manager'][0] != groups['compute'][0] and groups['compute']|length >= 1
   tags: nfs_client

+ 4 - 0
roles/k8s_nfs_client_setup/vars/main.yml

@@ -15,6 +15,10 @@
 
 nfs_mnt_dir: /home/k8snfs
 
+nfs_share_dir: /home/k8snfs
+
+mounthost: "{{ groups['manager'][0] }}"
+
 nfs_mnt_dir_mode: 0755
 
 fstab_file_path: /etc/fstab

+ 22 - 5
roles/k8s_start_manager/tasks/main.yml

@@ -22,9 +22,17 @@
   setup:
     filter: ansible_default_ipv4.address
 
+- name: Check K8s nodes status
+  command: kubectl get nodes
+  changed_when: false
+  ignore_errors: True
+  register: k8s_nodes
+  tags: init
+
 - name: Initialize kubeadm
   command: "/bin/kubeadm init --pod-network-cidr='{{ pod_network_cidr_ip }}' --apiserver-advertise-address='{{ ansible_default_ipv4.address }}'"
   changed_when: true
+  when: "'master' not in k8s_nodes.stdout"
   register: init_output
   tags: init
 
@@ -74,6 +82,7 @@
     token:  "{{ K8S_TOKEN.stdout }}"
     hash:   "{{ K8S_MANAGER_CA_HASH.stdout }}"
     ip:     "{{ ansible_default_ipv4.address }}"
+    k8s_nodes:  "{{ k8s_nodes.stdout }}"
   tags: init
 
 - name: Print k8s token
@@ -96,12 +105,12 @@
 
 - name: Setup Calico SDN network
   command: "kubectl apply -f '{{ calico_yml_url }}'"
-  when: k8s_cni == "calico"
+  when: hostvars['127.0.0.1']['k8s_cni'] == "calico"
   tags: init
 
 - name: Setup Flannel SDN network
   command: "kubectl apply -f '{{ flannel_yml_url }}'"
-  when: k8s_cni == "flannel"
+  when: hostvars['127.0.0.1']['k8s_cni'] == "flannel"
   tags: init
 
 - name: Create yaml repo for setup
@@ -120,9 +129,16 @@
     mode: "{{ k8s_service_account_file_mode }}"
   tags: init
 
+- name: Check K8s service accounts status
+  command: "kubectl get serviceaccounts"
+  changed_when: false
+  register: k8s_service_accounts
+  tags: init
+
 - name: Create service account (K8s dashboard)
   command: "kubectl create -f '{{ k8s_service_account_file_dest }}'"
   changed_when: true
+  when: "'default' not in k8s_service_accounts.stdout"
   tags: init
 
 - name: Create clusterRoleBinding (K8s dashboard) files
@@ -137,6 +153,7 @@
 - name: Create clusterRoleBinding (K8s dashboard)
   command: "kubectl create -f '{{ k8s_clusterRoleBinding_file_dest }}'"
   changed_when: true
+  ignore_errors: True
   tags: init
 
 - name: Dump bearer token for K8s dashboard login
@@ -146,7 +163,7 @@
   changed_when: true
   tags: init
 
-- name: Edge / Workstation Install allows pods to schedule on manager
+- name: Edge / Workstation Install allows pods to schedule on manager
   command: kubectl taint nodes --all node-role.kubernetes.io/master-
-  when: single_node
-  tags: init
+  when: groups['manager'][0] == groups['compute'][0] and groups['compute']|length == 1
+  tags: init

+ 1 - 5
roles/k8s_start_manager/vars/main.yml

@@ -13,10 +13,6 @@
 #  limitations under the License.
 ---
 
-single_node: false
-
-k8s_cni: calico
-
 pod_network_cidr_ip: 10.244.0.0/16
 
 k8s_root_directory: /root/.kube
@@ -47,4 +43,4 @@ k8s_clusterRoleBinding_file_mode: 0655
 
 calico_yml_url: https://docs.projectcalico.org/manifests/calico.yaml
 
-flannel_yml_url: https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
+flannel_yml_url: https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml

+ 17 - 3
roles/k8s_start_services/tasks/main.yml

@@ -19,9 +19,16 @@
   ignore_errors: True
   tags: init
 
+- name: Get K8s pods
+  command: kubectl get pods --all-namespaces
+  changed_when: false
+  register: k8s_pods
+  tags: init
+
 - name: Deploy MetalLB
   command: "kubectl apply -f '{{ metallb_yaml_url }}'"
   changed_when: true
+  when: "'metallb' not in k8s_pods.stdout"
   tags: init
 
 - name: Create MetalLB Setup Config Files
@@ -45,17 +52,19 @@
 - name: Deploy MetalLB
   command: "kubectl apply -f '{{ metallb_deployment_file_dest }}'"
   changed_when: true
+  when: "'metallb' not in k8s_pods.stdout"
   tags: init
 
 - name: Create default setup for MetalLB
   command: "kubectl apply -f '{{ metallb_config_file_dest }}'"
   changed_when: true
+  when: "'metallb' not in k8s_pods.stdout"
   tags: init
 
 - name: Start k8s dashboard
   command: "kubectl create -f '{{ k8s_dashboard_yaml_url }}'"
   changed_when: true
-  register: result
+  when: "'kubernetes-dashboard' not in k8s_pods.stdout"
   tags: init
 
 - name: Helm - add stable repo
@@ -81,7 +90,7 @@
 - name: Start NFS Client Provisioner
   command: "helm install stable/nfs-client-provisioner --set nfs.server='{{ nfs_server }}' --set nfs.path='{{ nfs_path }}' --generate-name"
   changed_when: true
-  register: result
+  when: "'nfs-client-provisioner' not in k8s_pods.stdout"
   tags: init
 
 - name: Set NFS-Client Provisioner as DEFAULT StorageClass
@@ -97,25 +106,30 @@
     --set alertmanager.persistentVolume.storageClass=nfs-client,server.persistentVolume.storageClass=nfs-client,server.service.type=LoadBalancer \
     --generate-name
   changed_when: true
+  when: "'prometheus' not in k8s_pods.stdout"
   tags: init
 
 - name: Install MPI Operator
   command: "kubectl create -f '{{ mpi_operator_yaml_url }}'"
   changed_when: true
+  when: "'mpi-operator' not in k8s_pods.stdout"
   tags: init
 
 - name: Install nvidia-device-plugin
   command: "helm install --version='{{ nvidia_device_plugin_version }}' --generate-name --set migStrategy='{{ mig_strategy }}' nvdp/nvidia-device-plugin"
   changed_when: true
+  when: "'nvidia-device-plugin' not in k8s_pods.stdout"
   tags: init
 
 - name: Install GPU Feature Discovery
   command: "helm install --version='{{ gpu_feature_discovery_version }}' --generate-name --set migStrategy='{{ mig_strategy }}' nvgfd/gpu-feature-discovery"
   changed_when: true
+  when: "'node-feature-discovery' not in k8s_pods.stdout"
   tags: init
 
 - name: Deploy Xilinx Device plugin
   command: "kubectl create -f '{{ fpga_device_plugin_yaml_url }}'"
   changed_when: true
   register: fpga_enable
-  tags: init
+  when: "'fpga-device-plugin' not in k8s_pods.stdout"
+  tags: init

+ 8 - 2
roles/k8s_start_workers/tasks/main.yml

@@ -18,10 +18,16 @@
   changed_when: true
   tags: init
 
+- name: Get hostname
+  command: hostname
+  changed_when: true
+  register: node_hostname
+  tags: init
+
 - name: Execute kubeadm join command
   shell: >
     kubeadm join --token={{ hostvars['K8S_TOKEN_HOLDER']['token'] }}
     --discovery-token-ca-cert-hash sha256:{{ hostvars['K8S_TOKEN_HOLDER']['hash'] }}
     {{ hostvars['K8S_TOKEN_HOLDER']['ip'] }}:{{ apiserver_bind_port }}
-  when: not single_node
-  tags: init
+  when: groups['manager'][0] != groups['compute'][0] and groups['compute']|length >= 1 and node_hostname.stdout not in hostvars['K8S_TOKEN_HOLDER']['k8s_nodes']
+  tags: init

+ 1 - 3
roles/k8s_start_workers/vars/main.yml

@@ -13,6 +13,4 @@
 #  limitations under the License.
 ---
 
-single_node: false
-
-apiserver_bind_port: 6443
+apiserver_bind_port: 6443

+ 11 - 2
roles/slurm_common/tasks/main.yml

@@ -14,7 +14,7 @@
 ---
 
 - name: Get hostname
-  command: hostname -s
+  command: hostname
   register: host_name
   changed_when: true
 
@@ -29,7 +29,7 @@
 - name: Add host name in hosts file
   lineinfile:
     dest: "{{ hosts_dest }}"
-    line: "{{ hostvars[inventory_hostname]['ansible_default_ipv4']['address'] }} {{ host_name.stdout }}"
+    line: "{{ inventory_hostname }} {{ host_name.stdout }}"
     state: present
     create: yes
     mode: "{{ common_mode }}"
@@ -155,6 +155,15 @@
     mode: "{{ gen_mode }}"
     recurse: yes
 
+- name: Give slurm user permission to spool directory
+  file:
+    path: "{{ spool_dir }}"
+    owner: slurm
+    group: slurm
+    state: directory
+    mode: "{{ common_mode }}"
+    recurse: yes
+
 - name: Create slurm pid directory
   file:
     path: "{{ slurm_pidpth }}"

+ 1 - 2
roles/slurm_common/vars/main.yml

@@ -13,8 +13,6 @@
 #  limitations under the License.
 ---
 
-epel_url: https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
-
 common_packages:
    - munge
    - munge-libs
@@ -41,6 +39,7 @@ slurm_uid: "6001"
 slurm_logpth: "/var/log/slurm/"
 slurm_pidpth: "/var/run/slurm/"
 gen_mode: "0755"
+spool_dir: "/var/spool/"
 spool_pth: "/var/spool/slurm/"
 slurmctld_pid: "/var/run/slurmctld.pid"
 slurmd_pid: "/var/run/slurmd.pid"

+ 14 - 10
roles/slurm_manager/tasks/main.yml

@@ -29,7 +29,7 @@
     mode: "{{ tmp_mode }}"
     state: touch
 
-- name: Create slurmctld log file on master
+- name: Create slurmctld log file on manager
   file:
     path: "{{ slurm_logpth }}"
     owner: slurm
@@ -38,14 +38,14 @@
   with_items:
     - slurmctld.log
 
-- name: Create log files on master
+- name: Create log files on manager
   file:
     path: "{{ slurm_logpth }}"
     owner: slurm
     mode: "{{ tmp_mode }}"
     state: touch
   with_items:
-    - "{{ log_files_master }}"
+    - "{{ log_files_manager }}"
 
 - name: Install packages for slurm
   package:
@@ -86,7 +86,7 @@
     warn: no
 
 - name: Verify package md5
-  command: rpm -qa
+  shell: rpm -qa | grep slurm
   ignore_errors: true
   register: verify_result
   changed_when: no
@@ -100,9 +100,10 @@
     chdir: "{{ rpm_path }}"
     warn: no
   changed_when: true
+  when: verify_result.rc != 0
 
 - name: Get the hostname
-  command: hostname -s
+  command: hostname
   register: machine_name
   changed_when: true
 
@@ -147,13 +148,13 @@
   when: "'manager' in group_names"
   tags: firewalld
 
-- name: Get network address/subnet mask through ipaddr
+- name: Get network address/subnet mask
   set_fact:
     network_address: "{{ (ansible_default_ipv4.network + '/' + ansible_default_ipv4.netmask) | ipaddr('network/prefix') }}"
 
 - name: Firewall rule slurm - allow all incoming traffic on internal network
   firewalld:
-    zone: internal
+    zone: public
     rich_rule: 'rule family="{{ family }}" source address="{{ network_address }}" accept'
     permanent: true
     state: enabled
@@ -172,7 +173,10 @@
   tags: install
 
 - name: Grant permissions for slurm db
-  command: mysql -u root -e "GRANT ALL ON slurm_acct_db.* TO '{{ db_user }}'@'{{ db_host }}' identified by '{{ db_password[0] }}'with grant option;"
+  command: >-
+    mysql -u root -e "GRANT ALL ON slurm_acct_db.* TO '{{ db_user }}'@'{{
+    db_host }}' identified by '{{ hostvars['127.0.0.1']['db_password'] }}'with
+    grant option;"
   tags: install
   changed_when: true
 
@@ -206,7 +210,7 @@
   lineinfile:
     path: "{{ slurmdbd_path }}"
     regexp: "StoragePass="
-    line: "StoragePass={{ db_password[0] }}"
+    line: "StoragePass={{ hostvars['127.0.0.1']['db_password'] }}"
 
 - name: Add storage user
   lineinfile:
@@ -230,4 +234,4 @@
   fetch:
     src: "{{ slurm_confpth }}"
     dest: "{{ buffer_path }}"
-    flat: true
+    flat: true

+ 1 - 1
roles/slurm_manager/vars/main.yml

@@ -38,7 +38,7 @@ dev_tools:
    - ncurses-devel
    - gtk2-devel
 
-log_files_master:
+log_files_manager:
    - slurm_jobacct.log
    - slurm_jobcomp.log
 

+ 6 - 8
roles/slurm_start_services/tasks/main.yml

@@ -32,7 +32,7 @@
   tags: install
 
 - name: Start slurmctld on manager
-  systemd:
+  service:
     name: slurmctld
     state: started
   tags: install
@@ -44,24 +44,22 @@
 
 - name: Create slurm cluster
   command: sacctmgr -i add cluster {{ cluster_name }}
-  when: slurm_clusterlist.stdout.find(cluster_name) == 1
+  when: not slurm_clusterlist.stdout
 
 - name: Show account
-  command: sacctmgr show account
+  command: sacctmgr show account -s
   register: account_added
   changed_when: false
 
 - name: Create default slurm group
   command: sacctmgr -i add account defaultgroup Cluster={{ cluster_name }} Description="Default Account" Organization="Default Org"
-  when: account_added.stdout.find(cluster_name) == 1
-  tags: install
+  when: account_added.rc != 0
 
 - name: Check if user exists
-  command: sacctmgr show user
+  command: sacctmgr show user -s
   register: user_added
   changed_when: false
 
 - name: Add root to the default account
   command: sacctmgr -i add user root DefaultAccount=defaultgroup
-  when: account_added.stdout.find(cluster_name) == 1
-  tags: install
+  when: user_added.rc != 0
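Taken together, the tasks above are roughly equivalent to the following manual bootstrap, shown only as a sketch with a hypothetical cluster name `omnia_cluster`:
```
# register the cluster if `sacctmgr show cluster` returns nothing
sacctmgr -i add cluster omnia_cluster

# create the default account if `sacctmgr show account -s` fails
sacctmgr -i add account defaultgroup Cluster=omnia_cluster Description="Default Account" Organization="Default Org"

# add root to it if `sacctmgr show user -s` fails
sacctmgr -i add user root DefaultAccount=defaultgroup
```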

+ 3 - 3
roles/slurm_workers/tasks/main.yml

@@ -92,7 +92,7 @@
     warn: no
 
 - name: Verify package md5
-  command: rpm -qa
+  shell: rpm -qa | grep slurm
   ignore_errors: true
   register: verify_result
   changed_when: no
@@ -106,9 +106,10 @@
     chdir: "{{ rpm_path }}"
     warn: no
   changed_when: true
+  when: verify_result.rc != 0
 
 - name: Get the hostname
-  command: hostname -s
+  command: hostname
   register: machine_name
   changed_when: true
 
@@ -119,7 +120,6 @@
     line: "NodeName={{ machine_name.stdout }} Sockets={{ hostvars[inventory_hostname]['ansible_facts']['processor_count'] }}
       CoresPerSocket={{ hostvars[inventory_hostname]['ansible_facts']['processor_cores'] }}"
 
-
 - name: Save slurm conf in buffer
   fetch:
     src: "{{ slurm_confpth }}"
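To illustrate the lineinfile task above, on a hypothetical worker with 2 sockets and 16 cores per socket whose `hostname` returns compute001, the line appended to slurm.conf would read:
```
NodeName=compute001 Sockets=2 CoresPerSocket=16
```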

+ 6 - 0
site/CONTRIBUTORS.md

@@ -0,0 +1,6 @@
+# Omnia Maintainers
+- Luke Wilson and John Lockman (Dell Technologies)
+<img src="images/delltech.jpg" height="90px" alt="Dell Technologies">
+
+# Omnia Contributors
+<img src="images/delltech.jpg" height="90px" alt="Dell Technologies"> <img src="images/pisa.png" height="100px" alt="Universita di Pisa">

+ 17 - 12
docs/INSTALL.md

@@ -1,24 +1,29 @@
 ## TL;DR Installation
  
-### Kubernetes Only
-Install Kubernetes and all dependencies
+### Slurm and Kubernetes
+Install Slurm and Kubernetes, along with all dependencies
 ```
-ansible-playbook -i host_inventory_file omnia.yml --skip-tags "slurm"
+ansible-playbook -i host_inventory_file omnia.yml
 ```
 
-Initialize K8s cluster
+Install Slurm only
 ```
-ansible-playbook -i host_inventory_file omnia.yml --tags "init"
+ansible-playbook -i host_inventory_file omnia.yml --skip-tags "k8s"
 ```
 
-### Install Kubeflow 
+Install Kubernetes only
 ```
-ansible-playbook -i host_inventory_file platform/kubeflow.yaml
+ansible-playbook -i host_inventory_file omnia.yml --skip-tags "slurm"
+```
+
+Initialize Kubernetes cluster (packages already installed)
+```
+ansible-playbook -i host_inventory_file omnia.yml --skip-tags "slurm" --tags "init"
 ```
 
-### Slurm Only
+### Install Kubeflow 
 ```
-ansible-playbook -i host_inventory_file omnia.yml --skip-tags "k8s"
+ansible-playbook -i host_inventory_file platforms/kubeflow.yml
 ```
 
 # Omnia  
@@ -63,7 +68,7 @@ Omnia playbooks perform several tasks:
     - Docker
     - Kubelet
 
-`manager` playbook
+`master` playbook
 * Install Helm v3
 * (optional) add firewall rules for Slurm and kubernetes
 
@@ -72,10 +77,10 @@ Everything from this point on can be called by using the `init` tag
 ansible-playbook -i host_inventory_file kubernetes/kubernetes.yml --tags "init"
 ```
 
-`startmanager` playbook
+`startmaster` playbook
 * turn off swap
 * Initialize Kubernetes
-    * Head/manager
+    * Head/master
         - Start K8s and pass the startup token to compute nodes (slaves)
         - Initialize software defined networking (Calico)
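For reference, a minimal manual sketch of what `startmaster` automates, assuming Calico's default pod CIDR; these exact flags only approximate what the playbook does:
```
# initialize the control plane
kubeadm init --pod-network-cidr=192.168.0.0/16

# install Calico for pod networking
kubectl apply -f https://docs.projectcalico.org/manifests/calico.yaml

# print the join command (token) for compute nodes
kubeadm token create --print-join-command
```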
 

+ 27 - 0
site/PREINSTALL.md

@@ -0,0 +1,27 @@
+# Pre-Installation Preparation
+
+## Assumptions
+Omnia assumes that prior to installation:
+* Systems have a base operating system (currently CentOS 7 or 8)
+* Network(s) has been cabled and nodes can reach the internet
+* SSH Keys for `root` have been installed on all nodes to allow for password-less SSH
+* Ansible is installed on either the master node or a separate deployment node
+```
+yum install ansible
+```
+
+## Example system designs
+Omnia can configure systems which use Ethernet- or Infiniband-based fabric to connect the compute servers.
+
+![Example system configuration with Ethernet fabric](images/example-system-ethernet.png)
+
+![Example system configuration with Infiniband fabric](images/example-system-infiniband.png)
+
+## Network Setup
+Omnia assumes that servers are already connected to the network and have access to the internet.
+### Network Topology
+Possible network configurations include:
+* A flat topology where all nodes are connected to a switch which includes an uplink to the internet. This requires multiple externally-facing IP addresses
+* A hierarchical topology where compute nodes are connected to a common switch, but the master node contains a second network connection which is connected to the internet. All outbound/inbound traffic would be routed through the master node. This requires setting up firewall rules for IP masquerade, see [here](https://www.server-world.info/en/note?os=CentOS_7&p=firewalld&f=2) for an example.
+### IP and Hostname Assignment
+The recommended setup is to assign IP addresses to individual servers. This can be done manually by logging onto each node, or via DHCP.
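For the hierarchical topology described above, a hedged sketch of the IP-masquerade rules on the master node, assuming a hypothetical internet-facing interface `em1`:
```
firewall-cmd --permanent --zone=external --change-interface=em1
firewall-cmd --permanent --zone=external --add-masquerade
firewall-cmd --reload
```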

The file diff was suppressed because it is too large
+ 43 - 0
site/README.md


+ 4 - 0
site/_config.yml

@@ -0,0 +1,4 @@
+theme: jekyll-theme-minimal
+title: Omnia
+description: Ansible playbook-based tools for deploying Slurm and Kubernetes clusters for High Performance Computing, Machine Learning, Deep Learning, and High-Performance Data Analytics
+logo: images/omnia-logo.png

BIN=BIN
site/images/delltech.jpg


BIN=BIN
site/images/example-system-ethernet.png


BIN=BIN
site/images/example-system-infiniband.png


BIN=BIN
site/images/omnia-branch-structure.png


BIN=BIN
site/images/omnia-k8s.png


BIN=BIN
site/images/omnia-logo.png


BIN=BIN
site/images/omnia-overview.png


BIN=BIN
site/images/omnia-slurm.png


BIN=BIN
site/images/pisa.png


+ 10 - 0
site/metalLB/README.md

@@ -0,0 +1,10 @@
+# MetalLB 
+
+MetalLB is a load-balancer implementation for bare metal Kubernetes clusters, using standard routing protocols.
+https://metallb.universe.tf/
+
+Omnia installs MetalLB by manifest in the playbook `startservices`. A default configuration is provided for the layer2 protocol, along with an example address pool. Modify metal-config.yaml to suit your network requirements and apply the changes with:
+
+``` 
+kubectl apply -f metal-config.yaml
+```

+ 21 - 0
site/metalLB/metal-config.yaml

@@ -0,0 +1,21 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  namespace: metallb-system
+  name: config
+data:
+  config: |
+    address-pools:
+    - name: default
+      protocol: layer2
+      addresses:
+      - 192.168.2.150/32
+      - 192.168.2.151/32
+      - 192.168.2.152/32
+      - 192.168.2.153/32
+      - 192.168.2.154/32
+      - 192.168.2.155/32
+      - 192.168.2.156/32
+      - 192.168.2.157/32
+      - 192.168.2.158/32
+      - 192.168.2.159/32
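Once the ConfigMap above is applied, a quick way to confirm MetalLB is handing out addresses from this pool, assuming a hypothetical deployment named `nginx` already exists in the cluster:
```
kubectl expose deployment nginx --type=LoadBalancer --port=80
kubectl get svc nginx   # EXTERNAL-IP should fall within 192.168.2.150-192.168.2.159
```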