Browse Source

Issue #452: Fixed volcano and spark-operator to a stable version

Signed-off-by: blesson-james <blesson_james@Dellteam>
Lucas A. Wilson 3 years ago
parent
commit
448baf786e

+ 14 - 13
control_plane/roles/control_plane_k8s/tasks/k8s_init.yml

@@ -27,30 +27,31 @@
 - name: Get K8s nodes status
   command: kubectl get nodes
   changed_when: false
-  ignore_errors: True
+  failed_when: false
   register: k8s_nodes
 
 - name: Get K8s pods status
   command: kubectl get pods --all-namespaces
   changed_when: false
-  ignore_errors: True
+  failed_when: false
   register: k8s_pods
 
 - name: Docker login
   command: docker login -u {{ docker_username }} -p {{ docker_password }}
   changed_when: true
   register: docker_login_output
-  ignore_errors: True
+  failed_when: false
   when: docker_username or docker_password
+  no_log: true
 
 # The "Docker login" task above sets `failed_when: false`, so its registered
 # result is never marked as failed and an `is failed` test can never fire.
 # Inspect the return code instead; `rc` is absent when the login task was
 # skipped, hence the default of 0.
 - name: Docker login check
   fail:
     msg: "{{ docker_login_fail_msg }}"
   when: (docker_login_output.rc | default(0)) != 0
 
-- name: Initialize kubeadm
+- name: Initialize kubeadm (This process may take 5-10min)
   block:
-    - name: Initialize kubeadm
+    - name: Initialize kubeadm (This process may take 5-10min)
       command: "/bin/kubeadm init --pod-network-cidr='{{ appliance_k8s_pod_net_cidr }}' \
         --apiserver-advertise-address='{{ ansible_default_ipv4.address }}'"
       changed_when: true
@@ -60,7 +61,7 @@
       command: "kubeadm reset -f"
       changed_when: true
 
-    - name: Initialize kubeadm
+    - name: Initialize kubeadm (This process may take 5-10min)
       command: "/bin/kubeadm init --pod-network-cidr='{{ appliance_k8s_pod_net_cidr }}' \
           --apiserver-advertise-address='{{ ansible_default_ipv4.address }}'"
       changed_when: true
@@ -69,7 +70,7 @@
     - name: Get K8s pods status
       command: kubectl get pods --all-namespaces
       changed_when: false
-      ignore_errors: True
+      failed_when: false
       register: k8s_pods
   when: "'master' not in k8s_nodes.stdout"
 
@@ -99,20 +100,20 @@
     set -o pipefail && \
       kubeadm token list | cut -d ' ' -f1 | sed -n '2p'
   changed_when: false
-  register: K8S_TOKEN
+  register: k8s_token
 
 - name: CA Hash
   shell: >
     set -o pipefail && \
       openssl x509 -pubkey -in {{ k8s_cert_path }} | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | sed 's/^.* //'
   changed_when: false
-  register: K8S_MANAGER_CA_HASH
+  register: k8s_manager_ca_hash
 
 - name: Add K8S Manager IP, Token, and Hash to dummy host
   add_host:
     name:   "K8S_TOKEN_HOLDER"
-    token:  "{{ K8S_TOKEN.stdout }}"
-    hash:   "{{ K8S_MANAGER_CA_HASH.stdout }}"
+    token:  "{{ k8s_token.stdout }}"
+    hash:   "{{ k8s_manager_ca_hash.stdout }}"
     ip:     "{{ ansible_default_ipv4.address }}"
 
 - name: Create yaml repo for setup
@@ -129,10 +130,10 @@
 - name: Setup Calico SDN network - custom-resources
   command: "kubectl create -f {{ calico_yml_url }}"
   changed_when: true
-  ignore_errors: True
+  failed_when: false
   when: "'calico-system' not in k8s_pods.stdout"
 
 - name: Edge / Workstation Install allows pods to schedule on manager
   command: kubectl taint nodes --all node-role.kubernetes.io/master-
   changed_when: true
-  ignore_errors: True
+  failed_when: false

+ 1 - 1
control_plane/roles/control_plane_k8s/tasks/k8s_services.yml

@@ -16,7 +16,7 @@
 - name: Wait for CoreDNS to restart
   command: kubectl rollout status deployment/coredns -n kube-system
   changed_when: false
-  ignore_errors: True
+  failed_when: false
 
 - name: Get K8s pods
   command: kubectl get pods --all-namespaces

+ 31 - 5
control_plane/roles/powervault_me4/tasks/main.yml

@@ -1,4 +1,4 @@
-# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -13,7 +13,33 @@
 #  limitations under the License.
 ---
 
-# Will be updated later in each PR
-- name: Pass
-  debug:
-    msg: "Pass"
+# Entry point of the powervault_me4 role: load credentials and variables,
+# validate the input, then run the prerequisite, volume and SNMP task files.
+
+# Credentials are read from environment variables; no_log keeps the values
+# out of the task output.
+- name: Set powervault credentials
+  set_fact:
+    powervault_me4_username: "{{ lookup('env', 'ANSIBLE_NET_USERNAME') }}"
+    powervault_me4_password: "{{ lookup('env', 'ANSIBLE_NET_PASSWORD') }}"
+  no_log: true
+  run_once: true
+
+- name: Include variable file base_vars.yml
+  include_vars: "{{ base_pv_file }}"
+  no_log: true
+
+- name: Include variable file for powervault
+  include_vars: "{{ pv_file }}"
+  no_log: true
+
+- name: Include common variables
+  include_vars: ../../control_plane_common/vars/main.yml
+
+# include_tasks replaces the deprecated bare `include` so that every task
+# file in this role is pulled in the same way.
+- name: Validation of powervault vars
+  include_tasks: pv_validation.yml
+
+- name: Check powervault pre-requisite
+  include_tasks: pv_me4_prereq.yml
+
+- name: Create volume
+  include_tasks: volume.yml
+
+# SNMP is only configured when a trap destination has been provided.
+- name: Set snmp parameters
+  include_tasks: set_snmp.yml
+  when: snmp_trap_destination != ""

+ 96 - 0
control_plane/roles/powervault_me4/tasks/map_volume.yml

@@ -0,0 +1,96 @@
+# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+# Maps the k8s and slurm volumes to the server initiator on every port listed
+# in up_port, using credentials read from the (possibly vaulted) login file.
+
+- name: Include powervault_me4_vars.yml
+  include_vars: "{{ playbook_dir }}/control_plane/input_params/powervault_me4_vars.yml"
+
+- name: Include powervault_me4 role variables
+  include_vars: "{{ playbook_dir }}/control_plane/roles/powervault_me4/vars/main.yml"
+
+# The file content is captured only to look for the vault header below.
+- name: Check if login_vars.yml file is encrypted
+  command: cat {{ login_pv_file }}
+  changed_when: false
+  no_log: true
+  register: config_content
+  run_once: true
+
+# Everything that needs the decrypted file runs inside `block`; the `always`
+# section re-encrypts it even when a task in between fails, so the login
+# file is not left in clear text on disk after an aborted run.
+- name: Map volumes using the decrypted login variables
+  block:
+    - name: Decrypt login_vars.yml
+      command: >-
+        ansible-vault decrypt {{ login_pv_file }}
+        --vault-password-file {{ login_pv_vault_file }}
+      changed_when: false
+      run_once: true
+      when: "'$ANSIBLE_VAULT;' in config_content.stdout"
+
+    - name: Include variable file login_vars.yml
+      include_vars: "{{ login_pv_file }}"
+      no_log: true
+
+    # The login token is sha256("<username>_<password>"). The value is shell
+    # quoted so special characters in the credentials cannot break or alter
+    # the command, and no_log keeps the credentials out of the output.
+    - name: Get auth string
+      shell: printf '%s' {{ (powervault_me4_username ~ '_' ~ powervault_me4_password) | quote }} | sha256sum
+      register: map_auth_string
+      changed_when: false
+      ignore_errors: true
+      no_log: true
+      delegate_to: localhost
+      tags: install
+
+    # sha256sum appends " -" for stdin input; it is stripped before the hash
+    # is placed in the login URL. no_log hides the hash and the session key.
+    - name: Get session key
+      uri:
+        url: https://{{ groups['powervault_me4'][0] }}/api/login/{{ map_auth_string.stdout | replace(" -", "") }}
+        method: GET
+        headers:
+          datatype: json
+        validate_certs: false
+      register: map_session_key
+      no_log: true
+      delegate_to: localhost
+      tags: install
+
+    # k8s volume, mapped with LUN lun1 on each port in up_port.
+    - name: Map volume
+      uri:
+        url: https://{{ groups['powervault_me4'][0] }}/api/map/volume/{{ powervault_me4_k8s_volume_name }}/access/{{ access }}/ports/{{ item.0 }}/lun/{{ lun1 }}/initiator/{{ hostvars['server_iqdn_id']['server_iqdn'] }}
+        method: GET
+        body_format: json
+        validate_certs: false
+        use_proxy: false
+        headers:
+          sessionKey: "{{ map_session_key.json.status[0].response }}"
+          datatype: json
+      register: map_vol1
+      with_together:
+        - "{{ up_port }}"
+      delegate_to: localhost
+      tags: install
+
+    # slurm volume, mapped with LUN lun2 on each port in up_port.
+    - name: Map volume
+      uri:
+        url: https://{{ groups['powervault_me4'][0] }}/api/map/volume/{{ powervault_me4_slurm_volume_name }}/access/{{ access }}/ports/{{ item.0 }}/lun/{{ lun2 }}/initiator/{{ hostvars['server_iqdn_id']['server_iqdn'] }}
+        method: GET
+        body_format: json
+        validate_certs: false
+        use_proxy: false
+        headers:
+          sessionKey: "{{ map_session_key.json.status[0].response }}"
+          datatype: json
+      register: map_vol2
+      with_together:
+        - "{{ up_port }}"
+      delegate_to: localhost
+      tags: install
+  always:
+    # Only re-encrypt when the file was encrypted to begin with.
+    - name: Encrypt login_vars.yml
+      command: >-
+        ansible-vault encrypt {{ login_pv_file }}
+        --vault-password-file {{ login_pv_vault_file }}
+      changed_when: false
+      run_once: true
+      when: "'$ANSIBLE_VAULT;' in config_content.stdout"

+ 98 - 0
control_plane/roles/powervault_me4/tasks/ports.yml

@@ -0,0 +1,98 @@
+# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+# Discovers the ports that are "Up" on the PowerVault and assigns an IP
+# address, netmask and gateway to each of them.
+
+- name: Include powervault_me4_vars.yml
+  include_vars: "{{ playbook_dir }}/control_plane/input_params/powervault_me4_vars.yml"
+
+- name: Include powervault_me4 role variables
+  include_vars: "{{ playbook_dir }}/control_plane/roles/powervault_me4/vars/main.yml"
+
+# The file content is captured only to look for the vault header below.
+- name: Check if login_vars.yml file is encrypted
+  command: cat {{ login_pv_file }}
+  changed_when: false
+  no_log: true
+  register: config_content
+  run_once: true
+
+# The login file is decrypted in place, so it is re-encrypted in `always`
+# (also on failure) instead of being left in clear text on disk.
+# NOTE(review): previously this file never re-encrypted login_vars.yml;
+# confirm no later task relies on finding it decrypted.
+- name: Configure ports using the decrypted login variables
+  block:
+    - name: Decrypt login_vars.yml
+      command: >-
+        ansible-vault decrypt {{ login_pv_file }}
+        --vault-password-file {{ login_pv_vault_file }}
+      changed_when: false
+      run_once: true
+      when: "'$ANSIBLE_VAULT;' in config_content.stdout"
+
+    - name: Include variable file login_vars.yml
+      include_vars: "{{ login_pv_file }}"
+      no_log: true
+
+    # The login token is sha256("<username>_<password>"). The value is shell
+    # quoted so special characters in the credentials cannot break or alter
+    # the command, and no_log keeps the credentials out of the output.
+    - name: Get auth string
+      shell: printf '%s' {{ (powervault_me4_username ~ '_' ~ powervault_me4_password) | quote }} | sha256sum
+      register: port_auth_string
+      changed_when: false
+      ignore_errors: true
+      no_log: true
+      tags: install
+
+    # sha256sum appends " -" for stdin input; it is stripped before the hash
+    # is placed in the login URL. no_log hides the hash and the session key.
+    - name: Get session key
+      uri:
+        url: https://{{ groups['powervault_me4'][0] }}/api/login/{{ port_auth_string.stdout | replace(" -", "") }}
+        method: GET
+        headers:
+          datatype: json
+        validate_certs: false
+      register: port_session_key
+      no_log: true
+      tags: install
+
+    - name: Show ports
+      uri:
+        url: https://{{ groups['powervault_me4'][0] }}/api/show/ports
+        method: GET
+        body_format: json
+        validate_certs: false
+        use_proxy: false
+        headers:
+          sessionKey: "{{ port_session_key.json.status[0].response }}"
+          datatype: json
+      register: show_ports
+      tags: install
+
+    # Collect the name of every port whose status is "Up"; target_id keeps
+    # the 'target-id' of the last such port.
+    - name: Up ports
+      set_fact:
+        up_port: "{{ up_port + [item.port] }}"
+        target_id: "{{ item.get('target-id') }}"
+      when: item.status == "Up"
+      loop: "{{ show_ports.json.port }}"
+      loop_control:
+        label: "{{ item.port }}"
+
+    # Appends one address per "Up" port and advances temp by t each time.
+    # NOTE(review): the moustaches nested inside the string literal depend on
+    # the stored value being templated again when it is read later, so the
+    # resulting host octets may differ between Ansible versions - verify the
+    # generated addresses before changing this expression.
+    - name: Set ip
+      set_fact:
+        set_port_ip: "{{ set_port_ip+['{{ port_ip }}{{ temp|int }}'] }}"
+        temp: "{{ temp|int+t|int }}"
+      loop: "{{ up_port }}"
+
+    # item.0 is the address, item.1 the port it is assigned to.
+    - name: Assign ip to ports
+      uri:
+        url: https://{{ groups['powervault_me4'][0] }}/api/set/host-parameters/gateway/{{ port_gateway }}/ip/{{ item.0 }}/netmask/{{ port_netmask }}/ports/{{ item.1 }}/prompt/yes/noprompt
+        method: GET
+        body_format: json
+        validate_certs: false
+        use_proxy: false
+        headers:
+          sessionKey: "{{ port_session_key.json.status[0].response }}"
+          datatype: json
+      register: set_ports
+      with_together:
+        - "{{ set_port_ip }}"
+        - "{{ up_port }}"
+      tags: install
+  always:
+    # Only re-encrypt when the file was encrypted to begin with.
+    - name: Encrypt login_vars.yml
+      command: >-
+        ansible-vault encrypt {{ login_pv_file }}
+        --vault-password-file {{ login_pv_vault_file }}
+      changed_when: false
+      run_once: true
+      when: "'$ANSIBLE_VAULT;' in config_content.stdout"

+ 80 - 0
control_plane/roles/powervault_me4/tasks/pv_me4_prereq.yml

@@ -0,0 +1,80 @@
+# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+# Prerequisite checks: reach the device, log in, confirm it is a supported
+# ME4 model and optionally set its system name.
+
+# The result is registered as validate_login; errors are ignored here.
+- name: Ping the powervault device to validate connectivity
+  command: ping -c1 {{ inventory_hostname }}
+  register: validate_login
+  changed_when: false
+  ignore_errors: true
+  tags: install
+
+# The login token is sha256("<username>_<password>"). The value is shell
+# quoted so special characters in the credentials cannot break or alter the
+# command, and no_log keeps the credentials out of the output.
+- name: Get auth string
+  shell: printf '%s' {{ (powervault_me4_username ~ '_' ~ powervault_me4_password) | quote }} | sha256sum
+  register: auth_string
+  changed_when: false
+  ignore_errors: true
+  no_log: true
+  tags: install
+
+# sha256sum appends " -" for stdin input; it is stripped before the hash is
+# placed in the login URL. no_log hides the hash and the session key.
+- name: Get session key
+  uri:
+    url: https://{{ inventory_hostname }}/api/login/{{ auth_string.stdout | replace(" -", "") }}
+    method: GET
+    headers:
+      datatype: json
+    validate_certs: false
+  register: session_key
+  no_log: true
+  tags: install
+
+# Publishes the device address on the in-memory host "pv_ip".
+- name: Set powervault ip
+  add_host:
+    name: "pv_ip"
+    powervault_ip: "{{ inventory_hostname }}"
+  tags: install
+
+- name: Execute show system command
+  uri:
+    url: https://{{ inventory_hostname }}/api/show/system
+    method: GET
+    body_format: json
+    validate_certs: false
+    use_proxy: false
+    headers:
+      sessionKey: "{{ session_key.json.status[0].response }}"
+      datatype: json
+  register: system_info
+  tags: install
+
+# The expression must be wrapped in Jinja delimiters; without them pv_id was
+# set to the literal text of the expression instead of the reported id.
+- name: Get the product id
+  set_fact:
+    pv_id: "{{ system_info.json.system[0]['product-id'] }}"
+
+# Abort when the device is NOT a supported model. The previous condition
+# was inverted: it matched only the supported ME4 models.
+- name: Verify the product id and model no. of device
+  fail:
+    msg: "{{ fail_pv_support }}"
+  when: >-
+    scsi_product_id not in system_info.json.system[0]['scsi-product-id']
+    or pv_id not in ['ME4084', 'ME4024', 'ME4012']
+
+# Skipped when no system name was supplied.
+- name: Set system name
+  uri:
+    url: https://{{ inventory_hostname }}/api/set/system/name/{{ powervault_me4_system_name }}
+    method: GET
+    body_format: json
+    validate_certs: false
+    use_proxy: false
+    headers:
+      sessionKey: "{{ session_key.json.status[0].response }}"
+      datatype: json
+  register: system_name
+  when: powervault_me4_system_name != ""
+  tags: install

+ 136 - 0
control_plane/roles/powervault_me4/tasks/pv_validation.yml

@@ -0,0 +1,136 @@
+# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+# The login token is sha256("<username>_<password>"). The value is shell
+# quoted so special characters in the credentials cannot break or alter the
+# command, and no_log keeps the credentials out of the output.
+- name: Get auth string
+  shell: printf '%s' {{ (powervault_me4_username ~ '_' ~ powervault_me4_password) | quote }} | sha256sum
+  register: val_auth_string
+  changed_when: false
+  ignore_errors: true
+  no_log: true
+  tags: install
+
+# sha256sum appends " -" for stdin input; it is stripped before the hash is
+# placed in the login URL. no_log hides the hash and the session key.
+- name: Get session key
+  uri:
+    url: https://{{ inventory_hostname }}/api/login/{{ val_auth_string.stdout | replace(" -", "") }}
+    method: GET
+    headers:
+      datatype: json
+    validate_certs: false
+  register: val_session_key
+  no_log: true
+  tags: install
+
+# Name validation. Each check pipes the name through `grep -c '\s'`, which
+# prints the number of lines containing whitespace. grep exits 1 when that
+# count is 0, so only exit codes above 1 are treated as real failures. The
+# checks are read-only, hence changed_when: false, and the names are shell
+# quoted so they cannot alter the command.
+
+- name: Check if system name has space
+  shell: echo {{ powervault_me4_system_name | quote }} | grep '\s' -c
+  register: space_count
+  changed_when: false
+  failed_when: space_count.rc > 1
+  tags: install
+
+- name: Fail if system name has space
+  assert:
+    that:
+      - space_count.stdout | int == 0
+      - powervault_me4_system_name | length < 30
+    msg: "{{ system_name_wrong }}"
+
+- name: Check if volume name has space
+  shell: echo {{ powervault_me4_k8s_volume_name | quote }} | grep '\s' -c
+  register: vol_count1
+  changed_when: false
+  failed_when: vol_count1.rc > 1
+  tags: install
+
+- name: Fail if volume name has space
+  assert:
+    that:
+      - vol_count1.stdout == "0"
+      - powervault_me4_k8s_volume_name | length < 30
+    msg: "{{ system_name_wrong }}"
+
+- name: Check if volume name has space
+  shell: echo {{ powervault_me4_slurm_volume_name | quote }} | grep '\s' -c
+  register: vol_count2
+  changed_when: false
+  failed_when: vol_count2.rc > 1
+  tags: install
+
+- name: Assert if volume name has space
+  assert:
+    that:
+      - vol_count2.stdout == "0"
+      - powervault_me4_slurm_volume_name | length < 30
+    msg: "{{ system_name_wrong }}"
+
+# Skipped when the disk group name has fewer than two characters.
+- name: Check if disk name has space
+  shell: echo {{ powervault_me4_disk_group_name | quote }} | grep '\s' -c
+  register: disk_count
+  when: powervault_me4_disk_group_name | length > 1
+  changed_when: false
+  failed_when: disk_count.rc > 1
+  tags: install
+
+# When the check above is skipped, disk_count has no stdout. The explicit
+# length test and the default make the assert fail with the intended message
+# instead of an undefined-variable error.
+- name: Assert if disk name has space
+  assert:
+    that:
+      - powervault_me4_disk_group_name | length > 1
+      - disk_count.stdout | default("") == "0"
+      - powervault_me4_disk_group_name | length < 30
+    msg: "{{ system_name_wrong }}"
+
+# Value validation for the remaining powervault input parameters.
+
+# The notification level must be one of the six listed keywords.
+- name: Assert snmp notification level
+  assert:
+    that:
+      - powervault_me4_snmp_notify_level | length > 1
+      - powervault_me4_snmp_notify_level in ["crit", "error", "warn", "resolved", "info", "none"]
+    fail_msg: "{{ snmp_wrong_value }}"
+    success_msg: "{{ snmp_success }}"
+
+# The RAID level is compared case-insensitively.
+- name: Assert RAID value
+  assert:
+    that:
+      - (powervault_me4_raid_levels | lower) in ["r1", "raid1", "r5", "raid5", "r6", "raid6", "r10", "raid10", "adapt"]
+    msg: "{{ raid_level_wrong }}"
+
+- name: Assert disk range
+  assert:
+    that:
+      - powervault_me4_disk_range | length > 1
+    fail_msg: "{{ wrong_disk_range }}"
+    success_msg: "{{ correct_disk_range }}"
+
+- name: Check pool value
+  assert:
+    that:
+      - powervault_me4_pool in ["a", "A", "b", "B"]
+    msg: "{{ wrong_pool }}"
+
+# The size must be a non-zero integer strictly between 5 and 99.
+- name: Check parition percentage
+  assert:
+    that:
+      - powervault_me4_disk_partition_size | int
+      - powervault_me4_disk_partition_size | int > 5
+      - powervault_me4_disk_partition_size | int < 99
+    msg: "{{ wrong_partition }}"
+
+# Passes when the value contains any of the listed unit markers.
+- name: Check the volume size
+  assert:
+    that: >-
+      'GB' in powervault_me4_volume_size
+      or 'TB' in powervault_me4_volume_size
+      or 'MB' in powervault_me4_volume_size
+      or 'B' in powervault_me4_volume_size
+    msg: "{{ wrong_vol_size }}"
+
+- name: Assert the nic provided
+  assert:
+    that:
+      - powervault_me4_server_nic | length > 2

+ 43 - 0
control_plane/roles/powervault_me4/tasks/set_snmp.yml

@@ -0,0 +1,43 @@
+#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+# Logs in to the PowerVault and configures its SNMP notification level, trap
+# host and read community.
+
+# The login token is sha256("<username>_<password>"). The value is shell
+# quoted so special characters in the credentials cannot break or alter the
+# command, and no_log keeps the credentials out of the output.
+- name: Get auth string
+  shell: printf '%s' {{ (powervault_me4_username ~ '_' ~ powervault_me4_password) | quote }} | sha256sum
+  register: snmp_auth_string
+  changed_when: false
+  ignore_errors: true
+  no_log: true
+  tags: install
+
+# sha256sum appends " -" for stdin input; it is stripped before the hash is
+# placed in the login URL. no_log hides the hash and the session key.
+- name: Get session key
+  uri:
+    url: https://{{ inventory_hostname }}/api/login/{{ snmp_auth_string.stdout | replace(" -", "") }}
+    method: GET
+    headers:
+      datatype: json
+    validate_certs: false
+  register: snmp_session_key
+  no_log: true
+  tags: install
+
+- name: Set snmp parameters
+  uri:
+    url: https://{{ inventory_hostname }}/api/set/snmp-parameters/enable/{{ powervault_me4_snmp_notify_level }}/add-trap-host/{{ snmp_trap_destination }}/read-community/{{ snmp_community_name }}
+    method: GET
+    body_format: json
+    validate_certs: false
+    use_proxy: false
+    headers:
+      sessionKey: "{{ snmp_session_key.json.status[0].response }}"
+      datatype: json
+  register: snmp
+  tags: install

+ 82 - 0
control_plane/roles/powervault_me4/tasks/volume.yml

@@ -0,0 +1,82 @@
+#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+# Logs in to the PowerVault, adds the disk group and creates the k8s and
+# slurm volumes, failing with the device's own message when it reports an
+# error.
+
+# The login token is sha256("<username>_<password>"). The value is shell
+# quoted so special characters in the credentials cannot break or alter the
+# command, and no_log keeps the credentials out of the output.
+- name: Get auth string
+  shell: printf '%s' {{ (powervault_me4_username ~ '_' ~ powervault_me4_password) | quote }} | sha256sum
+  register: vol_auth_string
+  changed_when: false
+  ignore_errors: true
+  no_log: true
+  tags: install
+
+# sha256sum appends " -" for stdin input; it is stripped before the hash is
+# placed in the login URL. no_log hides the hash and the session key.
+- name: Get session key
+  uri:
+    url: https://{{ inventory_hostname }}/api/login/{{ vol_auth_string.stdout | replace(" -", "") }}
+    method: GET
+    headers:
+      datatype: json
+    validate_certs: false
+  register: vol_session_key
+  no_log: true
+  tags: install
+
+- name: Add disk group
+  uri:
+    url: https://{{ inventory_hostname }}/api/add/disk-group/type/{{ type }}/disks/{{ powervault_me4_disk_range }}/level/{{ powervault_me4_raid_levels }}/pool/{{ powervault_me4_pool }}/{{ powervault_me4_disk_group_name }}
+    method: GET
+    body_format: json
+    validate_certs: false
+    use_proxy: false
+    headers:
+      sessionKey: "{{ vol_session_key.json.status[0].response }}"
+      datatype: json
+  register: pv_disk
+  tags: install
+
+# Stop here when the disk group request reported an error; the device's
+# response text is used as the failure message.
+- name: Assert if disk group created or not
+  fail:
+    msg: "{{ pv_disk.json.status[0].response }}"
+  when: pv_disk.json.status[0]['response-type'] == "Error"
+
+- name: Create volume1
+  uri:
+    url: https://{{ inventory_hostname }}/api/create/volume/size/{{ powervault_me4_volume_size }}/pool/{{ powervault_me4_pool }}/{{ powervault_me4_k8s_volume_name }}
+    method: GET
+    body_format: json
+    validate_certs: false
+    use_proxy: false
+    headers:
+      sessionKey: "{{ vol_session_key.json.status[0].response }}"
+      datatype: json
+  register: pv_vol1
+  tags: install
+
+- name: Create volume2
+  uri:
+    url: https://{{ inventory_hostname }}/api/create/volume/size/{{ powervault_me4_volume_size }}/pool/{{ powervault_me4_pool }}/{{ powervault_me4_slurm_volume_name }}
+    method: GET
+    body_format: json
+    validate_certs: false
+    use_proxy: false
+    headers:
+      sessionKey: "{{ vol_session_key.json.status[0].response }}"
+      datatype: json
+  register: pv_vol2
+  tags: install
+
+# Both create requests are sent before either result is checked.
+- name: Assert if k8s_volume created correctly
+  fail:
+    msg: "{{ pv_vol1.json.status[0].response }}"
+  when: pv_vol1.json.status[0]['response-type'] == "Error"
+
+- name: Assert if slurm_volume created correctly
+  fail:
+    msg: "{{ pv_vol2.json.status[0].response }}"
+  when: pv_vol2.json.status[0]['response-type'] == "Error"

+ 54 - 0
control_plane/roles/powervault_me4/vars/main.yml

@@ -0,0 +1,54 @@
+# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+# Usage: pv_validation.yml
+# Messages shown by the validation asserts.
+system_name_wrong: "Failed. Given name might be empty or it contains space or is more than 30 in length"
+snmp_wrong_value: "Failed. Snmp value wrong"
+snmp_success: "Success: Snmp value correct"
+raid_level_wrong: "Failed: wrong raid level entered"
+wrong_disk_range: "Failed: disk range should not be empty"
+correct_disk_range: "Success: Disk range is correct"
+wrong_pool: "Failed: Given pool value is wrong"
+wrong_partition: "Failed: Given partition is wrong"
+wrong_vol_size: "Failed: Given volume size is wrong"
+
+
+# Usage: pv_me4_prereq.yml
+# scsi_product_id is matched against the device's reported 'scsi-product-id'.
+scsi_product_id: ME4
+fail_pv_support: "Failed. Powervault not supported"
+success_pv_support: "Success. Powervault supported"
+
+# Usage: main.yml
+pv_file: "{{ role_path }}/../../input_params/powervault_me4_vars.yml"
+base_pv_file: "{{ role_path }}/../../input_params/base_vars.yml"
+
+# Usage: volume.yml (type), map_volume.yml (lun1, lun2)
+lun1: 0
+lun2: 1
+type: virtual
+
+# Usage: ports.yml (up_port is also read by map_volume.yml)
+# port_ip is the address prefix; ports.yml appends the running counter temp,
+# which it advances by t for each port.
+port_ip: "192.168.25."
+port_netmask: "255.255.255.0"
+port_gateway: "192.168.25.1"
+temp: 0
+t: 5
+set_port_ip: []
+up_port: []
+
+# Usage: map_volume.yml, ports.yml
+access: rw
+login_pv_file: "{{ playbook_dir }}/control_plane/input_params/login_vars.yml"
+login_pv_vault_file: "{{ playbook_dir }}/control_plane/input_params/.login_vault_key"

+ 24 - 19
roles/k8s_start_manager/tasks/main.yml

@@ -1,4 +1,4 @@
-#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -31,21 +31,25 @@
 - name: Get K8s nodes status
   command: kubectl get nodes
   changed_when: false
-  ignore_errors: True
+  failed_when: false
   register: k8s_nodes
   tags: init
 
 - name: Get K8s ready compute nodes
-  shell: kubectl get node --selector='!node-role.kubernetes.io/master' | grep -v 'NotReady'
+  shell: >
+    set -o pipefail && \
+    kubectl get node --selector='!node-role.kubernetes.io/master' | grep -v 'NotReady'
   changed_when: false
-  ignore_errors: True
+  failed_when: false
   register: k8s_nodes_ready
   tags: init
 
 - name: Get K8s not ready compute nodes
-  shell: kubectl get node --selector='!node-role.kubernetes.io/master' | grep 'NotReady'
+  shell: >
+    set -o pipefail && \
+    kubectl get node --selector='!node-role.kubernetes.io/master' | grep 'NotReady'
   changed_when: false
-  ignore_errors: True
+  failed_when: false
   register: k8s_nodes_not_ready
   tags: init
 
@@ -53,17 +57,18 @@
   command: docker login -u {{ hostvars['127.0.0.1']['docker_username'] }} -p {{ hostvars['127.0.0.1']['docker_password'] }}
   changed_when: true
   register: docker_login_output
-  ignore_errors: True
+  failed_when: false
   when: hostvars['127.0.0.1']['docker_username'] or hostvars['127.0.0.1']['docker_password']
+  no_log: true
 
 # The "Docker login" task above sets `failed_when: false`, so its registered
 # result is never marked as failed and an `is failed` test can never fire.
 # Inspect the return code instead; `rc` is absent when the login task was
 # skipped, hence the default of 0.
 - name: Docker login check
   fail:
     msg: "{{ docker_login_fail_msg }}"
   when: (docker_login_output.rc | default(0)) != 0
 
-- name: Initialize kubeadm
+- name: Initialize kubeadm (This process may take 5-10min)
   block:
-    - name: Initialize kubeadm
+    - name: Initialize kubeadm (This process may take 5-10min)
       command: "/bin/kubeadm init --pod-network-cidr='{{ hostvars['127.0.0.1']['k8s_pod_network_cidr'] }}' \
         --apiserver-advertise-address='{{ ansible_default_ipv4.address }}'"
       changed_when: true
@@ -74,7 +79,7 @@
       command: "kubeadm reset -f"
       changed_when: true
 
-    - name: Initialize kubeadm
+    - name: Initialize kubeadm (This process may take 5-10min)
       command: "/bin/kubeadm init --pod-network-cidr='{{ hostvars['127.0.0.1']['k8s_pod_network_cidr'] }}' \
         --apiserver-advertise-address='{{ ansible_default_ipv4.address }}'"
       changed_when: true
@@ -111,7 +116,7 @@
     set -o pipefail && \
       kubeadm token list | cut -d ' ' -f1 | sed -n '2p'
   changed_when: false
-  register: K8S_TOKEN
+  register: k8s_token
   tags: init
 
 - name: CA Hash
@@ -119,14 +124,14 @@
     set -o pipefail && \
       openssl x509 -pubkey -in {{ k8s_cert_path }} | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | sed 's/^.* //'
   changed_when: false
-  register: K8S_MANAGER_CA_HASH
+  register: k8s_manager_ca_hash
   tags: init
 
 - name: Add K8S Manager IP, Token, and Hash to dummy host
   add_host:
     name:   "K8S_TOKEN_HOLDER"
-    token:  "{{ K8S_TOKEN.stdout }}"
-    hash:   "{{ K8S_MANAGER_CA_HASH.stdout }}"
+    token:  "{{ k8s_token.stdout }}"
+    hash:   "{{ k8s_manager_ca_hash.stdout }}"
     ip:     "{{ ansible_default_ipv4.address }}"
     k8s_nodes:  "{{ k8s_nodes.stdout }}"
     k8s_nodes_ready:  "{{ k8s_nodes_ready.stdout }}"
@@ -214,16 +219,16 @@
 - name: Create clusterRoleBinding (K8s dashboard) files
   copy:
     src: create_clusterRoleBinding.yaml
-    dest: "{{ k8s_clusterRoleBinding_file_dest }}"
+    dest: "{{ cluster_role_binding_file_dest }}"
     owner: root
     group: root
-    mode: "{{ k8s_clusterRoleBinding_file_mode }}"
+    mode: "{{ cluster_role_binding_file_mode }}"
   tags: init
 
 - name: Create clusterRoleBinding (K8s dashboard)
-  command: "kubectl create -f '{{ k8s_clusterRoleBinding_file_dest }}'"
+  command: "kubectl create -f '{{ cluster_role_binding_file_dest }}'"
   changed_when: true
-  ignore_errors: True
+  failed_when: false
   tags: init
 
 - name: Dump bearer token for K8s dashboard login
@@ -236,5 +241,5 @@
 - name: Edge / Workstation Install allows pods to scheudle on manager
   command: kubectl taint nodes --all node-role.kubernetes.io/master-
   when: groups['manager'][0] == groups['compute'][0] and groups['compute']|length == 1
-  ignore_errors: True
+  failed_when: false
   tags: init

+ 3 - 3
roles/k8s_start_manager/vars/main.yml

@@ -1,4 +1,4 @@
-#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -37,9 +37,9 @@ k8s_service_account_file_dest: /root/k8s/create_admin_user.yaml
 
 k8s_service_account_file_mode: 0655
 
-k8s_clusterRoleBinding_file_dest: /root/k8s/create_clusterRoleBinding.yaml
+cluster_role_binding_file_dest: /root/k8s/create_clusterRoleBinding.yaml
 
-k8s_clusterRoleBinding_file_mode: 0655
+cluster_role_binding_file_mode: 0655
 
 calico_yml_url: https://docs.projectcalico.org/manifests/calico.yaml
 

+ 42 - 53
roles/k8s_start_services/tasks/main.yml

@@ -17,38 +17,10 @@
   include_vars: ../../slurm_exporter/vars/main.yml
 
 - name: Wait for CoreDNS to restart
-  block:
-    - name: Wait for CoreDNS to restart
-      command: kubectl rollout status deployment/coredns -n kube-system  --timeout=4m
-      changed_when: false
-      tags: init
-  rescue:
-    - name: Get K8s pods
-      command: kubectl get pods --all-namespaces
-      register: k8s_pods
-      tags: init
-
-    - name: Pull docker images
-      command: docker pull {{ item }}
-      with_items: "{{ kube_system_docker_images }}"
-      when:
-        - hostvars['127.0.0.1']['docker_username'] and hostvars['127.0.0.1']['docker_password']
-        - "'ImagePullBackOff' in k8s_pods.stdout"
-      register: docker_image_pull_result
-      until: docker_image_pull_result is not failed
-      retries: 5
-
-    - name: Wait for CoreDNS to restart
-      command: kubectl rollout status deployment/coredns -n kube-system
-      when: hostvars['127.0.0.1']['docker_username'] and hostvars['127.0.0.1']['docker_password']
-      tags: init
-
-    - name: Fail message
-      fail:
-        msg: "{{ docker_pull_limit_msg }}"
-      when:
-        - "'ImagePullBackOff' in k8s_pods.stdout or 'ErrImagePull' in k8s_pods.stdout"
-        - not hostvars['127.0.0.1']['docker_username'] and not hostvars['127.0.0.1']['docker_password']
+  command: kubectl rollout status deployment/coredns -n kube-system  --timeout=5m
+  changed_when: false
+  failed_when: false
+  tags: init
 
 - name: Get K8s pods
   command: kubectl get pods --all-namespaces
@@ -139,7 +111,7 @@
 - name: Set NFS-Client Provisioner as DEFAULT StorageClass
   shell: >
     kubectl patch storageclasses.storage.k8s.io nfs-client \
-    -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'
+    -p '{ "metadata": { "annotations":{ "storageclass.kubernetes.io/is-default-class":"true" }}}'
   changed_when: true
   tags: init
 
@@ -224,35 +196,52 @@
   when: "'amdgpu-device-plugin' not in k8s_pods.stdout"
   tags: init
 
+- name: Deploy Volcano Scheduling
+  command: "kubectl apply -f '{{ volcano_scheduling_yaml_url }}'"
+  changed_when: true
+  when: "'volcano-system' not in k8s_pods.stdout"
+  tags: init
+
 - name: Install Spark Operator
   command: "helm repo add spark-operator '{{ spark_operator_repo }}'"
   changed_when: true
   tags: init
 
 - name: Install Spark Operator Namespace
-  command: "helm install my-release spark-operator/spark-operator --namespace spark-operator --create-namespace"
+  command: helm install my-release spark-operator/spark-operator --set image.tag={{ operator_image_tag }} --namespace spark-operator --create-namespace
   changed_when: true
   when: "'spark-operator' not in k8s_pods.stdout"
   tags: init
 
-- name: Deploy Volcano Scheduling
-  command: "kubectl apply -f '{{ volcano_scheduling_yaml_url }}'"
-  changed_when: true
-  when: "'volcano-system' not in k8s_pods.stdout"
-  tags: init
+- name: Wait for k8s pod to come to ready state
+  block:
+    - name: Wait for k8s pod to come to ready state
+      command: "kubectl wait --for=condition=ready -n {{ item.namespace }} pod -l app={{ item.app }} --timeout={{ item.timeout }}"
+      with_items:
+        - { namespace: "default", app: "nfs-client-provisioner", timeout: "10m" }
+        - { namespace: "volcano-system", app: "volcano-scheduler", timeout: "5m" }
+      changed_when: false
+      tags: install
+  rescue:
+    - name: Get K8s pods
+      command: kubectl get pods --all-namespaces
+      changed_when: false
+      register: k8s_pods
+      tags: init
 
-- name: Get K8s pods
-  command: kubectl get pods --all-namespaces
-  changed_when: false
-  register: k8s_pods
-  tags: init
+    - name: Fail message
+      fail:
+        msg: "{{ docker_pull_limit_msg }}"
+      when:
+        - "'ImagePullBackOff' in k8s_pods.stdout or 'ErrImagePull' in k8s_pods.stdout"
+        - not hostvars['127.0.0.1']['docker_username'] and not hostvars['127.0.0.1']['docker_password']
 
-- name: Pull K8s services docker images
-  command: docker pull {{ item }}
-  with_items: "{{ k8s_services_docker_images }}"
-  when:
-    - "'ImagePullBackOff' in k8s_pods.stdout"
-    - hostvars['127.0.0.1']['docker_username'] and hostvars['127.0.0.1']['docker_password']
-  register: docker_image_pull_result
-  until: docker_image_pull_result is not failed
-  retries: 5
+    - name: Pull K8s services docker images
+      command: docker pull {{ item }}
+      with_items: "{{ k8s_docker_images }}"
+      when:
+        - "'ImagePullBackOff' in k8s_pods.stdout or 'ErrImagePull' in k8s_pods.stdout"
+        - hostvars['127.0.0.1']['docker_username'] and hostvars['127.0.0.1']['docker_password']
+      register: docker_image_pull_result
+      until: docker_image_pull_result is not failed
+      retries: 5

+ 6 - 6
roles/k8s_start_services/vars/main.yml

@@ -1,4 +1,4 @@
-#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -13,15 +13,14 @@
 #  limitations under the License.
 ---
 
-kube_system_docker_images:
+k8s_docker_images:
   - docker.io/calico/kube-controllers:v3.19.1
   - docker.io/calico/cni:v3.19.1
   - docker.io/calico/pod2daemon-flexvol:v3.19.1
   - docker.io/calico/node:v3.19.1
   - xilinxatg/xilinx_k8s_fpga_plugin:2020.11.24
   - nvidia/k8s-device-plugin:v0.7.0
-
-k8s_services_docker_images:
+  - quay.io/external_storage/nfs-client-provisioner:v3.1.0-k8s1.11
   - docker.io/rocm/k8s-device-plugin
   - kubernetesui/dashboard:v2.0.5
   - kubernetesui/metrics-scraper:v1.0.6
@@ -36,7 +35,6 @@ k8s_services_docker_images:
   - volcanosh/vc-controller-manager:latest
   - volcanosh/vc-scheduler:latest
   - volcanosh/vc-webhook-manager:latest
-  - quay.io/external_storage/nfs-client-provisioner:v3.1.0-k8s1.11
 
 docker_pull_limit_msg: "You have reached your docker pull rate limit. Please provide docker credentials in omnia_config.yml and try again"
 
@@ -88,4 +86,6 @@ prometheus_path_on_host: /var/lib/prometheus-2.23.0.linux-amd64/
 
 spark_operator_repo: https://googlecloudplatform.github.io/spark-on-k8s-operator
 
-volcano_scheduling_yaml_url: https://raw.githubusercontent.com/volcano-sh/volcano/master/installer/volcano-development.yaml
+operator_image_tag: v1beta2-1.2.3-3.1.1
+
+volcano_scheduling_yaml_url: https://raw.githubusercontent.com/volcano-sh/volcano/v1.3.0/installer/volcano-development.yaml

+ 1 - 1
roles/k8s_start_workers/tasks/main.yml

@@ -27,7 +27,7 @@
 - name: Reset kubeadm
   command: kubeadm reset -f
   changed_when: true
-  ignore_errors: True
+  failed_when: false
   when:
     - groups['manager'][0] != groups['compute'][0]
     - groups['compute']|length >= 1