# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---

# Testcase OMNIA_1.1_US_CRD_TC_001
# Execute omnia.yml with separate servers for manager, compute, login, nfs node
# with default parameters.
#
# Every task carries the VERIFY_OMNIA_01 tag. Each check is a pair of tasks:
# one that collects state into a registered variable and one `assert` that
# validates it, using the messages loaded from the play's vars_files.

# Play 1: services expected on both the manager and the compute nodes.
- name: OMNIA_1.1_US_CRD_TC_001
  hosts: manager, compute
  vars_files:
    - test_vars/test_k8s_common_vars.yml
    - test_vars/test_slurm_common_vars.yml
  tasks:
    # `systemd` with only `name` changes nothing; it just returns the unit status.
    - name: Checking K8s service status
      systemd:
        name: kubelet
      register: kubelet_service
      tags: VERIFY_OMNIA_01

    - name: Validating K8s service status
      assert:
        that:
          - kubelet_service.status.ActiveState == 'active'
        fail_msg: "{{ kubelet_service_fail_msg }}"
        success_msg: "{{ kubelet_service_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Checking munge service status
      systemd:
        name: munge
      register: munge_service
      tags: VERIFY_OMNIA_01

    - name: Validating munge service status
      assert:
        that:
          - munge_service.status.ActiveState == 'active'
        fail_msg: "{{ munge_service_fail_msg }}"
        success_msg: "{{ munge_service_success_msg }}"
      tags: VERIFY_OMNIA_01

# Play 2: Kubernetes pods, slurm services and job submission on the manager.
- name: OMNIA_1.1_US_CRD_TC_001
  hosts: manager
  vars_files:
    - test_vars/test_k8s_start_manager_workers_vars.yml
    - test_vars/test_k8s_start_services_vars.yml
    - test_vars/test_slurmexporter_vars.yml
    - test_vars/test_slurm_start_services_vars.yml
    - test_vars/test_login_server_vars.yml
    - test_vars/test_slurm_manager_vars.yml
    - test_vars/test_login_node_vars.yml
  tasks:
    # Only pods in phase Running are listed, so a name appearing in stdout
    # means that pod is running.
    - name: Checking kube-system pods
      command: >-
        kubectl get pods --namespace kube-system
        --field-selector=status.phase=Running
      register: kube_system_pods
      changed_when: false
      ignore_errors: true
      tags: VERIFY_OMNIA_01

    - name: Validating controller-manager and scheduler and coreDNS pods status
      assert:
        that:
          - "'kube-scheduler' in kube_system_pods.stdout"
          - "'kube-controller' in kube_system_pods.stdout"
        fail_msg: "{{ controller_scheduler_status_fail_msg }}"
        success_msg: "{{ controller_scheduler_status_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Validating coreDNS pods status
      assert:
        that:
          - "'coredns' in kube_system_pods.stdout"
        fail_msg: "{{ coredns_status_fail_msg }}"
        success_msg: "{{ coredns_status_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Checking all running pods
      command: >-
        kubectl get pods --all-namespaces
        --field-selector=status.phase=Running
      register: running_pods
      changed_when: false
      ignore_errors: true
      tags: VERIFY_OMNIA_01

    - name: Validating Metallb, Prometheus and MPI pods
      assert:
        that:
          - "'metallb' in running_pods.stdout"
          - "'prometheus' in running_pods.stdout"
          - "'mpi-operator' in running_pods.stdout"
        fail_msg: "{{ metallb_prometheus_mpi_pods_fail_msg }}"
        success_msg: "{{ metallb_prometheus_mpi_pods_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Validating K8s dashboard
      assert:
        that:
          - "'kubernetes-dashboard' in running_pods.stdout"
        fail_msg: "{{ kubernetes_dashboard_fail_msg }}"
        success_msg: "{{ kubernetes_dashboard_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Verify slurm exporter status
      systemd:
        name: prometheus-slurm-exporter
      register: slurm_exporter_status
      tags: VERIFY_OMNIA_01

    - name: Validate slurm exporter service status
      assert:
        that:
          - slurm_exporter_status.status.ActiveState == 'active'
        fail_msg: "{{ slurm_exporter_service_fail_msg }}"
        success_msg: "{{ slurm_exporter_service_success_msg }}"
      tags: VERIFY_OMNIA_01

    # The pod name is registered straight from kubectl. Exporting it as a shell
    # variable in one task and echoing it in another cannot work: every task
    # runs in its own shell, so the variable is gone by the next task.
    # A failed lookup is tolerated here; the assert below rejects an empty name.
    - name: Get prometheus-server pod name
      command: >-
        kubectl get pods --namespace default
        -l "app=prometheus,component=server"
        -o jsonpath="{.items[0].metadata.name}"
      register: pod_name
      changed_when: false
      failed_when: false
      tags: VERIFY_OMNIA_01

    - name: Check if prometheus-server is in running state
      command: >-
        kubectl get pods --namespace default {{ pod_name.stdout }}
      register: slurm_exporter_pod_status
      ignore_errors: true
      changed_when: false
      tags: VERIFY_OMNIA_01

    # kubectl reports errors on stderr and through its exit code, so the exit
    # code is checked instead of searching stdout for an error string. The
    # non-empty-name check prevents a pass when `kubectl get pods` was run
    # without a pod name and simply listed every pod.
    - name: Validate slurm exporter job in k8s services
      assert:
        that:
          - pod_name.stdout | length > 0
          - slurm_exporter_pod_status.rc == 0
        fail_msg: "{{ slurm_exporter_job_fail_msg }}"
        success_msg: "{{ slurm_exporter_job_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Checking mariadb service status
      systemd:
        name: mariadb
      register: mariadb_service
      tags: VERIFY_OMNIA_01

    - name: Validating mariadb service status
      assert:
        that:
          - mariadb_service.status.ActiveState == 'active'
        fail_msg: "{{ mariadb_service_fail_msg }}"
        success_msg: "{{ mariadb_service_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Checking slurmctld service status
      systemd:
        name: slurmctld
      register: slurmctld_service
      tags: VERIFY_OMNIA_01

    - name: Checking slurmdbd service status
      systemd:
        name: slurmdbd
      register: slurmdbd_service
      tags: VERIFY_OMNIA_01

    - name: Check if slurm is installed
      command: sinfo -V
      register: slurm_version
      changed_when: false
      ignore_errors: true
      tags: VERIFY_OMNIA_01

    - name: Validating slurmctld service status
      assert:
        that:
          - slurmctld_service.status.ActiveState == 'active'
        fail_msg: "{{ slurmctld_service_fail_msg }}"
        success_msg: "{{ slurmctld_service_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Validating slurmdbd service status
      assert:
        that:
          - slurmdbd_service.status.ActiveState == 'active'
        fail_msg: "{{ slurmdbd_service_fail_msg }}"
        success_msg: "{{ slurmdbd_service_success_msg }}"
      tags: VERIFY_OMNIA_01

    # A missing binary makes the command module fail with a non-zero rc and
    # empty stdout, so the exit code is what proves the tool ran.
    - name: Validate slurm installation
      assert:
        that:
          - slurm_version.rc == 0
        fail_msg: "{{ slurm_status_fail_msg }}"
        success_msg: "{{ slurm_status_success_msg }}"
      tags: VERIFY_OMNIA_01

    # Errors from the submission itself are ignored on purpose; the follow-up
    # `kubectl get pod nginx` decides whether the pod exists.
    - name: Submit kubernetes job
      command: kubectl run nginx --image=nginx --restart=Never
      changed_when: false
      failed_when: false
      tags: VERIFY_OMNIA_01

    - name: Check submitted kubernetes job status
      command: kubectl get pod nginx
      register: kubo_job
      changed_when: false
      ignore_errors: true
      tags: VERIFY_OMNIA_01

    # kubectl exits non-zero (message on stderr) when the pod does not exist.
    - name: Validate kubernetes job submission
      assert:
        that:
          - kubo_job.rc == 0
        fail_msg: "{{ kubernetes_job_status_fail_msg }}"
        success_msg: "{{ kubernetes_job_status_success_msg }}"
      tags: VERIFY_OMNIA_01

# Play 3: slurm installation and slurmd service on the compute nodes.
- name: OMNIA_1.1_US_CRD_TC_001
  hosts: compute
  vars_files:
    - test_vars/test_slurm_workers_vars.yml
  tasks:
    - name: Check if slurm is installed
      command: sinfo -V
      register: slurm_version
      changed_when: false
      ignore_errors: true
      tags: VERIFY_OMNIA_01

    # `systemd` is used explicitly (as in the other status checks) because the
    # assert below reads `status.ActiveState`, which only that module returns.
    - name: Checking slurmd service status
      systemd:
        name: slurmd.service
      register: slurmd_service
      tags: VERIFY_OMNIA_01

    - name: Validate slurm installation
      assert:
        that:
          - slurm_version.rc == 0
        fail_msg: "{{ slurm_status_fail_msg }}"
        success_msg: "{{ slurm_status_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Validating slurmd service status
      assert:
        that:
          - slurmd_service.status.ActiveState == 'active'
        fail_msg: "{{ slurmd_service_fail_msg }}"
        success_msg: "{{ slurmd_service_success_msg }}"
      tags: VERIFY_OMNIA_01

# Play 4: FreeIPA installation, firewalld ports and admin authentication on
# the manager and login nodes.
- name: OMNIA_1.1_US_CRD_TC_001
  hosts: manager, login_node
  vars_files:
    - test_vars/test_login_common_vars.yml
  tasks:
    - name: Checking installed Freeipa version
      command: ipa --version
      register: ipa_version
      changed_when: false
      ignore_errors: true
      tags: VERIFY_OMNIA_01

    - name: Validating Freeipa Installation
      assert:
        that:
          - ipa_version.rc == 0
        fail_msg: "{{ ipa_install_fail_msg }}"
        success_msg: "{{ ipa_install_success_msg }}"
      tags: VERIFY_OMNIA_01

    # firewalld is started only so that the port list can be queried; it is
    # stopped and disabled again right after the validation.
    - name: Start and enable firewalld
      service:
        name: firewalld
        state: started
        enabled: true
      tags: VERIFY_OMNIA_01

    - name: Checking firewalld open ports on manager/login node
      command: firewall-cmd --list-ports
      changed_when: false
      register: login_common_firewalld_ports
      tags: VERIFY_OMNIA_01

    - name: Validating firewalld open ports on manager/login node
      assert:
        that:
          - "'80/tcp' in login_common_firewalld_ports.stdout"
          - "'443/tcp' in login_common_firewalld_ports.stdout"
          - "'389/tcp' in login_common_firewalld_ports.stdout"
          - "'636/tcp' in login_common_firewalld_ports.stdout"
          - "'88/tcp' in login_common_firewalld_ports.stdout"
          - "'464/tcp' in login_common_firewalld_ports.stdout"
          - "'88/udp' in login_common_firewalld_ports.stdout"
          - "'464/udp' in login_common_firewalld_ports.stdout"
          - "'53/tcp' in login_common_firewalld_ports.stdout"
          - "'53/udp' in login_common_firewalld_ports.stdout"
          - "'123/udp' in login_common_firewalld_ports.stdout"
          - "'7389/tcp' in login_common_firewalld_ports.stdout"
        fail_msg: "{{ login_common_ports_status_fail_msg }}"
        success_msg: "{{ login_common_ports_status_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Stop and disable firewalld
      service:
        name: firewalld
        state: stopped
        enabled: false
      tags: VERIFY_OMNIA_01

    - name: Check Freeipa server/client configuration
      command: ipa help topics
      register: ipa_config
      changed_when: false
      ignore_errors: true
      tags: VERIFY_OMNIA_01

    # `failed_when: false` keeps this validation informational: a failed
    # assertion is reported but does not fail the host.
    - name: Validating Freeipa server/client Configuration
      assert:
        that:
          - ipa_config.rc == 0
        fail_msg: "{{ ipa_configuration_fail_msg }}"
        success_msg: "{{ ipa_configuration_success_msg }}"
      failed_when: false
      tags: VERIFY_OMNIA_01

    # The password is passed on stdin instead of through `echo ... |` in a
    # shell, so special characters in it cannot break or alter the command.
    # `no_log` keeps it out of the task output.
    - name: Ensure host is present
      command: kinit admin
      args:
        stdin: "{{ ipa_admin_password }}"
      register: authen
      changed_when: false
      ignore_errors: true
      no_log: true
      tags: VERIFY_OMNIA_01

    - name: Validate admin user in ipa server/client
      assert:
        that:
          - authen.rc == 0
        fail_msg: "{{ admin_user_authentication_status_fail_msg }}"
        success_msg: "{{ admin_user_authentication_status_success_msg }}"
      tags: VERIFY_OMNIA_01

# Play 5: slurmd service and slurm job submission on the login node.
- name: OMNIA_1.1_US_CRD_TC_001
  hosts: login_node
  gather_facts: false
  vars_files:
    - test_vars/test_login_node_vars.yml
    - test_vars/test_slurm_workers_vars.yml
  tasks:
    # `systemd` is used explicitly because the assert below reads
    # `status.ActiveState`, which only that module returns.
    - name: Checking slurmd service status
      systemd:
        name: slurmd.service
      register: slurmd_service
      tags: VERIFY_OMNIA_01

    - name: Validating slurmd service status
      assert:
        that:
          - slurmd_service.status.ActiveState == 'active'
        fail_msg: "{{ slurmd_service_fail_msg }}"
        success_msg: "{{ slurmd_service_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Submit slurm jobs
      command: >-
        srun --nodes "{{ nodes }}" --ntasks-per-node "{{ ntasks }}"
        --partition normal hostname
      register: job_status
      changed_when: false
      ignore_errors: true
      tags: VERIFY_OMNIA_01

    # The expected hostname is hard-coded, and `failed_when: false` makes this
    # validation informational only: a mismatch is reported but does not fail
    # the host.
    - name: Validate slurm job submission
      assert:
        that:
          - "'compute.ipa.test' in job_status.stdout"
        fail_msg: "{{ slurm_job_status_fail_msg }}"
        success_msg: "{{ slurm_job_status_success_msg }}"
      failed_when: false
      tags: VERIFY_OMNIA_01

# Play 6: NFS-related services and the exported share on the nfs node.
- name: OMNIA_1.1_US_CRD_TC_001
  hosts: nfs_node
  vars_files:
    - test_vars/test_nfs_node_vars.yml
  tasks:
    - name: Checking rpcbind service status
      systemd:
        name: rpcbind
      register: rpcbind_service
      tags: VERIFY_OMNIA_01

    - name: Validating rpcbind service status
      assert:
        that:
          - rpcbind_service.status.ActiveState == 'active'
        fail_msg: "{{ rpcbind_service_fail_msg }}"
        success_msg: "{{ rpcbind_service_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Checking nfs-server service status
      systemd:
        name: nfs-server
      register: nfs_server_service
      tags: VERIFY_OMNIA_01

    - name: Validating nfs-server service status
      assert:
        that:
          - nfs_server_service.status.ActiveState == 'active'
        fail_msg: "{{ nfs_server_service_fail_msg }}"
        success_msg: "{{ nfs_server_service_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Checking nfs-lock service status
      systemd:
        name: nfs-lock
      register: nfs_lock_service
      tags: VERIFY_OMNIA_01

    - name: Validating nfs-lock service status
      assert:
        that:
          - nfs_lock_service.status.ActiveState == 'active'
        fail_msg: "{{ nfs_lock_service_fail_msg }}"
        success_msg: "{{ nfs_lock_service_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Checking nfs-idmap service status
      systemd:
        name: nfs-idmap
      register: nfs_idmap_service
      tags: VERIFY_OMNIA_01

    - name: Validating nfs-idmap service status
      assert:
        that:
          - nfs_idmap_service.status.ActiveState == 'active'
        fail_msg: "{{ nfs_idmap_service_fail_msg }}"
        success_msg: "{{ nfs_idmap_service_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Check if nfs server setup is complete
      command: exportfs -v
      changed_when: false
      register: nfs_share
      tags: VERIFY_OMNIA_01

    # `that` is already evaluated as a Jinja expression, so the variable is
    # referenced directly instead of being wrapped in "{{ }}" delimiters.
    - name: Validate nfs server setup
      assert:
        that:
          - nfs_dir in nfs_share.stdout
        fail_msg: "{{ nfs_server_fail_msg }}"
        success_msg: "{{ nfs_server_success_msg }}"
      tags: VERIFY_OMNIA_01