# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
# Testcase OMNIA_1.1_US_CRD_TC_001
# Execute omnia.yml with separate servers for manager, compute, login, and nfs nodes, using default parameters
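# Play 1: manager and compute nodes - the kubelet and munge services must be active.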
- name: OMNIA_1.1_US_CRD_TC_001
  hosts: manager, compute
  vars_files:
    - test_vars/test_k8s_common_vars.yml
    - test_vars/test_slurm_common_vars.yml
  tasks:
    - name: Checking K8s service status
      systemd:
        name: kubelet
      register: kubelet_service
      tags: VERIFY_OMNIA_01

    - name: Validating K8s service status
      assert:
        that:
          - kubelet_service.status.ActiveState == 'active'
        fail_msg: "{{ kubelet_service_fail_msg }}"
        success_msg: "{{ kubelet_service_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Checking munge service status
      systemd:
        name: munge
      register: munge_service
      tags: VERIFY_OMNIA_01

    - name: Validating munge service status
      assert:
        that:
          - munge_service.status.ActiveState == 'active'
        fail_msg: "{{ munge_service_fail_msg }}"
        success_msg: "{{ munge_service_success_msg }}"
      tags: VERIFY_OMNIA_01
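
# Play 2: manager node only - Kubernetes control-plane pods, slurm exporter,
# mariadb, the Slurm controller services, and a Kubernetes job smoke test.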
- name: OMNIA_1.1_US_CRD_TC_001
  hosts: manager
  vars_files:
    - test_vars/test_k8s_start_manager_workers_vars.yml
    - test_vars/test_k8s_start_services_vars.yml
    - test_vars/test_slurmexporter_vars.yml
    - test_vars/test_slurm_start_services_vars.yml
    - test_vars/test_login_server_vars.yml
    - test_vars/test_slurm_manager_vars.yml
    - test_vars/test_login_node_vars.yml
  tasks:
    - name: Checking kube-system pods
      command: kubectl get pods --namespace kube-system --field-selector=status.phase=Running
      register: kube_system_pods
      changed_when: false
      ignore_errors: true
      tags: VERIFY_OMNIA_01

    - name: Validating controller-manager and scheduler pods status
      assert:
        that:
          - "'kube-scheduler' in kube_system_pods.stdout"
          - "'kube-controller' in kube_system_pods.stdout"
        fail_msg: "{{ controller_scheduler_status_fail_msg }}"
        success_msg: "{{ controller_scheduler_status_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Validating coreDNS pods status
      assert:
        that:
          - "'coredns' in kube_system_pods.stdout"
        fail_msg: "{{ coredns_status_fail_msg }}"
        success_msg: "{{ coredns_status_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Checking all running pods
      command: kubectl get pods --all-namespaces --field-selector=status.phase=Running
      register: running_pods
      changed_when: false
      ignore_errors: true
      tags: VERIFY_OMNIA_01

    - name: Validating Metallb, Prometheus and MPI pods
      assert:
        that:
          - "'metallb' in running_pods.stdout"
          - "'prometheus' in running_pods.stdout"
          - "'mpi-operator' in running_pods.stdout"
        fail_msg: "{{ metallb_prometheus_mpi_pods_fail_msg }}"
        success_msg: "{{ metallb_prometheus_mpi_pods_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Validating K8s dashboard
      assert:
        that:
          - "'kubernetes-dashboard' in running_pods.stdout"
        fail_msg: "{{ kubernetes_dashboard_fail_msg }}"
        success_msg: "{{ kubernetes_dashboard_success_msg }}"
      tags: VERIFY_OMNIA_01
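
    # prometheus-slurm-exporter should be active as a systemd service on the
    # manager, and the Prometheus server pod should be running in Kubernetes.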
    - name: Verify slurm exporter status
      systemd:
        name: prometheus-slurm-exporter
      register: slurm_exporter_status
      tags: VERIFY_OMNIA_01

    - name: Validate slurm exporter service status
      assert:
        that:
          - slurm_exporter_status.status.ActiveState == 'active'
        fail_msg: "{{ slurm_exporter_service_fail_msg }}"
        success_msg: "{{ slurm_exporter_service_success_msg }}"
      tags: VERIFY_OMNIA_01

    # Environment variables do not persist between tasks, so the pod name is
    # registered directly rather than exported and echoed in a later task.
    - name: Get prometheus-server pod name
      shell: >-
        kubectl get pods --namespace default
        -l "app=prometheus,component=server"
        -o jsonpath="{.items[0].metadata.name}"
      register: pod_name
      changed_when: false
      failed_when: false
      tags: VERIFY_OMNIA_01

    - name: Check if prometheus-server is in running state
      command: kubectl get pods {{ pod_name.stdout }}
      register: slurm_exporter_pod_status
      ignore_errors: true
      changed_when: false
      tags: VERIFY_OMNIA_01

    - name: Validate slurm exporter job in k8s services
      assert:
        that:
          - "'Error from server' not in slurm_exporter_pod_status.stderr"
        fail_msg: "{{ slurm_exporter_job_fail_msg }}"
        success_msg: "{{ slurm_exporter_job_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Checking mariadb service status
      systemd:
        name: mariadb
      register: mariadb_service
      tags: VERIFY_OMNIA_01

    - name: Validating mariadb service status
      assert:
        that:
          - mariadb_service.status.ActiveState == 'active'
        fail_msg: "{{ mariadb_service_fail_msg }}"
        success_msg: "{{ mariadb_service_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Checking slurmctld service status
      systemd:
        name: slurmctld
      register: slurmctld_service
      tags: VERIFY_OMNIA_01

    - name: Checking slurmdbd service status
      systemd:
        name: slurmdbd
      register: slurmdbd_service
      tags: VERIFY_OMNIA_01

    - name: Check if slurm is installed
      command: sinfo -V
      register: slurm_version
      changed_when: false
      ignore_errors: true
      tags: VERIFY_OMNIA_01

    - name: Validating slurmctld service status
      assert:
        that:
          - slurmctld_service.status.ActiveState == 'active'
        fail_msg: "{{ slurmctld_service_fail_msg }}"
        success_msg: "{{ slurmctld_service_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Validating slurmdbd service status
      assert:
        that:
          - slurmdbd_service.status.ActiveState == 'active'
        fail_msg: "{{ slurmdbd_service_fail_msg }}"
        success_msg: "{{ slurmdbd_service_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Validate slurm installation
      assert:
        that: "'command not found' not in slurm_version.stdout"
        fail_msg: "{{ slurm_status_fail_msg }}"
        success_msg: "{{ slurm_status_success_msg }}"
      tags: VERIFY_OMNIA_01
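
    # Smoke test: launch a one-off nginx pod to confirm the cluster accepts workloads.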
    - name: Submit kubernetes job
      command: kubectl run nginx --image=nginx --restart=Never
      changed_when: true
      failed_when: false
      tags: VERIFY_OMNIA_01

    - name: Check submitted kubernetes job status
      command: kubectl get pod nginx
      register: kube_job
      changed_when: false
      ignore_errors: true
      tags: VERIFY_OMNIA_01

    - name: Validate kubernetes job submission
      assert:
        that: "'not found' not in kube_job.stderr"
        fail_msg: "{{ kubernetes_job_status_fail_msg }}"
        success_msg: "{{ kubernetes_job_status_success_msg }}"
      tags: VERIFY_OMNIA_01
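
# Play 3: compute nodes - Slurm must be installed and slurmd active.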
- name: OMNIA_1.1_US_CRD_TC_001
  hosts: compute
  vars_files:
    - test_vars/test_slurm_workers_vars.yml
  tasks:
    - name: Check if slurm is installed
      command: sinfo -V
      register: slurm_version
      changed_when: false
      ignore_errors: true
      tags: VERIFY_OMNIA_01

    # The systemd module returns the unit's status without requiring a state
    # change, unlike the service module, which needs state or enabled set.
    - name: Checking slurmd service status
      systemd:
        name: slurmd
      register: slurmd_service
      tags: VERIFY_OMNIA_01

    - name: Validate slurm installation
      assert:
        that: "'command not found' not in slurm_version.stdout"
        fail_msg: "{{ slurm_status_fail_msg }}"
        success_msg: "{{ slurm_status_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Validating slurmd service status
      assert:
        that:
          - slurmd_service.status.ActiveState == 'active'
        fail_msg: "{{ slurmd_service_fail_msg }}"
        success_msg: "{{ slurmd_service_success_msg }}"
      tags: VERIFY_OMNIA_01
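
# Play 4: manager and login nodes - FreeIPA installation, required firewall
# ports, and admin authentication.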
- name: OMNIA_1.1_US_CRD_TC_001
  hosts: manager, login_node
  vars_files:
    - test_vars/test_login_common_vars.yml
  tasks:
    - name: Checking installed FreeIPA version
      command: ipa --version
      register: ipa_version
      changed_when: false
      ignore_errors: true
      tags: VERIFY_OMNIA_01

    - name: Validating FreeIPA installation
      assert:
        that:
          - "'command not found' not in ipa_version.stdout"
        fail_msg: "{{ ipa_install_fail_msg }}"
        success_msg: "{{ ipa_install_success_msg }}"
      tags: VERIFY_OMNIA_01
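
    # firewalld is started temporarily so the FreeIPA port set can be inspected:
    # HTTP/HTTPS (80, 443), LDAP/LDAPS (389, 636), Kerberos (88, 464 tcp/udp),
    # DNS (53 tcp/udp), NTP (123), and Dogtag (7389). It is stopped afterwards.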
    - name: Start and enable firewalld
      service:
        name: firewalld
        state: started
        enabled: true
      tags: VERIFY_OMNIA_01

    - name: Checking firewalld open ports on manager/login node
      command: firewall-cmd --list-ports
      changed_when: false
      register: login_common_firewalld_ports
      tags: VERIFY_OMNIA_01

    - name: Validating firewalld open ports on manager/login node
      assert:
        that:
          - "'80/tcp' in login_common_firewalld_ports.stdout"
          - "'443/tcp' in login_common_firewalld_ports.stdout"
          - "'389/tcp' in login_common_firewalld_ports.stdout"
          - "'636/tcp' in login_common_firewalld_ports.stdout"
          - "'88/tcp' in login_common_firewalld_ports.stdout"
          - "'464/tcp' in login_common_firewalld_ports.stdout"
          - "'88/udp' in login_common_firewalld_ports.stdout"
          - "'464/udp' in login_common_firewalld_ports.stdout"
          - "'53/tcp' in login_common_firewalld_ports.stdout"
          - "'53/udp' in login_common_firewalld_ports.stdout"
          - "'123/udp' in login_common_firewalld_ports.stdout"
          - "'7389/tcp' in login_common_firewalld_ports.stdout"
        fail_msg: "{{ login_common_ports_status_fail_msg }}"
        success_msg: "{{ login_common_ports_status_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Stop and disable firewalld
      service:
        name: firewalld
        state: stopped
        enabled: false
      tags: VERIFY_OMNIA_01
    - name: Check FreeIPA server/client configuration
      command: ipa help topics
      register: ipa_config
      changed_when: false
      ignore_errors: true
      tags: VERIFY_OMNIA_01

    - name: Validating FreeIPA server/client configuration
      assert:
        that:
          - "'command not found' not in ipa_config.stdout"
        fail_msg: "{{ ipa_configuration_fail_msg }}"
        success_msg: "{{ ipa_configuration_success_msg }}"
      tags: VERIFY_OMNIA_01
    - name: Authenticate as FreeIPA admin
      shell: echo "{{ ipa_admin_password }}" | kinit admin
      register: authen
      changed_when: false
      ignore_errors: true
      tags: VERIFY_OMNIA_01

    - name: Validate admin user in ipa server/client
      assert:
        that:
          - authen.rc == 0
        fail_msg: "{{ admin_user_authentication_status_fail_msg }}"
        success_msg: "{{ admin_user_authentication_status_success_msg }}"
      tags: VERIFY_OMNIA_01
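
# Play 5: login node - slurmd status and Slurm job submission through srun.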
- name: OMNIA_1.1_US_CRD_TC_001
  hosts: login_node
  gather_facts: false
  vars_files:
    - test_vars/test_login_node_vars.yml
    - test_vars/test_slurm_workers_vars.yml
  tasks:
    - name: Checking slurmd service status
      systemd:
        name: slurmd
      register: slurmd_service
      tags: VERIFY_OMNIA_01

    - name: Validating slurmd service status
      assert:
        that:
          - slurmd_service.status.ActiveState == 'active'
        fail_msg: "{{ slurmd_service_fail_msg }}"
        success_msg: "{{ slurmd_service_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Submit slurm jobs
      command: srun --nodes "{{ nodes }}" --ntasks-per-node "{{ ntasks }}" --partition normal hostname
      register: job_status
      changed_when: false
      ignore_errors: true
      tags: VERIFY_OMNIA_01

    - name: Validate slurm job submission
      assert:
        that: "'compute.ipa.test' in job_status.stdout"
        fail_msg: "{{ slurm_job_status_fail_msg }}"
        success_msg: "{{ slurm_job_status_success_msg }}"
      tags: VERIFY_OMNIA_01
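
# Play 6: nfs node - NFS server stack services and the exported share.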
- name: OMNIA_1.1_US_CRD_TC_001
  hosts: nfs_node
  vars_files:
    - test_vars/test_nfs_node_vars.yml
  tasks:
    - name: Checking rpcbind service status
      systemd:
        name: rpcbind
      register: rpcbind_service
      tags: VERIFY_OMNIA_01

    - name: Validating rpcbind service status
      assert:
        that:
          - rpcbind_service.status.ActiveState == 'active'
        fail_msg: "{{ rpcbind_service_fail_msg }}"
        success_msg: "{{ rpcbind_service_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Checking nfs-server service status
      systemd:
        name: nfs-server
      register: nfs_server_service
      tags: VERIFY_OMNIA_01

    - name: Validating nfs-server service status
      assert:
        that:
          - nfs_server_service.status.ActiveState == 'active'
        fail_msg: "{{ nfs_server_service_fail_msg }}"
        success_msg: "{{ nfs_server_service_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Checking nfs-lock service status
      systemd:
        name: nfs-lock
      register: nfs_lock_service
      tags: VERIFY_OMNIA_01

    - name: Validating nfs-lock service status
      assert:
        that:
          - nfs_lock_service.status.ActiveState == 'active'
        fail_msg: "{{ nfs_lock_service_fail_msg }}"
        success_msg: "{{ nfs_lock_service_success_msg }}"
      tags: VERIFY_OMNIA_01

    - name: Checking nfs-idmap service status
      systemd:
        name: nfs-idmap
      register: nfs_idmap_service
      tags: VERIFY_OMNIA_01

    - name: Validating nfs-idmap service status
      assert:
        that:
          - nfs_idmap_service.status.ActiveState == 'active'
        fail_msg: "{{ nfs_idmap_service_fail_msg }}"
        success_msg: "{{ nfs_idmap_service_success_msg }}"
      tags: VERIFY_OMNIA_01
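
    # exportfs -v lists the directories currently exported by the NFS server;
    # the configured share path must be among them.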
    - name: Check if nfs server setup is complete
      command: exportfs -v
      changed_when: false
      register: nfs_share
      tags: VERIFY_OMNIA_01

    - name: Validate nfs server setup
      assert:
        that: "'{{ nfs_dir }}' in nfs_share.stdout"
        fail_msg: "{{ nfs_server_fail_msg }}"
        success_msg: "{{ nfs_server_success_msg }}"
      tags: VERIFY_OMNIA_01