test_compute_gpu.yml 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. # Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. ---
  # OMNIA_UKP_US_VFKP_TC_003
  # Execute the compute_gpu role on compute nodes with CentOS 7.8 installed,
  # then check the NVIDIA drivers, nvidia-docker2, docker and kubelet.
  # The *_fail_msg / *_success_msg variables are not defined in this play;
  # they are expected to come from the vars file loaded below.
  - name: OMNIA_UKP_US_VFKP_TC_003
    hosts: gpus
    vars_files:
      - test_vars/test_compute_gpu_vars.yml
    tasks:
      # The tag is set on the block (not on include_role itself) so that the
      # tasks brought in by the dynamic include inherit it.
      - block:
          - name: Call compute_gpu role
            include_role:
              name: ../roles/compute_gpu
        tags: [TC_003]

      # --- Collect state; failures are tolerated here and judged below ---
      - name: Checking Nvidia drivers installation
        command: whereis nvidia
        register: nvidia_drivers
        changed_when: false
        ignore_errors: true
        tags: [TC_003, VERIFY_003]

      - name: Checking nvidia-docker2 installation status
        command: nvidia-docker version
        register: nvidia_docker_version
        changed_when: false
        ignore_errors: true
        tags: [TC_003, VERIFY_003]

      # systemd with only a name changes nothing; it just returns the unit
      # status that the assertions below inspect.
      - name: Checking docker service status
        systemd:
          name: docker
        register: docker_service
        tags: [TC_003, VERIFY_003]

      - name: Checking K8s service status
        systemd:
          name: kubelet
        register: kubelet_service
        tags: [TC_003, VERIFY_003]

      # --- Assertions ---
      - name: Validating Nvidia drivers
        assert:
          that:
            - "'/usr/lib/nvidia' in nvidia_drivers.stdout"
          fail_msg: "{{ nvidia_drivers_fail_msg }}"
          success_msg: "{{ nvidia_drivers_success_msg }}"
        tags: [TC_003, VERIFY_003]

      - name: Validating nvidia-docker2 installation
        assert:
          that:
            # The command module does not go through a shell, so a missing
            # binary never prints 'command not found' to stdout; the task
            # result must be checked as well or this assert cannot fail.
            - nvidia_docker_version is succeeded
            - "'command not found' not in nvidia_docker_version.stdout"
          fail_msg: "{{ nvidia_docker_fail_msg }}"
          success_msg: "{{ nvidia_docker_success_msg }}"
        tags: [TC_003, VERIFY_003]

      - name: Validating docker service status
        assert:
          that:
            - docker_service.status.ActiveState == 'active'
          fail_msg: "{{ docker_service_fail_msg }}"
          success_msg: "{{ docker_service_success_msg }}"
        tags: [TC_003, VERIFY_003]

      - name: Validating K8s service status
        assert:
          that:
            - kubelet_service.status.ActiveState == 'active'
          fail_msg: "{{ kubelet_service_fail_msg }}"
          success_msg: "{{ kubelet_service_success_msg }}"
        tags: [TC_003, VERIFY_003]

  # OMNIA_UKP_US_VFKP_TC_004
  # Execute the compute_gpu role on compute nodes that already have the
  # NVIDIA kmod/docker drivers installed, then check the NVIDIA drivers,
  # nvidia-docker2, docker and kubelet.
  # The *_fail_msg / *_success_msg variables are not defined in this play;
  # they are expected to come from the vars file loaded below.
  - name: OMNIA_UKP_US_VFKP_TC_004
    hosts: gpus
    vars_files:
      - test_vars/test_compute_gpu_vars.yml
    tasks:
      # The tag is set on the block (not on include_role itself) so that the
      # tasks brought in by the dynamic include inherit it.
      - block:
          - name: Call compute_gpu role
            include_role:
              name: ../roles/compute_gpu
        tags: [TC_004]

      # --- Collect state; failures are tolerated here and judged below ---
      - name: Checking Nvidia drivers installation
        command: whereis nvidia
        register: nvidia_drivers
        changed_when: false
        ignore_errors: true
        tags: [TC_004, VERIFY_004]

      - name: Checking nvidia-docker2 installation status
        command: nvidia-docker version
        register: nvidia_docker_version
        changed_when: false
        ignore_errors: true
        tags: [TC_004, VERIFY_004]

      # systemd with only a name changes nothing; it just returns the unit
      # status that the assertions below inspect.
      # Tag was previously misspelled VERIFY_0042, which skipped this task
      # under --tags VERIFY_004 and left docker_service undefined.
      - name: Checking docker service status
        systemd:
          name: docker
        register: docker_service
        tags: [TC_004, VERIFY_004]

      - name: Checking K8s service status
        systemd:
          name: kubelet
        register: kubelet_service
        tags: [TC_004, VERIFY_004]

      # --- Assertions ---
      - name: Validating Nvidia drivers
        assert:
          that:
            - "'/usr/lib/nvidia' in nvidia_drivers.stdout"
          fail_msg: "{{ nvidia_drivers_fail_msg }}"
          success_msg: "{{ nvidia_drivers_success_msg }}"
        tags: [TC_004, VERIFY_004]

      - name: Validating nvidia-docker2 installation
        assert:
          that:
            # The command module does not go through a shell, so a missing
            # binary never prints 'command not found' to stdout; the task
            # result must be checked as well or this assert cannot fail.
            - nvidia_docker_version is succeeded
            - "'command not found' not in nvidia_docker_version.stdout"
          fail_msg: "{{ nvidia_docker_fail_msg }}"
          success_msg: "{{ nvidia_docker_success_msg }}"
        tags: [TC_004, VERIFY_004]

      - name: Validating docker service status
        assert:
          that:
            - docker_service.status.ActiveState == 'active'
          fail_msg: "{{ docker_service_fail_msg }}"
          success_msg: "{{ docker_service_success_msg }}"
        tags: [TC_004, VERIFY_004]

      - name: Validating K8s service status
        assert:
          that:
            - kubelet_service.status.ActiveState == 'active'
          fail_msg: "{{ kubelet_service_fail_msg }}"
          success_msg: "{{ kubelet_service_success_msg }}"
        tags: [TC_004, VERIFY_004]