omnia.yml 5.6 KB

# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
  # Runs the cluster_validation role on localhost over a local connection.
  # Fact gathering is disabled; the canonical boolean `false` is used (not the
  # YAML 1.1 truthy `no`) to match every other play in this playbook.
  - name: Validate the cluster
    hosts: localhost
    connection: local
    gather_facts: false
    roles:
      - cluster_validation
  # Task-less play whose only effect is fact gathering on the manager, compute,
  # login_node and nfs_node groups.
  # gather_facts is stated explicitly so the play still collects facts when the
  # Ansible `gathering` setting is configured as `explicit`.
  - name: Gather facts from all the nodes
    hosts: manager, compute, login_node, nfs_node
    gather_facts: true
  # Applies the hostname_validation role to the manager and login_node groups.
  # any_errors_fatal makes a failure on one host end the play for all hosts.
  - name: Validate hostname
    hosts: manager, login_node
    any_errors_fatal: true
    gather_facts: false
    tags:
      - freeipa
    roles:
      - role: hostname_validation
  # On nfs_node hosts, includes only validate_nfs_config.yml from the
  # powervault_me4_nfs role, and only when powervault_status recorded for
  # 127.0.0.1 evaluates true.
  - name: Validate nfs_node config
    hosts: nfs_node
    gather_facts: false
    tasks:
      - name: Validate nfs_node config
        when: hostvars['127.0.0.1']['powervault_status']
        include_role:
          tasks_from: validate_nfs_config.yml
          name: powervault_me4_nfs
  # Applies the common role to the manager, compute and login_node groups.
  - name: Apply common installation and config
    hosts: manager, compute, login_node
    gather_facts: false
    tags:
      - common
    roles:
      - role: common
  # Applies the login_common role to the manager and login_node groups.
  - name: Apply login common installation and config
    hosts: manager, login_node
    gather_facts: false
    tags:
      - freeipa
    roles:
      - role: login_common
  # Applies the login_server role to the manager group.
  - name: Apply login server installation and config
    hosts: manager
    gather_facts: false
    tags:
      - freeipa
    roles:
      - role: login_server
  # Applies the login_node role to the login_node group.
  - name: Apply login node installation and config
    hosts: login_node
    gather_facts: false
    tags:
      - freeipa
    roles:
      - role: login_node
  # Applies the k8s_common role to the manager and compute groups.
  - name: Apply common K8s installation and config
    hosts: manager, compute
    gather_facts: false
    tags:
      - kubernetes
    roles:
      - role: k8s_common
  # Applies the k8s_manager role to the manager group.
  # Unlike most plays in this playbook, facts are gathered here.
  - name: Apply K8s manager config
    hosts: manager
    gather_facts: true
    tags:
      - kubernetes
    roles:
      - role: k8s_manager
  # Applies the k8s_firewalld role to the manager and compute groups.
  - name: Apply K8s firewalld config on manager and compute nodes
    hosts: manager, compute
    gather_facts: false
    tags:
      - kubernetes
    roles:
      - role: k8s_firewalld
  # On nfs_node hosts, includes the whole powervault_me4_nfs role, but only
  # when powervault_status recorded for 127.0.0.1 evaluates true.
  - name: Powervault Server Configuration
    hosts: nfs_node
    gather_facts: false
    tasks:
      - name: Configuring NFS node
        when: hostvars['127.0.0.1']['powervault_status']
        include_role:
          name: powervault_me4_nfs
  # Runs on the powervault_me4 group over a local connection.
  # Includes two task files from the control_plane powervault_me4 role,
  # ports.yml first and then map_volume.yml; each include is skipped unless
  # powervault_status recorded for 127.0.0.1 evaluates true.
  - name: Map volume
    hosts: powervault_me4
    connection: local
    gather_facts: false
    tasks:
      # Named after the file it includes; it previously shared the name of the
      # map_volume task below, which made the two indistinguishable in output.
      - name: Include ports task
        include_tasks: "{{ playbook_dir }}/control_plane/roles/powervault_me4/tasks/ports.yml"
        when: hostvars['127.0.0.1']['powervault_status']

      - name: Include map volume task
        include_tasks: "{{ playbook_dir }}/control_plane/roles/powervault_me4/tasks/map_volume.yml"
        when: hostvars['127.0.0.1']['powervault_status']
  # On nfs_node hosts, includes three task files from the powervault_me4_nfs
  # role in order: nfs_volume, mount_me4_partitions, me4_nfs_server_setup.
  # Each include runs only when powervault_status recorded for 127.0.0.1
  # evaluates true.
  - name: Apply NFS server setup on NFS node
    hosts: nfs_node
    gather_facts: false
    tasks:
      - name: Check the mapped volume on server
        when: hostvars['127.0.0.1']['powervault_status']
        include_role:
          tasks_from: nfs_volume
          name: powervault_me4_nfs

      - name: Mount partitions
        when: hostvars['127.0.0.1']['powervault_status']
        include_role:
          tasks_from: mount_me4_partitions
          name: powervault_me4_nfs

      - name: Setup NFS server on the partitions
        when: hostvars['127.0.0.1']['powervault_status']
        include_role:
          tasks_from: me4_nfs_server_setup
          name: powervault_me4_nfs
  # On manager hosts, includes the k8s_nfs_server_setup role, but only when
  # powervault_status recorded for 127.0.0.1 evaluates false.
  # NOTE(review): indentation was lost in this copy, so it is unclear whether
  # the `tags` list (kubernetes, nfs) belongs to the task or to the play; the
  # two placements select different tasks under --tags. Confirm against the repo.
  119. - name: Apply NFS server setup on manager node
  120. hosts: manager
  121. gather_facts: false
  122. tasks:
  123. - name: Apply NFS server setup on manager node
  124. include_role:
  125. name: k8s_nfs_server_setup
  126. when: not hostvars['127.0.0.1']['powervault_status']
  127. tags:
  128. - kubernetes
  129. - nfs
  # On compute hosts, includes the k8s_nfs_client_setup role, but only when
  # powervault_status recorded for 127.0.0.1 evaluates false.
  # NOTE(review): indentation was lost in this copy, so it is unclear whether
  # the `tags` list (kubernetes, nfs) belongs to the task or to the play; the
  # two placements select different tasks under --tags. Confirm against the repo.
  130. - name: Apply NFS client setup on compute nodes
  131. hosts: compute
  132. gather_facts: false
  133. tasks:
  134. - name: Apply NFS client setup on compute nodes
  135. include_role:
  136. name: k8s_nfs_client_setup
  137. when: not hostvars['127.0.0.1']['powervault_status']
  138. tags:
  139. - kubernetes
  140. - nfs
  # Applies the k8s_start_manager role to the manager group.
  # Facts are gathered for this play.
  - name: Start K8s on manager server
    hosts: manager
    gather_facts: true
    tags:
      - kubernetes
    roles:
      - role: k8s_start_manager
  # Applies the k8s_start_workers role to the compute group.
  - name: Start K8s worker servers on compute nodes
    hosts: compute
    gather_facts: false
    tags:
      - kubernetes
    roles:
      - role: k8s_start_workers
  # Applies the k8s_start_services role to the manager and compute groups.
  # The play name now reflects its hosts and role; the previous name ("Start
  # K8s worker servers on manager nodes") matched neither.
  - name: Start K8s services on manager and compute nodes
    hosts: manager, compute
    gather_facts: false
    roles:
      - k8s_start_services
    tags: kubernetes
  # Applies the slurm_common role to the manager, compute and login_node groups.
  - name: Apply common Slurm installation and config
    hosts: manager, compute, login_node
    gather_facts: false
    tags:
      - slurm
    roles:
      - role: slurm_common
  # Applies the slurm_manager role to the manager group.
  - name: Apply Slurm manager config
    hosts: manager
    gather_facts: false
    tags:
      - slurm
    roles:
      - role: slurm_manager
  # Applies the slurm_workers role to the compute and login_node groups.
  # serial: 1 makes Ansible finish the play on one host before starting the next.
  - name: Configure Slurm workers
    hosts: compute, login_node
    gather_facts: false
    serial: 1
    tags:
      - slurm
    roles:
      - role: slurm_workers
  # Applies the slurm_workers_service role to the compute and login_node groups.
  - name: Start Slurm workers
    hosts: compute, login_node
    gather_facts: false
    tags:
      - slurm
    roles:
      - role: slurm_workers_service
  # Applies the slurm_start_services role to the manager group.
  - name: Start Slurm services
    hosts: manager
    gather_facts: false
    tags:
      - slurm
    roles:
      - role: slurm_start_services
  # Applies the slurm_exporter role to the manager group.
  - name: Install slurm exporter
    hosts: manager
    gather_facts: false
    tags:
      - slurm
    roles:
      - role: slurm_exporter
  # Applies the slurm_restd role to the manager group.
  - name: Install slurm restd
    hosts: manager
    gather_facts: false
    tags:
      - slurm
    roles:
      - role: slurm_restd
  # Pulls in the passwordless_ssh.yml playbook from control_plane/tools.
  # import_playbook replaces the deprecated play-level `include:`; the `when`
  # condition is applied to the tasks of the imported playbook, so they are
  # skipped unless control_plane_status recorded for 127.0.0.1 evaluates true.
  - name: Passwordless SSH between manager and compute nodes
    import_playbook: control_plane/tools/passwordless_ssh.yml
    when: hostvars['127.0.0.1']['control_plane_status']