# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
  # Store the lower-cased distribution name from the gathered facts in
  # `compute_os` so later conditions can test it.
  - name: Saving distribution of os
    set_fact:
      compute_os: "{{ ansible_facts.distribution | lower }}"
  # Make sure the directory named by `custom_fact_dir` exists, with the mode
  # given by `custom_fact_dir_mode`.
  - name: Create a custom fact directory on each host
    file:
      state: directory
      path: "{{ custom_fact_dir }}"
      mode: "{{ custom_fact_dir_mode }}"
  # Copy the role's `inventory.fact` file to the configured destination,
  # owned by root:root with the configured mode.
  - name: Install accelerator discovery script
    copy:
      src: inventory.fact
      dest: "{{ accelerator_discovery_script_dest }}"
      mode: "{{ accelerator_discovery_script_mode }}"
      owner: root
      group: root
  # Repository and package setup for hosts whose `compute_os` does NOT contain
  # `os_supported_leap` (see the `when` at the end of this block).
  - block:
      - name: Add epel-release repo
        package:
          name: epel-release
          state: present
        tags: install

      # Retried up to `max_retries` times, `max_delay` seconds apart, until
      # the key import stops failing.
      - name: Add elrepo GPG key
        rpm_key:
          state: present
          key: "{{ elrepo_gpg_key_url }}"
        register: elrepo_gpg_key
        until: elrepo_gpg_key is not failed
        retries: "{{ max_retries }}"
        delay: "{{ max_delay }}"
        tags: install

      - name: Add elrepo (nvidia kmod drivers)
        package:
          name: "{{ elrepo_rpm_url }}"
          state: present
        register: elrepo
        until: elrepo is not failed
        retries: "{{ max_retries }}"
        delay: "{{ max_delay }}"
        tags: install

      - name: Add docker community edition repository
        get_url:
          url: "{{ docker_repo_url }}"
          dest: "{{ docker_repo_dest }}"
        register: docker_repo
        until: docker_repo is not failed
        retries: "{{ max_retries }}"
        delay: "{{ max_delay }}"
        tags: install

      # Removes the swap entry from the mount table configuration.
      - name: Permanently Disable swap
        mount:
          name: "swap"
          fstype: swap
          state: absent

      - name: Install common packages
        package:
          name: "{{ common_packages }}"
          state: present
        tags: install

      # NOTE(review): this task shares its name with the previous one; only
      # the package list variable differs.
      - name: Install common packages
        package:
          name: "{{ common_packages_for_non_leap }}"
          state: present
        tags: install

      # The `warn` argument of the command module was removed in
      # ansible-core 2.14; passing it makes the task fail with an
      # "unsupported parameters" error, so it is no longer supplied.
      - name: Versionlock docker
        command: "yum versionlock '{{ item }}'"
        with_items: "{{ docker_packages }}"
        changed_when: true
        tags: install

      # Re-gather facts so the custom fact script installed earlier in this
      # file is reflected in `ansible_local`.
      - name: Collect host facts (including acclerator information)
        setup: ~

      - name: Install infiniBand support
        package:
          name: "@Infiniband Support"
          state: present
        tags: install
    when: ( os_supported_leap not in compute_os )
  # Repository and package setup for hosts whose `compute_os` contains
  # `os_supported_leap` and whose distribution version is at least
  # `os_supported_leap_version` (see the `when` at the end of this block).
  - block:
      - name: Installing python-xml
        package:
          name: python-xml
          state: present
        tags: install

      - name: Add nvidia repo
        zypper_repository:
          name: NVIDIA
          repo: "{{ nvidia_repo }}"
          state: present
          autorefresh: true
        tags: install

      # NOTE(review): reported as never changed even though it runs an
      # install command; kept as in the original.
      - name: Install nvidia
        command: zypper --gpg-auto-import-keys install -l -y x11-video-nvidiaG06
        changed_when: false
        tags: install

      # Retried up to `max_retries` times, `max_delay` seconds apart.
      - name: Add docker community edition repository
        get_url:
          url: "{{ docker_repo_url_leap }}"
          dest: "{{ docker_repo_dest_leap }}"
        register: docker_repo
        until: docker_repo is not failed
        retries: "{{ max_retries }}"
        delay: "{{ max_delay }}"
        tags: install

      # Removes the swap entry from the mount table configuration.
      - name: Permanently Disable swap
        mount:
          name: "swap"
          fstype: swap
          state: absent
        tags: install

      - name: Install common packages
        package:
          name: "{{ common_packages }}"
          state: present
        tags: install

      - name: Install docker-compose
        package:
          name: docker-compose
          state: present
        tags: install

      # Re-gather facts so the custom fact script installed earlier in this
      # file is reflected in `ansible_local`.
      - name: Collect host facts (including acclerator information)
        setup: ~
        tags: install

      - name: Install infiniBand support
        package:
          name: infiniband-diags
          state: present
        tags: install
    # List items are AND-ed. The `version` test compares version components
    # numerically; a plain `>=` on the strings would sort "15.10" below "15.3".
    when:
      - os_supported_leap in compute_os
      - ansible_distribution_version is version(os_supported_leap_version, '>=')
  # Dynamically include the tasks from ntp.yml.
  - name: Deploy time ntp/chrony
    tags: install
    include_tasks: ntp.yml
  # Include nvidia.yml only when the custom `inventory` local fact reports at
  # least one NVIDIA GPU AND the host is either the supported
  # `os_name`/`os_version` combination or a sufficiently new Leap release.
  # The Leap version check uses the `version` test so components are compared
  # numerically ("15.10" is newer than "15.3"), not as plain strings.
  - name: Install Nvidia drivers and software components
    include_tasks: nvidia.yml
    when:
      - ansible_local.inventory.nvidia_gpu > 0
      - >-
        (ansible_facts['distribution'] == os_name and
        ansible_facts['distribution_major_version'] == os_version) or
        ((os_supported_leap in compute_os) and
        (ansible_distribution_version is version(os_supported_leap_version, '>=')))
    tags: install
  # Include amd.yml only when the custom `inventory` local fact reports at
  # least one AMD GPU AND the host is either `os_name` or a sufficiently new
  # Leap release. The Leap version check uses the `version` test so
  # components are compared numerically ("15.10" is newer than "15.3"),
  # not as plain strings.
  - name: Install AMD GPU drivers and software components
    include_tasks: amd.yml
    when:
      - ansible_local.inventory.amd_gpu > 0
      - >-
        ansible_facts['distribution'] == os_name or
        ((os_supported_leap in compute_os) and
        (ansible_distribution_version is version(os_supported_leap_version, '>=')))
    tags: install
  # Capture the output of `hostname` in `machine_hostname` for the tasks
  # below. The command only reads state, so it must never report "changed".
  - name: Get the hostname
    command: hostname
    register: machine_hostname
    changed_when: false
  # Publish this host's inventory name and the registered `hostname` output
  # as host facts; the hosts-file tasks below read them through `hostvars`.
  - name: Set facts for node hostname and ip
    set_fact:
      node_hostname: "{{ machine_hostname.stdout }}"
      node_ip: "{{ inventory_hostname }}"
  # Ensure the hosts file holds a "<inventory name> <hostname>" line for this
  # host, creating the file if it does not exist.
  - name: Add host name in hosts file
    lineinfile:
      dest: "{{ hosts_file_dest }}"
      mode: "{{ hosts_file_mode }}"
      create: true
      state: present
      line: "{{ inventory_hostname }} {{ machine_hostname.stdout }}"
  # On hosts in the `manager` group, add one hosts-file line per member of
  # the `compute` group, built from that member's node_ip/node_hostname facts.
  - name: Add compute hosts info in manager node hosts file
    when: "'manager' in group_names"
    with_items: "{{ groups['compute'] }}"
    lineinfile:
      dest: "{{ hosts_file_dest }}"
      mode: "{{ hosts_file_mode }}"
      create: true
      state: present
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
  # On hosts in the `compute` group, add one hosts-file line per member of
  # the `manager` group, built from that member's node_ip/node_hostname facts.
  - name: Add manager hosts info in compute node hosts file
    when: "'compute' in group_names"
    with_items: "{{ groups['manager'] }}"
    lineinfile:
      dest: "{{ hosts_file_dest }}"
      mode: "{{ hosts_file_mode }}"
      create: true
      state: present
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
  # On hosts in the `manager` group, and only when `login_node_required` is
  # set on the 127.0.0.1 host, add one hosts-file line per member of the
  # `login_node` group.
  # The loop expression is evaluated before `when`, so an inventory with no
  # `login_node` group would fail here even when the login node is not
  # required; `default([])` turns that case into an empty loop.
  - name: Add login node info in /etc/hosts of manager node
    lineinfile:
      dest: "{{ hosts_file_dest }}"
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
      state: present
      create: true
      mode: "{{ hosts_file_mode }}"
    with_items: "{{ groups['login_node'] | default([]) }}"
    when:
      - '"manager" in group_names'
      - hostvars["127.0.0.1"]["login_node_required"]
  # On hosts in the `login_node` group, and only when `login_node_required`
  # is set on the 127.0.0.1 host, add one hosts-file line per member of the
  # `manager` group.
  - name: Add manager info in /etc/hosts of login node
    when:
      - "'login_node' in group_names"
      - hostvars['127.0.0.1']['login_node_required']
    with_items: "{{ groups['manager'] }}"
    lineinfile:
      dest: "{{ hosts_file_dest }}"
      mode: "{{ hosts_file_mode }}"
      create: true
      state: present
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
  # On hosts in the `login_node` group, and only when `login_node_required`
  # is set on the 127.0.0.1 host, add one hosts-file line per member of the
  # `compute` group.
  - name: Add compute info in /etc/hosts of login node
    when:
      - hostvars['127.0.0.1']['login_node_required']
      - "'login_node' in group_names"
    with_items: "{{ groups['compute'] }}"
    lineinfile:
      dest: "{{ hosts_file_dest }}"
      mode: "{{ hosts_file_mode }}"
      create: true
      state: present
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"