main.yml 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. # Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. ---
  # Store the lowercased distribution name; the distribution-specific
  # blocks later in this file test against it.
  - name: Saving distribution of os
    set_fact:
      compute_os: "{{ ansible_facts.distribution | lower }}"
  # Ensure the directory that holds custom facts exists with the
  # configured mode.
  - name: Create a custom fact directory on each host
    file:
      state: directory
      path: "{{ custom_fact_dir }}"
      mode: "{{ custom_fact_dir_mode }}"
  # Copy the role's inventory.fact file to the configured destination,
  # owned by root.
  - name: Install accelerator discovery script
    copy:
      src: inventory.fact
      dest: "{{ accelerator_discovery_script_dest }}"
      mode: "{{ accelerator_discovery_script_mode }}"
      owner: root
      group: root
  # Package repositories, base packages and host preparation for every
  # host whose distribution name does not contain `os_supported_leap`.
  - name: Prepare hosts that are not running the supported leap os
    when: os_supported_leap not in compute_os
    block:
      - name: Add epel-release repo
        package:
          name: epel-release
          state: present
        tags: install

      # Network fetches below are retried until they stop failing.
      - name: Add elrepo GPG key
        rpm_key:
          state: present
          key: "{{ elrepo_gpg_key_url }}"
        register: elrepo_gpg_key
        until: elrepo_gpg_key is not failed
        retries: "{{ max_retries }}"
        delay: "{{ max_delay }}"
        tags: install

      - name: Add elrepo (nvidia kmod drivers)
        package:
          name: "{{ elrepo_rpm_url }}"
          state: present
        register: elrepo
        until: elrepo is not failed
        retries: "{{ max_retries }}"
        delay: "{{ max_delay }}"
        tags: install

      - name: Add docker community edition repository
        get_url:
          url: "{{ docker_repo_url }}"
          dest: "{{ docker_repo_dest }}"
        register: docker_repo
        until: docker_repo is not failed
        retries: "{{ max_retries }}"
        delay: "{{ max_delay }}"
        tags: install

      # Tagged `install` like the equivalent task in the leap block, so a
      # `--tags install` run treats both distributions the same way.
      - name: Permanently Disable swap
        mount:
          name: "swap"
          fstype: swap
          state: absent
        tags: install

      - name: Disable selinux
        selinux:
          state: disabled
        tags: install

      - name: Install common packages
        package:
          name: "{{ common_packages }}"
          state: present
        tags: install

      # Distinct name: this task installs the extra non-leap package list,
      # not the shared one above.
      - name: Install common packages for non-leap os
        package:
          name: "{{ common_packages_for_non_leap }}"
          state: present
        tags: install

      # `warn` is intentionally not passed: the argument was removed from
      # the command module and its presence fails the task on current
      # ansible-core releases.
      - name: Versionlock docker
        command: "yum versionlock '{{ item }}'"
        with_items: "{{ docker_packages }}"
        changed_when: true
        tags: install

      # Re-gather facts so the local facts produced by the discovery script
      # are available to the GPU tasks that follow this block. Tagged
      # `install` like the equivalent task in the leap block.
      - name: Collect host facts (including accelerator information)
        setup: ~
        tags: install

      - name: Install infiniBand support
        package:
          name: "@Infiniband Support"
          state: present
        tags: install
  # Package repositories, base packages and host preparation for hosts
  # whose distribution name contains `os_supported_leap` and whose release
  # is at least `os_supported_leap_version`.
  - name: Prepare hosts running the supported leap os
    when:
      - os_supported_leap in compute_os
      # Use the `version` test: a plain string `>=` compares character by
      # character and would order "15.10" before "15.3".
      - ansible_distribution_version is version(os_supported_leap_version, '>=')
    block:
      - name: Installing python-xml
        package:
          name: python-xml
          state: present
        tags: install

      - name: Add nvidia repo
        zypper_repository:
          name: NVIDIA
          repo: "{{ nvidia_repo }}"
          state: present
          autorefresh: true
        tags: install

      # NOTE(review): this install is always reported as unchanged
      # (`changed_when: false`) - confirm that is intended.
      - name: Install nvidia
        command: zypper --gpg-auto-import-keys install -l -y x11-video-nvidiaG06
        changed_when: false
        tags: install

      # Retried until the download stops failing.
      - name: Add docker community edition repository
        get_url:
          url: "{{ docker_repo_url_leap }}"
          dest: "{{ docker_repo_dest_leap }}"
        register: docker_repo
        until: docker_repo is not failed
        retries: "{{ max_retries }}"
        delay: "{{ max_delay }}"
        tags: install

      - name: Permanently Disable swap
        mount:
          name: "swap"
          fstype: swap
          state: absent
        tags: install

      - name: Install common packages
        package:
          name: "{{ common_packages }}"
          state: present
        tags: install

      - name: Install docker-compose
        package:
          name: docker-compose
          state: present
        tags: install

      # Re-gather facts so the local facts produced by the discovery script
      # are available to the GPU tasks that follow this block.
      - name: Collect host facts (including accelerator information)
        setup: ~
        tags: install

      - name: Install infiniBand support
        package:
          name: infiniband-diags
          state: present
        tags: install
  # Pull in the time-synchronisation tasks from ntp.yml.
  # NOTE(review): tags on a dynamic include apply to the include itself;
  # confirm the tasks inside ntp.yml carry their own tags if needed.
  - name: Deploy time ntp/chrony
    tags: install
    include_tasks: ntp.yml
  # Include the NVIDIA tasks only when the local inventory fact reports at
  # least one NVIDIA GPU and the host matches one of the two supported
  # OS targets.
  - name: Install Nvidia drivers and software components
    include_tasks: nvidia.yml
    when:
      - ansible_local.inventory.nvidia_gpu > 0
      # Either the configured os_name/os_version pair, or the supported leap
      # release. The leap release is checked with the `version` test because
      # a plain string `>=` would order "15.10" before "15.3".
      - >-
        (ansible_facts['distribution'] == os_name
        and ansible_facts['distribution_major_version'] == os_version)
        or
        (os_supported_leap in compute_os
        and ansible_distribution_version is version(os_supported_leap_version, '>='))
    tags: install
  # Include the AMD tasks only when the local inventory fact reports at
  # least one AMD GPU and the host matches one of the two supported
  # OS targets.
  - name: Install AMD GPU drivers and software components
    include_tasks: amd.yml
    when:
      - ansible_local.inventory.amd_gpu > 0
      # Either the configured os_name, or the supported leap release. The
      # leap release is checked with the `version` test because a plain
      # string `>=` would order "15.10" before "15.3".
      - >-
        ansible_facts['distribution'] == os_name
        or
        (os_supported_leap in compute_os
        and ansible_distribution_version is version(os_supported_leap_version, '>='))
    tags: install
  # Capture the hostname as reported by the node itself; the result is
  # used by the hosts-file tasks below. Running `hostname` only reads
  # state, so the task must never be reported as a change.
  - name: Get the hostname
    command: hostname
    register: machine_hostname
    changed_when: false
  # Publish this host's name and inventory address as facts so other
  # hosts can read them through hostvars.
  - name: Set facts for node hostname and ip
    set_fact:
      node_hostname: "{{ machine_hostname.stdout }}"
      node_ip: "{{ inventory_hostname }}"
  # Make sure the host's own "<inventory name> <hostname>" line is present
  # in the hosts file, creating the file if it is missing.
  - name: Add host name in hosts file
    lineinfile:
      path: "{{ hosts_file_dest }}"
      state: present
      line: "{{ inventory_hostname }} {{ machine_hostname.stdout }}"
      mode: "{{ hosts_file_mode }}"
      create: true
  # On manager hosts: one hosts-file line per member of the compute group.
  - name: Add compute hosts info in manager node hosts file
    when: "'manager' in group_names"
    with_items: "{{ groups['compute'] }}"
    lineinfile:
      path: "{{ hosts_file_dest }}"
      state: present
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
      mode: "{{ hosts_file_mode }}"
      create: true
  # On compute hosts: one hosts-file line per member of the manager group.
  - name: Add manager hosts info in compute node hosts file
    when: "'compute' in group_names"
    with_items: "{{ groups['manager'] }}"
    lineinfile:
      path: "{{ hosts_file_dest }}"
      state: present
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
      mode: "{{ hosts_file_mode }}"
      create: true
  # On manager hosts, and only when the control host has
  # login_node_required set: one hosts-file line per login node.
  - name: Add login node info in /etc/hosts of manager node
    when:
      - "'manager' in group_names"
      - hostvars['127.0.0.1']['login_node_required']
    with_items: "{{ groups['login_node'] }}"
    lineinfile:
      path: "{{ hosts_file_dest }}"
      state: present
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
      mode: "{{ hosts_file_mode }}"
      create: true
  # On login nodes, and only when the control host has
  # login_node_required set: one hosts-file line per manager host.
  - name: Add manager info in /etc/hosts of login node
    when:
      - "'login_node' in group_names"
      - hostvars['127.0.0.1']['login_node_required']
    with_items: "{{ groups['manager'] }}"
    lineinfile:
      path: "{{ hosts_file_dest }}"
      state: present
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
      mode: "{{ hosts_file_mode }}"
      create: true
  # On login nodes, and only when the control host has
  # login_node_required set: one hosts-file line per compute host.
  - name: Add compute info in /etc/hosts of login node
    when:
      - hostvars['127.0.0.1']['login_node_required']
      - "'login_node' in group_names"
    with_items: "{{ groups['compute'] }}"
    lineinfile:
      path: "{{ hosts_file_dest }}"
      state: present
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
      mode: "{{ hosts_file_mode }}"
      create: true