# main.yml
# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
  # Store the lower-cased distribution name as `compute_os`; the `when`
  # conditions further down in this file test it against os_supported_leap.
  - name: Saving distribution of os
    set_fact:
      compute_os: "{{ ansible_facts['distribution'] | lower }}"
  # Make sure the custom-fact directory exists; its path and mode are
  # supplied by role variables.
  - name: Create a custom fact directory on each host
    file:
      state: directory
      path: "{{ custom_fact_dir }}"
      mode: "{{ custom_fact_dir_mode }}"
  # Copy the role's `inventory.fact` file to the target host, owned by root.
  # NOTE(review): the destination is presumably inside the custom fact
  # directory so that it shows up as `ansible_local.inventory` - confirm
  # against the role variables.
  - name: Install accelerator discovery script
    copy:
      src: inventory.fact
      dest: "{{ accelerator_discovery_script_dest }}"
      mode: "{{ accelerator_discovery_script_mode }}"
      owner: root
      group: root
  # Repository, package and system preparation for hosts whose `compute_os`
  # does NOT contain `os_supported_leap` (see the `when` at the end of the
  # block). Network-dependent steps are retried `max_retries` times with
  # `max_delay` seconds between attempts.
  - block:
      - name: Add epel-release repo
        package:
          name: epel-release
          state: present
        tags: install

      - name: Add elrepo GPG key
        rpm_key:
          state: present
          key: "{{ elrepo_gpg_key_url }}"
        register: elrepo_gpg_key
        until: elrepo_gpg_key is not failed
        retries: "{{ max_retries }}"
        delay: "{{ max_delay }}"
        tags: install

      - name: Add elrepo (nvidia kmod drivers)
        package:
          name: "{{ elrepo_rpm_url }}"
          state: present
        register: elrepo
        until: elrepo is not failed
        retries: "{{ max_retries }}"
        delay: "{{ max_delay }}"
        tags: install

      - name: Add docker community edition repository
        get_url:
          url: "{{ docker_repo_url }}"
          dest: "{{ docker_repo_dest }}"
        register: docker_repo
        until: docker_repo is not failed
        retries: "{{ max_retries }}"
        delay: "{{ max_delay }}"
        tags: install

      # Removes the swap entry from fstab. Tagged `install` like the
      # neighbouring steps so tag-filtered runs treat it consistently.
      - name: Permanently Disable swap
        mount:
          name: "swap"
          fstype: swap
          state: absent
        tags: install

      - name: Disable selinux
        selinux:
          state: disabled
        tags: install

      - name: Install common packages
        package:
          name: "{{ common_packages }}"
          state: present
        tags: install

      # Second package set that is only installed on this (non-leap) branch.
      - name: Install common packages
        package:
          name: "{{ common_packages_for_non_leap }}"
          state: present
        tags: install

      # The command module's `warn` argument is deliberately not passed: it
      # was removed in ansible-core 2.14 and makes the task fail there.
      # `changed_when: true` keeps the task reported as changed on every run.
      - name: Versionlock docker
        command: "yum versionlock '{{ item }}'"
        with_items:
          - "{{ docker_packages }}"
        changed_when: true
        tags: install

      # Re-gather facts after the discovery script has been installed so that
      # `ansible_local` values are available to later conditions. Tagged
      # `install` like its neighbours.
      - name: Collect host facts (including acclerator information)
        setup: ~
        tags: install

      # The leading "@" selects a package group, so the value must stay quoted.
      - name: Install infiniBand support
        package:
          name: "@Infiniband Support"
          state: present
        tags: install
    when: ( os_supported_leap not in compute_os )
  # Repository, package and driver preparation for hosts whose `compute_os`
  # contains `os_supported_leap` and whose distribution version is at least
  # `os_supported_leap_version`.
  - block:
      # `leap_repo` is expected to yield items with `name` and `repo` keys.
      - name: Add leap repos
        zypper_repository:
          name: "{{ item.name }}"
          repo: "{{ item.repo }}"
          state: present
          autorefresh: true
        with_items:
          - "{{ leap_repo }}"
        tags: install

      - name: Installing python-xml
        package:
          name: python-xml
          state: present
        tags: install

      # GPG checking is disabled and keys are auto-imported for this repo.
      - name: Add nvidia repo
        zypper_repository:
          name: NVIDIA
          repo: "{{ nvidia_repo }}"
          state: present
          disable_gpg_check: true
          auto_import_keys: true
          autorefresh: true
        tags: install

      # Runs zypper directly; always reported as changed.
      - name: Install nvidia(This might take 10-15 minutes)
        command: zypper --gpg-auto-import-keys install -l -y x11-video-nvidiaG06
        changed_when: true
        tags: install

      - name: Add docker community edition repository
        get_url:
          url: "{{ docker_repo_url_leap }}"
          dest: "{{ docker_repo_dest_leap }}"
        register: docker_repo
        until: docker_repo is not failed
        retries: "{{ max_retries }}"
        delay: "{{ max_delay }}"
        tags: install

      - name: Permanently Disable swap
        mount:
          name: "swap"
          fstype: swap
          state: absent
        tags: install

      - name: Install common packages
        package:
          name: "{{ common_packages }}"
          state: present
        tags: install

      - name: Install docker-compose
        package:
          name: docker-compose
          state: present
        tags: install

      # Re-gather facts so `ansible_local` values are available afterwards.
      - name: Collect host facts (including acclerator information)
        setup: ~
        tags: install

      - name: Install infiniBand support
        package:
          name: infiniband-diags
          state: present
        tags: install
    # Both list entries must hold. The `version` test compares version
    # components numerically; a plain `>=` on the version strings would sort
    # "15.10" before "15.3".
    when:
      - os_supported_leap in compute_os
      - ansible_distribution_version is version(os_supported_leap_version, '>=')
  # Pull in the time-synchronisation tasks from ntp.yml at run time.
  - name: Deploy time ntp/chrony
    tags: install
    include_tasks: ntp.yml
  # Include nvidia.yml only when the local `inventory` fact reports at least
  # one NVIDIA GPU AND the host is either the supported distribution/major
  # version pair or a supported Leap release.
  # The Leap version check uses the `version` test so that components are
  # compared numerically rather than as plain strings.
  - name: Install Nvidia drivers and software components
    include_tasks: nvidia.yml
    when:
      - ansible_local.inventory.nvidia_gpu > 0
      - >-
        ( ansible_facts['distribution'] == os_name and
        ansible_facts['distribution_major_version'] == os_version ) or
        ( ( os_supported_leap in compute_os ) and
        ( ansible_distribution_version is version(os_supported_leap_version, '>=') ) )
    tags: install
  # Include amd.yml only when the local `inventory` fact reports at least one
  # AMD GPU AND the host is either the supported distribution or a supported
  # Leap release.
  # The Leap version check uses the `version` test so that components are
  # compared numerically rather than as plain strings.
  - name: Install AMD GPU drivers and software components
    include_tasks: amd.yml
    when:
      - ansible_local.inventory.amd_gpu > 0
      - >-
        ansible_facts['distribution'] == os_name or
        ( ( os_supported_leap in compute_os ) and
        ( ansible_distribution_version is version(os_supported_leap_version, '>=') ) )
    tags: install
  # Capture the machine's hostname in `machine_hostname` for the hosts-file
  # tasks that follow. The command only reads state, so it is never reported
  # as a change.
  - name: Get the hostname
    command: hostname
    register: machine_hostname
    changed_when: false
  # Publish per-host facts that other hosts read through `hostvars` when
  # building their hosts file entries.
  - name: Set facts for node hostname and ip
    set_fact:
      node_hostname: "{{ machine_hostname.stdout }}"
      node_ip: "{{ inventory_hostname }}"
  # Ensure this host's own "<inventory name> <hostname>" line is present in
  # the hosts file, creating the file if it does not exist.
  - name: Add host name in hosts file
    lineinfile:
      state: present
      dest: "{{ hosts_file_dest }}"
      mode: "{{ hosts_file_mode }}"
      create: true
      line: "{{ inventory_hostname }} {{ machine_hostname.stdout }}"
  # On hosts in the `manager` group: add one hosts-file line per member of
  # the `compute` group, using the facts those hosts published.
  - name: Add compute hosts info in manager node hosts file
    when: "'manager' in group_names"
    with_items: "{{ groups['compute'] }}"
    lineinfile:
      state: present
      dest: "{{ hosts_file_dest }}"
      mode: "{{ hosts_file_mode }}"
      create: true
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
  # On hosts in the `compute` group: add one hosts-file line per member of
  # the `manager` group.
  - name: Add manager hosts info in compute node hosts file
    when: "'compute' in group_names"
    with_items: "{{ groups['manager'] }}"
    lineinfile:
      state: present
      dest: "{{ hosts_file_dest }}"
      mode: "{{ hosts_file_mode }}"
      create: true
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
  # On hosts in the `manager` group, and only when the `login_node_required`
  # value stored on host 127.0.0.1 is truthy: add one hosts-file line per
  # member of the `login_node` group.
  - name: Add login node info in /etc/hosts of manager node
    when:
      - '"manager" in group_names'
      - hostvars["127.0.0.1"]["login_node_required"]
    with_items: "{{ groups['login_node'] }}"
    lineinfile:
      state: present
      dest: "{{ hosts_file_dest }}"
      mode: "{{ hosts_file_mode }}"
      create: true
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
  # On hosts in the `login_node` group, and only when the
  # `login_node_required` value stored on host 127.0.0.1 is truthy: add one
  # hosts-file line per member of the `manager` group.
  - name: Add manager info in /etc/hosts of login node
    when:
      - '"login_node" in group_names'
      - hostvars["127.0.0.1"]["login_node_required"]
    with_items: "{{ groups['manager'] }}"
    lineinfile:
      state: present
      dest: "{{ hosts_file_dest }}"
      mode: "{{ hosts_file_mode }}"
      create: true
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
  # On hosts in the `login_node` group, and only when the
  # `login_node_required` value stored on host 127.0.0.1 is truthy: add one
  # hosts-file line per member of the `compute` group.
  - name: Add compute info in /etc/hosts of login node
    when:
      - hostvars["127.0.0.1"]["login_node_required"]
      - '"login_node" in group_names'
    with_items: "{{ groups['compute'] }}"
    lineinfile:
      state: present
      dest: "{{ hosts_file_dest }}"
      mode: "{{ hosts_file_mode }}"
      create: true
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"