# main.yml 7.7 KB
  1. # Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. ---
  # Store the lower-cased distribution name as `compute_os`; the block-level
  # conditions in this file test it against `os_supported_leap`.
  - name: Saving distribution of os
    set_fact:
      compute_os: >-
        {{ ansible_facts['distribution'] | lower }}
  # Ensure the custom fact directory exists with the configured mode.
  - name: Create a custom fact directory on each host
    file:
      state: directory
      path: "{{ custom_fact_dir }}"
      mode: "{{ custom_fact_dir_mode }}"
  # Copy the role's `inventory.fact` file to the configured destination,
  # owned by root:root with the configured mode.
  - name: Install accelerator discovery script
    copy:
      src: inventory.fact
      dest: "{{ accelerator_discovery_script_dest }}"
      mode: "{{ accelerator_discovery_script_mode }}"
      owner: root
      group: root
  # Repository and package setup for hosts whose `compute_os` does not contain
  # `os_supported_leap` (see the block-level `when` at the bottom).
  - block:
      - name: Add epel-release repo
        package:
          name: epel-release
          state: present
        tags: install

      # Network-dependent tasks below retry until they stop failing,
      # bounded by max_retries / max_delay.
      - name: Add elrepo GPG key
        rpm_key:
          state: present
          key: "{{ elrepo_gpg_key_url }}"
        register: elrepo_gpg_key
        until: elrepo_gpg_key is not failed
        retries: "{{ max_retries }}"
        delay: "{{ max_delay }}"
        tags: install

      - name: Add elrepo (nvidia kmod drivers)
        package:
          name: "{{ elrepo_rpm_url }}"
          state: present
        register: elrepo
        until: elrepo is not failed
        retries: "{{ max_retries }}"
        delay: "{{ max_delay }}"
        tags: install

      - name: Add docker community edition repository
        get_url:
          url: "{{ docker_repo_url }}"
          dest: "{{ docker_repo_dest }}"
        register: docker_repo
        until: docker_repo is not failed
        retries: "{{ max_retries }}"
        delay: "{{ max_delay }}"
        tags: install

      - name: Permanently Disable swap
        mount:
          name: "swap"
          fstype: swap
          state: absent

      - name: Disable selinux
        selinux:
          state: disabled
        tags: install

      - name: Install common packages
        package:
          name: "{{ common_packages }}"
          state: present
        tags: install

      # Renamed: this task previously shared the name "Install common packages"
      # with the task above, which made the two indistinguishable in run output.
      - name: Install common packages for non-leap hosts
        package:
          name: "{{ common_packages_for_non_leap }}"
          state: present
        tags: install

      # The former `args: warn: false` was dropped: the `warn` parameter of the
      # command module was removed in ansible-core 2.14 and makes the task fail
      # there. On older releases the only effect is an advisory warning.
      - name: Versionlock docker
        command: "yum versionlock '{{ item }}'"
        with_items:
          - "{{ docker_packages }}"
        changed_when: true
        tags: install

      # Re-run fact gathering after the packages above have been installed.
      - name: Collect host facts (including accelerator information)
        setup: ~

      - name: Install infiniBand support
        package:
          name: "@Infiniband Support"
          state: present
        tags: install
    when: os_supported_leap not in compute_os
  # Repository and package setup for hosts whose `compute_os` contains
  # `os_supported_leap` and whose distribution version is at least
  # `os_supported_leap_version` (see the block-level `when` at the bottom).
  - block:
      - name: Add leap repos
        zypper_repository:
          name: "{{ item.name }}"
          repo: "{{ item.repo }}"
          state: present
          autorefresh: true
        with_items:
          - "{{ leap_repo }}"
        tags: install

      - name: Installing python-xml
        package:
          name: python-xml
          state: present
        tags: install

      - name: Add nvidia repo
        zypper_repository:
          name: NVIDIA
          repo: "{{ nvidia_repo }}"
          state: present
          autorefresh: true
        tags: install

      # Runs zypper directly with --gpg-auto-import-keys; always reported as
      # changed because the command's outcome is not inspected.
      - name: Install nvidia(This might take 10-15 minutes)
        command: zypper --gpg-auto-import-keys install -l -y x11-video-nvidiaG06
        changed_when: true
        tags: install

      # Retries until the download stops failing, bounded by
      # max_retries / max_delay.
      - name: Add docker community edition repository
        get_url:
          url: "{{ docker_repo_url_leap }}"
          dest: "{{ docker_repo_dest_leap }}"
        register: docker_repo
        until: docker_repo is not failed
        retries: "{{ max_retries }}"
        delay: "{{ max_delay }}"
        tags: install

      - name: Permanently Disable swap
        mount:
          name: "swap"
          fstype: swap
          state: absent
        tags: install

      - name: Install common packages
        package:
          name: "{{ common_packages }}"
          state: present
        tags: install

      - name: Install docker-compose
        package:
          name: docker-compose
          state: present
        tags: install

      # Re-run fact gathering after the packages above have been installed.
      - name: Collect host facts (including accelerator information)
        setup: ~
        tags: install

      - name: Install infiniBand support
        package:
          name: infiniband-diags
          state: present
        tags: install
    # Both conditions must hold. The `version` test compares version
    # components numerically; the previous plain `>=` compared the strings
    # lexically, so e.g. "15.10" sorted before "15.3".
    when:
      - os_supported_leap in compute_os
      - ansible_distribution_version is version(os_supported_leap_version, '>=')
  # Pull in the tasks defined in ntp.yml at this point in the run.
  - name: Deploy time ntp/chrony
    tags: install
    include_tasks: ntp.yml
  # Include nvidia.yml only when the local `inventory` fact reports at least
  # one Nvidia GPU AND the host is either the supported os_name/os_version or
  # a supported leap release.
  # The leap version check uses the `version` test so that versions are
  # compared component-wise instead of as plain strings ("15.10" vs "15.3").
  - name: Install Nvidia drivers and software components
    include_tasks: nvidia.yml
    when:
      - ansible_local.inventory.nvidia_gpu > 0
      - >-
        (ansible_facts['distribution'] == os_name and
        ansible_facts['distribution_major_version'] == os_version) or
        (os_supported_leap in compute_os and
        ansible_distribution_version is version(os_supported_leap_version, '>='))
    tags: install
  # Include amd.yml only when the local `inventory` fact reports at least one
  # AMD GPU AND the host either runs os_name or is a supported leap release.
  # The leap version check uses the `version` test so that versions are
  # compared component-wise instead of as plain strings ("15.10" vs "15.3").
  - name: Install AMD GPU drivers and software components
    include_tasks: amd.yml
    when:
      - ansible_local.inventory.amd_gpu > 0
      - >-
        ansible_facts['distribution'] == os_name or
        (os_supported_leap in compute_os and
        ansible_distribution_version is version(os_supported_leap_version, '>='))
    tags: install
  # Capture the output of `hostname` in `machine_hostname` for the hosts-file
  # tasks that follow. The command only reads state, so it is reported as
  # unchanged rather than "changed" on every run.
  - name: Get the hostname
    command: hostname
    register: machine_hostname
    changed_when: false
  # Publish this host's inventory name and registered `hostname` output as
  # facts so that other hosts can read them through `hostvars`.
  - name: Set facts for node hostname and ip
    set_fact:
      node_hostname: "{{ machine_hostname.stdout }}"
      node_ip: "{{ inventory_hostname }}"
  # Ensure the hosts file contains a "<inventory name> <hostname>" line for
  # this host; the file is created if it does not exist.
  - name: Add host name in hosts file
    lineinfile:
      path: "{{ hosts_file_dest }}"
      state: present
      create: true
      mode: "{{ hosts_file_mode }}"
      line: "{{ inventory_hostname }} {{ machine_hostname.stdout }}"
  # On hosts in the `manager` group: add one hosts-file line per member of the
  # `compute` group, built from that member's node_ip / node_hostname facts.
  - name: Add compute hosts info in manager node hosts file
    when: "'manager' in group_names"
    lineinfile:
      path: "{{ hosts_file_dest }}"
      state: present
      create: true
      mode: "{{ hosts_file_mode }}"
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
    with_items:
      - "{{ groups['compute'] }}"
  # On hosts in the `compute` group: add one hosts-file line per member of the
  # `manager` group, built from that member's node_ip / node_hostname facts.
  - name: Add manager hosts info in compute node hosts file
    when: "'compute' in group_names"
    lineinfile:
      path: "{{ hosts_file_dest }}"
      state: present
      create: true
      mode: "{{ hosts_file_mode }}"
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
    with_items:
      - "{{ groups['manager'] }}"
  # On hosts in the `manager` group, and only when `login_node_required` is
  # set on the 127.0.0.1 host: add one hosts-file line per member of the
  # `login_node` group.
  - name: Add login node info in /etc/hosts of manager node
    when:
      - '"manager" in group_names'
      - hostvars["127.0.0.1"]["login_node_required"]
    lineinfile:
      path: "{{ hosts_file_dest }}"
      state: present
      create: true
      mode: "{{ hosts_file_mode }}"
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
    with_items:
      - "{{ groups['login_node'] }}"
  # On hosts in the `login_node` group, and only when `login_node_required` is
  # set on the 127.0.0.1 host: add one hosts-file line per member of the
  # `manager` group.
  - name: Add manager info in /etc/hosts of login node
    when:
      - '"login_node" in group_names'
      - hostvars["127.0.0.1"]["login_node_required"]
    lineinfile:
      path: "{{ hosts_file_dest }}"
      state: present
      create: true
      mode: "{{ hosts_file_mode }}"
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
    with_items:
      - "{{ groups['manager'] }}"
  # On hosts in the `login_node` group, and only when `login_node_required` is
  # set on the 127.0.0.1 host: add one hosts-file line per member of the
  # `compute` group.
  - name: Add compute info in /etc/hosts of login node
    when:
      - hostvars["127.0.0.1"]["login_node_required"]
      - '"login_node" in group_names'
    lineinfile:
      path: "{{ hosts_file_dest }}"
      state: present
      create: true
      mode: "{{ hosts_file_mode }}"
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
    with_items:
      - "{{ groups['compute'] }}"
  245. - '"login_node" in group_names'