# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
  # Ensure the directory named by custom_fact_dir exists, with the
  # permissions given by custom_fact_dir_mode.
  - name: Create a custom fact directory on each host
    file:
      state: directory
      path: "{{ custom_fact_dir }}"
      mode: "{{ custom_fact_dir_mode }}"
  # Copy the role file inventory.fact to accelerator_discovery_script_dest,
  # owned by root:root, with mode accelerator_discovery_script_mode.
  - name: Install accelerator discovery script
    copy:
      src: inventory.fact
      dest: "{{ accelerator_discovery_script_dest }}"
      mode: "{{ accelerator_discovery_script_mode }}"
      owner: root
      group: root
  # Make sure the epel-release package is installed (generic package module).
  - name: Add epel-release repo
    tags: install
    package:
      state: present
      name: epel-release
  # Import the RPM GPG key located at elrepo_gpg_key_url.
  # The task is retried up to 20 times, 10 seconds apart, until it stops
  # failing.
  - name: Add elrepo GPG key
    tags: install
    rpm_key:
      key: "{{ elrepo_gpg_key_url }}"
      state: present
    register: elrepo_gpg_key
    retries: 20
    delay: 10
    until: elrepo_gpg_key is not failed
  # Install the package referenced by elrepo_rpm_url.
  # The task is retried up to 20 times, 10 seconds apart, until it stops
  # failing.
  - name: Add elrepo (nvidia kmod drivers)
    tags: install
    package:
      state: present
      name: "{{ elrepo_rpm_url }}"
    register: elrepo
    retries: 20
    delay: 10
    until: elrepo is not failed
  # Download docker_repo_url to docker_repo_dest.
  # The task is retried up to 20 times, 10 seconds apart, until it stops
  # failing.
  - name: Add docker community edition repository
    tags: install
    get_url:
      dest: "{{ docker_repo_dest }}"
      url: "{{ docker_repo_url }}"
    register: docker_repo
    retries: 20
    delay: 10
    until: docker_repo is not failed
  # Ask the mount module to make the swap-type mount named "swap" absent.
  # NOTE(review): this presumably only affects the fstab entry whose mount
  # point is literally "swap" - confirm it matches the target hosts' fstab.
  - name: Permanently Disable swap
    mount:
      state: absent
      fstype: swap
      name: swap
  # Set the SELinux state to "disabled" through the selinux module.
  - name: Disable selinux
    tags: install
    selinux:
      state: disabled
  # Install every package listed in common_packages.
  - name: Install common packages
    tags: install
    package:
      state: present
      name: "{{ common_packages }}"
  # Run `yum versionlock` once per entry of docker_packages.
  # The task always reports "changed" (changed_when: true).
  # NOTE(review): the `warn` argument is presumably only accepted by older
  # Ansible releases - confirm against the Ansible version in use.
  - name: Versionlock docker
    tags: install
    command: "yum versionlock '{{ item }}'"
    args:
      warn: false
    changed_when: true
    with_items:
      - "{{ docker_packages }}"
  # Re-run fact gathering with the setup module (no arguments).
  # Fixes the "acclerator" typo in the task name and spells the empty
  # argument as an explicit null instead of "~".
  - name: Collect host facts (including accelerator information)
    setup: null
  # Install the "@Infiniband Support" package group.
  - name: Install infiniBand support
    tags: install
    package:
      state: present
      name: "@Infiniband Support"
  # Pull in the tasks defined in ntp.yml.
  - name: Deploy time ntp/chrony
    tags: install
    include_tasks: ntp.yml
  # Include nvidia.yml only when all of the following hold:
  #   * the local "inventory" fact reports nvidia_gpu > 0
  #   * the distribution equals os_name
  #   * the distribution major version equals os_version
  - name: Install Nvidia drivers and software components
    tags: install
    include_tasks: nvidia.yml
    when:
      - ansible_local.inventory.nvidia_gpu > 0
      - ansible_facts['distribution'] == os_name
      - ansible_facts['distribution_major_version'] == os_version
  # Include amd.yml only when the local "inventory" fact reports amd_gpu > 0
  # and the distribution equals os_name.
  # NOTE(review): unlike the NVIDIA include, no distribution_major_version
  # check is made here - confirm that this is intentional.
  - name: Install AMD GPU drivers and software components
    tags: install
    include_tasks: amd.yml
    when:
      - ansible_local.inventory.amd_gpu > 0
      - ansible_facts['distribution'] == os_name
  # Run `hostname` and keep the result in machine_hostname; later tasks read
  # machine_hostname.stdout.
  # The command only reads state, so it must never be reported as a change
  # (the original `changed_when: true` flagged a change on every run).
  - name: Get the hostname
    command: hostname
    register: machine_hostname
    changed_when: false
  # Publish two per-host facts:
  #   node_hostname - stdout of the registered machine_hostname result
  #   node_ip       - the host's inventory_hostname
  - name: Set facts for node hostname and ip
    set_fact:
      node_hostname: "{{ machine_hostname.stdout }}"
      node_ip: "{{ inventory_hostname }}"
  # Ensure hosts_file_dest contains the line
  # "<inventory_hostname> <machine_hostname.stdout>" for this host.
  # The file is created if missing and given mode hosts_file_mode.
  # `create` uses the canonical boolean `true` instead of the YAML 1.1
  # truthy value `yes`.
  - name: Add host name in hosts file
    lineinfile:
      dest: "{{ hosts_file_dest }}"
      line: "{{ inventory_hostname }} {{ machine_hostname.stdout }}"
      state: present
      create: true
      mode: "{{ hosts_file_mode }}"
  # On hosts in the "manager" group: for each host in groups['compute'],
  # ensure hosts_file_dest contains "<node_ip> <node_hostname>" taken from
  # that host's hostvars.
  # The file is created if missing and given mode hosts_file_mode.
  # `create` uses the canonical boolean `true` instead of the YAML 1.1
  # truthy value `yes`.
  - name: Add compute hosts info in manager node hosts file
    lineinfile:
      dest: "{{ hosts_file_dest }}"
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
      state: present
      create: true
      mode: "{{ hosts_file_mode }}"
    with_items:
      - "{{ groups['compute'] }}"
    when: "'manager' in group_names"
  # On hosts in the "compute" group: for each host in groups['manager'],
  # ensure hosts_file_dest contains "<node_ip> <node_hostname>" taken from
  # that host's hostvars.
  # The file is created if missing and given mode hosts_file_mode.
  # `create` uses the canonical boolean `true` instead of the YAML 1.1
  # truthy value `yes`.
  - name: Add manager hosts info in compute node hosts file
    lineinfile:
      dest: "{{ hosts_file_dest }}"
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
      state: present
      create: true
      mode: "{{ hosts_file_mode }}"
    with_items:
      - "{{ groups['manager'] }}"
    when: "'compute' in group_names"
  # On hosts in the "manager" group, and only when the login_node_required
  # value stored in the hostvars of "127.0.0.1" is truthy: for each host in
  # groups['login_node'], ensure hosts_file_dest contains
  # "<node_ip> <node_hostname>" taken from that host's hostvars.
  # The file is created if missing and given mode hosts_file_mode.
  # `create` uses the canonical boolean `true` instead of the YAML 1.1
  # truthy value `yes`.
  - name: Add login node info in /etc/hosts of manager node
    lineinfile:
      dest: "{{ hosts_file_dest }}"
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
      state: present
      create: true
      mode: "{{ hosts_file_mode }}"
    with_items:
      - "{{ groups['login_node'] }}"
    when:
      - '"manager" in group_names'
      - hostvars["127.0.0.1"]["login_node_required"]
  # On hosts in the "login_node" group, and only when the login_node_required
  # value stored in the hostvars of "127.0.0.1" is truthy: for each host in
  # groups['manager'], ensure hosts_file_dest contains
  # "<node_ip> <node_hostname>" taken from that host's hostvars.
  # The file is created if missing and given mode hosts_file_mode.
  # `create` uses the canonical boolean `true` instead of the YAML 1.1
  # truthy value `yes`.
  - name: Add manager info in /etc/hosts of login node
    lineinfile:
      dest: "{{ hosts_file_dest }}"
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
      state: present
      create: true
      mode: "{{ hosts_file_mode }}"
    with_items:
      - "{{ groups['manager'] }}"
    when:
      - '"login_node" in group_names'
      - hostvars["127.0.0.1"]["login_node_required"]
  # Only when the login_node_required value stored in the hostvars of
  # "127.0.0.1" is truthy, and on hosts in the "login_node" group: for each
  # host in groups['compute'], ensure hosts_file_dest contains
  # "<node_ip> <node_hostname>" taken from that host's hostvars.
  # The file is created if missing and given mode hosts_file_mode.
  # `create` uses the canonical boolean `true` instead of the YAML 1.1
  # truthy value `yes`.
  - name: Add compute info in /etc/hosts of login node
    lineinfile:
      dest: "{{ hosts_file_dest }}"
      line: "{{ hostvars[item].node_ip }} {{ hostvars[item].node_hostname }}"
      state: present
      create: true
      mode: "{{ hosts_file_mode }}"
    with_items:
      - "{{ groups['compute'] }}"
    when:
      - hostvars["127.0.0.1"]["login_node_required"]
      - '"login_node" in group_names'