main.yml 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
  # Load the variables defined by the slurm_manager role so the tasks below
  # can reference them.
  - name: Include common variables
    include_vars:
      file: ../../slurm_manager/vars/main.yml
  # Make the slurmd spool path owned by slurm:slurm with the mode in tmp_mode.
  # NOTE(review): `state: touch` creates an empty regular file when the path
  # does not exist - confirm spool_slurmd_pth already exists as intended.
  - name: Give slurm user permission to slurmd spool
    file:
      state: touch
      path: "{{ spool_slurmd_pth }}"
      mode: "{{ tmp_mode }}"
      owner: slurm
      group: slurm
  # Touch the slurm log path and hand it to slurm:slurm.
  # NOTE(review): the loop item (slurmd.log) is never referenced in `path`,
  # so the task touches slurm_logpth itself once - confirm whether the path
  # was meant to include "{{ item }}".
  - name: Create log files on compute nodes
    file:
      state: touch
      path: "{{ slurm_logpth }}"
      mode: "{{ tmp_mode }}"
      owner: slurm
      group: slurm
    with_items:
      - slurmd.log
  # Ensure the firewalld package is installed via the platform package manager.
  - name: Install firewalld
    tags: firewalld
    package:
      state: present
      name: firewalld
  # Bring the firewalld service up and enable it at boot so the firewall
  # rules that follow can be applied.
  - name: Start and enable firewalld
    tags: firewalld
    service:
      name: firewalld
      enabled: true
      state: started
  # Permanently open the slurm TCP and UDP ports (tcp_port2 / udp_port2)
  # in the public zone; one firewalld call is made per port entry.
  - name: Firewall rule for slurm - tcp/udp ports
    tags: firewalld
    firewalld:
      state: enabled
      permanent: true
      zone: public
      port: "{{ item }}"
    with_items:
      - "{{ tcp_port2 }}"
      - "{{ udp_port2 }}"
  # Reload firewalld so the permanent rules take effect in the running
  # configuration. The task is always reported as changed.
  - name: Reload firewalld
    tags: firewalld
    command:
      cmd: firewall-cmd --reload
    changed_when: true
  # Stop the firewalld service and disable it at boot.
  # NOTE(review): this runs right after the rules were added and reloaded;
  # confirm that leaving the firewall stopped is the intended end state.
  - name: Stop and disable firewalld
    tags: firewalld
    service:
      name: firewalld
      enabled: false
      state: stopped
  # Push the buffered slurm configuration (buffer_path on the controller)
  # to slurm_confpth on the target with the mode in slurm_mode.
  - name: Copy slurm conf from buffer
    copy:
      dest: "{{ slurm_confpth }}"
      src: "{{ buffer_path }}"
      mode: "{{ slurm_mode }}"
  # Install the slurm package list; skipped when os_supported_leap is
  # found in compute_os.
  - name: Install packages for slurm
    when: os_supported_leap not in compute_os
    tags: install
    package:
      state: present
      name: "{{ slurm_packages }}"
  # Install the development tool packages; skipped when os_supported_leap
  # is found in compute_os.
  - name: Install development tools
    when: os_supported_leap not in compute_os
    tags: install
    package:
      state: present
      name: "{{ dev_tools }}"
  # Capture the node's hostname in `machine_name` for the tasks below.
  # `hostname` only reads state, so the task must never report "changed";
  # it was previously flagged as changed on every run.
  - name: Get the hostname
    command: hostname
    register: machine_name
    changed_when: false
  # Record two per-host facts: compute_host is the inventory name and
  # compute_ip is the stdout of the earlier `hostname` command.
  # NOTE(review): compute_ip therefore holds a hostname string, not an
  # address - confirm that consumers expect this.
  - name: Set compute node hostname/host ip to add in manager hosts file
    set_fact:
      compute_ip: "{{ machine_name.stdout }}"
      compute_host: "{{ inventory_hostname }}"
  # Store the node name plus the processor_count / processor_cores facts of
  # this host as node_name, sockets and cores, so other hosts can read them
  # through hostvars.
  - name: Get socket and core info from compute nodes
    set_fact:
      cores: "{{ hostvars[inventory_hostname]['ansible_facts']['processor_cores'] }}"
      sockets: "{{ hostvars[inventory_hostname]['ansible_facts']['processor_count'] }}"
      node_name: "{{ machine_name.stdout }}"
  # Append a NodeName line (sockets / cores per socket) to the slurm config
  # for each host in the current play, delegating the edit to that host.
  # Runs only on hosts that belong to the "compute" group.
  #
  # The task used to declare `with_items` twice. A duplicated mapping key is
  # invalid YAML and the loader keeps only the last value, so the earlier
  # groups['compute'] list was never iterated. Only the effective loop over
  # play_hosts is kept, which leaves runtime behaviour unchanged.
  # NOTE(review): confirm play_hosts (not groups['compute']) is the intended
  # iteration source.
  - name: Add compute nodes core & socket info in slurm config file
    lineinfile:
      dest: "{{ slurm_confpth }}"
      line: "NodeName={{ hostvars[item].node_name }} Sockets={{ hostvars[item].sockets }} CoresPerSocket={{ hostvars[item].cores }}"
      state: present
      create: true
      mode: "{{ slurm_mode }}"
    when: '"compute" in group_names'
    delegate_to: "{{ item }}"
    with_items:
      - "{{ play_hosts }}"
  # Append a NodeName line (sockets / cores per socket) to the slurm config
  # for each host in the current play, delegating the edit to that host.
  # Runs only when login_node_required is set on 127.0.0.1 and the current
  # host belongs to the "login_node" group.
  #
  # The task used to declare `with_items` twice. A duplicated mapping key is
  # invalid YAML and the loader keeps only the last value, so the earlier
  # groups['login_node'] list was never iterated. Only the effective loop
  # over play_hosts is kept, which leaves runtime behaviour unchanged.
  # NOTE(review): confirm play_hosts (not groups['login_node']) is the
  # intended iteration source.
  - name: Add login node core & socket info in slurm config file
    lineinfile:
      dest: "{{ slurm_confpth }}"
      line: "NodeName={{ hostvars[item].node_name }} Sockets={{ hostvars[item].sockets }} CoresPerSocket={{ hostvars[item].cores }}"
      state: present
      create: true
      mode: "{{ slurm_mode }}"
    when:
      - hostvars["127.0.0.1"]["login_node_required"]
      - '"login_node" in group_names'
    delegate_to: "{{ item }}"
    with_items:
      - "{{ play_hosts }}"
  # Replace the "Nodes=ALL" placeholder of the normal partition with this
  # node's hostname. The result is registered as `output` so a later task
  # can inspect its `msg` field.
  - name: Update hostnames of compute node when ALL in partition nodes
    when:
      - hostvars["127.0.0.1"]["login_node_required"]
      - '"compute" in group_names'
    replace:
      path: "{{ slurm_confpth }}"
      regexp: 'PartitionName=normal Nodes=ALL'
      replace: 'PartitionName=normal Nodes={{ machine_name.stdout }}'
    register: output
  # Append this node's hostname to the existing partition node list by
  # inserting ",<hostname>" in front of the trailing partition options.
  # Runs only when the registered `output.msg` is empty - presumably meaning
  # the "Nodes=ALL" replacement made no change; verify against the replace
  # module's return values.
  - name: Update hostnames of compute node in partition nodes
    when:
      - hostvars["127.0.0.1"]["login_node_required"]
      - '"compute" in group_names'
      - output.msg | length == 0
    replace:
      path: "{{ slurm_confpth }}"
      regexp: ' Default=YES MaxTime=INFINITE State=UP'
      replace: ',{{ machine_name.stdout }} Default=YES MaxTime=INFINITE State=UP'
  # Pull the updated slurm config back to buffer_path on the controller.
  # `flat: true` writes it to exactly that destination instead of a
  # per-host directory tree.
  - name: Save slurm conf in buffer
    fetch:
      flat: true
      dest: "{{ buffer_path }}"
      src: "{{ slurm_confpth }}"