# main.yml (4.3 KB)
# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
  # Load the variable file that belongs to the slurm_manager role.
  # Presumably this is where the path/mode variables used by the other tasks
  # in this file are defined - verify against that vars file.
  - name: Include common variables
    include_vars:
      file: ../../slurm_manager/vars/main.yml
  # Hand the slurmd spool path to the slurm user/group with the configured
  # mode. `state: touch` creates an empty file when the path is missing and
  # otherwise refreshes its timestamps, so this task reports "changed" on
  # every run.
  - name: Give slurm user permission to slurmd spool
    file:
      state: touch
      path: "{{ spool_slurmd_pth }}"
      mode: "{{ tmp_mode }}"
      owner: slurm
      group: slurm
  # Create each listed slurm log file, owned by slurm with the configured mode.
  # The loop item was previously never referenced, so the task touched
  # `slurm_logpth` itself instead of the log file named in the list.
  # NOTE(review): assumes `slurm_logpth` is the log *directory*; a doubled
  # slash (if the variable already ends in "/") is harmless on POSIX paths.
  - name: Create log files on compute nodes
    file:
      path: "{{ slurm_logpth }}/{{ item }}"
      owner: slurm
      group: slurm
      mode: "{{ tmp_mode }}"
      state: touch
    with_items:
      - slurmd.log
  # Make sure the firewalld package is installed via the platform's package
  # manager (generic `package` module).
  - name: Install firewalld
    package:
      state: present
      name: firewalld
    tags:
      - firewalld
  # Bring the firewalld service up now and enable it at boot so the port
  # rules can be applied to it.
  - name: Start and enable firewalld
    service:
      name: firewalld
      enabled: true
      state: started
    tags:
      - firewalld
  # Permanently open the slurm TCP and UDP ports in the public zone.
  # Only the permanent configuration is changed here (no `immediate`), so the
  # rules take effect once firewalld is reloaded.
  - name: Firewall rule for slurm - tcp/udp ports
    firewalld:
      state: enabled
      permanent: true
      zone: public
      port: "{{ item }}"
    with_items:
      - "{{ tcp_port2 }}"
      - "{{ udp_port2 }}"
    tags:
      - firewalld
  # Reload firewalld so its permanent configuration becomes the running one.
  # The reload is always reported as a change.
  - name: Reload firewalld
    command:
      cmd: firewall-cmd --reload
    changed_when: true
    tags:
      - firewalld
  # Stop firewalld and disable it at boot.
  # NOTE(review): this runs right after the rules were added and reloaded;
  # confirm that leaving the firewall off on these nodes is intended.
  - name: Stop and disable firewalld
    service:
      name: firewalld
      enabled: false
      state: stopped
    tags:
      - firewalld
  # Push the slurm configuration held at `buffer_path` on the controller to
  # the slurm config location on this node, applying the configured mode.
  - name: Copy slurm conf from buffer
    copy:
      dest: "{{ slurm_confpth }}"
      src: "{{ buffer_path }}"
      mode: "{{ slurm_mode }}"
  # Install everything listed in `slurm_packages`.
  - name: Install packages for slurm
    package:
      state: present
      name: "{{ slurm_packages }}"
    tags:
      - install
  # Install everything listed in `dev_tools`.
  - name: Install development tools
    package:
      state: present
      name: "{{ dev_tools }}"
    tags:
      - install
  # Capture the node's hostname in `machine_name` (stdout) for later tasks.
  # `hostname` only reads state, so the task must never report a change;
  # it was previously forced to "changed" on every run.
  - name: Get the hostname
    command: hostname
    register: machine_name
    changed_when: false
  # Publish two per-host facts for the manager's hosts file:
  #   compute_ip   <- output of the `hostname` command
  #   compute_host <- the inventory name of this host
  # NOTE(review): the fact names look swapped relative to their values
  # (hostname stored as "ip"); confirm against the consumer before changing.
  - name: Set compute node hostname/host ip to add in manager hosts file
    set_fact:
      compute_ip: "{{ machine_name.stdout }}"
      compute_host: "{{ inventory_hostname }}"
  # Store this host's name plus its processor_count / processor_cores facts
  # as `node_name`, `sockets` and `cores` for the slurm.conf NodeName lines.
  - name: Get socket and core info from compute nodes
    set_fact:
      cores: "{{ hostvars[inventory_hostname].ansible_facts.processor_cores }}"
      sockets: "{{ hostvars[inventory_hostname].ansible_facts.processor_count }}"
      node_name: "{{ machine_name.stdout }}"
  # Append a NodeName line (name, sockets, cores per socket) to the slurm
  # config file. Runs only on hosts in the "compute" group; for each host in
  # the play, the line is built from that host's facts and written on that
  # same host via delegate_to.
  # The task used to declare `with_items` twice. Ansible keeps only the last
  # duplicate key (and warns), so the `groups['compute']` loop never ran; it
  # is removed here and the effective `play_hosts` loop is kept unchanged.
  # NOTE(review): confirm `play_hosts` (not `groups['compute']`) is the
  # loop that was actually intended.
  - name: Add compute nodes core & socket info in slurm config file
    lineinfile:
      dest: "{{ slurm_confpth }}"
      line: "NodeName={{ hostvars[item].node_name }} Sockets={{ hostvars[item].sockets }} CoresPerSocket={{ hostvars[item].cores }}"
      state: present
      create: true
      mode: "{{ slurm_mode }}"
    when: '"compute" in group_names'
    delegate_to: "{{ item }}"
    with_items:
      - "{{ play_hosts }}"
  # Append a NodeName line (name, sockets, cores per socket) to the slurm
  # config file. Runs only when a login node is required and the current host
  # is in the "login_node" group; for each host in the play, the line is
  # built from that host's facts and written on that same host via
  # delegate_to.
  # The task used to declare `with_items` twice. Ansible keeps only the last
  # duplicate key (and warns), so the `groups['login_node']` loop never ran;
  # it is removed here and the effective `play_hosts` loop is kept unchanged.
  # NOTE(review): confirm `play_hosts` (not `groups['login_node']`) is the
  # loop that was actually intended.
  - name: Add login node core & socket info in slurm config file
    lineinfile:
      dest: "{{ slurm_confpth }}"
      line: "NodeName={{ hostvars[item].node_name }} Sockets={{ hostvars[item].sockets }} CoresPerSocket={{ hostvars[item].cores }}"
      state: present
      create: true
      mode: "{{ slurm_mode }}"
    when:
      - hostvars["127.0.0.1"]["login_node_required"]
      - '"login_node" in group_names'
    delegate_to: "{{ item }}"
    with_items:
      - "{{ play_hosts }}"
  # If the partition line still says `Nodes=ALL`, swap ALL for this node's
  # hostname. Applies to compute hosts when a login node is required.
  # The result is registered as `output` so a later task can inspect its
  # `msg` field.
  - name: Update hostnames of compute node when ALL in partition nodes
    replace:
      path: "{{ slurm_confpth }}"
      regexp: 'PartitionName=normal Nodes=ALL'
      replace: 'PartitionName=normal Nodes={{ machine_name.stdout }}'
    register: output
    when:
      - hostvars["127.0.0.1"]["login_node_required"]
      - '"compute" in group_names'
  # Insert ",<hostname>" in front of the partition options
  # (" Default=YES MaxTime=INFINITE State=UP"). Applies to compute hosts when
  # a login node is required, and only when the registered `output.msg` is
  # empty. The first two conditions guard the third: `output.msg` is only
  # read when the task that registers `output` was eligible to run.
  - name: Update hostnames of compute node in partition nodes
    replace:
      path: "{{ slurm_confpth }}"
      regexp: ' Default=YES MaxTime=INFINITE State=UP'
      replace: ',{{ machine_name.stdout }} Default=YES MaxTime=INFINITE State=UP'
    when:
      - hostvars["127.0.0.1"]["login_node_required"]
      - '"compute" in group_names'
      - output.msg | length == 0
  # Pull this node's slurm config file back to `buffer_path` on the
  # controller. `flat: true` stores it at exactly that destination instead
  # of under a per-host directory tree.
  - name: Save slurm conf in buffer
    fetch:
      flat: true
      dest: "{{ buffer_path }}"
      src: "{{ slurm_confpth }}"