# main.yml
  1. # Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. ---
  # Load variables from the slurm_manager vars file. The path is relative,
  # so it depends on this file's position in the directory tree.
  - name: Include common variables
    include_vars:
      file: ../../slurm_manager/vars/main.yml
  # Make sure the slurmd spool path exists and belongs to slurm:slurm with
  # the mode taken from tmp_mode.
  # `state: touch` would otherwise rewrite the timestamps and report
  # "changed" on every run; preserving them keeps the task idempotent.
  # NOTE(review): if spool_slurmd_pth is meant to be a directory, confirm
  # whether `state: directory` is what is actually wanted here.
  - name: Give slurm user permission to slurmd spool
    file:
      path: "{{ spool_slurmd_pth }}"
      state: touch
      owner: slurm
      group: slurm
      mode: "{{ tmp_mode }}"
      modification_time: preserve
      access_time: preserve
  # Create each listed log file, owned by slurm:slurm with mode tmp_mode.
  # The path now includes the loop item; previously the item was never
  # referenced, so the listed file name was ignored and the bare
  # slurm_logpth was touched instead.
  # NOTE(review): assumes slurm_logpth is the log *directory* - confirm
  # against the vars file. A trailing slash there only yields a harmless
  # doubled "/" in the resulting path.
  # Timestamps are preserved so re-running does not report "changed".
  - name: Create log files on compute nodes
    file:
      path: "{{ slurm_logpth }}/{{ item }}"
      state: touch
      owner: slurm
      group: slurm
      mode: "{{ tmp_mode }}"
      modification_time: preserve
      access_time: preserve
    with_items:
      - slurmd.log
  # Ensure the firewalld package is present through the generic package
  # module. Selectable with the "firewalld" tag.
  - name: Install firewalld
    tags:
      - firewalld
    package:
      state: present
      name: firewalld
  # Stop the firewalld service now and keep it from starting at boot.
  # Selectable with the "firewalld" tag.
  - name: Stop and disable firewalld
    tags:
      - firewalld
    service:
      name: firewalld
      enabled: false
      state: stopped
  # Push the slurm configuration held at buffer_path out to slurm_confpth
  # on the target, applying the mode from slurm_mode.
  - name: Copy slurm conf from buffer
    copy:
      dest: "{{ slurm_confpth }}"
      src: "{{ buffer_path }}"
      mode: "{{ slurm_mode }}"
  # Install every package named in slurm_packages.
  # Selectable with the "install" tag.
  - name: Install packages for slurm
    tags:
      - install
    package:
      state: present
      name: "{{ slurm_packages }}"
  # Install the packages named in dev_tools.
  # Selectable with the "install" tag.
  - name: Install development tools
    tags:
      - install
    package:
      state: present
      name: "{{ dev_tools }}"
  # Probe the RPM database for any package whose name contains "slurm".
  # The pipeline's return code is grep's: 0 on a match, non-zero otherwise.
  # The result is registered as verify_result; later tasks gate on
  # `verify_result.rc != 0`.
  # The task is read-only and never allowed to fail or report a change.
  - name: Verify if slurm is installed
    shell: rpm -qa | grep slurm
    args:
      warn: false
    register: verify_result
    ignore_errors: true
    failed_when: false
    changed_when: false
  # Only when the probe found no slurm package: create the working
  # directory tmp_path, owned by slurm:slurm with mode tmp_mode.
  - name: Create temporary download folder for slurm
    when: verify_result.rc != 0
    file:
      state: directory
      path: "{{ tmp_path }}"
      mode: "{{ tmp_mode }}"
      owner: slurm
      group: slurm
  # Only when the probe found no slurm package: fetch the slurm source from
  # slurm_url into tmp_path and verify it against the slurm_md5 checksum.
  # NOTE(review): this task carries the "install" tag, but the task that
  # registers verify_result does not - a run limited to `--tags install`
  # would presumably reach this condition with verify_result undefined.
  # NOTE(review): TLS certificate validation is switched off here, and the
  # variable name suggests an MD5 checksum - confirm both are acceptable.
  - name: Download slurm source
    when: verify_result.rc != 0
    tags:
      - install
    get_url:
      url: "{{ slurm_url }}"
      dest: "{{ tmp_path }}"
      checksum: "{{ slurm_md5 }}"
      validate_certs: false
  # Only when the probe found no slurm package: build RPMs from the archive
  # at rpmbuild_path (`rpmbuild -ta`) with the mysql option enabled.
  # The task is declared as never reporting a change.
  - name: Build slurm rpms
    when: verify_result.rc != 0
    command: rpmbuild -ta "{{ rpmbuild_path }}" --with mysql
    args:
      warn: false
    changed_when: false
  # Only when the probe found no slurm package: install/upgrade the RPMs
  # with `rpm -Uvh`, executed from the rpm_path directory. Always reported
  # as a change.
  # NOTE(review): the command module does not go through a shell, so if
  # rpm_loop contains a glob pattern it would presumably not be expanded -
  # confirm against the variable's value.
  - name: Install rpms
    when: verify_result.rc != 0
    command: rpm -Uvh ~"{{ rpm_loop }}"
    args:
      warn: false
      chdir: "{{ rpm_path }}"
    changed_when: true
  # Capture the node's hostname as machine_name (the value is read later
  # through machine_name.stdout).
  # `hostname` without arguments only reads state, so the task must not
  # report a change; it previously claimed "changed" on every run.
  - name: Get the hostname
    command: hostname
    register: machine_name
    changed_when: false
  # Publish two per-host facts: compute_host takes the inventory name and
  # compute_ip takes the output of the `hostname` command.
  # NOTE(review): the fact names look swapped relative to their sources
  # (an "ip" fact fed from `hostname` output) - confirm against the
  # consumer of these facts before changing anything.
  - name: Set compute node hostname/host ip to add in manager hosts file
    set_fact:
      compute_ip: "{{ machine_name.stdout }}"
      compute_host: "{{ inventory_hostname }}"
  # Record this host's name plus its gathered processor_count and
  # processor_cores facts as node_name / sockets / cores, so other hosts
  # can read them through hostvars.
  - name: Get socket and core info from compute nodes
    set_fact:
      node_name: "{{ machine_name.stdout }}"
      cores: "{{ hostvars[inventory_hostname].ansible_facts.processor_cores }}"
      sockets: "{{ hostvars[inventory_hostname].ansible_facts.processor_count }}"
  # For every host in the "compute" group, make sure the slurm config file
  # holds a NodeName line built from that host's node_name, sockets and
  # cores facts. The file is created (with mode slurm_mode) if missing.
  # The folded scalar joins its lines with single spaces, producing one
  # config line per host.
  - name: Add compute nodes core & socket info in slurm config file
    lineinfile:
      dest: "{{ slurm_confpth }}"
      create: true
      mode: "{{ slurm_mode }}"
      state: present
      line: >-
        NodeName={{ hostvars[item].node_name }}
        Sockets={{ hostvars[item].sockets }}
        CoresPerSocket={{ hostvars[item].cores }}
    loop: "{{ groups['compute'] }}"
  # Pull the slurm config file back from the target into buffer_path.
  # `flat: true` stores it at exactly that destination instead of under a
  # per-host directory tree.
  # NOTE(review): if several hosts run this task they presumably write to
  # the same buffer_path - confirm that last-writer-wins is intended.
  - name: Save slurm conf in buffer
    fetch:
      flat: true
      dest: "{{ buffer_path }}"
      src: "{{ slurm_confpth }}"
  # Start the slurmd unit now and enable it at boot.
  # Selectable with the "install" tag.
  - name: Start slurmd on compute nodes
    tags:
      - install
    systemd:
      name: slurmd.service
      enabled: true
      state: started