# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
# Load the variable file of the slurm_manager role so the tasks in this file
# can use the variables it defines.
- name: Include common variables
  include_vars:
    file: ../../slurm_manager/vars/main.yml
# Touch the slurmd spool path and assign it to the slurm user and group with
# the mode held in tmp_mode.
# NOTE(review): `state: touch` creates an empty regular file when the path does
# not exist; if spool_slurmd_pth is meant to be a directory, confirm it is
# created before this task runs.
- name: Give slurm user permission to slurmd spool
  file:
    state: touch
    path: "{{ spool_slurmd_pth }}"
    mode: "{{ tmp_mode }}"
    owner: slurm
    group: slurm
# Create (touch) every listed log file and assign it to the slurm user and
# group with the mode held in tmp_mode.
# The loop item is appended to the log path so that the named file is created;
# without it the loop would only touch the log path itself on every iteration.
# NOTE(review): assumes slurm_logpth is the log directory (a trailing slash in
# the variable is harmless) - confirm against the vars file.
- name: Create log files on compute nodes
  file:
    path: "{{ slurm_logpth }}/{{ item }}"
    owner: slurm
    group: slurm
    mode: "{{ tmp_mode }}"
    state: touch
  with_items:
    - slurmd.log
# Make sure the firewalld package is installed through the platform's package
# manager. Selectable with the `firewalld` tag.
- name: Install firewalld
  tags: firewalld
  package:
    state: present
    name: firewalld
# Bring the firewalld service up now and enable it at boot so that the
# firewall rule tasks can talk to a running daemon.
- name: Start and enable firewalld
  tags: firewalld
  service:
    name: firewalld
    enabled: true
    state: started
# Open the slurm ports in the public zone. One rule is added per loop item
# (tcp_port2, then udp_port2) and written to the permanent configuration.
- name: Firewall rule for slurm - tcp/udp ports
  tags: firewalld
  firewalld:
    state: enabled
    permanent: true
    zone: public
    port: "{{ item }}"
  with_items:
    - "{{ tcp_port2 }}"
    - "{{ udp_port2 }}"
# Reload firewalld so the permanent configuration is loaded into the running
# daemon. The command is always reported as changed.
- name: Reload firewalld
  tags: firewalld
  changed_when: true
  command: firewall-cmd --reload
# Stop the firewalld service and disable it at boot.
# NOTE(review): this runs right after firewalld was started, configured and
# reloaded in the same file, so the rules are stored but presumably not
# enforced while the service is down - confirm this is intended.
- name: Stop and disable firewalld
  tags: firewalld
  service:
    name: firewalld
    enabled: false
    state: stopped
# Copy the slurm configuration held at buffer_path to slurm_confpth on the
# target node, applying the mode held in slurm_mode.
- name: Copy slurm conf from buffer
  copy:
    dest: "{{ slurm_confpth }}"
    src: "{{ buffer_path }}"
    mode: "{{ slurm_mode }}"
# Install the packages named in slurm_packages. Selectable with the `install`
# tag.
- name: Install packages for slurm
  tags: install
  package:
    state: present
    name: "{{ slurm_packages }}"
# Install the packages named in dev_tools. Selectable with the `install` tag.
- name: Install development tools
  tags: install
  package:
    state: present
    name: "{{ dev_tools }}"
# Run `hostname` on the node and keep the result in machine_name; later tasks
# read machine_name.stdout.
# The command only reads state, so it must never be reported as a change.
- name: Get the hostname
  command: hostname
  register: machine_name
  changed_when: false
# Publish two host facts: compute_ip takes the output of the `hostname`
# command and compute_host takes the inventory name of the node.
# NOTE(review): the fact names look swapped relative to the values they hold
# (presumably the inventory lists nodes by IP address) - verify against the
# tasks that consume these facts before renaming anything.
- name: Set compute node hostname/host ip to add in manager hosts file
  set_fact:
    compute_ip: "{{ machine_name.stdout }}"
    compute_host: "{{ inventory_hostname }}"
# Record, as host facts, the values used to build this node's NodeName line:
# sockets and cores come from the gathered processor_count and processor_cores
# facts, node_name from the output of the `hostname` command.
- name: Get socket and core info from compute nodes
  set_fact:
    sockets: "{{ hostvars[inventory_hostname]['ansible_facts']['processor_count'] }}"
    cores: "{{ hostvars[inventory_hostname]['ansible_facts']['processor_cores'] }}"
    node_name: "{{ machine_name.stdout }}"
# For hosts in the `compute` group: loop over every host of the current play
# and, delegated to that host, make sure its NodeName line (name, sockets,
# cores per socket) is present in the slurm configuration file. The file is
# created with the mode held in slurm_mode if it does not exist.
# The task declares a single loop. A task mapping cannot carry two
# `with_items` keys: Ansible keeps only the last definition, so the loop over
# play_hosts is the one that was always in effect.
# NOTE(review): a shadowed `with_items` over groups['compute'] was removed
# here - confirm play_hosts is the intended loop source.
- name: Add compute nodes core & socket info in slurm config file
  lineinfile:
    dest: "{{ slurm_confpth }}"
    line: "NodeName={{ hostvars[item].node_name }} Sockets={{ hostvars[item].sockets }} CoresPerSocket={{ hostvars[item].cores }}"
    state: present
    create: true
    mode: "{{ slurm_mode }}"
  when: '"compute" in group_names'
  delegate_to: "{{ item }}"
  with_items:
    - "{{ play_hosts }}"
# For hosts in the `login_node` group, and only when login_node_required is
# set on the 127.0.0.1 host: loop over every host of the current play and,
# delegated to that host, make sure its NodeName line (name, sockets, cores
# per socket) is present in the slurm configuration file. The file is created
# with the mode held in slurm_mode if it does not exist.
# The task declares a single loop. A task mapping cannot carry two
# `with_items` keys: Ansible keeps only the last definition, so the loop over
# play_hosts is the one that was always in effect.
# NOTE(review): a shadowed `with_items` over groups['login_node'] was removed
# here - confirm play_hosts is the intended loop source.
- name: Add login node core & socket info in slurm config file
  lineinfile:
    dest: "{{ slurm_confpth }}"
    line: "NodeName={{ hostvars[item].node_name }} Sockets={{ hostvars[item].sockets }} CoresPerSocket={{ hostvars[item].cores }}"
    state: present
    create: true
    mode: "{{ slurm_mode }}"
  when:
    - hostvars["127.0.0.1"]["login_node_required"]
    - '"login_node" in group_names'
  delegate_to: "{{ item }}"
  with_items:
    - "{{ play_hosts }}"
# On compute hosts, when login_node_required is set on the 127.0.0.1 host:
# replace the `Nodes=ALL` placeholder of the `normal` partition with this
# node's hostname. The result is registered as `output` so the following task
# can tell whether the placeholder was still present.
- name: Update hostnames of compute node when ALL in partition nodes
  register: output
  when:
    - hostvars["127.0.0.1"]["login_node_required"]
    - '"compute" in group_names'
  replace:
    path: "{{ slurm_confpth }}"
    regexp: 'PartitionName=normal Nodes=ALL'
    replace: 'PartitionName=normal Nodes={{ machine_name.stdout }}'
# On compute hosts, when login_node_required is set on the 127.0.0.1 host and
# the registered `output.msg` is empty: insert `,<hostname>` in front of the
# partition options (` Default=YES MaxTime=INFINITE State=UP`).
# NOTE(review): presumably relies on the replace module returning an empty
# msg when nothing was substituted - confirm.
# NOTE(review): the regexp still matches after the substitution, so a re-run
# inserts the hostname again.
- name: Update hostnames of compute node in partition nodes
  when:
    - hostvars["127.0.0.1"]["login_node_required"]
    - '"compute" in group_names'
    - output.msg | length == 0
  replace:
    path: "{{ slurm_confpth }}"
    regexp: ' Default=YES MaxTime=INFINITE State=UP'
    replace: ',{{ machine_name.stdout }} Default=YES MaxTime=INFINITE State=UP'
# Fetch the node's slurm configuration file back to buffer_path. With
# `flat: true` the file is stored at that destination path as given.
# NOTE(review): every host writes to the same buffer_path, so presumably each
# fetch replaces the previous host's copy - confirm this is intended.
- name: Save slurm conf in buffer
  fetch:
    flat: true
    dest: "{{ buffer_path }}"
    src: "{{ slurm_confpth }}"
|