# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---

# Tasks that prepare a slurm worker node: spool/log files, firewall ports,
# packages, and the per-node entries in the shared slurm configuration file.
# The slurm configuration is copied in from a buffer file at the start and
# fetched back into the same buffer at the end.

# Variables used below (paths, modes, ports, package lists) are loaded from
# the slurm_manager role's vars file.
- name: Include common variables
  include_vars: ../../slurm_manager/vars/main.yml

- name: Give slurm user permission to slurmd spool
  file:
    path: "{{ spool_slurmd_pth }}"
    owner: slurm
    group: slurm
    mode: "{{ tmp_mode }}"
    state: touch

# NOTE(review): the loop item (slurmd.log) is never referenced in `path`, so
# this touches "{{ slurm_logpth }}" itself once. Presumably the path was meant
# to include the item - confirm against the value of slurm_logpth.
- name: Create log files on compute nodes
  file:
    path: "{{ slurm_logpth }}"
    owner: slurm
    group: slurm
    mode: "{{ tmp_mode }}"
    state: touch
  with_items:
    - slurmd.log

- name: Install firewalld
  package:
    name: firewalld
    state: present
  tags: firewalld

- name: Start and enable firewalld
  service:
    name: firewalld
    state: started
    enabled: true
  tags: firewalld

- name: Firewall rule for slurm - tcp/udp ports
  firewalld:
    zone: public
    port: "{{ item }}"
    permanent: true
    state: enabled
  with_items:
    - "{{ tcp_port2 }}"
    - "{{ udp_port2 }}"
  tags: firewalld

# The reload is always reported as a change because the command gives no
# indication of whether anything was actually reloaded.
- name: Reload firewalld
  command: firewall-cmd --reload
  changed_when: true
  tags: firewalld

# NOTE(review): firewalld is stopped and disabled immediately after its rules
# were configured above - confirm this is intentional.
- name: Stop and disable firewalld
  service:
    name: firewalld
    state: stopped
    enabled: false
  tags: firewalld

- name: Copy slurm conf from buffer
  copy:
    src: "{{ buffer_path }}"
    dest: "{{ slurm_confpth }}"
    mode: "{{ slurm_mode }}"

- name: Install packages for slurm
  package:
    name: "{{ slurm_packages }}"
    state: present
  tags: install

- name: Install development tools
  package:
    name: "{{ dev_tools }}"
    state: present
  tags: install

# Read-only query, so it must never be reported as a change.
- name: Get the hostname
  command: hostname
  register: machine_name
  changed_when: false

# NOTE(review): compute_host receives the inventory name and compute_ip the
# output of `hostname`; the names look swapped unless the inventory lists IP
# addresses - verify against the consumers of these facts.
- name: Set compute node hostname/host ip to add in manager hosts file
  set_fact:
    compute_host: "{{ inventory_hostname }}"
    compute_ip: "{{ machine_name.stdout }}"

- name: Get socket and core info from compute nodes
  set_fact:
    node_name: "{{ machine_name.stdout }}"
    sockets: "{{ hostvars[inventory_hostname]['ansible_facts']['processor_count'] }}"
    cores: "{{ hostvars[inventory_hostname]['ansible_facts']['processor_cores'] }}"

# This task used to declare `with_items` twice. Duplicate keys are resolved
# "last one wins", so only the loop over the hosts of the current play ever
# ran; that loop is kept and the dead one over groups['compute'] is removed.
# NOTE(review): confirm that looping over the play's hosts (rather than only
# the compute group) is the intended behaviour.
- name: Add compute nodes core & socket info in slurm config file
  lineinfile:
    dest: "{{ slurm_confpth }}"
    line: "NodeName={{ hostvars[item].node_name }} Sockets={{ hostvars[item].sockets }} CoresPerSocket={{ hostvars[item].cores }}"
    state: present
    create: true
    mode: "{{ slurm_mode }}"
  when: '"compute" in group_names'
  delegate_to: "{{ item }}"
  with_items:
    - "{{ ansible_play_batch }}"

# Same duplicate-`with_items` clean-up as the compute task above: the loop
# over the play's hosts is the one that was effective, so it is the one kept.
- name: Add login node core & socket info in slurm config file
  lineinfile:
    dest: "{{ slurm_confpth }}"
    line: "NodeName={{ hostvars[item].node_name }} Sockets={{ hostvars[item].sockets }} CoresPerSocket={{ hostvars[item].cores }}"
    state: present
    create: true
    mode: "{{ slurm_mode }}"
  when:
    - hostvars["127.0.0.1"]["login_node_required"]
    - '"login_node" in group_names'
  delegate_to: "{{ item }}"
  with_items:
    - "{{ ansible_play_batch }}"

# Replaces the catch-all partition definition with this node's hostname; the
# result is registered so the next task can tell whether this one matched.
- name: Update hostnames of compute node when ALL in partition nodes
  replace:
    path: "{{ slurm_confpth }}"
    regexp: 'PartitionName=normal Nodes=ALL'
    replace: 'PartitionName=normal Nodes={{ machine_name.stdout }}'
  when:
    - hostvars["127.0.0.1"]["login_node_required"]
    - '"compute" in group_names'
  register: output

# Runs only when the previous task returned an empty `msg` (presumably meaning
# it replaced nothing - verify against the replace module's return values).
# NOTE(review): the hostname is inserted on every matching run, so repeated
# runs may add it more than once - confirm.
- name: Update hostnames of compute node in partition nodes
  replace:
    path: "{{ slurm_confpth }}"
    regexp: ' Default=YES MaxTime=INFINITE State=UP'
    replace: ',{{ machine_name.stdout }} Default=YES MaxTime=INFINITE State=UP'
  when:
    - hostvars["127.0.0.1"]["login_node_required"]
    - '"compute" in group_names'
    - output.msg | length == 0

# flat: true stores the file at exactly buffer_path instead of under a
# per-host directory tree.
- name: Save slurm conf in buffer
  fetch:
    src: "{{ slurm_confpth }}"
    dest: "{{ buffer_path }}"
    flat: true