# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
# Load the variables file that lives under the slurm_manager role so the
# tasks in this file can reference them.
- name: Include common variables
  include_vars:
    file: ../../slurm_manager/vars/main.yml
# Touch the slurmd spool path and hand it to slurm:slurm with the mode
# taken from `tmp_mode`.
- name: Give slurm user permission to slurmd spool
  file:
    state: touch
    path: "{{ spool_slurmd_pth }}"
    mode: "{{ tmp_mode }}"
    owner: slurm
    group: slurm
# Touch the slurm log path and make it owned by slurm:slurm.
# NOTE(review): the loop item (`slurmd.log`) is never referenced, so every
# iteration touches `slurm_logpth` itself. Presumably the path was meant to
# include `{{ item }}` - confirm against the definition of `slurm_logpth`.
- name: Create log files on compute nodes
  file:
    state: touch
    path: "{{ slurm_logpth }}"
    mode: "{{ tmp_mode }}"
    owner: slurm
    group: slurm
  with_items:
    - slurmd.log
# Make sure the firewalld package is present on the node.
- name: Install firewalld
  tags: firewalld
  package:
    state: present
    name: firewalld
# Stop the firewalld service now and keep it from being enabled.
- name: Stop and disable firewalld
  tags: firewalld
  service:
    name: firewalld
    enabled: false
    state: stopped
# Push the slurm configuration held at `buffer_path` to `slurm_confpth`
# on the node, applying `slurm_mode` to the copied file.
- name: Copy slurm conf from buffer
  copy:
    dest: "{{ slurm_confpth }}"
    src: "{{ buffer_path }}"
    mode: "{{ slurm_mode }}"
# Install every package listed in `slurm_packages`.
# The list is handed to the package module as a whole, so no loop is needed:
# looping over the same list while also passing the full list as `name`
# would repeat the identical install once per package.
- name: Install packages for slurm
  package:
    name: "{{ slurm_packages }}"
    state: present
  tags: install
# Install every package listed in `dev_tools`.
# The list is passed directly to the package module so the package manager
# resolves it in a single call instead of being invoked once per item.
- name: Install development tools
  package:
    name: "{{ dev_tools }}"
    state: present
  tags: install
# Query the rpm database for slurm packages and record the result in
# `verify_result`. The pipeline's return code is grep's: non-zero when no
# installed package name matches "slurm". The task never fails and never
# reports a change; later tasks branch on `verify_result.rc`.
- name: Verify if slurm is installed
  shell: rpm -qa | grep slurm
  args:
    warn: false
  register: verify_result
  ignore_errors: true
  failed_when: false
  changed_when: false
# Create the directory at `tmp_path`, owned by slurm:slurm.
# Skipped when the rpm query stored in `verify_result` returned 0.
- name: Create temporary download folder for slurm
  when: verify_result.rc != 0
  file:
    state: directory
    path: "{{ tmp_path }}"
    mode: "{{ tmp_mode }}"
    owner: slurm
    group: slurm
# Fetch the slurm source from `slurm_url` into `tmp_path` and verify it
# against the checksum in `slurm_md5`.
# Skipped when the rpm query stored in `verify_result` returned 0.
# NOTE(review): TLS certificate validation is switched off for this
# download; only the checksum protects the artifact. Confirm this is
# intentional.
- name: Download slurm source
  when: verify_result.rc != 0
  tags: install
  get_url:
    dest: "{{ tmp_path }}"
    url: "{{ slurm_url }}"
    checksum: "{{ slurm_md5 }}"
    validate_certs: false
# Run rpmbuild against `rpmbuild_path` with the mysql option enabled.
# The task is declared as never reporting a change.
# Skipped when the rpm query stored in `verify_result` returned 0.
- name: Build slurm rpms
  when: verify_result.rc != 0
  command: rpmbuild -ta "{{ rpmbuild_path }}" --with mysql
  args:
    warn: false
  changed_when: false
# Install/upgrade the rpms named by `rpm_loop`, running from `rpm_path`.
# The task is declared as always reporting a change.
# Skipped when the rpm query stored in `verify_result` returned 0.
- name: Install rpms
  when: verify_result.rc != 0
  command: rpm -Uvh ~"{{ rpm_loop }}"
  args:
    warn: false
    chdir: "{{ rpm_path }}"
  changed_when: true
# Capture the node's hostname in `machine_name` for the fact-setting tasks
# that follow. Running `hostname` only reads state, so the task must not
# report a change.
- name: Get the hostname
  command: hostname
  register: machine_name
  changed_when: false
# Publish two host facts: `compute_ip` from the output of the `hostname`
# command and `compute_host` from the inventory name of this host.
# NOTE(review): the fact names read as if the sources were swapped
# (hostname output -> compute_ip, inventory name -> compute_host);
# confirm against the consumers of these facts before changing anything.
- name: Set compute node hostname/host ip to add in manager hosts file
  set_fact:
    compute_ip: "{{ machine_name.stdout }}"
    compute_host: "{{ inventory_hostname }}"
# Record per-host facts used when writing the node line of the slurm
# config: the hostname command output plus the `processor_count` and
# `processor_cores` values from this host's gathered facts.
- name: Get socket and core info from compute nodes
  set_fact:
    cores: "{{ hostvars[inventory_hostname].ansible_facts.processor_cores }}"
    sockets: "{{ hostvars[inventory_hostname].ansible_facts.processor_count }}"
    node_name: "{{ machine_name.stdout }}"
# For every host in the `compute` group, make sure the slurm config file
# contains a NodeName line built from that host's node_name, sockets and
# cores facts. The file is created if it does not exist yet.
- name: Add compute nodes core & socket info in slurm config file
  lineinfile:
    path: "{{ slurm_confpth }}"
    state: present
    create: true
    mode: "{{ slurm_mode }}"
    line: "NodeName={{ hostvars[item].node_name }} Sockets={{ hostvars[item].sockets }} CoresPerSocket={{ hostvars[item].cores }}"
  with_items:
    - "{{ groups['compute'] }}"
# Pull the node's slurm config file back to `buffer_path` on the
# controller. `flat: true` writes it to that destination directly.
- name: Save slurm conf in buffer
  fetch:
    flat: true
    dest: "{{ buffer_path }}"
    src: "{{ slurm_confpth }}"
# Bring up the slurmd unit and enable it.
- name: Start slurmd on compute nodes
  tags: install
  systemd:
    name: slurmd.service
    enabled: true
    state: started
|