Просмотр исходного кода

Merge pull request #164 from VishnupriyaKrish/devel

Issue dellhpc#154: Update Slurm Playbook
Lucas A. Wilson 4 лет назад
Родитель
Commit
320683fb5c

+ 28 - 14
slurm/roles/common/handlers/main.yml

@@ -1,26 +1,40 @@
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
 ---
-- name: restart ntpd
+
+# Restarts ntpd and enables it at boot.
+# Handler names are matched exactly, and the ntp tasks notify the lowercase
+# "restart ntpd", so a listen topic keeps that notification resolvable.
+- name: Restart ntpd
+  listen: restart ntpd
   service:
-    name=ntpd
-    state=restarted
-    enabled=yes
+    name: ntpd
+    state: restarted
+    enabled: true
 
-- name: restart chrony
+# Restarts chronyd and enables it at boot.
+# The chrony tasks notify the lowercase "restart chrony"; handler names are
+# matched exactly, so the listen topic keeps that notification resolvable.
+- name: Restart chrony
+  listen: restart chrony
   service:
-    name=chronyd
-    state=restarted
-    enabled=yes
+    name: chronyd
+    state: restarted
+    enabled: true
 
-- name: sync ntp clocks
+# Re-runs `ntpdc -np` until its output contains '*', retrying
+# retry_count_one times with delay_count_one seconds between attempts.
+# The listen topic matches the lowercase "sync ntp clocks" notification
+# used by the ntp tasks (handler names are matched exactly).
+- name: Sync ntp clocks
+  listen: sync ntp clocks
   command: ntpdc -np
   register: ntp_clock
   until:  ntp_clock.stdout.find('*') > -1
-  retries: 10
-  delay: 60
+  retries: "{{ retry_count_one }}"
+  delay: "{{ delay_count_one }}"
 
-- name: sync chrony sources
+# Re-runs `chronyc sources` until its output contains '^*', retrying
+# retry_count times with delay_count seconds between attempts.
+# The listen topic matches the lowercase "sync chrony sources" notification
+# used by the chrony tasks (handler names are matched exactly).
+- name: Sync chrony sources
+  listen: sync chrony sources
   command: chronyc sources
   register: chrony_src
   until:  chrony_src.stdout.find('^*') > -1
-  retries: 6
-  delay: 10
+  retries: "{{ retry_count }}"
+  delay: "{{ delay_count }}"

+ 17 - 2
slurm/roles/common/tasks/main.yml

@@ -1,2 +1,17 @@
-- name: deploy time ntp/chrony
-  include_tasks: ntp.yml
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+- name: Deploy time ntp/chrony
+  include_tasks: ntp.yml

+ 27 - 13
slurm/roles/common/tasks/ntp.yml

@@ -1,42 +1,56 @@
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
 ---
 
-  - name: deploy ntp servers
+  # Installs ntp/ntpdate and renders ntp.conf on CentOS/RedHat releases whose
+  # major version is below os_higher_version.
+  - name: Deploy ntp servers
     block:
-      - name: deploy ntpd
+      - name: Deploy ntpd
         package:
           name: ntp
           state: present
-      - name: deploy ntpdate
+      - name: Deploy ntpdate
         package:
           name: ntpdate
           state: present
-      - name: update ntp servers
+      - name: Update ntp servers
         template:
           src: ntp.conf.j2
-          dest: /etc/ntp.conf
+          dest: "{{ ntp_path }}"
           owner: root
           group: root
-          mode: u=rw,g=r,o=r
+          mode: "{{ ntp_mode }}"
           backup: yes
         notify:
           - restart ntpd
           - sync ntp clocks
-    when:  ( ansible_distribution == "CentOS" or   ansible_distribution == "RedHat" ) and ansible_distribution_major_version | int < 8
-  - name:   deploy chrony server
+    # Both operands are strings (the fact and the quoted variable); cast them
+    # to int so the comparison is numeric rather than lexicographic
+    # (as strings, "10" < "8" is true).
+    when: ( ansible_distribution == "CentOS" or ansible_distribution == "RedHat" ) and ansible_distribution_major_version | int < os_higher_version | int
+
+  # Installs chrony and renders chrony.conf on CentOS/RedHat releases whose
+  # major version is above os_version.
+  - name: Deploy chrony server
     block:
-      - name: deploy chrony
+      - name: Deploy chrony
         package:
             name: chrony
             state: present
-      - name: update ntp servers
+      - name: Update ntp servers
         template:
           src: chrony.conf.j2
-          dest: /etc/chrony.conf
+          dest: "{{ chrony_path }}"
           owner: root
           group: root
-          mode: u=rw,g=r,o=r
+          mode: "{{ ntp_mode }}"
           backup: yes
         notify:
           - restart chrony
           - sync chrony sources
-    when:  ( ansible_distribution == "CentOS" or   ansible_distribution == "RedHat" ) and ansible_distribution_major_version | int > 7
+    # Both operands are strings (the fact and the quoted variable); cast them
+    # to int so the comparison is numeric rather than lexicographic
+    # (as strings, "10" > "7" is false).
+    when: ( ansible_distribution == "CentOS" or ansible_distribution == "RedHat" ) and ansible_distribution_major_version | int > os_version | int

+ 26 - 1
slurm/roles/common/vars/main.yml

@@ -1,6 +1,31 @@
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+chrony_path: "/etc/chrony.conf"
+ntp_path: "/etc/ntp.conf"
+ntp_mode: "0644"
+os_higher_version: "8"
+os_version: "7"
+retry_count_one: "10"
+delay_count_one: "60"
+retry_count: "6"
+delay_count: "10"
+
 ntp_servers: 
   - 0.centos.pool.ntp.org
   - 1.centos.pool.ntp.org
   - 2.centos.pool.ntp.org
 chrony_servers:
-  - 2.centos.pool.ntp.org 
+  - 2.centos.pool.ntp.org

+ 0 - 104
slurm/roles/slurm-common/tasks/main.yml

@@ -1,104 +0,0 @@
-#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
----
-
-- name: install packages for slurm
-  package:
-    name:
-      - munge
-      - mariadb
-      - mariadb-devel
-      - python3
-    state: present
-  tags: install
-
-- name: create munge key
-  command: /usr/sbin/create-munge-key -f
-  tags: install
-
-- name: Copy munge key
-  copy:
-    src: munge.key
-    dest: /etc/munge
-    owner: munge
-    group: munge
-    mode: 0400
-  tags: install
-
-- name: Copy example Slurm Configuration - slurm.conf
-  copy:
-    src: slurm.conf
-    dest: /etc/slurm/
-    mode: 0644
-  tags: install
-
-
-- name: create SLURM Group
-  group:
-    name: slurm
-    state: present
-  tags: install
-
-- name: Add the user 'slurm' with uid 6001 and a primary group of 'slurm'
-  user:
-    name: slurm
-    comment: Slurm User Account
-    uid: 6001
-    group: slurm
-  tags: install
-
-- name: create SLURM log directory
-  file:
-    path: /var/log/slurm
-    state: directory
-    owner: slurm
-    group: slurm
-    mode: 0755
-    recurse: yes
-  tags: install
-
-- name: give slurm user permission to spool
-  file:
-    path: /var/spool/slurm
-    owner: slurm
-    group: slurm
-    state: directory
-    mode: 0755
-    recurse: yes
-
-- name: give slurm user permission to slurmctld
-  file:
-    path: /var/run/slurmctld.pid
-    owner: slurm
-    group: slurm
-    mode: 0755
-    state: touch
-
-- name: give slurm user permission to slurmd
-  file:
-    path: /var/run/slurmd.pid
-    owner: slurm
-    group: slurm
-    mode: 0755
-    state: touch
-
-- name: start munge service
-  service:
-    name: munge
-    state: restarted
-    enabled: yes
-  tags: install
-
-
-

+ 0 - 118
slurm/roles/slurm-manager/tasks/main.yml

@@ -1,118 +0,0 @@
-#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-
----
-- name: create download folder
-  file:
-    path: /root/Downloads
-    state: directory
-    mode: '0755'
-- name: Download Slurm source
-  get_url:
-    url: "{{ slurm_url }}"
-    dest: /root/Downloads/
-    checksum: "{{ slurm_md5 }}"
-    validate_certs: no    
-  tags: install
-
-- name: Build SLURM RPMs
-  command: rpmbuild -ta /root/Downloads/slurm-20.02.0.tar.bz2
-  tags: install
-
-- name: Copy RPMs to NFS share
-  copy:
-    src: "{{ item }}"
-    dest: /home/rpms/
-    mode: '0755'
-  with_fileglob:
-    - /root/rpmbuild/RPMS/x86_64/slurm*20*.rpm
-  tags: install
-
-- name: Install SLURM RPMs on Manager
-  yum:
-    name: "{{ item }}"
-    #name: "{{ query('fileglob', ['/home/rpms/slurm*20*.rpm']) }}" <-- how it should work to avoid loop
-  with_fileglob:
-    - /home/rpms/slurm*20*.rpm
-  tags: install
-
-- name: Firewall Rule slurm allow 6817/tcp
-  command: firewall-cmd  --zone=internal --add-port=6817/tcp --permanent
-  tags: install
-
-- name: Firewall Rule slurm allow 6818/tcp
-  command: firewall-cmd  --zone=internal --add-port=6818/tcp --permanent
-  tags: install
-
-- name: Firewall Rule slurm allow 6819/tcp
-  command: firewall-cmd  --zone=internal --add-port=6819/tcp --permanent
-  tags: install
-
-- name: Firewall Rule slurm allow all incoming traffic on internal network
-  command: firewall-cmd --permanent --zone=internal --add-rich-rule='rule family="ipv4" source address="192.168.1.0/24" accept'
-  tags: install
-
-- name: Firewall Reload
-  command: firewall-cmd  --reload
-  tags: install
-
-
-- name: Start MariaDB
-  service:
-    name: mariadb
-    state: restarted
-    enabled: yes
-  tags: install
-
-- name: Grant Permissions for SLURM DB
-  command: mysql -u root -e "GRANT ALL ON slurm_acct_db.* TO 'slurm'@'localhost' identified by 'password' with grant option;"
-  tags: install
-
-- name: Create slurmdbd.conf file
-  copy:
-    src: /etc/slurm/slurmdbd.conf.example
-    dest: /etc/slurm/slurmdbd.conf
-    mode: 0600
-  tags: install
-
-- name: Populate Accounting Database
-  command: slurmdbd
-  tags: install
-
-- name: Create Slurm Cluster
-  command: sacctmgr -i add cluster {{ inventory_hostname }}
-  tags: install
-
-- name: Create Default Slurm Group
-  command: sacctmgr -i add account defaultgroup Cluster={{inventory_hostname}} Description="Default Account" Organization="Default Org"
-  tags: install
-
-- name: Add root to the Default Account
-  command: sacctmgr -i add user root DefaultAccount=defaultgroup
-  tags: install
-
-- name: Start slurmctld on Manager
-  service:
-    name: slurmctld
-    state: restarted
-    enabled: yes
-  tags: install
-
-- name: Enable Slurmdbd on Manager
-  service:
-    name: slurmdbd
-    state: restarted
-    enabled: yes
-  tags: install
-

slurm/roles/slurm-common/files/munge.key → slurm/roles/slurm_common/files/munge.key


+ 14 - 14
slurm/roles/slurm-common/files/slurm.conf

@@ -8,25 +8,25 @@
 #
 # See the slurm.conf man page for more information.
 #
-ClusterName=friday
-ControlMachine=friday
-ControlAddr=10.0.0.1
+ClusterName=
+ControlMachine=
+#ControlAddr=
 #BackupController=
 #BackupAddr=
 #
-SlurmUser=slurm
+SlurmUser=
 #SlurmdUser=root
-SlurmctldPort=6817
-SlurmdPort=6818
+SlurmctldPort=
+SlurmdPort=
 AuthType=auth/munge
 #JobCredentialPrivateKey=
 #JobCredentialPublicCertificate=
-StateSaveLocation=/var/spool/slurm/ctld
-SlurmdSpoolDir=/var/spool/slurm/
+#StateSaveLocation=/var/spool/
+SlurmdSpoolDir=
 SwitchType=switch/none
 MpiDefault=none
-SlurmctldPidFile=/var/run/slurmctld.pid
-SlurmdPidFile=/var/run/slurmd.pid
+SlurmctldPidFile=
+SlurmdPidFile=
 ProctrackType=proctrack/pgid
 #PluginDir=
 #FirstJobId=
@@ -72,9 +72,9 @@ PriorityMaxAge=14-0
 #
 # LOGGING
 SlurmctldDebug=3
-SlurmctldLogFile=/var/log/slurm/slurmctld.log
+SlurmctldLogFile=
 SlurmdDebug=1
-SlurmdLogFile=/var/log/slurm/slurmd.log
+SlurmdLogFile=
 JobCompType=jobcomp/none
 #JobCompLoc=
 #
@@ -91,7 +91,7 @@ AccountingStorageType=accounting_storage/slurmdbd
 # COMPUTE NODES
 #NodeName=linux[1-32] Procs=1 State=UNKNOWN
 #NodeName=DEFAULT Sockets=2 CoresPerSocket=20 State=UNKNOWN
-NodeName=compute000 Sockets=2 CoresPerSocket=8
-NodeName=compute[002-005] CoresPerSocket=20
+NodeName= Sockets= CoresPerSocket=
+#NodeName=compute[002-005] CoresPerSocket=20
 PartitionName=normal Nodes=ALL Default=YES MaxTime=INFINITE State=UP
 #PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP

+ 164 - 0
slurm/roles/slurm_common/tasks/main.yml

@@ -0,0 +1,164 @@
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+- name: Install epel repository
+  package:
+    name: epel-release
+    state: present
+  tags: install
+
+- name: Munge installation
+  package:
+    name: munge-devel
+    enablerepo: PowerTools
+    state: present
+
+# Installs every package listed in common_packages. The list is handed to
+# the package manager directly instead of looping over it item by item.
+- name: Install packages for slurm
+  package:
+    name: "{{ common_packages }}"
+    state: present
+  tags: install
+
+- name: Create munge key
+  command: "{{ munge_cmd }}"
+  changed_when: true
+
+- name: Copy munge key
+  copy:
+    src: munge.key
+    dest: "{{ munge_dest }}"
+    owner: munge
+    group: munge
+    mode: "{{ munge_mode }}"
+  tags: install
+
+- name: Slurm configuration - slurm.conf
+  copy:
+    src: slurm.conf
+    dest: "{{ slurm_dest }}"
+    mode: "{{ slurm_mode }}"
+  tags: install
+
+# Fills in the "Key=" entries of the deployed slurm.conf, one lineinfile call
+# per setting. The regexp is anchored at the start of the line so that
+# commented-out entries ("#Key=...") are never the line that gets replaced.
+- name: Add slurm.conf settings
+  lineinfile:
+    path: "{{ slurm_confpth }}"
+    regexp: "^{{ item.key }}="
+    line: "{{ item.key }}={{ item.value }}"
+  with_items:
+    - key: ClusterName
+      value: "{{ cluster_name }}"
+    - key: SlurmUser
+      value: "{{ slurm_user }}"
+    - key: SlurmctldPort
+      value: "{{ slurmctld_port }}"
+    - key: SlurmdPort
+      value: "{{ slurmd_port }}"
+    - key: SlurmdSpoolDir
+      value: "{{ spool_pth }}"
+    - key: SlurmctldPidFile
+      value: "{{ slurmctld_pid }}"
+    - key: SlurmdPidFile
+      value: "{{ slurmd_pid }}"
+    - key: SlurmctldLogFile
+      value: "{{ slurmctld_log }}"
+    - key: SlurmdLogFile
+      value: "{{ slurmd_log }}"
+
+- name: Create slurm group
+  group:
+    name: slurm
+    state: present
+  tags: install
+
+- name: Add the user 'slurm' with uid 6001 and a primary group of 'slurm'
+  user:
+    name: slurm
+    comment: Slurm User Account
+    uid: "{{ slurm_uid }}"
+    group: slurm
+  tags: install
+
+- name: Create slurm log directory
+  file:
+    path: "{{ slurm_logpth }}"
+    state: directory
+    owner: slurm
+    group: slurm
+    mode: "{{ gen_mode }}"
+    recurse: yes
+  tags: install
+
+- name: Give slurm user permission to spool
+  file:
+    path: "{{ spool_pth }}"
+    owner: slurm
+    group: slurm
+    state: directory
+    mode: "{{ gen_mode }}"
+    recurse: yes
+
+- name: Give slurm user permission to slurmctld
+  file:
+    path: "{{ slurmctld_pid }}"
+    owner: slurm
+    group: slurm
+    mode: "{{ gen_mode }}"
+    state: touch
+
+- name: Give slurm user permission to slurmd
+  file:
+    path: "{{ slurmd_pid }}"
+    owner: slurm
+    group: slurm
+    mode: "{{ gen_mode }}"
+    state: touch
+
+- name: Start munge service
+  service:
+    name: munge
+    state: restarted
+    enabled: yes
+  tags: install

+ 42 - 0
slurm/roles/slurm_common/vars/main.yml

@@ -0,0 +1,42 @@
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+# EPEL release package URL.
+epel_url: https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm
+
+# Packages installed by the "Install packages for slurm" task.
+common_packages:
+  - munge
+  - munge-libs
+  - mariadb-server
+  - mariadb-devel
+  - python3
+
+# Munge key generation and deployment.
+munge_dest: "/etc/munge/"
+munge_cmd: "/usr/sbin/create-munge-key -f"
+munge_mode: "0400"
+
+# slurm.conf deployment target and the values written into it.
+slurm_mode: "0644"
+slurm_dest: "/etc/slurm/"
+slurm_confpth: "/etc/slurm/slurm.conf"
+slurm_user: "slurm"
+slurmctld_port: "6817"
+slurmd_port: "6818"
+slurm_uid: "6001"
+slurm_logpth: "/var/log/slurm/"
+# Mode applied to the slurm log/spool directories and pid files.
+gen_mode: "0755"
+spool_pth: "/var/spool/slurm/"
+slurmctld_pid: "/var/run/slurmctld.pid"
+slurmd_pid: "/var/run/slurmd.pid"
+cluster_name: "manager,compute"
+slurmctld_log: "/var/log/slurm/slurmctld.log"
+slurmd_log: "/var/log/slurm/slurmd.log"

+ 38 - 0
slurm/roles/slurm_manager/files/slurmdbd.conf

@@ -0,0 +1,38 @@
+#
+# Example slurmdbd.conf file.
+#
+# See the slurmdbd.conf man page for more information.
+#
+# Archive info
+#ArchiveJobs=yes
+#ArchiveDir="/tmp"
+#ArchiveSteps=yes
+#ArchiveScript=
+#JobPurge=12
+#StepPurge=1
+#
+# Authentication info
+AuthType=auth/munge
+#AuthInfo=/var/run/munge/munge.socket.2
+#
+# slurmDBD info
+DbdAddr=
+DbdHost=
+#DbdPort=7031
+SlurmUser=
+#MessageTimeout=300
+DebugLevel=verbose
+#DefaultQOS=normal,standby
+LogFile=
+PidFile=
+#PluginDir=/usr/lib/slurm
+#PrivateData=accounts,users,usage,jobs
+#TrackWCKey=yes
+#
+# Database info
+StorageType=accounting_storage/mysql
+#StorageHost=
+#StoragePort=
+#StoragePass=
+#StorageUser=
+#StorageLoc=

+ 174 - 0
slurm/roles/slurm_manager/tasks/main.yml

@@ -0,0 +1,174 @@
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+# Installs every package listed in slurm_packages. The list is handed to
+# the package manager directly instead of looping over it item by item.
+- name: Install packages for slurm
+  package:
+    name: "{{ slurm_packages }}"
+    state: present
+  tags: install
+
+# Installs the dev_tools packages with the PowerTools repository enabled for
+# this transaction; the list is passed in one call rather than looped over.
+- name: Install development tools
+  package:
+    name: "{{ dev_tools }}"
+    enablerepo: PowerTools
+    state: present
+  tags: install
+
+- name: Create temporary download folder for slurm
+  file:
+    path: "{{ tmp_path }}"
+    owner: slurm
+    group: slurm
+    mode: "{{ tmp_mode }}"
+    state: directory
+
+- name: Download slurm source
+  get_url:
+    url: "{{ slurm_url }}"
+    dest: "{{ tmp_path }}"
+    checksum: "{{ slurm_md5 }}"
+    validate_certs: no
+  tags: install
+
+- name: Build slurm rpms
+  command: rpmbuild -ta "{{ rpmbuild_path }}"
+  changed_when: false
+  args:
+    warn: no
+
+# Collect the RPM files found under rpm_path.
+# The earlier "rpm -qa" probe always exited 0, so the "rc != 0" guard on the
+# install step meant the RPMs were never installed.
+- name: Find built slurm rpms
+  find:
+    paths: "{{ rpm_path }}"
+    patterns: "*.rpm"
+  register: slurm_built_rpms
+
+# Install through the package manager: the command module performs no "~" or
+# glob expansion, and state=present is a no-op once the RPMs are installed.
+# The GPG check is disabled because these are local files, presumably unsigned.
+- name: Install rpms
+  package:
+    name: "{{ slurm_built_rpms.files | map(attribute='path') | list }}"
+    state: present
+    disable_gpg_check: true
+
+# Writes the ControlMachine entry of slurm.conf.
+# NOTE(review): group_names[0] is the host's first inventory group, not its
+# hostname - confirm this is the intended ControlMachine value.
+- name: Add control machine name
+  lineinfile:
+    path: "{{ slurm_confpth }}"
+    regexp: "ControlMachine="
+    line: "ControlMachine={{ group_names[0] }}"
+
+# SlurmUser in slurmdbd.conf is written by the "Add slurm user name" task that
+# follows "Create slurmdbd.conf file". Editing slurmdbd.conf here, before it
+# has been copied, fails on a fresh host because lineinfile does not create
+# missing files, so the premature duplicate task was dropped.
+
+- name: Firewall rule for slurm - tcp/ip,udp
+  firewalld:
+    zone: internal
+    port: "{{ item }}"
+    permanent: true
+    state: enabled
+  with_items:
+    - "{{ tcp_port1 }}"
+    - "{{ tcp_port2 }}"
+    - "{{ tcp_port3 }}"
+    - "{{ tcp_port4 }}"
+    - "{{ udp_port1 }}"
+    - "{{ udp_port2 }}"
+  tags: install
+
+- name: Get network address/subnet mask through ipaddr
+  set_fact:
+    network_address: "{{ (ansible_default_ipv4.network + '/' + ansible_default_ipv4.netmask) | ipaddr('network/prefix') }}"
+
+- name: Firewall rule slurm - allow all incoming traffic on internal network
+  firewalld:
+    zone: internal
+    rich_rule: 'rule family="{{ family }}" source address="{{ network_address }}" accept'
+    permanent: true
+    state: enabled
+  tags: install
+
+- name: Firewall reload
+  systemd:
+    name: firewalld
+    state: reloaded
+  tags: install
+
+- name: Start mariadb
+  service:
+    name: mariadb
+    state: restarted
+    enabled: yes
+  tags: install
+
+# Grants the slurm accounting DB privileges to db_user@db_host.
+# The password defaults to the previous literal for backward compatibility;
+# set slurm_db_password (ideally from a vault) to avoid the shared default.
+- name: Grant permissions for slurm db
+  command: mysql -u root -e "GRANT ALL ON slurm_acct_db.* TO '{{ db_user }}'@'{{ db_host }}' identified by '{{ slurm_db_password | default('password') }}' with grant option;"
+  tags: install
+  changed_when: true
+
+- name: Create slurmdbd.conf file
+  copy:
+    src: slurmdbd.conf
+    dest: "{{ slurmdbd_path }}"
+    mode: "{{ slurmdbd_mode }}"
+  tags: install
+
+- name: Add slurm user name
+  lineinfile:
+    path: "{{ slurmdbd_path }}"
+    regexp: "SlurmUser="
+    line: "SlurmUser={{ slurm_user }}"
+
+- name: Add db address
+  lineinfile:
+    path: "{{ slurmdbd_path }}"
+    regexp: "DbdAddr="
+    line: "DbdAddr={{ DbdAddr }}"
+
+- name: Add db host
+  lineinfile:
+    path: "{{ slurmdbd_path }}"
+    regexp: "DbdHost="
+    line: "DbdHost={{ DbdHost }}"
+
+- name: Add log file path
+  lineinfile:
+    path: "{{ slurmdbd_path }}"
+    regexp: "LogFile="
+    line: "LogFile={{ logfile }}"
+
+- name: Add pid file path
+  lineinfile:
+    path: "{{ slurmdbd_path }}"
+    regexp: "PidFile="
+    line: "PidFile={{ pidfile }}"
+
+- name: Populate accounting database
+  command: slurmdbd
+  tags: install
+  changed_when: true
+
+- name: Save slurm conf file in buffer
+  fetch:
+    src: "{{ slurm_confpth }}"
+    dest: "{{ buffer_path }}"
+    flat: true

+ 62 - 0
slurm/roles/slurm_manager/vars/main.yml

@@ -0,0 +1,62 @@
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+slurm_packages:
+   - python3
+   - gcc
+   - openssl
+   - openssl-devel
+   - numactl
+   - numactl-devel
+   - hwloc
+   - lua
+   - readline
+   - readline-devel
+   - pam-devel
+   - perl-ExtUtils-MakeMaker
+   - cpanm*
+   - rpm-build
+
+dev_tools:
+   - rrdtool-devel
+   - lua-devel
+   - hwloc-devel
+
+tmp_path: "/root/slurm-tmp"
+tmp_mode: "0755"
+slurm_url: https://download.schedmd.com/slurm/slurm-20.02.3.tar.bz2
+slurm_md5: "md5:c71a300d6c5d33ef8ca60e52a203bb1e"
+rpmbuild_path: "/root/slurm-tmp/slurm-20.02.3.tar.bz2"
+rpm_loop: "/rpmbuild/RPMS/x86_64/*.rpm"
+tcp_port1: "6817/tcp"
+tcp_port2: "6818/tcp"
+tcp_port3: "6819/tcp"
+tcp_port4: "7321/tcp"
+udp_port1: "6817/udp"
+udp_port2: "7321/udp"
+family: "ipv4"
+db_user: "slurm"
+db_host: "localhost"
+slurmdbd_path: "/etc/slurm/slurmdbd.conf"
+slurmdbd_mode: "0600"
+slurm_confpth: "/etc/slurm/slurm.conf"
+slurm_user: "slurm"
+DbdAddr: "localhost"
+DbdHost: "localhost"
+logfile: "/var/log/slurm/slurmdbd.log"
+pidfile: "/var/run/slurm/slurmdbd.pid"
+buffer_path: "/tmp/slurm.conf"
+rpm_path: "/root/rpmbuild/RPMS/x86_64/"
+slurm_mode: "0644"

+ 64 - 0
slurm/roles/slurm_start_services/tasks/main.yml

@@ -0,0 +1,64 @@
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+- name: Include common variables
+  include_vars: ../../slurm_manager/vars/main.yml
+
+- name: Copy slurm conf from buffer
+  copy:
+    src: "{{ buffer_path }}"
+    dest: "{{ slurm_confpth }}"
+    mode: "{{ slurm_mode }}"
+
+- name: Start slurmctld on manager
+  service:
+    name: slurmctld
+    enabled: yes
+  tags: install
+
+- name: Enable slurmdbd on manager
+  service:
+    name: slurmdbd
+    enabled: yes
+  tags: install
+
+# Query the accounting database for this host's cluster entry.
+# -n drops the header and -P prints untruncated, '|'-delimited fields.
+- name: Show cluster if exists
+  command: sacctmgr -n -P show cluster {{ inventory_hostname }}
+  register: slurm_clusterlist
+  changed_when: false
+
+# find() returns -1 when the name is absent; the previous "== 1" test only
+# matched a name starting at index 1, so the cluster was never created.
+- name: Create slurm cluster
+  command: sacctmgr -i add cluster {{ inventory_hostname }}
+  when: slurm_clusterlist.stdout.find(inventory_hostname) == -1
+
+# List existing account names, one per line (-n: no header, -P: untruncated
+# parsable output). Tagged like the task that consumes the result so a
+# tag-filtered run still has account_added defined.
+- name: Show account
+  command: sacctmgr -n -P show account format=Account
+  register: account_added
+  changed_when: false
+  tags: install
+
+# Create the account only when it is missing. The previous guard searched the
+# account listing for the hostname and compared find() with 1, so it did not
+# test whether "defaultgroup" already existed.
+- name: Create default slurm group
+  command: sacctmgr -i add account defaultgroup Cluster={{ inventory_hostname }} Description="Default Account" Organization="Default Org"
+  when: "'defaultgroup' not in account_added.stdout_lines"
+  tags: install
+
+# List users as "user|default account" lines (-n: no header, -P: untruncated
+# parsable output). Tagged like the task that consumes the result.
+- name: Check if user exists
+  command: sacctmgr -n -P show user format=User,DefaultAccount
+  register: user_added
+  changed_when: false
+  tags: install
+
+# Add root only when it is not already mapped to defaultgroup. The previous
+# guard ignored user_added and re-tested the account listing instead.
+- name: Add root to the default account
+  command: sacctmgr -i add user root DefaultAccount=defaultgroup
+  when: "'root|defaultgroup' not in user_added.stdout_lines"
+  tags: install

+ 0 - 22
slurm/roles/start-slurm-workers/tasks/main.yml

@@ -1,22 +0,0 @@
-#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
----
-- name: Install SLURM RPMs on compute
-  yum:
-    name: "{{ item }}"
-    #name: "{{ query('fileglob', ['/home/rpms/slurm*20*.rpm']) }}" <-- how it should work to avoid loop
-  with_fileglob:
-    - /home/rpms/slurm*20*.rpm
-  tags: install
-

+ 97 - 0
slurm/roles/start_slurm_workers/tasks/main.yml

@@ -0,0 +1,97 @@
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+- name: Include common variables
+  include_vars: ../../slurm_manager/vars/main.yml
+
+- name: Copy slurm conf from buffer
+  copy:
+    src: "{{ buffer_path }}"
+    dest: "{{ slurm_confpth }}"
+    mode: "{{ slurm_mode }}"
+
+# Installs every package listed in slurm_packages. The list is handed to
+# the package manager directly instead of looping over it item by item.
+- name: Install packages for slurm
+  package:
+    name: "{{ slurm_packages }}"
+    state: present
+  tags: install
+
+# Installs the dev_tools packages with the PowerTools repository enabled for
+# this transaction; the list is passed in one call rather than looped over.
+- name: Install development tools
+  package:
+    name: "{{ dev_tools }}"
+    enablerepo: PowerTools
+    state: present
+  tags: install
+
+- name: Create temporary download folder for slurm
+  file:
+    path: "{{ tmp_path }}"
+    owner: slurm
+    group: slurm
+    mode: "{{ tmp_mode }}"
+    state: directory
+
+- name: Download slurm source
+  get_url:
+    url: "{{ slurm_url }}"
+    dest: "{{ tmp_path }}"
+    checksum: "{{ slurm_md5 }}"
+    validate_certs: no
+  tags: install
+
+- name: Build slurm rpms
+  command: rpmbuild -ta "{{ rpmbuild_path }}"
+  changed_when: false
+  args:
+    warn: no
+
+# Collect the RPM files found under rpm_path.
+# The earlier "rpm -qa" probe always exited 0, so the "rc != 0" guard on the
+# install step meant the RPMs were never installed.
+- name: Find built slurm rpms
+  find:
+    paths: "{{ rpm_path }}"
+    patterns: "*.rpm"
+  register: slurm_built_rpms
+
+# Install through the package manager: the command module performs no "~" or
+# glob expansion, and state=present is a no-op once the RPMs are installed.
+# The GPG check is disabled because these are local files, presumably unsigned.
+- name: Install rpms
+  package:
+    name: "{{ slurm_built_rpms.files | map(attribute='path') | list }}"
+    state: present
+    disable_gpg_check: true
+
+- name: Add socket and core info
+  lineinfile:
+    path: "{{ slurm_confpth }}"
+    regexp: "NodeName= Sockets= CoresPerSocket="
+    line: "NodeName={{ group_names[0] }} Sockets={{ hostvars[inventory_hostname]['ansible_facts']['processor_count'] }}
+      CoresPerSocket={{ hostvars[inventory_hostname]['ansible_facts']['processor_cores'] }}"
+
+- name: Save slurm conf in buffer
+  fetch:
+    src: "{{ slurm_confpth }}"
+    dest: "{{ buffer_path }}"
+    flat: true
+
+- name: Start slurmd on compute nodes
+  service:
+    name: slurmd.service
+    enabled: yes
+  tags: install

+ 16 - 9
slurm/slurm.yml

@@ -12,26 +12,33 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 ---
+
 #Playbook for installing Slurm on a cluster
 
 # Gather facts from every host; later plays that set gather_facts: false reuse them
 - hosts: all
 
-# Apply Common Installation and Config
-- hosts: cluster
+- name: Apply common installation and config
+  hosts: manager,compute
   gather_facts: false
   roles:
+    - slurm_common
     - common
-    - slurm-common
 
-# Apply Manager Config, start services
-- hosts: manager
+- name: Apply manager config
+  hosts: manager
   gather_facts: false
   roles:
-    - slurm-manager
+    - slurm_manager
+
+- name: Start slurm workers
+  hosts: compute
+  gather_facts: true
+  roles:
+    - start_slurm_workers
 
-# Start SLURM workers
-- hosts: compute
+- name: Start services
+  hosts: manager
   gather_facts: false
   roles:
-    - start-slurm-workers
+    - slurm_start_services