Browse Source

Merge branch 'devel' into update-nfs-client-variables

John Lockman 4 years ago
parent
commit
fafd3725d3

+ 11 - 2
roles/slurm_common/tasks/main.yml

@@ -14,7 +14,7 @@
 ---
 ---
 
 
 - name: Get hostname
 - name: Get hostname
-  command: hostname -s
+  command: hostname
   register: host_name
   register: host_name
   changed_when: true
   changed_when: true
 
 
@@ -29,7 +29,7 @@
 - name: Add host name in hosts file
 - name: Add host name in hosts file
   lineinfile:
   lineinfile:
     dest: "{{ hosts_dest }}"
     dest: "{{ hosts_dest }}"
-    line: "{{ hostvars[inventory_hostname]['ansible_default_ipv4']['address'] }} {{ host_name.stdout }}"
+    line: "{{ inventory_hostname }} {{ host_name.stdout }}"
     state: present
     state: present
     create: yes
     create: yes
     mode: "{{ common_mode }}"
     mode: "{{ common_mode }}"
@@ -155,6 +155,15 @@
     mode: "{{ gen_mode }}"
     mode: "{{ gen_mode }}"
     recurse: yes
     recurse: yes
 
 
+- name: Give slurm user permission to spool directory
+  file:
+    path: "{{ spool_dir }}"
+    owner: slurm
+    group: slurm
+    state: directory
+    mode: "{{ common_mode }}"
+    recurse: yes
+
 - name: Create slurm pid directory
 - name: Create slurm pid directory
   file:
   file:
     path: "{{ slurm_pidpth }}"
     path: "{{ slurm_pidpth }}"

+ 1 - 2
roles/slurm_common/vars/main.yml

@@ -13,8 +13,6 @@
 #  limitations under the License.
 #  limitations under the License.
 ---
 ---
 
 
-epel_url: https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
-
 common_packages:
 common_packages:
    - munge
    - munge
    - munge-libs
    - munge-libs
@@ -41,6 +39,7 @@ slurm_uid: "6001"
 slurm_logpth: "/var/log/slurm/"
 slurm_logpth: "/var/log/slurm/"
 slurm_pidpth: "/var/run/slurm/"
 slurm_pidpth: "/var/run/slurm/"
 gen_mode: "0755"
 gen_mode: "0755"
+spool_dir: "/var/spool/"
 spool_pth: "/var/spool/slurm/"
 spool_pth: "/var/spool/slurm/"
 slurmctld_pid: "/var/run/slurmctld.pid"
 slurmctld_pid: "/var/run/slurmctld.pid"
 slurmd_pid: "/var/run/slurmd.pid"
 slurmd_pid: "/var/run/slurmd.pid"

+ 14 - 10
roles/slurm_manager/tasks/main.yml

@@ -29,7 +29,7 @@
     mode: "{{ tmp_mode }}"
     mode: "{{ tmp_mode }}"
     state: touch
     state: touch
 
 
-- name: Create slurmctld log file on master
+- name: Create slurmctld log file on manager
   file:
   file:
     path: "{{ slurm_logpth }}"
     path: "{{ slurm_logpth }}"
     owner: slurm
     owner: slurm
@@ -38,14 +38,14 @@
   with_items:
   with_items:
     - slurmctld.log
     - slurmctld.log
 
 
-- name: Create log files on master
+- name: Create log files on manager
   file:
   file:
     path: "{{ slurm_logpth }}"
     path: "{{ slurm_logpth }}"
     owner: slurm
     owner: slurm
     mode: "{{ tmp_mode }}"
     mode: "{{ tmp_mode }}"
     state: touch
     state: touch
   with_items:
   with_items:
-    - "{{ log_files_master }}"
+    - "{{ log_files_manager }}"
 
 
 - name: Install packages for slurm
 - name: Install packages for slurm
   package:
   package:
@@ -86,7 +86,7 @@
     warn: no
     warn: no
 
 
 - name: Verify package md5
 - name: Verify package md5
-  command: rpm -qa
+  shell: rpm -qa | grep slurm
   ignore_errors: true
   ignore_errors: true
   register: verify_result
   register: verify_result
   changed_when: no
   changed_when: no
@@ -100,9 +100,10 @@
     chdir: "{{ rpm_path }}"
     chdir: "{{ rpm_path }}"
     warn: no
     warn: no
   changed_when: true
   changed_when: true
+  when: verify_result.rc != 0
 
 
 - name: Get the hostname
 - name: Get the hostname
-  command: hostname -s
+  command: hostname
   register: machine_name
   register: machine_name
   changed_when: true
   changed_when: true
 
 
@@ -147,13 +148,13 @@
   when: "'manager' in group_names"
   when: "'manager' in group_names"
   tags: firewalld
   tags: firewalld
 
 
-- name: Get network address/subnet mask through ipaddr
+- name: Get network address/subnet mask
   set_fact:
   set_fact:
     network_address: "{{ (ansible_default_ipv4.network + '/' + ansible_default_ipv4.netmask) | ipaddr('network/prefix') }}"
     network_address: "{{ (ansible_default_ipv4.network + '/' + ansible_default_ipv4.netmask) | ipaddr('network/prefix') }}"
 
 
 - name: Firewall rule slurm - allow all incoming traffic on internal network
 - name: Firewall rule slurm - allow all incoming traffic on internal network
   firewalld:
   firewalld:
-    zone: internal
+    zone: public
     rich_rule: 'rule family="{{ family }}" source address="{{ network_address }}" accept'
     rich_rule: 'rule family="{{ family }}" source address="{{ network_address }}" accept'
     permanent: true
     permanent: true
     state: enabled
     state: enabled
@@ -172,7 +173,10 @@
   tags: install
   tags: install
 
 
 - name: Grant permissions for slurm db
 - name: Grant permissions for slurm db
-  command: mysql -u root -e "GRANT ALL ON slurm_acct_db.* TO '{{ db_user }}'@'{{ db_host }}' identified by '{{ db_password[0] }}'with grant option;"
+  command: >-
+    mysql -u root -e "GRANT ALL ON slurm_acct_db.* TO '{{ db_user }}'@'{{
+    db_host }}' identified by '{{ hostvars['127.0.0.1']['db_password'] }}'with
+    grant option;"
   tags: install
   tags: install
   changed_when: true
   changed_when: true
 
 
@@ -206,7 +210,7 @@
   lineinfile:
   lineinfile:
     path: "{{ slurmdbd_path }}"
     path: "{{ slurmdbd_path }}"
     regexp: "StoragePass="
     regexp: "StoragePass="
-    line: "StoragePass={{ db_password[0] }}"
+    line: "StoragePass={{ hostvars['127.0.0.1']['db_password'] }}"
 
 
 - name: Add storage user
 - name: Add storage user
   lineinfile:
   lineinfile:
@@ -230,4 +234,4 @@
   fetch:
   fetch:
     src: "{{ slurm_confpth }}"
     src: "{{ slurm_confpth }}"
     dest: "{{ buffer_path }}"
     dest: "{{ buffer_path }}"
-    flat: true
+    flat: true

+ 1 - 1
roles/slurm_manager/vars/main.yml

@@ -38,7 +38,7 @@ dev_tools:
    - ncurses-devel
    - ncurses-devel
    - gtk2-devel
    - gtk2-devel
 
 
-log_files_master:
+log_files_manager:
    - slurm_jobacct.log
    - slurm_jobacct.log
    - slurm_jobcomp.log
    - slurm_jobcomp.log
 
 

+ 6 - 8
roles/slurm_start_services/tasks/main.yml

@@ -32,7 +32,7 @@
   tags: install
   tags: install
 
 
 - name: Start slurmctld on manager
 - name: Start slurmctld on manager
-  systemd:
+  service:
     name: slurmctld
     name: slurmctld
     state: started
     state: started
   tags: install
   tags: install
@@ -44,24 +44,22 @@
 
 
 - name: Create slurm cluster
 - name: Create slurm cluster
   command: sacctmgr -i add cluster {{ cluster_name }}
   command: sacctmgr -i add cluster {{ cluster_name }}
-  when: slurm_clusterlist.stdout.find(cluster_name) == 1
+  when: not slurm_clusterlist.stdout
 
 
 - name: Show account
 - name: Show account
-  command: sacctmgr show account
+  command: sacctmgr show account -s
   register: account_added
   register: account_added
   changed_when: false
   changed_when: false
 
 
 - name: Create default slurm group
 - name: Create default slurm group
   command: sacctmgr -i add account defaultgroup Cluster={{ cluster_name }} Description="Default Account" Organization="Default Org"
   command: sacctmgr -i add account defaultgroup Cluster={{ cluster_name }} Description="Default Account" Organization="Default Org"
-  when: account_added.stdout.find(cluster_name) == 1
-  tags: install
+  when: account_added.rc != 0
 
 
 - name: Check if user exists
 - name: Check if user exists
-  command: sacctmgr show user
+  command: sacctmgr show user -s
   register: user_added
   register: user_added
   changed_when: false
   changed_when: false
 
 
 - name: Add root to the default account
 - name: Add root to the default account
   command: sacctmgr -i add user root DefaultAccount=defaultgroup
   command: sacctmgr -i add user root DefaultAccount=defaultgroup
-  when: account_added.stdout.find(cluster_name) == 1
-  tags: install
+  when: user_added.rc != 0

+ 3 - 3
roles/slurm_workers/tasks/main.yml

@@ -92,7 +92,7 @@
     warn: no
     warn: no
 
 
 - name: Verify package md5
 - name: Verify package md5
-  command: rpm -qa
+  shell: rpm -qa | grep slurm
   ignore_errors: true
   ignore_errors: true
   register: verify_result
   register: verify_result
   changed_when: no
   changed_when: no
@@ -106,9 +106,10 @@
     chdir: "{{ rpm_path }}"
     chdir: "{{ rpm_path }}"
     warn: no
     warn: no
   changed_when: true
   changed_when: true
+  when: verify_result.rc != 0
 
 
 - name: Get the hostname
 - name: Get the hostname
-  command: hostname -s
+  command: hostname
   register: machine_name
   register: machine_name
   changed_when: true
   changed_when: true
 
 
@@ -119,7 +120,6 @@
     line: "NodeName={{ machine_name.stdout }} Sockets={{ hostvars[inventory_hostname]['ansible_facts']['processor_count'] }}
     line: "NodeName={{ machine_name.stdout }} Sockets={{ hostvars[inventory_hostname]['ansible_facts']['processor_count'] }}
       CoresPerSocket={{ hostvars[inventory_hostname]['ansible_facts']['processor_cores'] }}"
       CoresPerSocket={{ hostvars[inventory_hostname]['ansible_facts']['processor_cores'] }}"
 
 
-
 - name: Save slurm conf in buffer
 - name: Save slurm conf in buffer
   fetch:
   fetch:
     src: "{{ slurm_confpth }}"
     src: "{{ slurm_confpth }}"