Browse Source

refactor playbooks

Signed-off-by: John Lockman <jlockman3@gmail.com>
John Lockman cách đây 5 năm
mục cha
commit
008c1b9788

BIN
roles/slurm-common/files/munge.key


+ 0 - 97
roles/slurm-common/files/slurm.conf

@@ -1,97 +0,0 @@
-#
-# Example slurm.conf file. Please run configurator.html
-# (in doc/html) to build a configuration file customized
-# for your environment.
-#
-#
-# slurm.conf file generated by configurator.html.
-#
-# See the slurm.conf man page for more information.
-#
-ClusterName=friday
-ControlMachine=friday
-ControlAddr=10.0.0.1
-#BackupController=
-#BackupAddr=
-#
-SlurmUser=slurm
-#SlurmdUser=root
-SlurmctldPort=6817
-SlurmdPort=6818
-AuthType=auth/munge
-#JobCredentialPrivateKey=
-#JobCredentialPublicCertificate=
-StateSaveLocation=/var/spool/slurm/ctld
-SlurmdSpoolDir=/var/spool/slurm/
-SwitchType=switch/none
-MpiDefault=none
-SlurmctldPidFile=/var/run/slurmctld.pid
-SlurmdPidFile=/var/run/slurmd.pid
-ProctrackType=proctrack/pgid
-#PluginDir=
-#FirstJobId=
-ReturnToService=2
-#MaxJobCount=
-#PlugStackConfig=
-#PropagatePrioProcess=
-#PropagateResourceLimits=
-#PropagateResourceLimitsExcept=
-#Prolog=
-#Epilog=
-#SrunProlog=
-#SrunEpilog=
-#TaskProlog=
-#TaskEpilog=
-#TaskPlugin=
-#TrackWCKey=no
-#TreeWidth=50
-#TmpFS=
-#UsePAM=
-#
-# TIMERS
-SlurmctldTimeout=300
-SlurmdTimeout=300
-InactiveLimit=0
-MinJobAge=300
-KillWait=30
-Waittime=0
-#
-# SCHEDULING
-SchedulerType=sched/backfill
-#SchedulerAuth=
-SelectType=select/linear
-#FastSchedule=1
-PriorityType=priority/multifactor
-PriorityDecayHalfLife=14-0
-#PriorityUsageResetPeriod=14-0
-PriorityWeightFairshare=100000
-PriorityWeightAge=1000
-PriorityWeightPartition=10000
-PriorityWeightJobSize=1000
-PriorityMaxAge=14-0
-#
-# LOGGING
-SlurmctldDebug=3
-SlurmctldLogFile=/var/log/slurm/slurmctld.log
-SlurmdDebug=1
-SlurmdLogFile=/var/log/slurm/slurmd.log
-JobCompType=jobcomp/none
-#JobCompLoc=
-#
-# ACCOUNTING
-JobAcctGatherType=jobacct_gather/linux
-JobAcctGatherFrequency=30
-#
-AccountingStorageType=accounting_storage/slurmdbd
-#AccountingStorageHost=
-#AccountingStorageLoc=
-#AccountingStoragePass=
-#AccountingStorageUser=
-#
-# COMPUTE NODES
-#NodeName=linux[1-32] Procs=1 State=UNKNOWN
-#NodeName=DEFAULT Sockets=2 CoresPerSocket=20 State=UNKNOWN
-NodeName=compute000 Sockets=2 CoresPerSocket=8
-NodeName=compute[002-005] CoresPerSocket=20
-PartitionName=normal Nodes=ALL Default=YES MaxTime=INFINITE State=UP
-#PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP

+ 0 - 91
roles/slurm-common/tasks/main.yaml

@@ -1,91 +0,0 @@
----
-
-- name: install packages for slurm
-  yum: 
-    name:
-      - munge
-      - mariadb
-      - mariadb-devel
-      - python3
-    state: present
-  tags: install
-
-- name: create munge key
-  command: /usr/sbin/create-munge-key -f
-  tags: install
-
-- name: Copy munge key
-  copy:
-    src: munge.key
-    dest: /etc/munge
-    owner: munge
-    group: munge
-    mode: 0400
-  tags: install
-
-- name: Copy example Slurm Configuration - slurm.conf
-  copy:
-    src: slurm.conf
-    dest: /etc/slurm/
-    mode: 0644
-  tags: install
-
-
-- name: create SLURM Group
-  group: 
-    name: slurm 
-    state: present
-  tags: install
-
-- name: Add the user 'slurm' with uid 6001 and a primary group of 'slurm'
-  user:
-    name: slurm
-    comment: Slurm User Account
-    uid: 6001
-    group: slurm
-  tags: install
-
-- name: create SLURM log directory
-  file:
-    path: /var/log/slurm
-    state: directory
-    owner: slurm
-    group: slurm
-    mode: 0755
-    recurse: yes
-  tags: install
-
-- name: give slurm user permission to spool
-  file: 
-    path: /var/spool/slurm
-    owner: slurm
-    group: slurm
-    state: directory
-    mode: 0755
-    recurse: yes
-
-- name: give slurm user permission to slurmctld
-  file: 
-    path: /var/run/slurmctld.pid
-    owner: slurm
-    group: slurm
-    mode: 0755
-    state: touch
-
-- name: give slurm user permission to slurmd
-  file: 
-    path: /var/run/slurmd.pid
-    owner: slurm
-    group: slurm
-    mode: 0755
-    state: touch
-
-- name: start munge service
-  service:
-    name: munge 
-    state: restarted
-    enabled: yes
-  tags: install
-
-
-

+ 0 - 98
roles/slurm-master/tasks/main.yaml

@@ -1,98 +0,0 @@
----
-
-- name: Download Slurm source
-  get_url:
-    url: "{{ slurm_url }}"
-    dest: /root/Downloads/
-    checksum: "{{ slurm_md5 }}" 
-  tags: install
-
-- name: Build SLURM RPMs
-  command: rpmbuild -ta /root/Downloads/slurm-20.02.0.tar.bz2 
-  tags: install
-
-- name: Copy RPMs to NFS share
-  copy:
-    src: "{{ item }}" 
-    dest: /home/rpms/
-  with_fileglob:
-    - /root/rpmbuild/RPMS/x86_64/slurm*20*.rpm
-  tags: install
-
-- name: Install SLURM RPMs on Master
-  yum: 
-    name: "{{ item }}"
-    #name: "{{ query('fileglob', ['/home/rpms/slurm*20*.rpm']) }}" <-- how it should work to avoid loop
-  with_fileglob:
-    - /home/rpms/slurm*20*.rpm
-  tags: install
-
-- name: Firewall Rule slurm allow 6817/tcp
-  command: firewall-cmd  --zone=internal --add-port=6817/tcp --permanent
-  tags: install
-
-- name: Firewall Rule slurm allow 6818/tcp
-  command: firewall-cmd  --zone=internal --add-port=6818/tcp --permanent
-  tags: install
-
-- name: Firewall Rule slurm allow 6819/tcp
-  command: firewall-cmd  --zone=internal --add-port=6819/tcp --permanent
-  tags: install
-
-- name: Firewall Rule slurm allow all incoming traffic on internal network
-  command: firewall-cmd --permanent --zone=internal --add-rich-rule='rule family="ipv4" source address="192.168.1.0/24" accept'
-  tags: install
-
-- name: Firewall Reload
-  command: firewall-cmd  --reload
-  tags: install
-
-
-- name: Start MariaDB 
-  service:
-    name: mariadb
-    state: restarted
-    enabled: yes
-  tags: install
-
-- name: Grant Permissions for SLURM DB
-  command: mysql -u root -e "GRANT ALL ON slurm_acct_db.* TO 'slurm'@'localhost' identified by 'password' with grant option;"
-  tags: install
-
-- name: Create slurmdbd.conf file
-  copy:
-    src: /etc/slurm/slurmdbd.conf.example
-    dest: /etc/slurm/slurmdbd.conf
-    mode: 0600
-  tags: install
-
-- name: Populate Accounting Database
-  command: slurmdbd
-  tags: install
-
-- name: Create Slurm Cluster
-  command: sacctmgr -i add cluster {{inventory_hostname}}
-  tags: install
-
-- name: Create Default Slurm Group
-  command: sacctmgr -i add account defaultgroup Cluster={{inventory_hostname}} Description="Default Account" Organization="Default Org"
-  tags: install
-
-- name: Add root to the Default Account 
-  command: sacctmgr -i add user root DefaultAccount=defaultgroup
-  tags: install
-
-- name: Start slurmctld on Master
-  service: 
-    name: slurmctld
-    state: restarted
-    enabled: yes
-  tags: install
-
-- name: Enable Slurmdbd on Master
-  service: 
-    name: slurmdbd
-    state: restarted
-    enabled: yes
-  tags: install
-

+ 0 - 9
roles/start-slurm-workers/tasks/main.yml

@@ -1,9 +0,0 @@
----
-- name: Install SLURM RPMs on compute
-  yum:
-    name: "{{ item }}"
-    #name: "{{ query('fileglob', ['/home/rpms/slurm*20*.rpm']) }}" <-- how it should work to avoid loop
-  with_fileglob:
-    - /home/rpms/slurm*20*.rpm
-  tags: install
-

+ 0 - 23
slurm-cluster.yaml

@@ -1,23 +0,0 @@
----
-#Playbook for installing Slurm on a cluster 
-
-#collect info from everything
-- hosts: all
-
-# Apply Common Installation and Config
-- hosts: cluster
-  gather_facts: false
-  roles:
-    - slurm-common
-
-# Apply Master Config, start services
-- hosts: master
-  gather_facts: false
-  roles:
-    - slurm-master
-
-# Start SLURM workers
-- hosts: compute
-  gather_facts: false
-  roles:
-    - start-slurm-workers

+ 0 - 18
slurm_inventory_file

@@ -1,18 +0,0 @@
-[master]
-friday
-
-[master:vars]
-slurm_url=https://download.schedmd.com/slurm/slurm-20.02.0.tar.bz2
-slurm_md5=md5:8ed2257471ff24ca213b510a4c1c3563
-
-[compute]
-compute000
-compute[002:005]
-
-
-[workers:children]
-compute
-
-[cluster:children]
-master
-workers