Browse Source

Merge pull request #185 from j0hnL/bugfix

Bugfix
Lucas A. Wilson 4 years ago
parent
commit
657e81709a

+ 18 - 14
omnia.yml

@@ -13,12 +13,12 @@
 # limitations under the License.
 ---
 
-- name: Validate the cluster
-  hosts: localhost
-  connection: local
-  gather_facts: no
-  roles:
-    - cluster_validation
+#- name: Validate the cluster
+# hosts: localhost
+# connection: local
+# gather_facts: no
+# roles:
+#   - cluster_validation
 
 - name: Gather facts from all the nodes
   hosts: all
@@ -76,14 +76,18 @@
   gather_facts: false
   roles:
     - k8s_nfs_server_setup
-  tags: kubernetes
+  tags: 
+    - kubernetes
+    - nfs
 
 - name: Apply NFS client setup on compute nodes
   hosts: compute
   gather_facts: false
   roles:
     - k8s_nfs_client_setup
-  tags: kubernetes
+  tags: 
+    - kubernetes
+    - nfs
 
 - name: Start K8s on manager server
   hosts: manager
@@ -134,9 +138,9 @@
     - slurm_start_services
   tags: slurm
 
-- name: Install slurm exporter
-  hosts: manager
-  gather_facts: false
-  roles:
-    - slurm_exporter
-  tags: slurm
+#- name: Install slurm exporter
+# hosts: manager
+# gather_facts: false
+# roles:
+#   - slurm_exporter
+# tags: slurm

+ 8 - 1
roles/common/tasks/main.yml

@@ -39,6 +39,12 @@
     state: present
   tags: install
 
+- name: Add docker community edition repository
+  get_url:
+    url: "{{ docker_repo_url }}"
+    dest: "{{ docker_repo_dest }}"
+  tags: install
+
 - name: Disable swap
   command: /sbin/swapoff -a
   changed_when: true
@@ -70,4 +76,5 @@
 
 - name: Install Nvidia drivers and software components
   include_tasks: nvidia.yml
-  when: ansible_local.inventory.nvidia_gpu > 0
+  when: ansible_local.inventory.nvidia_gpu > 0
+  tags: install

+ 37 - 17
roles/common/tasks/nvidia.yml

@@ -13,17 +13,44 @@
 #  limitations under the License.
 ---
 
-- name: Add nvidia-docker2 Repo
-  get_url:
-    url: "{{ nvidia_docker_repo_url }}"
-    dest: "{{ nvidia_docker_repo_dest }}"
-  tags: install, testing
-
 - name: Add libnvidia container Repo
-  get_url:
-    url: "{{ nvidia_container_repo_url }}"
-    dest: "{{ nvidia_container_repo_dest }}"
-  tags: install, testing
+  yum_repository:
+    name: libnvidia-container
+    description:  libnvidia-container
+    baseurl: https://nvidia.github.io/libnvidia-container/stable/centos7/$basearch
+    repo_gpgcheck: no
+    gpgcheck: no
+    gpgkey: https://nvidia.github.io/libnvidia-container/gpgkey
+    sslverify: yes
+    sslcacert: /etc/pki/tls/certs/ca-bundle.crt
+    enabled: yes
+  tags: install
+
+- name: Add nvidia-container-runtime Repo 
+  yum_repository:
+    name: nvidia-container-runtime
+    description:  nvidia-container-runtime
+    baseurl: https://nvidia.github.io/nvidia-container-runtime/stable/centos7/$basearch
+    repo_gpgcheck: no
+    gpgcheck: no
+    gpgkey: https://nvidia.github.io/nvidia-container-runtime/gpgkey
+    sslverify: yes
+    sslcacert: /etc/pki/tls/certs/ca-bundle.crt
+    enabled: yes
+  tags: install
+
+- name: Add nvidia-docker Repo 
+  yum_repository:
+    name: nvidia-docker
+    description:  nvidia-docker
+    baseurl: https://nvidia.github.io/nvidia-docker/centos7/$basearch
+    repo_gpgcheck: no
+    gpgcheck: no
+    gpgkey: https://nvidia.github.io/nvidia-docker/gpgkey
+    enabled: yes
+    sslverify: yes
+    sslcacert: /etc/pki/tls/certs/ca-bundle.crt
+  tags: install
 
 - name: Install nvidia driver and nvidia-docker2
   package:
@@ -52,10 +79,3 @@
     enabled: yes
     daemon_reload: yes
   tags: install
-
-- name: Restart and enable kubernetes - kubelet
-  service:
-    name: kubelet
-    state: restarted
-    enabled: yes
-  tags: install

+ 6 - 1
roles/common/vars/main.yml

@@ -23,6 +23,7 @@ common_packages:
   - nvidia-detect
   - chrony
   - pciutils
+  - docker-ce
 
 custom_fact_dir: /etc/ansible/facts.d
 
@@ -36,6 +37,10 @@ elrepo_gpg_key_url: https://www.elrepo.org/RPM-GPG-KEY-elrepo.org
 
 elrepo_rpm_url: https://www.elrepo.org/elrepo-release-7.el7.elrepo.noarch.rpm
 
+docker_repo_url: https://download.docker.com/linux/centos/docker-ce.repo
+
+docker_repo_dest: /etc/yum.repos.d/docker-ce.repo
+
 chrony_path: "/etc/chrony.conf"
 ntp_path: "/etc/ntp.conf"
 ntp_mode: "0644"
@@ -63,4 +68,4 @@ nvidia_packages:
   - nvidia-docker2
 
 daemon_file_dest: /etc/docker/
-daemon_file_mode: 0644
+daemon_file_mode: 0644

+ 11 - 19
roles/k8s_common/tasks/main.yml

@@ -14,18 +14,16 @@
 ---
 
 - name: Add kubernetes repo
-  copy:
-    src: kubernetes.repo
-    dest: "{{ k8s_repo_dest }}"
-    owner: root
-    group: root
-    mode: "{{ k8s_repo_file_mode }}"
-  tags: install
-
-- name: Add docker community edition repository
-  get_url:
-    url: "{{ docker_repo_url }}"
-    dest: "{{ docker_repo_dest }}"
+  yum_repository:
+    name: kubernetes
+    description: kubernetes
+    baseurl: https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64
+    enabled: yes
+    gpgcheck: no
+    repo_gpgcheck: no
+    gpgkey: 
+      - https://packages.cloud.google.com/yum/doc/yum-key.gpg 
+      - https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
   tags: install
 
 - name: Update sysctl to handle incorrectly routed traffic when iptables is bypassed
@@ -42,12 +40,6 @@
   changed_when: true
   tags: install
 
-- name: Install docker
-  package:
-    name: docker-ce
-    state: present
-  tags: install
-
 - name: Install k8s packages
   package:
     name: "{{ k8s_packages }}"
@@ -74,4 +66,4 @@
   service:
     name: kubelet
     state: restarted
-    enabled: yes
+    enabled: yes

+ 1 - 5
roles/k8s_common/vars/main.yml

@@ -20,12 +20,8 @@ k8s_packages:
 
 k8s_repo_dest: /etc/yum.repos.d/
 
-docker_repo_url: https://download.docker.com/linux/centos/docker-ce.repo
-
-docker_repo_dest: /etc/yum.repos.d/docker-ce.repo
-
 k8s_conf_dest: /etc/sysctl.d/
 
 k8s_repo_file_mode: 0644
 
-k8s_conf_file_mode: 0644
+k8s_conf_file_mode: 0644