Browse Source

Merge branch 'devel' of https://github.com/cgoveas/omnia into devel

cgoveas 3 years ago
parent
commit
97e290b639
23 changed files with 146 additions and 347 deletions
  1. 0 3
      control_plane/control_plane.yml
  2. 17 7
      control_plane/roles/control_plane_ib/files/Dockerfile
  3. 23 14
      control_plane/roles/control_plane_ib/files/infiniband_container_configure.yml
  4. 17 1
      control_plane/roles/control_plane_ib/files/k8s_infiniband.yml
  5. 5 0
      control_plane/roles/control_plane_ib/files/start.sh
  6. 32 23
      control_plane/roles/control_plane_ib/tasks/check_prerequisites.yml
  7. 1 2
      control_plane/roles/control_plane_ib/tasks/configure_infiniband_container.yml
  8. 28 6
      control_plane/roles/control_plane_ib/tasks/infiniband_container_image.yml
  9. 5 4
      control_plane/roles/control_plane_ib/tasks/main.yml
  10. 8 2
      control_plane/roles/control_plane_ib/vars/main.yml
  11. 0 17
      control_plane/roles/control_plane_sm/files/Dockerfile
  12. 0 43
      control_plane/roles/control_plane_sm/files/k8s_sm.yml
  13. 0 5
      control_plane/roles/control_plane_sm/files/start.sh
  14. 0 28
      control_plane/roles/control_plane_sm/tasks/create_image.yml
  15. 0 67
      control_plane/roles/control_plane_sm/tasks/create_pod.yml
  16. 0 31
      control_plane/roles/control_plane_sm/tasks/main.yml
  17. 0 50
      control_plane/roles/control_plane_sm/tasks/pre_requisites.yml
  18. 0 27
      control_plane/roles/control_plane_sm/vars/main.yml
  19. 1 8
      roles/k8s_start_services/tasks/main.yml
  20. 2 2
      roles/k8s_start_services/vars/main.yml
  21. 0 7
      roles/slurm_exporter/tasks/main.yml
  22. 1 0
      roles/slurm_exporter/vars/main.yml
  23. 6 0
      telemetry/roles/slurm_telemetry/tasks/get_node_inventory.yml

+ 0 - 3
control_plane/control_plane.yml

@@ -37,9 +37,6 @@
     - role: control_plane_ib
       tags: network-ib
 
-    - role: control_plane_sm
-      tags: network-ib
-
     - role: control_plane_customiso
       tags: idrac
 

+ 17 - 7
control_plane/roles/control_plane_ib/files/Dockerfile

@@ -1,14 +1,24 @@
-# Dockerfile for creating the management network container
-FROM alpine:latest
+FROM rockylinux/rockylinux:8.5
 
-#Installation of packages
-RUN apk add dhcp
-RUN apk add ansible
-RUN apk add openrc
+RUN dnf install -y epel-release
+RUN dnf install dhcp-server -y \
+  ansible \
+  cronie \
+  net-tools
+RUN dnf groupinstall "Infiniband Support" -y
+RUN dnf install -y opensm
+RUN dnf clean all  && \
+    rm -rf /var/cache/yum
 
 #Creation of directories and files
 RUN mkdir /root/omnia
-RUN touch /var/lib/dhcp/dhcpd.leases
+RUN touch /var/lib/dhcpd/dhcpd.leases
 
 #Copy Configuration files
 COPY dhcpd.conf  /etc/dhcp/dhcpd.conf
+COPY opensm.conf /etc/rdma/opensm.conf
+COPY start.sh /
+
+RUN systemctl enable dhcpd
+# Absolute path: an exec-form CMD must not depend on the working directory
+CMD ["/sbin/init"]

+ 23 - 14
control_plane/roles/control_plane_ib/files/infiniband_container_configure.yml

@@ -1,16 +1,16 @@
-#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
+# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
-#      http://www.apache.org/licenses/LICENSE-2.0
+#     http://www.apache.org/licenses/LICENSE-2.0
 #
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 ---
 
 - name: Initial  setup
@@ -18,7 +18,16 @@
   connection: local
   gather_facts: false
   tasks:
-  - name: Install netaddr
-    package:
-      name: py3-netaddr
-      state: present
+  - name: Start dhcpd services
+    command: dhcpd {{ ib_nic }}
+    changed_when: false
+
+  - name: Change mode
+    command: chmod 755 /start.sh
+    changed_when: false
+
+  - name: Run shell
+    shell: ./start.sh
+    args:
+      chdir: /
+    changed_when: false

+ 17 - 1
control_plane/roles/control_plane_ib/files/k8s_infiniband.yml

@@ -23,14 +23,30 @@ spec:
           hostPath:
             path: /root/omnia
             type: Directory
+        - name: opensm-cache
+          hostPath:
+            path: /var/cache/opensm
+            type: Directory
+        - name: opensm-logs
+          hostPath:
+            path: /var/log
+            type: Directory
       containers:
         - name: infiniband-container
           image: 'localhost/infiniband-container:latest'
           imagePullPolicy: Never
-          command: ["sh", "-c", "tail -f /dev/null"]
+          command:
+            - /sbin/init
           volumeMounts:
             - name: omnia-storage
               mountPath: /root/omnia
+            - mountPath: /var/cache/opensm
+              name: opensm-cache
+            - mountPath: /var/log
+              name: opensm-logs
+          resources:
+            limits:
+              memory: "10Gi"
           securityContext:
             allowPrivilegeEscalation: true
             capabilities:

+ 5 - 0
control_plane/roles/control_plane_ib/files/start.sh

@@ -0,0 +1,5 @@
+#!/bin/sh
+
+/usr/libexec/rdma-init-kernel
+
+exec /usr/sbin/opensm -F /etc/rdma/opensm.conf

+ 32 - 23
control_plane/roles/control_plane_ib/tasks/check_prerequisites.yml

@@ -1,4 +1,4 @@
-# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -20,47 +20,56 @@
     infiniband_container_config_status: false
     infiniband_backup_map_status: false
     infiniband_new_node_status: false
-  tags: install
-
-- name: Check if any backup file exists
-  block:
-  - name: Check status of backup file
-    stat:
-      path: "{{ role_path }}/files/backup_mapping_file.csv"
-    register: infiniband_backup_map
-
-  - name: Set status for backup file
-    set_fact:
-      infiniband_backup_map_status: true
-    when: infiniband_backup_map.stat.exists 
-  rescue:
-  - name: Message
-    debug:
-      msg: "All nodes are new"
-      verbosity: 2
 
 - name: Inspect the infiniband_container image
   command: "buildah images"
   register: infiniband_container_image_result
   failed_when: false
   changed_when: false
-  tags: install
+
+- name: Create directories if they don't exist
+  block:
+    - name: Checking directory
+      stat:
+        path: "{{ item }}"
+      register: stat_result
+      with_items:
+        - "{{ subnet_manager.cache_directory }}"
+        - "{{ subnet_manager.log_directory }}"
+      loop_control:
+        label: "{{ item }}"
+
+    - name: Creating directories
+      file:
+        path: "{{ item.item }}"
+        state: directory
+        mode: "{{ folder_perm }}"
+        group: root
+        owner: root
+      when: not item.stat.exists
+      loop: "{{ stat_result.results }}"
+      loop_control:
+        label: "{{ item.item }}"
+
+- name: Copy opensm configuration file
+  copy:
+    src: "{{ opensm_conf_file }}"
+    dest: "{{ opensm_conf_file_dest }}"
+    mode: preserve
+    force: true
 
 - name: Check infiniband_container status on the machine
   command: kubectl get pods -n network-config
   register: infiniband_container_result
   failed_when: false
   changed_when: false
-  tags: install
 
 - name: Update infiniband_container image status
   set_fact:
     infiniband_container_image_status: true
   when: infiniband_image_name in infiniband_container_image_result.stdout
-  tags: install
 
 - name: Update infiniband_container container status
   set_fact:
     infiniband_container_status: true
   when: "'infiniband-container' in infiniband_container_result.stdout"
-  tags: install

+ 1 - 2
control_plane/roles/control_plane_ib/tasks/configure_infiniband_container.yml

@@ -38,7 +38,6 @@
 
 - name: Configuring infiniband container
   command: 'kubectl exec --stdin --tty -n network-config {{ infiniband_pod_name.stdout }} \
-    -- ansible-playbook /root/omnia/control_plane/roles/control_plane_ib/files/infiniband_container_configure.yml'
+    -- ansible-playbook /root/omnia/control_plane/roles/control_plane_ib/files/infiniband_container_configure.yml -e "ib_nic={{ ib_network_nic }}"'
   changed_when: false
   tags: install
-#  when: infiniband_container_config_status == false

+ 28 - 6
control_plane/roles/control_plane_ib/tasks/infiniband_container_image.yml

@@ -1,4 +1,4 @@
-# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -18,21 +18,43 @@
   changed_when: true
   args:
     chdir: "{{ role_path }}/files/"
-  tags: install
 
 - name: Update image name in k8s_infiniband.yml
   replace:
-    path: "{{ role_path }}/files/k8s_infiniband.yml"
+    path: "{{ ib_kube_config_file }}"
     regexp: 'localhost/infiniband-container:latest'
     replace: "localhost/{{ infiniband_image_name }}:{{ infiniband_image_tag }}"
 
 - name: Update omnia project path in k8s_infiniband.yml
   replace:
-    path: "{{ role_path }}/files/k8s_infiniband.yml"
+    path: "{{ ib_kube_config_file }}"
     regexp: '        - name: omnia-storage\n          hostPath:\n            path:.*'
     replace: "        - name: omnia-storage\n          hostPath:\n            path: {{ role_path.split('control_plane')[0] }}"
 
+- name: Replace container name in sm config file
+  replace:
+    path: "{{ ib_kube_config_file }}"
+    regexp: "      containers:\n        - name:.*"
+    replace: "      containers:\n        - name: {{ ib_container_name }}"
+
+- name: Replace image name in sm config file
+  replace:
+    path: "{{ ib_kube_config_file }}"
+    regexp: "          image:.*"
+    replace: "          image: 'localhost/{{ infiniband_image_name }}:{{ infiniband_image_tag }}'"
+
+- name: Replace cache directory in sm config file
+  replace:
+    path: "{{ ib_kube_config_file }}"
+    regexp: "        - name: opensm-cache\n          hostPath:\n            path:.*"
+    replace: "        - name: opensm-cache\n          hostPath:\n            path: {{ subnet_manager.cache_directory }}"
+
+- name: Replace log directory in sm config file
+  replace:
+    path: "{{ ib_kube_config_file }}"
+    regexp: "        - name: opensm-logs\n          hostPath:\n            path:.*"
+    replace: "        - name: opensm-logs\n          hostPath:\n            path: {{ subnet_manager.log_directory }}"
+
 - name: Deploy infiniband pod
-  command: "kubectl apply -f {{ role_path }}/files/k8s_infiniband.yml"
+  command: "kubectl apply -f {{ ib_kube_config_file }}"
   changed_when: true
-  tags: install

+ 5 - 4
control_plane/roles/control_plane_ib/tasks/main.yml

@@ -1,4 +1,4 @@
-#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -14,7 +14,7 @@
 ---
 
 # Tasks file for infiniband
--
+
 - name: Check if IB switch is supported
   block:
     - name: Check infiniband_container status on machine
@@ -52,5 +52,6 @@
             msg: "{{ infiniband_message_installed }}"
             verbosity: 2
           when: not infiniband_container_status
-      tags: install
-  when: ib_switch_support
+  when:
+    - ib_switch_support
+    - mgmt_os in os_supported_rocky

+ 8 - 2
control_plane/roles/control_plane_ib/vars/main.yml

@@ -1,4 +1,4 @@
-# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,7 +13,10 @@
 # limitations under the License.
 ---
 
-# vars file for infiniband
+# Usage: check_prerequisites.yml (folder_perm is applied to directories, so it needs the search bit)
+folder_perm: "0755"
+opensm_conf_file: "{{ role_path }}/../../input_params/opensm.conf"
+opensm_conf_file_dest: "{{ role_path }}/files/opensm.conf"
 
 # Usage: infiniband_container_image.yml
 infiniband_image_name: infiniband_container
@@ -21,3 +24,6 @@ infiniband_image_tag: latest
 mount_path: /root/omnia
 infiniband_message_skipped: "The container is already present"
 infiniband_message_installed: "The container is installed"
+# Pod manifest that is patched and applied, and the container name written into it
+ib_kube_config_file: "{{ role_path }}/files/k8s_infiniband.yml"
+ib_container_name: infiniband-container

+ 0 - 17
control_plane/roles/control_plane_sm/files/Dockerfile

@@ -1,17 +0,0 @@
-FROM centos:8
-
-RUN dnf -y update && dnf clean all
-
-RUN dnf install -y epel-release
-
-RUN dnf groupinstall "Infiniband Support" -y
-
-RUN dnf install -y opensm
-
-COPY opensm.conf /etc/rdma/opensm.conf
-
-COPY start.sh /
-
-RUN chmod +x /start.sh
-
-ENTRYPOINT ["/start.sh"]

+ 0 - 43
control_plane/roles/control_plane_sm/files/k8s_sm.yml

@@ -1,43 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: subnet-manager
-  namespace: subnet-manager
-  labels:
-    app: subnet-manager
-spec:
-  selector:
-    matchLabels:
-      app: subnet-manager
-  replicas: 1
-  strategy:
-    type: RollingUpdate
-  template:
-    metadata:
-      labels:
-        app: subnet-manager
-    spec:
-      hostNetwork: true
-      containers:
-        - name: opensm-service
-          image: 'localhost/opensm-service:latest'
-          imagePullPolicy: Never
-          volumeMounts:
-            - mountPath: /var/cache/opensm
-              name: opensm-cache
-            - mountPath: /var/log
-              name: opensm-logs
-          resources:
-            limits:
-              memory: "10Gi"
-          securityContext:
-            privileged: true
-      volumes:
-        - name: opensm-cache
-          hostPath:
-            path: /var/cache/opensm
-            type: Directory
-        - name: opensm-logs
-          hostPath:
-            path: /var/log
-            type: Directory

+ 0 - 5
control_plane/roles/control_plane_sm/files/start.sh

@@ -1,5 +0,0 @@
-#!/bin/sh
-
-/usr/libexec/rdma-init-kernel
-
-exec /usr/sbin/opensm -F /etc/rdma/opensm.conf

+ 0 - 28
control_plane/roles/control_plane_sm/tasks/create_image.yml

@@ -1,28 +0,0 @@
-# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
----
-# tasks file for sm image creation
-
-- name: Get buildah images
-  command: buildah images
-  changed_when: false
-  register: buildah_images
-  tags: install
-
-- name: Subnet manager image creation (It may take 5-10 mins)
-  command: "buildah bud -t {{ sm_docker_image_name }}:{{ sm_docker_image_tag }} --network host -f {{ role_path }}/files/Dockerfile"
-  args:
-    chdir: "{{ role_path }}/files/"
-  when: "sm_docker_image_name not in buildah_images.stdout"
-  tags: install

+ 0 - 67
control_plane/roles/control_plane_sm/tasks/create_pod.yml

@@ -1,67 +0,0 @@
-# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
----
-# tasks file for sm pod creation
-- name: Get namespaces
-  command: kubectl get namespaces
-  changed_when: false
-  register: k8s_namespaces
-  tags: install
-
-- name: Ensure that namespace is created
-  command: kubectl create namespace subnet-manager
-  when: "'subnet-manager' not in k8s_namespaces.stdout"
-  tags: install
-
-- name: Get K8s pods
-  command: kubectl get pods --all-namespaces
-  changed_when: false
-  register: k8s_pods
-  tags: install
-
-- name: Create subnet manager pod
-  block:
-    - name: Replace container name in sm config file
-      replace:
-        path: "{{ sm_kube_config_file }}"
-        regexp: "      containers:\n        - name:.*"
-        replace: "      containers:\n        - name: {{ sm_container_name }}"
-      tags: install
-
-    - name: Replace image name in sm config file
-      replace:
-        path: "{{ sm_kube_config_file }}"
-        regexp: "          image:.*"
-        replace: "          image: 'localhost/{{ sm_docker_image_name }}:{{ sm_docker_image_tag }}'"
-      tags: install
-
-    - name: Replace cache directory in sm config file
-      replace:
-        path: "{{ sm_kube_config_file }}"
-        regexp: "        - name: opensm-cache\n          hostPath:\n            path:.*"
-        replace: "        - name: opensm-cache\n          hostPath:\n            path: {{ subnet_manager.cache_directory }} "
-      tags: install
-
-    - name: Replace cache directory in sm config file
-      replace:
-        path: "{{ sm_kube_config_file }}"
-        regexp: "        - name: opensm-logs\n          hostPath:\n            path:.*"
-        replace: "        - name: opensm-logs\n          hostPath:\n            path: {{ subnet_manager.log_directory }} "
-      tags: install
-
-    - name: Create subnet manager pod
-      command: "kubectl apply -f {{ sm_kube_config_file }}"
-      tags: install
-
-  when: "'subnet-manager' not in k8s_pods.stdout"

+ 0 - 31
control_plane/roles/control_plane_sm/tasks/main.yml

@@ -1,31 +0,0 @@
-# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
----
-# tasks file for control_plane_sm
-
-- name: Check if IB switch is supported
-  block:
-    - name: Check pre-requisites
-      include_tasks: pre_requisites.yml
-      tags: install
-
-    - name: Create image
-      include_tasks: create_image.yml
-      tags: install
-
-    - name: Check pod
-      include_tasks: create_pod.yml
-      tags: install
-
-  when: ib_switch_support

+ 0 - 50
control_plane/roles/control_plane_sm/tasks/pre_requisites.yml

@@ -1,50 +0,0 @@
-# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
----
-
-# pre-req
-
-- name: Create directories if they don't exist
-  block:
-    - name: Checking directory
-      stat:
-        path: "{{ item }}"
-      register: stat_result
-      with_items:
-        - "{{ subnet_manager.cache_directory }}"
-        - "{{ subnet_manager.log_directory }}"
-      loop_control:
-        label: "{{ item }}"
-      tags: install
-
-    - name: Creating directories
-      file:
-        path: "{{ item.item }}"
-        state: directory
-        mode: "{{ folder_perm }}"
-        group: root
-        owner: root
-      when: not item.stat.exists
-      loop: "{{ stat_result.results }}"
-      loop_control:
-        label: "{{ item.item }}"
-      tags: install
-
-- name: Copy opensm configuration file
-  copy:
-    src: "{{ opensm_conf_file }}"
-    dest: "{{ opensm_conf_file_dest }}"
-    mode: preserve
-    force: yes
-  tags: install

+ 0 - 27
control_plane/roles/control_plane_sm/vars/main.yml

@@ -1,27 +0,0 @@
-# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
----
-
-# Usage: pre_requisites.yml
-folder_perm: 644
-opensm_conf_file: "{{ role_path }}/../../input_params/opensm.conf"
-opensm_conf_file_dest: "{{ role_path }}/files/opensm.conf"
-
-# Usage: create_image.yml
-sm_docker_image_name: opensm-service
-sm_docker_image_tag: latest
-
-#Usage: create_pod.yml
-sm_container_name: opensm-container
-sm_kube_config_file: "{{ role_path }}/files/k8s_sm.yml"

+ 1 - 8
roles/k8s_start_services/tasks/main.yml

@@ -21,14 +21,7 @@
 - name: Check K8s pods
   include_tasks: check_k8s_pods.yml
   tags: install
-
-- name: Install community.grafana collection
-  command: ansible-galaxy collection install community.grafana
-  changed_when: True
-  tags: install
-  delegate_to: localhost
-  when: hostvars['127.0.0.1']['control_plane_status']
-
+  
 - name: Configure Nginx/Prometheus/Grafana
   include_tasks: configure_nginx_prom_grafana.yml
   when:

+ 2 - 2
roles/k8s_start_services/vars/main.yml

@@ -99,9 +99,9 @@ nginx_conf_file_path: /etc/nginx/nginx.conf
 
 nginx_conf_file_mode: 2534
 
-grafana_svc_ip: "{{ lookup('ini', 'ip section=grafana_svc file={{ role_path }}/files/grafana_svc_details.ini') }}"
+grafana_svc_ip: "{{ lookup('ini', 'ip section=grafana_svc file=' ~ playbook_dir ~ '/roles/k8s_start_services/files/grafana_svc_details.ini') }}"
 
-grafana_svc_port: "{{ lookup('ini', 'port section=grafana_svc file={{ role_path }}/files/grafana_svc_details.ini') }}"
+grafana_svc_port: "{{ lookup('ini', 'port section=grafana_svc file=' ~ playbook_dir ~ '/roles/k8s_start_services/files/grafana_svc_details.ini') }}"
+grafana_svc_port: "{{ lookup('ini', 'port section=grafana_svc file={{ playbook_dir }}/roles/k8s_start_services/files/grafana_svc_details.ini') }}"
 
 opensuse_os_name: "openSUSE Leap"
 

+ 0 - 7
roles/slurm_exporter/tasks/main.yml

@@ -38,13 +38,6 @@
     - "'kubernetes' in ansible_skip_tags"
     - "'No such file' in k8s_installation_status.stderr"
 
-- name: Install community.grafana collection
-  command: ansible-galaxy collection install community.grafana
-  changed_when: True
-  tags: install
-  delegate_to: localhost
-  when: hostvars['127.0.0.1']['control_plane_status']
-
 - name: Configure grafana dashboard
   include_tasks: configure_grafana.yml
   when:

+ 1 - 0
roles/slurm_exporter/vars/main.yml

@@ -31,6 +31,7 @@ prometheus_inst_path: "/var/lib/prometheus-2.23.0.linux-amd64/"
 prometheus_exec_path: "{{ prometheus_inst_path }}/prometheus"
 system_local_path: "/usr/local/bin"
 prometheus_config_file: "{{ prometheus_inst_path }}/prometheus.yml"
+opensuse_os_name: "openSUSE Leap"
 
 #Usage: start_service.yml
 file_permission: "0755"

+ 6 - 0
telemetry/roles/slurm_telemetry/tasks/get_node_inventory.yml

@@ -18,6 +18,12 @@
     src: "{{ role_path }}/files/monster"
     dest: "{{ slurm_telemetry_code_dir }}"
     mode: "{{ slurm_telemetry_code_dir_mode }}"
+    
+- name: Install jmespath
+  pip:
+    name: jmespath
+    state: present
+    executable: pip3
 
 - name: Get AWX service IP
   command: kubectl get svc awx-ui -n {{ awx_namespace }} -o=jsonpath='{.spec.clusterIP}'