Explorar o código

Issue #247: Fix to remove double instance of prometheus

Signed-off-by: K <Deepika_K2@Dell.com>
John Lockman hai 4 anos
pai
achega
185ccc2303

+ 2 - 0
.github/workflows/ansible-lint.yml

@@ -33,6 +33,8 @@ jobs:
           /github/workspace/platforms/jupyterhub.yml
           /github/workspace/platforms/kubeflow.yml
           /github/workspace/tools/install_tools.yml
+          /github/workspace/tools/intel_tools.yml
+          /github/workspace/tools/olm.yml
         # [optional]
         # Arguments to override a package and its version to be set explicitly.
         # Must follow the example syntax.

+ 15 - 0
roles/k8s_start_services/tasks/main.yml

@@ -100,6 +100,21 @@
   changed_when: true
   tags: init
 
+# Probe for a host-level Prometheus install at prometheus_path_on_host and
+# register the result as prometheus_status for the task that follows.
+# ignore_errors is deliberately not set: stat reports a missing path as
+# stat.exists == false rather than as a failure, and a genuinely failed stat
+# leaves no `stat` key, so hiding it would only move the error to the later
+# `prometheus_status.stat.exists` check.
+- name: Check if prometheus is installed on the host
+  stat:
+    path: "{{ prometheus_path_on_host }}"
+  register: prometheus_status
+  changed_when: false
+  tags: init
+
+# Remove the path named by prometheus_path_on_host, but only when the stat
+# result registered as prometheus_status reports that the path exists.
+- name: Delete prometheus installed on host if it exists
+  tags: init
+  when: prometheus_status.stat.exists
+  file:
+    state: absent
+    path: "{{ prometheus_path_on_host }}"
+
 - name: Copy the slurm exporter config file
   copy:
     src: "{{ slurm_exporter_config_file }}"

+ 3 - 1
roles/k8s_start_services/vars/main.yml

@@ -51,4 +51,6 @@ slurm_exporter_config_file: extraScrapeConfigs.yaml
 
 slurm_exporter_config_file_path: /var/lib/
 
-slurm_exporter_file_mode: 0655
+# Quoted so the value stays the literal mode string "0655" instead of being
+# re-typed as an octal integer by YAML 1.1 loaders.
+slurm_exporter_file_mode: "0655"
+
+# Path that the k8s_start_services tasks stat and, if present, delete.
+prometheus_path_on_host: /var/lib/prometheus-2.23.0.linux-amd64/

+ 0 - 43
roles/slurm_exporter/tasks/configure_prometheus_pod.yml

@@ -1,43 +0,0 @@
-# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
----
-
-- name: Copy the slurm exporter config file
-  copy:
-    src: "{{ slurm_config_file }}"
-    dest: "{{ installation_dir }}"
-    owner: root
-    group: root
-    mode: "{{ file_permission }}"
-
-- name: Add the host IP to config file
-  lineinfile:
-    dest: "{{ installation_dir }}/{{ slurm_config_file }}"
-    regexp: "http:"
-    insertafter: "- targets"
-    line: "        - http://{{ inventory_hostname }}:8080/metrics"
-    state: present
-    backup: yes
-    backrefs: yes
-
-- name: Verify if slurm exporter is already configured
-  command: kubectl get service prometheus-slurmexporter-metrics-2
-  register: service_status
-  changed_when: False
-  ignore_errors: yes
-
-- name: Apply slurm exporter configuration to prometheus
-  command: kubectl apply -f "{{ installation_dir }}/{{ slurm_config_file }}" --validate=false
-  changed_when: true
-  when: "'Error from server (NotFound)' in service_status.stderr"

+ 16 - 1
roles/slurm_exporter/tasks/main.yml

@@ -13,12 +13,27 @@
 #  limitations under the License.
 ---
 
+# Probe for the exporter binary. `ls` exits non-zero when the path is missing;
+# ignore_errors lets the play continue so the registered result can gate the
+# install task below.
+- name: Verify if slurm-exporter is already installed
+  command: ls /usr/bin/prometheus-slurm-exporter
+  register: slurm_exporter_status
+  changed_when: false
+  ignore_errors: true
+
+# Gate on the probe's exit status rather than on the wording of the error
+# message printed by ls, which is not a stable interface.
 - name: Install slurm exporter
   include_tasks: install_slurm_exporter.yml
+  when: slurm_exporter_status.rc != 0
 
 # Includes the tasks from start_services.yml; this task carries no `when`
 # condition of its own.
 - name: Start slurm exporter services
   include_tasks: start_services.yml
 
+# Probe for /usr/bin/kubectl as the indicator that kubernetes is installed.
+# `ls` exits non-zero when the path is missing; ignore_errors lets the play
+# continue so the registered result can gate the install task below.
+- name: Verify if kubernetes is already installed
+  command: ls /usr/bin/kubectl
+  register: k8s_installation_status
+  changed_when: false
+  ignore_errors: true
+
+# Host-level Prometheus is installed only when the kubernetes tag is skipped
+# AND the kubectl probe exited non-zero. The exit status is checked instead
+# of the text of the ls error message, which is not a stable interface.
 - name: Install prometheus on host
   include_tasks: install_prometheus.yml
-  when: "'kubernetes' in ansible_skip_tags"
+  when:
+    - "'kubernetes' in ansible_skip_tags"
+    - "k8s_installation_status.rc != 0"

+ 1 - 1
roles/slurm_exporter/tasks/start_services.yml

@@ -23,4 +23,4 @@
 # Drives the prometheus-slurm-exporter unit through the systemd module; this
 # change moves the requested state from `started` to `restarted`.
 - name: Start services
   systemd:
     name: prometheus-slurm-exporter
-    state: started
+    state: restarted

+ 1 - 1
tools/intel_tools.yml

@@ -31,7 +31,7 @@
       state: present
 
 # Install the Intel Cluster Checker
-- hosts: cluster 
+- hosts: cluster
   tasks:
   - name: Import the Intel(R) Cluster Checker Repo GPG Key
     rpm_key:

+ 21 - 0
tools/olm.yml

@@ -0,0 +1,21 @@
+#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+# olm.yml: Install Operator Lifecycle Manager (OLM) for Operator Framework
+
+# Both tasks run `kubectl apply` against the pinned v0.17.0 release manifests:
+# the CRDs first, then OLM itself. changed_when is stated explicitly so the
+# always-changed result of these command tasks is deliberate, not implicit.
+- name: Deploy Operator Lifecycle Manager (OLM) CRDs
+  command: kubectl apply -f https://github.com/operator-framework/operator-lifecycle-manager/releases/download/v0.17.0/crds.yaml
+  changed_when: true
+
+- name: Deploy Operator Lifecycle Manager (OLM)
+  command: kubectl apply -f https://github.com/operator-framework/operator-lifecycle-manager/releases/download/v0.17.0/olm.yaml
+  changed_when: true