Procházet zdrojové kódy

Issue #731: Adding Pre-requisites for awx

Signed-off-by: Lakshmi-Patneedi <Lakshmi_Patneedi@Dellteam.com>
Lakshmi-Patneedi před 3 roky
rodič
revize
70335b6436

+ 117 - 0
control_plane/roles/webui_awx/tasks/check_prerequisites.yml

@@ -0,0 +1,117 @@
+# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# Start every run with all three AWX status flags cleared; the tasks below
+# switch them to true only when the matching check succeeds.
+- name: Initialize variables
+  set_fact:
+    awx_configuration_status: false
+    awx_ui_status: false
+    awx_pod_deployment_status: false
+
+# Read-only listing of the pods in the AWX namespace.
+- name: Fetching pods from AWX namespace
+  command: >-
+    kubectl get pods -n {{ awx_namespace }}
+  changed_when: false
+  register: awx_pods
+
+# Read-only listing of the deployments in the AWX namespace.
+- name: Fetching deployment from AWX namespace
+  command: >-
+    kubectl get deployment -n {{ awx_namespace }}
+  changed_when: false
+  register: awx_deployment
+
+# Flag AWX as deployed only when the deployment listing mentions "awx" and the
+# pod listing contains a name of the form awx-<10 alphanumerics>-<5 alphanumerics>.
+# The `search` test yields a real boolean, unlike the regex_search filter,
+# which returns the matched text or nothing.
+- name: Updating awx_pod_deployment_status
+  set_fact:
+    awx_pod_deployment_status: true
+  when:
+    - awx_deployment.stdout is search('awx')
+    - awx_pods.stdout is search('awx-([A-Za-z0-9]{10})-([A-Za-z0-9]{5})')
+
+# Record whether the file named by tower_config_file is present.
+- name: Check if config file exists
+  register: tower_config_file_status
+  stat:
+    path: "{{ tower_config_file }}"
+
+# Record whether the file named by tower_vault_file is present.
+- name: Check if tower_vault_key exists
+  register: tower_vault_file_status
+  stat:
+    path: "{{ tower_vault_file }}"
+
+# Read-only listing of the services in the AWX namespace.
+- name: Fetching services of awx
+  command: "kubectl get svc -n {{ awx_namespace }}"
+  changed_when: false
+  register: awx_services_list
+
+- block:
+    # Read-only lookup of the AWX service cluster IP; it never changes the
+    # cluster, so it is reported as unchanged like the other kubectl queries.
+    - name: Get awx-service cluster-ip
+      command: >-
+        kubectl get svc {{ awx_service_name }} -n {{ awx_namespace }}
+        -o jsonpath='{.spec.clusterIP}'
+      register: awx_cluster_ip
+      changed_when: false
+
+    # Decode the admin password stored in the awx-admin-password secret.
+    # no_log keeps the decoded value out of task output; the lookup is
+    # read-only, so it is always reported as unchanged.
+    - name: Get AWX admin password
+      shell: >
+        set -o pipefail && \
+        kubectl get secret awx-admin-password -n {{ awx_namespace }} -o jsonpath='{.data.password}' | base64 --decode
+      no_log: true
+      changed_when: false
+      register: awx_admin_password
+
+    # Poll the AWX UI address until it answers with return_status. The task is
+    # never marked failed, so an unreachable UI only leaves the flags unset.
+    - name: Waiting for the AWX UI to be up
+      uri:
+        status_code: "{{ return_status }}"
+        url: "http://{{ awx_cluster_ip.stdout }}:{{ awx_port }}"
+      failed_when: false
+      register: display
+      delay: "{{ max_delay }}"
+      retries: "{{ min_retries }}"
+      until: display.status == return_status
+
+    # Poll again, fetching the page body, until the UI answers with
+    # return_status and the page no longer contains awx_ui_msg. The defaults
+    # keep the condition evaluable when the request itself fails and the
+    # result carries no content.
+    - name: Waiting for the AWX UI to be up and running
+      uri:
+        url: "http://{{ awx_cluster_ip.stdout }}:{{ awx_port }}"
+        status_code: "{{ return_status }}"
+        return_content: true
+      register: web_ui
+      until: >-
+        (web_ui.status | default(-1)) == return_status
+        and awx_ui_msg not in (web_ui.content | default(''))
+      retries: "{{ min_retries }}"
+      delay: "{{ max_delay }}"
+      failed_when: false
+
+    # Mark the UI as up only when the last request returned return_status and
+    # its body no longer contains awx_ui_msg. A failed request (no content in
+    # the result) must not count as "up".
+    - name: Updating awx_ui_status
+      set_fact:
+        awx_ui_status: true
+      when:
+        - (web_ui.status | default(-1)) == return_status
+        - awx_ui_msg not in (web_ui.content | default(''))
+
+    # Query AWX for its schedules and job templates once the UI is confirmed
+    # up, and set awx_configuration_status when all expected names are found.
+    # Errors are ignored for the whole block, so a failed CLI call simply
+    # leaves the flag at its previous value. The CLI calls carry the admin
+    # password on the command line, hence no_log on both.
+    - block:
+        - name: Fetching Schedule from AWX UI
+          command: >-
+            awx schedules list --all
+            --conf.host http://{{ awx_cluster_ip.stdout }}:{{ awx_port }}
+            --conf.username admin
+            --conf.password {{ awx_admin_password.stdout | quote }}
+            -f human --filter "name"
+          register: awx_schedule_list
+          changed_when: false
+          no_log: true
+
+        - name: Fetching job_templates from AWX UI
+          command: >-
+            awx job_templates list --all
+            --conf.host http://{{ awx_cluster_ip.stdout }}:{{ awx_port }}
+            --conf.username admin
+            --conf.password {{ awx_admin_password.stdout | quote }}
+            -f human --filter "name"
+          register: awx_job_templates_list
+          changed_when: false
+          no_log: true
+
+        - name: Updating awx_configuration_status
+          set_fact:
+            awx_configuration_status: true
+          when:
+            - scheduled_templates[1].name in awx_schedule_list.stdout
+            - scheduled_templates[0].name in awx_schedule_list.stdout
+            - omnia_job_template_details[0].name in awx_job_templates_list.stdout
+            - job_template_details[5].name in awx_job_templates_list.stdout
+            - job_template_details[0].name in awx_job_templates_list.stdout
+            - job_template_details[4].name in awx_job_templates_list.stdout
+      when: awx_ui_status
+      ignore_errors: true
+  when:
+    - tower_config_file_status.stat.exists
+    - tower_vault_file_status.stat.exists
+    - awx_services_list.stdout | regex_search('awx-ui')

+ 9 - 10
control_plane/roles/webui_awx/tasks/main.yml

@@ -1,4 +1,4 @@
-# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -19,22 +19,21 @@
   include_tasks: ../../control_plane_common/tasks/internet_validation.yml
   tags: install
 
-- name: Install AWX
-  include_tasks: install_awx.yml
+- name: Check awx prerequisites
+  include_tasks: check_prerequisites.yml
   tags: install
 
-- name: Internet validation
-  include_tasks: ../../control_plane_common/tasks/internet_validation.yml
+- name: Install AWX
+  include_tasks: install_awx.yml
+  when: not awx_pod_deployment_status
   tags: install
 
 - name: Configure settings
   include_tasks: configure_settings.yml
+  when: not awx_ui_status
   tags: install
-
-- name: Internet validation
-  include_tasks: ../../control_plane_common/tasks/internet_validation.yml
-  tags: install
-
+  
 - name: Configure AWX
   include_tasks: awx_configuration.yml
+  when: not awx_configuration_status
   tags: install

+ 4 - 1
control_plane/roles/webui_awx/vars/main.yml

@@ -1,4 +1,4 @@
-# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -46,6 +46,9 @@ return_status: 200
 max_retries: 20
 max_delay: 15
 
+# Usage: check_prerequisites.yml
+min_retries: 2
+
 # Usage: awx_configuration.yml
 default_org: Default
 default_template: 'Demo Job Template'

+ 63 - 0
roles/cluster_validation/tasks/fetch_grafana_cred.yml

@@ -0,0 +1,63 @@
+# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+# Read the login vars file so later tasks can test its content for the
+# "$ANSIBLE_VAULT;" marker; no_log keeps the file content out of task output.
+- name: Check login_vars file is encrypted
+  command: cat "{{ role_path }}/../../control_plane/{{ login_vars_filename }}"
+  register: config_content
+  no_log: true
+  changed_when: false
+
+# Decrypt the login vars file in place, but only when its content carries the
+# "$ANSIBLE_VAULT;" marker.
+- name: Decrypt login_vars.yml
+  command: >-
+    ansible-vault decrypt "{{ role_path }}/../../control_plane/{{ login_vars_filename }}"
+    --vault-password-file "{{ role_path }}/../../control_plane/{{ vault_filename }}"
+  changed_when: false
+  when: "'$ANSIBLE_VAULT;' in config_content.stdout"
+
+# Load the login vars file; no_log keeps the loaded values out of task output.
+- name: Include variable file login_vars.yml
+  include_vars: "{{ role_path }}/../../control_plane/{{ login_vars_filename }}"
+  no_log: true
+
+# Store the grafana credentials as host facts, again without logging them.
+- name: Save grafana variables
+  set_fact:
+    grafana_username: '{{ grafana_username }}'
+    grafana_password: '{{ grafana_password }}'
+  no_log: true
+
+# Generate a new random vault key when the login vars file was not encrypted.
+# no_log keeps the generated key out of task output.
+- name: Create ansible vault key
+  set_fact:
+    vault_key: "{{ lookup('password', '/dev/null chars=ascii_letters') }}"
+  no_log: true
+  when: "'$ANSIBLE_VAULT;' not in config_content.stdout"
+
+# Write the generated key to the vault password file, overwriting any existing
+# file. no_log keeps the key (the file content) out of task output.
+- name: Save vault key
+  copy:
+    dest: "{{ role_path }}/../../control_plane/{{ vault_filename }}"
+    content: |
+      {{ vault_key }}
+    owner: root
+    force: true
+    mode: "{{ vault_file_perm }}"
+  no_log: true
+  when: "'$ANSIBLE_VAULT;' not in config_content.stdout"
+
+# Encrypt the login vars file with the vault password file (runs every time).
+- name: Encrypt input config file
+  changed_when: false
+  command: >-
+    ansible-vault encrypt "{{ role_path }}/../../control_plane/{{ login_vars_filename }}"
+    --vault-password-file "{{ role_path }}/../../control_plane/{{ vault_filename }}"
+
+# Apply vault_file_perm to the encrypted login vars file.
+- name: Update login_vars.yml permission
+  file:
+    mode: "{{ vault_file_perm }}"
+    path: "{{ role_path }}/../../control_plane/{{ login_vars_filename }}"

+ 5 - 1
roles/cluster_validation/tasks/main.yml

@@ -93,4 +93,8 @@
         regexp: '#log_path = /var/log/ansible.log'
         replace: 'log_path = /var/log/omnia.log'
       when: ansible_conf_exists.stat.exists
-  when: not control_plane_status
+  when: not control_plane_status
+
+# Load the grafana credentials only when control_plane_status is set.
+- name: Fetch grafana credentials
+  when: control_plane_status
+  include_tasks: fetch_grafana_cred.yml

+ 4 - 7
roles/k8s_start_services/tasks/configure_nginx_prom_grafana.yml

@@ -1,4 +1,4 @@
-#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -13,9 +13,6 @@
 #  limitations under the License.
 ---
 
-- name: Include control_plane_monitoring variables
-  include_vars: "{{ role_path }}/../../control_plane/roles/control_plane_monitoring/vars/main.yml"
-
 - name: Install Nginx
   package:
     name: nginx
@@ -73,10 +70,10 @@
   community.grafana.grafana_datasource:
     name: "hpc-prometheus-{{ ansible_default_ipv4.address }}"
     grafana_url: "http://{{ grafana_svc_ip }}:{{ grafana_svc_port }}"
-    grafana_user: "{{ grafana_username }}"
-    grafana_password: "{{ grafana_password }}"
+    grafana_user: "{{ hostvars['127.0.0.1']['grafana_username'] }}"
+    grafana_password: "{{ hostvars['127.0.0.1']['grafana_password'] }}"
     ds_type: prometheus
     ds_url: "http://{{ ansible_default_ipv4.address }}"
     access: direct
   delegate_to: localhost
-  no_log: true
+  no_log: true

+ 7 - 0
roles/k8s_start_services/tasks/main.yml

@@ -22,6 +22,13 @@
   include_tasks: check_k8s_pods.yml
   tags: install
 
+# Install the community.grafana collection on the control node (delegated to
+# localhost) when the control plane is present; always reported as changed.
+- name: Install community.grafana collection
+  command: ansible-galaxy collection install community.grafana
+  delegate_to: localhost
+  changed_when: true
+  when: hostvars['127.0.0.1']['control_plane_status']
+  tags: install
+
 - name: Configure Nginx/Prometheus/Grafana
   include_tasks: configure_nginx_prom_grafana.yml
   when:

+ 6 - 9
roles/slurm_exporter/tasks/configure_grafana.yml

@@ -1,4 +1,4 @@
-#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -13,15 +13,12 @@
 #  limitations under the License.
 ---
 
-- name: Include control_plane_monitoring variables
-  include_vars: "{{ role_path }}/../../control_plane/roles/control_plane_monitoring/vars/main.yml"
-
 - name: Create prometheus datasource in grafana
   community.grafana.grafana_datasource:
     name: "hpc-prometheus-{{ ansible_default_ipv4.address }}"
     grafana_url: "http://{{ grafana_svc_ip }}:{{ grafana_svc_port }}"
-    grafana_user: "{{ grafana_username }}"
-    grafana_password: "{{ grafana_password }}"
+    grafana_user: "{{ hostvars['127.0.0.1']['grafana_username'] }}"
+    grafana_password: "{{ hostvars['127.0.0.1']['grafana_password'] }}"
     ds_type: prometheus
     ds_url: "http://{{ ansible_default_ipv4.address }}"
     access: direct
@@ -53,12 +50,12 @@
 - name: Import Slurm Grafana dashboards
   community.grafana.grafana_dashboard:
     grafana_url: "http://{{ grafana_svc_ip }}:{{ grafana_svc_port }}"
-    grafana_user: "{{ grafana_username }}"
-    grafana_password: "{{ grafana_password }}"
+    grafana_user: "{{ hostvars['127.0.0.1']['grafana_username'] }}"
+    grafana_password: "{{ hostvars['127.0.0.1']['grafana_password'] }}"
     state: present
     commit_message: Updated by ansible
     overwrite: yes
     path: "{{ role_path }}/files/{{ item }}"
   with_items: "{{ slurm_dashboard_json_files }}"
   delegate_to: localhost
-  no_log: true
+  no_log: true

+ 7 - 0
roles/slurm_exporter/tasks/main.yml

@@ -38,6 +38,13 @@
     - "'kubernetes' in ansible_skip_tags"
     - "'No such file' in k8s_installation_status.stderr"
 
+# Install the community.grafana collection on the control node (delegated to
+# localhost) when the control plane is present; always reported as changed.
+- name: Install community.grafana collection
+  command: ansible-galaxy collection install community.grafana
+  delegate_to: localhost
+  changed_when: true
+  when: hostvars['127.0.0.1']['control_plane_status']
+  tags: install
+
 - name: Configure grafana dashboard
   include_tasks: configure_grafana.yml
   when:

+ 20 - 0
telemetry/roles/slurm_telemetry/files/Dockerfile

@@ -0,0 +1,20 @@
+# Image for the slurm telemetry code, based on Rocky Linux 8.5.
+FROM rockylinux:8.5
+
+# Base tooling and development headers.
+RUN yum install epel-release git gcc -y
+RUN yum -y install openssl-devel bzip2-devel libffi-devel xz-devel
+# Install Python 3.8 and answer the update-alternatives prompt with "1".
+RUN yum install python3.8 -y
+RUN echo 1 | update-alternatives --config python3
+# Add the PostgreSQL (PGDG) repo, disable the distro postgresql module and
+# install the PostgreSQL 13 development package.
+RUN dnf -y install https://download.postgresql.org/pub/repos/yum/reporpms/EL-8-x86_64/pgdg-redhat-repo-latest.noarch.rpm
+RUN dnf module disable postgresql -y
+RUN dnf install postgresql13-devel -y
+RUN yum install python38-devel libpq-devel -y
+
+COPY requirements.txt requirements.txt
+# Expose the PostgreSQL 13 pg_config on the default PATH.
+RUN ln -s /usr/pgsql-13/bin/pg_config /usr/bin/pg_config
+
+# Python dependencies: psycopg2-binary first, then everything in requirements.txt.
+RUN pip3 install psycopg2-binary
+RUN pip3 install -r requirements.txt
+# Create the log directory and an empty monster.log inside it.
+RUN mkdir /log/
+RUN touch /log/monster.log
+
+WORKDIR /MonSter/

+ 23 - 0
telemetry/roles/slurm_telemetry/files/input_config.yml

@@ -0,0 +1,23 @@
+# TimeScaleDB Configuration
+timescaledb:
+  host:
+  port:
+  username:
+  password:
+  database:
+
+# Slurm REST API Configuration
+slurm_rest_api:
+  ip:
+  port: 6820
+  user: root
+  headnode:
+  slurm_jobs: /slurm/v0.0.36/jobs/
+  slurm_nodes: /slurm/v0.0.36/nodes/
+  openapi: /openapi/v3
+
+# Mapping of OS hostnames and OS IP addresses
+hostnames:
+
+# Mapping of service tag and OS IP addresses
+clusternodes:

+ 15 - 0
telemetry/roles/slurm_telemetry/files/requirements.txt

@@ -0,0 +1,15 @@
+sphinx
+pytest
+PyYAML
+tqdm
+requests
+psycopg2
+pgcopy
+aiohttp
+schedule
+async_retrying
+python-hostlist
+python-dateutil
+flask
+flask_cors
+sqlalchemy

+ 90 - 0
telemetry/roles/slurm_telemetry/files/update_service_tags.yml

@@ -0,0 +1,90 @@
+# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+---
+
+- name: Create inventory in awx
+  hosts: manager, compute
+  tasks:
+    # Query the slurmctld unit without changing its state.
+    - name: Check slurmctld service
+      systemd:
+        name: slurmctld
+      register: slurm_service_status
+
+    # Always define slurm_service as a boolean: true only when the unit is
+    # active. A single expression covers every other ActiveState value too
+    # (for example "failed"), so later conditions never meet an undefined fact.
+    - name: Set fact slurm_service
+      set_fact:
+        slurm_service: "{{ (slurm_service_status.status.ActiveState | default('')) == 'active' }}"
+      delegate_to: localhost
+
+    # Copy the pristine input template over the generated config (on the
+    # control node) whenever slurmctld is active. The mode is quoted so it is
+    # passed as the octal string rather than a YAML-parsed integer.
+    - name: Replace input file
+      copy:
+        src: "input_config.yml"
+        dest: /mnt/omnia/slurm/monster/config.yml
+        mode: "0644"
+      delegate_to: localhost
+      when: "slurm_service_status.status.ActiveState == 'active'"
+
+    # Gather per-node details (serial number, hostname) and write them into the
+    # shared config file on the control node. The whole block runs only when
+    # the first manager host reports slurm_service as true.
+    - name: Prepare input config file
+      block:
+        # Read the "Serial" line from the DMI system information (type 1).
+        - name: Get service tag
+          shell: >
+            set -o pipefail && \
+            dmidecode -t 1 | grep Serial
+          changed_when: false
+          register: service_tag_details
+
+        # Keep only the text after the first colon, trimmed of whitespace.
+        - name: Set fact service tag
+          set_fact:
+            service_tag: "{{ service_tag_details.stdout.split(':')[1].strip() }}"
+
+        - name: Get the hostname
+          command: hostname
+          register: machine_hostname
+          changed_when: false
+
+        # Fill the head node address with the first entry of the manager group.
+        - name: Update Head Node IP
+          replace:
+            path: /mnt/omnia/slurm/monster/config.yml
+            regexp: '  ip:.*'
+            replace: "  ip: {{ groups['manager'][0] }}"
+          delegate_to: localhost
+
+        # Fill the head node name with the hostname registered on that manager.
+        - name: Update Head Node hostname
+          replace:
+            path: /mnt/omnia/slurm/monster/config.yml
+            regexp: '  headnode:.*'
+            replace: "  headnode: {{ hostvars[groups['manager'][0]]['machine_hostname'].stdout }}"
+          delegate_to: localhost
+
+        # Add a "<hostname>: <default IPv4>" entry after the hostnames key.
+        - name: Update nodes hostnames
+          lineinfile:
+            path: /mnt/omnia/slurm/monster/config.yml
+            line: "  {{ machine_hostname.stdout }}: {{ ansible_default_ipv4.address }}"
+            insertafter: "hostnames:"
+          delegate_to: localhost
+
+        # Add a two-line Servicetag / Os_Ip_Addr entry after the clusternodes key.
+        - name: Update service tag info
+          lineinfile:
+            path: /mnt/omnia/slurm/monster/config.yml
+            line: "  - Servicetag: {{ service_tag }}\n    Os_Ip_Addr: {{ ansible_default_ipv4.address }}"
+            insertafter: "clusternodes:"
+          delegate_to: localhost
+      when: hostvars[groups['manager'][0]]['slurm_service']

+ 82 - 0
telemetry/roles/slurm_telemetry/tasks/get_node_inventory.yml

@@ -0,0 +1,82 @@
+# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# Copy the role's monster directory into slurm_telemetry_code_dir.
+- name: Copy slurm telemetry code
+  copy:
+    dest: "{{ slurm_telemetry_code_dir }}"
+    src: "{{ role_path }}/files/monster"
+    mode: "{{ slurm_telemetry_code_dir_mode }}"
+
+# Look up the awx-ui service cluster IP; a failing lookup is tolerated here and
+# handled by the explicit check that follows.
+- name: Get AWX service IP
+  command: >-
+    kubectl get svc awx-ui -n {{ awx_namespace }} -o=jsonpath='{.spec.clusterIP}'
+  register: awx_svc_ip
+  failed_when: false
+  changed_when: false
+
+# Stop with awx_fail_msg when the lookup returned nothing.
+- name: AWX needs to be installed
+  when: not awx_svc_ip.stdout
+  fail:
+    msg: "{{ awx_fail_msg }}"
+
+# Look up the first port of the awx-ui service.
+- name: Get AWX service port
+  command: >-
+    kubectl get svc awx-ui -n {{ awx_namespace }} -o=jsonpath='{.spec.ports[0].port}'
+  register: awx_svc_port
+  changed_when: false
+
+# Decode the admin password stored in the awx-admin-password secret.
+# no_log keeps the decoded value out of task output.
+- name: Get AWX secret
+  shell: >
+    set -o pipefail && \
+    kubectl get secret awx-admin-password -n {{ awx_namespace }} -o jsonpath="{.data.password}" | base64 --decode
+  changed_when: false
+  no_log: true
+  register: awx_secret
+
+# Find the node_inventory row in the AWX inventory list. grep exits with 1 when
+# nothing matches, so rc 1 is accepted here and the next task can report the
+# missing inventory with its own message. no_log keeps the password that is
+# passed on the command line out of task output.
+- name: Get node_inventory id
+  shell: >
+    set -o pipefail && \
+    awx --conf.host http://{{ awx_svc_ip.stdout }}:{{ awx_svc_port.stdout }} --conf.username {{ awx_username }} \
+    --conf.password {{ awx_secret.stdout }} --conf.insecure inventory list -f human | grep node_inventory
+  changed_when: false
+  failed_when: inventory_id.rc not in [0, 1]
+  no_log: true
+  register: inventory_id
+
+# Stop with node_inventory_fail_msg when no node_inventory row was found.
+- name: Node inventory not found in AWX
+  when: not inventory_id.stdout
+  fail:
+    msg: "{{ node_inventory_fail_msg }}"
+
+# List the hosts of the node inventory. The command is written as a folded
+# scalar so no literal backslash reaches the command module. The id is taken
+# as the first whitespace-separated field of the matched row rather than its
+# first character, so ids with more than one digit work.
+# NOTE(review): assumes the id is the first column of the "-f human" output,
+# as the original first-character lookup did - confirm.
+# no_log keeps the password that is passed on the command line out of output.
+- name: Get node_inventory
+  command: >-
+    awx --conf.host http://{{ awx_svc_ip.stdout }}:{{ awx_svc_port.stdout }}
+    --conf.username {{ awx_username }}
+    --conf.password {{ awx_secret.stdout | quote }}
+    --conf.insecure hosts list --inventory {{ inventory_id.stdout.split()[0] }}
+  changed_when: false
+  no_log: true
+  register: node_inventory_output
+
+# Parse the CLI output into a structure.
+- name: Save the json data
+  set_fact:
+    node_inventory_jsondata: "{{ node_inventory_output.stdout | from_json }}"
+
+# Register every returned host in the in-memory inventory, placed in the first
+# group listed in its summary fields.
+- name: Add temporary hosts
+  add_host:
+    groups: "{{ item.summary_fields.groups.results[0].name }}"
+    name: "{{ item.name }}"
+  no_log: true
+  with_items: "{{ node_inventory_jsondata | json_query('results') }}"
+
+# Rewrites, in the role's files/update_service_tags.yml, every "dest:" and
+# "path:" occurrence (through end of line) so it points at
+# <slurm_telemetry_code_dir>/monster/config.yml.
+# NOTE(review): this edits a file shipped with the role in place, and the
+# pattern matches any text containing those keys, comments included - confirm
+# this is intended.
+- name: Update slurm telemetry code path
+  replace:
+    path: "{{ role_path }}/files/update_service_tags.yml"
+    regexp: '{{ item }}.*'
+    replace: "{{ item }} {{ slurm_telemetry_code_dir }}/monster/config.yml"
+  with_items:
+    - "dest:"
+    - "path:"

+ 24 - 0
telemetry/roles/slurm_telemetry/tasks/main.yml

@@ -0,0 +1,24 @@
+# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# Variables shared with the common and timescaledb roles.
+- name: Include common variables
+  include_vars: ../../common/vars/main.yml
+
+- name: Include timescaledb variables
+  include_vars: ../../timescaledb/vars/main.yml
+
+# Fill in the MonSter input file only when the first manager host reported an
+# active slurm service. The default keeps the condition evaluable when that
+# host never set the fact.
+- name: Prepare MonSter input file
+  include_tasks: update_timescaledb_details.yml
+  when: hostvars[groups['manager'][0]]['slurm_service'] | default(false)

+ 55 - 0
telemetry/roles/slurm_telemetry/tasks/update_timescaledb_details.yml

@@ -0,0 +1,55 @@
+# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# Read-only lookup of the timescaledb service cluster IP.
+- name: Get timescaledb service IP
+  command: >-
+    kubectl get svc {{ timescaledb_k8s_name }} -n {{ namespace }} -o=jsonpath='{.spec.clusterIP}'
+  register: timescaledb_svc_ip
+  changed_when: false
+
+# Read-only lookup of the first port of the timescaledb service.
+- name: Get timescaledb service port
+  command: >-
+    kubectl get svc {{ timescaledb_k8s_name }} -n {{ namespace }} -o=jsonpath='{.spec.ports[0].port}'
+  register: timescaledb_svc_port
+  changed_when: false
+
+# Write the looked-up service IP into the host entry.
+- name: Update timescaledb service IP
+  replace:
+    path: "{{ slurm_telemetry_code_dir }}/monster/config.yml"
+    replace: "  host: {{ timescaledb_svc_ip.stdout }}"
+    regexp: '  host:.*'
+
+# Write the service port, limited to the text before the Slurm REST API
+# section so that section's own port entry is left alone.
+- name: Update timescaledb service port
+  replace:
+    path: "{{ slurm_telemetry_code_dir }}/monster/config.yml"
+    before: '# Slurm REST API Configuration'
+    replace: "  port: {{ timescaledb_svc_port.stdout }}"
+    regexp: '  port:.*'
+
+# Write the database user name.
+- name: Update timescaledb username
+  replace:
+    path: "{{ slurm_telemetry_code_dir }}/monster/config.yml"
+    replace: "  username: {{ timescaledb_user }}"
+    regexp: '  username:.*'
+
+# Write the database password into the config file; no_log keeps the value
+# (part of the module arguments) out of task output.
+- name: Update timescaledb password
+  replace:
+    path: "{{ slurm_telemetry_code_dir }}/monster/config.yml"
+    regexp: "  password:.*"
+    replace: "  password: {{ timescaledb_password }}"
+  no_log: true
+
+# Write the database name.
+- name: Update timescaledb database
+  replace:
+    path: "{{ slurm_telemetry_code_dir }}/monster/config.yml"
+    replace: "  database: {{ timescaledb_name }}"
+    regexp: '  database:.*'

+ 22 - 0
telemetry/roles/slurm_telemetry/vars/main.yml

@@ -0,0 +1,22 @@
+# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# usage: get_node_inventory.yml, update_timescaledb_details.yml
+slurm_telemetry_code_dir: "{{ mount_location }}slurm"
+# Quoted so the value stays the octal string "0755"; an unquoted 0755 is read
+# by YAML 1.1 parsers as the integer 493 before it is templated into a
+# module's mode argument.
+slurm_telemetry_code_dir_mode: "0755"
+awx_namespace: awx
+awx_username: admin
+awx_fail_msg: "AWX service not found. AWX needs to be installed"
+node_inventory_fail_msg: "AWX node inventory not found. Node inventory needs be created in AWX"

+ 22 - 0
telemetry/telemetry.yml

@@ -21,3 +21,25 @@
     - common
     - timescaledb
     - grafana_config
+
+# Run only get_node_inventory.yml from the slurm_telemetry role on localhost.
+- name: Get node inventory
+  hosts: localhost
+  gather_facts: false
+  tasks:
+    - name: Get node inventory
+      tags: slurm_telemetry
+      include_role:
+        tasks_from: get_node_inventory.yml
+        name: slurm_telemetry
+
+# Import the playbook kept under the slurm_telemetry role's files directory.
+- name: Update slurm node IPs and service tags
+  tags: slurm_telemetry
+  import_playbook: "{{ playbook_dir }}/roles/slurm_telemetry/files/update_service_tags.yml"
+
+# Apply the slurm_telemetry role locally, without gathering facts.
+- name: Slurm Telemetry
+  hosts: localhost
+  connection: local
+  gather_facts: false
+  tags: slurm_telemetry
+  roles:
+    - slurm_telemetry