浏览代码

Merge branch 'dellhpc:devel' into devel

abhishek-sa1 3 年之前
父节点
当前提交
0007b7585e
共有 52 个文件被更改,包括 28415 次插入176 次删除
  1. 7 26
      control_plane/roles/control_plane_device/files/Dockerfile
  2. 3 3
      control_plane/roles/control_plane_device/files/inventory_creation.yml
  3. 6 3
      control_plane/roles/control_plane_device/files/k8s_mngmnt_network.yml
  4. 6 57
      control_plane/roles/control_plane_device/files/mngmnt_container_configure.yml
  5. 0 20
      control_plane/roles/control_plane_device/files/tftp
  6. 7 19
      control_plane/roles/control_plane_ib/files/Dockerfile
  7. 5 5
      control_plane/roles/control_plane_ib/files/infiniband_container_configure.yml
  8. 6 3
      control_plane/roles/control_plane_ib/files/k8s_infiniband.yml
  9. 1363 0
      control_plane/roles/control_plane_monitoring/files/CoreDNS.json
  10. 1527 0
      control_plane/roles/control_plane_monitoring/files/Kubernetes_API_server.json
  11. 2266 0
      control_plane/roles/control_plane_monitoring/files/Kubernetes_Compute_Resources_Cluster.json
  12. 2039 0
      control_plane/roles/control_plane_monitoring/files/Kubernetes_Compute_Resources_Namespace_Pods.json
  13. 918 0
      control_plane/roles/control_plane_monitoring/files/Kubernetes_Compute_Resources_Node_Pods.json
  14. 1638 0
      control_plane/roles/control_plane_monitoring/files/Kubernetes_Compute_Resources_Pod.json
  15. 1850 0
      control_plane/roles/control_plane_monitoring/files/Kubernetes_Compute_Resources_Workload.json
  16. 2071 0
      control_plane/roles/control_plane_monitoring/files/Kubernetes_Kubelet.json
  17. 1615 0
      control_plane/roles/control_plane_monitoring/files/Kubernetes_Networking_Cluster.json
  18. 1297 0
      control_plane/roles/control_plane_monitoring/files/Kubernetes_Networking_Namespace_Pods.json
  19. 1492 0
      control_plane/roles/control_plane_monitoring/files/Kubernetes_Networking_Namespace_Workload.json
  20. 1085 0
      control_plane/roles/control_plane_monitoring/files/Kubernetes_Networking_Pod.json
  21. 1226 0
      control_plane/roles/control_plane_monitoring/files/Kubernetes_Networking_Workload.json
  22. 963 0
      control_plane/roles/control_plane_monitoring/files/Kubernetes_Scheduler.json
  23. 820 0
      control_plane/roles/control_plane_monitoring/files/Kubernetes_StatefulSets.json
  24. 1163 0
      control_plane/roles/control_plane_monitoring/files/Prometheus_Overview.json
  25. 73 0
      control_plane/roles/control_plane_monitoring/tasks/configure_k8s_grafana.yml
  26. 15 0
      control_plane/roles/control_plane_monitoring/tasks/install_grafana.yml
  27. 57 0
      control_plane/roles/control_plane_monitoring/tasks/install_k8s_prom_stack.yml
  28. 23 0
      control_plane/roles/control_plane_monitoring/tasks/main.yml
  29. 47 0
      control_plane/roles/control_plane_monitoring/vars/main.yml
  30. 70 0
      control_plane/roles/provision_cobbler/files/Dockerfile_leap
  31. 5 5
      control_plane/roles/provision_cobbler/files/cobbler_configurations.yml
  32. 152 0
      control_plane/roles/provision_cobbler/files/cobbler_configurations_leap.yml
  33. 3 3
      control_plane/roles/provision_cobbler/files/inventory_creation.yml
  34. 86 0
      control_plane/roles/provision_cobbler/files/temp_leap15.xml
  35. 21 0
      control_plane/roles/provision_cobbler/files/tftp.yml
  36. 5 0
      control_plane/roles/provision_cobbler/tasks/check_prerequisites.yml
  37. 13 1
      control_plane/roles/provision_cobbler/tasks/cobbler_image.yml
  38. 13 3
      control_plane/roles/provision_cobbler/tasks/configure_cobbler.yml
  39. 29 0
      control_plane/roles/provision_cobbler/tasks/dhcp_configure.yml
  40. 59 20
      control_plane/roles/provision_cobbler/tasks/provision_password.yml
  41. 1 0
      control_plane/roles/provision_cobbler/vars/main.yml
  42. 3 0
      roles/k8s_start_services/files/grafana_svc_details.ini
  43. 82 0
      roles/k8s_start_services/tasks/configure_nginx_prom_grafana.yml
  44. 10 2
      roles/k8s_start_services/tasks/main.yml
  45. 90 0
      roles/k8s_start_services/templates/nginx.conf.j2
  46. 11 1
      roles/k8s_start_services/vars/main.yml
  47. 1534 0
      roles/slurm_exporter/files/slurm-dashboard-node-exporter.json
  48. 2502 0
      roles/slurm_exporter/files/slurm-dashboard.json
  49. 64 0
      roles/slurm_exporter/tasks/configure_grafana.yml
  50. 55 2
      roles/slurm_exporter/tasks/install_prometheus.yml
  51. 7 1
      roles/slurm_exporter/tasks/main.yml
  52. 12 2
      roles/slurm_exporter/vars/main.yml

+ 7 - 26
control_plane/roles/control_plane_device/files/Dockerfile

@@ -1,35 +1,16 @@
 # Dockerfile for creating the management network container
+FROM alpine:latest
 
-FROM centos:8
-
-# RPM REPOs
-RUN dnf install -y \
-    epel-release \
-    && dnf clean all \
-    && rm -rf /var/cache/dnf
-
-RUN dnf install -y dhcp-server
-RUN dnf install -y python3-netaddr
-
-RUN yum install -y \
-  ansible \
-  cronie \
-  tftp\
-  tftp-server\
-  xinetd \
-  net-tools \
-  && yum clean all \
-  &&  rm -rf /var/cache/yum
+#Installing packages
+RUN apk add dhcp
+RUN apk add ansible
+RUN apk add openrc
 
+#Creation of directories and files
 RUN mkdir /root/omnia
+RUN touch /var/lib/dhcp/dhcpd.leases
 
 #Copy Configuration files
 COPY dhcpd.conf  /etc/dhcp/dhcpd.conf
-COPY tftp /etc/xinetd.d/tftp
 COPY inventory_creation.yml /root/
 COPY mngmnt_container_configure.yml /root/
-
-RUN systemctl enable tftp
-RUN systemctl enable dhcpd
-
-CMD ["sbin/init"]

+ 3 - 3
control_plane/roles/control_plane_device/files/inventory_creation.yml

@@ -1,4 +1,4 @@
-#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -19,7 +19,7 @@
   tasks:
     - name: Read dhcp file
       set_fact:
-        var: "{{ lookup('file', '/var/lib/dhcpd/dhcpd.leases').split()| unique | select| list }}"
+        var: "{{ lookup('file', '/var/lib/dhcp/dhcpd.leases').split()| unique | select| list }}"
 
     - name: Filter the ip
       set_fact:
@@ -50,4 +50,4 @@
 
     - name: New line at end of file
       shell: echo "">> omnia/control_plane/roles/collect_device_info/files/mgmt_provisioned_hosts.yml
-      changed_when: false
+      changed_when: false

+ 6 - 3
control_plane/roles/control_plane_device/files/k8s_mngmnt_network.yml

@@ -27,10 +27,13 @@ spec:
         - name: mngmnt-network-container
           image: 'localhost/mngmnt_network_container:latest'
           imagePullPolicy: Never
-          command:
-            - /sbin/init
+          command: ["sh", "-c", "tail -f /dev/null"]
           volumeMounts:
             - name: omnia-storage
               mountPath: /root/omnia
           securityContext:
-            privileged: true
+            allowPrivilegeEscalation: true
+            capabilities:
+              add:
+                - NET_RAW
+            privileged: false

+ 6 - 57
control_plane/roles/control_plane_device/files/mngmnt_container_configure.yml

@@ -1,4 +1,4 @@
-#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -18,64 +18,13 @@
   connection: local
   gather_facts: false
   tasks:
-  - name: Change mode of tftpboot
-    file:
-      path: /var/lib/tftpboot
-      mode: 0777
-
-  - name: Link for tftp services
-    shell: cp -v /usr/lib/systemd/system/tftp.service /etc/systemd/system/tftp-server.service
-
-  - name: Link for tftp services
-    shell: cp -v /usr/lib/systemd/system/tftp.socket /etc/systemd/system/tftp-server.socket
-
-  - name: Edit the tftp-server service file
-    replace:
-      path: /etc/systemd/system/tftp-server.service
-      regexp: ^Requires=tftp.socket
-      replace: Requires=tftp-server.socket
-
-  - name: Edit the tftp-server service file
-    replace:
-      path: /etc/systemd/system/tftp-server.service
-      regexp: ^ExecStart=/usr/sbin/in.tftpd -s /var/lib/tftpboot
-      replace: ExecStart=/usr/sbin/in.tftpd -c -p -s /var/lib/tftpboot
-
-  - name: Edit the tftp-server service file
-    replace:
-      path: /etc/systemd/system/tftp-server.service
-      regexp: ^Also=tftp.socket
-      replace: Also=tftp.socket
-
-  - name: Edit the tftp-server service file
-    lineinfile:
-      path: /etc/systemd/system/tftp-server.service
-      insertafter: '^[Install]'
-      line: 'WantedBy=multi-user.target'
-
-  - name: Edit the tftp-server socket file
-    lineinfile:
-      path: /etc/systemd/system/tftp-server.socket
-      line: "BindIPv6Only=both"
-      insertafter: [Socket]
-
-  - name: Start tftp services
-    service:
-      name: tftp-server
-      state: started
-
-  - name: Start dhcpd services
-    service:
-      name: dhcpd
-      state: started
-
-  - name: Fetch ansible-playbook location
-    command: whereis ansible-playbook
-    changed_when: false
-    register: ansible_playbook_location
+  - name: Install python-netaddr
+    package:
+      name: py3-netaddr
+      state: present
 
   - name: Add inventory cron job
     cron:
       name: Create inventory
       minute: "*/5"
-      job: "{{ ansible_playbook_location.stdout.split(' ')[1] }} /root/inventory_creation.yml"
+      job: /root/inventory_creation.yml"

+ 0 - 20
control_plane/roles/control_plane_device/files/tftp

@@ -1,20 +0,0 @@
-# default: off
-# description: The tftp server serves files using the trivial file transfer \
-#       protocol.  The tftp protocol is often used to boot diskless \
-#       workstations, download configuration files to network-aware printers, \
-#       and to start the installation process for some operating systems.
-service tftp
-{
-        socket_type             = dgram
-        protocol                = udp
-        wait                    = yes
-        user                    = root
-        server                  = /usr/sbin/in.tftpd
-        server_args             = -s /var/lib/tftpboot
-        disable                 = no
-        per_source              = 11
-        cps                     = 100 2
-        flags                   = IPv4
-}
-
-

+ 7 - 19
control_plane/roles/control_plane_ib/files/Dockerfile

@@ -1,26 +1,14 @@
 # Dockerfile for creating the management network container
+FROM alpine:latest
 
-FROM centos:8
-
-# RPM REPOs
-RUN dnf install -y \
-    epel-release \
-    && dnf clean all \
-    && rm -rf /var/cache/dnf
-
-RUN dnf install dhcp-server -y
-RUN yum install -y \
-  ansible \
-  cronie \
-  net-tools \
-  && yum clean all \
-  &&  rm -rf /var/cache/yum
+#Installation of packages
+RUN apk add dhcp
+RUN apk add ansible
+RUN apk add openrc
 
+#Creation of directories and files
 RUN mkdir /root/omnia
+RUN touch /var/lib/dhcp/dhcpd.leases
 
 #Copy Configuration files
 COPY dhcpd.conf  /etc/dhcp/dhcpd.conf
-
-RUN systemctl enable dhcpd
-
-CMD ["sbin/init"]

+ 5 - 5
control_plane/roles/control_plane_ib/files/infiniband_container_configure.yml

@@ -1,4 +1,4 @@
-#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -18,7 +18,7 @@
   connection: local
   gather_facts: false
   tasks:
-  - name: Start dhcpd services
-    service:
-      name: dhcpd
-      state: started
+  - name: Install netaddr
+    package:
+      name: py3-netaddr
+      state: present

+ 6 - 3
control_plane/roles/control_plane_ib/files/k8s_infiniband.yml

@@ -27,10 +27,13 @@ spec:
         - name: infiniband-container
           image: 'localhost/infiniband-container:latest'
           imagePullPolicy: Never
-          command:
-            - /sbin/init
+          command: ["sh", "-c", "tail -f /dev/null"]
           volumeMounts:
             - name: omnia-storage
               mountPath: /root/omnia
           securityContext:
-            privileged: true
+            allowPrivilegeEscalation: true
+            capabilities:
+              add:
+                - NET_RAW
+            privileged: false

文件差异内容过多而无法显示
+ 1363 - 0
control_plane/roles/control_plane_monitoring/files/CoreDNS.json


文件差异内容过多而无法显示
+ 1527 - 0
control_plane/roles/control_plane_monitoring/files/Kubernetes_API_server.json


文件差异内容过多而无法显示
+ 2266 - 0
control_plane/roles/control_plane_monitoring/files/Kubernetes_Compute_Resources_Cluster.json


文件差异内容过多而无法显示
+ 2039 - 0
control_plane/roles/control_plane_monitoring/files/Kubernetes_Compute_Resources_Namespace_Pods.json


+ 918 - 0
control_plane/roles/control_plane_monitoring/files/Kubernetes_Compute_Resources_Node_Pods.json

@@ -0,0 +1,918 @@
+{
+  "__inputs": [],
+  "__elements": [],
+  "__requires": [
+    {
+      "type": "grafana",
+      "id": "grafana",
+      "name": "Grafana",
+      "version": "8.3.2"
+    },
+    {
+      "type": "panel",
+      "id": "graph",
+      "name": "Graph (old)",
+      "version": ""
+    },
+    {
+      "type": "datasource",
+      "id": "prometheus",
+      "name": "Prometheus",
+      "version": "1.0.0"
+    },
+    {
+      "type": "panel",
+      "id": "table-old",
+      "name": "Table (old)",
+      "version": ""
+    }
+  ],
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": "-- Grafana --",
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "target": {
+          "limit": 100,
+          "matchAny": false,
+          "tags": [],
+          "type": "dashboard"
+        },
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": null,
+  "iteration": 1640151600624,
+  "links": [],
+  "liveNow": false,
+  "panels": [
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 0
+      },
+      "id": 5,
+      "panels": [],
+      "title": "CPU Usage",
+      "type": "row"
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": {
+        "uid": "$datasource"
+      },
+      "fill": 10,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 7,
+        "w": 24,
+        "x": 0,
+        "y": 1
+      },
+      "hiddenSeries": false,
+      "id": 1,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": true,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 0,
+      "links": [],
+      "nullPointMode": "null as zero",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "8.3.2",
+      "pointradius": 5,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": true,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "{{pod}}",
+          "refId": "A",
+          "step": 10
+        }
+      ],
+      "thresholds": [],
+      "timeRegions": [],
+      "title": "CPU Usage",
+      "tooltip": {
+        "shared": false,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "mode": "time",
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "logBase": 1,
+          "min": 0,
+          "show": true
+        },
+        {
+          "format": "short",
+          "logBase": 1,
+          "show": false
+        }
+      ],
+      "yaxis": {
+        "align": false
+      }
+    },
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 8
+      },
+      "id": 6,
+      "panels": [],
+      "title": "CPU Quota",
+      "type": "row"
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "columns": [],
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": {
+        "uid": "$datasource"
+      },
+      "fill": 1,
+      "fontSize": "100%",
+      "gridPos": {
+        "h": 7,
+        "w": 24,
+        "x": 0,
+        "y": 9
+      },
+      "id": 2,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": true,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "links": [],
+      "nullPointMode": "null as zero",
+      "percentage": false,
+      "pointradius": 5,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "showHeader": true,
+      "sort": {
+        "col": 0,
+        "desc": true
+      },
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "styles": [
+        {
+          "alias": "Time",
+          "align": "auto",
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "pattern": "Time",
+          "type": "hidden"
+        },
+        {
+          "alias": "CPU Usage",
+          "align": "auto",
+          "colors": [],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "link": false,
+          "linkTargetBlank": false,
+          "linkTooltip": "Drill down",
+          "linkUrl": "",
+          "pattern": "Value #A",
+          "thresholds": [],
+          "type": "number",
+          "unit": "short"
+        },
+        {
+          "alias": "CPU Requests",
+          "align": "auto",
+          "colors": [],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "link": false,
+          "linkTargetBlank": false,
+          "linkTooltip": "Drill down",
+          "linkUrl": "",
+          "pattern": "Value #B",
+          "thresholds": [],
+          "type": "number",
+          "unit": "short"
+        },
+        {
+          "alias": "CPU Requests %",
+          "align": "auto",
+          "colors": [],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "link": false,
+          "linkTargetBlank": false,
+          "linkTooltip": "Drill down",
+          "linkUrl": "",
+          "pattern": "Value #C",
+          "thresholds": [],
+          "type": "number",
+          "unit": "percentunit"
+        },
+        {
+          "alias": "CPU Limits",
+          "align": "auto",
+          "colors": [],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "link": false,
+          "linkTargetBlank": false,
+          "linkTooltip": "Drill down",
+          "linkUrl": "",
+          "pattern": "Value #D",
+          "thresholds": [],
+          "type": "number",
+          "unit": "short"
+        },
+        {
+          "alias": "CPU Limits %",
+          "align": "auto",
+          "colors": [],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "link": false,
+          "linkTargetBlank": false,
+          "linkTooltip": "Drill down",
+          "linkUrl": "",
+          "pattern": "Value #E",
+          "thresholds": [],
+          "type": "number",
+          "unit": "percentunit"
+        },
+        {
+          "alias": "Pod",
+          "align": "auto",
+          "colors": [],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "link": false,
+          "linkTargetBlank": false,
+          "linkTooltip": "Drill down",
+          "linkUrl": "",
+          "pattern": "pod",
+          "thresholds": [],
+          "type": "number",
+          "unit": "short"
+        },
+        {
+          "alias": "",
+          "align": "auto",
+          "colors": [],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "pattern": "/.*/",
+          "thresholds": [],
+          "type": "string",
+          "unit": "short"
+        }
+      ],
+      "targets": [
+        {
+          "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
+          "format": "table",
+          "instant": true,
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "A",
+          "step": 10
+        },
+        {
+          "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
+          "format": "table",
+          "instant": true,
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "B",
+          "step": 10
+        },
+        {
+          "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
+          "format": "table",
+          "instant": true,
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "C",
+          "step": 10
+        },
+        {
+          "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
+          "format": "table",
+          "instant": true,
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "D",
+          "step": 10
+        },
+        {
+          "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
+          "format": "table",
+          "instant": true,
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "E",
+          "step": 10
+        }
+      ],
+      "thresholds": [],
+      "title": "CPU Quota",
+      "tooltip": {
+        "shared": false,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "transform": "table",
+      "type": "table-old",
+      "xaxis": {
+        "mode": "time",
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "logBase": 1,
+          "min": 0,
+          "show": true
+        },
+        {
+          "format": "short",
+          "logBase": 1,
+          "show": false
+        }
+      ]
+    },
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 16
+      },
+      "id": 7,
+      "panels": [],
+      "title": "Memory Usage",
+      "type": "row"
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": {
+        "uid": "$datasource"
+      },
+      "fill": 10,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 7,
+        "w": 24,
+        "x": 0,
+        "y": 17
+      },
+      "hiddenSeries": false,
+      "id": 3,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": true,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 0,
+      "links": [],
+      "nullPointMode": "null as zero",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "8.3.2",
+      "pointradius": 5,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": true,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "{{pod}}",
+          "refId": "A",
+          "step": 10
+        }
+      ],
+      "thresholds": [],
+      "timeRegions": [],
+      "title": "Memory Usage (w/o cache)",
+      "tooltip": {
+        "shared": false,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "mode": "time",
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "bytes",
+          "logBase": 1,
+          "min": 0,
+          "show": true
+        },
+        {
+          "format": "short",
+          "logBase": 1,
+          "show": false
+        }
+      ],
+      "yaxis": {
+        "align": false
+      }
+    },
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 24
+      },
+      "id": 8,
+      "panels": [],
+      "title": "Memory Quota",
+      "type": "row"
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "columns": [],
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": {
+        "uid": "$datasource"
+      },
+      "fill": 1,
+      "fontSize": "100%",
+      "gridPos": {
+        "h": 7,
+        "w": 24,
+        "x": 0,
+        "y": 25
+      },
+      "id": 4,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": true,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "links": [],
+      "nullPointMode": "null as zero",
+      "percentage": false,
+      "pointradius": 5,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "showHeader": true,
+      "sort": {
+        "col": 0,
+        "desc": true
+      },
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "styles": [
+        {
+          "alias": "Time",
+          "align": "auto",
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "pattern": "Time",
+          "type": "hidden"
+        },
+        {
+          "alias": "Memory Usage",
+          "align": "auto",
+          "colors": [],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "link": false,
+          "linkTargetBlank": false,
+          "linkTooltip": "Drill down",
+          "linkUrl": "",
+          "pattern": "Value #A",
+          "thresholds": [],
+          "type": "number",
+          "unit": "bytes"
+        },
+        {
+          "alias": "Memory Requests",
+          "align": "auto",
+          "colors": [],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "link": false,
+          "linkTargetBlank": false,
+          "linkTooltip": "Drill down",
+          "linkUrl": "",
+          "pattern": "Value #B",
+          "thresholds": [],
+          "type": "number",
+          "unit": "bytes"
+        },
+        {
+          "alias": "Memory Requests %",
+          "align": "auto",
+          "colors": [],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "link": false,
+          "linkTargetBlank": false,
+          "linkTooltip": "Drill down",
+          "linkUrl": "",
+          "pattern": "Value #C",
+          "thresholds": [],
+          "type": "number",
+          "unit": "percentunit"
+        },
+        {
+          "alias": "Memory Limits",
+          "align": "auto",
+          "colors": [],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "link": false,
+          "linkTargetBlank": false,
+          "linkTooltip": "Drill down",
+          "linkUrl": "",
+          "pattern": "Value #D",
+          "thresholds": [],
+          "type": "number",
+          "unit": "bytes"
+        },
+        {
+          "alias": "Memory Limits %",
+          "align": "auto",
+          "colors": [],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "link": false,
+          "linkTargetBlank": false,
+          "linkTooltip": "Drill down",
+          "linkUrl": "",
+          "pattern": "Value #E",
+          "thresholds": [],
+          "type": "number",
+          "unit": "percentunit"
+        },
+        {
+          "alias": "Memory Usage (RSS)",
+          "align": "auto",
+          "colors": [],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "link": false,
+          "linkTargetBlank": false,
+          "linkTooltip": "Drill down",
+          "linkUrl": "",
+          "pattern": "Value #F",
+          "thresholds": [],
+          "type": "number",
+          "unit": "bytes"
+        },
+        {
+          "alias": "Memory Usage (Cache)",
+          "align": "auto",
+          "colors": [],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "link": false,
+          "linkTargetBlank": false,
+          "linkTooltip": "Drill down",
+          "linkUrl": "",
+          "pattern": "Value #G",
+          "thresholds": [],
+          "type": "number",
+          "unit": "bytes"
+        },
+        {
+          "alias": "Memory Usage (Swap)",
+          "align": "auto",
+          "colors": [],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "link": false,
+          "linkTargetBlank": false,
+          "linkTooltip": "Drill down",
+          "linkUrl": "",
+          "pattern": "Value #H",
+          "thresholds": [],
+          "type": "number",
+          "unit": "bytes"
+        },
+        {
+          "alias": "Pod",
+          "align": "auto",
+          "colors": [],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "link": false,
+          "linkTargetBlank": false,
+          "linkTooltip": "Drill down",
+          "linkUrl": "",
+          "pattern": "pod",
+          "thresholds": [],
+          "type": "number",
+          "unit": "short"
+        },
+        {
+          "alias": "",
+          "align": "auto",
+          "colors": [],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "pattern": "/.*/",
+          "thresholds": [],
+          "type": "string",
+          "unit": "short"
+        }
+      ],
+      "targets": [
+        {
+          "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)",
+          "format": "table",
+          "instant": true,
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "A",
+          "step": 10
+        },
+        {
+          "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
+          "format": "table",
+          "instant": true,
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "B",
+          "step": 10
+        },
+        {
+          "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{node=~\"$node\"}) by (pod)",
+          "format": "table",
+          "instant": true,
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "C",
+          "step": 10
+        },
+        {
+          "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
+          "format": "table",
+          "instant": true,
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "D",
+          "step": 10
+        },
+        {
+          "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{node=~\"$node\"}) by (pod)",
+          "format": "table",
+          "instant": true,
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "E",
+          "step": 10
+        },
+        {
+          "expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)",
+          "format": "table",
+          "instant": true,
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "F",
+          "step": 10
+        },
+        {
+          "expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)",
+          "format": "table",
+          "instant": true,
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "G",
+          "step": 10
+        },
+        {
+          "expr": "sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)",
+          "format": "table",
+          "instant": true,
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "H",
+          "step": 10
+        }
+      ],
+      "thresholds": [],
+      "title": "Memory Quota",
+      "tooltip": {
+        "shared": false,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "transform": "table",
+      "type": "table-old",
+      "xaxis": {
+        "mode": "time",
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "logBase": 1,
+          "min": 0,
+          "show": true
+        },
+        {
+          "format": "short",
+          "logBase": 1,
+          "show": false
+        }
+      ]
+    }
+  ],
+  "refresh": "10s",
+  "schemaVersion": 33,
+  "style": "dark",
+  "tags": [
+    "kubernetes-mixin"
+  ],
+  "templating": {
+    "list": [
+      {
+        "current": {
+          "selected": true,
+          "text": "default",
+          "value": "default"
+        },
+        "hide": 0,
+        "includeAll": false,
+        "multi": false,
+        "name": "datasource",
+        "options": [],
+        "query": "prometheus",
+        "queryValue": "",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "type": "datasource"
+      },
+      {
+        "current": {},
+        "datasource": {
+          "uid": "$datasource"
+        },
+        "definition": "",
+        "hide": 2,
+        "includeAll": false,
+        "multi": false,
+        "name": "cluster",
+        "options": [],
+        "query": {
+          "query": "label_values(kube_pod_info, cluster)",
+          "refId": "Prometheus-cluster-Variable-Query"
+        },
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 1,
+        "tagValuesQuery": "",
+        "tagsQuery": "",
+        "type": "query",
+        "useTags": false
+      },
+      {
+        "current": {},
+        "datasource": {
+          "uid": "$datasource"
+        },
+        "definition": "",
+        "hide": 0,
+        "includeAll": false,
+        "multi": true,
+        "name": "node",
+        "options": [],
+        "query": {
+          "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, node)",
+          "refId": "Prometheus-node-Variable-Query"
+        },
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 1,
+        "tagValuesQuery": "",
+        "tagsQuery": "",
+        "type": "query",
+        "useTags": false
+      }
+    ]
+  },
+  "time": {
+    "from": "now-1h",
+    "to": "now"
+  },
+  "timepicker": {
+    "refresh_intervals": [
+      "5s",
+      "10s",
+      "30s",
+      "1m",
+      "5m",
+      "15m",
+      "30m",
+      "1h",
+      "2h",
+      "1d"
+    ],
+    "time_options": [
+      "5m",
+      "15m",
+      "1h",
+      "6h",
+      "12h",
+      "24h",
+      "2d",
+      "7d",
+      "30d"
+    ]
+  },
+  "timezone": "UTC",
+  "title": "Kubernetes / Compute Resources / Node (Pods)",
+  "uid": "200ac8fdbfbb74b39aff88118e4d1c2c",
+  "version": 1,
+  "weekStart": ""
+}

文件差异内容过多而无法显示
+ 1638 - 0
control_plane/roles/control_plane_monitoring/files/Kubernetes_Compute_Resources_Pod.json


文件差异内容过多而无法显示
+ 1850 - 0
control_plane/roles/control_plane_monitoring/files/Kubernetes_Compute_Resources_Workload.json


文件差异内容过多而无法显示
+ 2071 - 0
control_plane/roles/control_plane_monitoring/files/Kubernetes_Kubelet.json


文件差异内容过多而无法显示
+ 1615 - 0
control_plane/roles/control_plane_monitoring/files/Kubernetes_Networking_Cluster.json


文件差异内容过多而无法显示
+ 1297 - 0
control_plane/roles/control_plane_monitoring/files/Kubernetes_Networking_Namespace_Pods.json


文件差异内容过多而无法显示
+ 1492 - 0
control_plane/roles/control_plane_monitoring/files/Kubernetes_Networking_Namespace_Workload.json


文件差异内容过多而无法显示
+ 1085 - 0
control_plane/roles/control_plane_monitoring/files/Kubernetes_Networking_Pod.json


文件差异内容过多而无法显示
+ 1226 - 0
control_plane/roles/control_plane_monitoring/files/Kubernetes_Networking_Workload.json


+ 963 - 0
control_plane/roles/control_plane_monitoring/files/Kubernetes_Scheduler.json

@@ -0,0 +1,963 @@
+{
+  "__inputs": [],
+  "__elements": [],
+  "__requires": [
+    {
+      "type": "grafana",
+      "id": "grafana",
+      "name": "Grafana",
+      "version": "8.3.2"
+    },
+    {
+      "type": "panel",
+      "id": "graph",
+      "name": "Graph (old)",
+      "version": ""
+    },
+    {
+      "type": "datasource",
+      "id": "prometheus",
+      "name": "Prometheus",
+      "version": "1.0.0"
+    },
+    {
+      "type": "panel",
+      "id": "stat",
+      "name": "Stat",
+      "version": ""
+    }
+  ],
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": "-- Grafana --",
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "target": {
+          "limit": 100,
+          "matchAny": false,
+          "tags": [],
+          "type": "dashboard"
+        },
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": false,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": null,
+  "iteration": 1640159989547,
+  "links": [],
+  "liveNow": false,
+  "panels": [
+    {
+      "datasource": {
+        "uid": "$datasource"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [
+            {
+              "options": {
+                "match": "null",
+                "result": {
+                  "text": "N/A"
+                }
+              },
+              "type": "special"
+            }
+          ],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "none"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 7,
+        "w": 4,
+        "x": 0,
+        "y": 0
+      },
+      "id": 2,
+      "links": [],
+      "maxDataPoints": 100,
+      "options": {
+        "colorMode": "none",
+        "graphMode": "none",
+        "justifyMode": "auto",
+        "orientation": "horizontal",
+        "reduceOptions": {
+          "calcs": [
+            "min"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "8.3.2",
+      "targets": [
+        {
+          "expr": "sum(up{job=\"kube-scheduler\"})",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "title": "Up",
+      "type": "stat"
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": {
+        "uid": "$datasource"
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 7,
+        "w": 10,
+        "x": 4,
+        "y": 0
+      },
+      "hiddenSeries": false,
+      "id": 3,
+      "legend": {
+        "alignAsTable": true,
+        "avg": false,
+        "current": true,
+        "max": false,
+        "min": false,
+        "rightSide": true,
+        "show": true,
+        "total": false,
+        "values": true
+      },
+      "lines": true,
+      "linewidth": 1,
+      "links": [],
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "8.3.2",
+      "pointradius": 5,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "{{instance}} e2e",
+          "refId": "A"
+        },
+        {
+          "expr": "sum(rate(scheduler_binding_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "{{instance}} binding",
+          "refId": "B"
+        },
+        {
+          "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "{{instance}} scheduling algorithm",
+          "refId": "C"
+        },
+        {
+          "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "{{instance}} volume",
+          "refId": "D"
+        }
+      ],
+      "thresholds": [],
+      "timeRegions": [],
+      "title": "Scheduling Rate",
+      "tooltip": {
+        "shared": false,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "mode": "time",
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "ops",
+          "logBase": 1,
+          "min": 0,
+          "show": true
+        },
+        {
+          "format": "ops",
+          "logBase": 1,
+          "min": 0,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": {
+        "uid": "$datasource"
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 7,
+        "w": 10,
+        "x": 14,
+        "y": 0
+      },
+      "hiddenSeries": false,
+      "id": 4,
+      "legend": {
+        "alignAsTable": true,
+        "avg": false,
+        "current": true,
+        "max": false,
+        "min": false,
+        "rightSide": true,
+        "show": true,
+        "total": false,
+        "values": true
+      },
+      "lines": true,
+      "linewidth": 1,
+      "links": [],
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "8.3.2",
+      "pointradius": 5,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "{{instance}} e2e",
+          "refId": "A"
+        },
+        {
+          "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "{{instance}} binding",
+          "refId": "B"
+        },
+        {
+          "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "{{instance}} scheduling algorithm",
+          "refId": "C"
+        },
+        {
+          "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "{{instance}} volume",
+          "refId": "D"
+        }
+      ],
+      "thresholds": [],
+      "timeRegions": [],
+      "title": "Scheduling latency 99th Quantile",
+      "tooltip": {
+        "shared": false,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "mode": "time",
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "s",
+          "logBase": 1,
+          "min": 0,
+          "show": true
+        },
+        {
+          "format": "s",
+          "logBase": 1,
+          "min": 0,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": {
+        "uid": "$datasource"
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 7,
+        "w": 8,
+        "x": 0,
+        "y": 7
+      },
+      "hiddenSeries": false,
+      "id": 5,
+      "legend": {
+        "alignAsTable": false,
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "rightSide": false,
+        "show": true,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "links": [],
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "8.3.2",
+      "pointradius": 5,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "2xx",
+          "refId": "A"
+        },
+        {
+          "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "3xx",
+          "refId": "B"
+        },
+        {
+          "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "4xx",
+          "refId": "C"
+        },
+        {
+          "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "5xx",
+          "refId": "D"
+        }
+      ],
+      "thresholds": [],
+      "timeRegions": [],
+      "title": "Kube API Request Rate",
+      "tooltip": {
+        "shared": false,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "mode": "time",
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "ops",
+          "logBase": 1,
+          "min": 0,
+          "show": true
+        },
+        {
+          "format": "ops",
+          "logBase": 1,
+          "min": 0,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": {
+        "uid": "$datasource"
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 7,
+        "w": 16,
+        "x": 8,
+        "y": 7
+      },
+      "hiddenSeries": false,
+      "id": 6,
+      "legend": {
+        "alignAsTable": false,
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "rightSide": false,
+        "show": true,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "links": [],
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "8.3.2",
+      "pointradius": 5,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "{{verb}} {{url}}",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeRegions": [],
+      "title": "Post Request Latency 99th Quantile",
+      "tooltip": {
+        "shared": false,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "mode": "time",
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "s",
+          "logBase": 1,
+          "min": 0,
+          "show": true
+        },
+        {
+          "format": "s",
+          "logBase": 1,
+          "min": 0,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": {
+        "uid": "$datasource"
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 7,
+        "w": 24,
+        "x": 0,
+        "y": 14
+      },
+      "hiddenSeries": false,
+      "id": 7,
+      "legend": {
+        "alignAsTable": true,
+        "avg": false,
+        "current": true,
+        "max": false,
+        "min": false,
+        "rightSide": true,
+        "show": true,
+        "total": false,
+        "values": true
+      },
+      "lines": true,
+      "linewidth": 1,
+      "links": [],
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "8.3.2",
+      "pointradius": 5,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "{{verb}} {{url}}",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeRegions": [],
+      "title": "Get Request Latency 99th Quantile",
+      "tooltip": {
+        "shared": false,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "mode": "time",
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "s",
+          "logBase": 1,
+          "min": 0,
+          "show": true
+        },
+        {
+          "format": "s",
+          "logBase": 1,
+          "min": 0,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": {
+        "uid": "$datasource"
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 7,
+        "w": 8,
+        "x": 0,
+        "y": 21
+      },
+      "hiddenSeries": false,
+      "id": 8,
+      "legend": {
+        "alignAsTable": false,
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "rightSide": false,
+        "show": true,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "links": [],
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "8.3.2",
+      "pointradius": 5,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "process_resident_memory_bytes{job=\"kube-scheduler\", instance=~\"$instance\"}",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "{{instance}}",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeRegions": [],
+      "title": "Memory",
+      "tooltip": {
+        "shared": false,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "mode": "time",
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "bytes",
+          "logBase": 1,
+          "show": true
+        },
+        {
+          "format": "bytes",
+          "logBase": 1,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": {
+        "uid": "$datasource"
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 7,
+        "w": 8,
+        "x": 8,
+        "y": 21
+      },
+      "hiddenSeries": false,
+      "id": 9,
+      "legend": {
+        "alignAsTable": false,
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "rightSide": false,
+        "show": true,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "links": [],
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "8.3.2",
+      "pointradius": 5,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "rate(process_cpu_seconds_total{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "{{instance}}",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeRegions": [],
+      "title": "CPU usage",
+      "tooltip": {
+        "shared": false,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "mode": "time",
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "bytes",
+          "logBase": 1,
+          "min": 0,
+          "show": true
+        },
+        {
+          "format": "bytes",
+          "logBase": 1,
+          "min": 0,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": {
+        "uid": "$datasource"
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 7,
+        "w": 8,
+        "x": 16,
+        "y": 21
+      },
+      "hiddenSeries": false,
+      "id": 10,
+      "legend": {
+        "alignAsTable": false,
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "rightSide": false,
+        "show": true,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "links": [],
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "8.3.2",
+      "pointradius": 5,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "go_goroutines{job=\"kube-scheduler\",instance=~\"$instance\"}",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "{{instance}}",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeRegions": [],
+      "title": "Goroutines",
+      "tooltip": {
+        "shared": false,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "mode": "time",
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "logBase": 1,
+          "show": true
+        },
+        {
+          "format": "short",
+          "logBase": 1,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false
+      }
+    }
+  ],
+  "refresh": "10s",
+  "schemaVersion": 33,
+  "style": "dark",
+  "tags": [
+    "kubernetes-mixin"
+  ],
+  "templating": {
+    "list": [
+      {
+        "current": {
+          "selected": false,
+          "text": "default",
+          "value": "default"
+        },
+        "hide": 0,
+        "includeAll": false,
+        "multi": false,
+        "name": "datasource",
+        "options": [],
+        "query": "prometheus",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "type": "datasource"
+      },
+      {
+        "current": {},
+        "datasource": {
+          "uid": "$datasource"
+        },
+        "definition": "",
+        "hide": 0,
+        "includeAll": true,
+        "multi": false,
+        "name": "instance",
+        "options": [],
+        "query": {
+          "query": "label_values(process_cpu_seconds_total{job=\"kube-scheduler\"}, instance)",
+          "refId": "Prometheus-instance-Variable-Query"
+        },
+        "refresh": 2,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 1,
+        "tagValuesQuery": "",
+        "tagsQuery": "",
+        "type": "query",
+        "useTags": false
+      }
+    ]
+  },
+  "time": {
+    "from": "now-1h",
+    "to": "now"
+  },
+  "timepicker": {
+    "refresh_intervals": [
+      "5s",
+      "10s",
+      "30s",
+      "1m",
+      "5m",
+      "15m",
+      "30m",
+      "1h",
+      "2h",
+      "1d"
+    ],
+    "time_options": [
+      "5m",
+      "15m",
+      "1h",
+      "6h",
+      "12h",
+      "24h",
+      "2d",
+      "7d",
+      "30d"
+    ]
+  },
+  "timezone": "UTC",
+  "title": "Kubernetes / Scheduler",
+  "uid": "2e6b6a3b4bddf1427b3a55aa1311c656",
+  "version": 1,
+  "weekStart": ""
+}

+ 820 - 0
control_plane/roles/control_plane_monitoring/files/Kubernetes_StatefulSets.json

@@ -0,0 +1,820 @@
+{
+  "__inputs": [],
+  "__elements": [],
+  "__requires": [
+    {
+      "type": "grafana",
+      "id": "grafana",
+      "name": "Grafana",
+      "version": "8.3.2"
+    },
+    {
+      "type": "panel",
+      "id": "graph",
+      "name": "Graph (old)",
+      "version": ""
+    },
+    {
+      "type": "datasource",
+      "id": "prometheus",
+      "name": "Prometheus",
+      "version": "1.0.0"
+    },
+    {
+      "type": "panel",
+      "id": "stat",
+      "name": "Stat",
+      "version": ""
+    }
+  ],
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": "-- Grafana --",
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "target": {
+          "limit": 100,
+          "matchAny": false,
+          "tags": [],
+          "type": "dashboard"
+        },
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": false,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": null,
+  "iteration": 1640160026561,
+  "links": [],
+  "liveNow": false,
+  "panels": [
+    {
+      "datasource": {
+        "uid": "$datasource"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "fixedColor": "rgb(31, 120, 193)",
+            "mode": "fixed"
+          },
+          "mappings": [
+            {
+              "options": {
+                "match": "null",
+                "result": {
+                  "text": "0"
+                }
+              },
+              "type": "special"
+            }
+          ],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "none"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 7,
+        "w": 8,
+        "x": 0,
+        "y": 0
+      },
+      "id": 2,
+      "links": [],
+      "maxDataPoints": 100,
+      "options": {
+        "colorMode": "none",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "horizontal",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "8.3.2",
+      "targets": [
+        {
+          "expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m]))",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "title": "CPU",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "uid": "$datasource"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "fixedColor": "rgb(31, 120, 193)",
+            "mode": "fixed"
+          },
+          "mappings": [
+            {
+              "options": {
+                "match": "null",
+                "result": {
+                  "text": "0"
+                }
+              },
+              "type": "special"
+            }
+          ],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "none"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 7,
+        "w": 8,
+        "x": 8,
+        "y": 0
+      },
+      "id": 3,
+      "links": [],
+      "maxDataPoints": 100,
+      "options": {
+        "colorMode": "none",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "horizontal",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "8.3.2",
+      "targets": [
+        {
+          "expr": "sum(container_memory_usage_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}) / 1024^3",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "title": "Memory",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "uid": "$datasource"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "fixedColor": "rgb(31, 120, 193)",
+            "mode": "fixed"
+          },
+          "mappings": [
+            {
+              "options": {
+                "match": "null",
+                "result": {
+                  "text": "0"
+                }
+              },
+              "type": "special"
+            }
+          ],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "none"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 7,
+        "w": 8,
+        "x": 16,
+        "y": 0
+      },
+      "id": 4,
+      "links": [],
+      "maxDataPoints": 100,
+      "options": {
+        "colorMode": "none",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "horizontal",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "8.3.2",
+      "targets": [
+        {
+          "expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\",pod=~\"$statefulset.*\"}[3m]))",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "title": "Network",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "uid": "$datasource"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [
+            {
+              "options": {
+                "match": "null",
+                "result": {
+                  "text": "0"
+                }
+              },
+              "type": "special"
+            }
+          ],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "none"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 3,
+        "w": 6,
+        "x": 0,
+        "y": 7
+      },
+      "id": 5,
+      "links": [],
+      "maxDataPoints": 100,
+      "options": {
+        "colorMode": "none",
+        "graphMode": "none",
+        "justifyMode": "auto",
+        "orientation": "horizontal",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "8.3.2",
+      "targets": [
+        {
+          "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "title": "Desired Replicas",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "uid": "$datasource"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [
+            {
+              "options": {
+                "match": "null",
+                "result": {
+                  "text": "0"
+                }
+              },
+              "type": "special"
+            }
+          ],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "none"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 3,
+        "w": 6,
+        "x": 6,
+        "y": 7
+      },
+      "id": 6,
+      "links": [],
+      "maxDataPoints": 100,
+      "options": {
+        "colorMode": "none",
+        "graphMode": "none",
+        "justifyMode": "auto",
+        "orientation": "horizontal",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "8.3.2",
+      "targets": [
+        {
+          "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "title": "Replicas of current version",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "uid": "$datasource"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [
+            {
+              "options": {
+                "match": "null",
+                "result": {
+                  "text": "0"
+                }
+              },
+              "type": "special"
+            }
+          ],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "none"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 3,
+        "w": 6,
+        "x": 12,
+        "y": 7
+      },
+      "id": 7,
+      "links": [],
+      "maxDataPoints": 100,
+      "options": {
+        "colorMode": "none",
+        "graphMode": "none",
+        "justifyMode": "auto",
+        "orientation": "horizontal",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "8.3.2",
+      "targets": [
+        {
+          "expr": "max(kube_statefulset_status_observed_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "title": "Observed Generation",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "uid": "$datasource"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [
+            {
+              "options": {
+                "match": "null",
+                "result": {
+                  "text": "0"
+                }
+              },
+              "type": "special"
+            }
+          ],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "none"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 3,
+        "w": 6,
+        "x": 18,
+        "y": 7
+      },
+      "id": 8,
+      "links": [],
+      "maxDataPoints": 100,
+      "options": {
+        "colorMode": "none",
+        "graphMode": "none",
+        "justifyMode": "auto",
+        "orientation": "horizontal",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "8.3.2",
+      "targets": [
+        {
+          "expr": "max(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "title": "Metadata Generation",
+      "type": "stat"
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": {
+        "uid": "$datasource"
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 7,
+        "w": 8,
+        "x": 0,
+        "y": 10
+      },
+      "hiddenSeries": false,
+      "id": 9,
+      "legend": {
+        "alignAsTable": false,
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "rightSide": false,
+        "show": true,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "links": [],
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "8.3.2",
+      "pointradius": 5,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "replicas specified",
+          "refId": "A"
+        },
+        {
+          "expr": "max(kube_statefulset_status_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "replicas created",
+          "refId": "B"
+        },
+        {
+          "expr": "min(kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "ready",
+          "refId": "C"
+        },
+        {
+          "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "replicas of current version",
+          "refId": "D"
+        },
+        {
+          "expr": "min(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "updated",
+          "refId": "E"
+        }
+      ],
+      "thresholds": [],
+      "timeRegions": [],
+      "title": "Replicas",
+      "tooltip": {
+        "shared": false,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "mode": "time",
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "logBase": 1,
+          "show": true
+        },
+        {
+          "format": "short",
+          "logBase": 1,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false
+      }
+    }
+  ],
+  "refresh": "",
+  "schemaVersion": 33,
+  "style": "dark",
+  "tags": [
+    "kubernetes-mixin"
+  ],
+  "templating": {
+    "list": [
+      {
+        "current": {
+          "selected": false,
+          "text": "default",
+          "value": "default"
+        },
+        "hide": 0,
+        "includeAll": false,
+        "multi": false,
+        "name": "datasource",
+        "options": [],
+        "query": "prometheus",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "type": "datasource"
+      },
+      {
+        "current": {},
+        "datasource": {
+          "uid": "$datasource"
+        },
+        "definition": "",
+        "hide": 2,
+        "includeAll": false,
+        "label": "cluster",
+        "multi": false,
+        "name": "cluster",
+        "options": [],
+        "query": {
+          "query": "label_values(kube_statefulset_metadata_generation, cluster)",
+          "refId": "Prometheus-cluster-Variable-Query"
+        },
+        "refresh": 2,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 1,
+        "tagValuesQuery": "",
+        "tagsQuery": "",
+        "type": "query",
+        "useTags": false
+      },
+      {
+        "current": {},
+        "datasource": {
+          "uid": "$datasource"
+        },
+        "definition": "",
+        "hide": 0,
+        "includeAll": false,
+        "label": "Namespace",
+        "multi": false,
+        "name": "namespace",
+        "options": [],
+        "query": {
+          "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
+          "refId": "Prometheus-namespace-Variable-Query"
+        },
+        "refresh": 2,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 1,
+        "tagValuesQuery": "",
+        "tagsQuery": "",
+        "type": "query",
+        "useTags": false
+      },
+      {
+        "current": {},
+        "datasource": {
+          "uid": "$datasource"
+        },
+        "definition": "",
+        "hide": 0,
+        "includeAll": false,
+        "label": "Name",
+        "multi": false,
+        "name": "statefulset",
+        "options": [],
+        "query": {
+          "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, statefulset)",
+          "refId": "Prometheus-statefulset-Variable-Query"
+        },
+        "refresh": 2,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 1,
+        "tagValuesQuery": "",
+        "tagsQuery": "",
+        "type": "query",
+        "useTags": false
+      }
+    ]
+  },
+  "time": {
+    "from": "now-1h",
+    "to": "now"
+  },
+  "timepicker": {
+    "refresh_intervals": [
+      "5s",
+      "10s",
+      "30s",
+      "1m",
+      "5m",
+      "15m",
+      "30m",
+      "1h",
+      "2h",
+      "1d"
+    ],
+    "time_options": [
+      "5m",
+      "15m",
+      "1h",
+      "6h",
+      "12h",
+      "24h",
+      "2d",
+      "7d",
+      "30d"
+    ]
+  },
+  "timezone": "UTC",
+  "title": "Kubernetes / StatefulSets",
+  "uid": "a31c1f46e6f727cb37c0d731a7245005",
+  "version": 1,
+  "weekStart": ""
+}

文件差异内容过多而无法显示
+ 1163 - 0
control_plane/roles/control_plane_monitoring/files/Prometheus_Overview.json


+ 73 - 0
control_plane/roles/control_plane_monitoring/tasks/configure_k8s_grafana.yml

@@ -0,0 +1,73 @@
+#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+- name: Get grafana service IP
+  command: kubectl get svc -n {{ grafana_namespace }} -o=jsonpath='{.items[0].spec.clusterIP}'
+  changed_when: false
+  register: grafana_svc_ip
+
+- name: Get grafana service port
+  command: kubectl get svc -n {{ grafana_namespace }} -o=jsonpath='{.items[0].spec.ports[0].port}'
+  changed_when: false
+  register: grafana_svc_port
+
+- name: Get kube-prometheus svc IP
+  command: kubectl get svc -n {{ monitoring_namespace }} -l app=kube-prometheus-stack-prometheus -o=jsonpath='{.items[0].spec.clusterIP}'
+  changed_when: false
+  register: kube_prom_svc_ip
+
+- name: Get kube-prometheus svc port
+  command: kubectl get svc -n {{ monitoring_namespace }} -l app=kube-prometheus-stack-prometheus -o=jsonpath='{.items[0].spec.ports[0].port}'
+  changed_when: false
+  register: kube_prom_svc_port
+
+- name: Install community.grafana collection
+  command: ansible-galaxy collection install community.grafana
+  changed_when: True
+
+- name: Create prometheus datasource
+  community.grafana.grafana_datasource:
+    name: control-plane-prometheus
+    grafana_url: "http://{{ grafana_svc_ip.stdout }}:{{ grafana_svc_port.stdout }}"
+    grafana_user: "{{ grafana_username }}"
+    grafana_password: "{{ grafana_password }}"
+    ds_type: prometheus
+    ds_url: "http://{{ kube_prom_svc_ip.stdout }}:{{ kube_prom_svc_port.stdout }}"
+    access: proxy
+  no_log: true
+
+- name: Import K8s Grafana dashboards
+  community.grafana.grafana_dashboard:
+    grafana_url: "http://{{ grafana_svc_ip.stdout }}:{{ grafana_svc_port.stdout }}"
+    grafana_user: "{{ grafana_username }}"
+    grafana_password: "{{ grafana_password }}"
+    state: present
+    commit_message: Updated by ansible
+    overwrite: yes
+    path: "{{ role_path }}/files/{{ item }}"
+  with_items: "{{ grafana_dashboard_json_files }}"
+  no_log: true
+
+- name: Save grafana svc ip
+  replace:
+    path: "{{ role_path }}/../../../roles/k8s_start_services/files/grafana_svc_details.ini"
+    regexp: "ip=.*"
+    replace: "ip={{ grafana_svc_ip.stdout }}"
+
+- name: Save grafana svc ip
+  replace:
+    path: "{{ role_path }}/../../../roles/k8s_start_services/files/grafana_svc_details.ini"
+    regexp: "port=.*"
+    replace: "port={{ grafana_svc_port.stdout }}"

+ 15 - 0
control_plane/roles/control_plane_monitoring/tasks/install_grafana.yml

@@ -0,0 +1,15 @@
+#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+

+ 57 - 0
control_plane/roles/control_plane_monitoring/tasks/install_k8s_prom_stack.yml

@@ -0,0 +1,57 @@
+#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+- name: Install kubernetes.core collection
+  command: ansible-galaxy collection install kubernetes.core
+  changed_when: True
+
+- name: Add prometheus-community chart
+  kubernetes.core.helm_repository:
+    name: prometheus-community
+    repo_url: "https://kubernetes.github.io/ingress-nginx"
+  failed_when: False
+
+- name: Install kube-prom-stack
+  block:
+    - name: Install kube-prom-stack
+      kubernetes.core.helm:
+        name: monitoring
+        chart_ref: prometheus-community/kube-prometheus-stack
+        chart_version: "{{ k8s_prom_stack_chart_version }}"
+        update_repo_cache: True
+        release_namespace: "{{ monitoring_namespace }}"
+        create_namespace : yes
+        wait: true
+        values:
+          grafana:
+            enabled: False
+  rescue:
+    - name: Delete existing kube-prom-stack crd
+      command: "kubectl delete crd {{ item }}"
+      changed_when: True
+      with_items: "{{ k8s_prom_stack_crd }}"
+
+    - name: Install kube-prom-stack
+      kubernetes.core.helm:
+        name: monitoring
+        chart_ref: prometheus-community/kube-prometheus-stack
+        chart_version: "{{ k8s_prom_stack_chart_version }}"
+        update_repo_cache: True
+        release_namespace: "{{ monitoring_namespace }}"
+        create_namespace : yes
+        wait: true
+        values:
+          grafana:
+            enabled: False

+ 23 - 0
control_plane/roles/control_plane_monitoring/tasks/main.yml

@@ -0,0 +1,23 @@
+#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+#- name: Install Grafana
+#  import_tasks: install_grafana.yml
+
+- name: Install K8s prometheus stack
+  import_tasks: install_k8s_prom_stack.yml
+
+- name: Configure K8s grafana
+  import_tasks: configure_k8s_grafana.yml

+ 47 - 0
control_plane/roles/control_plane_monitoring/vars/main.yml

@@ -0,0 +1,47 @@
+#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+grafana_username: ""
+grafana_password: ""
+grafana_namespace: grafana
+
+monitoring_namespace: monitoring
+k8s_prom_stack_chart_version: 25.0.0
+k8s_prom_stack_crd:
+  - alertmanagerconfigs.monitoring.coreos.com
+  - alertmanagers.monitoring.coreos.com
+  - podmonitors.monitoring.coreos.com
+  - probes.monitoring.coreos.com
+  - prometheuses.monitoring.coreos.com
+  - prometheusrules.monitoring.coreos.com
+  - servicemonitors.monitoring.coreos.com
+  - thanosrulers.monitoring.coreos.com
+
+grafana_dashboard_json_files:
+  - CoreDNS.json
+  - Kubernetes_API_server.json
+  - Kubernetes_Compute_Resources_Cluster.json
+  - Kubernetes_Compute_Resources_Namespace_Pods.json
+  - Kubernetes_Compute_Resources_Node_Pods.json
+  - Kubernetes_Compute_Resources_Pod.json
+  - Kubernetes_Compute_Resources_Workload.json
+  - Kubernetes_Kubelet.json
+  - Kubernetes_Networking_Cluster.json
+  - Kubernetes_Networking_Namespace_Pods.json
+  - Kubernetes_Networking_Namespace_Workload.json
+  - Kubernetes_Networking_Pod.json
+  - Kubernetes_Networking_Workload.json
+  - Kubernetes_Scheduler.json
+  - Prometheus_Overview.json

+ 70 - 0
control_plane/roles/provision_cobbler/files/Dockerfile_leap

@@ -0,0 +1,70 @@
+FROM registry.opensuse.org/opensuse/leap:15.3
+
+#Enable all repose
+RUN zypper mr -ea
+
+# Refresh and update the repo
+RUN zypper --gpg-auto-import-keys refresh
+RUN zypper --gpg-auto-import-keys up -y
+
+#Enable systemd
+RUN zypper in -y dbus-1 systemd-sysvinit
+RUN cp /usr/lib/systemd/system/dbus.service /etc/systemd/system/; \
+    sed -i 's/OOMScoreAdjust=-900//' /etc/systemd/system/dbus.service
+
+VOLUME ["/sys/fs/cgroup", "/run"]
+
+RUN zypper in -y git-core
+RUN git clone -b v3.2.2 https://github.com/cobbler/cobbler.git
+RUN cd cobbler/
+
+RUN mkdir /root/omnia
+
+RUN zypper in --no-confirm python3-librepo \
+                      python3-schema \
+                      syslinux \
+                      wget \
+                      dhcp-server \
+                      python3-pykickstart \
+                      dnf-plugins-core \
+                      cronie \
+                      xinetd \
+                      python3-coverage \
+                      python3-Cheetah3 \
+                      python3-netaddr \
+                      python3-devel \
+                      python3-future \
+                      apache2-mod_wsgi-python3 \
+                      gcc \
+                      python-rpm-macros \
+                      rpm-build \
+                      ansible \
+                      grub2-x86_64-efi \
+                      shim \
+                      yum-utils \
+                      python3-Sphinx \
+                      python3-wheel \
+                      bash-completion \
+                      tftp \
+                      vim
+
+RUN useradd -ms /bin/bash dhcpd
+
+#Copy Configuration files
+COPY settings.yaml /etc/cobbler/settings.yaml
+COPY dhcp.template  /etc/cobbler/dhcp.template
+COPY modules.conf  /etc/cobbler/modules.conf
+COPY tftp /etc/xinetd.d/tftp
+COPY .users.digest /etc/cobbler/users.digest
+COPY cobbler_configurations_leap.yml /root
+COPY tftp.yml /root
+COPY inventory_creation.yml /root
+
+EXPOSE 69 80 443 25151
+
+VOLUME [ "/srv/www/cobbler", "/var/lib/cobbler/backup", "/mnt" ]
+
+RUN systemctl enable apache2
+RUN systemctl enable dhcpd
+
+CMD ["sbin/init"]

+ 5 - 5
control_plane/roles/provision_cobbler/files/cobbler_configurations.yml

@@ -110,7 +110,7 @@
       mode: 0775
     tags: install
     when: name_iso == "rocky"
-
+    
   - name: Pxe menu
     copy:
       src: "/root/omnia/control_plane/roles/provision_cobbler/files/menu.yml"
@@ -121,8 +121,8 @@
   - name: Assign default grub option
     replace:
       path: "/var/lib/cobbler/grub_config/grub/grub.cfg"
-      regexp: "^set default=\'local\'"
-      replace: "set default=\'1\'"
+      regexp: "^set default='local'"
+      replace: "set default='1'"
     tags: install
 
   - name: Assign default grub timeout
@@ -155,10 +155,10 @@
     cron:
       name: Start tftp service
       minute: "*"
-      job: "{{ ansible_playbook_path.stdout.split(' ')[1] }} /root/tftp.yml"
+      job: "{{ ansible_playbook_path.stdout.split(' ')[1] }} /root/tftp.yml -e provision_os={{ name_iso }}"
 
   - name: Add inventory cron job
     cron:
       name: Create inventory
       minute: "*/5"
-      job: "{{ ansible_playbook_path.stdout.split(' ')[1] }} /root/inventory_creation.yml"
+      job: "{{ ansible_playbook_path.stdout.split(' ')[1] }} /root/inventory_creation.yml -e dhcpd_lease_file=\"/var/lib/dhcpd/dhcpd.leases\""

+ 152 - 0
control_plane/roles/provision_cobbler/files/cobbler_configurations_leap.yml

@@ -0,0 +1,152 @@
+#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+- name: Initial cobbler setup
+  hosts: localhost
+  connection: local
+  gather_facts: false
+  tasks:
+  - name: Inside cobbler container
+    debug:
+      msg: "Hiii! I am cobbler"
+
+  - name: Make
+    command: make rpms
+    args:
+      chdir: /cobbler
+    changed_when: false
+
+  - name: Install cobbler and cobbler-web
+    command: zypper in -y --allow-unsigned-rpm cobbler-3.2.2-1.noarch.rpm cobbler-web-3.2.2-1.noarch.rpm
+    args:
+      chdir: /cobbler/rpm-build
+    changed_when: false
+    
+  - name: Run script
+    shell: sh ./mkgrub.sh
+    args:
+      chdir: /usr/share/cobbler/bin
+    changed_when: false
+  
+  - name: Add settings to http
+    shell: echo "LoadModule wsgi_module modules/mod_wsgi_python3.so" >/etc/apache2/conf.d/wsgi.conf
+    changed_when: false
+
+  - name: Add interface to the /etc/sysconfig/dhcpd
+    replace:
+      path: "/etc/sysconfig/dhcpd"
+      regexp: "^DHCPD_INTERFACE=\"\""
+      replace: "DHCPD_INTERFACE={{ host_network_nic }}"
+
+  - name: Enable cobbler
+    command: systemctl enable {{ item }}
+    with_items:
+      - cobblerd
+      - tftp
+    changed_when: false
+
+  - name: Restart httpd
+    service:
+      name: "{{ item }}"
+      state: restarted
+    loop:
+      - apache2
+      - cobblerd
+
+  - name: Start services
+    service:
+      name: "{{ item }}"
+      state: started
+    loop:
+      - cobblerd
+      - tftp
+
+  - name: Adding curl
+    shell: export PATH="/usr/bin/curl:$PATH"
+
+  - name: Run import command
+    command: cobbler import --arch=x86_64 --path=/mnt --name="{{ name_iso }}"
+    changed_when: false
+
+  - name: Kickstart profile - leap
+    copy:
+      src: "/root/leap15.xml"
+      dest: "/var/lib/cobbler/templates/sample_autoyast.xml"
+      mode: 0775
+    tags: install
+
+  - name: Pxe menu
+    copy:
+      src: "/root/omnia/control_plane/roles/provision_cobbler/files/menu.yml"
+      dest: "/etc/cobbler/boot_loader_conf/pxedefault.template"
+      mode: 0775
+    tags: install
+
+  - name: Assign default grub option
+    replace:
+      path: "/var/lib/cobbler/grub_config/grub/grub.cfg"
+      regexp: "^set default='local'"
+      replace: "set default='1'"
+    tags: install
+
+  - name: Assign default grub timeout
+    replace:
+      path: "/var/lib/cobbler/grub_config/grub/grub.cfg"
+      regexp: '^set timeout=80'
+      replace: 'set timeout=10'
+    tags: install
+
+  - name: Syncing of cobbler
+    command: cobbler sync
+    changed_when: false 
+
+  - name: Restart cobblerd, http, xinetd and dhcpd
+    service:
+      name: "{{ item }}"
+      state: restarted
+    loop:
+      - cobblerd
+      - apache2
+      - xinetd
+      - dhcpd
+
+  - name: Fetch ansible-playbook path
+    command: whereis ansible-playbook
+    changed_when: false
+    register: ansible_playbook_path
+
+#  - name: Set dhcpd_lease_file variable for inventory creation
+#    cron:
+#      env: yes
+#      name: dhcpd_lease_file
+#      value: "/var/lib/dhcp/db/dhcpd.leases"
+
+#  - name: Set provision_os variable for inventory creation
+#    cron:
+#      env: yes
+#      name: provision_os
+#      value: "{{ name_iso }}"
+
+  - name: Add tftp cron job
+    cron:
+      name: Start tftp service
+      minute: "*"
+      job: "{{ ansible_playbook_path.stdout.split(' ')[1] }} /root/tftp.yml -e provision_os={{ name_iso }}"
+
+  - name: Add inventory cron job
+    cron:
+      name: Create inventory
+      minute: "*/5"
+      job: "{{ ansible_playbook_path.stdout.split(' ')[1] }} /root/inventory_creation.yml -e dhcpd_lease_file=\"/var/lib/dhcp/db/dhcpd.leases\""

+ 3 - 3
control_plane/roles/provision_cobbler/files/inventory_creation.yml

@@ -4,8 +4,8 @@
   tasks:
     - name: Read dhcp file
       set_fact:
-        var: "{{ lookup('file', '/var/lib/dhcpd/dhcpd.leases').split()| unique | select| list }}"
-
+        var: "{{ lookup('file', '{{ dhcpd_lease_file }}').split()| unique | select| list }}"
+    
     - name: Filter the ip
       set_fact:
         vars_new: "{{ var| ipv4('address')| to_nice_yaml}}"
@@ -35,4 +35,4 @@
 
     - name: New line at end of file
       shell: echo "">> omnia/control_plane/roles/collect_node_info/files/provisioned_hosts.yml
-      changed_when: false
+      changed_when: false

+ 86 - 0
control_plane/roles/provision_cobbler/files/temp_leap15.xml

@@ -0,0 +1,86 @@
+<?xml version="1.0"?>
+<!DOCTYPE profile>
+<profile xmlns="http://www.suse.com/1.0/yast2ns" xmlns:config="http://www.suse.com/1.0/configns">
+  <install>
+    <init>
+      <info_file>
+        <![CDATA[
+          install: http://ip/cblr/links/leap-x86_64/
+          textmode: 1]]>
+      </info_file>
+    </init>
+  </install>
+  <general>
+    <mode>
+      <confirm config:type="boolean">false</confirm>
+      <final_reboot config:type="boolean">true</final_reboot>
+    </mode>
+  </general>
+  <bootloader t="map">
+    <loader_type>default</loader_type>
+  </bootloader>
+  <host t="map">
+    <hosts t="list">
+      <hosts_entry t="map">
+        <host_address>127.0.0.1</host_address>
+        <names t="list">
+          <name>localhost</name>
+        </names>
+      </hosts_entry>
+    </hosts>
+  </host>
+  <networking t="map">
+    <dns t="map">
+      <dhcp_hostname t="boolean">false</dhcp_hostname>
+      <hostname>localhost.localdomain</hostname>
+      <resolv_conf_policy>auto</resolv_conf_policy>
+    </dns>
+  </networking>
+  <services-manager t="map">
+    <default_target>multi-user</default_target>
+    <services t="map">
+      <enable t="list">
+        <service>chronyd</service>
+        <service>sshd</service>
+      </enable>
+    </services>
+  </services-manager>
+  <software t="map">
+    <install_recommended t="boolean">true</install_recommended>
+    <instsource/>
+    <packages t="list">
+      <package>openssh</package>
+      <package>firewalld</package>
+      <package>chrony</package>
+    </packages>
+  </software>
+  <ssh_import t="map">
+    <copy_config t="boolean">false</copy_config>
+    <import t="boolean">false</import>
+  </ssh_import>
+  <timezone t="map">
+    <timezone>yast_timezone</timezone>
+  </timezone>
+  <users t="list">
+    <user t="map">
+      <authorized_keys t="list"/>
+      <encrypted t="boolean">true</encrypted>
+      <fullname>root</fullname>
+      <gid>0</gid>
+      <home>/root</home>
+      <home_btrfs_subvolume t="boolean">false</home_btrfs_subvolume>
+      <password_settings t="map">
+        <expire/>
+        <flag/>
+        <inact/>
+        <max/>
+        <min/>
+        <warn/>
+      </password_settings>
+      <shell>/bin/bash</shell>
+      <uid>0</uid>
+      <user_password>yast_password</user_password>
+      <username>root</username>
+    </user>
+  </users>
+</profile>

+ 21 - 0
control_plane/roles/provision_cobbler/files/tftp.yml

@@ -17,6 +17,27 @@
   hosts: localhost
   connection: local
   tasks:
+    - name: Configure tftp for leap
+      block:
+        - name: Stop the tftp.socket
+          command: systemctl stop tftp.socket
+
+        - name: Modify tftp.socket to listsen on IPv4
+          replace:
+            path: "/usr/lib/systemd/system/tftp.socket"
+            regexp: 'ListenDatagram=69'
+            replace: 'ListenDatagram=0.0.0.0:69'
+        
+        - name: Reload the configurations
+          command: systemctl daemon-reload
+          
+        - name: Enable tftp.socket
+          command: systemctl enable tftp.socket
+
+        - name: Start tftp.socket
+          command: systemctl start tftp.socket
+      when: provision_os == "leap"
+
     - name: Fetch tftp status
       command: systemctl is-active tftp
       args:

+ 5 - 0
control_plane/roles/provision_cobbler/tasks/check_prerequisites.yml

@@ -32,6 +32,11 @@
     cobbler_kickstart_file: "{{ cobbler_rocky_ks }}"
   when: provision_os == os_supported_rocky
 
+- name: Set rocky kickstart file name
+  set_fact:
+    cobbler_kickstart_file: "{{ cobbler_leap_ks }}"
+  when: provision_os == os_supported_leap
+
 - name: Check if any backup file exists
   block:
   - name: Check status of backup file

+ 13 - 1
control_plane/roles/provision_cobbler/tasks/cobbler_image.yml

@@ -18,7 +18,19 @@
   changed_when: true
   args:
     chdir: "{{ role_path }}/files/"
-  when: not cobbler_image_status
+  when: 
+    - not cobbler_image_status
+    - provision_os != os_supported_leap
+  tags: install
+
+- name: Image creation (It may take 5-10 mins)
+  command: "buildah bud -f Dockerfile_leap -t {{ cobbler_image_name }}:{{ cobbler_image_tag }} --network host ."
+  changed_when: true
+  args:
+    chdir: "{{ role_path }}/files/"
+  when: 
+    - not cobbler_image_status
+    - provision_os == os_supported_leap
   tags: install
 
 - name: Update image name in k8s_cobbler.yml

+ 13 - 3
control_plane/roles/provision_cobbler/tasks/configure_cobbler.yml

@@ -34,7 +34,7 @@
   wait_for:
     timeout: 30
 
-- name: Copy dhcpd.leases from cobbler
+- name: Copy kickstart file inside cobbler container
   command: kubectl cp {{ role_path }}/files/{{ cobbler_kickstart_file }} {{ cobbler_pod_name.stdout }}:/root/{{ cobbler_kickstart_file }} -n {{ cobbler_namespace }}
   changed_when: true
   when: not cobbler_config_status
@@ -43,7 +43,17 @@
   command: "kubectl exec --stdin --tty -n {{ cobbler_namespace }} {{ cobbler_pod_name.stdout }} -- ansible-playbook /root/cobbler_configurations.yml -e name_iso={{ provision_os }}"
   changed_when: true
   tags: install
-  when: not cobbler_config_status
+  when: 
+    - not cobbler_config_status
+    - provision_os != os_supported_leap
+      
+- name: Configuring cobbler inside container (It may take 5-10 mins)
+  command: "kubectl exec --stdin --tty -n {{ cobbler_namespace }} {{ cobbler_pod_name.stdout }} -- ansible-playbook /root/cobbler_configurations_leap.yml -e name_iso={{ provision_os }} -e host_network_nic={{ host_network_nic }}"
+  changed_when: true
+  tags: install
+  when: 
+    - not cobbler_config_status
+    - provision_os == os_supported_leap
 
 - name: Schedule task
   cron:
@@ -67,4 +77,4 @@
     - "{{ role_path }}/files/dhcp.template"
     - "{{ role_path }}/files/settings"
     - "{{ role_path }}/files/{{ cobbler_kickstart_file }}"
-    - "{{ role_path }}/files/temp_host_mapping_file.csv.bak"
+    - "{{ role_path }}/files/temp_host_mapping_file.csv.bak"

+ 29 - 0
control_plane/roles/provision_cobbler/tasks/dhcp_configure.yml

@@ -69,3 +69,32 @@
     path: "{{ role_path }}/files/settings.yaml"
     regexp: '^next_server: 127.0.0.1'
     replace: 'next_server: {{ hpc_ip }}'
+
+- name: Set the server_args for {{ provision_os }} in tftp file
+  replace:
+    path: "{{ role_path }}/files/tftp"
+    regexp: '^        server_args             = -s /srv/tftpboot'
+    replace: '        server_args             = -s /var/lib/tftpboot'
+  when:
+    - provision_os != os_supported_leap
+      
+- name: Set the parameters for {{ provision_os }}
+  block:
+    - name: Set the tftpboot_location for {{ provision_os }}
+      replace:
+        path: "{{ role_path }}/files/settings.yaml"
+        regexp: '^tftpboot_location: "/var/lib/tftpboot"'
+        replace: 'tftpboot_location: "/srv/tftpboot"'
+
+    - name: Set the webdir for {{ provision_os }}
+      replace:
+        path: "{{ role_path }}/files/settings.yaml"
+        regexp: '^webdir: "/var/www/cobbler"'
+        replace: 'webdir: "/srv/www/cobbler"'
+
+    - name: Set the server_args for {{ provision_os }} in tftp file
+      replace:
+        path: "{{ role_path }}/files/tftp"
+        regexp: '^        server_args             = -s /var/lib/tftpboot'
+        replace: '        server_args             = -s /srv/tftpboot'
+  when: provision_os == os_supported_leap

+ 59 - 20
control_plane/roles/provision_cobbler/tasks/provision_password.yml

@@ -88,6 +88,23 @@
       with_items: "{{ rocky_host_nic }}"
   when: provision_os == os_supported_rocky
 
+- name: Kickstart configuration - leap
+  block:
+    - name: Create the kickstart file
+      copy:
+        src: "{{ role_path }}/files/temp_leap15.xml"
+        dest: "{{ role_path }}/files/{{ cobbler_kickstart_file }}"
+        mode: 0775
+      tags: install
+
+    - name: Configure kickstart file - IP
+      replace:
+        path: "{{ role_path }}/files/{{ cobbler_kickstart_file }}"
+        regexp: '^          install: http://ip/cblr/links/leap-x86_64/'
+        replace: '          install: http://{{ hpc_ip }}/cblr/links/leap-x86_64/'
+      tags: install
+  when: provision_os == os_supported_leap
+
 - name: Random phrase generation
   command: openssl rand -base64 12
   changed_when: false
@@ -116,29 +133,51 @@
   no_log: true
   tags: install
 
-- name: Configure kickstart file - Password
-  replace:
-    path: "{{ role_path }}/files/{{ cobbler_kickstart_file }}"
-    regexp: '^rootpw --iscrypted ks_password'
-    replace: 'rootpw --iscrypted {{ login_pass.stdout }}'
-  no_log: true
-  tags: install
+- name: Configure kickstart file for {{ provision_os }}
+  block:
+    - name: Configure kickstart file - Password
+      replace:
+        path: "{{ role_path }}/files/{{ cobbler_kickstart_file }}"
+        regexp: '^rootpw --iscrypted ks_password'
+        replace: 'rootpw --iscrypted {{ login_pass.stdout }}'
+      no_log: true
+      tags: install
 
-- name: Configure kickstart file - timezone
-  replace:
-    path: "{{ role_path }}/files/{{ cobbler_kickstart_file }}"
-    regexp: '^timezone --utc ks_timezone'
-    replace: 'timezone --utc {{ timezone }}'
-  tags: install
+    - name: Configure kickstart file - timezone
+      replace:
+        path: "{{ role_path }}/files/{{ cobbler_kickstart_file }}"
+        regexp: '^timezone --utc ks_timezone'
+        replace: 'timezone --utc {{ timezone }}'
+      tags: install
 
-- name: Configure kickstart file - language
-  replace:
-    path: "{{ role_path }}/files/{{ cobbler_kickstart_file }}"
-    regexp: '^lang ks_language'
-    replace: 'lang {{ language }}'
-  tags: install
+    - name: Configure kickstart file - language
+      replace:
+        path: "{{ role_path }}/files/{{ cobbler_kickstart_file }}"
+        regexp: '^lang ks_language'
+        replace: 'lang {{ language }}'
+      tags: install
+  when: 
+    - provision_os != os_supported_leap
+
+- name: Configure kickstart file for {{ provision_os }}
+  block:
+    - name: Configure kickstart file - Password
+      replace:
+        path: "{{ role_path }}/files/{{ cobbler_kickstart_file }}"
+        regexp: '^      <user_password>ks_password</user_password>'
+        replace: '      <user_password>{{ login_pass.stdout }}</user_password>'
+      no_log: true
+      tags: install
+
+    - name: Configure kickstart file - timezone
+      replace:
+        path: "{{ role_path }}/files/{{ cobbler_kickstart_file }}"
+        regexp: '^    <timezone>ks_timezone</timezone>'
+        replace: '    <timezone>{{ timezone }}</timezone>'
+      tags: install
+  when: provision_os == os_supported_leap
 
 - name: Remove ^M characters
   command: dos2unix {{ role_path }}/files/{{ cobbler_kickstart_file }}
   changed_when: false
-  failed_when: false
+  failed_when: false

+ 1 - 0
control_plane/roles/provision_cobbler/vars/main.yml

@@ -30,6 +30,7 @@ username: cobbler
 user_mode: 0644
 cobbler_centos_ks: centos7.ks
 cobbler_rocky_ks: rocky8.ks
+cobbler_leap_ks: leap15.xml
 centos_host_nic:
  - em1
  - em2

+ 3 - 0
roles/k8s_start_services/files/grafana_svc_details.ini

@@ -0,0 +1,3 @@
+[grafana_svc]
+ip=
+port=

+ 82 - 0
roles/k8s_start_services/tasks/configure_nginx_prom_grafana.yml

@@ -0,0 +1,82 @@
+#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+- name: Include control_plane_monitoring variables
+  include_vars: "{{ role_path }}/../../control_plane/roles/control_plane_monitoring/vars/main.yml"
+
+- name: Install Nginx
+  package:
+    name: nginx
+    state: present
+  when: ansible_facts['distribution'] != opensuse_os_name
+
+- name: Install Nginx
+  zypper:
+    name: nginx
+    state: present
+  when: ansible_facts['distribution'] == opensuse_os_name
+
+- name: Start and enable nginx service
+  service:
+    name: nginx
+    state: restarted
+    enabled: yes
+
+- name: Get prometheus service IP
+  command: kubectl get svc -l app=prometheus,component=server -o=jsonpath='{.items[0].spec.clusterIP}'
+  changed_when: false
+  register: prometheus_svc_ip
+
+- name: Configure nginx.conf (1/2)
+  replace:
+    path: "{{ role_path }}/templates/nginx.conf.j2"
+    regexp: '        server_name  .*'
+    replace: "        server_name  {{ ansible_default_ipv4.address }};"
+  delegate_to: localhost
+
+- name: Configure nginx.conf (2/2)
+  replace:
+    path: "{{ role_path }}/templates/nginx.conf.j2"
+    regexp: '          proxy_pass http://.*'
+    replace: "          proxy_pass http://{{ prometheus_svc_ip.stdout }};"
+  delegate_to: localhost
+
+- name: Load nginx conf
+  template:
+    src: nginx.conf.j2
+    dest: "{{ nginx_conf_file_path }}"
+    mode: "{{ nginx_conf_file_mode }}"
+
+- name: Validate nginx conf file
+  command: nginx -t
+  changed_when: false
+
+- name: Start and enable nginx service
+  service:
+    name: nginx
+    state: restarted
+    enabled: yes
+
+- name: Create prometheus datasource in grafana
+  community.grafana.grafana_datasource:
+    name: "hpc-prometheus-{{ ansible_default_ipv4.address }}"
+    grafana_url: "http://{{ grafana_svc_ip }}:{{ grafana_svc_port }}"
+    grafana_user: "{{ grafana_username }}"
+    grafana_password: "{{ grafana_password }}"
+    ds_type: prometheus
+    ds_url: "http://{{ ansible_default_ipv4.address }}"
+    access: direct
+  delegate_to: localhost
+  no_log: true

+ 10 - 2
roles/k8s_start_services/tasks/main.yml

@@ -1,4 +1,4 @@
-#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -20,4 +20,12 @@
 
 - name: Check K8s pods
   include_tasks: check_k8s_pods.yml
-  tags: install
+  tags: install
+
+- name: Configure Nginx/Prometheus/Grafana
+  include_tasks: configure_nginx_prom_grafana.yml
+  when:
+    - "'manager' in group_names"
+    - grafana_svc_ip != ""
+    - grafana_svc_port != ""
+  tags: install

+ 90 - 0
roles/k8s_start_services/templates/nginx.conf.j2

@@ -0,0 +1,90 @@
+# For more information on configuration, see:
+#   * Official English Documentation: http://nginx.org/en/docs/
+#   * Official Russian Documentation: http://nginx.org/ru/docs/
+
+user nginx;
+worker_processes auto;
+error_log /var/log/nginx/error.log;
+pid /run/nginx.pid;
+
+# Load dynamic modules. See /usr/share/doc/nginx/README.dynamic.
+include /usr/share/nginx/modules/*.conf;
+
+events {
+    worker_connections 1024;
+}
+
+http {
+    log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
+                      '$status $body_bytes_sent "$http_referer" '
+                      '"$http_user_agent" "$http_x_forwarded_for"';
+
+    access_log  /var/log/nginx/access.log  main;
+
+    sendfile            on;
+    tcp_nopush          on;
+    tcp_nodelay         on;
+    keepalive_timeout   65;
+    types_hash_max_size 2048;
+
+    include             /etc/nginx/mime.types;
+    default_type        application/octet-stream;
+
+    # Load modular configuration files from the /etc/nginx/conf.d directory.
+    # See http://nginx.org/en/docs/ngx_core_module.html#include
+    # for more information.
+    include /etc/nginx/conf.d/*.conf;
+
+    server {
+        listen       80 default_server;
+        listen       [::]:80 default_server;
+        server_name  server_ip;
+        root         /usr/share/nginx/html;
+
+        # Load configuration files for the default server block.
+        include /etc/nginx/default.d/*.conf;
+
+        location / {
+          proxy_pass http://prometheus_svc_ip;
+        }
+
+        error_page 404 /404.html;
+            location = /40x.html {
+        }
+
+        error_page 500 502 503 504 /50x.html;
+            location = /50x.html {
+        }
+    }
+
+# Settings for a TLS enabled server.
+#
+#    server {
+#        listen       443 ssl http2 default_server;
+#        listen       [::]:443 ssl http2 default_server;
+#        server_name  _;
+#        root         /usr/share/nginx/html;
+#
+#        ssl_certificate "/etc/pki/nginx/server.crt";
+#        ssl_certificate_key "/etc/pki/nginx/private/server.key";
+#        ssl_session_cache shared:SSL:1m;
+#        ssl_session_timeout  10m;
+#        ssl_ciphers PROFILE=SYSTEM;
+#        ssl_prefer_server_ciphers on;
+#
+#        # Load configuration files for the default server block.
+#        include /etc/nginx/default.d/*.conf;
+#
+#        location / {
+#        }
+#
+#        error_page 404 /404.html;
+#            location = /40x.html {
+#        }
+#
+#        error_page 500 502 503 504 /50x.html;
+#            location = /50x.html {
+#        }
+#    }
+
+}

+ 11 - 1
roles/k8s_start_services/vars/main.yml

@@ -1,4 +1,4 @@
-#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -94,3 +94,13 @@ spark_operator_repo: https://googlecloudplatform.github.io/spark-on-k8s-operator
 operator_image_tag: v1beta2-1.2.3-3.1.1
 
 volcano_scheduling_yaml_url: https://raw.githubusercontent.com/volcano-sh/volcano/v1.3.0/installer/volcano-development.yaml
+
+nginx_conf_file_path: /etc/nginx/nginx.conf
+
+nginx_conf_file_mode: 2534
+
+grafana_svc_ip: "{{ lookup('ini', 'ip section=grafana_svc file={{ role_path }}/files/grafana_svc_details.ini') }}"
+
+grafana_svc_port: "{{ lookup('ini', 'port section=grafana_svc file={{ role_path }}/files/grafana_svc_details.ini') }}"
+
+opensuse_os_name: "openSUSE Leap"

文件差异内容过多而无法显示
+ 1534 - 0
roles/slurm_exporter/files/slurm-dashboard-node-exporter.json


文件差异内容过多而无法显示
+ 2502 - 0
roles/slurm_exporter/files/slurm-dashboard.json


+ 64 - 0
roles/slurm_exporter/tasks/configure_grafana.yml

@@ -0,0 +1,64 @@
+#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+- name: Include control_plane_monitoring variables
+  include_vars: "{{ role_path }}/../../control_plane/roles/control_plane_monitoring/vars/main.yml"
+
+- name: Create prometheus datasource in grafana
+  community.grafana.grafana_datasource:
+    name: "hpc-prometheus-{{ ansible_default_ipv4.address }}"
+    grafana_url: "http://{{ grafana_svc_ip }}:{{ grafana_svc_port }}"
+    grafana_user: "{{ grafana_username }}"
+    grafana_password: "{{ grafana_password }}"
+    ds_type: prometheus
+    ds_url: "http://{{ ansible_default_ipv4.address }}"
+    access: direct
+  delegate_to: localhost
+  no_log: true
+
+- name: Replace data source in slurm dashboard
+  replace:
+    path: "{{ role_path }}/files/{{ item }}"
+    regexp: '"uid": "hpc.*'
+    replace: '"uid": "hpc-prometheus-{{ ansible_default_ipv4.address }}"'
+  with_items: "{{ slurm_dashboard_json_files }}"
+  delegate_to: localhost
+
+- name: Replace title in slurm dashboard (1/2)
+  replace:
+    path: "{{ role_path }}/files/slurm-dashboard-node-exporter.json"
+    regexp: '"title": "SLURM - Node Exporter Server Metrics.*'
+    replace: '"title": "SLURM - Node Exporter Server Metrics - ({{ ansible_default_ipv4.address }})",'
+  delegate_to: localhost
+
+- name: Replace title in slurm dashboard (2/2)
+  replace:
+    path: "{{ role_path }}/files/slurm-dashboard.json"
+    regexp: '"title": "SLURM - CPUs/GPUs, Nodes, Jobs, Scheduler.*'
+    replace: '"title": "SLURM - CPUs/GPUs, Nodes, Jobs, Scheduler ({{ ansible_default_ipv4.address }})",'
+  delegate_to: localhost
+
+- name: Import Slurm Grafana dashboards
+  community.grafana.grafana_dashboard:
+    grafana_url: "http://{{ grafana_svc_ip }}:{{ grafana_svc_port }}"
+    grafana_user: "{{ grafana_username }}"
+    grafana_password: "{{ grafana_password }}"
+    state: present
+    commit_message: Updated by ansible
+    overwrite: yes
+    path: "{{ role_path }}/files/{{ item }}"
+  with_items: "{{ slurm_dashboard_json_files }}"
+  delegate_to: localhost
+  no_log: true

+ 55 - 2
roles/slurm_exporter/tasks/install_prometheus.yml

@@ -1,4 +1,4 @@
-#  Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -37,4 +37,57 @@
           scrape_interval:  30s
           scrape_timeout:   30s
           static_configs:
-            - targets: ['localhost:{{ slurm_exporter_port }}']
+            - targets: ['localhost:{{ slurm_exporter_port }}']
+
+- name: Install & configure Nginx
+  block:
+    - name: Install Nginx
+      package:
+        name: nginx
+        state: present
+      when: ansible_facts['distribution'] != opensuse_os_name
+
+    - name: Install Nginx
+      zypper:
+        name: nginx
+        state: present
+      when: ansible_facts['distribution'] == opensuse_os_name
+
+    - name: Start and enable nginx service
+      service:
+        name: nginx
+        state: restarted
+        enabled: yes
+
+    - name: Configure nginx.conf (1/2)
+      replace:
+        path: "../../k8s_start_services/templates/nginx.conf.j2"
+        regexp: '        server_name  .*'
+        replace: "        server_name  {{ ansible_default_ipv4.address }};"
+      delegate_to: localhost
+
+    - name: Configure nginx.conf (2/2)
+      replace:
+        path: "../../k8s_start_services/templates/nginx.conf.j2"
+        regexp: '          proxy_pass http://.*'
+        replace: "          proxy_pass {{ prometheus_ip }};"
+      delegate_to: localhost
+
+    - name: Load nginx conf
+      template:
+        src: "../../k8s_start_services/templates/nginx.conf.j2"
+        dest: "{{ nginx_conf_file_path }}"
+        mode: "{{ nginx_conf_file_mode }}"
+
+    - name: Validate nginx conf file
+      command: nginx -t
+      changed_when: false
+
+    - name: Start and enable nginx service
+      service:
+        name: nginx
+        state: restarted
+        enabled: yes
+  when:
+    - grafana_svc_ip != ""
+    - grafana_svc_port != ""

+ 7 - 1
roles/slurm_exporter/tasks/main.yml

@@ -1,4 +1,4 @@
-# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -37,3 +37,9 @@
   when:
     - "'kubernetes' in ansible_skip_tags"
     - "'No such file' in k8s_installation_status.stderr"
+
+- name: Configure grafana dashboard
+  include_tasks: configure_grafana.yml
+  when:
+    - grafana_svc_ip != ""
+    - grafana_svc_port != ""

+ 12 - 2
roles/slurm_exporter/vars/main.yml

@@ -1,4 +1,4 @@
-# Copyright 2020 Dell Inc. or its subsidiaries. All Rights Reserved.
+# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -38,4 +38,14 @@ systemd_path_dest: "/etc/systemd/system/"
 slurm_exporter_port: "8081"
 
 #Usage: configure_prometheus_pod.yml
-slurm_config_file: "slurm_exporter_config.yaml"
+slurm_config_file: "slurm_exporter_config.yaml"
+
+#Usage: configure_grafana.yml
+prometheus_ip: http://localhost:9090
+nginx_conf_file_path: /etc/nginx/nginx.conf
+nginx_conf_file_mode: 2534
+grafana_svc_ip: "{{ lookup('ini', 'ip section=grafana_svc file=../../k8s_start_services/files/grafana_svc_details.ini') }}"
+grafana_svc_port: "{{ lookup('ini', 'port section=grafana_svc file=../../k8s_start_services/files/grafana_svc_details.ini') }}"
+slurm_dashboard_json_files:
+  - slurm-dashboard.json
+  - slurm-dashboard-node-exporter.json