Browse Source

Monitoring changes for control_plane: pre-pull images with retries and scrape Leap directory server logs

Signed-off-by: abhishek-sa1 <abhishek.sa3@dell.com>
abhishek-sa1 3 years ago
parent
commit
eb6bbdaeb5

+ 1 - 1
control_plane/input_params/security_vars.yml

@@ -63,7 +63,7 @@ allow_deny: "Allow"
 
 # This variable is used to disable services.
 # Accepted values: "true" or "false". 
-# Default values: false  
+# Default value: false  
 # Root access is needed.
 restrict_program_support: false
 

+ 17 - 1
control_plane/roles/control_plane_monitoring/files/promtail_config.yml

@@ -33,6 +33,22 @@ scrape_configs:
       job: Directory Server Errors
       __path__: /var/log/dirsrv/slapd-OMNIA-TEST/errors
 
+- job_name: Directory Server Utilization for Leap
+  static_configs:
+  - targets:
+      - localhost
+    labels:
+      job: Directory Server Utilization
+      __path__: /var/log/dirsrv/slapd-ldap1/access
+
+- job_name: Directory Server Errors for Leap
+  static_configs:
+  - targets:
+      - localhost
+    labels:
+      job: Directory Server Errors
+      __path__: /var/log/dirsrv/slapd-ldap1/errors
+
 - job_name: PKI Transactions
   static_configs:
   - targets:
@@ -135,4 +151,4 @@ scrape_configs:
       - localhost
     labels:
       job: k8s pods
-      __path__: /var/log/pods/*/*/*log
+      __path__: /var/log/pods/*/*/*log

+ 8 - 1
control_plane/roles/control_plane_monitoring/tasks/install_k8s_prom_stack.yml

@@ -13,6 +13,13 @@
 #  limitations under the License.
 ---
 
+- name: Pull image for kube-webhook-certgen
+  command: buildah pull "{{ kube_prom_stack_image_name }}:{{ kube_prom_stack_image_tag }}"
+  changed_when: true
+  register: pull_prom_stack_image
+  until: pull_prom_stack_image is not failed
+  retries: "{{ image_creation_retries }}"
+
 - name: Add prometheus-community chart
   kubernetes.core.helm_repository:
     name: prometheus-community
@@ -60,4 +67,4 @@
 
 - name: Wait for kube-prom-stack pod to come to ready state
   command: kubectl wait --for=condition=ready --timeout=10m -n "{{ monitoring_namespace }}" pod -l app=kube-prometheus-stack-operator
-  changed_when: false
+  changed_when: false

+ 9 - 2
control_plane/roles/control_plane_monitoring/tasks/k8s_loki_pod.yml

@@ -13,6 +13,13 @@
 #  limitations under the License.
 ---
 
+- name: Pull image for loki
+  command: buildah pull "{{ loki_image_name }}:{{ loki_image_tag }}"
+  changed_when: true
+  register: pull_loki_image
+  until: pull_loki_image is not failed
+  retries: "{{ image_creation_retries }}"
+
 - name: Deploy loki pod
   kubernetes.core.k8s:
     state: present
@@ -50,7 +57,7 @@
 
                   containers:
                     - name: loki
-                      image: docker.io/grafana/loki:2.4.1
+                      image: "{{ loki_image_name }}:{{ loki_image_tag }}"
                       imagePullPolicy: "Always"
                       ports:
                         - containerPort: "{{ loki_port }}"
@@ -60,4 +67,4 @@
                       imagePullPolicy: "Never"
                       volumeMounts:
                         - name: varlog
-                          mountPath: /var/log
+                          mountPath: /var/log

+ 4 - 1
control_plane/roles/control_plane_monitoring/tasks/promtail_image.yml

@@ -23,5 +23,8 @@
 - name: Promtail image creation
   command: "buildah bud -f Dockerfile_promtail -t {{ promtail_image_name }}:{{ promtail_image_tag }} ."
   changed_when: true
+  register: create_promtail_image
+  until: create_promtail_image is not failed
+  retries: "{{ image_creation_retries }}"
   args:
-    chdir: "{{ role_path }}/files/"
+    chdir: "{{ role_path }}/files/"

+ 5 - 0
control_plane/roles/control_plane_monitoring/vars/main.yml

@@ -27,6 +27,8 @@ k8s_prom_stack_crd:
   - prometheusrules.monitoring.coreos.com
   - servicemonitors.monitoring.coreos.com
   - thanosrulers.monitoring.coreos.com
+kube_prom_stack_image_name: k8s.gcr.io/ingress-nginx/kube-webhook-certgen
+kube_prom_stack_image_tag: v1.0
 
 # Usage: configure_k8s_prom_grafana.yml
 grafana_namespace: grafana
@@ -50,7 +52,10 @@ grafana_dashboard_json_files:
 # Usage: promtail_image.yml
 promtail_image_name: "promtail"
 promtail_image_tag: "latest"
+image_creation_retries: 20
 
 # Usage: k8s_loki_pod.yml
 loki_k8s_name: loki
 loki_port: 3100
+loki_image_name: docker.io/grafana/loki
+loki_image_tag: 2.4.1

+ 1 - 1
omnia_security_config.yml

@@ -53,7 +53,7 @@ allow_deny: "Allow"
 
 # This variable is used to disable services.
 # Accepted values: "true" or "false". 
-# Default values are: true  
+# Default value: false  
 # Root access is needed.
 restrict_program_support: false