Browse Source

Merge branch 'dellhpc:devel' into devel

Cassey Goveas 3 years ago
parent
commit
c03e88a798

+ 9 - 0
.all-contributorsrc

@@ -355,6 +355,15 @@
       "contributions": [
         "design"
       ]
+    },
+    {
+      "login": "Zipexpo",
+      "name": "nvtngan",
+      "avatar_url": "https://avatars.githubusercontent.com/u/18387748?v=4",
+      "profile": "http://www.myweb.ttu.edu/ngu00336/",
+      "contributions": [
+        "code"
+      ]
     }
   ],
   "contributorsPerLine": 7,

+ 1 - 0
README.md

@@ -74,6 +74,7 @@ Thanks goes to everyone who makes Omnia possible ([emoji key](https://allcontrib
     <td align="center"><a href="https://github.com/Lakshmi-Patneedi"><img src="https://avatars.githubusercontent.com/u/94051091?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Lakshmi-Patneedi</b></sub></a><br /><a href="https://github.com/dellhpc/omnia/commits?author=Lakshmi-Patneedi" title="Code">💻</a></td>
     <td align="center"><a href="https://github.com/Artlands"><img src="https://avatars.githubusercontent.com/u/31781106?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Jie Li</b></sub></a><br /><a href="https://github.com/dellhpc/omnia/commits?author=Artlands" title="Code">💻</a></td>
     <td align="center"><a href="https://github.com/githubyongchen"><img src="https://avatars.githubusercontent.com/u/5414112?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Yong Chen</b></sub></a><br /><a href="#design-githubyongchen" title="Design">🎨</a></td>
+    <td align="center"><a href="http://www.myweb.ttu.edu/ngu00336/"><img src="https://avatars.githubusercontent.com/u/18387748?v=4?s=100" width="100px;" alt=""/><br /><sub><b>nvtngan</b></sub></a><br /><a href="https://github.com/dellhpc/omnia/commits?author=Zipexpo" title="Code">💻</a></td>
   </tr>
 </table>
 

+ 29 - 7
control_plane/roles/control_plane_k8s/tasks/k8s_installation_leap.yml

@@ -1,4 +1,4 @@
-#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#  Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -55,11 +55,23 @@
   changed_when: true
 
 - name: Installing cri-o
-  zypper:
+  package:
     name: cri-o
     state: present
   changed_when: true
 
+- name: Installing cri-o tools
+  package:
+    name: cri-tools
+    state: present
+  changed_when: true
+
+# Reload systemd unit files via the systemd module rather than a raw `systemctl` command; the command module's `warn` argument is rejected by newer ansible-core releases.
+- name: Reloading daemon
+  systemd:
+    daemon_reload: yes
+  changed_when: true
+
 - name: Start and enable crio
   service:
     name: crio
@@ -68,13 +80,14 @@
     enabled: yes
   register: crio_repo_result
   until: crio_repo_result is not failed
-  retries: "{{ min_retries }}"
+  retries: "{{ max_retries }}"
 
 - name: Add kubernetes repo
   zypper_repository:
      name: google-k8s
      repo: "{{ k8s_repo_leap }}"
      state: present
+     disable_gpg_check: yes
      autorefresh: yes
 
 - name: Import rpm-package key for installing kubernetes
@@ -94,7 +107,7 @@
   changed_when: false
 
 - name: Refresh repositories and installing conntrack-tools
-  zypper:
+  package:
     name: conntrack-tools
     state: present
     update_cache: yes
@@ -111,6 +124,9 @@
     responses:
         (.*) [1/2/c/d/?](.): '2'
         (.*)(y): 'y'
+  register: kubeadm_status
+  until: kubeadm_status is not failed
+  retries: "{{ max_retries }}"
 
 - name: Install Kubelet
   ansible.builtin.expect:
@@ -118,6 +134,9 @@
     responses:
         (.*) [1/2/c/d/?](.): '2'
         (.*)(y): 'y'
+  register: kubelet_status
+  until: kubelet_status is not failed
+  retries: "{{ max_retries }}"
 
 - name: Install Kubectl
   zypper:
@@ -125,10 +144,13 @@
      state: present
      oldpackage: yes
      force: yes
+  register: kubectl_status
+  until: kubectl_status is not failed
+  retries: "{{ max_retries }}"
 
 - name: Install common packages
   zypper:
-    name: "{{ common_pkgs_leap }}"
+    name: "{{ common_packages }}"
     state: present
 
 - name: Versionlocking kubeadm
@@ -161,7 +183,7 @@
   tags: install
 
 - name: Install docker-compose
-  zypper:
+  package:
     name: docker-compose
     state: present
   when: docker_username and docker_password
@@ -174,7 +196,7 @@
     enabled: yes
   register: crio_repo_result
   until: crio_repo_result is not failed
-  retries: "{{ min_retries }}"
+  retries: "{{ max_retries }}"
 
 - name: Start and enable kubernetes - kubelet
   service:

+ 2 - 4
control_plane/roles/control_plane_k8s/tasks/main.yml

@@ -15,9 +15,7 @@
 
 - name: Install K8s packages
   include_tasks: k8s_installation.yml
-  when:
-    - ( mgmt_os == os_supported_centos ) and ( ansible_distribution_version >= os_supported_centos_version ) or
-      ( mgmt_os == os_supported_rocky ) and ( ansible_distribution_version >= os_supported_rocky_version )
+  when: os_supported_leap not in mgmt_os
 
 - name: Install K8s packages for leap
   include_tasks: k8s_installation_leap.yml
@@ -33,4 +31,4 @@
   import_tasks: k8s_init.yml
 
 - name: Deploy K8s dashboard
-  import_tasks: k8s_services.yml
+  import_tasks: k8s_services.yml

+ 1 - 5
control_plane/roles/control_plane_k8s/vars/main.yml

@@ -25,14 +25,10 @@ docker_repo_dest_leap: /etc/YaST2/docker-ce.repo
 kubeadm_version: kubeadm-1.21.0-0.x86_64
 kubelet_version: kubelet-1.21.0-0.x86_64
 kubectl_version: kubectl-1.21.0-0.x86_64
-common_pkgs_leap:
-  - openssl
-  - bash-completion
-  - buildah
 os_supported_leap: "leap"
 os_supported_leap_version: "15.3"
 min_retries: 3
-max_retries: 3
+max_retries: 10
 min_delay: 10
 wait_time: 30
  

+ 1 - 1
control_plane/roles/provision_cobbler/tasks/check_prerequisites.yml

@@ -142,7 +142,7 @@
     cobbler_config_status: true
   when:
     - cobbler_container_status
-    - (provision_os in cobbler_profile_list.stdout) or (previous_os in cobbler_profile_list.stdout)
+    - (provision_os in cobbler_profile_list.stdout) or (previous_os in cobbler_profile_list.stdout) or ( "" in cobbler_profile_list.stdout)
     - "'* * * * * /usr/bin/ansible-playbook /root/tftp.yml' in crontab_list.stdout"
     - "'*/5 * * * * /usr/bin/ansible-playbook /root/inventory_creation.yml' in crontab_list.stdout"
 

+ 28 - 20
control_plane/roles/provision_cobbler/tasks/main.yml

@@ -19,46 +19,55 @@
   include_tasks: check_prerequisites.yml
 
 - name: Multi profile creation
-  import_tasks: multi_profile_creation.yml
+  include_tasks: multi_profile_creation.yml
   when: provision_os_change
 
 - name: Cobbler pod creation
   block:
     - name: Mount iso image
-      import_tasks: mount_iso.yml
-      when: not cobbler_image_status
+      include_tasks: mount_iso.yml
+      when:
+        - not cobbler_image_status
 
     - name: Modify firewall settings for Cobbler
-      import_tasks: firewall_settings.yml
-      when: not cobbler_container_status
+      include_tasks: firewall_settings.yml
+      when:
+        - not cobbler_container_status
 
     - name: Include common variables
       include_vars: ../../control_plane_common/vars/main.yml
-      when: not cobbler_container_status
+      when:
+        - not cobbler_container_status
 
     - name: Internet validation
       include_tasks: ../../control_plane_common/tasks/internet_validation.yml
-      when: not cobbler_container_status
+      when:
+        - not cobbler_container_status
 
     - name: Dhcp Configuration
-      import_tasks: dhcp_configure.yml
-      when: (not cobbler_image_status) or ( backup_map_status )
+      include_tasks: dhcp_configure.yml
+      when:
+        - ( not cobbler_image_status ) or ( backup_map_status ) or ( not cobbler_container_status )
 
     - name: Provision password validation
-      import_tasks: provision_password.yml
-      when: not cobbler_image_status
+      include_tasks: provision_password.yml
+      when:
+        - ( not cobbler_image_status ) or ( not cobbler_container_status )
 
     - name: Mapping file validation
-      import_tasks: mapping_file.yml
-      when: (not cobbler_image_status) and (host_mapping_file) or ( backup_map_status)
+      include_tasks: mapping_file.yml
+      when:
+        - ( not cobbler_image_status ) and ( host_mapping_file ) or ( backup_map_status )
 
     - name: Cobbler image creation
-      import_tasks: cobbler_image.yml
-      when: not cobbler_container_status
+      include_tasks: cobbler_image.yml
+      when:
+        - not cobbler_container_status
 
     - name: Cobbler configuration
-      import_tasks: configure_cobbler.yml
-      when: not cobbler_config_status
+      include_tasks: configure_cobbler.yml
+      when:
+        - not cobbler_config_status
 
     - name: Cobbler container status message
       block:
@@ -71,6 +80,5 @@
           debug:
             msg: "{{ message_installed }}"
             verbosity: 2
-          when: not cobbler_container_status
-      tags: install
-  when: not provision_os_change
+          when: not cobbler_container_status
+  when: not provision_os_change

BIN
docs/images/omnia-logo-transparent.png


+ 321 - 0
telemetry/roles/grafana_config/files/parallel-coordinate.json

@@ -0,0 +1,321 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": "-- Grafana --",
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "target": {
+          "limit": 100,
+          "matchAny": false,
+          "tags": [],
+          "type": "dashboard"
+        },
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": 21,
+  "iteration": 1644431955119,
+  "links": [],
+  "liveNow": false,
+  "panels": [
+    {
+      "datasource": {
+        "type": "postgres",
+        "uid": "telemetry-postgres"
+      },
+      "gridPos": {
+        "h": 17,
+        "w": 24,
+        "x": 0,
+        "y": 0
+      },
+      "id": 2,
+      "options": {
+        "axisDisplay": "left",
+        "axisTitleAngle": 0,
+        "colorBy": "node",
+        "colorContinous": "RdYlBu",
+        "colorLegend": [
+          "#1f77b4",
+          "#ff7f0e",
+          "#2ca02c",
+          "#d62728",
+          "#9467bd",
+          "#8c564b",
+          "#e377c2",
+          "#7f7f7f",
+          "#bcbd22",
+          "#17becf"
+        ],
+        "lineOpacity": 0.5,
+        "violinEnable": true,
+        "violinResolution": 20,
+        "violinWidth": 20
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "time_series",
+          "group": [],
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) as \"memory_power\",\nCONCAT('| ',system) AS name\nFROM timeseries_metrics\nWHERE  \n$__timeFilter(time) AND \nlabel= 'PowerMetrics TotalMemoryPower' AND \nsystem IN ($ServiceTag)\nGROUP BY\ntime, name\nORDER BY time;",
+          "refId": "memory_power",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        },
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "time_series",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) as \"power_consumption\",\nCONCAT('| ',system) AS name\nFROM timeseries_metrics\nWHERE  \n$__timeFilter(time) AND \nlabel= 'PowerMetrics SystemPowerConsumption' AND \nsystem IN ($ServiceTag)\nGROUP BY\ntime, name\nORDER BY time;",
+          "refId": "power_consumption",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        },
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "time_series",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) as \"cpu_power\",\nCONCAT('| ',system) AS name\nFROM timeseries_metrics\nWHERE  \n$__timeFilter(time) AND \nlabel= 'PowerMetrics TotalCPUPower' AND \nsystem IN ($ServiceTag)\nGROUP BY\ntime, name\nORDER BY time;",
+          "refId": "cpu_power",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        },
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "time_series",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) as \"cpu1_temp\",\nCONCAT('| ',system) AS name\nFROM timeseries_metrics\nWHERE  \n$__timeFilter(time) AND \nlabel= 'CPU1 Temp TemperatureReading' AND \nsystem IN ($ServiceTag)\nGROUP BY\ntime, name\nORDER BY time;",
+          "refId": "cpu1_temp",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        },
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "time_series",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) as \"cpu2_temp\",\nCONCAT('| ',system) AS name\nFROM timeseries_metrics\nWHERE  \n$__timeFilter(time) AND \nlabel= 'CPU2 Temp TemperatureReading' AND \nsystem IN ($ServiceTag)\nGROUP BY\ntime, name\nORDER BY time;",
+          "refId": "cpu2_temp",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        },
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "time_series",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) as \"NIC1_temp\",\nCONCAT('| ',system) AS name\nFROM timeseries_metrics\nWHERE  \n$__timeFilter(time) AND \nlabel= 'Embedded NIC 1 Port 1 Partition 1 TemperatureReading' AND \nsystem IN ($ServiceTag)\nGROUP BY\ntime, name\nORDER BY time;",
+          "refId": "nic1_temp",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        },
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "time_series",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) as \"Fan1_speed\",\nCONCAT('| ',system) AS name\nFROM timeseries_metrics\nWHERE  \n$__timeFilter(time) AND \nlabel= 'Fan 1A RPMReading' AND \nsystem IN ($ServiceTag)\nGROUP BY\ntime, name\nORDER BY time;",
+          "refId": "fan1_speed",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        }
+      ],
+      "title": "Parallel-Coordinate",
+      "type": "hpcviz-idvl-hpcc-parallel-coordinate"
+    }
+  ],
+  "refresh": "5s",
+  "schemaVersion": 33,
+  "style": "dark",
+  "tags": [],
+  "templating": {
+    "list": [
+      {
+        "datasource": {
+          "type": "postgres",
+          "uid": "telemetry-postgres"
+        },
+        "definition": "SELECT DISTINCT system as __value from timeseries_metrics",
+        "hide": 0,
+        "includeAll": false,
+        "multi": true,
+        "name": "ServiceTag",
+        "options": [],
+        "query": "SELECT DISTINCT system as __value from timeseries_metrics",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 1,
+        "type": "query"
+      }
+    ]
+  },
+  "time": {
+    "from": "now-7d",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "",
+  "title": "iDRAC-and-Slurm-Telemetry",
+  "uid": "pArBHUtnk",
+  "version": 6,
+  "weekStart": ""
+}