Browse Source

Merge pull request #924 from sakshiarora13/devel

Issue#923: Visualizations for Slurm and iDRAC telemetry
Sujit Jadhav 3 years ago
parent
commit
8eb15782f9

+ 304 - 0
telemetry/roles/grafana_config/files/PowerMap.json

@@ -0,0 +1,304 @@
+{
+  "__inputs": [
+    {
+      "name": "DS_TELEMETRY-POSTGRES",
+      "label": "telemetry-postgres",
+      "description": "",
+      "type": "datasource",
+      "pluginId": "postgres",
+      "pluginName": "PostgreSQL"
+    }
+  ],
+  "__elements": [],
+  "__requires": [
+    {
+      "type": "grafana",
+      "id": "grafana",
+      "name": "Grafana",
+      "version": "8.3.2"
+    },
+    {
+      "type": "panel",
+      "id": "hpcviz-idvl-hpcc-stream-net",
+      "name": "stream-net",
+      "version": "1.0.0"
+    },
+    {
+      "type": "datasource",
+      "id": "postgres",
+      "name": "PostgreSQL",
+      "version": "1.0.0"
+    }
+  ],
+  "editable": false,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": null,
+  "iteration": 1647433675249,
+  "links": [],
+  "liveNow": false,
+  "panels": [
+    {
+      "datasource": {
+        "type": "postgres",
+        "uid": "telemetry-postgres"
+      },
+      "gridPos": {
+        "h": 15,
+        "w": 24,
+        "x": 0,
+        "y": 0
+      },
+      "id": 2,
+      "options": {
+        "userEncoded": true
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "table",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT\n  *\nFROM\n  slurm.jobs\nWHERE\n  user_id IN ($users)\n  AND start_time < ${__to:date:seconds}\n  AND end_time BETWEEN ${__from:date:seconds} and ${__to:date:seconds}",
+          "refId": "jobs",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        },
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "time_series",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT\n  $__timeGroupAlias(\"time\",$__interval),\n  avg(CAST(value AS decimal)) AS \"memory_power\",\n  CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n  $__timeFilter(\"time\") AND\n  label= 'PowerMetrics TotalMemoryPower'AND\n  system IN ($servicetag)\nGROUP BY time,name\nORDER BY time",
+          "refId": "memory_power",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        },
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "time_series",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT\n  $__timeGroupAlias(\"time\",$__interval),\n  avg(CAST(value AS decimal)) AS \"cpu_power\",\n  CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n  $__timeFilter(\"time\") AND\n  label= 'PowerMetrics TotalCPUPower'AND\n  system IN ($servicetag)\nGROUP BY time,name\nORDER BY time",
+          "refId": "cpu_power",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        },
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "time_series",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT\n  $__timeGroupAlias(\"time\",$__interval),\n  avg(CAST(value AS decimal)) AS \"cpu1_temp\",\n  CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n  $__timeFilter(\"time\") AND\n  label= 'CPU1 Temp TemperatureReading'AND\n  system IN ($servicetag)\nGROUP BY time,name\nORDER BY time",
+          "refId": "cpu1_temp",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        },
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "time_series",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT\n  $__timeGroupAlias(\"time\",$__interval),\n  avg(CAST(value AS decimal)) AS \"CPU2_Temp\",\n  CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n  $__timeFilter(\"time\") AND\n  label= 'CPU2 Temp TemperatureReading' AND\n  system IN ($servicetag)\nGROUP BY time,name\nORDER BY time",
+          "refId": "cpu2_temp",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        },
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "time_series",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT\n  $__timeGroupAlias(\"time\",$__interval),\n  avg(CAST(value AS decimal)) AS \"NIC1_Temp\",\n  CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n  $__timeFilter(\"time\") AND\n  label SIMILAR TO '% NIC 1 Port 1 Partition 1 TemperatureReading' AND\n  system IN ($servicetag)\nGROUP BY time,name\nORDER BY time",
+          "refId": "nic1_temp",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        }
+      ],
+      "title": "PowerMap",
+      "type": "hpcviz-idvl-hpcc-stream-net"
+    }
+  ],
+  "refresh": "5s",
+  "schemaVersion": 33,
+  "style": "dark",
+  "tags": [],
+  "templating": {
+    "list": [
+      {
+        "current": {},
+        "datasource": {
+          "type": "postgres",
+          "uid": "telemetry-postgres"
+        },
+        "definition": "SELECT DISTINCT servicetag as __value from nodes\n",
+        "hide": 0,
+        "includeAll": true,
+        "multi": true,
+        "name": "servicetag",
+        "options": [],
+        "query": "SELECT DISTINCT servicetag as __value from nodes\n",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 1,
+        "type": "query"
+      },
+      {
+        "current": {},
+        "datasource": {
+          "type": "postgres",
+          "uid": "telemetry-postgres"
+        },
+        "definition": "SELECT\n user_id as __value, user_name as __text, nodes as IP, nodes.os_ip_addr, nodes.servicetag\nFROM\n  slurm.jobs\nINNER JOIN nodes\nON nodes.os_ip_addr = ANY(nodes)\nWHERE nodes.servicetag in ($servicetag) AND\n start_time < ${__to:date:seconds} AND end_time > ${__from:date:seconds}",
+        "hide": 0,
+        "includeAll": true,
+        "multi": true,
+        "name": "users",
+        "options": [],
+        "query": "SELECT\n user_id as __value, user_name as __text, nodes as IP, nodes.os_ip_addr, nodes.servicetag\nFROM\n  slurm.jobs\nINNER JOIN nodes\nON nodes.os_ip_addr = ANY(nodes)\nWHERE nodes.servicetag in ($servicetag) AND\n start_time < ${__to:date:seconds} AND end_time > ${__from:date:seconds}",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 1,
+        "type": "query"
+      }
+    ]
+  },
+  "time": {
+    "from": "now-2d",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "",
+  "title": "PowerMap",
+  "uid": "ou27WHLnk",
+  "version": 16,
+  "weekStart": ""
+}

+ 482 - 0
telemetry/roles/grafana_config/files/Sankey.json

@@ -0,0 +1,482 @@
+{
+  "__inputs": [
+    {
+      "name": "DS_TELEMETRY-POSTGRES",
+      "label": "telemetry-postgres",
+      "description": "",
+      "type": "datasource",
+      "pluginId": "postgres",
+      "pluginName": "PostgreSQL"
+    }
+  ],
+  "__elements": [],
+  "__requires": [
+    {
+      "type": "grafana",
+      "id": "grafana",
+      "name": "Grafana",
+      "version": "8.3.2"
+    },
+    {
+      "type": "panel",
+      "id": "hpcviz-idvl-hpcc-sankey",
+      "name": "sankey",
+      "version": "1.0.0"
+    },
+    {
+      "type": "datasource",
+      "id": "postgres",
+      "name": "PostgreSQL",
+      "version": "1.0.0"
+    }
+  ],
+  "editable": false,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": null,
+  "iteration": 1647435013504,
+  "links": [],
+  "liveNow": false,
+  "panels": [
+    {
+      "datasource": {
+        "type": "postgres",
+        "uid": "telemetry-postgres"
+      },
+      "gridPos": {
+        "h": 19,
+        "w": 24,
+        "x": 0,
+        "y": 0
+      },
+      "id": 14,
+      "options": {
+        "coreLimit": 128,
+        "displayOpt": "compute_num"
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "table",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "panelId": 9,
+          "queryType": "randomWalk",
+          "rawQuery": true,
+          "rawSql": "SELECT $__timeGroupAlias(a.timestamp,$__interval),\na.source_ip, a.jobs, a.cpus \nfrom (\n  SELECT  timestamp, CONCAT(nodes.servicetag) \n  AS nodeid, jobs, cpus, nodes.os_ip_addr AS source_ip\n  FROM slurm.node_jobs a        \n  INNER JOIN nodes               \n  ON nodes.nodeid = a.nodeid\n  WHERE $__timeFilter(a.timestamp)) \nAS a  WHERE a.source_ip IN ($NodeByUser)\nGROUP BY a.timestamp, a.source_ip, a.jobs, a.cpus ORDER BY a.timestamp",
+          "refId": "node core",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        },
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "table",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT\n  *\nFROM\n  slurm.jobs\nWHERE\n  user_id IN ($Users)\n  AND start_time < ${__to:date:seconds}\n  AND end_time BETWEEN ${__from:date:seconds} and ${__to:date:seconds}",
+          "refId": "jobs",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        },
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "time_series",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"memory_power\",\n  CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel= 'PowerMetrics TotalMemoryPower' AND\ntimeseries_metrics.system in ($servicetag)\nGROUP BY time,name\nORDER BY time",
+          "refId": "memory_power",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        },
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "time_series",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"power_consumption\",\n  CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel= 'PowerMetrics SystemPowerConsumption' AND\ntimeseries_metrics.system in ($servicetag)\nGROUP BY time,name\nORDER BY time",
+          "refId": "power_consumption",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        },
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "time_series",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"cpu_power\",\n  CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel= 'PowerMetrics TotalCPUPower' AND\ntimeseries_metrics.system in ($servicetag)\nGROUP BY time,name\nORDER BY time",
+          "refId": "cpu_power",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        },
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "time_series",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"cpu_usage\",\n  CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel= 'SystemUsage CPUUsage' AND\ntimeseries_metrics.system in ($servicetag)\nGROUP BY time,name\nORDER BY time",
+          "refId": "cpu_usage",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        },
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "time_series",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"cpu1_temp\",\n  CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel= 'CPU1 Temp TemperatureReading' AND\ntimeseries_metrics.system in ($servicetag) \nGROUP BY time,name\nORDER BY time",
+          "refId": "cpu1_temp",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        },
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "time_series",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"cpu2_temp\",\n  CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel= 'CPU2 Temp TemperatureReading' AND\ntimeseries_metrics.system in ($servicetag) \nGROUP BY time,name\nORDER BY time",
+          "refId": "cpu2_temp",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        },
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "time_series",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"nic_temp\",\nCONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel SIMILAR TO '% NIC 1 Port 1 Partition 1 TemperatureReading' AND\ntimeseries_metrics.system in ($servicetag)\nGROUP BY time,name\nORDER BY time",
+          "refId": "nic_temp",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        },
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "time_series",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"fan1_speed\",\n  CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel= 'Fan 1A RPMReading' AND\ntimeseries_metrics.system in ($servicetag)\nGROUP BY time,name\nORDER BY time",
+          "refId": "fan1_speed",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        },
+        {
+          "datasource": {
+            "type": "postgres",
+            "uid": "telemetry-postgres"
+          },
+          "format": "time_series",
+          "group": [],
+          "hide": false,
+          "metricColumn": "none",
+          "rawQuery": true,
+          "rawSql": "SELECT $__timeGroupAlias(\"timestamp\",$__interval),\navg(value) AS \"Memory_usage\", \nCONCAT('| ',nodes.os_ip_addr) AS name\nFROM slurm.memoryusage\nINNER JOIN nodes\nON nodes.nodeid = slurm.memoryusage.nodeid\nWHERE\n$__timeFilter(\"timestamp\") AND\nnodes.servicetag in ($servicetag) \nGROUP BY time,name\nORDER BY time",
+          "refId": "memory_usage",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "column"
+              }
+            ]
+          ],
+          "timeColumn": "time",
+          "where": [
+            {
+              "name": "$__timeFilter",
+              "params": [],
+              "type": "macro"
+            }
+          ]
+        }
+      ],
+      "title": "Sankey",
+      "transformations": [],
+      "type": "hpcviz-idvl-hpcc-sankey"
+    }
+  ],
+  "refresh": "",
+  "schemaVersion": 33,
+  "style": "dark",
+  "tags": [],
+  "templating": {
+    "list": [
+      {
+        "current": {},
+        "datasource": {
+          "type": "postgres",
+          "uid": "telemetry-postgres"
+        },
+        "definition": "SELECT DISTINCT servicetag as __value from nodes",
+        "hide": 0,
+        "includeAll": true,
+        "multi": true,
+        "name": "servicetag",
+        "options": [],
+        "query": "SELECT DISTINCT servicetag as __value from nodes",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 0,
+        "type": "query"
+      },
+      {
+        "current": {},
+        "datasource": {
+          "type": "postgres",
+          "uid": "telemetry-postgres"
+        },
+        "definition": "SELECT\n user_id as __value, user_name as __text, nodes as IP, nodes.os_ip_addr, nodes.servicetag\nFROM\n  slurm.jobs\nINNER JOIN nodes\nON nodes.os_ip_addr = ANY(nodes)\nWHERE nodes.servicetag in ($servicetag) AND\n start_time < ${__to:date:seconds} AND end_time > ${__from:date:seconds}",
+        "hide": 0,
+        "includeAll": true,
+        "multi": true,
+        "name": "Users",
+        "options": [],
+        "query": "SELECT\n user_id as __value, user_name as __text, nodes as IP, nodes.os_ip_addr, nodes.servicetag\nFROM\n  slurm.jobs\nINNER JOIN nodes\nON nodes.os_ip_addr = ANY(nodes)\nWHERE nodes.servicetag in ($servicetag) AND\n start_time < ${__to:date:seconds} AND end_time > ${__from:date:seconds}",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 0,
+        "type": "query"
+      },
+      {
+        "current": {},
+        "datasource": {
+          "type": "postgres",
+          "uid": "telemetry-postgres"
+        },
+        "definition": "SELECT DISTINCT unnest(nodes) as node \nFROM slurm.jobs WHERE \nuser_id IN ($Users)  AND start_time < ${__to:date:seconds} AND end_time > ${__from:date:seconds}",
+        "hide": 0,
+        "includeAll": true,
+        "multi": true,
+        "name": "NodeByUser",
+        "options": [],
+        "query": "SELECT DISTINCT unnest(nodes) as node \nFROM slurm.jobs WHERE \nuser_id IN ($Users)  AND start_time < ${__to:date:seconds} AND end_time > ${__from:date:seconds}",
+        "refresh": 2,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 1,
+        "type": "query"
+      }
+    ]
+  },
+  "time": {
+    "from": "now-2d",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "",
+  "title": "Sankey",
+  "uid": "27YRlmz7y",
+  "version": 35,
+  "weekStart": ""
+}

+ 14 - 13
telemetry/roles/grafana_config/files/SpiralLayout.json

@@ -34,7 +34,7 @@
   "fiscalYearStartMonth": 0,
   "graphTooltip": 0,
   "id": null,
-  "iteration": 1646754961002,
+  "iteration": 1647434054378,
   "links": [],
   "liveNow": false,
   "panels": [
@@ -97,7 +97,7 @@
           "hide": false,
           "metricColumn": "none",
           "rawQuery": true,
-          "rawSql": "SELECT\n  $__timeGroupAlias(\"time\",'10m'),\n  avg(CAST(value AS decimal)) AS \"memory_power\",\n  CONCAT('| ',system) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n  $__timeFilter(\"time\") AND\n  label= 'PowerMetrics TotalMemoryPower' AND\n  system IN (CAST($servicetag AS text))\nGROUP BY time,name\nORDER BY time",
+          "rawSql": "SELECT\n  $__timeGroupAlias(\"time\",$__interval),\n  avg(CAST(value AS decimal)) AS \"memory_power\",\n  CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n  $__timeFilter(\"time\") AND\n  label= 'PowerMetrics TotalMemoryPower'AND\n  system IN ($servicetag)\nGROUP BY time,name\nORDER BY time",
           "refId": "memory_power",
           "select": [
             [
@@ -128,7 +128,7 @@
           "hide": false,
           "metricColumn": "none",
           "rawQuery": true,
-          "rawSql": "SELECT\n  $__timeGroupAlias(\"time\",'10m'),\n  avg(CAST(value AS decimal)) AS \"cpu_power\",\n  CONCAT('| ',system) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n  $__timeFilter(\"time\") AND\n  label= 'PowerMetrics TotalCPUPower'AND\n  system IN ($servicetag)\nGROUP BY time,name\nORDER BY time",
+          "rawSql": "SELECT\n  $__timeGroupAlias(\"time\",$__interval),\n  avg(CAST(value AS decimal)) AS \"cpu_power\",\n  CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n  $__timeFilter(\"time\") AND\n  label= 'PowerMetrics TotalCPUPower'AND\n  system IN ($servicetag)\nGROUP BY time,name\nORDER BY time",
           "refId": "cpu_power",
           "select": [
             [
@@ -159,7 +159,7 @@
           "hide": false,
           "metricColumn": "none",
           "rawQuery": true,
-          "rawSql": "SELECT\n  $__timeGroupAlias(\"time\",'10m'),\n  avg(CAST(value AS decimal)) AS \"cpu1_temp\",\n  CONCAT('| ',system) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n  $__timeFilter(\"time\") AND\n  label= 'CPU1 Temp TemperatureReading'AND\n  system IN ($servicetag)\nGROUP BY time,name\nORDER BY time",
+          "rawSql": "SELECT\n  $__timeGroupAlias(\"time\",$__interval),\n  avg(CAST(value AS decimal)) AS \"cpu1_temp\",\n  CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n  $__timeFilter(\"time\") AND\n  label= 'CPU1 Temp TemperatureReading'AND\n  system IN ($servicetag)\nGROUP BY time,name\nORDER BY time",
           "refId": "cpu1_temp",
           "select": [
             [
@@ -190,7 +190,7 @@
           "hide": false,
           "metricColumn": "none",
           "rawQuery": true,
-          "rawSql": "SELECT\n  $__timeGroupAlias(\"time\",'10m'),\n  avg(CAST(value AS decimal)) AS \"CPU2_Temp\",\n  CONCAT('| ',system) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n  $__timeFilter(\"time\") AND\n  label= 'CPU2 Temp TemperatureReading' AND\n  system IN ($servicetag)\nGROUP BY time,name\nORDER BY time\n",
+          "rawSql": "SELECT\n  $__timeGroupAlias(\"time\",$__interval),\n  avg(CAST(value AS decimal)) AS \"CPU2_Temp\",\n  CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n  $__timeFilter(\"time\") AND\n  label= 'CPU2 Temp TemperatureReading' AND\n  system IN ($servicetag)\nGROUP BY time,name\nORDER BY time",
           "refId": "cpu2_temp",
           "select": [
             [
@@ -221,7 +221,7 @@
           "hide": false,
           "metricColumn": "none",
           "rawQuery": true,
-          "rawSql": "SELECT\n  $__timeGroupAlias(\"time\",'10m'),\n  avg(CAST(value AS decimal)) AS \"NIC1_Temp\",\n  CONCAT('| ',system) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n  $__timeFilter(\"time\") AND\n  label= 'Embedded NIC 1 Port 1 Partition 1 TemperatureReading' AND\n  system IN ($servicetag)\nGROUP BY time,name\nORDER BY time",
+          "rawSql": "SELECT\n  $__timeGroupAlias(\"time\",$__interval),\n  avg(CAST(value AS decimal)) AS \"NIC1_Temp\",\n  CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n  $__timeFilter(\"time\") AND\n  label SIMILAR TO '% NIC 1 Port 1 Partition 1 TemperatureReading' AND\n  system IN ($servicetag)\nGROUP BY time,name\nORDER BY time",
           "refId": "nic1_temp",
           "select": [
             [
@@ -243,10 +243,11 @@
           ]
         }
       ],
-      "title": "SpiralLayout",
+      "title": "Spiral-Layout",
       "type": "hpcviz-idvl-hpcc-spiral-layout"
     }
   ],
+  "refresh": "5s",
   "schemaVersion": 33,
   "style": "dark",
   "tags": [],
@@ -260,7 +261,7 @@
         },
         "definition": "SELECT DISTINCT servicetag as __value from nodes\n",
         "hide": 0,
-        "includeAll": false,
+        "includeAll": true,
         "multi": true,
         "name": "servicetag",
         "options": [],
@@ -277,13 +278,13 @@
           "type": "postgres",
           "uid": "telemetry-postgres"
         },
-        "definition": "SELECT\n user_id as __value, user_name as __text\nFROM\n  slurm.jobs\nWHERE\n start_time < ${__to:date:seconds} AND end_time > ${__from:date:seconds}",
+        "definition": "SELECT\n user_id as __value, user_name as __text, nodes as IP, nodes.os_ip_addr, nodes.servicetag\nFROM\n  slurm.jobs\nINNER JOIN nodes\nON nodes.os_ip_addr = ANY(nodes)\nWHERE nodes.servicetag in ($servicetag) AND\n start_time < ${__to:date:seconds} AND end_time > ${__from:date:seconds}",
         "hide": 0,
         "includeAll": true,
         "multi": true,
         "name": "users",
         "options": [],
-        "query": "SELECT\n user_id as __value, user_name as __text\nFROM\n  slurm.jobs\nWHERE\n start_time < ${__to:date:seconds} AND end_time > ${__from:date:seconds}",
+        "query": "SELECT\n user_id as __value, user_name as __text, nodes as IP, nodes.os_ip_addr, nodes.servicetag\nFROM\n  slurm.jobs\nINNER JOIN nodes\nON nodes.os_ip_addr = ANY(nodes)\nWHERE nodes.servicetag in ($servicetag) AND\n start_time < ${__to:date:seconds} AND end_time > ${__from:date:seconds}",
         "refresh": 1,
         "regex": "",
         "skipUrlSync": false,
@@ -293,13 +294,13 @@
     ]
   },
   "time": {
-    "from": "now-6M",
+    "from": "now-2d",
     "to": "now"
   },
   "timepicker": {},
   "timezone": "",
   "title": "SpiralLayout",
-  "uid": "ou27WHLnt",
-  "version": 4,
+  "uid": "ou27WHLni",
+  "version": 7,
   "weekStart": ""
 }

+ 40 - 27
telemetry/roles/grafana_config/files/parallel-coordinate.json

@@ -1,28 +1,40 @@
 {
-  "annotations": {
-    "list": [
-      {
-        "builtIn": 1,
-        "datasource": "-- Grafana --",
-        "enable": true,
-        "hide": true,
-        "iconColor": "rgba(0, 211, 255, 1)",
-        "name": "Annotations & Alerts",
-        "target": {
-          "limit": 100,
-          "matchAny": false,
-          "tags": [],
-          "type": "dashboard"
-        },
-        "type": "dashboard"
-      }
-    ]
-  },
-  "editable": true,
+  "__inputs": [
+    {
+      "name": "DS_TELEMETRY-POSTGRES",
+      "label": "telemetry-postgres",
+      "description": "",
+      "type": "datasource",
+      "pluginId": "postgres",
+      "pluginName": "PostgreSQL"
+    }
+  ],
+  "__elements": [],
+  "__requires": [
+    {
+      "type": "grafana",
+      "id": "grafana",
+      "name": "Grafana",
+      "version": "8.3.2"
+    },
+    {
+      "type": "panel",
+      "id": "hpcviz-idvl-hpcc-parallel-coordinate",
+      "name": "parallel-coordinate",
+      "version": "2.0.0"
+    },
+    {
+      "type": "datasource",
+      "id": "postgres",
+      "name": "PostgreSQL",
+      "version": "1.0.0"
+    }
+  ],
+  "editable": false,
   "fiscalYearStartMonth": 0,
   "graphTooltip": 0,
-  "id": 21,
-  "iteration": 1644431955119,
+  "id": null,
+  "iteration": 1647449602865,
   "links": [],
   "liveNow": false,
   "panels": [
@@ -225,7 +237,7 @@
           "hide": false,
           "metricColumn": "none",
           "rawQuery": true,
-          "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) as \"NIC1_temp\",\nCONCAT('| ',system) AS name\nFROM timeseries_metrics\nWHERE  \n$__timeFilter(time) AND \nlabel= 'Embedded NIC 1 Port 1 Partition 1 TemperatureReading' AND \nsystem IN ($ServiceTag)\nGROUP BY\ntime, name\nORDER BY time;",
+          "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) as \"NIC1_temp\",\nCONCAT('| ',system) AS name\nFROM timeseries_metrics\nWHERE  \n$__timeFilter(time) AND \nlabel SIMILAR TO '% NIC 1 Port 1 Partition 1 TemperatureReading' AND \nsystem IN ($ServiceTag)\nGROUP BY\ntime, name\nORDER BY time;",
           "refId": "nic1_temp",
           "select": [
             [
@@ -282,20 +294,21 @@
       "type": "hpcviz-idvl-hpcc-parallel-coordinate"
     }
   ],
-  "refresh": "5s",
+  "refresh": 5s,
   "schemaVersion": 33,
   "style": "dark",
   "tags": [],
   "templating": {
     "list": [
       {
+        "current": {},
         "datasource": {
           "type": "postgres",
           "uid": "telemetry-postgres"
         },
         "definition": "SELECT DISTINCT system as __value from timeseries_metrics",
         "hide": 0,
-        "includeAll": false,
+        "includeAll": true,
         "multi": true,
         "name": "ServiceTag",
         "options": [],
@@ -313,9 +326,9 @@
     "to": "now"
   },
   "timepicker": {},
-  "timezone": "",
+  "timezone": "browser",
   "title": "Parallel-Coordinate",
   "uid": "pArBHUtnk",
-  "version": 6,
+  "version": 10,
   "weekStart": ""
 }

+ 2 - 0
telemetry/roles/grafana_config/vars/main.yml

@@ -18,4 +18,6 @@ grafana_namespace: grafana
 telemetry_folder_name: telemetry
 dashboards:
   - parallel-coordinate.json
+  - Sankey.json
   - SpiralLayout.json
+  - PowerMap.json