|
@@ -0,0 +1,482 @@
|
|
|
+{
|
|
|
+ "__inputs": [
|
|
|
+ {
|
|
|
+ "name": "DS_TELEMETRY-POSTGRES",
|
|
|
+ "label": "telemetry-postgres",
|
|
|
+ "description": "",
|
|
|
+ "type": "datasource",
|
|
|
+ "pluginId": "postgres",
|
|
|
+ "pluginName": "PostgreSQL"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "__elements": [],
|
|
|
+ "__requires": [
|
|
|
+ {
|
|
|
+ "type": "grafana",
|
|
|
+ "id": "grafana",
|
|
|
+ "name": "Grafana",
|
|
|
+ "version": "8.3.2"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "type": "panel",
|
|
|
+ "id": "hpcviz-idvl-hpcc-sankey",
|
|
|
+ "name": "sankey",
|
|
|
+ "version": "1.0.0"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "type": "datasource",
|
|
|
+ "id": "postgres",
|
|
|
+ "name": "PostgreSQL",
|
|
|
+ "version": "1.0.0"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "editable": false,
|
|
|
+ "fiscalYearStartMonth": 0,
|
|
|
+ "graphTooltip": 0,
|
|
|
+ "id": null,
|
|
|
+ "iteration": 1647435013504,
|
|
|
+ "links": [],
|
|
|
+ "liveNow": false,
|
|
|
+ "panels": [
|
|
|
+ {
|
|
|
+ "datasource": {
|
|
|
+ "type": "postgres",
|
|
|
+ "uid": "telemetry-postgres"
|
|
|
+ },
|
|
|
+ "gridPos": {
|
|
|
+ "h": 19,
|
|
|
+ "w": 24,
|
|
|
+ "x": 0,
|
|
|
+ "y": 0
|
|
|
+ },
|
|
|
+ "id": 14,
|
|
|
+ "options": {
|
|
|
+ "coreLimit": 128,
|
|
|
+ "displayOpt": "compute_num"
|
|
|
+ },
|
|
|
+ "targets": [
|
|
|
+ {
|
|
|
+ "datasource": {
|
|
|
+ "type": "postgres",
|
|
|
+ "uid": "telemetry-postgres"
|
|
|
+ },
|
|
|
+ "format": "table",
|
|
|
+ "group": [],
|
|
|
+ "hide": false,
|
|
|
+ "metricColumn": "none",
|
|
|
+ "panelId": 9,
|
|
|
+ "queryType": "randomWalk",
|
|
|
+ "rawQuery": true,
|
|
|
+ "rawSql": "SELECT $__timeGroupAlias(a.timestamp,$__interval),\na.source_ip, a.jobs, a.cpus \nfrom (\n SELECT timestamp, CONCAT(nodes.servicetag) \n AS nodeid, jobs, cpus, nodes.os_ip_addr AS source_ip\n FROM slurm.node_jobs a \n INNER JOIN nodes \n ON nodes.nodeid = a.nodeid\n WHERE $__timeFilter(a.timestamp)) \nAS a WHERE a.source_ip IN ($NodeByUser)\nGROUP BY a.timestamp, a.source_ip, a.jobs, a.cpus ORDER BY a.timestamp",
|
|
|
+ "refId": "node core",
|
|
|
+ "select": [
|
|
|
+ [
|
|
|
+ {
|
|
|
+ "params": [
|
|
|
+ "value"
|
|
|
+ ],
|
|
|
+ "type": "column"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ ],
|
|
|
+ "timeColumn": "time",
|
|
|
+ "where": [
|
|
|
+ {
|
|
|
+ "name": "$__timeFilter",
|
|
|
+ "params": [],
|
|
|
+ "type": "macro"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "datasource": {
|
|
|
+ "type": "postgres",
|
|
|
+ "uid": "telemetry-postgres"
|
|
|
+ },
|
|
|
+ "format": "table",
|
|
|
+ "group": [],
|
|
|
+ "hide": false,
|
|
|
+ "metricColumn": "none",
|
|
|
+ "rawQuery": true,
|
|
|
+ "rawSql": "SELECT\n *\nFROM\n slurm.jobs\nWHERE\n user_id IN ($Users)\n AND start_time < ${__to:date:seconds}\n AND end_time BETWEEN ${__from:date:seconds} and ${__to:date:seconds}",
|
|
|
+ "refId": "jobs",
|
|
|
+ "select": [
|
|
|
+ [
|
|
|
+ {
|
|
|
+ "params": [
|
|
|
+ "value"
|
|
|
+ ],
|
|
|
+ "type": "column"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ ],
|
|
|
+ "timeColumn": "time",
|
|
|
+ "where": [
|
|
|
+ {
|
|
|
+ "name": "$__timeFilter",
|
|
|
+ "params": [],
|
|
|
+ "type": "macro"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "datasource": {
|
|
|
+ "type": "postgres",
|
|
|
+ "uid": "telemetry-postgres"
|
|
|
+ },
|
|
|
+ "format": "time_series",
|
|
|
+ "group": [],
|
|
|
+ "hide": false,
|
|
|
+ "metricColumn": "none",
|
|
|
+ "rawQuery": true,
|
|
|
+ "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"memory_power\",\n CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel= 'PowerMetrics TotalMemoryPower' AND\ntimeseries_metrics.system in ($servicetag)\nGROUP BY time,name\nORDER BY time",
|
|
|
+ "refId": "memory_power",
|
|
|
+ "select": [
|
|
|
+ [
|
|
|
+ {
|
|
|
+ "params": [
|
|
|
+ "value"
|
|
|
+ ],
|
|
|
+ "type": "column"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ ],
|
|
|
+ "timeColumn": "time",
|
|
|
+ "where": [
|
|
|
+ {
|
|
|
+ "name": "$__timeFilter",
|
|
|
+ "params": [],
|
|
|
+ "type": "macro"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "datasource": {
|
|
|
+ "type": "postgres",
|
|
|
+ "uid": "telemetry-postgres"
|
|
|
+ },
|
|
|
+ "format": "time_series",
|
|
|
+ "group": [],
|
|
|
+ "hide": false,
|
|
|
+ "metricColumn": "none",
|
|
|
+ "rawQuery": true,
|
|
|
+ "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"power_consumption\",\n CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel= 'PowerMetrics SystemPowerConsumption' AND\ntimeseries_metrics.system in ($servicetag)\nGROUP BY time,name\nORDER BY time",
|
|
|
+ "refId": "power_consumption",
|
|
|
+ "select": [
|
|
|
+ [
|
|
|
+ {
|
|
|
+ "params": [
|
|
|
+ "value"
|
|
|
+ ],
|
|
|
+ "type": "column"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ ],
|
|
|
+ "timeColumn": "time",
|
|
|
+ "where": [
|
|
|
+ {
|
|
|
+ "name": "$__timeFilter",
|
|
|
+ "params": [],
|
|
|
+ "type": "macro"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "datasource": {
|
|
|
+ "type": "postgres",
|
|
|
+ "uid": "telemetry-postgres"
|
|
|
+ },
|
|
|
+ "format": "time_series",
|
|
|
+ "group": [],
|
|
|
+ "hide": false,
|
|
|
+ "metricColumn": "none",
|
|
|
+ "rawQuery": true,
|
|
|
+ "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"cpu_power\",\n CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel= 'PowerMetrics TotalCPUPower' AND\ntimeseries_metrics.system in ($servicetag)\nGROUP BY time,name\nORDER BY time",
|
|
|
+ "refId": "cpu_power",
|
|
|
+ "select": [
|
|
|
+ [
|
|
|
+ {
|
|
|
+ "params": [
|
|
|
+ "value"
|
|
|
+ ],
|
|
|
+ "type": "column"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ ],
|
|
|
+ "timeColumn": "time",
|
|
|
+ "where": [
|
|
|
+ {
|
|
|
+ "name": "$__timeFilter",
|
|
|
+ "params": [],
|
|
|
+ "type": "macro"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "datasource": {
|
|
|
+ "type": "postgres",
|
|
|
+ "uid": "telemetry-postgres"
|
|
|
+ },
|
|
|
+ "format": "time_series",
|
|
|
+ "group": [],
|
|
|
+ "hide": false,
|
|
|
+ "metricColumn": "none",
|
|
|
+ "rawQuery": true,
|
|
|
+ "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"cpu_usage\",\n CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel= 'SystemUsage CPUUsage' AND\ntimeseries_metrics.system in ($servicetag)\nGROUP BY time,name\nORDER BY time",
|
|
|
+ "refId": "cpu_usage",
|
|
|
+ "select": [
|
|
|
+ [
|
|
|
+ {
|
|
|
+ "params": [
|
|
|
+ "value"
|
|
|
+ ],
|
|
|
+ "type": "column"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ ],
|
|
|
+ "timeColumn": "time",
|
|
|
+ "where": [
|
|
|
+ {
|
|
|
+ "name": "$__timeFilter",
|
|
|
+ "params": [],
|
|
|
+ "type": "macro"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "datasource": {
|
|
|
+ "type": "postgres",
|
|
|
+ "uid": "telemetry-postgres"
|
|
|
+ },
|
|
|
+ "format": "time_series",
|
|
|
+ "group": [],
|
|
|
+ "hide": false,
|
|
|
+ "metricColumn": "none",
|
|
|
+ "rawQuery": true,
|
|
|
+ "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"cpu1_temp\",\n CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel= 'CPU1 Temp TemperatureReading' AND\ntimeseries_metrics.system in ($servicetag) \nGROUP BY time,name\nORDER BY time",
|
|
|
+ "refId": "cpu1_temp",
|
|
|
+ "select": [
|
|
|
+ [
|
|
|
+ {
|
|
|
+ "params": [
|
|
|
+ "value"
|
|
|
+ ],
|
|
|
+ "type": "column"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ ],
|
|
|
+ "timeColumn": "time",
|
|
|
+ "where": [
|
|
|
+ {
|
|
|
+ "name": "$__timeFilter",
|
|
|
+ "params": [],
|
|
|
+ "type": "macro"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "datasource": {
|
|
|
+ "type": "postgres",
|
|
|
+ "uid": "telemetry-postgres"
|
|
|
+ },
|
|
|
+ "format": "time_series",
|
|
|
+ "group": [],
|
|
|
+ "hide": false,
|
|
|
+ "metricColumn": "none",
|
|
|
+ "rawQuery": true,
|
|
|
+ "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"cpu2_temp\",\n CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel= 'CPU2 Temp TemperatureReading' AND\ntimeseries_metrics.system in ($servicetag) \nGROUP BY time,name\nORDER BY time",
|
|
|
+ "refId": "cpu2_temp",
|
|
|
+ "select": [
|
|
|
+ [
|
|
|
+ {
|
|
|
+ "params": [
|
|
|
+ "value"
|
|
|
+ ],
|
|
|
+ "type": "column"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ ],
|
|
|
+ "timeColumn": "time",
|
|
|
+ "where": [
|
|
|
+ {
|
|
|
+ "name": "$__timeFilter",
|
|
|
+ "params": [],
|
|
|
+ "type": "macro"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "datasource": {
|
|
|
+ "type": "postgres",
|
|
|
+ "uid": "telemetry-postgres"
|
|
|
+ },
|
|
|
+ "format": "time_series",
|
|
|
+ "group": [],
|
|
|
+ "hide": false,
|
|
|
+ "metricColumn": "none",
|
|
|
+ "rawQuery": true,
|
|
|
+ "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"nic_temp\",\nCONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel SIMILAR TO '% NIC 1 Port 1 Partition 1 TemperatureReading' AND\ntimeseries_metrics.system in ($servicetag)\nGROUP BY time,name\nORDER BY time",
|
|
|
+ "refId": "nic_temp",
|
|
|
+ "select": [
|
|
|
+ [
|
|
|
+ {
|
|
|
+ "params": [
|
|
|
+ "value"
|
|
|
+ ],
|
|
|
+ "type": "column"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ ],
|
|
|
+ "timeColumn": "time",
|
|
|
+ "where": [
|
|
|
+ {
|
|
|
+ "name": "$__timeFilter",
|
|
|
+ "params": [],
|
|
|
+ "type": "macro"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "datasource": {
|
|
|
+ "type": "postgres",
|
|
|
+ "uid": "telemetry-postgres"
|
|
|
+ },
|
|
|
+ "format": "time_series",
|
|
|
+ "group": [],
|
|
|
+ "hide": false,
|
|
|
+ "metricColumn": "none",
|
|
|
+ "rawQuery": true,
|
|
|
+ "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"fan1_speed\",\n CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel= 'Fan 1A RPMReading' AND\ntimeseries_metrics.system in ($servicetag)\nGROUP BY time,name\nORDER BY time",
|
|
|
+ "refId": "fan1_speed",
|
|
|
+ "select": [
|
|
|
+ [
|
|
|
+ {
|
|
|
+ "params": [
|
|
|
+ "value"
|
|
|
+ ],
|
|
|
+ "type": "column"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ ],
|
|
|
+ "timeColumn": "time",
|
|
|
+ "where": [
|
|
|
+ {
|
|
|
+ "name": "$__timeFilter",
|
|
|
+ "params": [],
|
|
|
+ "type": "macro"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "datasource": {
|
|
|
+ "type": "postgres",
|
|
|
+ "uid": "telemetry-postgres"
|
|
|
+ },
|
|
|
+ "format": "time_series",
|
|
|
+ "group": [],
|
|
|
+ "hide": false,
|
|
|
+ "metricColumn": "none",
|
|
|
+ "rawQuery": true,
|
|
|
+ "rawSql": "SELECT $__timeGroupAlias(\"timestamp\",$__interval),\navg(value) AS \"Memory_usage\", \nCONCAT('| ',nodes.os_ip_addr) AS name\nFROM slurm.memoryusage\nINNER JOIN nodes\nON nodes.nodeid = slurm.memoryusage.nodeid\nWHERE\n$__timeFilter(\"timestamp\") AND\nnodes.servicetag in ($servicetag) \nGROUP BY time,name\nORDER BY time",
|
|
|
+ "refId": "memory_usage",
|
|
|
+ "select": [
|
|
|
+ [
|
|
|
+ {
|
|
|
+ "params": [
|
|
|
+ "value"
|
|
|
+ ],
|
|
|
+ "type": "column"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ ],
|
|
|
+ "timeColumn": "time",
|
|
|
+ "where": [
|
|
|
+ {
|
|
|
+ "name": "$__timeFilter",
|
|
|
+ "params": [],
|
|
|
+ "type": "macro"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "title": "Sankey",
|
|
|
+ "transformations": [],
|
|
|
+ "type": "hpcviz-idvl-hpcc-sankey"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "refresh": "",
|
|
|
+ "schemaVersion": 33,
|
|
|
+ "style": "dark",
|
|
|
+ "tags": [],
|
|
|
+ "templating": {
|
|
|
+ "list": [
|
|
|
+ {
|
|
|
+ "current": {},
|
|
|
+ "datasource": {
|
|
|
+ "type": "postgres",
|
|
|
+ "uid": "telemetry-postgres"
|
|
|
+ },
|
|
|
+ "definition": "SELECT DISTINCT servicetag as __value from nodes",
|
|
|
+ "hide": 0,
|
|
|
+ "includeAll": true,
|
|
|
+ "multi": true,
|
|
|
+ "name": "servicetag",
|
|
|
+ "options": [],
|
|
|
+ "query": "SELECT DISTINCT servicetag as __value from nodes",
|
|
|
+ "refresh": 1,
|
|
|
+ "regex": "",
|
|
|
+ "skipUrlSync": false,
|
|
|
+ "sort": 0,
|
|
|
+ "type": "query"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "current": {},
|
|
|
+ "datasource": {
|
|
|
+ "type": "postgres",
|
|
|
+ "uid": "telemetry-postgres"
|
|
|
+ },
|
|
|
+ "definition": "SELECT\n user_id as __value, user_name as __text, nodes as IP, nodes.os_ip_addr, nodes.servicetag\nFROM\n slurm.jobs\nINNER JOIN nodes\nON nodes.os_ip_addr = ANY(nodes)\nWHERE nodes.servicetag in ($servicetag) AND\n start_time < ${__to:date:seconds} AND end_time > ${__from:date:seconds}",
|
|
|
+ "hide": 0,
|
|
|
+ "includeAll": true,
|
|
|
+ "multi": true,
|
|
|
+ "name": "Users",
|
|
|
+ "options": [],
|
|
|
+ "query": "SELECT\n user_id as __value, user_name as __text, nodes as IP, nodes.os_ip_addr, nodes.servicetag\nFROM\n slurm.jobs\nINNER JOIN nodes\nON nodes.os_ip_addr = ANY(nodes)\nWHERE nodes.servicetag in ($servicetag) AND\n start_time < ${__to:date:seconds} AND end_time > ${__from:date:seconds}",
|
|
|
+ "refresh": 1,
|
|
|
+ "regex": "",
|
|
|
+ "skipUrlSync": false,
|
|
|
+ "sort": 0,
|
|
|
+ "type": "query"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "current": {},
|
|
|
+ "datasource": {
|
|
|
+ "type": "postgres",
|
|
|
+ "uid": "telemetry-postgres"
|
|
|
+ },
|
|
|
+ "definition": "SELECT DISTINCT unnest(nodes) as node \nFROM slurm.jobs WHERE \nuser_id IN ($Users) AND start_time < ${__to:date:seconds} AND end_time > ${__from:date:seconds}",
|
|
|
+ "hide": 0,
|
|
|
+ "includeAll": true,
|
|
|
+ "multi": true,
|
|
|
+ "name": "NodeByUser",
|
|
|
+ "options": [],
|
|
|
+ "query": "SELECT DISTINCT unnest(nodes) as node \nFROM slurm.jobs WHERE \nuser_id IN ($Users) AND start_time < ${__to:date:seconds} AND end_time > ${__from:date:seconds}",
|
|
|
+ "refresh": 2,
|
|
|
+ "regex": "",
|
|
|
+ "skipUrlSync": false,
|
|
|
+ "sort": 1,
|
|
|
+ "type": "query"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "time": {
|
|
|
+ "from": "now-2d",
|
|
|
+ "to": "now"
|
|
|
+ },
|
|
|
+ "timepicker": {},
|
|
|
+ "timezone": "",
|
|
|
+ "title": "Sankey",
|
|
|
+ "uid": "27YRlmz7y",
|
|
|
+ "version": 35,
|
|
|
+ "weekStart": ""
|
|
|
+}
|