Sankey.json 15 KB


  1. {
  2. "__inputs": [
  3. {
  4. "name": "DS_TELEMETRY-POSTGRES",
  5. "label": "telemetry-postgres",
  6. "description": "",
  7. "type": "datasource",
  8. "pluginId": "postgres",
  9. "pluginName": "PostgreSQL"
  10. }
  11. ],
  12. "__elements": [],
  13. "__requires": [
  14. {
  15. "type": "grafana",
  16. "id": "grafana",
  17. "name": "Grafana",
  18. "version": "8.3.2"
  19. },
  20. {
  21. "type": "panel",
  22. "id": "hpcviz-idvl-hpcc-sankey",
  23. "name": "sankey",
  24. "version": "1.0.0"
  25. },
  26. {
  27. "type": "datasource",
  28. "id": "postgres",
  29. "name": "PostgreSQL",
  30. "version": "1.0.0"
  31. }
  32. ],
  33. "editable": false,
  34. "fiscalYearStartMonth": 0,
  35. "graphTooltip": 0,
  36. "id": null,
  37. "iteration": 1647435013504,
  38. "links": [],
  39. "liveNow": false,
  40. "panels": [
  41. {
  42. "datasource": {
  43. "type": "postgres",
  44. "uid": "telemetry-postgres"
  45. },
  46. "gridPos": {
  47. "h": 19,
  48. "w": 24,
  49. "x": 0,
  50. "y": 0
  51. },
  52. "id": 14,
  53. "options": {
  54. "coreLimit": 128,
  55. "displayOpt": "compute_num"
  56. },
  57. "targets": [
  58. {
  59. "datasource": {
  60. "type": "postgres",
  61. "uid": "telemetry-postgres"
  62. },
  63. "format": "table",
  64. "group": [],
  65. "hide": false,
  66. "metricColumn": "none",
  67. "panelId": 9,
  68. "queryType": "randomWalk",
  69. "rawQuery": true,
  70. "rawSql": "SELECT $__timeGroupAlias(a.timestamp,$__interval),\na.source_ip, a.jobs, a.cpus \nfrom (\n SELECT timestamp, CONCAT(nodes.servicetag) \n AS nodeid, jobs, cpus, nodes.os_ip_addr AS source_ip\n FROM slurm.node_jobs a \n INNER JOIN nodes \n ON nodes.nodeid = a.nodeid\n WHERE $__timeFilter(a.timestamp)) \nAS a WHERE a.source_ip IN ($NodeByUser)\nGROUP BY a.timestamp, a.source_ip, a.jobs, a.cpus ORDER BY a.timestamp",
  71. "refId": "node core",
  72. "select": [
  73. [
  74. {
  75. "params": [
  76. "value"
  77. ],
  78. "type": "column"
  79. }
  80. ]
  81. ],
  82. "timeColumn": "time",
  83. "where": [
  84. {
  85. "name": "$__timeFilter",
  86. "params": [],
  87. "type": "macro"
  88. }
  89. ]
  90. },
  91. {
  92. "datasource": {
  93. "type": "postgres",
  94. "uid": "telemetry-postgres"
  95. },
  96. "format": "table",
  97. "group": [],
  98. "hide": false,
  99. "metricColumn": "none",
  100. "rawQuery": true,
  101. "rawSql": "SELECT\n *\nFROM\n slurm.jobs\nWHERE\n user_id IN ($Users)\n AND start_time < ${__to:date:seconds}\n AND end_time BETWEEN ${__from:date:seconds} and ${__to:date:seconds}",
  102. "refId": "jobs",
  103. "select": [
  104. [
  105. {
  106. "params": [
  107. "value"
  108. ],
  109. "type": "column"
  110. }
  111. ]
  112. ],
  113. "timeColumn": "time",
  114. "where": [
  115. {
  116. "name": "$__timeFilter",
  117. "params": [],
  118. "type": "macro"
  119. }
  120. ]
  121. },
  122. {
  123. "datasource": {
  124. "type": "postgres",
  125. "uid": "telemetry-postgres"
  126. },
  127. "format": "time_series",
  128. "group": [],
  129. "hide": false,
  130. "metricColumn": "none",
  131. "rawQuery": true,
  132. "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"memory_power\",\n CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel= 'PowerMetrics TotalMemoryPower' AND\ntimeseries_metrics.system in ($servicetag)\nGROUP BY time,name\nORDER BY time",
  133. "refId": "memory_power",
  134. "select": [
  135. [
  136. {
  137. "params": [
  138. "value"
  139. ],
  140. "type": "column"
  141. }
  142. ]
  143. ],
  144. "timeColumn": "time",
  145. "where": [
  146. {
  147. "name": "$__timeFilter",
  148. "params": [],
  149. "type": "macro"
  150. }
  151. ]
  152. },
  153. {
  154. "datasource": {
  155. "type": "postgres",
  156. "uid": "telemetry-postgres"
  157. },
  158. "format": "time_series",
  159. "group": [],
  160. "hide": false,
  161. "metricColumn": "none",
  162. "rawQuery": true,
  163. "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"power_consumption\",\n CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel= 'PowerMetrics SystemPowerConsumption' AND\ntimeseries_metrics.system in ($servicetag)\nGROUP BY time,name\nORDER BY time",
  164. "refId": "power_consumption",
  165. "select": [
  166. [
  167. {
  168. "params": [
  169. "value"
  170. ],
  171. "type": "column"
  172. }
  173. ]
  174. ],
  175. "timeColumn": "time",
  176. "where": [
  177. {
  178. "name": "$__timeFilter",
  179. "params": [],
  180. "type": "macro"
  181. }
  182. ]
  183. },
  184. {
  185. "datasource": {
  186. "type": "postgres",
  187. "uid": "telemetry-postgres"
  188. },
  189. "format": "time_series",
  190. "group": [],
  191. "hide": false,
  192. "metricColumn": "none",
  193. "rawQuery": true,
  194. "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"cpu_power\",\n CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel= 'PowerMetrics TotalCPUPower' AND\ntimeseries_metrics.system in ($servicetag)\nGROUP BY time,name\nORDER BY time",
  195. "refId": "cpu_power",
  196. "select": [
  197. [
  198. {
  199. "params": [
  200. "value"
  201. ],
  202. "type": "column"
  203. }
  204. ]
  205. ],
  206. "timeColumn": "time",
  207. "where": [
  208. {
  209. "name": "$__timeFilter",
  210. "params": [],
  211. "type": "macro"
  212. }
  213. ]
  214. },
  215. {
  216. "datasource": {
  217. "type": "postgres",
  218. "uid": "telemetry-postgres"
  219. },
  220. "format": "time_series",
  221. "group": [],
  222. "hide": false,
  223. "metricColumn": "none",
  224. "rawQuery": true,
  225. "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"cpu_usage\",\n CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel= 'SystemUsage CPUUsage' AND\ntimeseries_metrics.system in ($servicetag)\nGROUP BY time,name\nORDER BY time",
  226. "refId": "cpu_usage",
  227. "select": [
  228. [
  229. {
  230. "params": [
  231. "value"
  232. ],
  233. "type": "column"
  234. }
  235. ]
  236. ],
  237. "timeColumn": "time",
  238. "where": [
  239. {
  240. "name": "$__timeFilter",
  241. "params": [],
  242. "type": "macro"
  243. }
  244. ]
  245. },
  246. {
  247. "datasource": {
  248. "type": "postgres",
  249. "uid": "telemetry-postgres"
  250. },
  251. "format": "time_series",
  252. "group": [],
  253. "hide": false,
  254. "metricColumn": "none",
  255. "rawQuery": true,
  256. "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"cpu1_temp\",\n CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel= 'CPU1 Temp TemperatureReading' AND\ntimeseries_metrics.system in ($servicetag) \nGROUP BY time,name\nORDER BY time",
  257. "refId": "cpu1_temp",
  258. "select": [
  259. [
  260. {
  261. "params": [
  262. "value"
  263. ],
  264. "type": "column"
  265. }
  266. ]
  267. ],
  268. "timeColumn": "time",
  269. "where": [
  270. {
  271. "name": "$__timeFilter",
  272. "params": [],
  273. "type": "macro"
  274. }
  275. ]
  276. },
  277. {
  278. "datasource": {
  279. "type": "postgres",
  280. "uid": "telemetry-postgres"
  281. },
  282. "format": "time_series",
  283. "group": [],
  284. "hide": false,
  285. "metricColumn": "none",
  286. "rawQuery": true,
  287. "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"cpu2_temp\",\n CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel= 'CPU2 Temp TemperatureReading' AND\ntimeseries_metrics.system in ($servicetag) \nGROUP BY time,name\nORDER BY time",
  288. "refId": "cpu2_temp",
  289. "select": [
  290. [
  291. {
  292. "params": [
  293. "value"
  294. ],
  295. "type": "column"
  296. }
  297. ]
  298. ],
  299. "timeColumn": "time",
  300. "where": [
  301. {
  302. "name": "$__timeFilter",
  303. "params": [],
  304. "type": "macro"
  305. }
  306. ]
  307. },
  308. {
  309. "datasource": {
  310. "type": "postgres",
  311. "uid": "telemetry-postgres"
  312. },
  313. "format": "time_series",
  314. "group": [],
  315. "hide": false,
  316. "metricColumn": "none",
  317. "rawQuery": true,
  318. "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"nic_temp\",\nCONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel SIMILAR TO '% NIC 1 Port 1 Partition 1 TemperatureReading' AND\ntimeseries_metrics.system in ($servicetag)\nGROUP BY time,name\nORDER BY time",
  319. "refId": "nic_temp",
  320. "select": [
  321. [
  322. {
  323. "params": [
  324. "value"
  325. ],
  326. "type": "column"
  327. }
  328. ]
  329. ],
  330. "timeColumn": "time",
  331. "where": [
  332. {
  333. "name": "$__timeFilter",
  334. "params": [],
  335. "type": "macro"
  336. }
  337. ]
  338. },
  339. {
  340. "datasource": {
  341. "type": "postgres",
  342. "uid": "telemetry-postgres"
  343. },
  344. "format": "time_series",
  345. "group": [],
  346. "hide": false,
  347. "metricColumn": "none",
  348. "rawQuery": true,
  349. "rawSql": "SELECT $__timeGroupAlias(\"time\",$__interval),\navg(CAST(value AS decimal)) AS \"fan1_speed\",\n CONCAT('| ',nodes.os_ip_addr) AS name\nFROM timeseries_metrics\nINNER JOIN nodes\nON nodes.servicetag = timeseries_metrics.system\nWHERE\n$__timeFilter(\"time\") AND\nlabel= 'Fan 1A RPMReading' AND\ntimeseries_metrics.system in ($servicetag)\nGROUP BY time,name\nORDER BY time",
  350. "refId": "fan1_speed",
  351. "select": [
  352. [
  353. {
  354. "params": [
  355. "value"
  356. ],
  357. "type": "column"
  358. }
  359. ]
  360. ],
  361. "timeColumn": "time",
  362. "where": [
  363. {
  364. "name": "$__timeFilter",
  365. "params": [],
  366. "type": "macro"
  367. }
  368. ]
  369. },
  370. {
  371. "datasource": {
  372. "type": "postgres",
  373. "uid": "telemetry-postgres"
  374. },
  375. "format": "time_series",
  376. "group": [],
  377. "hide": false,
  378. "metricColumn": "none",
  379. "rawQuery": true,
  380. "rawSql": "SELECT $__timeGroupAlias(\"timestamp\",$__interval),\navg(value) AS \"Memory_usage\", \nCONCAT('| ',nodes.os_ip_addr) AS name\nFROM slurm.memoryusage\nINNER JOIN nodes\nON nodes.nodeid = slurm.memoryusage.nodeid\nWHERE\n$__timeFilter(\"timestamp\") AND\nnodes.servicetag in ($servicetag) \nGROUP BY time,name\nORDER BY time",
  381. "refId": "memory_usage",
  382. "select": [
  383. [
  384. {
  385. "params": [
  386. "value"
  387. ],
  388. "type": "column"
  389. }
  390. ]
  391. ],
  392. "timeColumn": "time",
  393. "where": [
  394. {
  395. "name": "$__timeFilter",
  396. "params": [],
  397. "type": "macro"
  398. }
  399. ]
  400. }
  401. ],
  402. "title": "Sankey",
  403. "transformations": [],
  404. "type": "hpcviz-idvl-hpcc-sankey"
  405. }
  406. ],
  407. "refresh": "",
  408. "schemaVersion": 33,
  409. "style": "dark",
  410. "tags": [],
  411. "templating": {
  412. "list": [
  413. {
  414. "current": {},
  415. "datasource": {
  416. "type": "postgres",
  417. "uid": "telemetry-postgres"
  418. },
  419. "definition": "SELECT DISTINCT servicetag as __value from nodes",
  420. "hide": 0,
  421. "includeAll": true,
  422. "multi": true,
  423. "name": "servicetag",
  424. "options": [],
  425. "query": "SELECT DISTINCT servicetag as __value from nodes",
  426. "refresh": 1,
  427. "regex": "",
  428. "skipUrlSync": false,
  429. "sort": 0,
  430. "type": "query"
  431. },
  432. {
  433. "current": {},
  434. "datasource": {
  435. "type": "postgres",
  436. "uid": "telemetry-postgres"
  437. },
  438. "definition": "SELECT\n user_id as __value, user_name as __text, nodes as IP, nodes.os_ip_addr, nodes.servicetag\nFROM\n slurm.jobs\nINNER JOIN nodes\nON nodes.os_ip_addr = ANY(nodes)\nWHERE nodes.servicetag in ($servicetag) AND\n start_time < ${__to:date:seconds} AND end_time > ${__from:date:seconds}",
  439. "hide": 0,
  440. "includeAll": true,
  441. "multi": true,
  442. "name": "Users",
  443. "options": [],
  444. "query": "SELECT\n user_id as __value, user_name as __text, nodes as IP, nodes.os_ip_addr, nodes.servicetag\nFROM\n slurm.jobs\nINNER JOIN nodes\nON nodes.os_ip_addr = ANY(nodes)\nWHERE nodes.servicetag in ($servicetag) AND\n start_time < ${__to:date:seconds} AND end_time > ${__from:date:seconds}",
  445. "refresh": 2,
  446. "regex": "",
  447. "skipUrlSync": false,
  448. "sort": 0,
  449. "type": "query"
  450. },
  451. {
  452. "current": {},
  453. "datasource": {
  454. "type": "postgres",
  455. "uid": "telemetry-postgres"
  456. },
  457. "definition": "SELECT DISTINCT unnest(nodes) as node \nFROM slurm.jobs WHERE \nuser_id IN ($Users) AND start_time < ${__to:date:seconds} AND end_time > ${__from:date:seconds}",
  458. "hide": 0,
  459. "includeAll": true,
  460. "multi": true,
  461. "name": "NodeByUser",
  462. "options": [],
  463. "query": "SELECT DISTINCT unnest(nodes) as node \nFROM slurm.jobs WHERE \nuser_id IN ($Users) AND start_time < ${__to:date:seconds} AND end_time > ${__from:date:seconds}",
  464. "refresh": 2,
  465. "regex": "",
  466. "skipUrlSync": false,
  467. "sort": 1,
  468. "type": "query"
  469. }
  470. ]
  471. },
  472. "time": {
  473. "from": "now-24h",
  474. "to": "now"
  475. },
  476. "timepicker": {},
  477. "timezone": "",
  478. "title": "Sankey",
  479. "uid": "27YRlmz7y",
  480. "version": 35,
  481. "weekStart": ""
  482. }