Browse the source code

Merge pull request #748 from Artlands/issue-746

Issue 746: adding telemetry code for slurm
Sujit Jadhav 3 years ago
parent
commit
08ac20c223

+ 204 - 0
telemetry/roles/slurm_telemetry/files/monster/dump.py

@@ -0,0 +1,204 @@
+"""
+MIT License
+
+Copyright (c) 2022 Texas Tech University
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+"""
+This file is part of MonSter.
+
+Author:
+    Jie Li, jie.li@ttu.edu
+"""
+
+import utils
+import logger
+from pgcopy import CopyManager
+from dateutil import tz
+from dateutil.parser import parse as parse_time
+
+log = logger.get_logger(__name__)
+
+
+def dump_node_jobs(timestamp: object, node_jobs: dict, conn: object):
+    """dump_node_jobs Dump Node-Jobs
+
+    Dump node-jobs correlation to TimeScaleDB
+
+    Args:
+        timestamp (object): Attached timestamp
+        node_jobs (dict): Node-jobs correlation
+        conn (object): TimeScaleDB connection object
+    """
+    try:
+        all_records = []
+        target_table = 'slurm.node_jobs'
+        cols = ('timestamp', 'nodeid', 'jobs', 'cpus')
+        for node, job_info in node_jobs.items():
+            all_records.append((timestamp, int(node), job_info['jobs'], job_info['cpus']))
+        mgr = CopyManager(conn, target_table, cols)
+        mgr.copy(all_records)
+        conn.commit()
+    except Exception as err:
+        curs = conn.cursor()
+        curs.execute("ROLLBACK")
+        conn.commit()
+        log.error(f"Fail to dump node-jobs correlation: {err}")
+
+
+def dump_job_metrics(job_metrics: dict, conn: object):
+    """dump_job_metrics Dump Job Metircs
+
+    Dump job metrics to TimeScaleDB
+
+    Args:
+        job_metrics (dict): Job Metrics
+        conn (object): TimeScaleDB connection object
+    """
+    try:
+        target_table = 'slurm.jobs'
+        cols = ('job_id', 'array_job_id', 'array_task_id', 'name','job_state', 
+                'user_id', 'user_name', 'group_id', 'cluster', 'partition', 
+                'command', 'current_working_directory', 'batch_flag', 
+                'batch_host', 'nodes', 'node_count', 'cpus', 'tasks', 
+                'tasks_per_node', 'cpus_per_task', 'memory_per_node', 
+                'memory_per_cpu', 'priority', 'time_limit', 'deadline', 
+                'submit_time', 'preempt_time', 'suspend_time', 'eligible_time', 
+                'start_time', 'end_time', 'resize_time', 'restart_cnt', 
+                'exit_code', 'derived_exit_code')
+
+        cur = conn.cursor()
+        all_records = []
+
+        for job in job_metrics:
+            job_id = job[cols.index('job_id')]
+            check_sql = f"SELECT EXISTS(SELECT 1 FROM slurm.jobs WHERE job_id={job_id})"
+            cur.execute(check_sql)
+            (job_exists, ) = cur.fetchall()[0]
+
+            if job_exists:
+                # Update
+                nodes = job[cols.index('nodes')]
+                job_state = job[cols.index('job_state')]
+                user_name = job[cols.index('user_name')]
+                start_time = job[cols.index('start_time')]
+                end_time = job[cols.index('end_time')]
+                resize_time = job[cols.index('resize_time')]
+                restart_cnt = job[cols.index('restart_cnt')]
+                exit_code = job[cols.index('exit_code')]
+                derived_exit_code = job[cols.index('derived_exit_code')]
+                update_sql = """ UPDATE slurm.jobs 
+                                 SET nodes = %s, job_state = %s, user_name = %s, start_time = %s, end_time = %s, resize_time = %s, restart_cnt = %s, exit_code = %s, derived_exit_code = %s
+                                 WHERE job_id = %s """
+                cur.execute(update_sql, (nodes, job_state, user_name, start_time, end_time, resize_time, restart_cnt, exit_code, derived_exit_code, job_id))
+            else:
+                all_records.append(job)
+
+        mgr = CopyManager(conn, target_table, cols)
+        mgr.copy(all_records)
+        conn.commit()
+    except Exception as err:
+        curs = conn.cursor()
+        curs.execute("ROLLBACK")
+        conn.commit()
+        log.error(f"Fail to dump job metrics: {err}")
+
+
+def dump_node_metrics(timestamp: object, 
+                      node_metrics: dict, 
+                      conn: object):
+    """dump_node_metrics Dump Node Metrics
+
+    Dump node metrics to TimeScaleDB
+
+    Args:
+        timestamp (object): attached timestamp
+        node_metrics (dict): node metrics
+        conn (object): TimeScaleDB connection object
+    """
+    schema = 'slurm'
+    try:
+        metric_names = list(list(node_metrics.values())[0].keys())
+
+        for metric_name in metric_names:
+            all_records = []
+            target_table = f'{schema}.{metric_name}'
+            cols = ('timestamp', 'nodeid', 'value')
+            for node, node_data in node_metrics.items():
+                all_records.append((timestamp, int(node), node_data[metric_name]))
+            mgr = CopyManager(conn, target_table, cols)
+            mgr.copy(all_records)
+            conn.commit()
+    except Exception as err:
+        curs = conn.cursor()
+        curs.execute("ROLLBACK")
+        conn.commit()
+        log.error(f"Fail to dump node metrics : {err}")
+
+
+def dump_idrac(ip: str, 
+               idrac_metrics: dict,
+               metric_dtype_mapping: dict, 
+               ip_id_mapping: dict,
+               conn: object):
+    """dump_idrac Dump iDRAC Metrics
+
+    Dump node metrics to TimeScaleDB
+
+    Args:
+        ip (str): ip address of iDRAC
+        idrac_metrics (dict): iDRAC Metrics
+        metric_dtype_mapping (dict): Metric-Datatype mapping
+        ip_id_mapping (dict): ip-id mapping
+        conn (object): TimeScaleDB connection object
+    """
+    try:
+        schema_name = 'idrac'
+        nodeid = ip_id_mapping[ip]
+
+        for table_name, table_metrics in idrac_metrics.items():
+            all_records = []
+            dtype = metric_dtype_mapping[table_name]
+
+            table_name = table_name.lower()
+            target_table = f"{schema_name}.{table_name}"
+
+            cols = ('timestamp', 'nodeid', 'source', 'fqdd', 'value')
+            for metric in table_metrics:
+                # We have to offset the timestamp by -6/-5 hours. For some
+                # unknown reason, the timestamp reported by iDRAC is not
+                # configured correctly.
+                timestamp = parse_time(metric['Timestamp'])
+                timestamp = timestamp.astimezone(tz.tzlocal())
+                timestamp = timestamp.replace(tzinfo=tz.tzutc())
+                timestamp = timestamp.astimezone(tz.tzlocal())
+
+                source = metric['Source']
+                fqdd = metric['FQDD']
+                if metric['Value']:
+                    value = utils.cast_value_type(metric['Value'], dtype)
+                    all_records.append((timestamp, nodeid, source, fqdd, value))
+
+            mgr = CopyManager(conn, target_table, cols)
+            mgr.copy(all_records)
+        conn.commit()
+    except Exception as err:
+        log.error(f"Fail to dump idrac metrics ({ip}): {err}")

+ 60 - 0
telemetry/roles/slurm_telemetry/files/monster/logger.py

@@ -0,0 +1,60 @@
+"""
+MIT License
+
+Copyright (c) 2022 Texas Tech University
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+"""
+This file is part of MonSter.
+
+Author:
+    Jie Li, jie.li@ttu.edu
+"""
+
+from pathlib import Path
+from logging.handlers import TimedRotatingFileHandler
+import logging
+
+
+def setup_logger(file_name):
+    monster_path = Path(__file__).resolve().parent.parent
+    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+    
+    logger = logging.getLogger(file_name)
+    formatter = logging.Formatter(log_format)
+
+    log_handler = TimedRotatingFileHandler(filename=f'{monster_path}/log/monster.log', when="midnight", interval=1, backupCount=7)
+    log_handler.setLevel(logging.ERROR)
+    log_handler.setFormatter(formatter)
+
+    if not logger.handlers:
+        stream_handler = logging.StreamHandler()
+        stream_handler.setLevel(logging.ERROR)
+        stream_handler.setFormatter(formatter)
+
+        logger.addHandler(stream_handler)
+        logger.addHandler(log_handler)
+
+    return logger
+
+
+def get_logger(file_name):
+    return setup_logger(file_name)
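
Usage is one call per module; a sketch (the module name is arbitrary). Note that TimedRotatingFileHandler does not create directories, so the log/ directory it writes to must already exist:

    import logger

    log = logger.get_logger(__name__)
    log.error("something went wrong")  # written to stderr and log/monster.log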

+ 117 - 0
telemetry/roles/slurm_telemetry/files/monster/mslurm.py

@@ -0,0 +1,117 @@
+"""
+MIT License
+
+Copyright (c) 2022 Texas Tech University
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+"""
+This file is part of MonSter.
+
+Author:
+    Jie Li, jie.li@ttu.edu
+"""
+
+import time
+import pytz
+import utils
+import dump
+import slurm
+import parse
+import logger
+import psycopg2
+import schedule
+
+from datetime import datetime
+
+log = logger.get_logger(__name__)
+
+
+def monitor_slurm():
+    """monitor_slurm Monitor Slurm
+
+    Monitor Slurm Metrics
+    """
+    connection = utils.init_tsdb_connection()
+    node_id_mapping = utils.get_node_id_mapping(connection)
+    os_idrac_hostname_mapping = utils.get_os_idrac_hostname_mapping()
+    slurm_config = utils.get_config('slurm_rest_api')
+    
+    # Schedule the slurm fetch
+    schedule.every().minutes.at(":00").do(fetch_slurm, 
+                                          slurm_config, 
+                                          connection, 
+                                          node_id_mapping,
+                                          os_idrac_hostname_mapping)
+
+    while True:
+        try:
+            schedule.run_pending()
+            time.sleep(1)
+        except KeyboardInterrupt:
+            schedule.clear()
+            break
+        
+
+def fetch_slurm(slurm_config: dict, 
+                connection: str, 
+                node_id_mapping: dict,
+                os_idrac_hostname_mapping: dict):
+    """fetch_slurm Fetch Slurm Metrics
+
+    Fetch Slurm metrics from the Slurm REST API
+
+    Args:
+        slurm_config (dict): slurm configuration
+        connection (str): tsdb connection
+        node_id_mapping (dict): node-id mapping
+        os_idrac_hostname_mapping (dict): OS-iDRAC hostname mapping
+    """
+    token = slurm.read_slurm_token(slurm_config)
+    timestamp = datetime.now(pytz.utc).replace(microsecond=0)
+
+    # Get nodes data
+    nodes_url = slurm.get_slurm_url(slurm_config, 'nodes')
+    nodes_data = slurm.call_slurm_api(slurm_config, token, nodes_url)
+
+    # Get jobs data
+    jobs_url = slurm.get_slurm_url(slurm_config, 'jobs')
+    jobs_data = slurm.call_slurm_api(slurm_config, token, jobs_url)
+
+    # Process slurm data
+    if nodes_data and jobs_data:
+        job_metrics = parse.parse_jobs_metrics(jobs_data, 
+                                               os_idrac_hostname_mapping)
+        node_metrics = parse.parse_node_metrics(nodes_data, 
+                                                node_id_mapping,
+                                                os_idrac_hostname_mapping)
+        node_jobs = parse.parse_node_jobs(jobs_data,
+                                          node_id_mapping,
+                                          os_idrac_hostname_mapping)
+
+        # Dump metrics
+        with psycopg2.connect(connection) as conn:
+            dump.dump_job_metrics(job_metrics, conn)
+            dump.dump_node_metrics(timestamp, node_metrics, conn)
+            dump.dump_node_jobs(timestamp, node_jobs, conn)
+
+
+if __name__ == '__main__':
+    monitor_slurm()
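
The loop above fires fetch_slurm at the top of every minute via the schedule library. A self-contained sketch of the same pattern, with a stand-in job body:

    import time
    import schedule

    def job():
        print("fetching slurm metrics...")

    # Run at second :00 of every minute, as monitor_slurm does.
    schedule.every().minute.at(":00").do(job)

    while True:
        try:
            schedule.run_pending()
            time.sleep(1)
        except KeyboardInterrupt:
            schedule.clear()
            break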

+ 224 - 0
telemetry/roles/slurm_telemetry/files/monster/parse.py

@@ -0,0 +1,224 @@
+"""
+MIT License
+
+Copyright (c) 2022 Texas Tech University
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+"""
+This file is part of MonSter.
+
+Author:
+    Jie Li, jie.li@ttu.edu
+"""
+
+import json
+import logger
+import hostlist
+
+
+log = logger.get_logger(__name__)
+
+def parse_jobs_metrics(jobs_data: dict,
+                       os_idrac_hostname_mapping: dict):
+    """parse_jobs_metrics Parse Jobs Metrics
+
+    Parse job metrics fetched from the Slurm API
+
+    Args:
+        jobs_data (dict): Job data fetched from the Slurm API
+        os_idrac_hostname_mapping (dict): OS-iDRAC hostname mapping
+
+    Returns:
+        list: Parsed jobs info
+    """
+    jobs_metrics = []
+
+    all_jobs = jobs_data['jobs']
+    attributes = ['job_id', 'array_job_id', 'array_task_id', 'name','job_state', 
+                  'user_id', 'user_name', 'group_id', 'cluster', 'partition', 
+                  'command', 'current_working_directory', 'batch_flag', 
+                  'batch_host', 'nodes', 'node_count', 'cpus', 'tasks', 
+                  'tasks_per_node', 'cpus_per_task', 'memory_per_node', 
+                  'memory_per_cpu', 'priority', 'time_limit', 'deadline', 
+                  'submit_time', 'preempt_time', 'suspend_time', 
+                  'eligible_time', 'start_time', 'end_time', 'resize_time', 
+                  'restart_cnt', 'exit_code', 'derived_exit_code']
+    
+    for job in all_jobs:
+        nodes = job['nodes']
+        hostnames = hostlist.expand_hostlist(nodes)
+
+        # Map OS hostnames to iDRAC hostnames.
+        if os_idrac_hostname_mapping:
+            try:
+                hostnames = [os_idrac_hostname_mapping[i] for i in hostnames]
+            except Exception as err:
+                log.error(f"Cannot mapping OS-iDRAC hostname: {err}")
+
+        metrics = []
+        for attribute in attributes:
+            if attribute == 'nodes':
+                metrics.append(hostnames)
+            else:
+                # Some attribute values are larger than 2147483647, which
+                # exceeds INT4 and cannot be saved in TSDB
+                if type(job[attribute]) is int and job[attribute] > 2147483647:
+                    metrics.append(2147483647)
+                else:
+                    metrics.append(job[attribute])
+        tuple_metrics = tuple(metrics)
+        jobs_metrics.append(tuple_metrics)
+            
+    return jobs_metrics
+
+
+def parse_node_metrics(nodes_data: dict, 
+                       node_id_mapping: dict,
+                       os_idrac_hostname_mapping: dict):
+    """parse_node_metrics Parse Node Metircs
+
+    Parse node metrics fetched from the Slurm API
+
+    Args:
+        nodes_data (dict): Nodes data fetched from the Slurm API
+        node_id_mapping (dict): Node-Id mapping
+        os_idrac_hostname_mapping (dict): OS-iDRAC hostname mapping
+
+    Returns:
+        dict: Parsed node metrics
+    """
+    all_node_metrics = {}
+    state_mapping = {
+        'allocated': 1,
+        'idle': 0,
+        'down': -1
+    }
+    all_nodes = nodes_data['nodes']
+    for node in all_nodes:
+        hostname = node['hostname']
+
+        # Map the OS hostname to the iDRAC hostname. The hostnames in 
+        # node_id_mapping use iDRAC hostnames.
+        if os_idrac_hostname_mapping:
+            try:
+                hostname = os_idrac_hostname_mapping[hostname]
+            except Exception as err:
+                log.error(f"Cannot mapping OS-iDRAC hostname: {err}")
+
+        # Only process those nodes that are in node_id_mapping dict. 
+        if hostname in node_id_mapping:
+            node_id = node_id_mapping[hostname]
+            # CPU load
+            cpu_load = int(node['cpu_load'])
+            # Some down nodes report a cpu_load larger than 2147483647, which
+            # exceeds INT4 and cannot be saved in TSDB
+            if cpu_load > 2147483647: 
+                cpu_load = 2147483647
+            # Memory usage
+            free_memory = node['free_memory']
+            real_memory = node['real_memory']
+            memory_usage = ((real_memory - free_memory)/real_memory) * 100
+            memory_used = real_memory - free_memory
+            f_memory_usage = float("{:.2f}".format(memory_usage))
+            # Status
+            state = node['state']
+            f_state = state_mapping[state]
+            node_data = {
+                'cpu_load': cpu_load,
+                'memoryusage': f_memory_usage,
+                'memory_used': memory_used,
+                'state': f_state
+            }
+            all_node_metrics.update({
+                node_id: node_data
+            })
+    return all_node_metrics
+
+
+def parse_node_jobs(jobs_metrics: dict, 
+                    node_id_mapping: dict,
+                    os_idrac_hostname_mapping: dict):
+    """parse_node_jobs Parse Node-Jobs
+
+    Parse nodes-job correlation
+
+    Args:
+        jobs_metrics (dict): Job metrics fetched from the Slurm API
+        node_id_mapping (dict): Node-Id mapping
+        os_idrac_hostname_mapping (dict): OS-iDRAC hostname mapping
+
+    Returns:
+        dict: node-jobs correlation
+    """
+  
+    node_jobs = {}
+    all_jobs = jobs_metrics['jobs']
+    # Get job-nodes correlation
+    job_nodes = {}
+    for job in all_jobs:
+        valid_flag = True
+        if job['job_state'] == "RUNNING":
+            job_id = job['job_id']
+            nodes = job['nodes']
+            # Get node ids
+            hostnames = hostlist.expand_hostlist(nodes)
+
+            # Map OS hostnames to iDRAC hostnames.
+            if os_idrac_hostname_mapping:
+                try:
+                    hostnames = [os_idrac_hostname_mapping[i] for i in hostnames]
+                except Exception as err:
+                    log.error(f"Cannot mapping OS-iDRAC hostname: {err}")
+            
+            # Check if hostname is in node_id_mapping. 
+            # If not, ignore this job info.
+            for hostname in hostnames:
+                if hostname not in node_id_mapping:
+                    valid_flag = False
+                    break
+
+            if valid_flag:
+                node_ids = [node_id_mapping[i] for i in hostnames]
+                node_ids.sort()
+                # Get cpu counts for each node
+                allocated_nodes = job['job_resources']['allocated_nodes']
+                cpu_counts = [resource['cpus'] for node, resource in allocated_nodes.items()]
+                job_nodes.update({
+                    job_id: {
+                        'nodes': node_ids,
+                        'cpus': cpu_counts
+                    }
+                })
+    # Get nodes-job correlation
+    for job, nodes_cpus in job_nodes.items():
+        for i, node in enumerate(nodes_cpus['nodes']):
+            if node not in node_jobs:
+                node_jobs.update({
+                    node: {
+                        'jobs':[job],
+                        'cpus':[nodes_cpus['cpus'][i]]
+                    }
+                })
+            else:
+                node_jobs[node]['jobs'].append(job)
+                node_jobs[node]['cpus'].append(nodes_cpus['cpus'][i])
+
+    return node_jobs
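
Both parsers rely on python-hostlist to expand Slurm's compressed node expressions; a quick illustration (the hostnames are made up):

    import hostlist

    # Slurm reports allocated nodes in compressed form:
    hostlist.expand_hostlist("cpu-[1-3,7]")
    # -> ['cpu-1', 'cpu-2', 'cpu-3', 'cpu-7']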

+ 391 - 0
telemetry/roles/slurm_telemetry/files/monster/process.py

@@ -0,0 +1,391 @@
+"""
+MIT License
+
+Copyright (c) 2022 Texas Tech University
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+"""
+This file is part of MonSter.
+
+Author:
+    Jie Li, jie.li@ttu.edu
+"""
+
+import logger
+import time
+import multiprocessing
+
+log = logger.get_logger(__name__)
+
+
+def partition(arr: list, cores: int):
+    """partition Partition a list
+
+    Partition urls/nodes into several groups based on # of cores
+
+    Args:
+        arr (list): A list to be partitioned
+        cores (int): Number of cores of the machine running MonSter
+
+    Returns:
+        list: partitioned list
+    """
+    
+    groups = []
+    try:
+        arr_len = len(arr)
+        arr_per_core = arr_len // cores
+        arr_surplus = arr_len % cores
+
+        increment = 1
+        for i in range(cores):
+            if(arr_surplus != 0 and i == (cores-1)):
+                groups.append(arr[i * arr_per_core:])
+            else:
+                groups.append(arr[i * arr_per_core : increment * arr_per_core])
+                increment += 1
+    except Exception as err:
+        log.error(f"Cannot Partition the list: {err}")
+    return groups
+
+
+def fetch(urls: list, nodes: list, username: str, password: str):
+    """fetch Fetch Data From Urls
+
+    Fetch Data From Urls of the specified nodes
+
+    Args:
+        urls (list): idrac urls
+        nodes (list): a list of ip addresses of idracs
+        username (str): idrac username
+        password (str): idrac password
+
+    Returns:
+        list: idrac metrics
+    """
+    idrac_metrics = []
+    try:
+        asyn_requests = AsyncioRequests(auth = (username, password),
+                                        timeout = (15, 45),
+                                        max_retries = 3)
+        idrac_metrics = asyn_requests.bulk_fetch(urls, nodes)
+    except Exception as err:
+        log.error(f"Cannot fetch data from idrac urls: {err}")
+    return idrac_metrics
+
+
+def parallel_fetch(urllist: list, 
+                   nodelist: list, 
+                   cores: int,
+                   username: str, 
+                   password: str):
+    """parallel_fetch Parallel Fetch Data from Urls
+
+    Parallel fetch data from urls of the specified nodes
+
+    Args:
+        urllist (list): idrac urls
+        nodelist (list): a list of ip addresses of idracs
+        cores (int): Number of cores of the machine running MonSter
+        username (str): idrac username
+        password (str): idrac password
+
+    Returns:
+        list: idrac metrics in a list
+    """
+    
+    flatten_metrics = []
+    try:
+        # Partition
+        urls_group = partition(urllist, cores)
+        nodes_group = partition(nodelist, cores)
+
+        fetch_args = []
+        for i in range(cores):
+            urls = urls_group[i]
+            nodes = nodes_group[i]
+            fetch_args.append((urls, nodes, username, password))
+
+        with multiprocessing.Pool() as pool:
+            metrics = pool.starmap(fetch, fetch_args)
+
+        flatten_metrics = [item for sublist in metrics for item in sublist]
+    except Exception as err:
+        log.error(f"Cannot parallel fetch data from idrac urls: {err}")
+
+    return flatten_metrics
+
+
+def extract(system_info: dict, bmc_info: dict):
+    """extract Extract Info
+
+    Extract system and bmc info
+
+    Args:
+        system_info (dict): System info
+        bmc_info (dict): BMC info
+
+    Returns:
+        dict: extracted info
+    """
+    
+    bmc_ip_addr = system_info["node"]
+    system_metrics = system_info["metrics"]
+    bmc_metrics = bmc_info["metrics"]
+    
+    general = ["UUID", "SerialNumber", "HostName", "Model", "Manufacturer"]
+    processor = ["ProcessorModel", "ProcessorCount", "LogicalProcessorCount"]
+    memory = ["TotalSystemMemoryGiB"]
+    bmc = ["BmcModel", "BmcFirmwareVersion"]
+    metrics = {}
+    try:
+        # Update service tag
+        if system_metrics:
+            service_tag = system_metrics.get("SKU", None)
+        else:
+            service_tag = None
+
+        metrics.update({
+            "ServiceTag": service_tag
+        })
+
+        # Update System metrics
+        if system_metrics:
+            for metric in general:
+                metrics.update({
+                    metric: system_metrics.get(metric, None)
+                })
+            for metric in processor:
+                if metric.startswith("Processor"):
+                    metrics.update({
+                        metric: system_metrics.get("ProcessorSummary", {}).get(metric[9:], None)
+                    })
+                else:
+                    metrics.update({
+                        metric: system_metrics.get("ProcessorSummary", {}).get(metric, None)
+                    })
+            for metric in memory:
+                metrics.update({
+                    metric: system_metrics.get("MemorySummary", {}).get("TotalSystemMemoryGiB", None)
+                })
+        else:
+            for metric in general + processor + memory:
+                metrics.update({
+                    metric: None
+                })
+
+        metrics.update({
+            "Bmc_Ip_Addr": bmc_ip_addr
+        })
+
+        # Update BMC metrics
+        if bmc_metrics:
+            for metric in bmc:
+                metrics.update({
+                    metric: bmc_metrics.get(metric[3:], None)
+                })
+        else:
+            for metric in bmc:
+                metrics.update({
+                    metric: None
+                })
+        
+        # Update Status
+        if  (not system_metrics and 
+             not bmc_metrics):
+            metrics.update({
+                "Status": "BMC unreachable in this query"
+            })
+        else:
+            metrics.update({
+                "Status": system_metrics.get("Status", {}).get("Health", None)
+            })
+            
+        return metrics
+    except Exception as err:
+        log.error(f"Cannot extract info from system and bmc: {err}")
+
+
+def parallel_extract(system_info_list: list, 
+                     bmc_info_list: list):
+    """parallel_extract Parallel Extract Info
+
+    Parallel extract system and bmc info
+
+    Args:
+        system_info_list (list): a list of system info
+        bmc_info_list (list): a list of bmc info
+
+    Returns:
+        list: a list of extracted info
+    """
+    
+    info = []
+    try:
+        process_args = zip(system_info_list, 
+                           bmc_info_list)
+        with multiprocessing.Pool() as pool:
+            info = pool.starmap(extract, process_args)
+    except Exception as err:
+        log.error(f"Cannot parallel extract info from system and bmc: {err}")
+    return info
+
+"""
+    Process data in the MetricValues, generate raw records
+    """
+
+
+def process_idrac(ip: str, report: str, metrics: list):
+    """process_idrac Process iDRAC Metrics
+
+    Process iDRAC metrics in the MetricValues and generate records
+
+    Args:
+        ip (str): iDRAC ip address
+        report (str): report name
+        metrics (list): a list of metric names
+
+    Returns:
+        dict: processed idrac metrics grouped by table name
+    """
+    idrac_metrics = {}
+    try:
+        if report == "PowerStatistics":
+            # PowerStatistics reports are better pulled separately; skip them.
+            pass
+        else:
+            for metric in metrics:
+                table_name = ''
+                timestamp = ''
+                source = ''
+                fqdd = ''
+                value = ''
+
+                try:
+                    table_name = metric['MetricId']
+                    timestamp = metric['Timestamp']
+                    source = metric['Oem']['Dell']['Source']
+                    fqdd = metric['Oem']['Dell']['FQDD']
+                    value = metric['MetricValue']
+
+                    # print(f"Time Stamp: {timestamp}")
+
+                except:
+                    pass
+
+                if table_name and timestamp and source and fqdd and value:
+                    record = {
+                        'Timestamp': timestamp,
+                        'Source': source,
+                        'FQDD': fqdd,
+                        'Value': value
+                    }
+
+                    if table_name not in idrac_metrics:
+                        idrac_metrics.update({
+                            table_name: [record]
+                        })
+                    else:
+                        idrac_metrics[table_name].append(record)
+    
+    except Exception as err:
+            log.error(f"Fail to process idrac metrics: {err}")
+    
+    return idrac_metrics
+
+
+class AsyncioRequests:
+    import aiohttp
+    import asyncio
+    from aiohttp import ClientSession
+
+
+    def __init__(self, verify_ssl: bool = False, auth: tuple = (), 
+                 timeout: tuple = (15, 45), max_retries: int = 3):
+        self.metrics = {}
+        self.timestamp = int(time.time() * 1000000000)
+        self.retry = 0
+        self.connector = self.aiohttp.TCPConnector(verify_ssl=verify_ssl)
+        if auth:
+            self.auth = self.aiohttp.BasicAuth(*auth)
+        else:
+            self.auth = None
+        self.timeout = self.aiohttp.ClientTimeout(*timeout)
+        self.max_retries = max_retries
+        self.loop = self.asyncio.get_event_loop()
+        
+    
+    async def __fetch_json(self, 
+                           url: str, 
+                           node: str, 
+                           session: ClientSession):
+        """__fetch_json Fetch Url
+
+        Get request wrapper to fetch json data from API
+
+        Args:
+            url (str): url of idrac
+            node (str): ip address of the idrac
+            session (ClientSession): Client Session
+
+        Returns:
+            dict: The return of url in json format
+        """
+        
+        try:
+            resp = await session.request(method='GET', url=url)
+            resp.raise_for_status()
+            json = await resp.json()
+            return {"node": node, 
+                    "metrics": json, 
+                    "timestamp": self.timestamp}
+        except self.asyncio.TimeoutError:
+            self.retry += 1
+            if self.retry >= self.max_retries:
+                log.error(f"Cannot fetch data from {node} : {url}")
+                return {"node": node, 
+                        "metrics": {}, 
+                        "timestamp": self.timestamp}
+            return await self.__fetch_json(url, node, session)
+        except Exception as err:
+            log.error(f"Cannot fetch data from {url} : {err}")
+            return {"node": node, 
+                    "metrics": {}, 
+                    "timestamp": self.timestamp}
+
+
+    async def __requests(self, urls: list, nodes: list):
+        async with self.ClientSession(connector=self.connector, 
+                                      auth = self.auth, 
+                                      timeout = self.timeout) as session:
+            tasks = []
+            for i, url in enumerate(urls):
+                tasks.append(self.__fetch_json(url=url, 
+                                               node=nodes[i], 
+                                               session=session))
+            return await self.asyncio.gather(*tasks)
+
+
+    def bulk_fetch(self, urls: list, nodes: list):
+        self.metrics = self.loop.run_until_complete(self.__requests(urls, nodes))
+        self.loop.close()
+        return self.metrics
+    
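
partition splits the url/node lists evenly across cores, with the last group absorbing any surplus; a quick check of the behavior (the import path is an assumption):

    from process import partition  # hypothetical import path

    urls = ['u1', 'u2', 'u3', 'u4', 'u5', 'u6', 'u7']
    partition(urls, 3)
    # -> [['u1', 'u2'], ['u3', 'u4'], ['u5', 'u6', 'u7']]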

+ 244 - 0
telemetry/roles/slurm_telemetry/files/monster/rparser.py

@@ -0,0 +1,244 @@
+"""
+MIT License
+
+Copyright (c) 2022 Texas Tech University
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+"""
+This file is part of MonSter.
+
+Author:
+    Jie Li, jie.li@ttu.edu
+"""
+
+JSON_COMMA = ','
+JSON_COLON = ':'
+JSON_LEFTBRACKET = '['
+JSON_RIGHTBRACKET = ']'
+JSON_LEFTBRACE = '{'
+JSON_RIGHTBRACE = '}'
+JSON_QUOTE = '"'
+
+JSON_QUOTE = '"'
+JSON_WHITESPACE = [' ', '\t', '\b', '\n', '\r']
+JSON_SYNTAX = [JSON_COMMA, JSON_COLON, JSON_LEFTBRACKET, JSON_RIGHTBRACKET,
+               JSON_LEFTBRACE, JSON_RIGHTBRACE]
+
+FALSE_LEN = len('false')
+TRUE_LEN = len('true')
+NULL_LEN = len('null')
+
+
+def lex_string(string):
+    json_string = ''
+
+    if string[0] == JSON_QUOTE:
+        string = string[1:]
+    else:
+        return None, string
+
+    for c in string:
+        if c == JSON_QUOTE:
+            return json_string, string[len(json_string)+1:]
+        else:
+            json_string += c
+
+    # raise Exception('Expected end-of-string quote')
+    return json_string, ''
+    
+
+def lex_number(string):
+    json_number = ''
+
+    number_characters = [str(d) for d in range(0, 10)] + ['-', 'e', '.']
+
+    for c in string:
+        if c in number_characters:
+            json_number += c
+        else:
+            break
+
+    rest = string[len(json_number):]
+
+    if not len(json_number):
+        return None, string
+
+    if '.' in json_number:
+        return float(json_number), rest
+
+    return int(json_number), rest
+
+
+def lex_bool(string):
+    string_len = len(string)
+
+    if string_len >= TRUE_LEN and \
+       string[:TRUE_LEN] == 'true':
+        return True, string[TRUE_LEN:]
+    elif string_len >= FALSE_LEN and \
+         string[:FALSE_LEN] == 'false':
+        return False, string[FALSE_LEN:]
+
+    return None, string
+
+
+def lex_null(string):
+    string_len = len(string)
+
+    if string_len >= NULL_LEN and \
+       string[:NULL_LEN] == 'null':
+        return True, string[NULL_LEN:]
+
+    return None, string
+
+
+def lex(string):
+    tokens = []
+
+    while len(string):
+        json_string, string = lex_string(string)
+        if json_string is not None:
+            tokens.append(json_string)
+            continue
+
+        json_number, string = lex_number(string)
+        if json_number is not None:
+            tokens.append(json_number)
+            continue
+
+        json_bool, string = lex_bool(string)
+        if json_bool is not None:
+            tokens.append(json_bool)
+            continue
+
+        json_null, string = lex_null(string)
+        if json_null is not None:
+            tokens.append(None)
+            continue
+
+        if string[0] in JSON_WHITESPACE:
+            string = string[1:]
+        elif string[0] in JSON_SYNTAX:
+            tokens.append(string[0])
+            string = string[1:]
+        else:
+            raise Exception('Unexpected character: {}'.format(string[0]))
+        
+        # print(tokens)
+
+    return tokens
+
+
+def parse_array(tokens):
+    # print("PARSE ARRAY: ")
+    # print(tokens)
+    json_array = []
+
+    if tokens:
+        t = tokens[0]
+        if t == JSON_RIGHTBRACKET:
+            return json_array, tokens[1:]
+
+        while True:
+            json, tokens = parse(tokens)
+            json_array.append(json)
+
+            # print(f'Json array: {json_array}')
+
+            if tokens:
+                t = tokens[0]
+                if t == JSON_RIGHTBRACKET:
+                    return json_array, tokens[1:]
+                elif t != JSON_COMMA:
+                    raise Exception('Expected comma after object in array')
+                else:
+                    tokens = tokens[1:]
+            else:
+                return json_array, None
+    return None, None
+    # raise Exception('Expected end-of-array bracket')
+
+
+def parse_object(tokens):
+    # print("PARSE OBJECT: ")
+    # print(tokens)
+    json_object = {}
+
+    if tokens:
+        t = tokens[0]
+
+        if t == JSON_RIGHTBRACE:
+            return json_object, tokens[1:]
+
+        while True:
+            if tokens:
+                json_key = tokens[0]
+                # print(f'Json key: {json_key}')
+
+                if type(json_key) is str:
+                    tokens = tokens[1:]
+                else:
+                    raise Exception('Expected string key, got: {}'.format(json_key))
+
+                if tokens:
+                    if tokens[0] != JSON_COLON:
+                        raise Exception('Expected colon after key in object, got: {}'.format(t))
+
+                    json_value, tokens = parse(tokens[1:])
+                    # print(f'Json value: {json_value}')
+
+                    json_object[json_key] = json_value
+
+                    # print(f'Json object: {json_object}')
+
+                if tokens:
+                    t = tokens[0]
+                    if t == JSON_RIGHTBRACE:
+                        return json_object, tokens[1:]
+                    elif t != JSON_COMMA:
+                        raise Exception('Expected comma after pair in object, got: {}'.format(t))
+
+                    tokens = tokens[1:]
+            else:
+                return json_object, None
+    return None, None
+    # raise Exception('Expected end-of-object brace')
+
+def parse(tokens):
+    if tokens:
+        t = tokens[0]
+
+        # print("PARSE: ")
+        # print(tokens)
+
+        if t == JSON_LEFTBRACKET:
+            return parse_array(tokens[1:])
+        elif t == JSON_LEFTBRACE:
+            return parse_object(tokens[1:])
+        else:
+            return t, tokens[1:]
+    else:
+        return None, None
+
+
+def report_parser(string):
+    tokens = lex(string)
+    return parse(tokens)[0]
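
report_parser is a small hand-rolled JSON lexer/parser; a quick check of its output (the input string is illustrative, and the import path is an assumption):

    from rparser import report_parser  # hypothetical import path

    report_parser('{"MetricId": "CPUUsage", "Values": [1, 2.5, null, true]}')
    # -> {'MetricId': 'CPUUsage', 'Values': [1, 2.5, None, True]}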

+ 125 - 0
telemetry/roles/slurm_telemetry/files/monster/schema.py

@@ -0,0 +1,125 @@
+"""
+MIT License
+
+Copyright (c) 2022 Texas Tech University
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+"""
+This file is part of MonSter.
+
+Author:
+    Jie Li, jie.li@ttu.edu
+"""
+
+import utils
+import logger
+
+log = logger.get_logger(__name__)
+
+
+def build_idrac_table_schemas(metric_definitions: list):
+    """build_table_schemas Build iDRAC Table Schemas
+
+    Build table schemas based on the idrac telemetry metric definitions
+
+    Args:
+        metric_definitions (list): idrac telemetry metric definitions
+    
+    Returns:
+        dict: iDRAC table schemas
+    """
+    table_schemas = {}
+
+    try:
+        for metric in metric_definitions:
+            table_name = metric['Id']
+            metric_type = metric['MetricDataType']
+            metric_unit = metric.get('Units', None)
+
+            # For network metrics, use BIG INT for storing the metric readings
+            if metric_unit == 'By' or metric_unit == 'Pkt':
+                value_type = 'BIGINT'
+            else:
+                value_type = utils.data_type_mapping.get(metric_type, 'TEXT')
+            
+            column_names = ['Timestamp', 'NodeID', 'Source', 'FQDD', 'Value']
+            column_types = ['TIMESTAMPTZ NOT NULL', 'INT NOT NULL', 'TEXT', \
+                            'TEXT', value_type]
+            
+            table_schemas.update({
+                table_name: {
+                    'column_names': column_names,
+                    'column_types': column_types,
+                }
+            })
+    except Exception as err:
+        log.error(f"Cannot build idrac table schemas: {err}")
+    return table_schemas
+
+
+def build_slurm_table_schemas():
+    """build_slurm_table_schemas Build Slurm Table Schemas
+
+    Build slurm table schemas for storing resource usage metrics obtained from 
+    slurm
+
+    Returns:
+        dict: slurm table schemas
+    """
+    table_schemas = {}
+    add_tables = {
+        'memoryusage':{
+            'add_columns': ['Value'],
+            'add_types': ['REAL']
+        },
+        'memory_used':{
+            'add_columns': ['Value'],
+            'add_types': ['INT']
+        },
+        'cpu_load':{
+            'add_columns': ['Value'],
+            'add_types': ['INT']
+        },
+        'state':{
+            'add_columns': ['Value'],
+            'add_types': ['INT']
+        },
+        'node_jobs':{
+            'add_columns': ['Jobs', 'CPUs'],
+            'add_types': ['INTEGER[]', 'INTEGER[]']
+        }
+    }
+    try:
+        for table_name, detail in add_tables.items():
+            column_names = ['Timestamp', 'NodeID']
+            column_types = ['TIMESTAMPTZ NOT NULL', 'INT NOT NULL']
+            column_names.extend(detail['add_columns'])
+            column_types.extend(detail['add_types'])
+
+            table_schemas.update({
+                table_name: {
+                    'column_names': column_names,
+                    'column_types': column_types
+                }
+            })
+    except Exception as err:
+        log.error(f'Cannot build slurm table schemas: {err}')
+    return table_schemas
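
All slurm tables share the Timestamp/NodeID column prefix; the resulting shape for one table (import path assumed):

    from schema import build_slurm_table_schemas  # hypothetical import path

    build_slurm_table_schemas()['node_jobs']
    # -> {'column_names': ['Timestamp', 'NodeID', 'Jobs', 'CPUs'],
    #     'column_types': ['TIMESTAMPTZ NOT NULL', 'INT NOT NULL',
    #                      'INTEGER[]', 'INTEGER[]']}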

+ 155 - 0
telemetry/roles/slurm_telemetry/files/monster/slurm.py

@@ -0,0 +1,155 @@
+"""
+MIT License
+
+Copyright (c) 2022 Texas Tech University
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+"""
+This file is part of MonSter.
+
+Author:
+    Jie Li, jie.li@ttu.edu
+"""
+
+import time
+import json
+import logger
+import requests
+import subprocess
+
+from requests.adapters import HTTPAdapter
+
+log = logger.get_logger(__name__)
+
+
+def read_slurm_token(slurm_config: dict):
+    """read_slurm_token Read Slurm token
+
+    Read the token file; if it is out of date, get a new token from Slurm
+
+    Args:
+        slurm_config (dict): Slurm Configuration
+    """
+    token = ""
+    try:
+        with open('./token.json', 'r') as f:
+            token_record = json.load(f)
+            time_interval = int(time.time()) - token_record['time']
+            if time_interval >= 3600:
+                token = get_slurm_token(slurm_config)
+            else:
+                token = token_record['token']
+    except Exception:
+        token = get_slurm_token(slurm_config)
+    return token
+
+
+def get_slurm_token(slurm_config: dict):
+    """get_slurm_token Get Slurm Token
+
+    Get JWT token from Slurm. This requires the public key on this node to be 
+    added to the target cluster headnode.
+
+    Args:
+        slurm_config (dict): Slurm Configuration
+
+    Returns:
+        str: token
+    """
+    
+    while True:
+        try:
+            # Setting command parameters
+            slurm_headnode = slurm_config['headnode']
+            
+            print("Get a new token...")
+            # The command used in cli
+            command = [f"ssh {slurm_headnode} 'scontrol token lifespan=3600'"]
+            # Get the string from command line
+            rtn_str = subprocess.run(command, shell=True, stdout=subprocess.PIPE).stdout.decode('utf-8')
+            # Get token
+            token = rtn_str.splitlines()[0].split('=')[1]
+            timestamp = int(time.time())
+
+            token_record = {
+                'time': timestamp,
+                'token': token
+            }
+
+            with open('./token.json', 'w') as f:
+                json.dump(token_record, f, indent = 4)
+            
+            return token
+        except Exception:
+            print("Failed to get a Slurm token; retrying in 60s.")
+            time.sleep(60)
+
+
+def call_slurm_api(slurm_config: dict, token: str, url: str):
+    """call_slurm_api Call Slurm API
+
+    Call Slurm API and get the data from the specified url
+
+    Args:
+        slurm_config (dict): Slurm Configuration
+        token (str): Slurm JWT token
+        url (str): Url of Slurm API
+
+    Returns:
+        dict: slurm metrics
+    """
+  
+    metrics = {}
+    headers = {"X-SLURM-USER-NAME": slurm_config['user'], 
+               "X-SLURM-USER-TOKEN": token}
+    adapter = HTTPAdapter(max_retries=3)
+    with requests.Session() as session:
+        session.mount(url, adapter)
+        try:
+            response = session.get(url, headers=headers)
+            metrics = response.json()
+        except Exception as err:
+            log.error(f"Fetch slurm metrics error: {err}")
+    return metrics
+
+
+def get_slurm_url(slurm_config: dict, url_type: str):
+    """get_slurm_nodes_url Get Slurm Nodes Url
+
+    Get the url for reading nodes info from slurm
+
+    Args:
+        slurm_config (dict): Slurm Configuration
+        url_type (str): Url type, 'nodes' or 'jobs'
+
+    Returns:
+        str: slurm api url
+    """
+    base_url = f"http://{slurm_config['ip']}:{slurm_config['port']}"
+    url_types = ['nodes', 'jobs']
+    if url_type not in url_types:
+        raise ValueError(f"Invalid url type. Expected one of: {url_types}")
+
+    if url_type == 'nodes':
+        url = f"{base_url}{slurm_config['slurm_nodes']}"
+    else:
+        url = f"{base_url}{slurm_config['slurm_jobs']}"
+    
+    return url
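
A sketch of get_slurm_url with a hypothetical slurm_config; the real one comes from utils.get_config('slurm_rest_api'), and the keys and endpoint paths shown are assumptions:

    slurm_config = {
        'ip': '10.0.0.1',
        'port': 6820,
        'slurm_nodes': '/slurm/v0.0.36/nodes',
        'slurm_jobs': '/slurm/v0.0.36/jobs',
    }
    get_slurm_url(slurm_config, 'nodes')
    # -> 'http://10.0.0.1:6820/slurm/v0.0.36/nodes'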

+ 410 - 0
telemetry/roles/slurm_telemetry/files/monster/sql.py

@@ -0,0 +1,410 @@
+"""
+MIT License
+
+Copyright (c) 2022 Texas Tech University
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+"""
+This file is part of MonSter.
+
+Author:
+    Jie Li, jie.li@ttu.edu
+"""
+
+import utils
+import logger
+from pgcopy import CopyManager
+from datetime import datetime
+
+log = logger.get_logger(__name__)
+
+
+def generate_metric_table_sqls(table_schemas: dict,
+                               schema_name: str):
+    """generate_metric_table_sqls General Metric Table Sqls
+
+    Generate sqls for creating metric tables
+
+    Args:
+        table_schemas (dict): table schemas
+        schema_name (str): schema name
+
+    Returns:
+        dict: sql statements
+    """
+    sql_statements = {}
+    try:
+        schema_sql = f"CREATE SCHEMA IF NOT EXISTS {schema_name};"
+        sql_statements.update({
+            'schema_sql': schema_sql
+        })
+        
+        tables_sql = []
+        for table, column in table_schemas.items():
+            column_names = column['column_names']
+            column_types = column['column_types']
+            
+            column_str = ''
+            for i, column in enumerate(column_names):
+                column_str += f'{column} {column_types[i]}, '
+
+            table_sql = f"CREATE TABLE IF NOT EXISTS {schema_name}.{table} \
+                ({column_str}FOREIGN KEY (NodeID) REFERENCES nodes (NodeID));"
+            tables_sql.append(table_sql)
+
+        sql_statements.update({
+            'tables_sql': tables_sql,
+        })
+
+    except Exception as err:
+        log.error(f'Cannot generate metric table SQLs: {err}')
+    
+    return sql_statements
+
+
+def generate_slurm_job_table_sql(schema_name: str):
+    """generate_slurm_job_table_sql Generate Slurm Job Table Sql
+
+    Generate sqls for creating the table that stores the jobs info
+
+    Args:
+        schema_name (str): schema name
+
+    Returns:
+        dict: sql statements
+    """
+    
+    sql_statements = {}
+    table = 'jobs'
+    try:
+        schema_sql = f"CREATE SCHEMA if NOT EXISTS {schema_name}"
+        sql_statements.update({
+            'schema_sql': schema_sql
+        })
+        tables_sql = []
+        column_names = ['job_id', 'array_job_id', 'array_task_id', 'name', 
+                        'job_state', 'user_id', 'user_name', 'group_id', 
+                        'cluster', 'partition', 'command', 
+                        'current_working_directory', 'batch_flag', 'batch_host',
+                        'nodes', 'node_count', 'cpus', 'tasks', 
+                        'tasks_per_node', 'cpus_per_task', 'memory_per_node', 
+                        'memory_per_cpu', 'priority', 'time_limit', 'deadline', 
+                        'submit_time', 'preempt_time', 'suspend_time', 
+                        'eligible_time', 'start_time', 'end_time', 
+                        'resize_time', 'restart_cnt', 'exit_code', 
+                        'derived_exit_code']
+        column_types = ['INT PRIMARY KEY', 'INT', 'INT', 'TEXT', 'TEXT', 'INT', 
+                        'TEXT', 'INT', 'TEXT', 'TEXT', 'TEXT', 'TEXT', 
+                        'BOOLEAN', 'TEXT', 'TEXT[]', 'INT', 'INT', 'INT', 'INT', 
+                        'INT', 'INT', 'INT', 'INT', 'INT', 'INT', 'INT', 'INT', 
+                        'INT', 'INT', 'INT', 'INT', 'INT', 'INT', 'INT', 'INT']
+        column_str = ''
+        for i, column in enumerate(column_names):
+            column_str += f'{column} {column_types[i]}, '
+
+        table_sql = f"CREATE TABLE IF NOT EXISTS {schema_name}.{table} \
+            ({column_str[:-2]});"
+        tables_sql.append(table_sql)
+
+        sql_statements.update({
+            'tables_sql': tables_sql,
+        })
+    except Exception as err:
+        log.error(f'Cannot generate job table SQLs: {err}')
+    
+    return sql_statements
+
+
+def generate_metric_def_table_sql():
+    """generate_metrics_def_table_sql Generate Metrics Definition Table Sql
+
+    Generate a sql for creating the metrics definition table
+
+    Returns:
+        str: sql string
+    """
+    metric_def_table_sql = "CREATE TABLE IF NOT EXISTS metrics_definition \
+            (id SERIAL PRIMARY KEY, metric_id TEXT NOT NULL, metric_name TEXT, \
+            description TEXT, metric_type TEXT,  metric_data_type TEXT, \
+            units TEXT, accuracy REAL, sensing_interval TEXT, \
+            discrete_values TEXT[], data_type TEXT, UNIQUE (id));"
+    return metric_def_table_sql
+
+
+def generate_metadata_table_sql(nodes_metadata: list, table_name: str):
+    """generate_metadata_table_sql Generate Metadata Table Sql
+
+    Generate a sql for creating the node metadata table
+
+    Args:
+        nodes_metadata (list): nodes metadata list
+        table_name (str): table name 
+
+    Returns:
+        str: sql string
+    """
+    column_names = list(nodes_metadata[0].keys())
+    column_str = ""
+    for column in column_names:
+        column_str += column + " TEXT, "
+    column_str = column_str[:-2]
+    metadata_table_sql = f" CREATE TABLE IF NOT EXISTS {table_name} \
+        ( NodeID SERIAL PRIMARY KEY, {column_str}, UNIQUE (NodeID));"
+    return metadata_table_sql
+
+
+def update_nodes_metadata(conn: object, nodes_metadata: list, table_name: str):
+    """update_nodes_metadata Update Nodes Metadata
+
+    Update nodes metadata table
+
+    Args:
+        conn (object): database connection
+        nodes_metadata (list): nodes metadata list
+        table_name (str): table name
+    """
+    cur = conn.cursor()
+    for record in nodes_metadata:
+        bmc_ip_addr = record['Bmc_Ip_Addr']
+        # Update every column except the key fields; pass the values as query
+        # parameters so quotes in the metadata cannot break the statement.
+        cols = [col for col in record if col not in ('Bmc_Ip_Addr', 'HostName')]
+        col_sql = ", ".join(f"{col.lower()} = %s" for col in cols)
+        sql = f"UPDATE {table_name} SET {col_sql} WHERE bmc_ip_addr = %s;"
+        values = [str(record[col]) for col in cols] + [bmc_ip_addr]
+        cur.execute(sql, values)
+    
+    conn.commit()
+    cur.close()
+
+
+def insert_nodes_metadata(conn: object, nodes_metadata: list, table_name: str):
+    """insert_nodes_metadata Insert Nodes Metadata
+
+    Insert nodes metadata to metadata table
+
+    Args:
+        conn (object): database connection
+        nodes_metadata (list): nodes metadata list
+        table_name (str): table name
+    """
+    cols = tuple(col.lower() for col in nodes_metadata[0].keys())
+    records = []
+    for record in nodes_metadata:
+        values = [str(value) for value in record.values()]
+        records.append(tuple(values))
+
+    mgr = CopyManager(conn, table_name, cols)
+    mgr.copy(records)
+    conn.commit()
+
+
+def check_table_exist(conn: object, table_name: str):
+    """check_table_exist Check Table Exists
+
+    Check if the specified table exists and already contains data
+
+    Args:
+        conn (object): database connection
+        table_name (str): table name
+
+    Returns:
+        bool: True if the table exists and holds at least one row, False otherwise
+    """
+    cur = conn.cursor()
+    cur.execute("SELECT EXISTS (SELECT FROM pg_tables WHERE tablename = %s);",
+                (table_name, ))
+    (table_exists, ) = cur.fetchall()[0]
+
+    data_exists = False
+    if table_exists:
+        cur.execute(f"SELECT EXISTS (SELECT * FROM {table_name});")
+        (data_exists, ) = cur.fetchall()[0]
+    cur.close()
+    return data_exists
+
+
+def write_metric_definitions(conn: object, metric_definitions: list):
+    """write_metric_definitions Write Metric Definitions
+
+    Write metric definitions to the table
+
+    Args:
+        conn (object): database connection
+        metric_definitions (list): the metric definitions
+    """
+    if not check_table_exist(conn, 'metrics_definition'):
+        cols = ('metric_id', 'metric_name', 'description', 'metric_type',
+                    'metric_data_type', 'units', 'accuracy', 'sensing_interval',
+                    'discrete_values', 'data_type')
+
+        metric_definitions_table = [
+            (i['Id'], i['Name'], i['Description'], i['MetricType'],
+             i['MetricDataType'], i['Units'], i['Accuracy'],
+             i['SensingInterval'], i['DiscreteValues'],
+             utils.data_type_mapping[i['MetricDataType']])
+            for i in metric_definitions]
+
+        # Sort by metric id for a stable insert order
+        metric_definitions_table = utils.sort_tuple_list(metric_definitions_table)
+        
+        mgr = CopyManager(conn, 'metrics_definition', cols)
+        mgr.copy(metric_definitions_table)
+    
+    conn.commit()
+
+
+def write_nodes_metadata(conn: object, nodes_metadata: list):
+    """write_nodes_metadata Write Nodes Metadata
+
+    Write nodes metadata to the table
+
+    Args:
+        conn (object): database connection
+        nodes_metadata (list): nodes metadata list
+    """
+    if not check_table_exist(conn, 'nodes'):
+        insert_nodes_metadata(conn, nodes_metadata, 'nodes') 
+    else:
+        update_nodes_metadata(conn, nodes_metadata, 'nodes')
+
+
+def generate_slurm_sql(metric: str, 
+                       start: str, 
+                       end: str, 
+                       interval: str, 
+                       aggregate: str):
+    """generate_slurm_sql Generate Slurm Sql
+
+    Generate sql for querying slurm metrics
+
+    Args:
+        metric (str): metric name
+        start (str): start of time range
+        end (str): end of time range
+        interval (str): aggregation interval
+        aggregate (str): aggregation function
+
+    Returns:
+        string: sql string
+    """
+    sql = ""
+    if metric == 'node_jobs':
+        sql = f"SELECT time_bucket_gapfill('{interval}', timestamp) AS time, \
+            nodeid, jsonb_agg(jobs) AS jobs, jsonb_agg(cpus) AS cpus \
+            FROM slurm.{metric} \
+            WHERE timestamp >= '{start}' \
+            AND timestamp <= '{end}' \
+            GROUP BY time, nodeid \
+            ORDER BY time;"
+    else:
+        sql = f"SELECT time_bucket_gapfill('{interval}', timestamp) AS time, \
+            nodeid, {aggregate}(value) AS value\
+            FROM slurm.{metric} \
+            WHERE timestamp >= '{start}' \
+            AND timestamp <= '{end}' \
+            GROUP BY time, nodeid \
+            ORDER BY time;"
+    return sql
+
+
+def generate_idrac_sql(metric: str, 
+                       fqdd: str,
+                       start: str, 
+                       end: str, 
+                       interval: str, 
+                       aggregate: str):
+    """generate_idrac_sql Generate iDRAC Sql
+
+    Generate sql for querying idrac metrics
+
+    Args:
+        metric (str): metric name
+        fqdd (str): Fully Qualified Device Descriptor
+        start (str): start of time range
+        end (str): end of time range
+        interval (str): aggregation interval
+        aggregate (str): aggregation function
+    
+    Returns:
+        string: sql string
+    """
+    schema = 'idrac'
+    sql = f"SELECT time_bucket_gapfill('{interval}', timestamp) AS time, \
+        nodeid, fqdd AS label, {aggregate}(value) AS value \
+        FROM {schema}.{metric} \
+        WHERE timestamp >= '{start}' \
+        AND timestamp < '{end}' \
+        AND fqdd = '{fqdd}' \
+        GROUP BY time, nodeid, label \
+        ORDER BY time;"
+    return sql
+
+
+def generate_slurm_jobs_sql(start: str, end: str):
+    """generate_slurm_jobs_sql Generate Slurm Jobs Sql
+
+    Generate Sql for querying slurm jobs info
+
+    Args:
+        start (str): start time
+        end (str): end time
+
+    Returns:
+        string: sql string
+    """
+    utc_from = datetime.strptime(start, '%Y-%m-%dT%H:%M:%S.%fZ')
+    epoch_from = int((utc_from - datetime(1970, 1, 1)).total_seconds())
+    utc_to = datetime.strptime(end, '%Y-%m-%dT%H:%M:%S.%fZ')
+    epoch_to = int((utc_to - datetime(1970, 1, 1)).total_seconds())
+
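+    # A job overlaps the queried window iff it started before the window
+    # ends and ended after the window starts.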
+    sql = f"SELECT * FROM slurm.jobs \
+            WHERE start_time < {epoch_to} \
+            AND end_time > {epoch_from};"
+    return sql
+
+
+def generate_node_jobs_sql(start: str, end: str, interval: str):
+    """gene_node_jobs_sql Generate Node-Jobs Sql
+
+    Generate SQL for querying node-jobs correlation
+
+    Args:
+        start (str): start time
+        end (str): end time
+        interval (str): interval for aggregation
+
+    Returns:
+        string: sql string
+    """
+    sql = f"SELECT time_bucket_gapfill('{interval}', timestamp) AS time, \
+            nodeid, jsonb_agg(jobs) AS jobs, jsonb_agg(cpus) AS cpus \
+            FROM slurm.node_jobs \
+            WHERE timestamp >= '{start}' \
+            AND timestamp <= '{end}' \
+            GROUP BY time, nodeid \
+            ORDER BY time;"
+    return sql
+
+

+ 109 - 0
telemetry/roles/slurm_telemetry/files/monster/tsdb.py

@@ -0,0 +1,109 @@
+"""
+MIT License
+
+Copyright (c) 2022 Texas Tech University
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+"""
+This file is part of MonSter.
+
+Author:
+    Jie Li, jie.li@ttu.edu
+"""
+
+import sql
+import utils
+import idrac
+import logger
+import schema
+import psycopg2
+
+log = logger.get_logger(__name__)
+
+
+def init_tsdb():
+    """init_tsdb Initialize TimeScaleDB
+
+    Initialize TimeScaleDB; the database specified in the configuration file
+    should be created before running this function.
+    """
+    connection = utils.init_tsdb_connection()
+    username, password = utils.get_idrac_auth()
+    nodelist = utils.get_nodelist()
+
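+    # Metric definitions are assumed identical across iDRACs, so only the
+    # first node is queried.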
+    node = nodelist[0]
+
+    utils.print_status('Getting', 'metric', 'definitions')
+    metric_definitions = idrac.get_metric_definitions(node, username, password)
+    
+    utils.print_status('Getting', 'nodes', 'metadata')
+    nodes_metadata = idrac.get_nodes_metadata(nodelist, username, password)
+    
+    idrac_table_schemas = schema.build_idrac_table_schemas(metric_definitions)
+    slurm_table_schemas = schema.build_slurm_table_schemas()
+
+    with psycopg2.connect(connection) as conn:
+        cur = conn.cursor()
+
+        # Create node metadata table
+        utils.print_status('Creating', 'TimeScaleDB', 'tables')
+        metadata_sql = sql.generate_metadata_table_sql(nodes_metadata, 'nodes')
+        cur.execute(metadata_sql)
+        sql.write_nodes_metadata(conn, nodes_metadata)
+
+        # Create schema for idrac
+        idrac_sqls = sql.generate_metric_table_sqls(idrac_table_schemas, 'idrac')
+        cur.execute(idrac_sqls['schema_sql'])
+
+        # Create schema for slurm
+        slurm_sqls = sql.generate_metric_table_sqls(slurm_table_schemas, 'slurm')
+        cur.execute(slurm_sqls['schema_sql'])
+
+        # Create idrac and slurm tables
+        all_sqls = idrac_sqls['tables_sql'] + slurm_sqls['tables_sql']
+        for s in all_sqls:
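+            # The qualified table name is the 6th whitespace-separated token
+            # of "CREATE TABLE IF NOT EXISTS <schema>.<table> (...".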
+            table_name = s.split(' ')[5]
+            cur.execute(s)
+
+            # Create hypertable
+            create_hypertable_sql = "SELECT create_hypertable(" + "'" \
+                + table_name + "', 'timestamp', if_not_exists => TRUE);"
+            cur.execute(create_hypertable_sql)
+        
+        # Create table for jobs info
+        slurm_job_sql = sql.generate_slurm_job_table_sql('slurm')
+        cur.execute(slurm_job_sql['schema_sql'])
+        for s in slurm_job_sql['tables_sql']:
+            table_name = s.split(' ')[5]
+            cur.execute(s)
+        
+        # Create table for metric definitions
+        metric_def_sql = sql.generate_metric_def_table_sql()
+        cur.execute(metric_def_sql)
+        sql.write_metric_definitions(conn, metric_definitions)
+        
+        conn.commit()
+        cur.close()
+    utils.print_status('Finish', 'tables', 'initialization!')
+
+if __name__ == '__main__':
+    init_tsdb()

+ 296 - 0
telemetry/roles/slurm_telemetry/files/monster/utils.py

@@ -0,0 +1,296 @@
+"""
+MIT License
+
+Copyright (c) 2022 Texas Tech University
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+"""
+This file is part of MonSter.
+
+Author:
+    Jie Li, jie.li@ttu.edu
+"""
+
+import yaml
+import logger
+import hostlist
+import psycopg2
+from pathlib import Path
+
+log = logger.get_logger(__name__)
+
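+# Map iDRAC/Redfish metric data types to TimescaleDB column types.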
+data_type_mapping = {
+    'Decimal': 'REAL',
+    'Integer': 'BIGINT',
+    'DateTime': 'TIMESTAMPTZ',
+    'Enumeration': 'TEXT',
+}
+
+DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S'
+
+class bcolors:
+    HEADER = '\033[95m'
+    OKBLUE = '\033[94m'
+    OKCYAN = '\033[96m'
+    OKGREEN = '\033[92m'
+    WARNING = '\033[93m'
+    FAIL = '\033[91m'
+    ENDC = '\033[0m'
+    BOLD = '\033[1m'
+    UNDERLINE = '\033[4m'
+
+
+def print_status(action: str, target: str, obj: str):
+    """print_status Print Status
+
+    Print status in a nice way
+
+    Args:
+        action (str): the action being performed
+        target (str): the target of the action (printed highlighted)
+        obj (str): the object of the action
+    """
+    print(f'{action} {bcolors.OKBLUE}{target}{bcolors.ENDC} {obj}...')
+
+
+def parse_config():
+    """parse_config Parse Config
+
+    Parse configuration files
+
+    Returns:
+        dict: the parsed configuration
+    """
+    monster_path = Path(__file__).resolve().parent
+    try:
+        with open(f'{monster_path}/config.yml', 'r') as ymlfile:
+            return yaml.safe_load(ymlfile)
+    except Exception as err:
+        log.error(f"Parsing Configuration Error: {err}")
+
+
+def get_idrac_auth():
+    """get_idrac_auth Get iDRAC Authentication
+
+    Get username and password for accessing idrac reports
+    """
+    idrac_config = parse_config()['idrac']
+    username = idrac_config['username']
+    password = idrac_config['password']
+    return username, password
+
+
+def get_config(target: str):
+    """get_config Get Config
+
+    Get Configuration for the specified target 
+
+    Args:
+        target (str): configuration target
+
+    Raises:
+        ValueError: Invalid configuration target
+
+    Returns:
+        dict: configurations of specified target
+    """
+    
+    targets = ['timescaledb', 'idrac', 'slurm_rest_api']
+    if target not in targets:
+        raise ValueError(f"Invalid configuration target. Expected one of: {targets}")
+
+    config = parse_config()[target]
+    return config
+
+
+def init_tsdb_connection():
+    """init_tsdb_connection Initialize TimeScaleDB Connection
+
+    Initialize TimeScaleDB Connection according to the configuration
+    """
+    config_tsdb = parse_config()['timescaledb']
+
+    db_host = config_tsdb['host']
+    db_port = config_tsdb['port']
+    db_user = config_tsdb['username']
+    db_pswd = config_tsdb['password']
+    db_dbnm = config_tsdb['database']
+    connection = f"postgresql://{db_user}:{db_pswd}@{db_host}:{db_port}/{db_dbnm}"
+    return connection
+
+
+def get_nodelist():
+    """get_nodelist Get Nodelist
+
+    Generate the nodelist according to the configuration
+    """
+    idrac_config = parse_config()['idrac']['nodelist']
+    nodelist = []
+
+    try:
+        for i in idrac_config:
+            nodes = hostlist.expand_hostlist(i)
+            nodelist.extend(nodes)
+        
+        return nodelist
+    except Exception as err:
+        log.error(f"Cannot generate nodelist: {err}")
+
+
+def sort_tuple_list(tuple_list:list):
+    """sort_tuple Sort a list of tuple
+
+    Sort tuple. Ref: https://www.geeksforgeeks.org/python-program-to-sort-a-\
+    list-of-tuples-by-second-item/
+
+    Args:
+        tuple_list (list): a list of tuple
+    """
+    tuple_list.sort(key=lambda x: x[0])
+    return tuple_list
+
+
+def get_os_idrac_hostname_mapping():
+    """get_os_idrac_hostname_mapping Get OS iDRAC hostname mapping
+
+    Read configuration file and get OS idrac hostname mapping if configured
+    """
+    hostnames_mapping = parse_config()['hostnames']
+    return hostnames_mapping
+
+
+def get_node_id_mapping(connection: str):
+    """get_node_id_mapping Get Node-Id Mapping
+
+    Get node-id mapping from the nodes metadata table
+
+    Args:
+        connection (str): timescaledb connection
+
+    Returns:
+        dict: node-id mapping
+    """
+    
+    mapping = {}
+    try:
+        with psycopg2.connect(connection) as conn:
+            cur = conn.cursor()
+            query = "SELECT nodeid, hostname FROM nodes"
+            cur.execute(query)
+            for (nodeid, hostname) in cur.fetchall():
+                mapping.update({
+                    hostname: nodeid
+                })
+            cur.close()
+            return mapping
+    except Exception as err:
+        log.error(f"Cannot generate node-id mapping: {err}")
+
+
+def get_metric_dtype_mapping(conn: object):
+    """get_table_dtype_mapping Get Metric-datatype mapping
+
+    Get Metric-datatype mapping from the metric definition
+
+    Args:
+        conn (object): TimeScaleDB connection object
+
+    Returns:
+        dict: Metric-datatype mapping
+    """
+    mapping = {}
+    cur = conn.cursor()
+    query = "SELECT metric_id, data_type FROM metrics_definition;"
+    cur.execute(query)
+    for (metric, data_type) in cur.fetchall():
+        mapping.update({
+            metric: data_type
+        })
+    cur.close()
+    return mapping
+
+
+def get_ip_id_mapping(conn: object):
+    """get_ip_id_mapping Get IP-ID mapping
+
+    Get iDRAC-ip address - node-id mapping
+
+    Args:
+        conn (object): TimeScaleDB connection object
+
+    Returns:
+        dict: ip-id mapping
+    """
+    mapping = {}
+    cur = conn.cursor()
+    query = "SELECT nodeid, bmc_ip_addr FROM nodes"
+    cur.execute(query)
+    for (nodeid, bmc_ip_addr) in cur.fetchall():
+        mapping.update({
+            bmc_ip_addr: nodeid
+        })
+    cur.close()
+    return mapping
+
+
+def cast_value_type(value, dtype):
+    """cast_value_type Cast Value Data Type
+
+    Cast value data type based on the datatype in TimeScaleDB
+
+    Args:
+        value: raw value to be cast
+        dtype (str): TimeScaleDB data type ('BIGINT', 'REAL', ...)
+
+    Returns:
+        object: value cast to the target type
+    """
+    try:
+        if dtype =="BIGINT":
+            return int(value)
+        elif dtype == "REAL":
+            return float(value)
+        else:
+            return value
+    except ValueError:
+        return value
+
+
+def partition(arr:list, cores: int):
+    """
+    Partition urls/nodes into several groups based on # of cores
+    """
+    groups = []
+    try:
+        arr_len = len(arr)
+        arr_per_core = arr_len // cores
+        arr_surplus = arr_len % cores
+
+        increment = 1
+        for i in range(cores):
+            if arr_surplus != 0 and i == (cores - 1):
+                groups.append(arr[i * arr_per_core:])
+            else:
+                groups.append(arr[i * arr_per_core : increment * arr_per_core])
+                increment += 1
+    except Exception as err:
+        log.error(f"Cannot Partition List : {err}")
+    return groups