hpc_queue.py 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738
  1. import json
  2. import subprocess
  3. from collections import defaultdict
  4. from tempfile import TemporaryFile
  def get_pbspro_job_count(job_name_prefix=''):
      '''
      Count queued and running jobs on a PBSPro HPC cluster, grouped by job name.

      Runs ``qstat -t -f -F json`` and tallies every job whose ``Job_Name``
      starts with ``job_name_prefix``. Only the states 'Q' (queued) and
      'R' (running) are counted; jobs in any other state are ignored and do
      not create an entry in the result. Job records without a string
      ``Job_Name`` are skipped, since they cannot be matched or keyed.

      :param job_name_prefix: A text to filter jobs by name, default '' (all jobs)
      :returns: A defaultdict object with the following structure
                { job_name: {'Q': counts, 'R': counts }}
                It is empty (but still a nested defaultdict) when qstat
                reports no jobs.
      :raises ValueError: If qstat cannot be run, exits with a non-zero
                          status, or its output cannot be parsed
      '''
      try:
          # check_output raises CalledProcessError on a non-zero exit status
          # and returns the raw stdout bytes, which json.loads accepts directly.
          qstat_output = subprocess.check_output(
              ['qstat', '-t', '-f', '-F', 'json'])
          json_data = json.loads(qstat_output)
          # Always hand back the nested defaultdict, even when the 'Jobs' key
          # is absent, so callers can index result[name]['Q'] without a KeyError.
          active_jobs = defaultdict(lambda: defaultdict(int))
          jobs = json_data.get('Jobs') or {}
          for job_data in jobs.values():
              job_name = job_data.get('Job_Name')
              if not isinstance(job_name, str):
                  # A record without a usable name must not abort the count.
                  continue
              if not job_name.startswith(job_name_prefix):
                  continue
              job_state = job_data.get('job_state')
              if job_state in ('Q', 'R'):
                  active_jobs[job_name][job_state] += 1
          return active_jobs
      except Exception as e:
          # Callers expect a ValueError; chain the cause to keep the traceback.
          raise ValueError(
              'Failed to get job counts from hpc, error: {0}'.format(e)) from e