dag7_hpc_scheduler.py

from datetime import timedelta
from airflow.models import DAG, Variable
from airflow.utils.dates import days_ago
from airflow.operators.bash_operator import BashOperator
from airflow.contrib.operators.ssh_operator import SSHOperator
from airflow.contrib.hooks.ssh_hook import SSHHook
## ARG
## Default arguments applied to every task in this DAG
default_args = {
  'owner': 'airflow',
  'depends_on_past': False,
  'start_date': days_ago(2),
  'email_on_failure': False,
  'email_on_retry': False,
  'retries': 1,
  'retry_delay': timedelta(minutes=5),
}
## DAG
## Runs every 15 minutes; catchup is disabled and only one run is active at a time
dag = \
  DAG(
    dag_id='dag7_hpc_scheduler',
    catchup=False,
    schedule_interval="*/15 * * * *",
    max_active_runs=1,
    tags=['igf-lims', 'hpc'],
    default_args=default_args)
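## NOTE: the hooks below assume that an Airflow connection named 'hpc_conn' and
## the Variables 'hpc_ssh_key_file', 'hpc_user' and 'igf_lims_server_hostname'
## are already registered in the Airflow metadata database (via the UI or CLI);
## Variable.get raises an error at DAG parse time if any of them is missing.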
## SSH HOOK
## Hook for the HPC login node, configured via the 'hpc_conn' connection
hpc_hook = SSHHook(ssh_conn_id='hpc_conn')
## Hook for the igf-lims server, built from Airflow Variables
igf_lims_ssh_hook = \
  SSHHook(
    key_file=Variable.get('hpc_ssh_key_file'),
    username=Variable.get('hpc_user'),
    remote_host=Variable.get('igf_lims_server_hostname'))
with dag:
  ## TASK
  ## Submit the HPC scheduler script to the cluster batch system via qsub
  run_hpc_scheduler = \
    SSHOperator(
      task_id='run_hpc_scheduler',
      dag=dag,
      ssh_hook=hpc_hook,
      queue='generic',
      ## trailing space after the script path stops Jinja from treating the
      ## '.sh' file as a template to be rendered
      command="""
      source /etc/bashrc; \
      qsub /project/tgu/data2/airflow_test/github/data-management-python/scripts/hpc/run_hpc_scheduler.sh """)
  ## TASK
  ## Restart the Flower monitoring container on the igf-lims server
  restart_flower_server = \
    SSHOperator(
      task_id='restart_flower_server',
      dag=dag,
      ssh_hook=igf_lims_ssh_hook,
      queue='hpc_4G',
      command="docker restart airflow_flower")
  ## PIPELINE
  ## Restart Flower only after the HPC scheduler job has been submitted
  run_hpc_scheduler >> restart_flower_server
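## A single task can be exercised from the command line without the scheduler,
## e.g. with the Airflow 1.10-style CLI that matches these import paths:
##   airflow test dag7_hpc_scheduler run_hpc_scheduler 2021-01-01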