# dag5_primary_analysis_and_qc_processing.py

from datetime import timedelta

from airflow.contrib.hooks.ssh_hook import SSHHook
from airflow.contrib.operators.ssh_operator import SSHOperator
from airflow.models import DAG, Variable
from airflow.operators.bash_operator import BashOperator
from airflow.utils.dates import days_ago
  7. default_args = {
  8. 'owner': 'airflow',
  9. 'depends_on_past': False,
  10. 'start_date': days_ago(2),
  11. 'email_on_failure': False,
  12. 'email_on_retry': False,
  13. 'retries': 1,
  14. 'retry_delay': timedelta(minutes=5),
  15. }
  16. orwell_ssh_hook = \
  17. SSHHook(
  18. key_file=Variable.get('hpc_ssh_key_file'),
  19. username=Variable.get('hpc_user'),
  20. remote_host='orwell.hh.med.ic.ac.uk')
  21. hpc_hook = SSHHook(ssh_conn_id='hpc_conn')
  22. dag = \
  23. DAG(
  24. dag_id='dag5_primary_analysis_and_qc_processing',
  25. schedule_interval="@hourly",
  26. max_active_runs=1,
  27. catchup=False,
  28. tags=['hpc','orwell'],
  29. default_args=default_args)
  30. with dag:
  31. update_exp_metadata = \
  32. BashOperator(
  33. task_id = 'update_exp_metadata',
  34. dag = dag,
  35. queue='hpc_4G',
  36. bash_command = 'bash /rds/general/user/igf/home/git_repo/IGF-cron-scripts/hpc/update_exp_metadata.sh '
  37. )
  38. find_new_exp_for_analysis = \
  39. SSHOperator(
  40. task_id = 'find_new_exp_for_analysis',
  41. dag = dag,
  42. ssh_hook = orwell_ssh_hook,
  43. queue='hpc_4G',
  44. command = 'bash /home/igf/igf_code/IGF-cron-scripts/orwell/find_new_exp_for_analysis.sh '
  45. )
  46. seed_analysis_pipeline = \
  47. SSHOperator(
  48. task_id = 'seed_analysis_pipeline',
  49. dag = dag,
  50. ssh_hook=hpc_hook,
  51. queue='hpc_4G',
  52. command = 'bash /rds/general/user/igf/home/git_repo/IGF-cron-scripts/hpc/seed_analysis_pipeline.sh '
  53. )
  54. update_exp_metadata >> find_new_exp_for_analysis >> seed_analysis_pipeline