"""Airflow DAG ``dag5_primary_analysis_and_qc_processing``.

Scheduled hourly, with at most one active run and no catch-up of missed
intervals. Each run executes three tasks strictly in sequence:

1. ``update_exp_metadata``       - local bash script (BashOperator)
2. ``find_new_exp_for_analysis`` - script run over SSH via ``orwell_ssh_hook``
3. ``seed_analysis_pipeline``    - script run over SSH via ``hpc_hook``

All three tasks are submitted to the ``hpc_4G`` queue.
"""
from datetime import timedelta

from airflow.models import DAG, Variable
from airflow.utils.dates import days_ago
from airflow.operators.bash_operator import BashOperator
from airflow.contrib.operators.ssh_operator import SSHOperator
from airflow.contrib.hooks.ssh_hook import SSHHook

## ARGS
# Defaults applied to every task in the DAG: one retry after five minutes,
# no e-mail notifications, and no dependency on the previous run's state.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(2),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

## SSH HOOK
# Hook built from Airflow Variables. ``default_var=None`` means a missing
# Variable yields None instead of raising while the DAG file is parsed.
# NOTE(review): these Variable lookups run on every parse of this file.
orwell_ssh_hook = SSHHook(
    key_file=Variable.get('hpc_ssh_key_file', default_var=None),
    username=Variable.get('hpc_user', default_var=None),
    remote_host=Variable.get('orwell_server_hostname', default_var=None),
)

# Hook resolved from the stored Airflow connection ``hpc_conn``.
hpc_hook = SSHHook(ssh_conn_id='hpc_conn')

## DAG
dag = DAG(
    dag_id='dag5_primary_analysis_and_qc_processing',
    schedule_interval="@hourly",
    max_active_runs=1,
    catchup=False,
    tags=['hpc', 'orwell'],
    default_args=default_args,
)

# Operators created inside the ``with`` block are attached to ``dag``
# automatically, so no explicit ``dag=`` argument is needed.
#
# NOTE: the trailing space after ``.sh`` in every command below is
# deliberate. A command string that ends in ``.sh`` is treated by Airflow
# as the path of a Jinja template file to load; the trailing space makes it
# a literal shell command instead. Do not strip it.
with dag:
    ## TASK
    update_exp_metadata = BashOperator(
        task_id='update_exp_metadata',
        queue='hpc_4G',
        bash_command='bash /rds/general/user/igf/home/git_repo/IGF-cron-scripts/hpc/update_exp_metadata.sh ',
    )

    ## TASK
    find_new_exp_for_analysis = SSHOperator(
        task_id='find_new_exp_for_analysis',
        ssh_hook=orwell_ssh_hook,
        queue='hpc_4G',
        command='bash /home/igf/igf_code/IGF-cron-scripts/orwell/find_new_exp_for_analysis.sh ',
    )

    ## TASK
    seed_analysis_pipeline = SSHOperator(
        task_id='seed_analysis_pipeline',
        ssh_hook=hpc_hook,
        queue='hpc_4G',
        command='bash /rds/general/user/igf/home/git_repo/IGF-cron-scripts/hpc/seed_analysis_pipeline.sh ',
    )

    ## PIPELINE
    # Strictly linear: each task starts only after the previous one succeeds.
    update_exp_metadata >> find_new_exp_for_analysis >> seed_analysis_pipeline