# dag6_seqrun_processing.py (2.7 KB)
  1. from datetime import timedelta
  2. from airflow.models import DAG,Variable
  3. from airflow.utils.dates import days_ago
  4. from airflow.operators.bash_operator import BashOperator
  5. from airflow.contrib.operators.ssh_operator import SSHOperator
  6. from airflow.contrib.hooks.ssh_hook import SSHHook
  7. ## ARGS
  8. default_args = {
  9. 'owner': 'airflow',
  10. 'depends_on_past': False,
  11. 'start_date': days_ago(2),
  12. 'email_on_failure': False,
  13. 'email_on_retry': False,
  14. 'retries': 1,
  15. 'retry_delay': timedelta(minutes=5),
  16. }
  17. ## DAG
  18. dag = \
  19. DAG(
  20. dag_id='dag6_seqrun_processing',
  21. catchup=False,
  22. schedule_interval="@hourly",
  23. max_active_runs=1,
  24. tags=['hpc','orwell'],
  25. default_args=default_args)
  26. ## SSH HOOK
  27. orwell_ssh_hook = \
  28. SSHHook(
  29. key_file=Variable.get('hpc_ssh_key_file'),
  30. username=Variable.get('hpc_user'),
  31. remote_host='orwell.hh.med.ic.ac.uk')
  32. hpc_hook = SSHHook(ssh_conn_id='hpc_conn')
  33. with dag:
  34. ## TASK
  35. switch_off_project_barcode = \
  36. SSHOperator(
  37. task_id='switch_off_project_barcode',
  38. dag=dag,
  39. ssh_hook=orwell_ssh_hook,
  40. queue='hpc_4G',
  41. command='bash /home/igf/igf_code/IGF-cron-scripts/orwell/switch_off_project_barcode_check.sh ')
  42. ## TASK
  43. change_samplesheet_for_run = \
  44. SSHOperator(
  45. task_id='change_samplesheet_for_run',
  46. dag=dag,
  47. queue='hpc_4G',
  48. ssh_hook=orwell_ssh_hook,
  49. command='bash /home/igf/igf_code/IGF-cron-scripts/orwell/change_samplesheet_for_seqrun.sh ')
  50. ## TASK
  51. restart_seqrun_processing = \
  52. SSHOperator(
  53. task_id='restart_seqrun_processing',
  54. dag=dag,
  55. queue='hpc_4G',
  56. ssh_hook=orwell_ssh_hook,
  57. command='bash /home/igf/igf_code/IGF-cron-scripts/orwell/restart_seqrun_processing.sh ')
  58. ## TASK
  59. register_project_metadata = \
  60. SSHOperator(
  61. task_id='register_project_metadata',
  62. dag=dag,
  63. queue='hpc_4G',
  64. ssh_hook=orwell_ssh_hook,
  65. command='bash /home/igf/igf_code/IGF-cron-scripts/orwell/register_metadata.sh ')
  66. ## TASK
  67. find_new_seqrun = \
  68. SSHOperator(
  69. task_id='find_new_seqrun',
  70. dag=dag,
  71. queue='hpc_4G',
  72. ssh_hook=orwell_ssh_hook,
  73. command='bash /home/igf/igf_code/IGF-cron-scripts/orwell/find_new_seqrun.sh ')
  74. ## TASK
  75. seed_demultiplexing_pipe = \
  76. SSHOperator(
  77. task_id='seed_demultiplexing_pipe',
  78. dag=dag,
  79. ssh_hook=hpc_hook,
  80. queue='hpc_4G',
  81. command='bash /rds/general/user/igf/home/git_repo/IGF-cron-scripts/hpc/seed_demultiplexing_pipeline.sh ')
  82. ## PIPELINE
  83. switch_off_project_barcode >> change_samplesheet_for_run >> restart_seqrun_processing
  84. restart_seqrun_processing >> register_project_metadata >> find_new_seqrun >> seed_demultiplexing_pipe