# dag9_tenx_single_cell_immune_profiling.py
import os,json,logging,subprocess
from datetime import timedelta

from airflow.models import DAG,Variable
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import BranchPythonOperator
from airflow.operators.python_operator import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule

from igf_airflow.logging.upload_log_msg import send_log_to_channels,log_success,log_failure,log_sleep
from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import fetch_analysis_info_and_branch_func
from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import configure_cellranger_run_func
from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_sc_read_trimmming_func
from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_cellranger_tool
from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import decide_analysis_branch_func
from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_cellranger_result_to_db_func
from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import ftp_files_upload_for_analysis
from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import irods_files_upload_for_analysis
from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_scanpy_for_sc_5p_func
from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_singlecell_notebook_wrapper_func
from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_analysis_files_func
  20. ## ARGS
  21. default_args = {
  22. 'owner': 'airflow',
  23. 'depends_on_past': False,
  24. 'start_date': days_ago(2),
  25. 'email_on_failure': False,
  26. 'email_on_retry': False,
  27. 'retries': 4,
  28. 'max_active_runs':10,
  29. 'catchup':False,
  30. 'retry_delay': timedelta(minutes=5),
  31. 'provide_context': True,
  32. }
  33. FEATURE_TYPE_LIST = Variable.get('tenx_single_cell_immune_profiling_feature_types')
  34. ## DAG
  35. dag = \
  36. DAG(
  37. dag_id='dag9_tenx_single_cell_immune_profiling',
  38. schedule_interval=None,
  39. tags=['hpc','analysis','tenx','sc'],
  40. default_args=default_args,
  41. orientation='LR')
  42. with dag:
  43. ## TASK
  44. fetch_analysis_info_and_branch = \
  45. BranchPythonOperator(
  46. task_id='fetch_analysis_info',
  47. dag=dag,
  48. queue='hpc_4g',
  49. params={'no_analysis_task':'no_analysis',
  50. 'analysis_description_xcom_key':'analysis_description',
  51. 'analysis_info_xcom_key':'analysis_info'},
  52. python_callable=fetch_analysis_info_and_branch_func)
  53. ## TASK
  54. configure_cellranger_run = \
  55. PythonOperator(
  56. task_id='configure_cellranger_run',
  57. dag=dag,
  58. queue='hpc_4g',
  59. params={'xcom_pull_task_id':'fetch_analysis_info_and_branch',
  60. 'analysis_description_xcom_key':'analysis_description',
  61. 'analysis_info_xcom_key':'analysis_info',
  62. 'library_csv_xcom_key':'cellranger_library_csv'},
  63. python_callable=configure_cellranger_run_func)
  64. for analysis_name in FEATURE_TYPE_LIST:
  65. ## TASK
  66. task_branch = \
  67. DummyOperator(
  68. task_id=analysis_name,
  69. dag=dag)
  70. run_trim_list = list()
  71. for run_id in range(0,32):
  72. ## TASK
  73. t = \
  74. PythonOperator(
  75. task_id='run_trim_{0}_{1}'.format(analysis_name,run_id),
  76. dag=dag,
  77. params={'xcom_pull_task_id':'fetch_analysis_info_and_branch',
  78. 'analysis_info_xcom_key':'analysis_info',
  79. 'analysis_name':analysis_name,
  80. 'run_id':run_id,
  81. 'r1_length':26,
  82. 'r2_length':0,
  83. 'fastq_input_dir_tag':'fastq_dir',
  84. 'fastq_output_dir_tag':'output_path'},
  85. python_callable=run_sc_read_trimmming_func)
  86. run_trim_list.append(t)
  87. ## TASK
  88. collect_trimmed_files = \
  89. DummyOperator(
  90. task_id='collect_trimmed_files_{0}'.format(analysis_name),
  91. dag=dag)
  92. ## PIPELINE
  93. fetch_analysis_info_and_branch >> task_branch
  94. task_branch >> run_trim_list
  95. run_trim_list >> collect_trimmed_files
  96. collect_trimmed_files >> configure_cellranger_run
  97. ## TASK
  98. no_analysis = \
  99. DummyOperator(
  100. task_id='no_analysis',
  101. dag=dag)
  102. ## PIPELINE
  103. fetch_analysis_info_and_branch >> no_analysis
  104. ## TASK
  105. run_cellranger = \
  106. PythonOperator(
  107. task_id='run_cellranger',
  108. dag=dag,
  109. params={'analysis_description_xcom_pull_task':'fetch_analysis_info_and_branch',
  110. 'analysis_description_xcom_key':'analysis_description',
  111. 'library_csv_xcom_key':'cellranger_library_csv',
  112. 'library_csv_xcom_pull_task':'configure_cellranger_run',
  113. 'cellranger_xcom_key':'cellranger_output',
  114. 'cellranger_options':['--localcores 8','--localmem 64']},
  115. python_callable=run_cellranger_tool)
  116. ## PIPELINE
  117. configure_cellranger_run >> run_cellranger
  118. ## TASK
  119. decide_analysis_branch = \
  120. BranchPythonOperator(
  121. task_id='decide_analysis_branch',
  122. dag=dag,
  123. python_callable=decide_analysis_branch_func,
  124. params={'load_cellranger_result_to_db_task':'load_cellranger_result_to_db',
  125. 'run_scanpy_for_sc_5p_task':'run_scanpy_for_sc_5p',
  126. 'run_scirpy_for_vdj_task':'run_scirpy_for_vdj',
  127. 'run_scirpy_for_vdj_b_task':'run_scirpy_for_vdj_b',
  128. 'run_scirpy_vdj_t_task':'run_scirpy_vdj_t',
  129. 'run_seurat_for_sc_5p_task':'run_seurat_for_sc_5p',
  130. 'run_picard_alignment_summary_task':'run_picard_alignment_summary',
  131. 'convert_bam_to_cram_task':'convert_bam_to_cram',
  132. 'library_csv_xcom_key':'cellranger_library_csv',
  133. 'library_csv_xcom_pull_task':'configure_cellranger_run'})
  134. ## PIPELINE
  135. run_cellranger >> decide_analysis_branch
  136. ## TASK
  137. load_cellranger_result_to_db = \
  138. PythonOperator(
  139. task_id='load_cellranger_result_to_db',
  140. dag=dag,
  141. python_callable=load_cellranger_result_to_db_func,
  142. params={'analysis_description_xcom_pull_task':'fetch_analysis_info_and_branch',
  143. 'analysis_description_xcom_key':'analysis_description',
  144. 'cellranger_xcom_key':'cellranger_output',
  145. 'cellranger_xcom_pull_task':'run_cellranger',
  146. 'collection_type':'CELLRANGER_MULTI',
  147. 'collection_table':'sample',
  148. 'xcom_collection_name_key':'sample_igf_id',
  149. 'genome_column':'genome_build',
  150. 'analysis_name':'cellranger_multi',
  151. 'output_xcom_key':'loaded_output_files',
  152. 'html_xcom_key':'html_report_file',
  153. 'html_report_file_name':'web_summary.html'})
  154. upload_report_to_ftp = \
  155. PythonOperator(
  156. task_id='upload_report_to_ftp',
  157. dag=dag,
  158. python_callable=ftp_files_upload_for_analysis,
  159. params={'xcom_pull_task':'load_cellranger_result_to_db',
  160. 'xcom_pull_files_key':'html_report_file',
  161. 'collection_name_task':'load_cellranger_result_to_db',
  162. 'collection_name_key':'sample_igf_id',
  163. 'collection_type':'FTP_CELLRANGER_MULTI',
  164. 'collection_table':'sample',
  165. 'collect_remote_file':True})
  166. upload_report_to_box = \
  167. DummyOperator(
  168. task_id='upload_report_to_box',
  169. dag=dag,
  170. params={'xcom_pull_task':'load_cellranger_result_to_db',
  171. 'xcom_pull_files_key':'html_report_file'})
  172. upload_results_to_irods = \
  173. PythonOperator(
  174. task_id='upload_results_to_irods',
  175. dag=dag,
  176. python_callable=irods_files_upload_for_analysis,
  177. params={'xcom_pull_task':'load_cellranger_result_to_db',
  178. 'xcom_pull_files_key':'loaded_output_files',
  179. 'collection_name_key':'sample_igf_id',
  180. 'analysis_name':'cellranger_multi'})
  181. ## PIPELINE
  182. decide_analysis_branch >> load_cellranger_result_to_db
  183. load_cellranger_result_to_db >> upload_report_to_ftp
  184. load_cellranger_result_to_db >> upload_report_to_box
  185. load_cellranger_result_to_db >> upload_results_to_irods
  186. ## TASK
  187. run_scanpy_for_sc_5p = \
  188. PythonOperator(
  189. task_id='run_scanpy_for_sc_5p',
  190. dag=dag,
  191. python_callable=run_scanpy_for_sc_5p_func,
  192. params={'cellranger_xcom_key':'cellranger_output',
  193. 'cellranger_xcom_pull_task':'run_cellranger',
  194. 'scanpy_timeout':1200,
  195. 'allow_errors':False,
  196. 'output_notebook_key':'scanpy_notebook',
  197. 'output_cellbrowser_key':'cellbrowser_dirs',
  198. 'analysis_description_xcom_pull_task':'fetch_analysis_info_and_branch',
  199. 'analysis_description_xcom_key':'analysis_description'})
  200. load_scanpy_report_for_sc_5p_to_db = \
  201. PythonOperator(
  202. task_id='load_scanpy_report_for_sc_5p_to_db',
  203. dag=dag,
  204. python_callable=load_analysis_files_func,
  205. params={'collection_name_task':'load_cellranger_result_to_db',
  206. 'collection_name_key':'sample_igf_id',
  207. 'file_name_task':'run_scanpy_for_sc_5p',
  208. 'file_name_key':'scanpy_notebook',
  209. 'analysis_name':'scanpy_5p',
  210. 'collection_type':'SCANPY_HTML',
  211. 'collection_table':'sample',
  212. 'output_files_key':'output_db_files'})
  213. upload_scanpy_report_for_sc_5p_to_ftp = \
  214. PythonOperator(
  215. task_id='upload_scanpy_report_for_sc_5p_to_ftp',
  216. dag=dag,
  217. python_callable=ftp_files_upload_for_analysis,
  218. params={'xcom_pull_task':'load_scanpy_report_for_sc_5p_to_db',
  219. 'xcom_pull_files_key':'output_db_files',
  220. 'collection_name_task':'load_cellranger_result_to_db',
  221. 'collection_name_key':'sample_igf_id',
  222. 'collection_type':'FTP_SCANPY_HTML',
  223. 'collection_table':'sample',
  224. 'collect_remote_file':True})
  225. upload_scanpy_report_for_sc_5p_to_box = \
  226. DummyOperator(
  227. task_id='upload_scanpy_report_for_sc_5p_to_box',
  228. dag=dag)
  229. upload_cellbrowser_for_sc_5p_to_ftp = \
  230. DummyOperator(
  231. task_id='upload_cellbrowser_for_sc_5p_to_ftp',
  232. dag=dag,
  233. python_callable=ftp_files_upload_for_analysis,
  234. params={'xcom_pull_task':'run_scanpy_for_sc_5p',
  235. 'xcom_pull_files_key':'cellbrowser_dirs',
  236. 'collection_name_task':'load_cellranger_result_to_db',
  237. 'collection_name_key':'sample_igf_id',
  238. 'collection_type':'FTP_CELLBROWSER',
  239. 'collection_table':'sample',
  240. 'collect_remote_file':True})
  241. ## PIPELINE
  242. decide_analysis_branch >> run_scanpy_for_sc_5p
  243. run_scanpy_for_sc_5p >> load_scanpy_report_for_sc_5p_to_db
  244. load_scanpy_report_for_sc_5p_to_db >> upload_scanpy_report_for_sc_5p_to_ftp
  245. load_scanpy_report_for_sc_5p_to_db >> upload_scanpy_report_for_sc_5p_to_box
  246. run_scanpy_for_sc_5p >> upload_cellbrowser_for_sc_5p_to_ftp
  247. ## TASK
  248. run_scirpy_for_vdj = \
  249. PythonOperator(
  250. task_id='run_scirpy_for_vdj',
  251. dag=dag,
  252. python_callable=run_singlecell_notebook_wrapper_func,
  253. params={'cellranger_xcom_key':'cellranger_output',
  254. 'cellranger_xcom_pull_task':'run_cellranger',
  255. 'scanpy_timeout':1200,
  256. 'allow_errors':False,
  257. 'kernel_name':'python3',
  258. 'vdj_dir':'vdj',
  259. 'count_dir':'count',
  260. 'cell_marker_list':Variable.get('all_cell_marker_list'),
  261. 'output_notebook_key':'scirpy_notebook',
  262. 'analysis_description_xcom_pull_task':'fetch_analysis_info_and_branch',
  263. 'analysis_description_xcom_key':'analysis_description',
  264. 'template_ipynb_path':Variable.get('scirpy_single_sample_template'),
  265. 'singularity_image_path':Variable.get('scirpy_notebook_image')})
  266. load_scanpy_report_for_vdj_to_db = \
  267. PythonOperator(
  268. task_id='load_scanpy_report_for_vdj_to_db',
  269. dag=dag,
  270. python_callable=load_analysis_files_func,
  271. params={'collection_name_task':'load_cellranger_result_to_db',
  272. 'collection_name_key':'sample_igf_id',
  273. 'file_name_task':'run_scirpy_for_vdj',
  274. 'file_name_key':'scirpy_notebook',
  275. 'analysis_name':'scirpy_vdj',
  276. 'collection_type':'SCIRPY_VDJ_HTML',
  277. 'collection_table':'sample',
  278. 'output_files_key':'output_db_files'})
  279. upload_scanpy_report_for_vdj_to_ftp = \
  280. PythonOperator(
  281. task_id='upload_scanpy_report_for_vdj_to_ftp',
  282. dag=dag,
  283. python_callable=ftp_files_upload_for_analysis,
  284. params={'xcom_pull_task':'load_scanpy_report_for_vdj_to_db',
  285. 'xcom_pull_files_key':'output_db_files',
  286. 'collection_name_task':'load_cellranger_result_to_db',
  287. 'collection_name_key':'sample_igf_id',
  288. 'collection_type':'FTP_SCIRPY_VDJ_HTML',
  289. 'collection_table':'sample',
  290. 'collect_remote_file':True})
  291. upload_scanpy_report_for_vdj_to_box = \
  292. DummyOperator(
  293. task_id='upload_scanpy_report_for_vdj_to_box',
  294. dag=dag)
  295. ## PIPELINE
  296. decide_analysis_branch >> run_scirpy_for_vdj
  297. run_scirpy_for_vdj >> load_scanpy_report_for_vdj_to_db
  298. load_scanpy_report_for_vdj_to_db >> upload_scanpy_report_for_vdj_to_ftp
  299. load_scanpy_report_for_vdj_to_db >> upload_scanpy_report_for_vdj_to_box
  300. ## TASK
  301. run_scirpy_for_vdj_b = \
  302. DummyOperator(
  303. task_id='run_scirpy_for_vdj_b',
  304. dag=dag,
  305. python_callable=run_singlecell_notebook_wrapper_func,
  306. params={'cellranger_xcom_key':'cellranger_output',
  307. 'cellranger_xcom_pull_task':'run_cellranger',
  308. 'scanpy_timeout':1200,
  309. 'allow_errors':False,
  310. 'kernel_name':'python3',
  311. 'vdj_dir':'vdj_b',
  312. 'count_dir':'count',
  313. 'cell_marker_list':Variable.get('all_cell_marker_list'),
  314. 'output_notebook_key':'scirpy_notebook',
  315. 'analysis_description_xcom_pull_task':'fetch_analysis_info_and_branch',
  316. 'analysis_description_xcom_key':'analysis_description',
  317. 'template_ipynb_path':Variable.get('scirpy_single_sample_template'),
  318. 'singularity_image_path':Variable.get('scirpy_notebook_image')})
  319. load_scanpy_report_for_vdj_b_to_db = \
  320. PythonOperator(
  321. task_id='load_scanpy_report_for_vdj_b_to_db',
  322. dag=dag,
  323. python_callable=load_analysis_files_func,
  324. params={'collection_name_task':'load_cellranger_result_to_db',
  325. 'collection_name_key':'sample_igf_id',
  326. 'file_name_task':'run_scirpy_for_vdj_b',
  327. 'file_name_key':'scirpy_notebook',
  328. 'analysis_name':'scirpy_vdj_b',
  329. 'collection_type':'SCIRPY_VDJ_B_HTML',
  330. 'collection_table':'sample',
  331. 'output_files_key':'output_db_files'})
  332. upload_scanpy_report_for_vdj_b_to_ftp = \
  333. PythonOperator(
  334. task_id='upload_scanpy_report_for_vdj_b_to_ftp',
  335. dag=dag,
  336. python_callable=ftp_files_upload_for_analysis,
  337. params={'xcom_pull_task':'load_scanpy_report_for_vdj_b_to_db',
  338. 'xcom_pull_files_key':'output_db_files',
  339. 'collection_name_task':'load_cellranger_result_to_db',
  340. 'collection_name_key':'sample_igf_id',
  341. 'collection_type':'FTP_SCIRPY_VDJ_B_HTML',
  342. 'collection_table':'sample',
  343. 'collect_remote_file':True})
  344. upload_scanpy_report_for_vdj_b_to_box = \
  345. DummyOperator(
  346. task_id='upload_scanpy_report_for_vdj_b_to_box',
  347. dag=dag)
  348. ## PIPELINE
  349. decide_analysis_branch >> run_scirpy_for_vdj_b
  350. run_scirpy_for_vdj_b >> load_scanpy_report_for_vdj_b_to_db
  351. load_scanpy_report_for_vdj_b_to_db >> upload_scanpy_report_for_vdj_b_to_ftp
  352. load_scanpy_report_for_vdj_b_to_db >> upload_scanpy_report_for_vdj_b_to_box
  353. ## TASK
  354. run_scirpy_for_vdj_t = \
  355. DummyOperator(
  356. task_id='run_scirpy_for_vdj_t',
  357. dag=dag,
  358. python_callable=run_singlecell_notebook_wrapper_func,
  359. params={'cellranger_xcom_key':'cellranger_output',
  360. 'cellranger_xcom_pull_task':'run_cellranger',
  361. 'scanpy_timeout':1200,
  362. 'allow_errors':False,
  363. 'kernel_name':'python3',
  364. 'vdj_dir':'vdj_t',
  365. 'count_dir':'count',
  366. 'cell_marker_list':Variable.get('all_cell_marker_list'),
  367. 'output_notebook_key':'scirpy_notebook',
  368. 'analysis_description_xcom_pull_task':'fetch_analysis_info_and_branch',
  369. 'analysis_description_xcom_key':'analysis_description',
  370. 'template_ipynb_path':Variable.get('scirpy_single_sample_template'),
  371. 'singularity_image_path':Variable.get('scirpy_notebook_image')})
  372. load_scanpy_report_for_vdj_t_to_db = \
  373. PythonOperator(
  374. task_id='load_scanpy_report_for_vdj_t_to_db',
  375. dag=dag,
  376. python_callable=load_analysis_files_func,
  377. params={'collection_name_task':'load_cellranger_result_to_db',
  378. 'collection_name_key':'sample_igf_id',
  379. 'file_name_task':'run_scirpy_for_vdj_t',
  380. 'file_name_key':'scirpy_notebook',
  381. 'analysis_name':'scirpy_vdj_t',
  382. 'collection_type':'SCIRPY_VDJ_T_HTML',
  383. 'collection_table':'sample',
  384. 'output_files_key':'output_db_files'})
  385. upload_scanpy_report_for_vdj_t_to_ftp = \
  386. PythonOperator(
  387. task_id='upload_scanpy_report_for_vdj_t_to_ftp',
  388. dag=dag,
  389. python_callable=ftp_files_upload_for_analysis,
  390. params={'xcom_pull_task':'load_scanpy_report_for_vdj_t_to_db',
  391. 'xcom_pull_files_key':'output_db_files',
  392. 'collection_name_task':'load_cellranger_result_to_db',
  393. 'collection_name_key':'sample_igf_id',
  394. 'collection_type':'FTP_SCIRPY_VDJ_T_HTML',
  395. 'collection_table':'sample',
  396. 'collect_remote_file':True})
  397. upload_scanpy_report_for_vdj_t_to_box = \
  398. DummyOperator(
  399. task_id='upload_scanpy_report_for_vdj_t_to_box',
  400. dag=dag)
  401. ## PIPELINE
  402. decide_analysis_branch >> run_scirpy_for_vdj_t
  403. run_scirpy_for_vdj_t >> load_scanpy_report_for_vdj_t_to_db
  404. load_scanpy_report_for_vdj_t_to_db >> upload_scanpy_report_for_vdj_t_to_ftp
  405. load_scanpy_report_for_vdj_t_to_db >> upload_scanpy_report_for_vdj_t_to_box
  406. ## TASK
  407. run_seurat_for_sc_5p = \
  408. PythonOperator(
  409. task_id='run_seurat_for_sc_5p',
  410. dag=dag,
  411. python_callable=run_singlecell_notebook_wrapper_func,
  412. params={'cellranger_xcom_key':'cellranger_output',
  413. 'cellranger_xcom_pull_task':'run_cellranger',
  414. 'scanpy_timeout':1200,
  415. 'allow_errors':False,
  416. 'kernel_name':'R',
  417. 'vdj_dir':'vdj',
  418. 'count_dir':'count',
  419. 'cell_marker_list':Variable.get('all_cell_marker_list'),
  420. 'output_notebook_key':'seurat_notebook',
  421. 'analysis_description_xcom_pull_task':'fetch_analysis_info_and_branch',
  422. 'analysis_description_xcom_key':'analysis_description',
  423. 'template_ipynb_path':Variable.get('seurat_single_sample_template'),
  424. 'singularity_image_path':Variable.get('seurat_notebook_image')})
  425. load_seurat_report_for_sc_5p_db = \
  426. PythonOperator(
  427. task_id='load_seurat_report_for_sc_5p_db',
  428. dag=dag,
  429. python_callable=load_analysis_files_func,
  430. params={'collection_name_task':'load_cellranger_result_to_db',
  431. 'collection_name_key':'sample_igf_id',
  432. 'file_name_task':'run_seurat_for_sc_5p',
  433. 'file_name_key':'seurat_notebook',
  434. 'analysis_name':'seurat_5p',
  435. 'collection_type':'SEURAT_HTML',
  436. 'collection_table':'sample',
  437. 'output_files_key':'output_db_files'})
  438. upload_seurat_report_for_sc_5p_ftp = \
  439. DummyOperator(
  440. task_id='upload_seurat_report_for_sc_5p_ftp',
  441. dag=dag,
  442. python_callable=ftp_files_upload_for_analysis,
  443. params={'xcom_pull_task':'load_seurat_report_for_sc_5p_db',
  444. 'xcom_pull_files_key':'output_db_files',
  445. 'collection_name_task':'load_cellranger_result_to_db',
  446. 'collection_name_key':'sample_igf_id',
  447. 'collection_type':'FTP_SEURAT_HTML',
  448. 'collection_table':'sample',
  449. 'collect_remote_file':True})
  450. upload_seurat_report_for_sc_5p_to_box = \
  451. DummyOperator(
  452. task_id='upload_seurat_report_for_sc_5p_to_box',
  453. dag=dag)
  454. ## PIPELINE
  455. decide_analysis_branch >> run_seurat_for_sc_5p
  456. run_seurat_for_sc_5p >> load_seurat_report_for_sc_5p_db
  457. load_seurat_report_for_sc_5p_db >> upload_seurat_report_for_sc_5p_ftp
  458. load_seurat_report_for_sc_5p_db >> upload_seurat_report_for_sc_5p_to_box
  459. ## TASK
  460. convert_bam_to_cram = \
  461. DummyOperator(
  462. task_id='convert_bam_to_cram',
  463. dag=dag,
  464. params={'cellranger_xcom_key':'cellranger_output',
  465. 'cellranger_xcom_pull_task':'run_cellranger'})
  466. upload_cram_to_irods = \
  467. DummyOperator(
  468. task_id='upload_cram_to_irods',
  469. dag=dag)
  470. ## PIPELINE
  471. decide_analysis_branch >> convert_bam_to_cram
  472. convert_bam_to_cram >> upload_cram_to_irods
  473. ## TASK
  474. run_picard_alignment_summary = \
  475. DummyOperator(
  476. task_id='run_picard_alignment_summary',
  477. dag=dag,
  478. params={'cellranger_xcom_key':'cellranger_output',
  479. 'cellranger_xcom_pull_task':'run_cellranger'})
  480. ## PIPELINE
  481. decide_analysis_branch >> run_picard_alignment_summary
  482. ## TASK
  483. run_picard_qual_summary = \
  484. DummyOperator(
  485. task_id='run_picard_qual_summary',
  486. dag=dag)
  487. ## PIPELINE
  488. run_picard_alignment_summary >> run_picard_qual_summary
  489. ## TASK
  490. run_picard_rna_summary = \
  491. DummyOperator(
  492. task_id='run_picard_rna_summary',
  493. dag=dag)
  494. ## PIPELINE
  495. run_picard_qual_summary >> run_picard_rna_summary
  496. ## TASK
  497. run_picard_gc_summary = \
  498. DummyOperator(
  499. task_id='run_picard_gc_summary',
  500. dag=dag)
  501. ## PIPELINE
  502. run_picard_rna_summary >> run_picard_gc_summary
  503. ## TASK
  504. run_samtools_stats = \
  505. DummyOperator(
  506. task_id='run_samtools_stats',
  507. dag=dag)
  508. ## PIPELINE
  509. run_picard_gc_summary >> run_samtools_stats
  510. ## TASK
  511. run_samtools_idxstats = \
  512. DummyOperator(
  513. task_id='run_samtools_idxstats',
  514. dag=dag)
  515. ## PIPELINE
  516. run_samtools_stats >> run_samtools_idxstats
  517. ## TASK
  518. run_multiqc = \
  519. DummyOperator(
  520. task_id='run_multiqc',
  521. dag=dag)
  522. ## PIPELINE
  523. run_samtools_idxstats >> run_multiqc
  524. ## TASK
  525. upload_multiqc_to_ftp = \
  526. DummyOperator(
  527. task_id='upload_multiqc_to_ftp',
  528. dag=dag)
  529. ## PIPELINE
  530. run_multiqc >> upload_multiqc_to_ftp
  531. ## TASK
  532. upload_multiqc_to_box = \
  533. DummyOperator(
  534. task_id='upload_multiqc_to_box',
  535. dag=dag)
  536. ## PIPELINE
  537. run_multiqc >> upload_multiqc_to_box
  538. ## TASK
  539. update_analysis_and_status = \
  540. DummyOperator(
  541. task_id='update_analysis_and_status',
  542. dag=dag)
  543. ## PIPELINE
  544. upload_multiqc_to_ftp >> update_analysis_and_status
  545. upload_scanpy_report_for_sc_5p_to_ftp >> update_analysis_and_status
  546. upload_scanpy_report_for_sc_5p_to_box >> update_analysis_and_status
  547. upload_cellbrowser_for_sc_5p_to_ftp >> update_analysis_and_status
  548. upload_scanpy_report_for_vdj_to_ftp >> update_analysis_and_status
  549. upload_scanpy_report_for_vdj_to_box >> update_analysis_and_status
  550. upload_scanpy_report_for_vdj_b_to_ftp >> update_analysis_and_status
  551. upload_scanpy_report_for_vdj_b_to_box >> update_analysis_and_status
  552. upload_scanpy_report_for_vdj_t_to_ftp >> update_analysis_and_status
  553. upload_scanpy_report_for_vdj_t_to_box >> update_analysis_and_status
  554. upload_seurat_report_for_sc_5p_ftp >> update_analysis_and_status
  555. upload_seurat_report_for_sc_5p_to_box >> update_analysis_and_status
  556. upload_results_to_irods >> update_analysis_and_status
  557. upload_report_to_ftp >> update_analysis_and_status
  558. upload_report_to_box >> update_analysis_and_status
  559. upload_cram_to_irods >> update_analysis_and_status