dag9_tenx_single_cell_immune_profiling.py 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964
  1. from datetime import timedelta
  2. import os,json,logging,subprocess
  3. from airflow.models import DAG,Variable
  4. from airflow.utils.dates import days_ago
  5. from airflow.operators.python_operator import PythonOperator
  6. from airflow.operators.python_operator import BranchPythonOperator
  7. from airflow.operators.dummy_operator import DummyOperator
  8. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import fetch_analysis_info_and_branch_func
  9. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import configure_cellranger_run_func
  10. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_sc_read_trimmming_func
  11. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_cellranger_tool
  12. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import decide_analysis_branch_func
  13. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_cellranger_result_to_db_func
  14. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import ftp_files_upload_for_analysis
  15. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import irods_files_upload_for_analysis
  16. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_singlecell_notebook_wrapper_func
  17. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_analysis_files_func
  18. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import task_branch_function
  19. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import upload_analysis_file_to_box
  20. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import convert_bam_to_cram_func
  21. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_picard_for_cellranger
  22. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_samtools_for_cellranger
  23. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_multiqc_for_cellranger
  24. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import index_and_copy_bam_for_parallel_analysis
  25. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import change_pipeline_status
  26. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import clean_up_files
  ## ARGS
  # Shared defaults handed to the DAG via ``default_args``: no dependency on
  # past runs, no e-mail notifications, up to 4 retries spaced 5 minutes apart,
  # and ``provide_context`` switched on.
  # NOTE(review): 'max_active_runs' and 'catchup' are normally DAG-level
  # settings -- confirm they have any effect when supplied through default_args.
  default_args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=days_ago(2),
    email_on_failure=False,
    email_on_retry=False,
    retries=4,
    max_active_runs=10,
    catchup=False,
    retry_delay=timedelta(minutes=5),
    provide_context=True,
  )
  # Feature types to build trimming tasks for, read from an Airflow Variable.
  # The value is consumed as a mapping further down (``.keys()`` is called on
  # it), so the stored JSON has to be decoded; without deserialize_json=True a
  # configured variable comes back as a plain string. When the variable is not
  # set, the empty-dict default is used and no per-feature tasks are created.
  FEATURE_TYPE_LIST = Variable.get(
    'tenx_single_cell_immune_profiling_feature_types',
    default_var={},
    deserialize_json=True)
  ## DAG
  # DAG object for the 10x single cell immune profiling pipeline.
  # schedule_interval=None: there is no periodic schedule for this DAG.
  # max_active_runs and catchup are DAG constructor arguments, so they are set
  # here explicitly with the same values that default_args carries; entries in
  # default_args are only forwarded to operators.
  dag = DAG(
    dag_id='dag9_tenx_single_cell_immune_profiling',
    schedule_interval=None,
    max_active_runs=10,
    catchup=False,
    tags=['hpc','analysis','tenx','sc'],
    default_args=default_args,
    orientation='LR')
  50. with dag:
  ## TASK
  # Branch task 'fetch_analysis_info', driven by
  # fetch_analysis_info_and_branch_func. Its params carry the id of the
  # 'no_analysis' task and the two XCom key names ('analysis_description',
  # 'analysis_info') that later tasks in this file pull from this task id.
  fetch_analysis_info_and_branch = BranchPythonOperator(
    task_id='fetch_analysis_info',
    dag=dag,
    queue='hpc_4G',
    python_callable=fetch_analysis_info_and_branch_func,
    params={
      'no_analysis_task': 'no_analysis',
      'analysis_description_xcom_key': 'analysis_description',
      'analysis_info_xcom_key': 'analysis_info',
    })
  ## TASK
  # Task 'configure_cellranger_run', driven by configure_cellranger_run_func.
  # It sits behind the per-feature 'collect_trimmed_files_*' tasks and uses the
  # 'none_failed_or_skipped' trigger rule instead of the default one.
  # Params: the XCom source task/keys for the analysis description and info,
  # plus 'cellranger_library_csv' as the key name for the library csv.
  configure_cellranger_run = PythonOperator(
    task_id='configure_cellranger_run',
    dag=dag,
    queue='hpc_4G',
    trigger_rule='none_failed_or_skipped',
    python_callable=configure_cellranger_run_func,
    params={
      'xcom_pull_task_id': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'analysis_info_xcom_key': 'analysis_info',
      'library_csv_xcom_key': 'cellranger_library_csv',
    })
  # Build one trimming sub-graph per entry of FEATURE_TYPE_LIST:
  #   fetch_analysis_info -> <analysis_name> (branch) -> run_trim_<name>_0..9
  #     -> collect_trimmed_files_<name> -> configure_cellranger_run
  # .keys() is kept so that a value which is not a mapping fails right here.
  for analysis_name in FEATURE_TYPE_LIST.keys():
    ## TASK
    # Branch task named after the feature type; 'task_prefix' is 'run_trim',
    # the prefix shared by the trimming task ids created below.
    task_branch = BranchPythonOperator(
      task_id=analysis_name,
      dag=dag,
      queue='hpc_4G',
      python_callable=task_branch_function,
      params={
        'xcom_pull_task_id': 'fetch_analysis_info',
        'analysis_info_xcom_key': 'analysis_info',
        'analysis_name': analysis_name,
        'task_prefix': 'run_trim',
      })
    # Fixed pool of ten trimming tasks per feature type (run_id 0..9).
    run_trim_list = []
    for run_id in range(10):
      ## TASK
      trim_task = PythonOperator(
        task_id='run_trim_{0}_{1}'.format(analysis_name, run_id),
        dag=dag,
        queue='hpc_4G',
        python_callable=run_sc_read_trimmming_func,
        params={
          'xcom_pull_task_id': 'fetch_analysis_info',
          'analysis_info_xcom_key': 'analysis_info',
          'analysis_description_xcom_key': 'analysis_description',
          'analysis_name': analysis_name,
          'run_id': run_id,
          'r1_length': 0,
          'r2_length': 0,
          'fastq_input_dir_tag': 'fastq_dir',
          'use_ephemeral_space': True,
          'fastq_output_dir_tag': 'output_path',
        })
      run_trim_list.append(trim_task)
    ## TASK
    # Join point for the ten trimming tasks of this feature type.
    collect_trimmed_files = DummyOperator(
      task_id='collect_trimmed_files_{0}'.format(analysis_name),
      trigger_rule='none_failed_or_skipped',
      dag=dag)
    ## PIPELINE
    fetch_analysis_info_and_branch >> task_branch >> run_trim_list
    run_trim_list >> collect_trimmed_files >> configure_cellranger_run
  ## TASK
  # Placeholder task 'no_analysis'; this is the task id handed to the
  # 'fetch_analysis_info' branch through its 'no_analysis_task' param.
  no_analysis = DummyOperator(task_id='no_analysis', dag=dag)
  ## PIPELINE
  fetch_analysis_info_and_branch >> no_analysis
  ## TASK
  # Task 'run_cellranger', driven by run_cellranger_tool and placed on the
  # 'hpc_64G16t24hr' queue. Params name where the analysis description
  # ('fetch_analysis_info') and the library csv ('configure_cellranger_run')
  # are pulled from, the key name 'cellranger_output', and the extra command
  # line options '--localcores 16' / '--localmem 64'.
  run_cellranger = PythonOperator(
    task_id='run_cellranger',
    dag=dag,
    queue='hpc_64G16t24hr',
    python_callable=run_cellranger_tool,
    params={
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'library_csv_xcom_key': 'cellranger_library_csv',
      'library_csv_xcom_pull_task': 'configure_cellranger_run',
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_options': ['--localcores 16', '--localmem 64'],
    })
  ## PIPELINE
  configure_cellranger_run >> run_cellranger
  ## TASK
  # Branch task 'decide_analysis_branch', driven by decide_analysis_branch_func
  # and run after 'run_cellranger'. The '*_task' params hand the callable the
  # ids of the candidate tasks defined further down in this DAG; the last two
  # params name the XCom source of the cellranger library csv.
  # NOTE(review): the key 'run_scirpy_vdj_t_task' lacks the '_for_' used by its
  # siblings -- it must match whatever the callable looks up, so it is left as is.
  decide_analysis_branch = BranchPythonOperator(
    task_id='decide_analysis_branch',
    dag=dag,
    queue='hpc_4G',
    python_callable=decide_analysis_branch_func,
    params={
      'load_cellranger_result_to_db_task': 'load_cellranger_result_to_db',
      'run_scanpy_for_sc_5p_task': 'run_scanpy_for_sc_5p',
      'run_scirpy_for_vdj_task': 'run_scirpy_for_vdj',
      'run_scirpy_for_vdj_b_task': 'run_scirpy_for_vdj_b',
      'run_scirpy_vdj_t_task': 'run_scirpy_for_vdj_t',
      'run_seurat_for_sc_5p_task': 'run_seurat_for_sc_5p',
      'copy_bam_for_parallel_runs_task': 'copy_bam_for_parallel_runs',
      'library_csv_xcom_key': 'cellranger_library_csv',
      'library_csv_xcom_pull_task': 'configure_cellranger_run',
    })
  ## PIPELINE
  run_cellranger >> decide_analysis_branch
  ## TASK
  # Cellranger result branch: one load task followed by three upload tasks.
  # 'load_cellranger_result_to_db' is configured with the XCom key names
  # 'sample_igf_id', 'loaded_output_files' and 'html_report_file'; the upload
  # tasks below pull those keys from it.
  load_cellranger_result_to_db = PythonOperator(
    task_id='load_cellranger_result_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_cellranger_result_to_db_func,
    params={
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'CELLRANGER_MULTI',
      'collection_table': 'sample',
      'xcom_collection_name_key': 'sample_igf_id',
      'genome_column': 'genome_build',
      'analysis_name': 'cellranger_multi',
      'output_xcom_key': 'loaded_output_files',
      'html_xcom_key': 'html_report_file',
      'html_report_file_name': 'web_summary.html',
    })
  # FTP upload of the entry stored under 'html_report_file'.
  upload_cellranger_report_to_ftp = PythonOperator(
    task_id='upload_cellranger_report_to_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'html_report_file',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_CELLRANGER_MULTI',
      'collection_table': 'sample',
      'collect_remote_file': True,
    })
  # Box upload of the same 'html_report_file' entry.
  upload_cellranger_report_to_box = PythonOperator(
    task_id='upload_cellranger_report_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'html_report_file',
      'analysis_tag': 'cellranger_multi',
    })
  # iRODS upload of the entries stored under 'loaded_output_files'.
  upload_cellranger_results_to_irods = PythonOperator(
    task_id='upload_cellranger_results_to_irods',
    dag=dag,
    queue='hpc_4G',
    python_callable=irods_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'loaded_output_files',
      'collection_name_key': 'sample_igf_id',
      'collection_name_task': 'load_cellranger_result_to_db',
      'analysis_name': 'cellranger_multi',
    })
  ## PIPELINE
  decide_analysis_branch >> load_cellranger_result_to_db
  load_cellranger_result_to_db >> [
    upload_cellranger_report_to_ftp,
    upload_cellranger_report_to_box,
    upload_cellranger_results_to_irods,
  ]
  ## TASK
  # Scanpy branch: the notebook wrapper runs with analysis_name 'scanpy' and
  # kernel 'python3', its notebook is loaded to the db and then sent to FTP and
  # Box; the 'cellbrowser_dirs' entry is sent to FTP straight from the run task.
  # NOTE(review): several tasks here pull 'sample_igf_id' from
  # 'load_cellranger_result_to_db', which is not upstream of them in the wiring
  # visible in this file -- confirm that value is available when they run.
  run_scanpy_for_sc_5p = PythonOperator(
    task_id='run_scanpy_for_sc_5p',
    dag=dag,
    queue='hpc_4G',
    python_callable=run_singlecell_notebook_wrapper_func,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'count_dir': 'count',
      'analysis_name': 'scanpy',
      'output_notebook_key': 'scanpy_notebook',
      'output_cellbrowser_key': 'cellbrowser_dirs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    })
  # Loads the 'scanpy_notebook' entry; results go under 'output_db_files'.
  load_scanpy_report_for_sc_5p_to_db = PythonOperator(
    task_id='load_scanpy_report_for_sc_5p_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_analysis_files_func,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scanpy_for_sc_5p',
      'file_name_key': 'scanpy_notebook',
      'analysis_name': 'scanpy_5p',
      'collection_type': 'SCANPY_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    })
  upload_scanpy_report_for_sc_5p_to_ftp = PythonOperator(
    task_id='upload_scanpy_report_for_sc_5p_to_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'load_scanpy_report_for_sc_5p_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCANPY_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    })
  upload_scanpy_report_for_sc_5p_to_box = PythonOperator(
    task_id='upload_scanpy_report_for_sc_5p_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'load_scanpy_report_for_sc_5p_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'scanpy_single_sample_report',
    })
  # Pulls 'cellbrowser_dirs' directly from the scanpy run task.
  upload_cellbrowser_for_sc_5p_to_ftp = PythonOperator(
    task_id='upload_cellbrowser_for_sc_5p_to_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'run_scanpy_for_sc_5p',
      'xcom_pull_files_key': 'cellbrowser_dirs',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_CELLBROWSER',
      'collection_table': 'sample',
      'collect_remote_file': True,
    })
  ## PIPELINE
  decide_analysis_branch >> run_scanpy_for_sc_5p
  run_scanpy_for_sc_5p >> [
    load_scanpy_report_for_sc_5p_to_db,
    upload_cellbrowser_for_sc_5p_to_ftp,
  ]
  load_scanpy_report_for_sc_5p_to_db >> [
    upload_scanpy_report_for_sc_5p_to_ftp,
    upload_scanpy_report_for_sc_5p_to_box,
  ]
  ## TASK
  # Scirpy branch for 'vdj': the notebook wrapper runs with analysis_name
  # 'scirpy' and vdj_dir 'vdj'; its notebook is loaded to the db and the loaded
  # files are then sent to FTP and Box.
  run_scirpy_for_vdj = PythonOperator(
    task_id='run_scirpy_for_vdj',
    dag=dag,
    queue='hpc_4G',
    python_callable=run_singlecell_notebook_wrapper_func,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'analysis_name': 'scirpy',
      'vdj_dir': 'vdj',
      'count_dir': 'count',
      'output_notebook_key': 'scirpy_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    })
  # Loads the 'scirpy_notebook' entry; results go under 'output_db_files'.
  load_scirpy_report_for_vdj_to_db = PythonOperator(
    task_id='load_scirpy_report_for_vdj_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_analysis_files_func,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scirpy_for_vdj',
      'file_name_key': 'scirpy_notebook',
      'analysis_name': 'scirpy_vdj',
      'collection_type': 'SCIRPY_VDJ_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    })
  upload_scirpy_report_for_vdj_to_ftp = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_to_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCIRPY_VDJ_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    })
  upload_scirpy_report_for_vdj_to_box = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'scirpy_vdj_single_sample_report',
    })
  ## PIPELINE
  decide_analysis_branch >> run_scirpy_for_vdj >> load_scirpy_report_for_vdj_to_db
  load_scirpy_report_for_vdj_to_db >> [
    upload_scirpy_report_for_vdj_to_ftp,
    upload_scirpy_report_for_vdj_to_box,
  ]
  ## TASK
  # Scirpy branch for 'vdj_b': the notebook wrapper runs with analysis_name
  # 'scirpy' and vdj_dir 'vdj_b'; its notebook is loaded to the db and the
  # loaded files are then sent to FTP and Box.
  run_scirpy_for_vdj_b = PythonOperator(
    task_id='run_scirpy_for_vdj_b',
    dag=dag,
    queue='hpc_4G',
    python_callable=run_singlecell_notebook_wrapper_func,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'analysis_name': 'scirpy',
      'vdj_dir': 'vdj_b',
      'count_dir': 'count',
      'output_notebook_key': 'scirpy_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    })
  # Loads the 'scirpy_notebook' entry; results go under 'output_db_files'.
  load_scirpy_report_for_vdj_b_to_db = PythonOperator(
    task_id='load_scirpy_report_for_vdj_b_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_analysis_files_func,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scirpy_for_vdj_b',
      'file_name_key': 'scirpy_notebook',
      'analysis_name': 'scirpy_vdj_b',
      'collection_type': 'SCIRPY_VDJ_B_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    })
  upload_scirpy_report_for_vdj_b_to_ftp = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_b_to_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_b_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCIRPY_VDJ_B_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    })
  # The task id now says 'scirpy' (it previously read 'upload_scanpy_...'),
  # matching the variable name and the vdj / vdj_t box upload tasks.
  upload_scirpy_report_for_vdj_b_to_box = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_b_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_b_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'scirpy_vdj_b_single_sample_report',
    })
  ## PIPELINE
  decide_analysis_branch >> run_scirpy_for_vdj_b
  run_scirpy_for_vdj_b >> load_scirpy_report_for_vdj_b_to_db
  load_scirpy_report_for_vdj_b_to_db >> upload_scirpy_report_for_vdj_b_to_ftp
  load_scirpy_report_for_vdj_b_to_db >> upload_scirpy_report_for_vdj_b_to_box
  ## TASK
  # Scirpy branch for 'vdj_t': the notebook wrapper runs with analysis_name
  # 'scirpy' and vdj_dir 'vdj_t'; its notebook is loaded to the db and the
  # loaded files are then sent to FTP and Box.
  run_scirpy_for_vdj_t = PythonOperator(
    task_id='run_scirpy_for_vdj_t',
    dag=dag,
    queue='hpc_4G',
    python_callable=run_singlecell_notebook_wrapper_func,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'analysis_name': 'scirpy',
      'vdj_dir': 'vdj_t',
      'count_dir': 'count',
      'output_notebook_key': 'scirpy_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    })
  # Loads the 'scirpy_notebook' entry; results go under 'output_db_files'.
  load_scirpy_report_for_vdj_t_to_db = PythonOperator(
    task_id='load_scirpy_report_for_vdj_t_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_analysis_files_func,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scirpy_for_vdj_t',
      'file_name_key': 'scirpy_notebook',
      'analysis_name': 'scirpy_vdj_t',
      'collection_type': 'SCIRPY_VDJ_T_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    })
  upload_scirpy_report_for_vdj_t_to_ftp = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_t_to_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_t_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCIRPY_VDJ_T_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    })
  upload_scirpy_report_for_vdj_t_to_box = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_t_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_t_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'scirpy_vdj_t_single_sample_report',
    })
  ## PIPELINE
  decide_analysis_branch >> run_scirpy_for_vdj_t >> load_scirpy_report_for_vdj_t_to_db
  load_scirpy_report_for_vdj_t_to_db >> [
    upload_scirpy_report_for_vdj_t_to_ftp,
    upload_scirpy_report_for_vdj_t_to_box,
  ]
  ## TASK
  # Seurat branch: the notebook wrapper runs with analysis_name 'seurat' and
  # kernel 'ir'; its notebook is loaded to the db and the loaded files are then
  # sent to FTP and Box.
  run_seurat_for_sc_5p = PythonOperator(
    task_id='run_seurat_for_sc_5p',
    dag=dag,
    queue='hpc_4G',
    python_callable=run_singlecell_notebook_wrapper_func,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'ir',
      'analysis_name': 'seurat',
      'vdj_dir': 'vdj',
      'count_dir': 'count',
      'output_notebook_key': 'seurat_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    })
  # Loads the 'seurat_notebook' entry; results go under 'output_db_files'.
  load_seurat_report_for_sc_5p_db = PythonOperator(
    task_id='load_seurat_report_for_sc_5p_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_analysis_files_func,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_seurat_for_sc_5p',
      'file_name_key': 'seurat_notebook',
      'analysis_name': 'seurat_5p',
      'collection_type': 'SEURAT_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    })
  upload_seurat_report_for_sc_5p_ftp = PythonOperator(
    task_id='upload_seurat_report_for_sc_5p_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'load_seurat_report_for_sc_5p_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SEURAT_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    })
  upload_seurat_report_for_sc_5p_to_box = PythonOperator(
    task_id='upload_seurat_report_for_sc_5p_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'load_seurat_report_for_sc_5p_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'seurat_single_sample_report',
    })
  ## PIPELINE
  decide_analysis_branch >> run_seurat_for_sc_5p >> load_seurat_report_for_sc_5p_db
  load_seurat_report_for_sc_5p_db >> [
    upload_seurat_report_for_sc_5p_ftp,
    upload_seurat_report_for_sc_5p_to_box,
  ]
  ## TASK
  # Task 'convert_cellranger_bam_to_cram', driven by convert_bam_to_cram_func
  # on the 'hpc_4G4t' queue with 'threads' set to 4. It pulls
  # 'cellranger_output' from 'run_cellranger' and is given 'cram_files' as the
  # key name for its cram files.
  convert_cellranger_bam_to_cram = PythonOperator(
    task_id='convert_cellranger_bam_to_cram',
    dag=dag,
    queue='hpc_4G4t',
    python_callable=convert_bam_to_cram_func,
    params={
      'xcom_pull_files_key': 'cellranger_output',
      'xcom_pull_task': 'run_cellranger',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'threads': 4,
      'analysis_name': 'cellranger',
      'collection_type': 'ANALYSIS_CRAM',
      'collection_table': 'sample',
      'cram_files_xcom_key': 'cram_files',
    })
  ## TASK
  # Branch task 'copy_bam_for_parallel_runs'. 'list_of_tasks' holds the ids of
  # the metrics tasks; the picard tasks further down pull from this task using
  # their own task id as the XCom key.
  # NOTE(review): decide_analysis_branch is handed the id
  # 'copy_bam_for_parallel_runs', yet this task is wired behind
  # convert_cellranger_bam_to_cram and not directly under that branch --
  # confirm the ids returned by the branch callable keep this path from being
  # skipped.
  copy_bam_for_parallel_runs = BranchPythonOperator(
    task_id='copy_bam_for_parallel_runs',
    dag=dag,
    queue='hpc_4G',
    python_callable=index_and_copy_bam_for_parallel_analysis,
    params={
      'xcom_pull_files_key': 'cellranger_output',
      'xcom_pull_task': 'run_cellranger',
      'list_of_tasks': [
        'run_picard_alignment_summary',
        'run_picard_qual_summary',
        'run_picard_rna_summary',
        'run_picard_gc_summary',
        'run_picard_base_dist_summary',
        'run_samtools_stats',
      ],
    })
  ## TASK
  # iRODS upload of the 'cram_files' entry from the conversion task.
  upload_cram_to_irods = PythonOperator(
    task_id='upload_cram_to_irods',
    dag=dag,
    queue='hpc_4G',
    python_callable=irods_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'convert_cellranger_bam_to_cram',
      'xcom_pull_files_key': 'cram_files',
      'collection_name_key': 'sample_igf_id',
      'collection_name_task': 'load_cellranger_result_to_db',
      'analysis_name': 'cellranger_multi',
    })
  ## PIPELINE
  decide_analysis_branch >> convert_cellranger_bam_to_cram
  # we need to load metrics to cram, hence the bam copy follows the conversion
  convert_cellranger_bam_to_cram >> [
    copy_bam_for_parallel_runs,
    upload_cram_to_irods,
  ]
  ## TASK
  # Picard 'CollectAlignmentSummaryMetrics' group: the run task pulls the key
  # 'run_picard_alignment_summary' from 'copy_bam_for_parallel_runs'; afterwards
  # a cleanup task pulls that same key, and a Box upload pulls
  # 'picard_alignment_summary' from the run task.
  run_picard_alignment_summary = PythonOperator(
    task_id='run_picard_alignment_summary',
    dag=dag,
    queue='hpc_4G',
    python_callable=run_picard_for_cellranger,
    params={
      'xcom_pull_files_key': 'run_picard_alignment_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'load_metrics_to_cram': True,
      'java_param': '-Xmx4g',
      'picard_command': 'CollectAlignmentSummaryMetrics',
      'picard_option': {},
      'analysis_files_xcom_key': 'picard_alignment_summary',
      'bam_files_xcom_key': None,
    })
  ## TASK
  cleanup_picard_alignment_summary_input = PythonOperator(
    task_id='cleanup_picard_alignment_summary_input',
    dag=dag,
    queue='hpc_4G',
    python_callable=clean_up_files,
    params={
      'xcom_pull_files_key': 'run_picard_alignment_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
    })
  ## TASK
  upload_picard_alignment_summary_to_box = PythonOperator(
    task_id='upload_picard_alignment_summary_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'run_picard_alignment_summary',
      'xcom_pull_files_key': 'picard_alignment_summary',
      'analysis_tag': 'Picard-CollectAlignmentSummaryMetrics',
    })
  ## PIPELINE
  copy_bam_for_parallel_runs >> run_picard_alignment_summary
  run_picard_alignment_summary >> [
    cleanup_picard_alignment_summary_input,
    upload_picard_alignment_summary_to_box,
  ]
  ## TASK
  # Picard 'QualityScoreDistribution' group: the run task pulls the key
  # 'run_picard_qual_summary' from 'copy_bam_for_parallel_runs'; afterwards a
  # cleanup task pulls that same key, and a Box upload pulls
  # 'picard_qual_summary' from the run task.
  run_picard_qual_summary = PythonOperator(
    task_id='run_picard_qual_summary',
    dag=dag,
    queue='hpc_4G',
    python_callable=run_picard_for_cellranger,
    params={
      'xcom_pull_files_key': 'run_picard_qual_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'load_metrics_to_cram': True,
      'java_param': '-Xmx4g',
      'picard_command': 'QualityScoreDistribution',
      'picard_option': {},
      'analysis_files_xcom_key': 'picard_qual_summary',
      'bam_files_xcom_key': None,
    })
  ## TASK
  cleanup_picard_qual_summary_input = PythonOperator(
    task_id='cleanup_picard_qual_summary_input',
    dag=dag,
    queue='hpc_4G',
    python_callable=clean_up_files,
    params={
      'xcom_pull_files_key': 'run_picard_qual_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
    })
  ## TASK
  upload_picard_qual_summary_to_box = PythonOperator(
    task_id='upload_picard_qual_summary_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'run_picard_qual_summary',
      'xcom_pull_files_key': 'picard_qual_summary',
      'analysis_tag': 'Picard-QualityScoreDistribution',
    })
  ## PIPELINE
  copy_bam_for_parallel_runs >> run_picard_qual_summary
  run_picard_qual_summary >> [
    cleanup_picard_qual_summary_input,
    upload_picard_qual_summary_to_box,
  ]
  ## TASK
  # Picard 'CollectRnaSeqMetrics' group. The run task is placed on 'hpc_8G'
  # with java_param '-Xmx7g' (the other picard tasks in view use 'hpc_4G' and
  # '-Xmx4g'). It pulls the key 'run_picard_rna_summary' from
  # 'copy_bam_for_parallel_runs'.
  run_picard_rna_summary = PythonOperator(
    task_id='run_picard_rna_summary',
    dag=dag,
    queue='hpc_8G',
    python_callable=run_picard_for_cellranger,
    params={
      'xcom_pull_files_key': 'run_picard_rna_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'load_metrics_to_cram': True,
      'java_param': '-Xmx7g',
      'picard_command': 'CollectRnaSeqMetrics',
      'picard_option': {},
      'analysis_files_xcom_key': 'picard_rna_summary',
      'bam_files_xcom_key': None,
    })
  ## PIPELINE
  copy_bam_for_parallel_runs >> run_picard_rna_summary
  ## TASK
  # The cleanup pulls the same (task, key) pair that the run task reads:
  # 'copy_bam_for_parallel_runs' / 'run_picard_rna_summary'. The pull task was
  # previously 'picard_rna_summary', which is an XCom key name and not a task
  # id in this DAG; the other cleanup tasks all pull from
  # 'copy_bam_for_parallel_runs'.
  cleanup_picard_rna_summary_input = PythonOperator(
    task_id='cleanup_picard_rna_summary_input',
    dag=dag,
    queue='hpc_4G',
    python_callable=clean_up_files,
    params={
      'xcom_pull_files_key': 'run_picard_rna_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
    })
  ## PIPELINE
  run_picard_rna_summary >> cleanup_picard_rna_summary_input
  ## TASK
  # Box upload of the 'picard_rna_summary' entry from the run task.
  upload_picard_rna_summary_to_box = PythonOperator(
    task_id='upload_picard_rna_summary_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'run_picard_rna_summary',
      'xcom_pull_files_key': 'picard_rna_summary',
      'analysis_tag': 'Picard-CollectRnaSeqMetrics',
    })
  ## PIPELINE
  run_picard_rna_summary >> upload_picard_rna_summary_to_box
  701. ## TASK
  702. run_picard_gc_summary = \
  703. PythonOperator(
  704. task_id='run_picard_gc_summary',
  705. dag=dag,
  706. queue='hpc_4G',
  707. python_callable=run_picard_for_cellranger,
  708. params={'xcom_pull_files_key':'run_picard_gc_summary',
  709. 'xcom_pull_task':'copy_bam_for_parallel_runs',
  710. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  711. 'analysis_description_xcom_key':'analysis_description',
  712. 'use_ephemeral_space':True,
  713. 'load_metrics_to_cram':True,
  714. 'java_param':'-Xmx4g',
  715. 'picard_command':'CollectGcBiasMetrics',
  716. 'picard_option':{},
  717. 'analysis_files_xcom_key':'picard_gc_summary',
  718. 'bam_files_xcom_key':None})
  719. ## PIPELINE
  720. copy_bam_for_parallel_runs >> run_picard_gc_summary
  721. ## TASK
  722. cleanup_picard_gc_summary_input = \
  723. PythonOperator(
  724. task_id='cleanup_picard_gc_summary_input',
  725. dag=dag,
  726. queue='hpc_4G',
  727. python_callable=clean_up_files,
  728. params={'xcom_pull_files_key':'run_picard_gc_summary',
  729. 'xcom_pull_task':'copy_bam_for_parallel_runs'})
  730. ## PIPELINE
  731. run_picard_gc_summary >> cleanup_picard_gc_summary_input
  732. ## TASK
  733. upload_picard_gc_summary_to_box = \
  734. PythonOperator(
  735. task_id='upload_picard_alignment_summary_to_box',
  736. dag=dag,
  737. queue='hpc_4G',
  738. python_callable=upload_analysis_file_to_box,
  739. params={'xcom_pull_task':'run_picard_gc_summary',
  740. 'xcom_pull_files_key':'picard_gc_summary',
  741. 'analysis_tag':'Picard-CollectGcBiasMetrics'})
  742. ## PIPELINE
  743. run_picard_gc_summary >> upload_picard_gc_summary_to_box
  744. ## TASK
  745. run_picard_base_dist_summary = \
  746. PythonOperator(
  747. task_id='run_picard_base_dist_summary',
  748. dag=dag,
  749. queue='hpc_4G',
  750. python_callable=run_picard_for_cellranger,
  751. params={'xcom_pull_files_key':'run_picard_base_dist_summary',
  752. 'xcom_pull_task':'copy_bam_for_parallel_runs',
  753. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  754. 'analysis_description_xcom_key':'analysis_description',
  755. 'use_ephemeral_space':True,
  756. 'load_metrics_to_cram':True,
  757. 'java_param':'-Xmx4g',
  758. 'picard_command':'CollectBaseDistributionByCycle',
  759. 'picard_option':{},
  760. 'analysis_files_xcom_key':'picard_base_summary',
  761. 'bam_files_xcom_key':None})
  762. ## PIPELINE
  763. copy_bam_for_parallel_runs >> run_picard_base_dist_summary
  764. ## TASK
  765. cleanup_picard_base_dist_summary_input = \
  766. PythonOperator(
  767. task_id='cleanup_picard_base_dist_summary_input',
  768. dag=dag,
  769. queue='hpc_4G',
  770. python_callable=clean_up_files,
  771. params={'xcom_pull_files_key':'run_picard_base_dist_summary',
  772. 'xcom_pull_task':'copy_bam_for_parallel_runs'})
  773. ## PIPELINE
  774. run_picard_base_dist_summary >> cleanup_picard_base_dist_summary_input
  775. ## TASK
  776. upload_picard_base_dist_summary_to_box = \
  777. PythonOperator(
  778. task_id='upload_picard_base_dist_summary_to_box',
  779. dag=dag,
  780. queue='hpc_4G',
  781. python_callable=upload_analysis_file_to_box,
  782. params={'xcom_pull_task':'run_picard_base_dist_summary',
  783. 'xcom_pull_files_key':'picard_base_summary',
  784. 'analysis_tag':'Picard-CollectBaseDistributionByCycle'})
  785. ## PIPELINE
  786. run_picard_base_dist_summary >> upload_picard_base_dist_summary_to_box
  787. ## TASK
  788. run_samtools_stats = \
  789. PythonOperator(
  790. task_id='run_samtools_stats',
  791. dag=dag,
  792. queue='hpc_4G4t',
  793. python_callable=run_samtools_for_cellranger,
  794. params={'xcom_pull_files_key':'run_samtools_stats',
  795. 'xcom_pull_task':'copy_bam_for_parallel_runs',
  796. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  797. 'analysis_description_xcom_key':'analysis_description',
  798. 'use_ephemeral_space':True,
  799. 'load_metrics_to_cram':True,
  800. 'samtools_command':'stats',
  801. 'threads':4,
  802. 'analysis_files_xcom_key':'samtools_stats'})
  803. ## PIPELINE
  804. copy_bam_for_parallel_runs >> run_samtools_stats
  805. ## TASK
  806. upload_samtools_stats_to_box = \
  807. PythonOperator(
  808. task_id='upload_samtools_stats_to_box',
  809. dag=dag,
  810. queue='hpc_4G',
  811. python_callable=upload_analysis_file_to_box,
  812. params={'xcom_pull_task':'run_samtools_stats',
  813. 'xcom_pull_files_key':'samtools_stats',
  814. 'analysis_tag':'Samtools-stats'})
  815. ## PIPELINE
  816. run_samtools_stats >> upload_samtools_stats_to_box
  817. ## TASK
  818. run_samtools_idxstats = \
  819. PythonOperator(
  820. task_id='run_samtools_idxstats',
  821. dag=dag,
  822. queue='hpc_4G4t',
  823. python_callable=run_samtools_for_cellranger,
  824. params={'xcom_pull_files_key':'run_samtools_stats',
  825. 'xcom_pull_task':'copy_bam_for_parallel_runs',
  826. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  827. 'analysis_description_xcom_key':'analysis_description',
  828. 'use_ephemeral_space':True,
  829. 'load_metrics_to_cram':True,
  830. 'samtools_command':'idxstats',
  831. 'threads':4,
  832. 'analysis_files_xcom_key':'samtools_idxstats'})
  833. ## PIPELINE
  834. run_samtools_stats >> run_samtools_idxstats
  835. ## TASK
  836. cleanup_samtools_stats_input = \
  837. PythonOperator(
  838. task_id='cleanup_samtools_stats_input',
  839. dag=dag,
  840. queue='hpc_4G',
  841. python_callable=clean_up_files,
  842. params={'xcom_pull_files_key':'run_samtools_stats',
  843. 'xcom_pull_task':'copy_bam_for_parallel_runs'})
  844. ## PIPELINE
  845. run_samtools_idxstats >> cleanup_samtools_stats_input
  846. ## TASK
  847. upload_samtools_idxstats_to_box = \
  848. PythonOperator(
  849. task_id='upload_samtools_idxstats_to_box',
  850. dag=dag,
  851. queue='hpc_4G',
  852. python_callable=upload_analysis_file_to_box,
  853. params={'xcom_pull_task':'run_samtools_idxstats',
  854. 'xcom_pull_files_key':'samtools_idxstats',
  855. 'analysis_tag':'Samtools-idxstats'})
  856. ## PIPELINE
  857. run_samtools_idxstats >> upload_samtools_idxstats_to_box
  858. ## TASK
  859. run_multiqc = \
  860. PythonOperator(
  861. task_id='run_multiqc',
  862. dag=dag,
  863. queue='hpc_4G',
  864. trigger_rule='none_failed_or_skipped',
  865. python_callable=run_multiqc_for_cellranger,
  866. params={
  867. 'list_of_analysis_xcoms_and_tasks':{
  868. 'run_cellranger':'cellranger_output',
  869. 'run_picard_alignment_summary':'picard_alignment_summary',
  870. 'run_picard_qual_summary':'picard_qual_summary',
  871. 'run_picard_rna_summary':'picard_rna_summary',
  872. 'run_picard_gc_summary':'picard_gc_summary',
  873. 'run_picard_base_dist_summary':'picard_base_summary',
  874. 'run_samtools_stats':'samtools_stats',
  875. 'run_samtools_idxstats':'samtools_idxstats'},
  876. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  877. 'analysis_description_xcom_key':'analysis_description',
  878. 'use_ephemeral_space':True,
  879. 'multiqc_html_file_xcom_key':'multiqc_html',
  880. 'multiqc_data_file_xcom_key':'multiqc_data',
  881. 'tool_order_list':['picad','samtools']})
  882. ## PIPELINE
  883. run_picard_alignment_summary >> run_multiqc
  884. run_picard_qual_summary >> run_multiqc
  885. run_picard_rna_summary >> run_multiqc
  886. run_picard_gc_summary >> run_multiqc
  887. run_picard_base_dist_summary >> run_multiqc
  888. run_samtools_idxstats >> run_multiqc
  889. ## TASK
  890. load_multiqc_html = \
  891. PythonOperator(
  892. task_id='load_multiqc_html',
  893. dag=dag,
  894. queue='hpc_4G',
  895. python_callable=load_analysis_files_func,
  896. params={'collection_name_task':'load_cellranger_result_to_db',
  897. 'collection_name_key':'sample_igf_id',
  898. 'file_name_task':'run_multiqc',
  899. 'file_name_key':'multiqc_html',
  900. 'analysis_name':'multiqc',
  901. 'collection_type':'MULTIQC_HTML',
  902. 'collection_table':'sample',
  903. 'output_files_key':'output_db_files'})
  904. ## PIPELINE
  905. run_multiqc >> load_multiqc_html
  906. ## TASK
  907. upload_multiqc_to_ftp = \
  908. PythonOperator(
  909. task_id='upload_multiqc_to_ftp',
  910. dag=dag,
  911. queue='hpc_4G',
  912. python_callable=ftp_files_upload_for_analysis,
  913. params={'xcom_pull_task':'load_multiqc_html',
  914. 'xcom_pull_files_key':'output_db_files',
  915. 'collection_name_task':'load_cellranger_result_to_db',
  916. 'collection_name_key':'sample_igf_id',
  917. 'collection_type':'FTP_MULTIQC_HTML',
  918. 'collection_table':'sample',
  919. 'collect_remote_file':True})
  920. ## PIPELINE
  921. load_multiqc_html >> upload_multiqc_to_ftp
  922. ## TASK
  923. upload_multiqc_to_box = \
  924. PythonOperator(
  925. task_id='upload_multiqc_to_box',
  926. dag=dag,
  927. queue='hpc_4G',
  928. python_callable=upload_analysis_file_to_box,
  929. params={'xcom_pull_task':'load_multiqc_html',
  930. 'xcom_pull_files_key':'output_db_files',
  931. 'analysis_tag':'multiqc_report'})
  932. ## PIPELINE
  933. load_multiqc_html >> upload_multiqc_to_box
  934. ## TASK
  935. update_analysis_and_status = \
  936. PythonOperator(
  937. task_id='update_analysis_and_status',
  938. dag=dag,
  939. queue='hpc_4G',
  940. python_callable=change_pipeline_status,
  941. trigger_rule='none_failed_or_skipped',
  942. params={'new_status':'FINISHED',
  943. 'no_change_status':'SEEDED'})
  944. ## PIPELINE
  945. upload_multiqc_to_ftp >> update_analysis_and_status
  946. upload_scanpy_report_for_sc_5p_to_ftp >> update_analysis_and_status
  947. upload_scanpy_report_for_sc_5p_to_box >> update_analysis_and_status
  948. upload_cellbrowser_for_sc_5p_to_ftp >> update_analysis_and_status
  949. upload_scirpy_report_for_vdj_to_ftp >> update_analysis_and_status
  950. upload_scirpy_report_for_vdj_to_box >> update_analysis_and_status
  951. upload_scirpy_report_for_vdj_b_to_ftp >> update_analysis_and_status
  952. upload_scirpy_report_for_vdj_b_to_box >> update_analysis_and_status
  953. upload_scirpy_report_for_vdj_t_to_ftp >> update_analysis_and_status
  954. upload_scirpy_report_for_vdj_t_to_box >> update_analysis_and_status
  955. upload_seurat_report_for_sc_5p_ftp >> update_analysis_and_status
  956. upload_seurat_report_for_sc_5p_to_box >> update_analysis_and_status
  957. upload_cellranger_results_to_irods >> update_analysis_and_status
  958. upload_cellranger_report_to_ftp >> update_analysis_and_status
  959. upload_cellranger_report_to_box >> update_analysis_and_status
  960. upload_cram_to_irods >> update_analysis_and_status