dag9_tenx_single_cell_immune_profiling.py 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984
  1. from datetime import timedelta
  2. import os,json,logging,subprocess
  3. from airflow.models import DAG,Variable
  4. from airflow.utils.dates import days_ago
  5. from airflow.operators.python_operator import PythonOperator
  6. from airflow.operators.python_operator import BranchPythonOperator
  7. from airflow.operators.dummy_operator import DummyOperator
  8. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import fetch_analysis_info_and_branch_func
  9. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import configure_cellranger_run_func
  10. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_sc_read_trimmming_func
  11. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_cellranger_tool
  12. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import decide_analysis_branch_func
  13. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_cellranger_result_to_db_func
  14. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import ftp_files_upload_for_analysis
  15. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import irods_files_upload_for_analysis
  16. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_singlecell_notebook_wrapper_func
  17. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_analysis_files_func
  18. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import task_branch_function
  19. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import upload_analysis_file_to_box
  20. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import convert_bam_to_cram_func
  21. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_picard_for_cellranger
  22. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_samtools_for_cellranger
  23. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_multiqc_for_cellranger
  24. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import index_and_copy_bam_for_parallel_analysis
  25. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import change_pipeline_status
  26. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import clean_up_files
  27. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import create_and_update_qc_pages
  ## ARGS
  # Defaults applied to every operator attached to the DAG.
  # NOTE(review): 'max_active_runs' and 'catchup' are DAG constructor
  # arguments; Airflow forwards default_args to operators only, so these two
  # entries are presumably inert here -- confirm and set them on the DAG.
  default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(2),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 4,
    'max_active_runs':10,
    'catchup':False,
    'retry_delay': timedelta(minutes=5),
    'provide_context': True,
  }
  # Feature-type configuration. The DAG body calls ``.keys()`` on this value,
  # so a stored Variable has to be decoded from JSON into a dict; without
  # ``deserialize_json=True`` Variable.get hands back the raw string.
  # When the Variable is not set the empty-dict default is returned as is,
  # which results in no per-feature tasks being generated.
  FEATURE_TYPE_LIST = \
    Variable.get(
      'tenx_single_cell_immune_profiling_feature_types',
      default_var={},
      deserialize_json=True)
  ## DAG
  # max_active_runs and catchup are DAG constructor arguments. They are set
  # here explicitly because entries of the same name inside default_args are
  # only forwarded to operators and never reach the DAG object.
  # schedule_interval=None: the DAG has no schedule of its own.
  dag = \
    DAG(
      dag_id='dag9_tenx_single_cell_immune_profiling',
      schedule_interval=None,
      max_active_runs=10,
      catchup=False,
      tags=['hpc','analysis','tenx','sc'],
      default_args=default_args,
      orientation='LR')
  51. with dag:
  ## TASK
  # First task of the DAG: a branch operator backed by
  # fetch_analysis_info_and_branch_func. The id of the 'no_analysis' task and
  # the two xcom key names are handed to the callable through params.
  fetch_analysis_info_and_branch = BranchPythonOperator(
    task_id='fetch_analysis_info',
    python_callable=fetch_analysis_info_and_branch_func,
    queue='hpc_4G',
    params={
      'no_analysis_task': 'no_analysis',
      'analysis_description_xcom_key': 'analysis_description',
      'analysis_info_xcom_key': 'analysis_info',
    },
    dag=dag,
  )
  ## TASK
  # Runs configure_cellranger_run_func with trigger rule
  # 'none_failed_or_skipped'. It is wired downstream of every
  # collect_trimmed_files_* task in the feature-type loop.
  configure_cellranger_run = PythonOperator(
    task_id='configure_cellranger_run',
    python_callable=configure_cellranger_run_func,
    trigger_rule='none_failed_or_skipped',
    queue='hpc_4G',
    params={
      'xcom_pull_task_id': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'analysis_info_xcom_key': 'analysis_info',
      'library_csv_xcom_key': 'cellranger_library_csv',
    },
    dag=dag,
  )
  # One branch -> trim -> collect chain is generated for each key of
  # FEATURE_TYPE_LIST.
  for analysis_name in FEATURE_TYPE_LIST.keys():
    ## TASK
    # Branch operator whose task id is the feature type name itself; the
    # callable receives the 'run_trim' prefix used by the task ids below.
    task_branch = BranchPythonOperator(
      task_id=analysis_name,
      python_callable=task_branch_function,
      queue='hpc_4G',
      params={
        'xcom_pull_task_id': 'fetch_analysis_info',
        'analysis_info_xcom_key': 'analysis_info',
        'analysis_name': analysis_name,
        'task_prefix': 'run_trim',
      },
      dag=dag,
    )
    run_trim_list = []
    # Ten trimming tasks (run_id 0..9) are declared per feature type.
    for run_id in range(10):
      ## TASK
      t = PythonOperator(
        task_id='run_trim_{0}_{1}'.format(analysis_name, run_id),
        python_callable=run_sc_read_trimmming_func,
        queue='hpc_4G',
        params={
          'xcom_pull_task_id': 'fetch_analysis_info',
          'analysis_info_xcom_key': 'analysis_info',
          'analysis_description_xcom_key': 'analysis_description',
          'analysis_name': analysis_name,
          'run_id': run_id,
          'r1_length': 0,
          'r2_length': 0,
          'fastq_input_dir_tag': 'fastq_dir',
          'use_ephemeral_space': True,
          'fastq_output_dir_tag': 'output_path',
        },
        dag=dag,
      )
      run_trim_list.append(t)
    ## TASK
    # Join point for the trimming tasks of this feature type.
    collect_trimmed_files = DummyOperator(
      task_id='collect_trimmed_files_{0}'.format(analysis_name),
      trigger_rule='none_failed_or_skipped',
      dag=dag,
    )
    ## PIPELINE
    fetch_analysis_info_and_branch >> task_branch >> run_trim_list
    run_trim_list >> collect_trimmed_files >> configure_cellranger_run
  ## TASK
  # Placeholder task; its id is the one passed to fetch_analysis_info as
  # 'no_analysis_task'.
  no_analysis = DummyOperator(dag=dag, task_id='no_analysis')
  ## PIPELINE
  fetch_analysis_info_and_branch.set_downstream(no_analysis)
  ## TASK
  # Runs run_cellranger_tool on the 'hpc_64G16t24hr' queue; the cellranger
  # options passed in params ask for 16 local cores and 64 (localmem).
  run_cellranger = PythonOperator(
    task_id='run_cellranger',
    python_callable=run_cellranger_tool,
    queue='hpc_64G16t24hr',
    params={
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'library_csv_xcom_key': 'cellranger_library_csv',
      'library_csv_xcom_pull_task': 'configure_cellranger_run',
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_options': ['--localcores 16', '--localmem 64'],
    },
    dag=dag,
  )
  ## PIPELINE
  configure_cellranger_run.set_downstream(run_cellranger)
  ## TASK
  # Branch operator placed after run_cellranger. The ids of the candidate
  # downstream tasks are passed to decide_analysis_branch_func via params.
  # NOTE(review): the key 'run_scirpy_vdj_t_task' lacks the '_for_' used by
  # its sibling keys -- verify against what the callable actually reads.
  decide_analysis_branch = BranchPythonOperator(
    task_id='decide_analysis_branch',
    python_callable=decide_analysis_branch_func,
    queue='hpc_4G',
    params={
      'load_cellranger_result_to_db_task': 'load_cellranger_result_to_db',
      'run_scanpy_for_sc_5p_task': 'run_scanpy_for_sc_5p',
      'run_scirpy_for_vdj_task': 'run_scirpy_for_vdj',
      'run_scirpy_for_vdj_b_task': 'run_scirpy_for_vdj_b',
      'run_scirpy_vdj_t_task': 'run_scirpy_for_vdj_t',
      'run_seurat_for_sc_5p_task': 'run_seurat_for_sc_5p',
      'convert_cellranger_bam_to_cram_task': 'convert_cellranger_bam_to_cram',
      'library_csv_xcom_key': 'cellranger_library_csv',
      'library_csv_xcom_pull_task': 'configure_cellranger_run',
    },
    dag=dag,
  )
  ## PIPELINE
  run_cellranger.set_downstream(decide_analysis_branch)
  ## TASK
  # Cellranger result branch: one load task followed by three uploads
  # (ftp, box, irods) that all pull xcom values from the load task.
  load_cellranger_result_to_db = PythonOperator(
    task_id='load_cellranger_result_to_db',
    python_callable=load_cellranger_result_to_db_func,
    queue='hpc_4G',
    params={
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'CELLRANGER_MULTI',
      'html_collection_type': 'CELLRANGER_HTML',
      'collection_table': 'sample',
      'xcom_collection_name_key': 'sample_igf_id',
      'genome_column': 'genome_build',
      'analysis_name': 'cellranger_multi',
      'output_xcom_key': 'loaded_output_files',
      'html_xcom_key': 'html_report_file',
      'html_report_file_name': 'web_summary.html',
    },
    dag=dag,
  )
  upload_cellranger_report_to_ftp = PythonOperator(
    task_id='upload_cellranger_report_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'html_report_file',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_CELLRANGER_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    },
    dag=dag,
  )
  upload_cellranger_report_to_box = PythonOperator(
    task_id='upload_cellranger_report_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'html_report_file',
      'analysis_tag': 'cellranger_multi',
    },
    dag=dag,
  )
  upload_cellranger_results_to_irods = PythonOperator(
    task_id='upload_cellranger_results_to_irods',
    python_callable=irods_files_upload_for_analysis,
    queue='hpc_4G',
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'loaded_output_files',
      'collection_name_key': 'sample_igf_id',
      'collection_name_task': 'load_cellranger_result_to_db',
      'analysis_name': 'cellranger_multi',
    },
    dag=dag,
  )
  ## PIPELINE
  decide_analysis_branch >> load_cellranger_result_to_db
  load_cellranger_result_to_db >> [
    upload_cellranger_report_to_ftp,
    upload_cellranger_report_to_box,
    upload_cellranger_results_to_irods,
  ]
  ## TASK
  # Scanpy branch: notebook run, load of the report, then ftp/box uploads of
  # the report plus an ftp upload of the cellbrowser output.
  # NOTE(review): the load/upload tasks name 'load_cellranger_result_to_db'
  # as collection_name_task although that task is not upstream of this
  # branch -- confirm the value is available when these tasks run.
  run_scanpy_for_sc_5p = PythonOperator(
    task_id='run_scanpy_for_sc_5p',
    python_callable=run_singlecell_notebook_wrapper_func,
    queue='hpc_4G',
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'count_dir': 'count',
      'analysis_name': 'scanpy',
      'output_notebook_key': 'scanpy_notebook',
      'output_cellbrowser_key': 'cellbrowser_dirs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    },
    dag=dag,
  )
  load_scanpy_report_for_sc_5p_to_db = PythonOperator(
    task_id='load_scanpy_report_for_sc_5p_to_db',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scanpy_for_sc_5p',
      'file_name_key': 'scanpy_notebook',
      'analysis_name': 'scanpy_5p',
      'collection_type': 'SCANPY_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    },
    dag=dag,
  )
  upload_scanpy_report_for_sc_5p_to_ftp = PythonOperator(
    task_id='upload_scanpy_report_for_sc_5p_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    params={
      'xcom_pull_task': 'load_scanpy_report_for_sc_5p_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCANPY_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    },
    dag=dag,
  )
  upload_scanpy_report_for_sc_5p_to_box = PythonOperator(
    task_id='upload_scanpy_report_for_sc_5p_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    params={
      'xcom_pull_task': 'load_scanpy_report_for_sc_5p_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'scanpy_single_sample_report',
    },
    dag=dag,
  )
  upload_cellbrowser_for_sc_5p_to_ftp = PythonOperator(
    task_id='upload_cellbrowser_for_sc_5p_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    params={
      'xcom_pull_task': 'run_scanpy_for_sc_5p',
      'xcom_pull_files_key': 'cellbrowser_dirs',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_CELLBROWSER',
      'collection_table': 'sample',
      'collect_remote_file': True,
    },
    dag=dag,
  )
  ## PIPELINE
  decide_analysis_branch >> run_scanpy_for_sc_5p
  run_scanpy_for_sc_5p >> [
    load_scanpy_report_for_sc_5p_to_db,
    upload_cellbrowser_for_sc_5p_to_ftp,
  ]
  load_scanpy_report_for_sc_5p_to_db >> [
    upload_scanpy_report_for_sc_5p_to_ftp,
    upload_scanpy_report_for_sc_5p_to_box,
  ]
  ## TASK
  # Scirpy branch for the 'vdj' directory: notebook run, load of the report,
  # then ftp and box uploads of the loaded files.
  run_scirpy_for_vdj = PythonOperator(
    task_id='run_scirpy_for_vdj',
    python_callable=run_singlecell_notebook_wrapper_func,
    queue='hpc_4G',
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'analysis_name': 'scirpy',
      'vdj_dir': 'vdj',
      'count_dir': 'count',
      'output_notebook_key': 'scirpy_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    },
    dag=dag,
  )
  load_scirpy_report_for_vdj_to_db = PythonOperator(
    task_id='load_scirpy_report_for_vdj_to_db',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scirpy_for_vdj',
      'file_name_key': 'scirpy_notebook',
      'analysis_name': 'scirpy_vdj',
      'collection_type': 'SCIRPY_VDJ_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    },
    dag=dag,
  )
  upload_scirpy_report_for_vdj_to_ftp = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCIRPY_VDJ_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    },
    dag=dag,
  )
  upload_scirpy_report_for_vdj_to_box = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'scirpy_vdj_single_sample_report',
    },
    dag=dag,
  )
  ## PIPELINE
  decide_analysis_branch >> run_scirpy_for_vdj >> load_scirpy_report_for_vdj_to_db
  load_scirpy_report_for_vdj_to_db >> [
    upload_scirpy_report_for_vdj_to_ftp,
    upload_scirpy_report_for_vdj_to_box,
  ]
  ## TASK
  # Scirpy branch for the 'vdj_b' directory: notebook run, load of the
  # report and ftp upload of the loaded files.
  run_scirpy_for_vdj_b = PythonOperator(
    task_id='run_scirpy_for_vdj_b',
    python_callable=run_singlecell_notebook_wrapper_func,
    queue='hpc_4G',
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'analysis_name': 'scirpy',
      'vdj_dir': 'vdj_b',
      'count_dir': 'count',
      'output_notebook_key': 'scirpy_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    },
    dag=dag,
  )
  load_scirpy_report_for_vdj_b_to_db = PythonOperator(
    task_id='load_scirpy_report_for_vdj_b_to_db',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scirpy_for_vdj_b',
      'file_name_key': 'scirpy_notebook',
      'analysis_name': 'scirpy_vdj_b',
      'collection_type': 'SCIRPY_VDJ_B_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    },
    dag=dag,
  )
  upload_scirpy_report_for_vdj_b_to_ftp = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_b_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_b_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCIRPY_VDJ_B_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    },
    dag=dag,
  )
  # Box upload of the vdj_b scirpy report loaded by
  # load_scirpy_report_for_vdj_b_to_db.
  # The task id previously read 'upload_scanpy_report_for_vdj_b_to_box'
  # (copy-paste from the scanpy branch); it now matches the variable name and
  # the naming of the sibling vdj / vdj_t box-upload tasks.
  # NOTE: renaming a task id starts a fresh task history in Airflow.
  upload_scirpy_report_for_vdj_b_to_box = \
    PythonOperator(
      task_id='upload_scirpy_report_for_vdj_b_to_box',
      dag=dag,
      queue='hpc_4G',
      python_callable=upload_analysis_file_to_box,
      params={'xcom_pull_task':'load_scirpy_report_for_vdj_b_to_db',
              'xcom_pull_files_key':'output_db_files',
              'analysis_tag':'scirpy_vdj_b_single_sample_report'})
  ## PIPELINE
  # branch -> notebook run -> load, then both uploads fan out from the load.
  decide_analysis_branch >> run_scirpy_for_vdj_b >> load_scirpy_report_for_vdj_b_to_db
  load_scirpy_report_for_vdj_b_to_db >> [
    upload_scirpy_report_for_vdj_b_to_ftp,
    upload_scirpy_report_for_vdj_b_to_box,
  ]
  ## TASK
  # Scirpy branch for the 'vdj_t' directory: notebook run, load of the
  # report, then ftp and box uploads of the loaded files.
  run_scirpy_for_vdj_t = PythonOperator(
    task_id='run_scirpy_for_vdj_t',
    python_callable=run_singlecell_notebook_wrapper_func,
    queue='hpc_4G',
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'analysis_name': 'scirpy',
      'vdj_dir': 'vdj_t',
      'count_dir': 'count',
      'output_notebook_key': 'scirpy_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    },
    dag=dag,
  )
  load_scirpy_report_for_vdj_t_to_db = PythonOperator(
    task_id='load_scirpy_report_for_vdj_t_to_db',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scirpy_for_vdj_t',
      'file_name_key': 'scirpy_notebook',
      'analysis_name': 'scirpy_vdj_t',
      'collection_type': 'SCIRPY_VDJ_T_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    },
    dag=dag,
  )
  upload_scirpy_report_for_vdj_t_to_ftp = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_t_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_t_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCIRPY_VDJ_T_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    },
    dag=dag,
  )
  upload_scirpy_report_for_vdj_t_to_box = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_t_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_t_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'scirpy_vdj_t_single_sample_report',
    },
    dag=dag,
  )
  ## PIPELINE
  decide_analysis_branch >> run_scirpy_for_vdj_t >> load_scirpy_report_for_vdj_t_to_db
  load_scirpy_report_for_vdj_t_to_db >> [
    upload_scirpy_report_for_vdj_t_to_ftp,
    upload_scirpy_report_for_vdj_t_to_box,
  ]
  ## TASK
  # Seurat branch: same notebook wrapper as the scanpy/scirpy branches but
  # with kernel 'ir'; followed by load of the report and ftp/box uploads.
  run_seurat_for_sc_5p = PythonOperator(
    task_id='run_seurat_for_sc_5p',
    python_callable=run_singlecell_notebook_wrapper_func,
    queue='hpc_4G',
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'ir',
      'analysis_name': 'seurat',
      'vdj_dir': 'vdj',
      'count_dir': 'count',
      'output_notebook_key': 'seurat_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    },
    dag=dag,
  )
  load_seurat_report_for_sc_5p_db = PythonOperator(
    task_id='load_seurat_report_for_sc_5p_db',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_seurat_for_sc_5p',
      'file_name_key': 'seurat_notebook',
      'analysis_name': 'seurat_5p',
      'collection_type': 'SEURAT_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    },
    dag=dag,
  )
  upload_seurat_report_for_sc_5p_ftp = PythonOperator(
    task_id='upload_seurat_report_for_sc_5p_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    params={
      'xcom_pull_task': 'load_seurat_report_for_sc_5p_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SEURAT_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    },
    dag=dag,
  )
  upload_seurat_report_for_sc_5p_to_box = PythonOperator(
    task_id='upload_seurat_report_for_sc_5p_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    params={
      'xcom_pull_task': 'load_seurat_report_for_sc_5p_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'seurat_single_sample_report',
    },
    dag=dag,
  )
  ## PIPELINE
  decide_analysis_branch >> run_seurat_for_sc_5p >> load_seurat_report_for_sc_5p_db
  load_seurat_report_for_sc_5p_db >> [
    upload_seurat_report_for_sc_5p_ftp,
    upload_seurat_report_for_sc_5p_to_box,
  ]
  ## TASK
  # Runs convert_bam_to_cram_func on the 'hpc_4G4t' queue with threads=4,
  # pulling the 'cellranger_output' xcom from run_cellranger.
  convert_cellranger_bam_to_cram = PythonOperator(
    task_id='convert_cellranger_bam_to_cram',
    python_callable=convert_bam_to_cram_func,
    queue='hpc_4G4t',
    params={
      'xcom_pull_files_key': 'cellranger_output',
      'xcom_pull_task': 'run_cellranger',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'threads': 4,
      'analysis_name': 'cellranger',
      'collection_type': 'ANALYSIS_CRAM',
      'collection_table': 'sample',
      'cram_files_xcom_key': 'cram_files',
    },
    dag=dag,
  )
  ## TASK
  # Branch operator backed by index_and_copy_bam_for_parallel_analysis; the
  # ids of the QC tasks (picard and samtools) are passed as 'list_of_tasks'.
  copy_bam_for_parallel_runs = BranchPythonOperator(
    task_id='copy_bam_for_parallel_runs',
    python_callable=index_and_copy_bam_for_parallel_analysis,
    queue='hpc_4G',
    params={
      'xcom_pull_files_key': 'cellranger_output',
      'xcom_pull_task': 'run_cellranger',
      'list_of_tasks': [
        'run_picard_alignment_summary',
        'run_picard_qual_summary',
        'run_picard_rna_summary',
        'run_picard_gc_summary',
        'run_picard_base_dist_summary',
        'run_samtools_stats',
      ],
    },
    dag=dag,
  )
  ## TASK
  # iRODS upload of the 'cram_files' xcom produced by
  # convert_cellranger_bam_to_cram.
  upload_cram_to_irods = PythonOperator(
    task_id='upload_cram_to_irods',
    python_callable=irods_files_upload_for_analysis,
    queue='hpc_4G',
    params={
      'xcom_pull_task': 'convert_cellranger_bam_to_cram',
      'xcom_pull_files_key': 'cram_files',
      'collection_name_key': 'sample_igf_id',
      'collection_name_task': 'load_cellranger_result_to_db',
      'analysis_name': 'cellranger_multi',
    },
    dag=dag,
  )
  ## PIPELINE
  # The bam copy is placed after the cram conversion because the metrics
  # need to be loaded to the cram.
  decide_analysis_branch >> convert_cellranger_bam_to_cram
  convert_cellranger_bam_to_cram >> [
    copy_bam_for_parallel_runs,
    upload_cram_to_irods,
  ]
  ## TASK
  # Picard CollectAlignmentSummaryMetrics on the bam copy registered under
  # the 'run_picard_alignment_summary' xcom key of copy_bam_for_parallel_runs.
  run_picard_alignment_summary = PythonOperator(
    task_id='run_picard_alignment_summary',
    python_callable=run_picard_for_cellranger,
    queue='hpc_4G',
    params={
      'xcom_pull_files_key': 'run_picard_alignment_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'load_metrics_to_cram': True,
      'java_param': '-Xmx4g',
      'picard_command': 'CollectAlignmentSummaryMetrics',
      'picard_option': {},
      'analysis_files_xcom_key': 'picard_alignment_summary',
      'bam_files_xcom_key': None,
    },
    dag=dag,
  )
  ## TASK
  # Runs clean_up_files on the same xcom entry the picard task read.
  cleanup_picard_alignment_summary_input = PythonOperator(
    task_id='cleanup_picard_alignment_summary_input',
    python_callable=clean_up_files,
    queue='hpc_4G',
    params={
      'xcom_pull_files_key': 'run_picard_alignment_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
    },
    dag=dag,
  )
  ## TASK
  upload_picard_alignment_summary_to_box = PythonOperator(
    task_id='upload_picard_alignment_summary_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    params={
      'xcom_pull_task': 'run_picard_alignment_summary',
      'xcom_pull_files_key': 'picard_alignment_summary',
      'analysis_tag': 'Picard-CollectAlignmentSummaryMetrics',
    },
    dag=dag,
  )
  ## PIPELINE
  copy_bam_for_parallel_runs >> run_picard_alignment_summary
  run_picard_alignment_summary >> [
    cleanup_picard_alignment_summary_input,
    upload_picard_alignment_summary_to_box,
  ]
  ## TASK
  # Picard QualityScoreDistribution on the bam copy registered under the
  # 'run_picard_qual_summary' xcom key of copy_bam_for_parallel_runs.
  run_picard_qual_summary = PythonOperator(
    task_id='run_picard_qual_summary',
    python_callable=run_picard_for_cellranger,
    queue='hpc_4G',
    params={
      'xcom_pull_files_key': 'run_picard_qual_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'load_metrics_to_cram': True,
      'java_param': '-Xmx4g',
      'picard_command': 'QualityScoreDistribution',
      'picard_option': {},
      'analysis_files_xcom_key': 'picard_qual_summary',
      'bam_files_xcom_key': None,
    },
    dag=dag,
  )
  ## TASK
  # Runs clean_up_files on the same xcom entry the picard task read.
  cleanup_picard_qual_summary_input = PythonOperator(
    task_id='cleanup_picard_qual_summary_input',
    python_callable=clean_up_files,
    queue='hpc_4G',
    params={
      'xcom_pull_files_key': 'run_picard_qual_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
    },
    dag=dag,
  )
  ## TASK
  upload_picard_qual_summary_to_box = PythonOperator(
    task_id='upload_picard_qual_summary_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    params={
      'xcom_pull_task': 'run_picard_qual_summary',
      'xcom_pull_files_key': 'picard_qual_summary',
      'analysis_tag': 'Picard-QualityScoreDistribution',
    },
    dag=dag,
  )
  ## PIPELINE
  copy_bam_for_parallel_runs >> run_picard_qual_summary
  run_picard_qual_summary >> [
    cleanup_picard_qual_summary_input,
    upload_picard_qual_summary_to_box,
  ]
  ## TASK
  # Picard CollectRnaSeqMetrics; unlike the other picard tasks it is queued
  # on 'hpc_8G' and given a '-Xmx7g' java parameter.
  run_picard_rna_summary = PythonOperator(
    task_id='run_picard_rna_summary',
    python_callable=run_picard_for_cellranger,
    queue='hpc_8G',
    params={
      'xcom_pull_files_key': 'run_picard_rna_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'load_metrics_to_cram': True,
      'java_param': '-Xmx7g',
      'picard_command': 'CollectRnaSeqMetrics',
      'picard_option': {},
      'analysis_files_xcom_key': 'picard_rna_summary',
      'bam_files_xcom_key': None,
    },
    dag=dag,
  )
  ## PIPELINE
  copy_bam_for_parallel_runs.set_downstream(run_picard_rna_summary)
  ## TASK
  # Cleanup step scheduled after run_picard_rna_summary.
  # NOTE(review): presumably clean_up_files removes the files listed under
  # the configured XCom key -- confirm in its implementation.
  cleanup_picard_rna_summary_input = PythonOperator(
    task_id='cleanup_picard_rna_summary_input',
    dag=dag,
    queue='hpc_4G',
    python_callable=clean_up_files,
    params={
      'xcom_pull_files_key': 'run_picard_rna_summary',
      # The 'run_picard_rna_summary' key is published by
      # copy_bam_for_parallel_runs (run_picard_rna_summary reads its input
      # from that same task/key pair). 'picard_rna_summary' is the name of an
      # output XCom key, not a task id, so it must not be used here.
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
    },
  )
  ## PIPELINE
  run_picard_rna_summary >> cleanup_picard_rna_summary_input
  ## TASK
  # Pass the 'picard_rna_summary' XCom of run_picard_rna_summary to
  # upload_analysis_file_to_box, tagged 'Picard-CollectRnaSeqMetrics'.
  upload_picard_rna_summary_to_box = PythonOperator(
    task_id='upload_picard_rna_summary_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'run_picard_rna_summary',
      'xcom_pull_files_key': 'picard_rna_summary',
      'analysis_tag': 'Picard-CollectRnaSeqMetrics',
    },
  )
  ## PIPELINE
  run_picard_rna_summary >> upload_picard_rna_summary_to_box
  ## TASK
  # Picard CollectGcBiasMetrics, executed through run_picard_for_cellranger
  # with a 4 GB Java heap ('-Xmx4g'). The input is configured as the
  # 'run_picard_gc_summary' XCom key of copy_bam_for_parallel_runs and the
  # output XCom key is 'picard_gc_summary'.
  run_picard_gc_summary = PythonOperator(
    task_id='run_picard_gc_summary',
    dag=dag,
    queue='hpc_4G',
    python_callable=run_picard_for_cellranger,
    params={
      'xcom_pull_files_key': 'run_picard_gc_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'load_metrics_to_cram': True,
      'java_param': '-Xmx4g',
      'picard_command': 'CollectGcBiasMetrics',
      'picard_option': {},
      'analysis_files_xcom_key': 'picard_gc_summary',
      'bam_files_xcom_key': None,
    },
  )
  ## PIPELINE
  copy_bam_for_parallel_runs >> run_picard_gc_summary
  ## TASK
  # Cleanup step scheduled after run_picard_gc_summary. It is pointed at the
  # same copy_bam_for_parallel_runs / 'run_picard_gc_summary' XCom pair that
  # run_picard_gc_summary is configured to read.
  # NOTE(review): presumably clean_up_files removes the files listed under
  # that key -- confirm in its implementation.
  cleanup_picard_gc_summary_input = PythonOperator(
    task_id='cleanup_picard_gc_summary_input',
    dag=dag,
    queue='hpc_4G',
    python_callable=clean_up_files,
    params={
      'xcom_pull_files_key': 'run_picard_gc_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
    },
  )
  ## PIPELINE
  run_picard_gc_summary >> cleanup_picard_gc_summary_input
  ## TASK
  # Pass the 'picard_gc_summary' XCom of run_picard_gc_summary to
  # upload_analysis_file_to_box, tagged 'Picard-CollectGcBiasMetrics'.
  upload_picard_gc_summary_to_box = PythonOperator(
    # The task id has to be unique within the DAG and should match this
    # operator; 'upload_picard_alignment_summary_to_box' belongs to the
    # alignment-summary upload and must not be reused for the GC-bias upload.
    task_id='upload_picard_gc_summary_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'run_picard_gc_summary',
      'xcom_pull_files_key': 'picard_gc_summary',
      'analysis_tag': 'Picard-CollectGcBiasMetrics',
    },
  )
  ## PIPELINE
  run_picard_gc_summary >> upload_picard_gc_summary_to_box
  ## TASK
  # Picard CollectBaseDistributionByCycle, executed through
  # run_picard_for_cellranger with a 4 GB Java heap ('-Xmx4g'). The input is
  # configured as the 'run_picard_base_dist_summary' XCom key of
  # copy_bam_for_parallel_runs and the output XCom key is
  # 'picard_base_summary'.
  run_picard_base_dist_summary = PythonOperator(
    task_id='run_picard_base_dist_summary',
    dag=dag,
    queue='hpc_4G',
    python_callable=run_picard_for_cellranger,
    params={
      'xcom_pull_files_key': 'run_picard_base_dist_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'load_metrics_to_cram': True,
      'java_param': '-Xmx4g',
      'picard_command': 'CollectBaseDistributionByCycle',
      'picard_option': {},
      'analysis_files_xcom_key': 'picard_base_summary',
      'bam_files_xcom_key': None,
    },
  )
  ## PIPELINE
  copy_bam_for_parallel_runs >> run_picard_base_dist_summary
  ## TASK
  # Cleanup step scheduled after run_picard_base_dist_summary. It is pointed
  # at the same copy_bam_for_parallel_runs / 'run_picard_base_dist_summary'
  # XCom pair that run_picard_base_dist_summary is configured to read.
  # NOTE(review): presumably clean_up_files removes the files listed under
  # that key -- confirm in its implementation.
  cleanup_picard_base_dist_summary_input = PythonOperator(
    task_id='cleanup_picard_base_dist_summary_input',
    dag=dag,
    queue='hpc_4G',
    python_callable=clean_up_files,
    params={
      'xcom_pull_files_key': 'run_picard_base_dist_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
    },
  )
  ## PIPELINE
  run_picard_base_dist_summary >> cleanup_picard_base_dist_summary_input
  ## TASK
  # Pass the 'picard_base_summary' XCom of run_picard_base_dist_summary to
  # upload_analysis_file_to_box, tagged
  # 'Picard-CollectBaseDistributionByCycle'.
  upload_picard_base_dist_summary_to_box = PythonOperator(
    task_id='upload_picard_base_dist_summary_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'run_picard_base_dist_summary',
      'xcom_pull_files_key': 'picard_base_summary',
      'analysis_tag': 'Picard-CollectBaseDistributionByCycle',
    },
  )
  ## PIPELINE
  run_picard_base_dist_summary >> upload_picard_base_dist_summary_to_box
  ## TASK
  # 'samtools stats', executed through run_samtools_for_cellranger on the
  # hpc_4G4t queue with 4 threads. The input is configured as the
  # 'run_samtools_stats' XCom key of copy_bam_for_parallel_runs and the output
  # XCom key is 'samtools_stats'.
  run_samtools_stats = PythonOperator(
    task_id='run_samtools_stats',
    dag=dag,
    queue='hpc_4G4t',
    python_callable=run_samtools_for_cellranger,
    params={
      'xcom_pull_files_key': 'run_samtools_stats',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'load_metrics_to_cram': True,
      'samtools_command': 'stats',
      'threads': 4,
      'analysis_files_xcom_key': 'samtools_stats',
    },
  )
  ## PIPELINE
  copy_bam_for_parallel_runs >> run_samtools_stats
  ## TASK
  # Pass the 'samtools_stats' XCom of run_samtools_stats to
  # upload_analysis_file_to_box, tagged 'Samtools-stats'.
  upload_samtools_stats_to_box = PythonOperator(
    task_id='upload_samtools_stats_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'run_samtools_stats',
      'xcom_pull_files_key': 'samtools_stats',
      'analysis_tag': 'Samtools-stats',
    },
  )
  ## PIPELINE
  run_samtools_stats >> upload_samtools_stats_to_box
  ## TASK
  # 'samtools idxstats', executed through run_samtools_for_cellranger on the
  # hpc_4G4t queue with 4 threads; the output XCom key is 'samtools_idxstats'.
  # It is configured with the same input XCom key ('run_samtools_stats' from
  # copy_bam_for_parallel_runs) as run_samtools_stats and is scheduled after
  # that task rather than directly after the BAM copy.
  run_samtools_idxstats = PythonOperator(
    task_id='run_samtools_idxstats',
    dag=dag,
    queue='hpc_4G4t',
    python_callable=run_samtools_for_cellranger,
    params={
      'xcom_pull_files_key': 'run_samtools_stats',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'load_metrics_to_cram': True,
      'samtools_command': 'idxstats',
      'threads': 4,
      'analysis_files_xcom_key': 'samtools_idxstats',
    },
  )
  ## PIPELINE
  run_samtools_stats >> run_samtools_idxstats
  ## TASK
  # Cleanup step for the 'run_samtools_stats' XCom key of
  # copy_bam_for_parallel_runs. That key is configured as input for both
  # samtools tasks, so the cleanup is scheduled after run_samtools_idxstats,
  # the later of the two.
  # NOTE(review): presumably clean_up_files removes the files listed under
  # that key -- confirm in its implementation.
  cleanup_samtools_stats_input = PythonOperator(
    task_id='cleanup_samtools_stats_input',
    dag=dag,
    queue='hpc_4G',
    python_callable=clean_up_files,
    params={
      'xcom_pull_files_key': 'run_samtools_stats',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
    },
  )
  ## PIPELINE
  run_samtools_idxstats >> cleanup_samtools_stats_input
  ## TASK
  # Pass the 'samtools_idxstats' XCom of run_samtools_idxstats to
  # upload_analysis_file_to_box, tagged 'Samtools-idxstats'.
  upload_samtools_idxstats_to_box = PythonOperator(
    task_id='upload_samtools_idxstats_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'run_samtools_idxstats',
      'xcom_pull_files_key': 'samtools_idxstats',
      'analysis_tag': 'Samtools-idxstats',
    },
  )
  ## PIPELINE
  run_samtools_idxstats >> upload_samtools_idxstats_to_box
  ## TASK
  # Build the MultiQC report through run_multiqc_for_cellranger.
  # 'list_of_analysis_xcoms_and_tasks' maps each producing task id to the XCom
  # key holding its output. The report paths are configured to be published
  # under the 'multiqc_html' and 'multiqc_data' XCom keys.
  # trigger_rule='none_failed_or_skipped': run when no upstream task failed
  # and at least one succeeded.
  run_multiqc = PythonOperator(
    task_id='run_multiqc',
    dag=dag,
    queue='hpc_4G',
    trigger_rule='none_failed_or_skipped',
    python_callable=run_multiqc_for_cellranger,
    params={
      'list_of_analysis_xcoms_and_tasks': {
        'run_cellranger': 'cellranger_output',
        'run_picard_alignment_summary': 'picard_alignment_summary',
        'run_picard_qual_summary': 'picard_qual_summary',
        'run_picard_rna_summary': 'picard_rna_summary',
        'run_picard_gc_summary': 'picard_gc_summary',
        'run_picard_base_dist_summary': 'picard_base_summary',
        'run_samtools_stats': 'samtools_stats',
        'run_samtools_idxstats': 'samtools_idxstats',
      },
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'multiqc_html_file_xcom_key': 'multiqc_html',
      'multiqc_data_file_xcom_key': 'multiqc_data',
      # Tool names must be spelled as the tools are actually called; the
      # misspelling 'picad' matches no tool and would leave the Picard
      # results out of the requested ordering.
      # NOTE(review): confirm how run_multiqc_for_cellranger consumes this.
      'tool_order_list': ['picard', 'samtools'],
    },
  )
  ## PIPELINE
  # run_samtools_stats and run_cellranger are only upstream indirectly:
  # run_samtools_stats precedes run_samtools_idxstats.
  run_picard_alignment_summary >> run_multiqc
  run_picard_qual_summary >> run_multiqc
  run_picard_rna_summary >> run_multiqc
  run_picard_gc_summary >> run_multiqc
  run_picard_base_dist_summary >> run_multiqc
  run_samtools_idxstats >> run_multiqc
  ## TASK
  # Register the MultiQC HTML through load_analysis_files_func. The file comes
  # from the 'multiqc_html' XCom of run_multiqc and the collection name from
  # the 'sample_igf_id' XCom of load_cellranger_result_to_db. It is configured
  # as a 'MULTIQC_HTML' collection on the 'sample' table, with the result
  # XCom key set to 'output_db_files'.
  load_multiqc_html = PythonOperator(
    task_id='load_multiqc_html',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_analysis_files_func,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_multiqc',
      'file_name_key': 'multiqc_html',
      'analysis_name': 'multiqc',
      'collection_type': 'MULTIQC_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    },
  )
  ## PIPELINE
  run_multiqc >> load_multiqc_html
  ## TASK
  # Pass the 'output_db_files' XCom of load_multiqc_html to
  # ftp_files_upload_for_analysis, configured as an 'FTP_MULTIQC_HTML'
  # collection on the 'sample' table. The collection name comes from the
  # 'sample_igf_id' XCom of load_cellranger_result_to_db.
  upload_multiqc_to_ftp = PythonOperator(
    task_id='upload_multiqc_to_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'load_multiqc_html',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_MULTIQC_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    },
  )
  ## PIPELINE
  load_multiqc_html >> upload_multiqc_to_ftp
  ## TASK
  # Pass the 'output_db_files' XCom of load_multiqc_html to
  # upload_analysis_file_to_box, tagged 'multiqc_report'.
  upload_multiqc_to_box = PythonOperator(
    task_id='upload_multiqc_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'load_multiqc_html',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'multiqc_report',
    },
  )
  ## PIPELINE
  load_multiqc_html >> upload_multiqc_to_box
  ## TASK
  # Final status update through change_pipeline_status, configured with
  # new_status 'FINISHED' and no_change_status 'SEEDED'.
  # trigger_rule='none_failed_or_skipped': run when no upstream task failed
  # and at least one succeeded.
  update_analysis_and_status = PythonOperator(
    task_id='update_analysis_and_status',
    dag=dag,
    queue='hpc_4G',
    python_callable=change_pipeline_status,
    trigger_rule='none_failed_or_skipped',
    params={
      'new_status': 'FINISHED',
      'no_change_status': 'SEEDED',
    },
  )
  ## PIPELINE
  # Every listed upload task is a direct upstream of the status update.
  [
    upload_multiqc_to_ftp,
    upload_scanpy_report_for_sc_5p_to_ftp,
    upload_scanpy_report_for_sc_5p_to_box,
    upload_cellbrowser_for_sc_5p_to_ftp,
    upload_scirpy_report_for_vdj_to_ftp,
    upload_scirpy_report_for_vdj_to_box,
    upload_scirpy_report_for_vdj_b_to_ftp,
    upload_scirpy_report_for_vdj_b_to_box,
    upload_scirpy_report_for_vdj_t_to_ftp,
    upload_scirpy_report_for_vdj_t_to_box,
    upload_seurat_report_for_sc_5p_ftp,
    upload_seurat_report_for_sc_5p_to_box,
    upload_cellranger_results_to_irods,
    upload_cellranger_report_to_ftp,
    upload_cellranger_report_to_box,
    upload_cram_to_irods,
  ] >> update_analysis_and_status
  ## TASK
  # Last task of the chain: calls create_and_update_qc_pages with the list of
  # FTP collection types below. Scheduled after update_analysis_and_status.
  update_qc_pages = PythonOperator(
    task_id='update_qc_pages',
    dag=dag,
    queue='hpc_4G',
    python_callable=create_and_update_qc_pages,
    params={
      'use_ephemeral_space': True,
      'collection_type_list': [
        'FTP_MULTIQC_HTML',
        'FTP_SEURAT_HTML',
        'FTP_SCIRPY_VDJ_T_HTML',
        'FTP_SCIRPY_VDJ_B_HTML',
        'FTP_SCIRPY_VDJ_HTML',
        'FTP_CELLBROWSER',
        'FTP_SCANPY_HTML',
        'FTP_CELLRANGER_HTML',
      ],
    },
  )
  ## PIPELINE
  update_analysis_and_status >> update_qc_pages