# dag9_tenx_single_cell_immune_profiling.py

  1. from datetime import timedelta
  2. import os,json,logging,subprocess
  3. from airflow.models import DAG,Variable
  4. from airflow.utils.dates import days_ago
  5. from airflow.operators.python_operator import PythonOperator
  6. from airflow.operators.python_operator import BranchPythonOperator
  7. from airflow.operators.dummy_operator import DummyOperator
  8. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import fetch_analysis_info_and_branch_func
  9. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import configure_cellranger_run_func
  10. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_sc_read_trimmming_func
  11. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_cellranger_tool
  12. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import decide_analysis_branch_func
  13. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_cellranger_result_to_db_func
  14. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import ftp_files_upload_for_analysis
  15. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import irods_files_upload_for_analysis
  16. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_scanpy_for_sc_5p_func
  17. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_singlecell_notebook_wrapper_func
  18. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_analysis_files_func
  19. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import task_branch_function
  20. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import upload_analysis_file_to_box
  21. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import convert_bam_to_cram_func
  22. ## ARGS
  23. default_args = {
  24. 'owner': 'airflow',
  25. 'depends_on_past': False,
  26. 'start_date': days_ago(2),
  27. 'email_on_failure': False,
  28. 'email_on_retry': False,
  29. 'retries': 4,
  30. 'max_active_runs':10,
  31. 'catchup':False,
  32. 'retry_delay': timedelta(minutes=5),
  33. 'provide_context': True,
  34. }
  35. FEATURE_TYPE_LIST = Variable.get('tenx_single_cell_immune_profiling_feature_types').split(',')
  36. ## DAG
  37. dag = \
  38. DAG(
  39. dag_id='dag9_tenx_single_cell_immune_profiling',
  40. schedule_interval=None,
  41. tags=['hpc','analysis','tenx','sc'],
  42. default_args=default_args,
  43. orientation='LR')
  44. with dag:
  45. ## TASK
  46. fetch_analysis_info_and_branch = \
  47. BranchPythonOperator(
  48. task_id='fetch_analysis_info',
  49. dag=dag,
  50. queue='hpc_4G',
  51. params={'no_analysis_task':'no_analysis',
  52. 'analysis_description_xcom_key':'analysis_description',
  53. 'analysis_info_xcom_key':'analysis_info'},
  54. python_callable=fetch_analysis_info_and_branch_func)
  55. ## TASK
  56. configure_cellranger_run = \
  57. PythonOperator(
  58. task_id='configure_cellranger_run',
  59. dag=dag,
  60. queue='hpc_4G',
  61. trigger_rule='none_failed_or_skipped',
  62. params={'xcom_pull_task_id':'fetch_analysis_info',
  63. 'analysis_description_xcom_key':'analysis_description',
  64. 'analysis_info_xcom_key':'analysis_info',
  65. 'library_csv_xcom_key':'cellranger_library_csv'},
  66. python_callable=configure_cellranger_run_func)
  67. for analysis_name in FEATURE_TYPE_LIST:
  68. ## TASK
  69. task_branch = \
  70. BranchPythonOperator(
  71. task_id=analysis_name,
  72. dag=dag,
  73. queue='hpc_4G',
  74. params={'xcom_pull_task_id':'fetch_analysis_info',
  75. 'analysis_info_xcom_key':'analysis_info',
  76. 'analysis_name':analysis_name,
  77. 'task_prefix':'run_trim'},
  78. python_callable=task_branch_function)
  79. run_trim_list = list()
  80. for run_id in range(0,10):
  81. ## TASK
  82. t = \
  83. PythonOperator(
  84. task_id='run_trim_{0}_{1}'.format(analysis_name,run_id),
  85. dag=dag,
  86. queue='hpc_4G',
  87. params={'xcom_pull_task_id':'fetch_analysis_info',
  88. 'analysis_info_xcom_key':'analysis_info',
  89. 'analysis_description_xcom_key':'analysis_description',
  90. 'analysis_name':analysis_name,
  91. 'run_id':run_id,
  92. 'r1_length':0,
  93. 'r2_length':0,
  94. 'fastq_input_dir_tag':'fastq_dir',
  95. 'fastq_output_dir_tag':'output_path'},
  96. python_callable=run_sc_read_trimmming_func)
  97. run_trim_list.append(t)
  98. ## TASK
  99. collect_trimmed_files = \
  100. DummyOperator(
  101. task_id='collect_trimmed_files_{0}'.format(analysis_name),
  102. trigger_rule='none_failed_or_skipped',
  103. dag=dag)
  104. ## PIPELINE
  105. fetch_analysis_info_and_branch >> task_branch
  106. task_branch >> run_trim_list
  107. run_trim_list >> collect_trimmed_files
  108. collect_trimmed_files >> configure_cellranger_run
  109. ## TASK
  110. no_analysis = \
  111. DummyOperator(
  112. task_id='no_analysis',
  113. dag=dag)
  114. ## PIPELINE
  115. fetch_analysis_info_and_branch >> no_analysis
  116. ## TASK
  117. run_cellranger = \
  118. PythonOperator(
  119. task_id='run_cellranger',
  120. dag=dag,
  121. queue='hpc_64G16t24hr',
  122. params={'analysis_description_xcom_pull_task':'fetch_analysis_info',
  123. 'analysis_description_xcom_key':'analysis_description',
  124. 'library_csv_xcom_key':'cellranger_library_csv',
  125. 'library_csv_xcom_pull_task':'configure_cellranger_run',
  126. 'cellranger_xcom_key':'cellranger_output',
  127. 'cellranger_options':['--localcores 16','--localmem 64']},
  128. python_callable=run_cellranger_tool)
  129. ## PIPELINE
  130. configure_cellranger_run >> run_cellranger
  131. ## TASK
  132. decide_analysis_branch = \
  133. BranchPythonOperator(
  134. task_id='decide_analysis_branch',
  135. dag=dag,
  136. queue='hpc_4G',
  137. python_callable=decide_analysis_branch_func,
  138. params={'load_cellranger_result_to_db_task':'load_cellranger_result_to_db',
  139. 'run_scanpy_for_sc_5p_task':'run_scanpy_for_sc_5p',
  140. 'run_scirpy_for_vdj_task':'run_scirpy_for_vdj',
  141. 'run_scirpy_for_vdj_b_task':'run_scirpy_for_vdj_b',
  142. 'run_scirpy_vdj_t_task':'run_scirpy_for_vdj_t',
  143. 'run_seurat_for_sc_5p_task':'run_seurat_for_sc_5p',
  144. 'run_picard_alignment_summary_task':'run_picard_alignment_summary',
  145. 'convert_bam_to_cram_task':'convert_bam_to_cram',
  146. 'library_csv_xcom_key':'cellranger_library_csv',
  147. 'library_csv_xcom_pull_task':'configure_cellranger_run'})
  148. ## PIPELINE
  149. run_cellranger >> decide_analysis_branch
  150. ## TASK
  151. load_cellranger_result_to_db = \
  152. PythonOperator(
  153. task_id='load_cellranger_result_to_db',
  154. dag=dag,
  155. queue='hpc_4G',
  156. python_callable=load_cellranger_result_to_db_func,
  157. params={'analysis_description_xcom_pull_task':'fetch_analysis_info',
  158. 'analysis_description_xcom_key':'analysis_description',
  159. 'cellranger_xcom_key':'cellranger_output',
  160. 'cellranger_xcom_pull_task':'run_cellranger',
  161. 'collection_type':'CELLRANGER_MULTI',
  162. 'collection_table':'sample',
  163. 'xcom_collection_name_key':'sample_igf_id',
  164. 'genome_column':'genome_build',
  165. 'analysis_name':'cellranger_multi',
  166. 'output_xcom_key':'loaded_output_files',
  167. 'html_xcom_key':'html_report_file',
  168. 'html_report_file_name':'web_summary.html'})
  169. upload_cellranger_report_to_ftp = \
  170. PythonOperator(
  171. task_id='upload_cellranger_report_to_ftp',
  172. dag=dag,
  173. queue='hpc_4G',
  174. python_callable=ftp_files_upload_for_analysis,
  175. params={'xcom_pull_task':'load_cellranger_result_to_db',
  176. 'xcom_pull_files_key':'html_report_file',
  177. 'collection_name_task':'load_cellranger_result_to_db',
  178. 'collection_name_key':'sample_igf_id',
  179. 'collection_type':'FTP_CELLRANGER_MULTI',
  180. 'collection_table':'sample',
  181. 'collect_remote_file':True})
  182. upload_cellranger_report_to_box = \
  183. PythonOperator(
  184. task_id='upload_cellranger_report_to_box',
  185. dag=dag,
  186. queue='hpc_4G',
  187. python_callable=None,
  188. params={'xcom_pull_task':'load_cellranger_result_to_db',
  189. 'xcom_pull_files_key':'html_report_file'})
  190. upload_cellranger_results_to_irods = \
  191. PythonOperator(
  192. task_id='upload_cellranger_results_to_irods',
  193. dag=dag,
  194. queue='hpc_4G',
  195. python_callable=irods_files_upload_for_analysis,
  196. params={'xcom_pull_task':'load_cellranger_result_to_db',
  197. 'xcom_pull_files_key':'loaded_output_files',
  198. 'collection_name_key':'sample_igf_id',
  199. 'analysis_name':'cellranger_multi'})
  200. ## PIPELINE
  201. decide_analysis_branch >> load_cellranger_result_to_db
  202. load_cellranger_result_to_db >> upload_cellranger_report_to_ftp
  203. load_cellranger_result_to_db >> upload_cellranger_report_to_box
  204. load_cellranger_result_to_db >> upload_cellranger_results_to_irods
  205. ## TASK
  206. run_scanpy_for_sc_5p = \
  207. PythonOperator(
  208. task_id='run_scanpy_for_sc_5p',
  209. dag=dag,
  210. queue='hpc_4G',
  211. python_callable=run_singlecell_notebook_wrapper_func,
  212. params={'cellranger_xcom_key':'cellranger_output',
  213. 'cellranger_xcom_pull_task':'run_cellranger',
  214. 'scanpy_timeout':1200,
  215. 'allow_errors':False,
  216. 'kernel_name':'python3',
  217. 'count_dir':'count',
  218. 'analysis_name':'scanpy',
  219. 'output_notebook_key':'scanpy_notebook',
  220. 'output_cellbrowser_key':'cellbrowser_dirs',
  221. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  222. 'analysis_description_xcom_key':'analysis_description'})
  223. load_scanpy_report_for_sc_5p_to_db = \
  224. PythonOperator(
  225. task_id='load_scanpy_report_for_sc_5p_to_db',
  226. dag=dag,
  227. queue='hpc_4G',
  228. python_callable=load_analysis_files_func,
  229. params={'collection_name_task':'load_cellranger_result_to_db',
  230. 'collection_name_key':'sample_igf_id',
  231. 'file_name_task':'run_scanpy_for_sc_5p',
  232. 'file_name_key':'scanpy_notebook',
  233. 'analysis_name':'scanpy_5p',
  234. 'collection_type':'SCANPY_HTML',
  235. 'collection_table':'sample',
  236. 'output_files_key':'output_db_files'})
  237. upload_scanpy_report_for_sc_5p_to_ftp = \
  238. PythonOperator(
  239. task_id='upload_scanpy_report_for_sc_5p_to_ftp',
  240. dag=dag,
  241. queue='hpc_4G',
  242. python_callable=ftp_files_upload_for_analysis,
  243. params={'xcom_pull_task':'load_scanpy_report_for_sc_5p_to_db',
  244. 'xcom_pull_files_key':'output_db_files',
  245. 'collection_name_task':'load_cellranger_result_to_db',
  246. 'collection_name_key':'sample_igf_id',
  247. 'collection_type':'FTP_SCANPY_HTML',
  248. 'collection_table':'sample',
  249. 'collect_remote_file':True})
  250. upload_scanpy_report_for_sc_5p_to_box = \
  251. PythonOperator(
  252. task_id='upload_scanpy_report_for_sc_5p_to_box',
  253. dag=dag,
  254. queue='hpc_4G',
  255. python_callable=upload_analysis_file_to_box,
  256. params={'xcom_pull_task':'load_scanpy_report_for_sc_5p_to_db',
  257. 'xcom_pull_files_key':'output_db_files',
  258. 'analysis_tag':'scanpy_single_sample_report'})
  259. upload_cellbrowser_for_sc_5p_to_ftp = \
  260. PythonOperator(
  261. task_id='upload_cellbrowser_for_sc_5p_to_ftp',
  262. dag=dag,
  263. queue='hpc_4G',
  264. python_callable=ftp_files_upload_for_analysis,
  265. params={'xcom_pull_task':'run_scanpy_for_sc_5p',
  266. 'xcom_pull_files_key':'cellbrowser_dirs',
  267. 'collection_name_task':'load_cellranger_result_to_db',
  268. 'collection_name_key':'sample_igf_id',
  269. 'collection_type':'FTP_CELLBROWSER',
  270. 'collection_table':'sample',
  271. 'collect_remote_file':True})
  272. ## PIPELINE
  273. decide_analysis_branch >> run_scanpy_for_sc_5p
  274. run_scanpy_for_sc_5p >> load_scanpy_report_for_sc_5p_to_db
  275. load_scanpy_report_for_sc_5p_to_db >> upload_scanpy_report_for_sc_5p_to_ftp
  276. load_scanpy_report_for_sc_5p_to_db >> upload_scanpy_report_for_sc_5p_to_box
  277. run_scanpy_for_sc_5p >> upload_cellbrowser_for_sc_5p_to_ftp
  278. ## TASK
  279. run_scirpy_for_vdj = \
  280. PythonOperator(
  281. task_id='run_scirpy_for_vdj',
  282. dag=dag,
  283. queue='hpc_4G',
  284. python_callable=run_singlecell_notebook_wrapper_func,
  285. params={'cellranger_xcom_key':'cellranger_output',
  286. 'cellranger_xcom_pull_task':'run_cellranger',
  287. 'scanpy_timeout':1200,
  288. 'allow_errors':False,
  289. 'kernel_name':'python3',
  290. 'analysis_name':'scirpy',
  291. 'vdj_dir':'vdj',
  292. 'count_dir':'count',
  293. 'output_notebook_key':'scirpy_notebook',
  294. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  295. 'analysis_description_xcom_key':'analysis_description'})
  296. load_scirpy_report_for_vdj_to_db = \
  297. PythonOperator(
  298. task_id='load_scirpy_report_for_vdj_to_db',
  299. dag=dag,
  300. queue='hpc_4G',
  301. python_callable=load_analysis_files_func,
  302. params={'collection_name_task':'load_cellranger_result_to_db',
  303. 'collection_name_key':'sample_igf_id',
  304. 'file_name_task':'run_scirpy_for_vdj',
  305. 'file_name_key':'scirpy_notebook',
  306. 'analysis_name':'scirpy_vdj',
  307. 'collection_type':'SCIRPY_VDJ_HTML',
  308. 'collection_table':'sample',
  309. 'output_files_key':'output_db_files'})
  310. upload_scirpy_report_for_vdj_to_ftp = \
  311. PythonOperator(
  312. task_id='upload_scirpy_report_for_vdj_to_ftp',
  313. dag=dag,
  314. queue='hpc_4G',
  315. python_callable=ftp_files_upload_for_analysis,
  316. params={'xcom_pull_task':'load_scanpy_report_for_vdj_to_db',
  317. 'xcom_pull_files_key':'output_db_files',
  318. 'collection_name_task':'load_cellranger_result_to_db',
  319. 'collection_name_key':'sample_igf_id',
  320. 'collection_type':'FTP_SCIRPY_VDJ_HTML',
  321. 'collection_table':'sample',
  322. 'collect_remote_file':True})
  323. upload_scirpy_report_for_vdj_to_box = \
  324. PythonOperator(
  325. task_id='upload_scirpy_report_for_vdj_to_box',
  326. dag=dag,
  327. queue='hpc_4G',
  328. python_callable=upload_analysis_file_to_box,
  329. params={'xcom_pull_task':'load_scanpy_report_for_vdj_to_db',
  330. 'xcom_pull_files_key':'output_db_files',
  331. 'analysis_tag':'scirpy_vdj_single_sample_report'})
  332. ## PIPELINE
  333. decide_analysis_branch >> run_scirpy_for_vdj
  334. run_scirpy_for_vdj >> load_scirpy_report_for_vdj_to_db
  335. load_scirpy_report_for_vdj_to_db >> upload_scirpy_report_for_vdj_to_ftp
  336. load_scirpy_report_for_vdj_to_db >> upload_scirpy_report_for_vdj_to_box
  337. ## TASK
  338. run_scirpy_for_vdj_b = \
  339. PythonOperator(
  340. task_id='run_scirpy_for_vdj_b',
  341. dag=dag,
  342. queue='hpc_4G',
  343. python_callable=run_singlecell_notebook_wrapper_func,
  344. params={'cellranger_xcom_key':'cellranger_output',
  345. 'cellranger_xcom_pull_task':'run_cellranger',
  346. 'scanpy_timeout':1200,
  347. 'allow_errors':False,
  348. 'kernel_name':'python3',
  349. 'analysis_name':'scirpy',
  350. 'vdj_dir':'vdj_b',
  351. 'count_dir':'count',
  352. 'output_notebook_key':'scirpy_notebook',
  353. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  354. 'analysis_description_xcom_key':'analysis_description'})
  355. load_scirpy_report_for_vdj_b_to_db = \
  356. PythonOperator(
  357. task_id='load_scirpy_report_for_vdj_b_to_db',
  358. dag=dag,
  359. queue='hpc_4G',
  360. python_callable=load_analysis_files_func,
  361. params={'collection_name_task':'load_cellranger_result_to_db',
  362. 'collection_name_key':'sample_igf_id',
  363. 'file_name_task':'run_scirpy_for_vdj_b',
  364. 'file_name_key':'scirpy_notebook',
  365. 'analysis_name':'scirpy_vdj_b',
  366. 'collection_type':'SCIRPY_VDJ_B_HTML',
  367. 'collection_table':'sample',
  368. 'output_files_key':'output_db_files'})
  369. upload_scirpy_report_for_vdj_b_to_ftp = \
  370. PythonOperator(
  371. task_id='upload_scirpy_report_for_vdj_b_to_ftp',
  372. dag=dag,
  373. queue='hpc_4G',
  374. python_callable=ftp_files_upload_for_analysis,
  375. params={'xcom_pull_task':'load_scanpy_report_for_vdj_b_to_db',
  376. 'xcom_pull_files_key':'output_db_files',
  377. 'collection_name_task':'load_cellranger_result_to_db',
  378. 'collection_name_key':'sample_igf_id',
  379. 'collection_type':'FTP_SCIRPY_VDJ_B_HTML',
  380. 'collection_table':'sample',
  381. 'collect_remote_file':True})
  382. upload_scirpy_report_for_vdj_b_to_box = \
  383. PythonOperator(
  384. task_id='upload_scanpy_report_for_vdj_b_to_box',
  385. dag=dag,
  386. queue='hpc_4G',
  387. python_callable=upload_analysis_file_to_box,
  388. params={'xcom_pull_task':'load_scanpy_report_for_vdj_b_to_db',
  389. 'xcom_pull_files_key':'output_db_files',
  390. 'analysis_tag':'scirpy_vdj_b_single_sample_report'})
  391. ## PIPELINE
  392. decide_analysis_branch >> run_scirpy_for_vdj_b
  393. run_scirpy_for_vdj_b >> load_scirpy_report_for_vdj_b_to_db
  394. load_scirpy_report_for_vdj_b_to_db >> upload_scirpy_report_for_vdj_b_to_ftp
  395. load_scirpy_report_for_vdj_b_to_db >> upload_scirpy_report_for_vdj_b_to_box
  396. ## TASK
  397. run_scirpy_for_vdj_t = \
  398. PythonOperator(
  399. task_id='run_scirpy_for_vdj_t',
  400. dag=dag,
  401. queue='hpc_4G',
  402. python_callable=run_singlecell_notebook_wrapper_func,
  403. params={'cellranger_xcom_key':'cellranger_output',
  404. 'cellranger_xcom_pull_task':'run_cellranger',
  405. 'scanpy_timeout':1200,
  406. 'allow_errors':False,
  407. 'kernel_name':'python3',
  408. 'analysis_name':'scirpy',
  409. 'vdj_dir':'vdj_t',
  410. 'count_dir':'count',
  411. 'output_notebook_key':'scirpy_notebook',
  412. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  413. 'analysis_description_xcom_key':'analysis_description'})
  414. load_scirpy_report_for_vdj_t_to_db = \
  415. PythonOperator(
  416. task_id='load_scirpy_report_for_vdj_t_to_db',
  417. dag=dag,
  418. queue='hpc_4G',
  419. python_callable=load_analysis_files_func,
  420. params={'collection_name_task':'load_cellranger_result_to_db',
  421. 'collection_name_key':'sample_igf_id',
  422. 'file_name_task':'run_scirpy_for_vdj_t',
  423. 'file_name_key':'scirpy_notebook',
  424. 'analysis_name':'scirpy_vdj_t',
  425. 'collection_type':'SCIRPY_VDJ_T_HTML',
  426. 'collection_table':'sample',
  427. 'output_files_key':'output_db_files'})
  428. upload_scirpy_report_for_vdj_t_to_ftp = \
  429. PythonOperator(
  430. task_id='upload_scirpy_report_for_vdj_t_to_ftp',
  431. dag=dag,
  432. queue='hpc_4G',
  433. python_callable=ftp_files_upload_for_analysis,
  434. params={'xcom_pull_task':'load_scanpy_report_for_vdj_t_to_db',
  435. 'xcom_pull_files_key':'output_db_files',
  436. 'collection_name_task':'load_cellranger_result_to_db',
  437. 'collection_name_key':'sample_igf_id',
  438. 'collection_type':'FTP_SCIRPY_VDJ_T_HTML',
  439. 'collection_table':'sample',
  440. 'collect_remote_file':True})
  441. upload_scirpy_report_for_vdj_t_to_box = \
  442. PythonOperator(
  443. task_id='upload_scirpy_report_for_vdj_t_to_box',
  444. dag=dag,
  445. queue='hpc_4G',
  446. python_callable=upload_analysis_file_to_box,
  447. params={'xcom_pull_task':'load_scanpy_report_for_vdj_t_to_db',
  448. 'xcom_pull_files_key':'output_db_files',
  449. 'analysis_tag':'scirpy_vdj_t_single_sample_report'})
  450. ## PIPELINE
  451. decide_analysis_branch >> run_scirpy_for_vdj_t
  452. run_scirpy_for_vdj_t >> load_scirpy_report_for_vdj_t_to_db
  453. load_scirpy_report_for_vdj_t_to_db >> upload_scirpy_report_for_vdj_t_to_ftp
  454. load_scirpy_report_for_vdj_t_to_db >> upload_scirpy_report_for_vdj_t_to_box
  455. ## TASK
  456. run_seurat_for_sc_5p = \
  457. PythonOperator(
  458. task_id='run_seurat_for_sc_5p',
  459. dag=dag,
  460. queue='hpc_4G',
  461. python_callable=run_singlecell_notebook_wrapper_func,
  462. params={'cellranger_xcom_key':'cellranger_output',
  463. 'cellranger_xcom_pull_task':'run_cellranger',
  464. 'scanpy_timeout':1200,
  465. 'allow_errors':False,
  466. 'kernel_name':'R',
  467. 'analysis_name':'seurat',
  468. 'vdj_dir':'vdj',
  469. 'count_dir':'count',
  470. 'output_notebook_key':'seurat_notebook',
  471. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  472. 'analysis_description_xcom_key':'analysis_description'})
  473. load_seurat_report_for_sc_5p_db = \
  474. PythonOperator(
  475. task_id='load_seurat_report_for_sc_5p_db',
  476. dag=dag,
  477. queue='hpc_4G',
  478. python_callable=load_analysis_files_func,
  479. params={'collection_name_task':'load_cellranger_result_to_db',
  480. 'collection_name_key':'sample_igf_id',
  481. 'file_name_task':'run_seurat_for_sc_5p',
  482. 'file_name_key':'seurat_notebook',
  483. 'analysis_name':'seurat_5p',
  484. 'collection_type':'SEURAT_HTML',
  485. 'collection_table':'sample',
  486. 'output_files_key':'output_db_files'})
  487. upload_seurat_report_for_sc_5p_ftp = \
  488. PythonOperator(
  489. task_id='upload_seurat_report_for_sc_5p_ftp',
  490. dag=dag,
  491. queue='hpc_4G',
  492. python_callable=ftp_files_upload_for_analysis,
  493. params={'xcom_pull_task':'load_seurat_report_for_sc_5p_db',
  494. 'xcom_pull_files_key':'output_db_files',
  495. 'collection_name_task':'load_cellranger_result_to_db',
  496. 'collection_name_key':'sample_igf_id',
  497. 'collection_type':'FTP_SEURAT_HTML',
  498. 'collection_table':'sample',
  499. 'collect_remote_file':True})
  500. upload_seurat_report_for_sc_5p_to_box = \
  501. PythonOperator(
  502. task_id='upload_seurat_report_for_sc_5p_to_box',
  503. dag=dag,
  504. queue='hpc_4G',
  505. python_callable=upload_analysis_file_to_box,
  506. params={'xcom_pull_task':'load_seurat_report_for_sc_5p_db',
  507. 'xcom_pull_files_key':'output_db_files',
  508. 'analysis_tag':'seurat_single_sample_report'})
  509. ## PIPELINE
  510. decide_analysis_branch >> run_seurat_for_sc_5p
  511. run_seurat_for_sc_5p >> load_seurat_report_for_sc_5p_db
  512. load_seurat_report_for_sc_5p_db >> upload_seurat_report_for_sc_5p_ftp
  513. load_seurat_report_for_sc_5p_db >> upload_seurat_report_for_sc_5p_to_box
  514. ## TASK
  515. convert_bam_to_cram = \
  516. PythonOperator(
  517. task_id='convert_bam_to_cram',
  518. dag=dag,
  519. queue='hpc_4G',
  520. python_callable=convert_bam_to_cram_func,
  521. params={'xcom_pull_files_key':'cellranger_output',
  522. 'xcom_pull_task':'run_cellranger'})
  523. upload_cram_to_irods = \
  524. PythonOperator(
  525. task_id='upload_cram_to_irods',
  526. dag=dag,
  527. queue='hpc_4G',
  528. python_callable=irods_files_upload_for_analysis,
  529. params={'xcom_pull_task':'',
  530. 'xcom_pull_files_key':'',
  531. 'collection_name_key':'',
  532. 'analysis_name':''})
  533. ## PIPELINE
  534. decide_analysis_branch >> convert_bam_to_cram
  535. convert_bam_to_cram >> upload_cram_to_irods
  536. ## TASK
  537. run_picard_alignment_summary = \
  538. PythonOperator(
  539. task_id='run_picard_alignment_summary',
  540. dag=dag,
  541. queue='hpc_4G',
  542. python_callable=None,
  543. params={'cellranger_xcom_key':'cellranger_output',
  544. 'cellranger_xcom_pull_task':'run_cellranger'})
  545. ## PIPELINE
  546. decide_analysis_branch >> run_picard_alignment_summary
  547. ## TASK
  548. run_picard_qual_summary = \
  549. PythonOperator(
  550. task_id='run_picard_qual_summary',
  551. dag=dag,
  552. queue='hpc_4G',
  553. python_callable=None)
  554. ## PIPELINE
  555. run_picard_alignment_summary >> run_picard_qual_summary
  556. ## TASK
  557. run_picard_rna_summary = \
  558. PythonOperator(
  559. task_id='run_picard_rna_summary',
  560. dag=dag,
  561. queue='hpc_4G',
  562. python_callable=None)
  563. ## PIPELINE
  564. run_picard_qual_summary >> run_picard_rna_summary
  565. ## TASK
  566. run_picard_gc_summary = \
  567. PythonOperator(
  568. task_id='run_picard_gc_summary',
  569. dag=dag,
  570. queue='hpc_4G',
  571. python_callable=None)
  572. ## PIPELINE
  573. run_picard_rna_summary >> run_picard_gc_summary
  574. ## TASK
  575. run_samtools_stats = \
  576. PythonOperator(
  577. task_id='run_samtools_stats',
  578. dag=dag,
  579. queue='hpc_4G',
  580. python_callable=None)
  581. ## PIPELINE
  582. run_picard_gc_summary >> run_samtools_stats
  583. ## TASK
  584. run_samtools_idxstats = \
  585. PythonOperator(
  586. task_id='run_samtools_idxstats',
  587. dag=dag,
  588. queue='hpc_4G',
  589. python_callable=None)
  590. ## PIPELINE
  591. run_samtools_stats >> run_samtools_idxstats
  592. ## TASK
  593. run_multiqc = \
  594. PythonOperator(
  595. task_id='run_multiqc',
  596. dag=dag,
  597. queue='hpc_4G',
  598. python_callable=None)
  599. ## PIPELINE
  600. run_samtools_idxstats >> run_multiqc
  601. ## TASK
  602. upload_multiqc_to_ftp = \
  603. PythonOperator(
  604. task_id='upload_multiqc_to_ftp',
  605. dag=dag,
  606. queue='hpc_4G',
  607. python_callable=None)
  608. ## PIPELINE
  609. run_multiqc >> upload_multiqc_to_ftp
  610. ## TASK
  611. upload_multiqc_to_box = \
  612. PythonOperator(
  613. task_id='upload_multiqc_to_box',
  614. dag=dag,
  615. queue='hpc_4G',
  616. python_callable=None)
  617. ## PIPELINE
  618. run_multiqc >> upload_multiqc_to_box
  619. ## TASK
  620. update_analysis_and_status = \
  621. PythonOperator(
  622. task_id='update_analysis_and_status',
  623. dag=dag,
  624. queue='hpc_4G',
  625. python_callable=None,
  626. trigger_rule='none_failed_or_skipped')
  627. ## PIPELINE
  628. upload_multiqc_to_ftp >> update_analysis_and_status
  629. upload_scanpy_report_for_sc_5p_to_ftp >> update_analysis_and_status
  630. upload_scanpy_report_for_sc_5p_to_box >> update_analysis_and_status
  631. upload_cellbrowser_for_sc_5p_to_ftp >> update_analysis_and_status
  632. upload_scirpy_report_for_vdj_to_ftp >> update_analysis_and_status
  633. upload_scirpy_report_for_vdj_to_box >> update_analysis_and_status
  634. upload_scirpy_report_for_vdj_b_to_ftp >> update_analysis_and_status
  635. upload_scirpy_report_for_vdj_b_to_box >> update_analysis_and_status
  636. upload_scirpy_report_for_vdj_t_to_ftp >> update_analysis_and_status
  637. upload_scirpy_report_for_vdj_t_to_box >> update_analysis_and_status
  638. upload_seurat_report_for_sc_5p_ftp >> update_analysis_and_status
  639. upload_seurat_report_for_sc_5p_to_box >> update_analysis_and_status
  640. upload_cellranger_results_to_irods >> update_analysis_and_status
  641. upload_cellranger_report_to_ftp >> update_analysis_and_status
  642. upload_cellranger_report_to_box >> update_analysis_and_status
  643. upload_cram_to_irods >> update_analysis_and_status