# dag9_tenx_single_cell_immune_profiling.py
  1. from datetime import timedelta
  2. import os,json,logging,subprocess
  3. from airflow.models import DAG,Variable
  4. from airflow.utils.dates import days_ago
  5. from airflow.operators.python_operator import PythonOperator
  6. from airflow.operators.python_operator import BranchPythonOperator
  7. from airflow.operators.dummy_operator import DummyOperator
  8. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import fetch_analysis_info_and_branch_func
  9. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import configure_cellranger_run_func
  10. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_sc_read_trimmming_func
  11. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_cellranger_tool
  12. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import decide_analysis_branch_func
  13. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_cellranger_result_to_db_func
  14. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import ftp_files_upload_for_analysis
  15. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import irods_files_upload_for_analysis
  16. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_scanpy_for_sc_5p_func
  17. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_singlecell_notebook_wrapper_func
  18. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_analysis_files_func
  19. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import task_branch_function
  20. ## ARGS
  21. default_args = {
  22. 'owner': 'airflow',
  23. 'depends_on_past': False,
  24. 'start_date': days_ago(2),
  25. 'email_on_failure': False,
  26. 'email_on_retry': False,
  27. 'retries': 4,
  28. 'max_active_runs':10,
  29. 'catchup':False,
  30. 'retry_delay': timedelta(minutes=5),
  31. 'provide_context': True,
  32. }
  33. FEATURE_TYPE_LIST = Variable.get('tenx_single_cell_immune_profiling_feature_types').split(',')
  34. ## DAG
  35. dag = \
  36. DAG(
  37. dag_id='dag9_tenx_single_cell_immune_profiling',
  38. schedule_interval=None,
  39. tags=['hpc','analysis','tenx','sc'],
  40. default_args=default_args,
  41. orientation='LR')
  42. with dag:
  43. ## TASK
  44. fetch_analysis_info_and_branch = \
  45. BranchPythonOperator(
  46. task_id='fetch_analysis_info',
  47. dag=dag,
  48. queue='hpc_4G',
  49. params={'no_analysis_task':'no_analysis',
  50. 'analysis_description_xcom_key':'analysis_description',
  51. 'analysis_info_xcom_key':'analysis_info'},
  52. python_callable=fetch_analysis_info_and_branch_func)
  ## TASK
  # Builds the cellranger run configuration. It sits downstream of every
  # per-feature 'collect_trimmed_files_*' task, so the trigger rule lets it run
  # when some of those upstream branches were skipped.
  configure_cellranger_run = PythonOperator(
    task_id='configure_cellranger_run',
    python_callable=configure_cellranger_run_func,
    trigger_rule='none_failed_or_skipped',
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task_id': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'analysis_info_xcom_key': 'analysis_info',
      'library_csv_xcom_key': 'cellranger_library_csv',
    })
  # One sub-graph per feature type:
  #   branch task -> up to 10 'run_trim_<feature>_<n>' tasks -> collector
  for analysis_name in FEATURE_TYPE_LIST:
    ## TASK
    # Branch task named after the feature type; 'task_prefix' matches the
    # prefix of the trimming task ids created below.
    task_branch = BranchPythonOperator(
      task_id=analysis_name,
      python_callable=task_branch_function,
      queue='hpc_4G',
      dag=dag,
      params={
        'xcom_pull_task_id': 'fetch_analysis_info',
        'analysis_info_xcom_key': 'analysis_info',
        'analysis_name': analysis_name,
        'task_prefix': 'run_trim',
      })
    ## TASK
    # Fixed pool of ten trimming tasks (run ids 0-9) for this feature type.
    run_trim_list = [
      PythonOperator(
        task_id='run_trim_{0}_{1}'.format(analysis_name, run_id),
        python_callable=run_sc_read_trimmming_func,
        queue='hpc_4G',
        dag=dag,
        params={
          'xcom_pull_task_id': 'fetch_analysis_info',
          'analysis_info_xcom_key': 'analysis_info',
          'analysis_description_xcom_key': 'analysis_description',
          'analysis_name': analysis_name,
          'run_id': run_id,
          'r1_length': 0,
          'r2_length': 0,
          'fastq_input_dir_tag': 'fastq_dir',
          'fastq_output_dir_tag': 'output_path',
        })
      for run_id in range(10)]
    ## TASK
    # Join point for the trimming tasks; tolerates skipped upstream tasks.
    collect_trimmed_files = DummyOperator(
      task_id='collect_trimmed_files_{0}'.format(analysis_name),
      trigger_rule='none_failed_or_skipped',
      dag=dag)
    ## PIPELINE
    fetch_analysis_info_and_branch >> task_branch >> run_trim_list
    run_trim_list >> collect_trimmed_files
    collect_trimmed_files >> configure_cellranger_run
  ## TASK
  # Terminal no-op task; its id is the 'no_analysis_task' value given to the
  # 'fetch_analysis_info' branch task.
  no_analysis = DummyOperator(task_id='no_analysis', dag=dag)
  ## PIPELINE
  no_analysis << fetch_analysis_info_and_branch
  ## TASK
  # Runs cellranger on the large-memory queue. The library csv is pulled from
  # 'configure_cellranger_run' and the analysis description from
  # 'fetch_analysis_info'; the output is published under 'cellranger_output'.
  run_cellranger = PythonOperator(
    task_id='run_cellranger',
    python_callable=run_cellranger_tool,
    queue='hpc_64G16t24hr',
    dag=dag,
    params={
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'library_csv_xcom_key': 'cellranger_library_csv',
      'library_csv_xcom_pull_task': 'configure_cellranger_run',
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_options': ['--localcores 16', '--localmem 64'],
    })
  ## PIPELINE
  run_cellranger << configure_cellranger_run
  ## TASK
  # Branch task that follows cellranger. Each '*_task' param holds the id of
  # one downstream task defined later in this file.
  # NOTE(review): the key 'run_scirpy_vdj_t_task' does not follow the
  # 'run_scirpy_for_vdj_*_task' pattern of its siblings - confirm it matches
  # what decide_analysis_branch_func reads before renaming it.
  decide_analysis_branch = BranchPythonOperator(
    task_id='decide_analysis_branch',
    python_callable=decide_analysis_branch_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'load_cellranger_result_to_db_task': 'load_cellranger_result_to_db',
      'run_scanpy_for_sc_5p_task': 'run_scanpy_for_sc_5p',
      'run_scirpy_for_vdj_task': 'run_scirpy_for_vdj',
      'run_scirpy_for_vdj_b_task': 'run_scirpy_for_vdj_b',
      'run_scirpy_vdj_t_task': 'run_scirpy_for_vdj_t',
      'run_seurat_for_sc_5p_task': 'run_seurat_for_sc_5p',
      'run_picard_alignment_summary_task': 'run_picard_alignment_summary',
      'convert_bam_to_cram_task': 'convert_bam_to_cram',
      'library_csv_xcom_key': 'cellranger_library_csv',
      'library_csv_xcom_pull_task': 'configure_cellranger_run',
    })
  ## PIPELINE
  decide_analysis_branch << run_cellranger
  ## TASK
  # Loads the cellranger output into the database and publishes the loaded
  # file list ('loaded_output_files'), the html report ('html_report_file')
  # and the collection name ('sample_igf_id') for the upload tasks below.
  load_cellranger_result_to_db = \
    PythonOperator(
      task_id='load_cellranger_result_to_db',
      dag=dag,
      queue='hpc_4G',
      python_callable=load_cellranger_result_to_db_func,
      params={'analysis_description_xcom_pull_task':'fetch_analysis_info',
              'analysis_description_xcom_key':'analysis_description',
              'cellranger_xcom_key':'cellranger_output',
              'cellranger_xcom_pull_task':'run_cellranger',
              'collection_type':'CELLRANGER_MULTI',
              'collection_table':'sample',
              'xcom_collection_name_key':'sample_igf_id',
              'genome_column':'genome_build',
              'analysis_name':'cellranger_multi',
              'output_xcom_key':'loaded_output_files',
              'html_xcom_key':'html_report_file',
              'html_report_file_name':'web_summary.html'})
  # Uploads the html report pulled from the loader task to FTP.
  upload_cellranger_report_to_ftp = \
    PythonOperator(
      task_id='upload_cellranger_report_to_ftp',
      dag=dag,
      queue='hpc_4G',
      python_callable=ftp_files_upload_for_analysis,
      params={'xcom_pull_task':'load_cellranger_result_to_db',
              'xcom_pull_files_key':'html_report_file',
              'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'collection_type':'FTP_CELLRANGER_MULTI',
              'collection_table':'sample',
              'collect_remote_file':True})
  # Placeholder: no box upload callable exists yet. PythonOperator rejects a
  # non-callable python_callable when the DAG file is parsed, so the task is
  # kept as a DummyOperator (same task id, queue and params) until the real
  # callable is available.
  upload_cellranger_report_to_box = \
    DummyOperator(
      task_id='upload_cellranger_report_to_box',
      dag=dag,
      queue='hpc_4G',
      params={'xcom_pull_task':'load_cellranger_result_to_db',
              'xcom_pull_files_key':'html_report_file'})
  # Uploads the loaded cellranger output files to iRODS.
  upload_cellranger_results_to_irods = \
    PythonOperator(
      task_id='upload_cellranger_results_to_irods',
      dag=dag,
      queue='hpc_4G',
      python_callable=irods_files_upload_for_analysis,
      params={'xcom_pull_task':'load_cellranger_result_to_db',
              'xcom_pull_files_key':'loaded_output_files',
              'collection_name_key':'sample_igf_id',
              'analysis_name':'cellranger_multi'})
  ## PIPELINE
  decide_analysis_branch >> load_cellranger_result_to_db
  load_cellranger_result_to_db >> upload_cellranger_report_to_ftp
  load_cellranger_result_to_db >> upload_cellranger_report_to_box
  load_cellranger_result_to_db >> upload_cellranger_results_to_irods
  ## TASK
  # Runs the notebook wrapper on the cellranger 'count' output with the
  # python3 kernel; publishes the notebook under 'scanpy_notebook' and the
  # cellbrowser directories under 'cellbrowser_dirs'.
  run_scanpy_for_sc_5p = \
    PythonOperator(
      task_id='run_scanpy_for_sc_5p',
      dag=dag,
      queue='hpc_4G',
      python_callable=run_singlecell_notebook_wrapper_func,
      params={'cellranger_xcom_key':'cellranger_output',
              'cellranger_xcom_pull_task':'run_cellranger',
              'scanpy_timeout':1200,
              'allow_errors':False,
              'kernel_name':'python3',
              'count_dir':'count',
              'output_notebook_key':'scanpy_notebook',
              'output_cellbrowser_key':'cellbrowser_dirs',
              'analysis_description_xcom_pull_task':'fetch_analysis_info',
              'analysis_description_xcom_key':'analysis_description'})
  # Loads the scanpy notebook into the database as a SCANPY_HTML collection.
  load_scanpy_report_for_sc_5p_to_db = \
    PythonOperator(
      task_id='load_scanpy_report_for_sc_5p_to_db',
      dag=dag,
      queue='hpc_4G',
      python_callable=load_analysis_files_func,
      params={'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'file_name_task':'run_scanpy_for_sc_5p',
              'file_name_key':'scanpy_notebook',
              'analysis_name':'scanpy_5p',
              'collection_type':'SCANPY_HTML',
              'collection_table':'sample',
              'output_files_key':'output_db_files'})
  # Uploads the files registered by the loader task to FTP.
  upload_scanpy_report_for_sc_5p_to_ftp = \
    PythonOperator(
      task_id='upload_scanpy_report_for_sc_5p_to_ftp',
      dag=dag,
      queue='hpc_4G',
      python_callable=ftp_files_upload_for_analysis,
      params={'xcom_pull_task':'load_scanpy_report_for_sc_5p_to_db',
              'xcom_pull_files_key':'output_db_files',
              'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'collection_type':'FTP_SCANPY_HTML',
              'collection_table':'sample',
              'collect_remote_file':True})
  # Placeholder: PythonOperator rejects python_callable=None at parse time, so
  # the not-yet-implemented box upload is a DummyOperator for now.
  upload_scanpy_report_for_sc_5p_to_box = \
    DummyOperator(
      task_id='upload_scanpy_report_for_sc_5p_to_box',
      dag=dag,
      queue='hpc_4G')
  # Uploads the cellbrowser directories straight from the scanpy task to FTP.
  upload_cellbrowser_for_sc_5p_to_ftp = \
    PythonOperator(
      task_id='upload_cellbrowser_for_sc_5p_to_ftp',
      dag=dag,
      queue='hpc_4G',
      python_callable=ftp_files_upload_for_analysis,
      params={'xcom_pull_task':'run_scanpy_for_sc_5p',
              'xcom_pull_files_key':'cellbrowser_dirs',
              'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'collection_type':'FTP_CELLBROWSER',
              'collection_table':'sample',
              'collect_remote_file':True})
  ## PIPELINE
  decide_analysis_branch >> run_scanpy_for_sc_5p
  run_scanpy_for_sc_5p >> load_scanpy_report_for_sc_5p_to_db
  load_scanpy_report_for_sc_5p_to_db >> upload_scanpy_report_for_sc_5p_to_ftp
  load_scanpy_report_for_sc_5p_to_db >> upload_scanpy_report_for_sc_5p_to_box
  run_scanpy_for_sc_5p >> upload_cellbrowser_for_sc_5p_to_ftp
  ## TASK
  # Runs the notebook wrapper on the 'vdj' and 'count' outputs of cellranger
  # and publishes the notebook under 'scirpy_notebook'.
  run_scirpy_for_vdj = \
    PythonOperator(
      task_id='run_scirpy_for_vdj',
      dag=dag,
      queue='hpc_4G',
      python_callable=run_singlecell_notebook_wrapper_func,
      params={'cellranger_xcom_key':'cellranger_output',
              'cellranger_xcom_pull_task':'run_cellranger',
              'scanpy_timeout':1200,
              'allow_errors':False,
              'kernel_name':'python3',
              'vdj_dir':'vdj',
              'count_dir':'count',
              'output_notebook_key':'scirpy_notebook',
              'analysis_description_xcom_pull_task':'fetch_analysis_info',
              'analysis_description_xcom_key':'analysis_description'})
  # Loads the scirpy notebook into the database (SCIRPY_VDJ_HTML collection).
  load_scirpy_report_for_vdj_to_db = \
    PythonOperator(
      task_id='load_scirpy_report_for_vdj_to_db',
      dag=dag,
      queue='hpc_4G',
      python_callable=load_analysis_files_func,
      params={'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'file_name_task':'run_scirpy_for_vdj',
              'file_name_key':'scirpy_notebook',
              'analysis_name':'scirpy_vdj',
              'collection_type':'SCIRPY_VDJ_HTML',
              'collection_table':'sample',
              'output_files_key':'output_db_files'})
  # Uploads the loaded report to FTP. 'xcom_pull_task' must be the id of the
  # loader task defined directly above ('load_scirpy_...'); no task with a
  # 'load_scanpy_report_for_vdj_to_db' id exists in this DAG.
  upload_scirpy_report_for_vdj_to_ftp = \
    PythonOperator(
      task_id='upload_scirpy_report_for_vdj_to_ftp',
      dag=dag,
      queue='hpc_4G',
      python_callable=ftp_files_upload_for_analysis,
      params={'xcom_pull_task':'load_scirpy_report_for_vdj_to_db',
              'xcom_pull_files_key':'output_db_files',
              'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'collection_type':'FTP_SCIRPY_VDJ_HTML',
              'collection_table':'sample',
              'collect_remote_file':True})
  # Placeholder: PythonOperator rejects python_callable=None at parse time, so
  # the not-yet-implemented box upload is a DummyOperator for now.
  upload_scirpy_report_for_vdj_to_box = \
    DummyOperator(
      task_id='upload_scirpy_report_for_vdj_to_box',
      dag=dag,
      queue='hpc_4G')
  ## PIPELINE
  decide_analysis_branch >> run_scirpy_for_vdj
  run_scirpy_for_vdj >> load_scirpy_report_for_vdj_to_db
  load_scirpy_report_for_vdj_to_db >> upload_scirpy_report_for_vdj_to_ftp
  load_scirpy_report_for_vdj_to_db >> upload_scirpy_report_for_vdj_to_box
  ## TASK
  # Runs the notebook wrapper on the 'vdj_b' and 'count' outputs of cellranger
  # and publishes the notebook under 'scirpy_notebook'.
  run_scirpy_for_vdj_b = \
    PythonOperator(
      task_id='run_scirpy_for_vdj_b',
      dag=dag,
      queue='hpc_4G',
      python_callable=run_singlecell_notebook_wrapper_func,
      params={'cellranger_xcom_key':'cellranger_output',
              'cellranger_xcom_pull_task':'run_cellranger',
              'scanpy_timeout':1200,
              'allow_errors':False,
              'kernel_name':'python3',
              'vdj_dir':'vdj_b',
              'count_dir':'count',
              'output_notebook_key':'scirpy_notebook',
              'analysis_description_xcom_pull_task':'fetch_analysis_info',
              'analysis_description_xcom_key':'analysis_description'})
  # Loads the notebook into the database (SCIRPY_VDJ_B_HTML collection).
  load_scirpy_report_for_vdj_b_to_db = \
    PythonOperator(
      task_id='load_scirpy_report_for_vdj_b_to_db',
      dag=dag,
      queue='hpc_4G',
      python_callable=load_analysis_files_func,
      params={'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'file_name_task':'run_scirpy_for_vdj_b',
              'file_name_key':'scirpy_notebook',
              'analysis_name':'scirpy_vdj_b',
              'collection_type':'SCIRPY_VDJ_B_HTML',
              'collection_table':'sample',
              'output_files_key':'output_db_files'})
  # Uploads the loaded report to FTP. 'xcom_pull_task' must be the id of the
  # loader task defined directly above ('load_scirpy_...'); no task with a
  # 'load_scanpy_report_for_vdj_b_to_db' id exists in this DAG.
  upload_scirpy_report_for_vdj_b_to_ftp = \
    PythonOperator(
      task_id='upload_scirpy_report_for_vdj_b_to_ftp',
      dag=dag,
      queue='hpc_4G',
      python_callable=ftp_files_upload_for_analysis,
      params={'xcom_pull_task':'load_scirpy_report_for_vdj_b_to_db',
              'xcom_pull_files_key':'output_db_files',
              'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'collection_type':'FTP_SCIRPY_VDJ_B_HTML',
              'collection_table':'sample',
              'collect_remote_file':True})
  # Placeholder: PythonOperator rejects python_callable=None at parse time, so
  # the not-yet-implemented box upload is a DummyOperator for now. The task id
  # uses 'scirpy' so that it agrees with the variable name and with the vdj
  # and vdj_t sibling tasks.
  upload_scirpy_report_for_vdj_b_to_box = \
    DummyOperator(
      task_id='upload_scirpy_report_for_vdj_b_to_box',
      dag=dag,
      queue='hpc_4G')
  ## PIPELINE
  decide_analysis_branch >> run_scirpy_for_vdj_b
  run_scirpy_for_vdj_b >> load_scirpy_report_for_vdj_b_to_db
  load_scirpy_report_for_vdj_b_to_db >> upload_scirpy_report_for_vdj_b_to_ftp
  load_scirpy_report_for_vdj_b_to_db >> upload_scirpy_report_for_vdj_b_to_box
  ## TASK
  # Runs the notebook wrapper on the 'vdj_t' and 'count' outputs of cellranger
  # and publishes the notebook under 'scirpy_notebook'.
  run_scirpy_for_vdj_t = \
    PythonOperator(
      task_id='run_scirpy_for_vdj_t',
      dag=dag,
      queue='hpc_4G',
      python_callable=run_singlecell_notebook_wrapper_func,
      params={'cellranger_xcom_key':'cellranger_output',
              'cellranger_xcom_pull_task':'run_cellranger',
              'scanpy_timeout':1200,
              'allow_errors':False,
              'kernel_name':'python3',
              'vdj_dir':'vdj_t',
              'count_dir':'count',
              'output_notebook_key':'scirpy_notebook',
              'analysis_description_xcom_pull_task':'fetch_analysis_info',
              'analysis_description_xcom_key':'analysis_description'})
  # Loads the notebook into the database (SCIRPY_VDJ_T_HTML collection).
  load_scirpy_report_for_vdj_t_to_db = \
    PythonOperator(
      task_id='load_scirpy_report_for_vdj_t_to_db',
      dag=dag,
      queue='hpc_4G',
      python_callable=load_analysis_files_func,
      params={'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'file_name_task':'run_scirpy_for_vdj_t',
              'file_name_key':'scirpy_notebook',
              'analysis_name':'scirpy_vdj_t',
              'collection_type':'SCIRPY_VDJ_T_HTML',
              'collection_table':'sample',
              'output_files_key':'output_db_files'})
  # Uploads the loaded report to FTP. 'xcom_pull_task' must be the id of the
  # loader task defined directly above ('load_scirpy_...'); no task with a
  # 'load_scanpy_report_for_vdj_t_to_db' id exists in this DAG.
  upload_scirpy_report_for_vdj_t_to_ftp = \
    PythonOperator(
      task_id='upload_scirpy_report_for_vdj_t_to_ftp',
      dag=dag,
      queue='hpc_4G',
      python_callable=ftp_files_upload_for_analysis,
      params={'xcom_pull_task':'load_scirpy_report_for_vdj_t_to_db',
              'xcom_pull_files_key':'output_db_files',
              'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'collection_type':'FTP_SCIRPY_VDJ_T_HTML',
              'collection_table':'sample',
              'collect_remote_file':True})
  # Placeholder: PythonOperator rejects python_callable=None at parse time, so
  # the not-yet-implemented box upload is a DummyOperator for now.
  upload_scirpy_report_for_vdj_t_to_box = \
    DummyOperator(
      task_id='upload_scirpy_report_for_vdj_t_to_box',
      dag=dag,
      queue='hpc_4G')
  ## PIPELINE
  decide_analysis_branch >> run_scirpy_for_vdj_t
  run_scirpy_for_vdj_t >> load_scirpy_report_for_vdj_t_to_db
  load_scirpy_report_for_vdj_t_to_db >> upload_scirpy_report_for_vdj_t_to_ftp
  load_scirpy_report_for_vdj_t_to_db >> upload_scirpy_report_for_vdj_t_to_box
  ## TASK
  # Runs the notebook wrapper with the R kernel. The cell marker list, the
  # notebook template and the singularity image path are read from Airflow
  # Variables when this file is parsed.
  run_seurat_for_sc_5p = \
    PythonOperator(
      task_id='run_seurat_for_sc_5p',
      dag=dag,
      queue='hpc_4G',
      python_callable=run_singlecell_notebook_wrapper_func,
      params={'cellranger_xcom_key':'cellranger_output',
              'cellranger_xcom_pull_task':'run_cellranger',
              'scanpy_timeout':1200,
              'allow_errors':False,
              'kernel_name':'R',
              'vdj_dir':'vdj',
              'count_dir':'count',
              'cell_marker_list':Variable.get('all_cell_marker_list'),
              'output_notebook_key':'seurat_notebook',
              'analysis_description_xcom_pull_task':'fetch_analysis_info',
              'analysis_description_xcom_key':'analysis_description',
              'template_ipynb_path':Variable.get('seurat_single_sample_template'),
              'singularity_image_path':Variable.get('seurat_notebook_image')})
  # Loads the seurat notebook into the database (SEURAT_HTML collection).
  load_seurat_report_for_sc_5p_db = \
    PythonOperator(
      task_id='load_seurat_report_for_sc_5p_db',
      dag=dag,
      queue='hpc_4G',
      python_callable=load_analysis_files_func,
      params={'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'file_name_task':'run_seurat_for_sc_5p',
              'file_name_key':'seurat_notebook',
              'analysis_name':'seurat_5p',
              'collection_type':'SEURAT_HTML',
              'collection_table':'sample',
              'output_files_key':'output_db_files'})
  # Uploads the files registered by the loader task to FTP.
  upload_seurat_report_for_sc_5p_ftp = \
    PythonOperator(
      task_id='upload_seurat_report_for_sc_5p_ftp',
      dag=dag,
      queue='hpc_4G',
      python_callable=ftp_files_upload_for_analysis,
      params={'xcom_pull_task':'load_seurat_report_for_sc_5p_db',
              'xcom_pull_files_key':'output_db_files',
              'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'collection_type':'FTP_SEURAT_HTML',
              'collection_table':'sample',
              'collect_remote_file':True})
  # Placeholder: PythonOperator rejects python_callable=None at parse time, so
  # the not-yet-implemented box upload is a DummyOperator for now.
  upload_seurat_report_for_sc_5p_to_box = \
    DummyOperator(
      task_id='upload_seurat_report_for_sc_5p_to_box',
      dag=dag,
      queue='hpc_4G')
  ## PIPELINE
  decide_analysis_branch >> run_seurat_for_sc_5p
  run_seurat_for_sc_5p >> load_seurat_report_for_sc_5p_db
  load_seurat_report_for_sc_5p_db >> upload_seurat_report_for_sc_5p_ftp
  load_seurat_report_for_sc_5p_db >> upload_seurat_report_for_sc_5p_to_box
  ## TASK
  # Placeholders for the bam-to-cram conversion and the iRODS upload of the
  # cram. No callables exist yet and PythonOperator rejects
  # python_callable=None when the DAG file is parsed, so both tasks are
  # DummyOperators that keep the task ids, queue and params in place.
  convert_bam_to_cram = \
    DummyOperator(
      task_id='convert_bam_to_cram',
      dag=dag,
      queue='hpc_4G',
      params={'cellranger_xcom_key':'cellranger_output',
              'cellranger_xcom_pull_task':'run_cellranger'})
  upload_cram_to_irods = \
    DummyOperator(
      task_id='upload_cram_to_irods',
      dag=dag,
      queue='hpc_4G')
  ## PIPELINE
  decide_analysis_branch >> convert_bam_to_cram
  convert_bam_to_cram >> upload_cram_to_irods
  # QC chain placeholders. None of these steps has a callable yet and
  # PythonOperator rejects python_callable=None when the DAG file is parsed,
  # so every step is a DummyOperator that keeps its task id, queue and
  # position in the graph until the real implementation is plugged in.
  ## TASK
  run_picard_alignment_summary = \
    DummyOperator(
      task_id='run_picard_alignment_summary',
      dag=dag,
      queue='hpc_4G',
      params={'cellranger_xcom_key':'cellranger_output',
              'cellranger_xcom_pull_task':'run_cellranger'})
  ## PIPELINE
  decide_analysis_branch >> run_picard_alignment_summary
  ## TASK
  run_picard_qual_summary = \
    DummyOperator(
      task_id='run_picard_qual_summary',
      dag=dag,
      queue='hpc_4G')
  ## PIPELINE
  run_picard_alignment_summary >> run_picard_qual_summary
  ## TASK
  run_picard_rna_summary = \
    DummyOperator(
      task_id='run_picard_rna_summary',
      dag=dag,
      queue='hpc_4G')
  ## PIPELINE
  run_picard_qual_summary >> run_picard_rna_summary
  ## TASK
  run_picard_gc_summary = \
    DummyOperator(
      task_id='run_picard_gc_summary',
      dag=dag,
      queue='hpc_4G')
  ## PIPELINE
  run_picard_rna_summary >> run_picard_gc_summary
  ## TASK
  run_samtools_stats = \
    DummyOperator(
      task_id='run_samtools_stats',
      dag=dag,
      queue='hpc_4G')
  ## PIPELINE
  run_picard_gc_summary >> run_samtools_stats
  ## TASK
  run_samtools_idxstats = \
    DummyOperator(
      task_id='run_samtools_idxstats',
      dag=dag,
      queue='hpc_4G')
  ## PIPELINE
  run_samtools_stats >> run_samtools_idxstats
  ## TASK
  run_multiqc = \
    DummyOperator(
      task_id='run_multiqc',
      dag=dag,
      queue='hpc_4G')
  ## PIPELINE
  run_samtools_idxstats >> run_multiqc
  ## TASK
  upload_multiqc_to_ftp = \
    DummyOperator(
      task_id='upload_multiqc_to_ftp',
      dag=dag,
      queue='hpc_4G')
  ## PIPELINE
  run_multiqc >> upload_multiqc_to_ftp
  ## TASK
  upload_multiqc_to_box = \
    DummyOperator(
      task_id='upload_multiqc_to_box',
      dag=dag,
      queue='hpc_4G')
  ## PIPELINE
  run_multiqc >> upload_multiqc_to_box
  ## TASK
  # Final join task. It is a placeholder: no callable exists yet and
  # PythonOperator rejects python_callable=None when the DAG file is parsed,
  # so a DummyOperator is used. The trigger rule lets it run when some of the
  # analysis branches upstream were skipped.
  update_analysis_and_status = \
    DummyOperator(
      task_id='update_analysis_and_status',
      dag=dag,
      queue='hpc_4G',
      trigger_rule='none_failed_or_skipped')
  ## PIPELINE
  # Every leaf upload task feeds the final task. upload_multiqc_to_box is
  # included so that it is treated like all the other box uploads.
  upload_multiqc_to_ftp >> update_analysis_and_status
  upload_multiqc_to_box >> update_analysis_and_status
  upload_scanpy_report_for_sc_5p_to_ftp >> update_analysis_and_status
  upload_scanpy_report_for_sc_5p_to_box >> update_analysis_and_status
  upload_cellbrowser_for_sc_5p_to_ftp >> update_analysis_and_status
  upload_scirpy_report_for_vdj_to_ftp >> update_analysis_and_status
  upload_scirpy_report_for_vdj_to_box >> update_analysis_and_status
  upload_scirpy_report_for_vdj_b_to_ftp >> update_analysis_and_status
  upload_scirpy_report_for_vdj_b_to_box >> update_analysis_and_status
  upload_scirpy_report_for_vdj_t_to_ftp >> update_analysis_and_status
  upload_scirpy_report_for_vdj_t_to_box >> update_analysis_and_status
  upload_seurat_report_for_sc_5p_ftp >> update_analysis_and_status
  upload_seurat_report_for_sc_5p_to_box >> update_analysis_and_status
  upload_cellranger_results_to_irods >> update_analysis_and_status
  upload_cellranger_report_to_ftp >> update_analysis_and_status
  upload_cellranger_report_to_box >> update_analysis_and_status
  upload_cram_to_irods >> update_analysis_and_status