dag9_tenx_single_cell_immune_profiling.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593
  1. from datetime import timedelta
  2. import os,json,logging,subprocess
  3. from airflow.models import DAG,Variable
  4. from airflow.utils.dates import days_ago
  5. from airflow.operators.python_operator import PythonOperator
  6. from airflow.operators.python_operator import BranchPythonOperator
  7. from airflow.operators.dummy_operator import DummyOperator
  8. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import fetch_analysis_info_and_branch_func
  9. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import configure_cellranger_run_func
  10. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_sc_read_trimmming_func
  11. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_cellranger_tool
  12. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import decide_analysis_branch_func
  13. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_cellranger_result_to_db_func
  14. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import ftp_files_upload_for_analysis
  15. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import irods_files_upload_for_analysis
  16. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_scanpy_for_sc_5p_func
  17. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_singlecell_notebook_wrapper_func
  18. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_analysis_files_func
  19. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import task_branch_function
  ## ARGS
# Operator-level defaults: Airflow forwards these keys to every operator of
# the DAG. DAG-level settings (max_active_runs, catchup) do not belong here,
# operators silently ignore them, so they are passed to DAG() below instead.
default_args = {
  'owner': 'airflow',
  'depends_on_past': False,
  'start_date': days_ago(2),
  'email_on_failure': False,
  'email_on_retry': False,
  'retries': 4,
  'retry_delay': timedelta(minutes=5),
  'provide_context': True,
}

# Comma-separated feature types read from an Airflow Variable; one trimming
# sub-graph is generated per entry. Each entry is used verbatim as a task_id,
# so surrounding whitespace is stripped and empty entries are dropped.
FEATURE_TYPE_LIST = [
  feature_type.strip()
  for feature_type in
    Variable.get('tenx_single_cell_immune_profiling_feature_types').split(',')
  if feature_type.strip()]

## DAG
# Manually triggered DAG (schedule_interval=None).
dag = \
  DAG(
    dag_id='dag9_tenx_single_cell_immune_profiling',
    schedule_interval=None,
    max_active_runs=10,
    catchup=False,
    tags=['hpc','analysis','tenx','sc'],
    default_args=default_args,
    orientation='LR')
  ## TASK GRAPH: all operators below are declared inside the DAG context
with dag:
  ## TASK
  # Entry-point branch of the DAG. The callable receives, through params, the
  # task id of the "nothing to do" branch and the XCom keys it should use for
  # the analysis description and analysis info.
  fetch_analysis_info_and_branch = BranchPythonOperator(
    task_id='fetch_analysis_info',
    python_callable=fetch_analysis_info_and_branch_func,
    queue='hpc_4G',
    params={
      'no_analysis_task': 'no_analysis',
      'analysis_description_xcom_key': 'analysis_description',
      'analysis_info_xcom_key': 'analysis_info',
    },
    dag=dag)
  ## TASK
  # Join point of all per-feature trimming sub-graphs. The callable is told
  # where to pull the analysis description/info XComs from
  # ('fetch_analysis_info') and under which key to publish the library csv.
  #
  # The upstream branch operator skips the sub-graphs of feature types it does
  # not select. With the default 'all_success' rule a single skipped parent
  # would skip this task too, so it uses the same rule as the
  # collect_trimmed_files_* tasks: run when no parent failed and at least one
  # parent succeeded.
  configure_cellranger_run = \
    PythonOperator(
      task_id='configure_cellranger_run',
      dag=dag,
      queue='hpc_4G',
      trigger_rule='none_failed_or_skipped',
      params={'xcom_pull_task_id':'fetch_analysis_info',
              'analysis_description_xcom_key':'analysis_description',
              'analysis_info_xcom_key':'analysis_info',
              'library_csv_xcom_key':'cellranger_library_csv'},
      python_callable=configure_cellranger_run_func)
  for analysis_name in FEATURE_TYPE_LIST:
    ## TASK
    # Per-feature branch; its task_id is the feature type name itself. The
    # callable gets the feature name and the 'run_trim' task prefix via params.
    task_branch = BranchPythonOperator(
      task_id=analysis_name,
      python_callable=task_branch_function,
      queue='hpc_4G',
      params={
        'xcom_pull_task_id': 'fetch_analysis_info',
        'analysis_info_xcom_key': 'analysis_info',
        'analysis_name': analysis_name,
        'task_prefix': 'run_trim',
      },
      dag=dag)
    ## TASK
    # Ten trimming slots (run_id 0-9) per feature type; the branch above
    # decides which of them actually run.
    run_trim_list = [
      PythonOperator(
        task_id='run_trim_{0}_{1}'.format(analysis_name, run_id),
        python_callable=run_sc_read_trimmming_func,
        queue='hpc_4G',
        params={
          'xcom_pull_task_id': 'fetch_analysis_info',
          'analysis_info_xcom_key': 'analysis_info',
          'analysis_description_xcom_key': 'analysis_description',
          'analysis_name': analysis_name,
          'run_id': run_id,
          'r1_length': 0,
          'r2_length': 0,
          'fastq_input_dir_tag': 'fastq_dir',
          'fastq_output_dir_tag': 'output_path',
        },
        dag=dag)
      for run_id in range(10)]
    ## TASK
    # Fan-in for the trimming slots of this feature type; tolerates skipped
    # slots thanks to the 'none_failed_or_skipped' trigger rule.
    collect_trimmed_files = DummyOperator(
      task_id='collect_trimmed_files_{0}'.format(analysis_name),
      trigger_rule='none_failed_or_skipped',
      dag=dag)
    ## PIPELINE
    fetch_analysis_info_and_branch.set_downstream(task_branch)
    task_branch.set_downstream(run_trim_list)
    collect_trimmed_files.set_upstream(run_trim_list)
    configure_cellranger_run.set_upstream(collect_trimmed_files)
  ## TASK
  # Terminal no-op branch; its task id is handed to the fetch_analysis_info
  # callable as 'no_analysis_task'.
  no_analysis = DummyOperator(task_id='no_analysis', dag=dag)
  ## PIPELINE
  no_analysis.set_upstream(fetch_analysis_info_and_branch)
  ## TASK
  # Runs the cellranger callable on the large HPC queue. Params tell it which
  # tasks/keys hold the analysis description and the library csv, the key for
  # publishing its output, and the extra command line options.
  run_cellranger = PythonOperator(
    task_id='run_cellranger',
    python_callable=run_cellranger_tool,
    queue='hpc_64G16t24hr',
    params={
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'library_csv_xcom_key': 'cellranger_library_csv',
      'library_csv_xcom_pull_task': 'configure_cellranger_run',
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_options': ['--localcores 8','--localmem 64'],
    },
    dag=dag)
  ## PIPELINE
  run_cellranger.set_upstream(configure_cellranger_run)
  ## TASK
  # Branch that selects the downstream analysis tasks after cellranger.
  # Each '*_task' param maps a logical analysis to the task_id that the branch
  # callable may return, so every value must be the id of a task that really
  # exists directly downstream of this operator.
  decide_analysis_branch = \
    BranchPythonOperator(
      task_id='decide_analysis_branch',
      dag=dag,
      queue='hpc_4G',
      python_callable=decide_analysis_branch_func,
      params={'load_cellranger_result_to_db_task':'load_cellranger_result_to_db',
              'run_scanpy_for_sc_5p_task':'run_scanpy_for_sc_5p',
              'run_scirpy_for_vdj_task':'run_scirpy_for_vdj',
              'run_scirpy_for_vdj_b_task':'run_scirpy_for_vdj_b',
              # The key is kept as the callable expects it; the value was
              # 'run_scirpy_vdj_t', which is not a task id in this DAG.
              'run_scirpy_vdj_t_task':'run_scirpy_for_vdj_t',
              'run_seurat_for_sc_5p_task':'run_seurat_for_sc_5p',
              'run_picard_alignment_summary_task':'run_picard_alignment_summary',
              'convert_bam_to_cram_task':'convert_bam_to_cram',
              'library_csv_xcom_key':'cellranger_library_csv',
              'library_csv_xcom_pull_task':'configure_cellranger_run'})
  ## PIPELINE
  run_cellranger >> decide_analysis_branch
  ## TASK
  # Calls load_cellranger_result_to_db_func with the cellranger output XCom
  # location and the collection settings; publishes results under the
  # 'loaded_output_files', 'html_report_file' and 'sample_igf_id' keys named
  # in params.
  load_cellranger_result_to_db = PythonOperator(
    task_id='load_cellranger_result_to_db',
    python_callable=load_cellranger_result_to_db_func,
    queue='hpc_4G',
    params={
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'CELLRANGER_MULTI',
      'collection_table': 'sample',
      'xcom_collection_name_key': 'sample_igf_id',
      'genome_column': 'genome_build',
      'analysis_name': 'cellranger_multi',
      'output_xcom_key': 'loaded_output_files',
      'html_xcom_key': 'html_report_file',
      'html_report_file_name': 'web_summary.html',
    },
    dag=dag)
  # FTP upload of the html report referenced by the loader task's XCom.
  upload_report_to_ftp = PythonOperator(
    task_id='upload_report_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'html_report_file',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_CELLRANGER_MULTI',
      'collection_table': 'sample',
      'collect_remote_file': True,
    },
    dag=dag)
  # Placeholder (DummyOperator performs no work); params are already set up.
  upload_report_to_box = DummyOperator(
    task_id='upload_report_to_box',
    queue='hpc_4G',
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'html_report_file',
    },
    dag=dag)
  # iRODS upload of the files listed under 'loaded_output_files'.
  upload_results_to_irods = PythonOperator(
    task_id='upload_results_to_irods',
    python_callable=irods_files_upload_for_analysis,
    queue='hpc_4G',
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'loaded_output_files',
      'collection_name_key': 'sample_igf_id',
      'analysis_name': 'cellranger_multi',
    },
    dag=dag)
  ## PIPELINE
  decide_analysis_branch.set_downstream(load_cellranger_result_to_db)
  load_cellranger_result_to_db.set_downstream(
    [upload_report_to_ftp,
     upload_report_to_box,
     upload_results_to_irods])
  ## TASK
  # Calls run_scanpy_for_sc_5p_func on the cellranger output; the notebook and
  # cellbrowser results are published under the XCom keys named in params.
  run_scanpy_for_sc_5p = \
    PythonOperator(
      task_id='run_scanpy_for_sc_5p',
      dag=dag,
      queue='hpc_4G',
      python_callable=run_scanpy_for_sc_5p_func,
      params={'cellranger_xcom_key':'cellranger_output',
              'cellranger_xcom_pull_task':'run_cellranger',
              'scanpy_timeout':1200,
              'allow_errors':False,
              'output_notebook_key':'scanpy_notebook',
              'output_cellbrowser_key':'cellbrowser_dirs',
              'analysis_description_xcom_pull_task':'fetch_analysis_info',
              'analysis_description_xcom_key':'analysis_description'})
  # Loads the scanpy notebook (key 'scanpy_notebook') as a SCANPY_HTML
  # collection and publishes the loaded files under 'output_db_files'.
  load_scanpy_report_for_sc_5p_to_db = \
    PythonOperator(
      task_id='load_scanpy_report_for_sc_5p_to_db',
      dag=dag,
      queue='hpc_4G',
      python_callable=load_analysis_files_func,
      params={'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'file_name_task':'run_scanpy_for_sc_5p',
              'file_name_key':'scanpy_notebook',
              'analysis_name':'scanpy_5p',
              'collection_type':'SCANPY_HTML',
              'collection_table':'sample',
              'output_files_key':'output_db_files'})
  upload_scanpy_report_for_sc_5p_to_ftp = \
    PythonOperator(
      task_id='upload_scanpy_report_for_sc_5p_to_ftp',
      dag=dag,
      queue='hpc_4G',
      python_callable=ftp_files_upload_for_analysis,
      params={'xcom_pull_task':'load_scanpy_report_for_sc_5p_to_db',
              'xcom_pull_files_key':'output_db_files',
              'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'collection_type':'FTP_SCANPY_HTML',
              'collection_table':'sample',
              'collect_remote_file':True})
  # Placeholder, no work is done yet.
  upload_scanpy_report_for_sc_5p_to_box = \
    DummyOperator(
      task_id='upload_scanpy_report_for_sc_5p_to_box',
      dag=dag)
  # Placeholder for the cellbrowser FTP upload. DummyOperator does not accept
  # python_callable (Airflow 1.10 warns about invalid arguments, Airflow 2
  # raises), so the argument is left out while the task stays a no-op.
  # TODO: switch to PythonOperator with
  # python_callable=ftp_files_upload_for_analysis to activate the upload;
  # the params below are already prepared for it.
  upload_cellbrowser_for_sc_5p_to_ftp = \
    DummyOperator(
      task_id='upload_cellbrowser_for_sc_5p_to_ftp',
      dag=dag,
      queue='hpc_4G',
      params={'xcom_pull_task':'run_scanpy_for_sc_5p',
              'xcom_pull_files_key':'cellbrowser_dirs',
              'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'collection_type':'FTP_CELLBROWSER',
              'collection_table':'sample',
              'collect_remote_file':True})
  ## PIPELINE
  decide_analysis_branch >> run_scanpy_for_sc_5p
  run_scanpy_for_sc_5p >> load_scanpy_report_for_sc_5p_to_db
  load_scanpy_report_for_sc_5p_to_db >> upload_scanpy_report_for_sc_5p_to_ftp
  load_scanpy_report_for_sc_5p_to_db >> upload_scanpy_report_for_sc_5p_to_box
  run_scanpy_for_sc_5p >> upload_cellbrowser_for_sc_5p_to_ftp
  ## TASK
  # Runs the notebook wrapper with the scirpy template and image (both read
  # from Airflow Variables) using the 'vdj' and 'count' directories; the
  # resulting notebook is published under 'scirpy_notebook'.
  run_scirpy_for_vdj = PythonOperator(
    task_id='run_scirpy_for_vdj',
    python_callable=run_singlecell_notebook_wrapper_func,
    queue='hpc_4G',
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'vdj_dir': 'vdj',
      'count_dir': 'count',
      'cell_marker_list': Variable.get('all_cell_marker_list'),
      'output_notebook_key': 'scirpy_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'template_ipynb_path': Variable.get('scirpy_single_sample_template'),
      'singularity_image_path': Variable.get('scirpy_notebook_image'),
    },
    dag=dag)
  # Loads the notebook as a SCIRPY_VDJ_HTML collection.
  load_scanpy_report_for_vdj_to_db = PythonOperator(
    task_id='load_scanpy_report_for_vdj_to_db',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scirpy_for_vdj',
      'file_name_key': 'scirpy_notebook',
      'analysis_name': 'scirpy_vdj',
      'collection_type': 'SCIRPY_VDJ_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    },
    dag=dag)
  # FTP upload of the files loaded by the task above.
  upload_scanpy_report_for_vdj_to_ftp = PythonOperator(
    task_id='upload_scanpy_report_for_vdj_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    params={
      'xcom_pull_task': 'load_scanpy_report_for_vdj_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCIRPY_VDJ_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    },
    dag=dag)
  # Placeholder, no work is done yet.
  upload_scanpy_report_for_vdj_to_box = DummyOperator(
    task_id='upload_scanpy_report_for_vdj_to_box',
    dag=dag)
  ## PIPELINE
  decide_analysis_branch.set_downstream(run_scirpy_for_vdj)
  run_scirpy_for_vdj.set_downstream(load_scanpy_report_for_vdj_to_db)
  load_scanpy_report_for_vdj_to_db.set_downstream(
    [upload_scanpy_report_for_vdj_to_ftp,
     upload_scanpy_report_for_vdj_to_box])
  ## TASK
  # Placeholder for the scirpy vdj_b notebook run. DummyOperator does not
  # accept python_callable (Airflow 1.10 warns about invalid arguments,
  # Airflow 2 raises), so the argument is left out while the task stays a no-op.
  # TODO: switch to PythonOperator with
  # python_callable=run_singlecell_notebook_wrapper_func to activate it;
  # the params below are already prepared for it.
  # NOTE(review): the loader task below expects a 'scirpy_notebook' XCom from
  # this task id; a no-op task presumably never sets it - confirm before
  # enabling this branch.
  run_scirpy_for_vdj_b = \
    DummyOperator(
      task_id='run_scirpy_for_vdj_b',
      dag=dag,
      queue='hpc_4G',
      params={'cellranger_xcom_key':'cellranger_output',
              'cellranger_xcom_pull_task':'run_cellranger',
              'scanpy_timeout':1200,
              'allow_errors':False,
              'kernel_name':'python3',
              'vdj_dir':'vdj_b',
              'count_dir':'count',
              'cell_marker_list':Variable.get('all_cell_marker_list'),
              'output_notebook_key':'scirpy_notebook',
              'analysis_description_xcom_pull_task':'fetch_analysis_info',
              'analysis_description_xcom_key':'analysis_description',
              'template_ipynb_path':Variable.get('scirpy_single_sample_template'),
              'singularity_image_path':Variable.get('scirpy_notebook_image')})
  # Loads the notebook as a SCIRPY_VDJ_B_HTML collection.
  load_scanpy_report_for_vdj_b_to_db = \
    PythonOperator(
      task_id='load_scanpy_report_for_vdj_b_to_db',
      dag=dag,
      queue='hpc_4G',
      python_callable=load_analysis_files_func,
      params={'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'file_name_task':'run_scirpy_for_vdj_b',
              'file_name_key':'scirpy_notebook',
              'analysis_name':'scirpy_vdj_b',
              'collection_type':'SCIRPY_VDJ_B_HTML',
              'collection_table':'sample',
              'output_files_key':'output_db_files'})
  upload_scanpy_report_for_vdj_b_to_ftp = \
    PythonOperator(
      task_id='upload_scanpy_report_for_vdj_b_to_ftp',
      dag=dag,
      queue='hpc_4G',
      python_callable=ftp_files_upload_for_analysis,
      params={'xcom_pull_task':'load_scanpy_report_for_vdj_b_to_db',
              'xcom_pull_files_key':'output_db_files',
              'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'collection_type':'FTP_SCIRPY_VDJ_B_HTML',
              'collection_table':'sample',
              'collect_remote_file':True})
  # Placeholder, no work is done yet.
  upload_scanpy_report_for_vdj_b_to_box = \
    DummyOperator(
      task_id='upload_scanpy_report_for_vdj_b_to_box',
      dag=dag)
  ## PIPELINE
  decide_analysis_branch >> run_scirpy_for_vdj_b
  run_scirpy_for_vdj_b >> load_scanpy_report_for_vdj_b_to_db
  load_scanpy_report_for_vdj_b_to_db >> upload_scanpy_report_for_vdj_b_to_ftp
  load_scanpy_report_for_vdj_b_to_db >> upload_scanpy_report_for_vdj_b_to_box
  ## TASK
  # Placeholder for the scirpy vdj_t notebook run. DummyOperator does not
  # accept python_callable (Airflow 1.10 warns about invalid arguments,
  # Airflow 2 raises), so the argument is left out while the task stays a no-op.
  # TODO: switch to PythonOperator with
  # python_callable=run_singlecell_notebook_wrapper_func to activate it;
  # the params below are already prepared for it.
  # NOTE(review): the loader task below expects a 'scirpy_notebook' XCom from
  # this task id; a no-op task presumably never sets it - confirm before
  # enabling this branch.
  run_scirpy_for_vdj_t = \
    DummyOperator(
      task_id='run_scirpy_for_vdj_t',
      dag=dag,
      queue='hpc_4G',
      params={'cellranger_xcom_key':'cellranger_output',
              'cellranger_xcom_pull_task':'run_cellranger',
              'scanpy_timeout':1200,
              'allow_errors':False,
              'kernel_name':'python3',
              'vdj_dir':'vdj_t',
              'count_dir':'count',
              'cell_marker_list':Variable.get('all_cell_marker_list'),
              'output_notebook_key':'scirpy_notebook',
              'analysis_description_xcom_pull_task':'fetch_analysis_info',
              'analysis_description_xcom_key':'analysis_description',
              'template_ipynb_path':Variable.get('scirpy_single_sample_template'),
              'singularity_image_path':Variable.get('scirpy_notebook_image')})
  # Loads the notebook as a SCIRPY_VDJ_T_HTML collection.
  load_scanpy_report_for_vdj_t_to_db = \
    PythonOperator(
      task_id='load_scanpy_report_for_vdj_t_to_db',
      dag=dag,
      queue='hpc_4G',
      python_callable=load_analysis_files_func,
      params={'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'file_name_task':'run_scirpy_for_vdj_t',
              'file_name_key':'scirpy_notebook',
              'analysis_name':'scirpy_vdj_t',
              'collection_type':'SCIRPY_VDJ_T_HTML',
              'collection_table':'sample',
              'output_files_key':'output_db_files'})
  upload_scanpy_report_for_vdj_t_to_ftp = \
    PythonOperator(
      task_id='upload_scanpy_report_for_vdj_t_to_ftp',
      dag=dag,
      queue='hpc_4G',
      python_callable=ftp_files_upload_for_analysis,
      params={'xcom_pull_task':'load_scanpy_report_for_vdj_t_to_db',
              'xcom_pull_files_key':'output_db_files',
              'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'collection_type':'FTP_SCIRPY_VDJ_T_HTML',
              'collection_table':'sample',
              'collect_remote_file':True})
  # Placeholder, no work is done yet.
  upload_scanpy_report_for_vdj_t_to_box = \
    DummyOperator(
      task_id='upload_scanpy_report_for_vdj_t_to_box',
      dag=dag)
  ## PIPELINE
  decide_analysis_branch >> run_scirpy_for_vdj_t
  run_scirpy_for_vdj_t >> load_scanpy_report_for_vdj_t_to_db
  load_scanpy_report_for_vdj_t_to_db >> upload_scanpy_report_for_vdj_t_to_ftp
  load_scanpy_report_for_vdj_t_to_db >> upload_scanpy_report_for_vdj_t_to_box
  ## TASK
  # Runs the notebook wrapper with the seurat template and image (both read
  # from Airflow Variables) and the 'R' kernel; the resulting notebook is
  # published under 'seurat_notebook'.
  run_seurat_for_sc_5p = \
    PythonOperator(
      task_id='run_seurat_for_sc_5p',
      dag=dag,
      queue='hpc_4G',
      python_callable=run_singlecell_notebook_wrapper_func,
      params={'cellranger_xcom_key':'cellranger_output',
              'cellranger_xcom_pull_task':'run_cellranger',
              'scanpy_timeout':1200,
              'allow_errors':False,
              'kernel_name':'R',
              'vdj_dir':'vdj',
              'count_dir':'count',
              'cell_marker_list':Variable.get('all_cell_marker_list'),
              'output_notebook_key':'seurat_notebook',
              'analysis_description_xcom_pull_task':'fetch_analysis_info',
              'analysis_description_xcom_key':'analysis_description',
              'template_ipynb_path':Variable.get('seurat_single_sample_template'),
              'singularity_image_path':Variable.get('seurat_notebook_image')})
  # Loads the notebook as a SEURAT_HTML collection.
  load_seurat_report_for_sc_5p_db = \
    PythonOperator(
      task_id='load_seurat_report_for_sc_5p_db',
      dag=dag,
      queue='hpc_4G',
      python_callable=load_analysis_files_func,
      params={'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'file_name_task':'run_seurat_for_sc_5p',
              'file_name_key':'seurat_notebook',
              'analysis_name':'seurat_5p',
              'collection_type':'SEURAT_HTML',
              'collection_table':'sample',
              'output_files_key':'output_db_files'})
  # Placeholder for the seurat report FTP upload. DummyOperator does not
  # accept python_callable (Airflow 1.10 warns about invalid arguments,
  # Airflow 2 raises), so the argument is left out while the task stays a no-op.
  # TODO: switch to PythonOperator with
  # python_callable=ftp_files_upload_for_analysis to activate the upload;
  # the params below are already prepared for it.
  upload_seurat_report_for_sc_5p_ftp = \
    DummyOperator(
      task_id='upload_seurat_report_for_sc_5p_ftp',
      dag=dag,
      queue='hpc_4G',
      params={'xcom_pull_task':'load_seurat_report_for_sc_5p_db',
              'xcom_pull_files_key':'output_db_files',
              'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'collection_type':'FTP_SEURAT_HTML',
              'collection_table':'sample',
              'collect_remote_file':True})
  # Placeholder, no work is done yet.
  upload_seurat_report_for_sc_5p_to_box = \
    DummyOperator(
      task_id='upload_seurat_report_for_sc_5p_to_box',
      dag=dag)
  ## PIPELINE
  decide_analysis_branch >> run_seurat_for_sc_5p
  run_seurat_for_sc_5p >> load_seurat_report_for_sc_5p_db
  load_seurat_report_for_sc_5p_db >> upload_seurat_report_for_sc_5p_ftp
  load_seurat_report_for_sc_5p_db >> upload_seurat_report_for_sc_5p_to_box
  ## TASK
  # Both tasks are placeholders (DummyOperator performs no work); the
  # conversion task already carries the cellranger XCom location in params.
  convert_bam_to_cram = DummyOperator(
    task_id='convert_bam_to_cram',
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
    },
    dag=dag)
  upload_cram_to_irods = DummyOperator(
    task_id='upload_cram_to_irods',
    dag=dag)
  ## PIPELINE
  convert_bam_to_cram.set_upstream(decide_analysis_branch)
  convert_bam_to_cram.set_downstream(upload_cram_to_irods)
  ## TASK
  # QC branch: a linear chain of placeholder tasks (DummyOperator performs no
  # work) that ends in multiqc and its two uploads.
  run_picard_alignment_summary = DummyOperator(
    task_id='run_picard_alignment_summary',
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
    },
    dag=dag)
  run_picard_qual_summary = DummyOperator(
    task_id='run_picard_qual_summary', dag=dag)
  run_picard_rna_summary = DummyOperator(
    task_id='run_picard_rna_summary', dag=dag)
  run_picard_gc_summary = DummyOperator(
    task_id='run_picard_gc_summary', dag=dag)
  run_samtools_stats = DummyOperator(
    task_id='run_samtools_stats', dag=dag)
  run_samtools_idxstats = DummyOperator(
    task_id='run_samtools_idxstats', dag=dag)
  run_multiqc = DummyOperator(
    task_id='run_multiqc', dag=dag)
  upload_multiqc_to_ftp = DummyOperator(
    task_id='upload_multiqc_to_ftp', dag=dag)
  upload_multiqc_to_box = DummyOperator(
    task_id='upload_multiqc_to_box', dag=dag)
  ## PIPELINE
  # decide_analysis_branch -> picard alignment -> qual -> rna -> gc
  #   -> samtools stats -> idxstats -> multiqc -> (ftp upload, box upload)
  decide_analysis_branch.set_downstream(run_picard_alignment_summary)
  run_picard_alignment_summary.set_downstream(run_picard_qual_summary)
  run_picard_qual_summary.set_downstream(run_picard_rna_summary)
  run_picard_rna_summary.set_downstream(run_picard_gc_summary)
  run_picard_gc_summary.set_downstream(run_samtools_stats)
  run_samtools_stats.set_downstream(run_samtools_idxstats)
  run_samtools_idxstats.set_downstream(run_multiqc)
  run_multiqc.set_downstream([upload_multiqc_to_ftp, upload_multiqc_to_box])
  ## TASK
  # Final join of every analysis branch (currently a placeholder).
  # decide_analysis_branch skips the branches it does not select; with the
  # default 'all_success' rule one skipped parent would skip this task as
  # well, so it uses the same rule as the collect_trimmed_files_* tasks:
  # run when no parent failed and at least one parent succeeded.
  update_analysis_and_status = \
    DummyOperator(
      task_id='update_analysis_and_status',
      trigger_rule='none_failed_or_skipped',
      dag=dag)
  ## PIPELINE
  # All terminal upload tasks feed the final status update.
  [upload_multiqc_to_ftp,
   upload_scanpy_report_for_sc_5p_to_ftp,
   upload_scanpy_report_for_sc_5p_to_box,
   upload_cellbrowser_for_sc_5p_to_ftp,
   upload_scanpy_report_for_vdj_to_ftp,
   upload_scanpy_report_for_vdj_to_box,
   upload_scanpy_report_for_vdj_b_to_ftp,
   upload_scanpy_report_for_vdj_b_to_box,
   upload_scanpy_report_for_vdj_t_to_ftp,
   upload_scanpy_report_for_vdj_t_to_box,
   upload_seurat_report_for_sc_5p_ftp,
   upload_seurat_report_for_sc_5p_to_box,
   upload_results_to_irods,
   upload_report_to_ftp,
   upload_report_to_box,
   upload_cram_to_irods] >> update_analysis_and_status