# dag9_tenx_single_cell_immune_profiling.py
from datetime import timedelta
import os,json,logging,subprocess

from airflow.models import DAG,Variable
from airflow.utils.dates import days_ago
from airflow.operators.python_operator import PythonOperator
from airflow.operators.python_operator import BranchPythonOperator
from airflow.operators.dummy_operator import DummyOperator

from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import fetch_analysis_info_and_branch_func
from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import configure_cellranger_run_func
from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_sc_read_trimmming_func
from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_cellranger_tool
from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import decide_analysis_branch_func
from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_cellranger_result_to_db_func
from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import ftp_files_upload_for_analysis
from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import irods_files_upload_for_analysis
from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_scanpy_for_sc_5p_func
from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_singlecell_notebook_wrapper_func
from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_analysis_files_func
  19. ## ARGS
  20. default_args = {
  21. 'owner': 'airflow',
  22. 'depends_on_past': False,
  23. 'start_date': days_ago(2),
  24. 'email_on_failure': False,
  25. 'email_on_retry': False,
  26. 'retries': 4,
  27. 'max_active_runs':10,
  28. 'catchup':False,
  29. 'retry_delay': timedelta(minutes=5),
  30. 'provide_context': True,
  31. }
  32. FEATURE_TYPE_LIST = Variable.get('tenx_single_cell_immune_profiling_feature_types').split(',')
  33. ## DAG
  34. dag = \
  35. DAG(
  36. dag_id='dag9_tenx_single_cell_immune_profiling',
  37. schedule_interval=None,
  38. tags=['hpc','analysis','tenx','sc'],
  39. default_args=default_args,
  40. orientation='LR')
  41. with dag:
  42. ## TASK
  43. fetch_analysis_info_and_branch = \
  44. BranchPythonOperator(
  45. task_id='fetch_analysis_info',
  46. dag=dag,
  47. queue='hpc_4G',
  48. params={'no_analysis_task':'no_analysis',
  49. 'analysis_description_xcom_key':'analysis_description',
  50. 'analysis_info_xcom_key':'analysis_info'},
  51. python_callable=fetch_analysis_info_and_branch_func)
  52. ## TASK
  53. configure_cellranger_run = \
  54. PythonOperator(
  55. task_id='configure_cellranger_run',
  56. dag=dag,
  57. queue='hpc_4G',
  58. params={'xcom_pull_task_id':'fetch_analysis_info_and_branch',
  59. 'analysis_description_xcom_key':'analysis_description',
  60. 'analysis_info_xcom_key':'analysis_info',
  61. 'library_csv_xcom_key':'cellranger_library_csv'},
  62. python_callable=configure_cellranger_run_func)
  for analysis_name in FEATURE_TYPE_LIST:
    ## TASK
    # Entry task of this feature type; its task_id is the feature type name
    task_branch = DummyOperator(task_id=analysis_name, dag=dag)
    ## TASK
    # Fixed set of 32 trimming tasks (run_id 0..31) per feature type
    run_trim_list = []
    for run_id in range(32):
      trim_task = PythonOperator(
        task_id='run_trim_{0}_{1}'.format(analysis_name,run_id),
        dag=dag,
        queue='hpc_4G',
        python_callable=run_sc_read_trimmming_func,
        params={
          'xcom_pull_task_id':'fetch_analysis_info_and_branch',
          'analysis_info_xcom_key':'analysis_info',
          'analysis_name':analysis_name,
          'run_id':run_id,
          'r1_length':26,
          'r2_length':0,
          'fastq_input_dir_tag':'fastq_dir',
          'fastq_output_dir_tag':'output_path'})
      run_trim_list.append(trim_task)
    ## TASK
    # Join point for all trimming tasks of this feature type
    collect_trimmed_files = DummyOperator(
      task_id='collect_trimmed_files_{0}'.format(analysis_name),
      dag=dag)
    ## PIPELINE
    # branch -> feature type -> 32 trimming tasks -> join -> cellranger config
    fetch_analysis_info_and_branch.set_downstream(task_branch)
    task_branch.set_downstream(run_trim_list)
    collect_trimmed_files.set_upstream(run_trim_list)
    collect_trimmed_files.set_downstream(configure_cellranger_run)
  ## TASK
  # Do-nothing branch target; its task_id is the value passed to the branch
  # task as 'no_analysis_task'
  no_analysis = DummyOperator(task_id='no_analysis', dag=dag)
  ## PIPELINE
  fetch_analysis_info_and_branch.set_downstream(no_analysis)
  ## TASK
  # Runs run_cellranger_tool on the large-memory queue, using the library csv
  # published by configure_cellranger_run.
  # NOTE(review): the queue name suggests 16 threads while '--localcores 8'
  # is passed to cellranger -- confirm the two are meant to differ.
  run_cellranger = PythonOperator(
    task_id='run_cellranger',
    dag=dag,
    queue='hpc_64G16t24hr',
    python_callable=run_cellranger_tool,
    params={
      'analysis_description_xcom_pull_task':'fetch_analysis_info_and_branch',
      'analysis_description_xcom_key':'analysis_description',
      'library_csv_xcom_key':'cellranger_library_csv',
      'library_csv_xcom_pull_task':'configure_cellranger_run',
      'cellranger_xcom_key':'cellranger_output',
      'cellranger_options':['--localcores 8','--localmem 64']})
  ## PIPELINE
  configure_cellranger_run.set_downstream(run_cellranger)
  ## TASK
  # Second branch point, placed after cellranger. The '*_task' params give
  # decide_analysis_branch_func the task ids of the candidate branches, so
  # each value must be the task_id of a task that exists in this DAG.
  decide_analysis_branch = \
    BranchPythonOperator(
      task_id='decide_analysis_branch',
      dag=dag,
      queue='hpc_4G',
      python_callable=decide_analysis_branch_func,
      params={'load_cellranger_result_to_db_task':'load_cellranger_result_to_db',
              'run_scanpy_for_sc_5p_task':'run_scanpy_for_sc_5p',
              'run_scirpy_for_vdj_task':'run_scirpy_for_vdj',
              'run_scirpy_for_vdj_b_task':'run_scirpy_for_vdj_b',
              # key kept as-is because the callable reads it; the value now
              # matches the real task_id ('run_scirpy_vdj_t' names no task)
              'run_scirpy_vdj_t_task':'run_scirpy_for_vdj_t',
              'run_seurat_for_sc_5p_task':'run_seurat_for_sc_5p',
              'run_picard_alignment_summary_task':'run_picard_alignment_summary',
              'convert_bam_to_cram_task':'convert_bam_to_cram',
              'library_csv_xcom_key':'cellranger_library_csv',
              'library_csv_xcom_pull_task':'configure_cellranger_run'})
  ## PIPELINE
  run_cellranger >> decide_analysis_branch
  ## TASK
  # Runs load_cellranger_result_to_db_func on the cellranger output; the
  # params name the XCom keys it reads and the keys it publishes
  # ('loaded_output_files', 'html_report_file', 'sample_igf_id').
  load_cellranger_result_to_db = PythonOperator(
    task_id='load_cellranger_result_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_cellranger_result_to_db_func,
    params={
      'analysis_description_xcom_pull_task':'fetch_analysis_info_and_branch',
      'analysis_description_xcom_key':'analysis_description',
      'cellranger_xcom_key':'cellranger_output',
      'cellranger_xcom_pull_task':'run_cellranger',
      'collection_type':'CELLRANGER_MULTI',
      'collection_table':'sample',
      'xcom_collection_name_key':'sample_igf_id',
      'genome_column':'genome_build',
      'analysis_name':'cellranger_multi',
      'output_xcom_key':'loaded_output_files',
      'html_xcom_key':'html_report_file',
      'html_report_file_name':'web_summary.html'})
  # FTP upload of the html report published by the load task
  upload_report_to_ftp = PythonOperator(
    task_id='upload_report_to_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task':'load_cellranger_result_to_db',
      'xcom_pull_files_key':'html_report_file',
      'collection_name_task':'load_cellranger_result_to_db',
      'collection_name_key':'sample_igf_id',
      'collection_type':'FTP_CELLRANGER_MULTI',
      'collection_table':'sample',
      'collect_remote_file':True})
  # Placeholder (DummyOperator) for the box upload of the same report
  upload_report_to_box = DummyOperator(
    task_id='upload_report_to_box',
    dag=dag,
    queue='hpc_4G',
    params={
      'xcom_pull_task':'load_cellranger_result_to_db',
      'xcom_pull_files_key':'html_report_file'})
  # iRODS upload of the files listed under 'loaded_output_files'
  upload_results_to_irods = PythonOperator(
    task_id='upload_results_to_irods',
    dag=dag,
    queue='hpc_4G',
    python_callable=irods_files_upload_for_analysis,
    params={
      'xcom_pull_task':'load_cellranger_result_to_db',
      'xcom_pull_files_key':'loaded_output_files',
      'collection_name_key':'sample_igf_id',
      'analysis_name':'cellranger_multi'})
  ## PIPELINE
  decide_analysis_branch.set_downstream(load_cellranger_result_to_db)
  load_cellranger_result_to_db.set_downstream(upload_report_to_ftp)
  load_cellranger_result_to_db.set_downstream(upload_report_to_box)
  load_cellranger_result_to_db.set_downstream(upload_results_to_irods)
  ## TASK
  # Runs run_scanpy_for_sc_5p_func on the cellranger output; it is told to
  # publish a notebook under 'scanpy_notebook' and cellbrowser output under
  # 'cellbrowser_dirs'.
  run_scanpy_for_sc_5p = \
    PythonOperator(
      task_id='run_scanpy_for_sc_5p',
      dag=dag,
      queue='hpc_4G',
      python_callable=run_scanpy_for_sc_5p_func,
      params={'cellranger_xcom_key':'cellranger_output',
              'cellranger_xcom_pull_task':'run_cellranger',
              'scanpy_timeout':1200,
              'allow_errors':False,
              'output_notebook_key':'scanpy_notebook',
              'output_cellbrowser_key':'cellbrowser_dirs',
              'analysis_description_xcom_pull_task':'fetch_analysis_info_and_branch',
              'analysis_description_xcom_key':'analysis_description'})
  # Runs load_analysis_files_func on the scanpy notebook, using the
  # collection name published by load_cellranger_result_to_db
  load_scanpy_report_for_sc_5p_to_db = \
    PythonOperator(
      task_id='load_scanpy_report_for_sc_5p_to_db',
      dag=dag,
      queue='hpc_4G',
      python_callable=load_analysis_files_func,
      params={'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'file_name_task':'run_scanpy_for_sc_5p',
              'file_name_key':'scanpy_notebook',
              'analysis_name':'scanpy_5p',
              'collection_type':'SCANPY_HTML',
              'collection_table':'sample',
              'output_files_key':'output_db_files'})
  # FTP upload of the files published by the load task
  upload_scanpy_report_for_sc_5p_to_ftp = \
    PythonOperator(
      task_id='upload_scanpy_report_for_sc_5p_to_ftp',
      dag=dag,
      queue='hpc_4G',
      python_callable=ftp_files_upload_for_analysis,
      params={'xcom_pull_task':'load_scanpy_report_for_sc_5p_to_db',
              'xcom_pull_files_key':'output_db_files',
              'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'collection_type':'FTP_SCANPY_HTML',
              'collection_table':'sample',
              'collect_remote_file':True})
  # Placeholder for the box upload
  upload_scanpy_report_for_sc_5p_to_box = \
    DummyOperator(
      task_id='upload_scanpy_report_for_sc_5p_to_box',
      dag=dag)
  # Placeholder: DummyOperator runs nothing and has no python_callable
  # argument, so the callable is no longer passed as an invalid keyword.
  # TODO: switch to PythonOperator with
  # python_callable=ftp_files_upload_for_analysis once the cellbrowser upload
  # is ready -- the params below are already in the shape the other FTP
  # upload tasks use.
  upload_cellbrowser_for_sc_5p_to_ftp = \
    DummyOperator(
      task_id='upload_cellbrowser_for_sc_5p_to_ftp',
      dag=dag,
      queue='hpc_4G',
      params={'xcom_pull_task':'run_scanpy_for_sc_5p',
              'xcom_pull_files_key':'cellbrowser_dirs',
              'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'collection_type':'FTP_CELLBROWSER',
              'collection_table':'sample',
              'collect_remote_file':True})
  ## PIPELINE
  decide_analysis_branch >> run_scanpy_for_sc_5p
  run_scanpy_for_sc_5p >> load_scanpy_report_for_sc_5p_to_db
  load_scanpy_report_for_sc_5p_to_db >> upload_scanpy_report_for_sc_5p_to_ftp
  load_scanpy_report_for_sc_5p_to_db >> upload_scanpy_report_for_sc_5p_to_box
  run_scanpy_for_sc_5p >> upload_cellbrowser_for_sc_5p_to_ftp
  ## TASK
  # Runs run_singlecell_notebook_wrapper_func with the python3 kernel on the
  # 'vdj' and 'count' directories; template, image and marker list are read
  # from Airflow Variables when the DAG file is parsed.
  run_scirpy_for_vdj = PythonOperator(
    task_id='run_scirpy_for_vdj',
    dag=dag,
    queue='hpc_4G',
    python_callable=run_singlecell_notebook_wrapper_func,
    params={
      'cellranger_xcom_key':'cellranger_output',
      'cellranger_xcom_pull_task':'run_cellranger',
      'scanpy_timeout':1200,
      'allow_errors':False,
      'kernel_name':'python3',
      'vdj_dir':'vdj',
      'count_dir':'count',
      'cell_marker_list':Variable.get('all_cell_marker_list'),
      'output_notebook_key':'scirpy_notebook',
      'analysis_description_xcom_pull_task':'fetch_analysis_info_and_branch',
      'analysis_description_xcom_key':'analysis_description',
      'template_ipynb_path':Variable.get('scirpy_single_sample_template'),
      'singularity_image_path':Variable.get('scirpy_notebook_image')})
  # Runs load_analysis_files_func on the notebook published above
  load_scanpy_report_for_vdj_to_db = PythonOperator(
    task_id='load_scanpy_report_for_vdj_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_analysis_files_func,
    params={
      'collection_name_task':'load_cellranger_result_to_db',
      'collection_name_key':'sample_igf_id',
      'file_name_task':'run_scirpy_for_vdj',
      'file_name_key':'scirpy_notebook',
      'analysis_name':'scirpy_vdj',
      'collection_type':'SCIRPY_VDJ_HTML',
      'collection_table':'sample',
      'output_files_key':'output_db_files'})
  # FTP upload of the files published by the load task
  upload_scanpy_report_for_vdj_to_ftp = PythonOperator(
    task_id='upload_scanpy_report_for_vdj_to_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task':'load_scanpy_report_for_vdj_to_db',
      'xcom_pull_files_key':'output_db_files',
      'collection_name_task':'load_cellranger_result_to_db',
      'collection_name_key':'sample_igf_id',
      'collection_type':'FTP_SCIRPY_VDJ_HTML',
      'collection_table':'sample',
      'collect_remote_file':True})
  # Placeholder for the box upload
  upload_scanpy_report_for_vdj_to_box = DummyOperator(
    task_id='upload_scanpy_report_for_vdj_to_box',
    dag=dag)
  ## PIPELINE
  decide_analysis_branch.set_downstream(run_scirpy_for_vdj)
  run_scirpy_for_vdj.set_downstream(load_scanpy_report_for_vdj_to_db)
  load_scanpy_report_for_vdj_to_db.set_downstream(upload_scanpy_report_for_vdj_to_ftp)
  load_scanpy_report_for_vdj_to_db.set_downstream(upload_scanpy_report_for_vdj_to_box)
  ## TASK
  # Placeholder: DummyOperator runs nothing and has no python_callable
  # argument, so the callable is no longer passed as an invalid keyword.
  # TODO: switch to PythonOperator with
  # python_callable=run_singlecell_notebook_wrapper_func (as run_scirpy_for_vdj
  # does) when the vdj_b notebook is ready.
  # NOTE(review): load_scanpy_report_for_vdj_b_to_db below is configured to
  # pull 'scirpy_notebook' from this task, which a placeholder never
  # publishes -- presumably that load step cannot succeed until this task is
  # enabled; confirm.
  run_scirpy_for_vdj_b = \
    DummyOperator(
      task_id='run_scirpy_for_vdj_b',
      dag=dag,
      queue='hpc_4G',
      params={'cellranger_xcom_key':'cellranger_output',
              'cellranger_xcom_pull_task':'run_cellranger',
              'scanpy_timeout':1200,
              'allow_errors':False,
              'kernel_name':'python3',
              'vdj_dir':'vdj_b',
              'count_dir':'count',
              'cell_marker_list':Variable.get('all_cell_marker_list'),
              'output_notebook_key':'scirpy_notebook',
              'analysis_description_xcom_pull_task':'fetch_analysis_info_and_branch',
              'analysis_description_xcom_key':'analysis_description',
              'template_ipynb_path':Variable.get('scirpy_single_sample_template'),
              'singularity_image_path':Variable.get('scirpy_notebook_image')})
  # Runs load_analysis_files_func on the vdj_b notebook
  load_scanpy_report_for_vdj_b_to_db = \
    PythonOperator(
      task_id='load_scanpy_report_for_vdj_b_to_db',
      dag=dag,
      queue='hpc_4G',
      python_callable=load_analysis_files_func,
      params={'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'file_name_task':'run_scirpy_for_vdj_b',
              'file_name_key':'scirpy_notebook',
              'analysis_name':'scirpy_vdj_b',
              'collection_type':'SCIRPY_VDJ_B_HTML',
              'collection_table':'sample',
              'output_files_key':'output_db_files'})
  # FTP upload of the files published by the load task
  upload_scanpy_report_for_vdj_b_to_ftp = \
    PythonOperator(
      task_id='upload_scanpy_report_for_vdj_b_to_ftp',
      dag=dag,
      queue='hpc_4G',
      python_callable=ftp_files_upload_for_analysis,
      params={'xcom_pull_task':'load_scanpy_report_for_vdj_b_to_db',
              'xcom_pull_files_key':'output_db_files',
              'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'collection_type':'FTP_SCIRPY_VDJ_B_HTML',
              'collection_table':'sample',
              'collect_remote_file':True})
  # Placeholder for the box upload
  upload_scanpy_report_for_vdj_b_to_box = \
    DummyOperator(
      task_id='upload_scanpy_report_for_vdj_b_to_box',
      dag=dag)
  ## PIPELINE
  decide_analysis_branch >> run_scirpy_for_vdj_b
  run_scirpy_for_vdj_b >> load_scanpy_report_for_vdj_b_to_db
  load_scanpy_report_for_vdj_b_to_db >> upload_scanpy_report_for_vdj_b_to_ftp
  load_scanpy_report_for_vdj_b_to_db >> upload_scanpy_report_for_vdj_b_to_box
  ## TASK
  # Placeholder: DummyOperator runs nothing and has no python_callable
  # argument, so the callable is no longer passed as an invalid keyword.
  # TODO: switch to PythonOperator with
  # python_callable=run_singlecell_notebook_wrapper_func (as run_scirpy_for_vdj
  # does) when the vdj_t notebook is ready.
  # NOTE(review): load_scanpy_report_for_vdj_t_to_db below is configured to
  # pull 'scirpy_notebook' from this task, which a placeholder never
  # publishes -- presumably that load step cannot succeed until this task is
  # enabled; confirm.
  run_scirpy_for_vdj_t = \
    DummyOperator(
      task_id='run_scirpy_for_vdj_t',
      dag=dag,
      queue='hpc_4G',
      params={'cellranger_xcom_key':'cellranger_output',
              'cellranger_xcom_pull_task':'run_cellranger',
              'scanpy_timeout':1200,
              'allow_errors':False,
              'kernel_name':'python3',
              'vdj_dir':'vdj_t',
              'count_dir':'count',
              'cell_marker_list':Variable.get('all_cell_marker_list'),
              'output_notebook_key':'scirpy_notebook',
              'analysis_description_xcom_pull_task':'fetch_analysis_info_and_branch',
              'analysis_description_xcom_key':'analysis_description',
              'template_ipynb_path':Variable.get('scirpy_single_sample_template'),
              'singularity_image_path':Variable.get('scirpy_notebook_image')})
  # Runs load_analysis_files_func on the vdj_t notebook
  load_scanpy_report_for_vdj_t_to_db = \
    PythonOperator(
      task_id='load_scanpy_report_for_vdj_t_to_db',
      dag=dag,
      queue='hpc_4G',
      python_callable=load_analysis_files_func,
      params={'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'file_name_task':'run_scirpy_for_vdj_t',
              'file_name_key':'scirpy_notebook',
              'analysis_name':'scirpy_vdj_t',
              'collection_type':'SCIRPY_VDJ_T_HTML',
              'collection_table':'sample',
              'output_files_key':'output_db_files'})
  # FTP upload of the files published by the load task
  upload_scanpy_report_for_vdj_t_to_ftp = \
    PythonOperator(
      task_id='upload_scanpy_report_for_vdj_t_to_ftp',
      dag=dag,
      queue='hpc_4G',
      python_callable=ftp_files_upload_for_analysis,
      params={'xcom_pull_task':'load_scanpy_report_for_vdj_t_to_db',
              'xcom_pull_files_key':'output_db_files',
              'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'collection_type':'FTP_SCIRPY_VDJ_T_HTML',
              'collection_table':'sample',
              'collect_remote_file':True})
  # Placeholder for the box upload
  upload_scanpy_report_for_vdj_t_to_box = \
    DummyOperator(
      task_id='upload_scanpy_report_for_vdj_t_to_box',
      dag=dag)
  ## PIPELINE
  decide_analysis_branch >> run_scirpy_for_vdj_t
  run_scirpy_for_vdj_t >> load_scanpy_report_for_vdj_t_to_db
  load_scanpy_report_for_vdj_t_to_db >> upload_scanpy_report_for_vdj_t_to_ftp
  load_scanpy_report_for_vdj_t_to_db >> upload_scanpy_report_for_vdj_t_to_box
  ## TASK
  # Runs run_singlecell_notebook_wrapper_func with the R kernel; template and
  # image paths are read from Airflow Variables when the DAG file is parsed.
  run_seurat_for_sc_5p = \
    PythonOperator(
      task_id='run_seurat_for_sc_5p',
      dag=dag,
      queue='hpc_4G',
      python_callable=run_singlecell_notebook_wrapper_func,
      params={'cellranger_xcom_key':'cellranger_output',
              'cellranger_xcom_pull_task':'run_cellranger',
              'scanpy_timeout':1200,
              'allow_errors':False,
              'kernel_name':'R',
              'vdj_dir':'vdj',
              'count_dir':'count',
              'cell_marker_list':Variable.get('all_cell_marker_list'),
              'output_notebook_key':'seurat_notebook',
              'analysis_description_xcom_pull_task':'fetch_analysis_info_and_branch',
              'analysis_description_xcom_key':'analysis_description',
              'template_ipynb_path':Variable.get('seurat_single_sample_template'),
              'singularity_image_path':Variable.get('seurat_notebook_image')})
  # Runs load_analysis_files_func on the seurat notebook
  load_seurat_report_for_sc_5p_db = \
    PythonOperator(
      task_id='load_seurat_report_for_sc_5p_db',
      dag=dag,
      queue='hpc_4G',
      python_callable=load_analysis_files_func,
      params={'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'file_name_task':'run_seurat_for_sc_5p',
              'file_name_key':'seurat_notebook',
              'analysis_name':'seurat_5p',
              'collection_type':'SEURAT_HTML',
              'collection_table':'sample',
              'output_files_key':'output_db_files'})
  # Placeholder: DummyOperator runs nothing and has no python_callable
  # argument, so the callable is no longer passed as an invalid keyword.
  # TODO: switch to PythonOperator with
  # python_callable=ftp_files_upload_for_analysis (as the scanpy/scirpy FTP
  # upload tasks do) when the seurat report upload is ready.
  upload_seurat_report_for_sc_5p_ftp = \
    DummyOperator(
      task_id='upload_seurat_report_for_sc_5p_ftp',
      dag=dag,
      queue='hpc_4G',
      params={'xcom_pull_task':'load_seurat_report_for_sc_5p_db',
              'xcom_pull_files_key':'output_db_files',
              'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'collection_type':'FTP_SEURAT_HTML',
              'collection_table':'sample',
              'collect_remote_file':True})
  # Placeholder for the box upload
  upload_seurat_report_for_sc_5p_to_box = \
    DummyOperator(
      task_id='upload_seurat_report_for_sc_5p_to_box',
      dag=dag)
  ## PIPELINE
  decide_analysis_branch >> run_seurat_for_sc_5p
  run_seurat_for_sc_5p >> load_seurat_report_for_sc_5p_db
  load_seurat_report_for_sc_5p_db >> upload_seurat_report_for_sc_5p_ftp
  load_seurat_report_for_sc_5p_db >> upload_seurat_report_for_sc_5p_to_box
  ## TASK
  # Both steps of the cram branch are still DummyOperator placeholders; the
  # params only record where the cellranger output would be pulled from
  convert_bam_to_cram = DummyOperator(
    task_id='convert_bam_to_cram',
    dag=dag,
    params={
      'cellranger_xcom_key':'cellranger_output',
      'cellranger_xcom_pull_task':'run_cellranger'})
  upload_cram_to_irods = DummyOperator(
    task_id='upload_cram_to_irods',
    dag=dag)
  ## PIPELINE
  decide_analysis_branch.set_downstream(convert_bam_to_cram)
  convert_bam_to_cram.set_downstream(upload_cram_to_irods)
  ## TASK
  # QC branch: every step from picard through multiqc and its uploads is
  # still a DummyOperator placeholder. The tasks are declared first and
  # wired together afterwards.
  run_picard_alignment_summary = DummyOperator(
    task_id='run_picard_alignment_summary',
    dag=dag,
    params={
      'cellranger_xcom_key':'cellranger_output',
      'cellranger_xcom_pull_task':'run_cellranger'})
  run_picard_qual_summary = DummyOperator(
    task_id='run_picard_qual_summary',
    dag=dag)
  run_picard_rna_summary = DummyOperator(
    task_id='run_picard_rna_summary',
    dag=dag)
  run_picard_gc_summary = DummyOperator(
    task_id='run_picard_gc_summary',
    dag=dag)
  run_samtools_stats = DummyOperator(
    task_id='run_samtools_stats',
    dag=dag)
  run_samtools_idxstats = DummyOperator(
    task_id='run_samtools_idxstats',
    dag=dag)
  run_multiqc = DummyOperator(
    task_id='run_multiqc',
    dag=dag)
  upload_multiqc_to_ftp = DummyOperator(
    task_id='upload_multiqc_to_ftp',
    dag=dag)
  upload_multiqc_to_box = DummyOperator(
    task_id='upload_multiqc_to_box',
    dag=dag)
  ## PIPELINE
  # Linear chain from the branch point down to multiqc
  (decide_analysis_branch
    >> run_picard_alignment_summary
    >> run_picard_qual_summary
    >> run_picard_rna_summary
    >> run_picard_gc_summary
    >> run_samtools_stats
    >> run_samtools_idxstats
    >> run_multiqc)
  # multiqc fans out to both upload targets
  run_multiqc.set_downstream(upload_multiqc_to_ftp)
  run_multiqc.set_downstream(upload_multiqc_to_box)
  ## TASK
  # Final fan-in placeholder: every terminal upload task of every branch
  # feeds into it.
  # NOTE(review): the parents sit on different branches of
  # decide_analysis_branch. With the default trigger rule this task is skipped
  # as soon as one of those branches is skipped -- confirm whether a different
  # trigger_rule is intended.
  update_analysis_and_status = DummyOperator(
    task_id='update_analysis_and_status',
    dag=dag)
  ## PIPELINE
  update_analysis_and_status.set_upstream([
    upload_multiqc_to_ftp,
    upload_scanpy_report_for_sc_5p_to_ftp,
    upload_scanpy_report_for_sc_5p_to_box,
    upload_cellbrowser_for_sc_5p_to_ftp,
    upload_scanpy_report_for_vdj_to_ftp,
    upload_scanpy_report_for_vdj_to_box,
    upload_scanpy_report_for_vdj_b_to_ftp,
    upload_scanpy_report_for_vdj_b_to_box,
    upload_scanpy_report_for_vdj_t_to_ftp,
    upload_scanpy_report_for_vdj_t_to_box,
    upload_seurat_report_for_sc_5p_ftp,
    upload_seurat_report_for_sc_5p_to_box,
    upload_results_to_irods,
    upload_report_to_ftp,
    upload_report_to_box,
    upload_cram_to_irods])