dag9_tenx_single_cell_immune_profiling.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586
  1. from datetime import timedelta
  2. import os,json,logging,subprocess
  3. from airflow.models import DAG,Variable
  4. from airflow.utils.dates import days_ago
  5. from airflow.operators.python_operator import PythonOperator
  6. from airflow.operators.python_operator import BranchPythonOperator
  7. from airflow.operators.dummy_operator import DummyOperator
  8. from igf_airflow.logging.upload_log_msg import send_log_to_channels,log_success,log_failure,log_sleep
  9. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import fetch_analysis_info_and_branch_func
  10. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import configure_cellranger_run_func
  11. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_sc_read_trimmming_func
  12. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_cellranger_tool
  13. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import decide_analysis_branch_func
  14. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_cellranger_result_to_db_func
  15. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import ftp_files_upload_for_analysis
  16. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import irods_files_upload_for_analysis
  17. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_scanpy_for_sc_5p_func
  18. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_singlecell_notebook_wrapper_func
  19. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_analysis_files_func
  ## ARGS
# Operator-level defaults applied to every task created for this DAG.
# NOTE(review): 'max_active_runs' and 'catchup' are DAG constructor
# arguments; operators do not consume them from default_args, so on their
# own these two entries do not configure the DAG.
default_args = {
  'owner': 'airflow',
  'depends_on_past': False,
  'start_date': days_ago(2),
  'email_on_failure': False,
  'email_on_retry': False,
  'retries': 4,
  'max_active_runs': 10,
  'catchup': False,
  'retry_delay': timedelta(minutes=5),
  'provide_context': True,
}


def _parse_feature_types(raw_value):
  """Return the feature type names held in an Airflow Variable as a list.

  Variable.get returns a plain string by default, and iterating over a
  string yields single characters rather than feature type names. This
  helper accepts either a JSON array (e.g. '["a", "b"]') or a
  comma-separated string (e.g. 'a,b') and always returns a list of
  non-empty, whitespace-stripped names.

  :param raw_value: Value read from the Airflow Variable (str, list or tuple)
  :returns: A list of feature type names; empty if no names are present
  """
  if isinstance(raw_value, (list, tuple)):
    candidates = raw_value
  else:
    text = str(raw_value).strip()
    candidates = None
    if text.startswith('['):
      try:
        candidates = json.loads(text)
      except ValueError:
        # not valid JSON after all, fall back to comma separated parsing
        candidates = None
    if not isinstance(candidates, list):
      candidates = text.split(',')
  return [str(item).strip() for item in candidates if str(item).strip()]


# Feature type names; the task-generation loop creates one branch per entry.
# NOTE(review): the storage format of this Variable is not visible here,
# both JSON array and comma separated values are accepted - confirm.
FEATURE_TYPE_LIST = \
  _parse_feature_types(
    Variable.get('tenx_single_cell_immune_profiling_feature_types'))
  34. ## DAG
  35. dag = \
  36. DAG(
  37. dag_id='dag9_tenx_single_cell_immune_profiling',
  38. schedule_interval=None,
  39. tags=['hpc','analysis','tenx','sc'],
  40. default_args=default_args,
  41. orientation='LR')
  42. with dag:
  ## TASK
  # Entry point of the DAG: a branching task whose callable decides which
  # downstream task ids to follow ('no_analysis' is passed as the id to use
  # when nothing should run).
  # The task id has to be 'fetch_analysis_info_and_branch' because every
  # downstream task in this file names that id as its XCom pull source;
  # with the previous id ('fetch_analysis_info') those pulls would target
  # a task that does not exist.
  fetch_analysis_info_and_branch = \
    BranchPythonOperator(
      task_id='fetch_analysis_info_and_branch',
      dag=dag,
      queue='hpc_4G',
      params={'no_analysis_task':'no_analysis',
              'analysis_description_xcom_key':'analysis_description',
              'analysis_info_xcom_key':'analysis_info'},
      python_callable=fetch_analysis_info_and_branch_func)
  ## TASK
  # Calls configure_cellranger_run_func on the 'hpc_4G' queue. The params
  # name the upstream task and XCom keys to read from, and the XCom key
  # ('cellranger_library_csv') that later tasks pull from this task.
  configure_cellranger_run = PythonOperator(
    task_id='configure_cellranger_run',
    python_callable=configure_cellranger_run_func,
    queue='hpc_4G',
    dag=dag,
    params=dict(
      xcom_pull_task_id='fetch_analysis_info_and_branch',
      analysis_description_xcom_key='analysis_description',
      analysis_info_xcom_key='analysis_info',
      library_csv_xcom_key='cellranger_library_csv'))
  for analysis_name in FEATURE_TYPE_LIST:
    # One sub-graph per feature type:
    #   branch marker -> 32 parallel trimming tasks -> collector
    ## TASK
    task_branch = DummyOperator(task_id=analysis_name, dag=dag)
    ## TASK
    # Fixed pool of 32 trimming slots, run_id 0..31, for each feature type
    run_trim_list = [
      PythonOperator(
        task_id='run_trim_{0}_{1}'.format(analysis_name, run_id),
        python_callable=run_sc_read_trimmming_func,
        queue='hpc_4G',
        dag=dag,
        params=dict(
          xcom_pull_task_id='fetch_analysis_info_and_branch',
          analysis_info_xcom_key='analysis_info',
          analysis_name=analysis_name,
          run_id=run_id,
          r1_length=26,
          r2_length=0,
          fastq_input_dir_tag='fastq_dir',
          fastq_output_dir_tag='output_path'))
      for run_id in range(32)]
    ## TASK
    collect_trimmed_files = DummyOperator(
      task_id='collect_trimmed_files_{0}'.format(analysis_name),
      dag=dag)
    ## PIPELINE
    fetch_analysis_info_and_branch >> task_branch
    task_branch >> run_trim_list
    run_trim_list >> collect_trimmed_files
    # NOTE(review): configure_cellranger_run joins every feature type branch
    # using the default trigger rule, so a skipped branch would skip it too;
    # confirm the branch callable always selects all feature types.
    collect_trimmed_files >> configure_cellranger_run
  ## TASK
  # Placeholder end point; its id is handed to the first branching task
  # through the 'no_analysis_task' param.
  no_analysis = DummyOperator(task_id='no_analysis', dag=dag)
  ## PIPELINE
  fetch_analysis_info_and_branch.set_downstream(no_analysis)
  ## TASK
  # Calls run_cellranger_tool on the 'hpc_64G16t24hr' queue. It is told to
  # read the analysis description from the first task and the library csv
  # from configure_cellranger_run, and is given 'cellranger_output' as its
  # own XCom key.
  # NOTE(review): '--localcores 8' is passed while the queue name suggests
  # 16 threads - confirm this is intended.
  run_cellranger = PythonOperator(
    task_id='run_cellranger',
    python_callable=run_cellranger_tool,
    queue='hpc_64G16t24hr',
    dag=dag,
    params=dict(
      analysis_description_xcom_pull_task='fetch_analysis_info_and_branch',
      analysis_description_xcom_key='analysis_description',
      library_csv_xcom_key='cellranger_library_csv',
      library_csv_xcom_pull_task='configure_cellranger_run',
      cellranger_xcom_key='cellranger_output',
      cellranger_options=['--localcores 8','--localmem 64']))
  ## PIPELINE
  configure_cellranger_run.set_downstream(run_cellranger)
  ## TASK
  # Second branching point, executed after cellranger. The params hand the
  # callable the task id of each candidate downstream branch, plus where to
  # find the library csv. Every '*_task' value must be the id of a task
  # defined in this DAG, otherwise the branch cannot be followed.
  decide_analysis_branch = \
    BranchPythonOperator(
      task_id='decide_analysis_branch',
      dag=dag,
      queue='hpc_4G',
      python_callable=decide_analysis_branch_func,
      params={'load_cellranger_result_to_db_task':'load_cellranger_result_to_db',
              'run_scanpy_for_sc_5p_task':'run_scanpy_for_sc_5p',
              'run_scirpy_for_vdj_task':'run_scirpy_for_vdj',
              'run_scirpy_for_vdj_b_task':'run_scirpy_for_vdj_b',
              # the key name is kept as the callable expects it; the value
              # is the real task id ('run_scirpy_vdj_t' does not exist)
              'run_scirpy_vdj_t_task':'run_scirpy_for_vdj_t',
              'run_seurat_for_sc_5p_task':'run_seurat_for_sc_5p',
              'run_picard_alignment_summary_task':'run_picard_alignment_summary',
              'convert_bam_to_cram_task':'convert_bam_to_cram',
              'library_csv_xcom_key':'cellranger_library_csv',
              'library_csv_xcom_pull_task':'configure_cellranger_run'})
  ## PIPELINE
  run_cellranger >> decide_analysis_branch
  ## TASK
  # Cellranger result branch: one loading task followed by three
  # independent uploads (ftp, box placeholder, irods).
  load_cellranger_result_to_db = PythonOperator(
    task_id='load_cellranger_result_to_db',
    python_callable=load_cellranger_result_to_db_func,
    queue='hpc_4G',
    dag=dag,
    params=dict(
      analysis_description_xcom_pull_task='fetch_analysis_info_and_branch',
      analysis_description_xcom_key='analysis_description',
      cellranger_xcom_key='cellranger_output',
      cellranger_xcom_pull_task='run_cellranger',
      collection_type='CELLRANGER_MULTI',
      collection_table='sample',
      xcom_collection_name_key='sample_igf_id',
      genome_column='genome_build',
      analysis_name='cellranger_multi',
      output_xcom_key='loaded_output_files',
      html_xcom_key='html_report_file',
      html_report_file_name='web_summary.html'))
  # Reads the 'html_report_file' XCom of the loading task
  upload_report_to_ftp = PythonOperator(
    task_id='upload_report_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params=dict(
      xcom_pull_task='load_cellranger_result_to_db',
      xcom_pull_files_key='html_report_file',
      collection_name_task='load_cellranger_result_to_db',
      collection_name_key='sample_igf_id',
      collection_type='FTP_CELLRANGER_MULTI',
      collection_table='sample',
      collect_remote_file=True))
  # Placeholder: a DummyOperator performs no work, params are kept as-is
  upload_report_to_box = DummyOperator(
    task_id='upload_report_to_box',
    queue='hpc_4G',
    dag=dag,
    params=dict(
      xcom_pull_task='load_cellranger_result_to_db',
      xcom_pull_files_key='html_report_file'))
  # Reads the 'loaded_output_files' XCom of the loading task
  upload_results_to_irods = PythonOperator(
    task_id='upload_results_to_irods',
    python_callable=irods_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params=dict(
      xcom_pull_task='load_cellranger_result_to_db',
      xcom_pull_files_key='loaded_output_files',
      collection_name_key='sample_igf_id',
      analysis_name='cellranger_multi'))
  ## PIPELINE
  decide_analysis_branch >> load_cellranger_result_to_db
  load_cellranger_result_to_db >> [
    upload_report_to_ftp,
    upload_report_to_box,
    upload_results_to_irods]
  ## TASK
  # Scanpy branch: run the analysis, register the notebook, then upload
  # the report (ftp / box placeholder) and the cellbrowser output.
  run_scanpy_for_sc_5p = PythonOperator(
    task_id='run_scanpy_for_sc_5p',
    python_callable=run_scanpy_for_sc_5p_func,
    queue='hpc_4G',
    dag=dag,
    params=dict(
      cellranger_xcom_key='cellranger_output',
      cellranger_xcom_pull_task='run_cellranger',
      scanpy_timeout=1200,
      allow_errors=False,
      output_notebook_key='scanpy_notebook',
      output_cellbrowser_key='cellbrowser_dirs',
      analysis_description_xcom_pull_task='fetch_analysis_info_and_branch',
      analysis_description_xcom_key='analysis_description'))
  # Picks up the 'scanpy_notebook' XCom written by the task above
  load_scanpy_report_for_sc_5p_to_db = PythonOperator(
    task_id='load_scanpy_report_for_sc_5p_to_db',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    dag=dag,
    params=dict(
      collection_name_task='load_cellranger_result_to_db',
      collection_name_key='sample_igf_id',
      file_name_task='run_scanpy_for_sc_5p',
      file_name_key='scanpy_notebook',
      analysis_name='scanpy_5p',
      collection_type='SCANPY_HTML',
      collection_table='sample',
      output_files_key='output_db_files'))
  upload_scanpy_report_for_sc_5p_to_ftp = PythonOperator(
    task_id='upload_scanpy_report_for_sc_5p_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params=dict(
      xcom_pull_task='load_scanpy_report_for_sc_5p_to_db',
      xcom_pull_files_key='output_db_files',
      collection_name_task='load_cellranger_result_to_db',
      collection_name_key='sample_igf_id',
      collection_type='FTP_SCANPY_HTML',
      collection_table='sample',
      collect_remote_file=True))
  # Placeholder, no work is performed
  upload_scanpy_report_for_sc_5p_to_box = DummyOperator(
    task_id='upload_scanpy_report_for_sc_5p_to_box',
    dag=dag)
  # NOTE(review): a DummyOperator does not execute python_callable; the
  # callable and params look like a placeholder for a later switch to
  # PythonOperator - confirm.
  upload_cellbrowser_for_sc_5p_to_ftp = DummyOperator(
    task_id='upload_cellbrowser_for_sc_5p_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params=dict(
      xcom_pull_task='run_scanpy_for_sc_5p',
      xcom_pull_files_key='cellbrowser_dirs',
      collection_name_task='load_cellranger_result_to_db',
      collection_name_key='sample_igf_id',
      collection_type='FTP_CELLBROWSER',
      collection_table='sample',
      collect_remote_file=True))
  ## PIPELINE
  decide_analysis_branch >> run_scanpy_for_sc_5p
  run_scanpy_for_sc_5p >> [
    load_scanpy_report_for_sc_5p_to_db,
    upload_cellbrowser_for_sc_5p_to_ftp]
  load_scanpy_report_for_sc_5p_to_db >> [
    upload_scanpy_report_for_sc_5p_to_ftp,
    upload_scanpy_report_for_sc_5p_to_box]
  ## TASK
  # Scirpy branch for the 'vdj' directory: run the notebook wrapper,
  # register the notebook, then upload the report (ftp / box placeholder).
  # Three Airflow Variables are read while the DAG file is parsed.
  run_scirpy_for_vdj = PythonOperator(
    task_id='run_scirpy_for_vdj',
    python_callable=run_singlecell_notebook_wrapper_func,
    queue='hpc_4G',
    dag=dag,
    params=dict(
      cellranger_xcom_key='cellranger_output',
      cellranger_xcom_pull_task='run_cellranger',
      scanpy_timeout=1200,
      allow_errors=False,
      kernel_name='python3',
      vdj_dir='vdj',
      count_dir='count',
      cell_marker_list=Variable.get('all_cell_marker_list'),
      output_notebook_key='scirpy_notebook',
      analysis_description_xcom_pull_task='fetch_analysis_info_and_branch',
      analysis_description_xcom_key='analysis_description',
      template_ipynb_path=Variable.get('scirpy_single_sample_template'),
      singularity_image_path=Variable.get('scirpy_notebook_image')))
  # Picks up the 'scirpy_notebook' XCom written by the task above
  load_scanpy_report_for_vdj_to_db = PythonOperator(
    task_id='load_scanpy_report_for_vdj_to_db',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    dag=dag,
    params=dict(
      collection_name_task='load_cellranger_result_to_db',
      collection_name_key='sample_igf_id',
      file_name_task='run_scirpy_for_vdj',
      file_name_key='scirpy_notebook',
      analysis_name='scirpy_vdj',
      collection_type='SCIRPY_VDJ_HTML',
      collection_table='sample',
      output_files_key='output_db_files'))
  upload_scanpy_report_for_vdj_to_ftp = PythonOperator(
    task_id='upload_scanpy_report_for_vdj_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params=dict(
      xcom_pull_task='load_scanpy_report_for_vdj_to_db',
      xcom_pull_files_key='output_db_files',
      collection_name_task='load_cellranger_result_to_db',
      collection_name_key='sample_igf_id',
      collection_type='FTP_SCIRPY_VDJ_HTML',
      collection_table='sample',
      collect_remote_file=True))
  # Placeholder, no work is performed
  upload_scanpy_report_for_vdj_to_box = DummyOperator(
    task_id='upload_scanpy_report_for_vdj_to_box',
    dag=dag)
  ## PIPELINE
  decide_analysis_branch >> run_scirpy_for_vdj
  run_scirpy_for_vdj >> load_scanpy_report_for_vdj_to_db
  load_scanpy_report_for_vdj_to_db >> [
    upload_scanpy_report_for_vdj_to_ftp,
    upload_scanpy_report_for_vdj_to_box]
  ## TASK
  # Scirpy branch for the 'vdj_b' directory.
  # NOTE(review): the first task is a DummyOperator, which does not execute
  # python_callable, while the loading task below expects a
  # 'scirpy_notebook' XCom from it - presumably a placeholder until this
  # step is enabled; confirm.
  run_scirpy_for_vdj_b = DummyOperator(
    task_id='run_scirpy_for_vdj_b',
    python_callable=run_singlecell_notebook_wrapper_func,
    queue='hpc_4G',
    dag=dag,
    params=dict(
      cellranger_xcom_key='cellranger_output',
      cellranger_xcom_pull_task='run_cellranger',
      scanpy_timeout=1200,
      allow_errors=False,
      kernel_name='python3',
      vdj_dir='vdj_b',
      count_dir='count',
      cell_marker_list=Variable.get('all_cell_marker_list'),
      output_notebook_key='scirpy_notebook',
      analysis_description_xcom_pull_task='fetch_analysis_info_and_branch',
      analysis_description_xcom_key='analysis_description',
      template_ipynb_path=Variable.get('scirpy_single_sample_template'),
      singularity_image_path=Variable.get('scirpy_notebook_image')))
  load_scanpy_report_for_vdj_b_to_db = PythonOperator(
    task_id='load_scanpy_report_for_vdj_b_to_db',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    dag=dag,
    params=dict(
      collection_name_task='load_cellranger_result_to_db',
      collection_name_key='sample_igf_id',
      file_name_task='run_scirpy_for_vdj_b',
      file_name_key='scirpy_notebook',
      analysis_name='scirpy_vdj_b',
      collection_type='SCIRPY_VDJ_B_HTML',
      collection_table='sample',
      output_files_key='output_db_files'))
  upload_scanpy_report_for_vdj_b_to_ftp = PythonOperator(
    task_id='upload_scanpy_report_for_vdj_b_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params=dict(
      xcom_pull_task='load_scanpy_report_for_vdj_b_to_db',
      xcom_pull_files_key='output_db_files',
      collection_name_task='load_cellranger_result_to_db',
      collection_name_key='sample_igf_id',
      collection_type='FTP_SCIRPY_VDJ_B_HTML',
      collection_table='sample',
      collect_remote_file=True))
  # Placeholder, no work is performed
  upload_scanpy_report_for_vdj_b_to_box = DummyOperator(
    task_id='upload_scanpy_report_for_vdj_b_to_box',
    dag=dag)
  ## PIPELINE
  decide_analysis_branch >> run_scirpy_for_vdj_b
  run_scirpy_for_vdj_b >> load_scanpy_report_for_vdj_b_to_db
  load_scanpy_report_for_vdj_b_to_db >> [
    upload_scanpy_report_for_vdj_b_to_ftp,
    upload_scanpy_report_for_vdj_b_to_box]
  ## TASK
  # Scirpy branch for the 'vdj_t' directory.
  # NOTE(review): the first task is a DummyOperator, which does not execute
  # python_callable, while the loading task below expects a
  # 'scirpy_notebook' XCom from it - presumably a placeholder until this
  # step is enabled; confirm.
  run_scirpy_for_vdj_t = DummyOperator(
    task_id='run_scirpy_for_vdj_t',
    python_callable=run_singlecell_notebook_wrapper_func,
    queue='hpc_4G',
    dag=dag,
    params=dict(
      cellranger_xcom_key='cellranger_output',
      cellranger_xcom_pull_task='run_cellranger',
      scanpy_timeout=1200,
      allow_errors=False,
      kernel_name='python3',
      vdj_dir='vdj_t',
      count_dir='count',
      cell_marker_list=Variable.get('all_cell_marker_list'),
      output_notebook_key='scirpy_notebook',
      analysis_description_xcom_pull_task='fetch_analysis_info_and_branch',
      analysis_description_xcom_key='analysis_description',
      template_ipynb_path=Variable.get('scirpy_single_sample_template'),
      singularity_image_path=Variable.get('scirpy_notebook_image')))
  load_scanpy_report_for_vdj_t_to_db = PythonOperator(
    task_id='load_scanpy_report_for_vdj_t_to_db',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    dag=dag,
    params=dict(
      collection_name_task='load_cellranger_result_to_db',
      collection_name_key='sample_igf_id',
      file_name_task='run_scirpy_for_vdj_t',
      file_name_key='scirpy_notebook',
      analysis_name='scirpy_vdj_t',
      collection_type='SCIRPY_VDJ_T_HTML',
      collection_table='sample',
      output_files_key='output_db_files'))
  upload_scanpy_report_for_vdj_t_to_ftp = PythonOperator(
    task_id='upload_scanpy_report_for_vdj_t_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params=dict(
      xcom_pull_task='load_scanpy_report_for_vdj_t_to_db',
      xcom_pull_files_key='output_db_files',
      collection_name_task='load_cellranger_result_to_db',
      collection_name_key='sample_igf_id',
      collection_type='FTP_SCIRPY_VDJ_T_HTML',
      collection_table='sample',
      collect_remote_file=True))
  # Placeholder, no work is performed
  upload_scanpy_report_for_vdj_t_to_box = DummyOperator(
    task_id='upload_scanpy_report_for_vdj_t_to_box',
    dag=dag)
  ## PIPELINE
  decide_analysis_branch >> run_scirpy_for_vdj_t
  run_scirpy_for_vdj_t >> load_scanpy_report_for_vdj_t_to_db
  load_scanpy_report_for_vdj_t_to_db >> [
    upload_scanpy_report_for_vdj_t_to_ftp,
    upload_scanpy_report_for_vdj_t_to_box]
  ## TASK
  # Seurat branch: same notebook wrapper as the scirpy tasks, configured
  # with the 'R' kernel and the seurat template / image Variables.
  run_seurat_for_sc_5p = PythonOperator(
    task_id='run_seurat_for_sc_5p',
    python_callable=run_singlecell_notebook_wrapper_func,
    queue='hpc_4G',
    dag=dag,
    params=dict(
      cellranger_xcom_key='cellranger_output',
      cellranger_xcom_pull_task='run_cellranger',
      scanpy_timeout=1200,
      allow_errors=False,
      kernel_name='R',
      vdj_dir='vdj',
      count_dir='count',
      cell_marker_list=Variable.get('all_cell_marker_list'),
      output_notebook_key='seurat_notebook',
      analysis_description_xcom_pull_task='fetch_analysis_info_and_branch',
      analysis_description_xcom_key='analysis_description',
      template_ipynb_path=Variable.get('seurat_single_sample_template'),
      singularity_image_path=Variable.get('seurat_notebook_image')))
  # Picks up the 'seurat_notebook' XCom written by the task above
  load_seurat_report_for_sc_5p_db = PythonOperator(
    task_id='load_seurat_report_for_sc_5p_db',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    dag=dag,
    params=dict(
      collection_name_task='load_cellranger_result_to_db',
      collection_name_key='sample_igf_id',
      file_name_task='run_seurat_for_sc_5p',
      file_name_key='seurat_notebook',
      analysis_name='seurat_5p',
      collection_type='SEURAT_HTML',
      collection_table='sample',
      output_files_key='output_db_files'))
  # NOTE(review): a DummyOperator does not execute python_callable; the
  # callable and params look like a placeholder for a later switch to
  # PythonOperator - confirm.
  upload_seurat_report_for_sc_5p_ftp = DummyOperator(
    task_id='upload_seurat_report_for_sc_5p_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params=dict(
      xcom_pull_task='load_seurat_report_for_sc_5p_db',
      xcom_pull_files_key='output_db_files',
      collection_name_task='load_cellranger_result_to_db',
      collection_name_key='sample_igf_id',
      collection_type='FTP_SEURAT_HTML',
      collection_table='sample',
      collect_remote_file=True))
  # Placeholder, no work is performed
  upload_seurat_report_for_sc_5p_to_box = DummyOperator(
    task_id='upload_seurat_report_for_sc_5p_to_box',
    dag=dag)
  ## PIPELINE
  decide_analysis_branch >> run_seurat_for_sc_5p
  run_seurat_for_sc_5p >> load_seurat_report_for_sc_5p_db
  load_seurat_report_for_sc_5p_db >> [
    upload_seurat_report_for_sc_5p_ftp,
    upload_seurat_report_for_sc_5p_to_box]
  ## TASK
  # Cram branch: both tasks are DummyOperator placeholders, the params
  # only record where the cellranger output XCom is expected to come from.
  convert_bam_to_cram = DummyOperator(
    task_id='convert_bam_to_cram',
    dag=dag,
    params=dict(
      cellranger_xcom_key='cellranger_output',
      cellranger_xcom_pull_task='run_cellranger'))
  upload_cram_to_irods = DummyOperator(
    task_id='upload_cram_to_irods',
    dag=dag)
  ## PIPELINE
  decide_analysis_branch >> convert_bam_to_cram
  convert_bam_to_cram >> upload_cram_to_irods
  ## TASK
  # QC branch: a linear chain of DummyOperator placeholders (picard,
  # samtools, multiqc) ending in two parallel multiqc uploads.
  run_picard_alignment_summary = DummyOperator(
    task_id='run_picard_alignment_summary',
    dag=dag,
    params=dict(
      cellranger_xcom_key='cellranger_output',
      cellranger_xcom_pull_task='run_cellranger'))
  run_picard_qual_summary = DummyOperator(
    task_id='run_picard_qual_summary', dag=dag)
  run_picard_rna_summary = DummyOperator(
    task_id='run_picard_rna_summary', dag=dag)
  run_picard_gc_summary = DummyOperator(
    task_id='run_picard_gc_summary', dag=dag)
  run_samtools_stats = DummyOperator(
    task_id='run_samtools_stats', dag=dag)
  run_samtools_idxstats = DummyOperator(
    task_id='run_samtools_idxstats', dag=dag)
  run_multiqc = DummyOperator(
    task_id='run_multiqc', dag=dag)
  upload_multiqc_to_ftp = DummyOperator(
    task_id='upload_multiqc_to_ftp', dag=dag)
  upload_multiqc_to_box = DummyOperator(
    task_id='upload_multiqc_to_box', dag=dag)
  ## PIPELINE
  # Each step depends only on the one directly before it
  decide_analysis_branch >> run_picard_alignment_summary
  run_picard_alignment_summary >> run_picard_qual_summary
  run_picard_qual_summary >> run_picard_rna_summary
  run_picard_rna_summary >> run_picard_gc_summary
  run_picard_gc_summary >> run_samtools_stats
  run_samtools_stats >> run_samtools_idxstats
  run_samtools_idxstats >> run_multiqc
  run_multiqc >> [upload_multiqc_to_ftp, upload_multiqc_to_box]
  ## TASK
  # Final fan-in task (currently a DummyOperator placeholder) placed after
  # the last task of every analysis branch.
  # NOTE(review): the default trigger rule is used, so this task is skipped
  # whenever any of the branches below is skipped - confirm this is wanted.
  update_analysis_and_status = DummyOperator(
    task_id='update_analysis_and_status',
    dag=dag)
  ## PIPELINE
  [
    upload_multiqc_to_ftp,
    upload_scanpy_report_for_sc_5p_to_ftp,
    upload_scanpy_report_for_sc_5p_to_box,
    upload_cellbrowser_for_sc_5p_to_ftp,
    upload_scanpy_report_for_vdj_to_ftp,
    upload_scanpy_report_for_vdj_to_box,
    upload_scanpy_report_for_vdj_b_to_ftp,
    upload_scanpy_report_for_vdj_b_to_box,
    upload_scanpy_report_for_vdj_t_to_ftp,
    upload_scanpy_report_for_vdj_t_to_box,
    upload_seurat_report_for_sc_5p_ftp,
    upload_seurat_report_for_sc_5p_to_box,
    upload_results_to_irods,
    upload_report_to_ftp,
    upload_report_to_box,
    upload_cram_to_irods,
  ] >> update_analysis_and_status