dag9_tenx_single_cell_immune_profiling.py 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041
  1. from datetime import timedelta
  2. import os,json,logging,subprocess
  3. from airflow.models import DAG,Variable
  4. from airflow.utils.dates import days_ago
  5. from airflow.operators.python_operator import PythonOperator
  6. from airflow.operators.python_operator import BranchPythonOperator
  7. from airflow.operators.dummy_operator import DummyOperator
  8. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import fetch_analysis_info_and_branch_func
  9. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import configure_cellranger_run_func
  10. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_sc_read_trimmming_func
  11. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_cellranger_tool
  12. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import decide_analysis_branch_func
  13. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_cellranger_result_to_db_func
  14. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import ftp_files_upload_for_analysis
  15. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import irods_files_upload_for_analysis
  16. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_singlecell_notebook_wrapper_func
  17. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_analysis_files_func
  18. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import task_branch_function
  19. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import upload_analysis_file_to_box
  20. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import convert_bam_to_cram_func
  21. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_picard_for_cellranger
  22. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_samtools_for_cellranger
  23. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_multiqc_for_cellranger
  24. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import index_and_copy_bam_for_parallel_analysis
  25. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import change_pipeline_status
  26. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import clean_up_files
  27. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import create_and_update_qc_pages
  28. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_cellranger_metrices_to_collection
  29. ## ARGS
  30. default_args = {
  31. 'owner': 'airflow',
  32. 'depends_on_past': False,
  33. 'start_date': days_ago(2),
  34. 'email_on_failure': False,
  35. 'email_on_retry': False,
  36. 'retries': 4,
  37. 'max_active_runs':10,
  38. 'catchup':True,
  39. 'retry_delay': timedelta(minutes=5),
  40. 'provide_context': True,
  41. }
  42. FEATURE_TYPE_LIST = \
  43. Variable.get('tenx_single_cell_immune_profiling_feature_types',default_var={})#.split(',')
  44. ## DAG
  45. dag = \
  46. DAG(
  47. dag_id='dag9_tenx_single_cell_immune_profiling',
  48. schedule_interval=None,
  49. tags=['hpc','analysis','tenx','sc'],
  50. default_args=default_args,
  51. orientation='LR')
  52. with dag:
  ## TASK
  # First task of the DAG. It is a branch operator: the callable receives,
  # via params, the id of the fallback task ('no_analysis') and the XCom key
  # names under which the analysis description and analysis info are shared.
  fetch_analysis_info_and_branch = BranchPythonOperator(
    task_id='fetch_analysis_info',
    dag=dag,
    queue='hpc_4G',
    python_callable=fetch_analysis_info_and_branch_func,
    params={
      'no_analysis_task': 'no_analysis',
      'analysis_description_xcom_key': 'analysis_description',
      'analysis_info_xcom_key': 'analysis_info',
    })
  ## TASK
  # Join point after the per-feature-type trimming branches. The
  # 'none_failed_or_skipped' trigger rule is set explicitly because some of
  # the upstream branches are expected to be skipped by the branch operators.
  # Reads XComs from 'fetch_analysis_info'; the library csv is exchanged
  # under the key 'cellranger_library_csv'.
  configure_cellranger_run = PythonOperator(
    task_id='configure_cellranger_run',
    dag=dag,
    queue='hpc_4G',
    trigger_rule='none_failed_or_skipped',
    python_callable=configure_cellranger_run_func,
    params={
      'xcom_pull_task_id': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'analysis_info_xcom_key': 'analysis_info',
      'library_csv_xcom_key': 'cellranger_library_csv',
    })
  # Build one trimming sub-graph per configured feature type:
  #   fetch_analysis_info -> <feature type branch> -> run_trim_<type>_0..9
  #     -> collect_trimmed_files_<type> -> configure_cellranger_run
  for analysis_name in FEATURE_TYPE_LIST.keys():
    ## TASK
    # Branch task named after the feature type; it is given the task id
    # prefix ('run_trim') of the trimming tasks defined below.
    task_branch = BranchPythonOperator(
      task_id=analysis_name,
      dag=dag,
      queue='hpc_4G',
      python_callable=task_branch_function,
      params={
        'xcom_pull_task_id': 'fetch_analysis_info',
        'analysis_info_xcom_key': 'analysis_info',
        'analysis_name': analysis_name,
        'task_prefix': 'run_trim',
      })
    ## TASK
    # Fixed pool of ten trimming tasks (run ids 0-9) for this feature type.
    run_trim_list = [
      PythonOperator(
        task_id='run_trim_{0}_{1}'.format(analysis_name, run_id),
        dag=dag,
        queue='hpc_4G',
        python_callable=run_sc_read_trimmming_func,
        params={
          'xcom_pull_task_id': 'fetch_analysis_info',
          'analysis_info_xcom_key': 'analysis_info',
          'analysis_description_xcom_key': 'analysis_description',
          'analysis_name': analysis_name,
          'run_id': run_id,
          'r1_length': 0,
          'r2_length': 0,
          'fastq_input_dir_tag': 'fastq_dir',
          'use_ephemeral_space': True,
          'fastq_output_dir_tag': 'output_path',
        })
      for run_id in range(10)
    ]
    ## TASK
    # Placeholder that gathers the trimming tasks of this feature type; it
    # runs as long as no upstream task has failed.
    collect_trimmed_files = DummyOperator(
      task_id='collect_trimmed_files_{0}'.format(analysis_name),
      trigger_rule='none_failed_or_skipped',
      dag=dag)
    ## PIPELINE
    fetch_analysis_info_and_branch >> task_branch >> run_trim_list
    run_trim_list >> collect_trimmed_files >> configure_cellranger_run
  ## TASK
  # Terminal placeholder; its task id is the one handed to
  # 'fetch_analysis_info' as the 'no_analysis_task' param.
  no_analysis = DummyOperator(task_id='no_analysis', dag=dag)
  ## PIPELINE
  fetch_analysis_info_and_branch.set_downstream(no_analysis)
  ## TASK
  # Runs the cellranger tool on the large queue. It pulls the analysis
  # description from 'fetch_analysis_info' and the library csv from
  # 'configure_cellranger_run'; the output is shared under the XCom key
  # 'cellranger_output'. The core/memory options mirror the queue name.
  run_cellranger = PythonOperator(
    task_id='run_cellranger',
    dag=dag,
    queue='hpc_64G16t24hr',
    python_callable=run_cellranger_tool,
    params={
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'library_csv_xcom_key': 'cellranger_library_csv',
      'library_csv_xcom_pull_task': 'configure_cellranger_run',
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_options': ['--localcores 16','--localmem 64'],
    })
  ## PIPELINE
  configure_cellranger_run.set_downstream(run_cellranger)
  ## TASK
  # Branch point after cellranger. The params map a role name to the task id
  # of each candidate downstream task defined later in this DAG.
  # NOTE(review): the key 'run_scirpy_vdj_t_task' lacks the 'for_' used by
  # its siblings ('run_scirpy_for_vdj_b_task'); confirm that
  # decide_analysis_branch_func reads exactly this key before renaming it.
  decide_analysis_branch = BranchPythonOperator(
    task_id='decide_analysis_branch',
    dag=dag,
    queue='hpc_4G',
    python_callable=decide_analysis_branch_func,
    params={
      'load_cellranger_result_to_db_task': 'load_cellranger_result_to_db',
      'run_scanpy_for_sc_5p_task': 'run_scanpy_for_sc_5p',
      'run_scirpy_for_vdj_task': 'run_scirpy_for_vdj',
      'run_scirpy_for_vdj_b_task': 'run_scirpy_for_vdj_b',
      'run_scirpy_vdj_t_task': 'run_scirpy_for_vdj_t',
      'run_seurat_for_sc_5p_task': 'run_seurat_for_sc_5p',
      'convert_cellranger_bam_to_cram_task': 'convert_cellranger_bam_to_cram',
      'library_csv_xcom_key': 'cellranger_library_csv',
      'library_csv_xcom_pull_task': 'configure_cellranger_run',
    })
  ## PIPELINE
  run_cellranger.set_downstream(decide_analysis_branch)
  ## TASK
  # Registers the cellranger output. Besides the loaded files and the html
  # report it also publishes the collection name under the XCom key
  # 'sample_igf_id', which many later tasks pull from this task.
  load_cellranger_result_to_db = PythonOperator(
    task_id='load_cellranger_result_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_cellranger_result_to_db_func,
    params={
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'CELLRANGER_MULTI',
      'html_collection_type': 'CELLRANGER_HTML',
      'collection_table': 'sample',
      'xcom_collection_name_key': 'sample_igf_id',
      'genome_column': 'genome_build',
      'analysis_name': 'cellranger_multi',
      'output_xcom_key': 'loaded_output_files',
      'html_xcom_key': 'html_report_file',
      'html_report_file_name': 'web_summary.html',
    })
  # The three uploads below all read from 'load_cellranger_result_to_db':
  # the html report goes to ftp and box, the loaded output files to irods.
  upload_cellranger_report_to_ftp = PythonOperator(
    task_id='upload_cellranger_report_to_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'html_report_file',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_CELLRANGER_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    })
  upload_cellranger_report_to_box = PythonOperator(
    task_id='upload_cellranger_report_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'html_report_file',
      'analysis_tag': 'cellranger_multi',
    })
  upload_cellranger_results_to_irods = PythonOperator(
    task_id='upload_cellranger_results_to_irods',
    dag=dag,
    queue='hpc_4G',
    python_callable=irods_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'loaded_output_files',
      'collection_name_key': 'sample_igf_id',
      'collection_name_task': 'load_cellranger_result_to_db',
      'analysis_name': 'cellranger_multi',
    })
  ## PIPELINE
  decide_analysis_branch >> load_cellranger_result_to_db
  load_cellranger_result_to_db >> [
    upload_cellranger_report_to_ftp,
    upload_cellranger_report_to_box,
    upload_cellranger_results_to_irods,
  ]
  ## TASK
  # Scanpy notebook run on the cellranger 'count' output. It publishes the
  # notebook under 'scanpy_notebook' and the cellbrowser output under
  # 'cellbrowser_dirs'.
  run_scanpy_for_sc_5p = PythonOperator(
    task_id='run_scanpy_for_sc_5p',
    dag=dag,
    queue='hpc_4G',
    python_callable=run_singlecell_notebook_wrapper_func,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'count_dir': 'count',
      'analysis_name': 'scanpy',
      'output_notebook_key': 'scanpy_notebook',
      'output_cellbrowser_key': 'cellbrowser_dirs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    })
  # Loads 'count/metrics_summary.csv' with the 'CELLRANGER_COUNT' prefix;
  # the target collection type is 'ANALYSIS_CRAM'.
  load_cellranger_gex_matrics_to_db = PythonOperator(
    task_id='load_cellranger_gex_matrics_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_cellranger_metrices_to_collection,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'ANALYSIS_CRAM',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'metrics_summary_file': 'count/metrics_summary.csv',
      'attribute_prefix': 'CELLRANGER_COUNT',
    })
  # Registers the scanpy notebook and re-publishes the loaded files under
  # 'output_db_files' for the ftp and box uploads.
  load_scanpy_report_for_sc_5p_to_db = PythonOperator(
    task_id='load_scanpy_report_for_sc_5p_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_analysis_files_func,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scanpy_for_sc_5p',
      'file_name_key': 'scanpy_notebook',
      'analysis_name': 'scanpy_5p',
      'collection_type': 'SCANPY_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    })
  upload_scanpy_report_for_sc_5p_to_ftp = PythonOperator(
    task_id='upload_scanpy_report_for_sc_5p_to_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'load_scanpy_report_for_sc_5p_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCANPY_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    })
  upload_scanpy_report_for_sc_5p_to_box = PythonOperator(
    task_id='upload_scanpy_report_for_sc_5p_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'load_scanpy_report_for_sc_5p_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'scanpy_single_sample_report',
    })
  # The cellbrowser output is uploaded straight from the scanpy task.
  upload_cellbrowser_for_sc_5p_to_ftp = PythonOperator(
    task_id='upload_cellbrowser_for_sc_5p_to_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'run_scanpy_for_sc_5p',
      'xcom_pull_files_key': 'cellbrowser_dirs',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_CELLBROWSER',
      'collection_table': 'sample',
      'collect_remote_file': True,
    })
  ## PIPELINE
  decide_analysis_branch >> run_scanpy_for_sc_5p
  run_scanpy_for_sc_5p >> [
    load_scanpy_report_for_sc_5p_to_db,
    load_cellranger_gex_matrics_to_db,
    upload_cellbrowser_for_sc_5p_to_ftp,
  ]
  load_scanpy_report_for_sc_5p_to_db >> [
    upload_scanpy_report_for_sc_5p_to_ftp,
    upload_scanpy_report_for_sc_5p_to_box,
  ]
  ## TASK
  # Scirpy notebook run using the 'vdj' and 'count' cellranger output
  # directories; the notebook is published under 'scirpy_notebook'.
  run_scirpy_for_vdj = PythonOperator(
    task_id='run_scirpy_for_vdj',
    dag=dag,
    queue='hpc_4G',
    python_callable=run_singlecell_notebook_wrapper_func,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'analysis_name': 'scirpy',
      'vdj_dir': 'vdj',
      'count_dir': 'count',
      'output_notebook_key': 'scirpy_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    })
  # Loads 'vdj/metrics_summary.csv' with the 'CELLRANGER_VDJ' prefix.
  load_cellranger_vdj_matrics_to_db = PythonOperator(
    task_id='load_cellranger_vdj_matrics_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_cellranger_metrices_to_collection,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'ANALYSIS_CRAM',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'metrics_summary_file': 'vdj/metrics_summary.csv',
      'attribute_prefix': 'CELLRANGER_VDJ',
    })
  # Registers the scirpy notebook; loaded files are shared under
  # 'output_db_files' for the ftp and box uploads.
  load_scirpy_report_for_vdj_to_db = PythonOperator(
    task_id='load_scirpy_report_for_vdj_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_analysis_files_func,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scirpy_for_vdj',
      'file_name_key': 'scirpy_notebook',
      'analysis_name': 'scirpy_vdj',
      'collection_type': 'SCIRPY_VDJ_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    })
  upload_scirpy_report_for_vdj_to_ftp = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_to_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCIRPY_VDJ_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    })
  upload_scirpy_report_for_vdj_to_box = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'scirpy_vdj_single_sample_report',
    })
  ## PIPELINE
  decide_analysis_branch >> run_scirpy_for_vdj
  run_scirpy_for_vdj >> [
    load_scirpy_report_for_vdj_to_db,
    load_cellranger_vdj_matrics_to_db,
  ]
  load_scirpy_report_for_vdj_to_db >> [
    upload_scirpy_report_for_vdj_to_ftp,
    upload_scirpy_report_for_vdj_to_box,
  ]
  ## TASK
  # Scirpy notebook run using the 'vdj_b' and 'count' cellranger output
  # directories; the notebook is published under 'scirpy_notebook'.
  run_scirpy_for_vdj_b = PythonOperator(
    task_id='run_scirpy_for_vdj_b',
    dag=dag,
    queue='hpc_4G',
    python_callable=run_singlecell_notebook_wrapper_func,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'analysis_name': 'scirpy',
      'vdj_dir': 'vdj_b',
      'count_dir': 'count',
      'output_notebook_key': 'scirpy_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    })
  # Loads 'vdj_b/metrics_summary.csv' with the 'CELLRANGER_VDJB' prefix.
  load_cellranger_vdjB_matrics_to_db = PythonOperator(
    task_id='load_cellranger_vdjB_matrics_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_cellranger_metrices_to_collection,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'ANALYSIS_CRAM',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'metrics_summary_file': 'vdj_b/metrics_summary.csv',
      'attribute_prefix': 'CELLRANGER_VDJB',
    })
  # Registers the scirpy notebook; loaded files are shared under
  # 'output_db_files' for the ftp and box uploads.
  load_scirpy_report_for_vdj_b_to_db = PythonOperator(
    task_id='load_scirpy_report_for_vdj_b_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_analysis_files_func,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scirpy_for_vdj_b',
      'file_name_key': 'scirpy_notebook',
      'analysis_name': 'scirpy_vdj_b',
      'collection_type': 'SCIRPY_VDJ_B_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    })
  upload_scirpy_report_for_vdj_b_to_ftp = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_b_to_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_b_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCIRPY_VDJ_B_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    })
  # The task id previously read 'upload_scanpy_report_for_vdj_b_to_box'
  # (copy-paste slip: this task uploads the scirpy report). It now matches
  # the variable name and the sibling vdj / vdj_t upload tasks. No other
  # task in the visible part of this DAG refers to this id.
  upload_scirpy_report_for_vdj_b_to_box = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_b_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_b_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'scirpy_vdj_b_single_sample_report',
    })
  ## PIPELINE
  decide_analysis_branch >> run_scirpy_for_vdj_b
  run_scirpy_for_vdj_b >> [
    load_scirpy_report_for_vdj_b_to_db,
    load_cellranger_vdjB_matrics_to_db,
  ]
  load_scirpy_report_for_vdj_b_to_db >> [
    upload_scirpy_report_for_vdj_b_to_ftp,
    upload_scirpy_report_for_vdj_b_to_box,
  ]
  ## TASK
  # Scirpy notebook run using the 'vdj_t' and 'count' cellranger output
  # directories; the notebook is published under 'scirpy_notebook'.
  run_scirpy_for_vdj_t = PythonOperator(
    task_id='run_scirpy_for_vdj_t',
    dag=dag,
    queue='hpc_4G',
    python_callable=run_singlecell_notebook_wrapper_func,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'analysis_name': 'scirpy',
      'vdj_dir': 'vdj_t',
      'count_dir': 'count',
      'output_notebook_key': 'scirpy_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    })
  # Loads 'vdj_t/metrics_summary.csv' with the 'CELLRANGER_VDJT' prefix.
  load_cellranger_vdjT_matrics_to_db = PythonOperator(
    task_id='load_cellranger_vdjT_matrics_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_cellranger_metrices_to_collection,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'ANALYSIS_CRAM',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'metrics_summary_file': 'vdj_t/metrics_summary.csv',
      'attribute_prefix': 'CELLRANGER_VDJT',
    })
  # Registers the scirpy notebook; loaded files are shared under
  # 'output_db_files' for the ftp and box uploads.
  load_scirpy_report_for_vdj_t_to_db = PythonOperator(
    task_id='load_scirpy_report_for_vdj_t_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_analysis_files_func,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scirpy_for_vdj_t',
      'file_name_key': 'scirpy_notebook',
      'analysis_name': 'scirpy_vdj_t',
      'collection_type': 'SCIRPY_VDJ_T_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    })
  upload_scirpy_report_for_vdj_t_to_ftp = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_t_to_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_t_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCIRPY_VDJ_T_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    })
  upload_scirpy_report_for_vdj_t_to_box = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_t_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_t_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'scirpy_vdj_t_single_sample_report',
    })
  ## PIPELINE
  decide_analysis_branch >> run_scirpy_for_vdj_t
  run_scirpy_for_vdj_t >> [
    load_scirpy_report_for_vdj_t_to_db,
    load_cellranger_vdjT_matrics_to_db,
  ]
  load_scirpy_report_for_vdj_t_to_db >> [
    upload_scirpy_report_for_vdj_t_to_ftp,
    upload_scirpy_report_for_vdj_t_to_box,
  ]
  ## TASK
  # Seurat notebook run; unlike the scanpy/scirpy tasks it uses the 'ir'
  # notebook kernel. The notebook is published under 'seurat_notebook'.
  run_seurat_for_sc_5p = PythonOperator(
    task_id='run_seurat_for_sc_5p',
    dag=dag,
    queue='hpc_4G',
    python_callable=run_singlecell_notebook_wrapper_func,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'ir',
      'analysis_name': 'seurat',
      'vdj_dir': 'vdj',
      'count_dir': 'count',
      'output_notebook_key': 'seurat_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    })
  # Registers the seurat notebook; loaded files are shared under
  # 'output_db_files' for the ftp and box uploads.
  load_seurat_report_for_sc_5p_db = PythonOperator(
    task_id='load_seurat_report_for_sc_5p_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_analysis_files_func,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_seurat_for_sc_5p',
      'file_name_key': 'seurat_notebook',
      'analysis_name': 'seurat_5p',
      'collection_type': 'SEURAT_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    })
  upload_seurat_report_for_sc_5p_ftp = PythonOperator(
    task_id='upload_seurat_report_for_sc_5p_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'load_seurat_report_for_sc_5p_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SEURAT_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    })
  upload_seurat_report_for_sc_5p_to_box = PythonOperator(
    task_id='upload_seurat_report_for_sc_5p_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'load_seurat_report_for_sc_5p_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'seurat_single_sample_report',
    })
  ## PIPELINE
  decide_analysis_branch >> run_seurat_for_sc_5p >> load_seurat_report_for_sc_5p_db
  load_seurat_report_for_sc_5p_db >> [
    upload_seurat_report_for_sc_5p_ftp,
    upload_seurat_report_for_sc_5p_to_box,
  ]
  ## TASK
  # Converts the cellranger bam to cram on the 4-thread queue ('threads'
  # matches the queue name) and shares the result under 'cram_files'.
  convert_cellranger_bam_to_cram = PythonOperator(
    task_id='convert_cellranger_bam_to_cram',
    dag=dag,
    queue='hpc_4G4t',
    python_callable=convert_bam_to_cram_func,
    params={
      'xcom_pull_files_key': 'cellranger_output',
      'xcom_pull_task': 'run_cellranger',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'threads': 4,
      'analysis_name': 'cellranger',
      'collection_type': 'ANALYSIS_CRAM',
      'collection_table': 'sample',
      'cram_files_xcom_key': 'cram_files',
    })
  ## TASK
  # Branch operator that is handed the task ids of the QC tasks meant to run
  # in parallel; each QC task later pulls an XCom keyed by its own task id
  # from this task.
  copy_bam_for_parallel_runs = BranchPythonOperator(
    task_id='copy_bam_for_parallel_runs',
    dag=dag,
    queue='hpc_4G',
    python_callable=index_and_copy_bam_for_parallel_analysis,
    params={
      'xcom_pull_files_key': 'cellranger_output',
      'xcom_pull_task': 'run_cellranger',
      'list_of_tasks': [
        'run_picard_alignment_summary',
        'run_picard_qual_summary',
        'run_picard_rna_summary',
        'run_picard_gc_summary',
        'run_picard_base_dist_summary',
        'run_samtools_stats',
      ],
    })
  ## TASK
  # Uploads the cram files produced by 'convert_cellranger_bam_to_cram'.
  upload_cram_to_irods = PythonOperator(
    task_id='upload_cram_to_irods',
    dag=dag,
    queue='hpc_4G',
    python_callable=irods_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'convert_cellranger_bam_to_cram',
      'xcom_pull_files_key': 'cram_files',
      'collection_name_key': 'sample_igf_id',
      'collection_name_task': 'load_cellranger_result_to_db',
      'analysis_name': 'cellranger_multi',
    })
  ## PIPELINE
  decide_analysis_branch >> convert_cellranger_bam_to_cram
  # The bam copy runs after the cram conversion because we need to load
  # metrics to cram.
  convert_cellranger_bam_to_cram >> [
    copy_bam_for_parallel_runs,
    upload_cram_to_irods,
  ]
  ## TASK
  # Picard CollectAlignmentSummaryMetrics. The input is pulled from
  # 'copy_bam_for_parallel_runs' under an XCom key equal to this task's id;
  # the output is shared under 'picard_alignment_summary'.
  run_picard_alignment_summary = PythonOperator(
    task_id='run_picard_alignment_summary',
    dag=dag,
    queue='hpc_4G',
    python_callable=run_picard_for_cellranger,
    params={
      'xcom_pull_files_key': 'run_picard_alignment_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'load_metrics_to_cram': True,
      'java_param': '-Xmx4g',
      'picard_command': 'CollectAlignmentSummaryMetrics',
      'picard_option': {},
      'analysis_files_xcom_key': 'picard_alignment_summary',
      'bam_files_xcom_key': None,
    })
  ## TASK
  # Cleans up the same XCom entry that the picard task consumed as input.
  cleanup_picard_alignment_summary_input = PythonOperator(
    task_id='cleanup_picard_alignment_summary_input',
    dag=dag,
    queue='hpc_4G',
    python_callable=clean_up_files,
    params={
      'xcom_pull_files_key': 'run_picard_alignment_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
    })
  ## TASK
  upload_picard_alignment_summary_to_box = PythonOperator(
    task_id='upload_picard_alignment_summary_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'run_picard_alignment_summary',
      'xcom_pull_files_key': 'picard_alignment_summary',
      'analysis_tag': 'Picard-CollectAlignmentSummaryMetrics',
    })
  ## PIPELINE
  copy_bam_for_parallel_runs >> run_picard_alignment_summary
  run_picard_alignment_summary >> [
    cleanup_picard_alignment_summary_input,
    upload_picard_alignment_summary_to_box,
  ]
  673. ## TASK
  674. run_picard_qual_summary = \
  675. PythonOperator(
  676. task_id='run_picard_qual_summary',
  677. dag=dag,
  678. queue='hpc_4G',
  679. python_callable=run_picard_for_cellranger,
  680. params={'xcom_pull_files_key':'run_picard_qual_summary',
  681. 'xcom_pull_task':'copy_bam_for_parallel_runs',
  682. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  683. 'analysis_description_xcom_key':'analysis_description',
  684. 'use_ephemeral_space':True,
  685. 'load_metrics_to_cram':True,
  686. 'java_param':'-Xmx4g',
  687. 'picard_command':'QualityScoreDistribution',
  688. 'picard_option':{},
  689. 'analysis_files_xcom_key':'picard_qual_summary',
  690. 'bam_files_xcom_key':None})
  691. ## PIPELINE
  692. copy_bam_for_parallel_runs >> run_picard_qual_summary
  693. ## TASK
  694. cleanup_picard_qual_summary_input = \
  695. PythonOperator(
  696. task_id='cleanup_picard_qual_summary_input',
  697. dag=dag,
  698. queue='hpc_4G',
  699. python_callable=clean_up_files,
  700. params={'xcom_pull_files_key':'run_picard_qual_summary',
  701. 'xcom_pull_task':'copy_bam_for_parallel_runs'})
  702. ## PIPELINE
  703. run_picard_qual_summary >> cleanup_picard_qual_summary_input
  704. ## TASK
  705. upload_picard_qual_summary_to_box = \
  706. PythonOperator(
  707. task_id='upload_picard_qual_summary_to_box',
  708. dag=dag,
  709. queue='hpc_4G',
  710. python_callable=upload_analysis_file_to_box,
  711. params={'xcom_pull_task':'run_picard_qual_summary',
  712. 'xcom_pull_files_key':'picard_qual_summary',
  713. 'analysis_tag':'Picard-QualityScoreDistribution'})
  714. ## PIPELINE
  715. run_picard_qual_summary >> upload_picard_qual_summary_to_box
  716. ## TASK
  717. run_picard_rna_summary = \
  718. PythonOperator(
  719. task_id='run_picard_rna_summary',
  720. dag=dag,
  721. queue='hpc_8G',
  722. python_callable=run_picard_for_cellranger,
  723. params={'xcom_pull_files_key':'run_picard_rna_summary',
  724. 'xcom_pull_task':'copy_bam_for_parallel_runs',
  725. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  726. 'analysis_description_xcom_key':'analysis_description',
  727. 'use_ephemeral_space':True,
  728. 'load_metrics_to_cram':True,
  729. 'java_param':'-Xmx7g',
  730. 'picard_command':'CollectRnaSeqMetrics',
  731. 'picard_option':{},
  732. 'analysis_files_xcom_key':'picard_rna_summary',
  733. 'bam_files_xcom_key':None})
  734. ## PIPELINE
  735. copy_bam_for_parallel_runs >> run_picard_rna_summary
  736. ## TASK
  737. cleanup_picard_rna_summary_input = \
  738. PythonOperator(
  739. task_id='cleanup_picard_rna_summary_input',
  740. dag=dag,
  741. queue='hpc_4G',
  742. python_callable=clean_up_files,
  743. params={'xcom_pull_files_key':'run_picard_rna_summary',
  744. 'xcom_pull_task':'copy_bam_for_parallel_runs'})
  745. ## PIPELINE
  746. run_picard_rna_summary >> cleanup_picard_rna_summary_input
  747. ## TASK
  748. upload_picard_rna_summary_to_box = \
  749. PythonOperator(
  750. task_id='upload_picard_rna_summary_to_box',
  751. dag=dag,
  752. queue='hpc_4G',
  753. python_callable=upload_analysis_file_to_box,
  754. params={'xcom_pull_task':'run_picard_rna_summary',
  755. 'xcom_pull_files_key':'picard_rna_summary',
  756. 'analysis_tag':'Picard-CollectRnaSeqMetrics'})
  757. ## PIPELINE
  758. run_picard_rna_summary >> upload_picard_rna_summary_to_box
  759. ## TASK
  760. run_picard_gc_summary = \
  761. PythonOperator(
  762. task_id='run_picard_gc_summary',
  763. dag=dag,
  764. queue='hpc_4G',
  765. python_callable=run_picard_for_cellranger,
  766. params={'xcom_pull_files_key':'run_picard_gc_summary',
  767. 'xcom_pull_task':'copy_bam_for_parallel_runs',
  768. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  769. 'analysis_description_xcom_key':'analysis_description',
  770. 'use_ephemeral_space':True,
  771. 'load_metrics_to_cram':True,
  772. 'java_param':'-Xmx4g',
  773. 'picard_command':'CollectGcBiasMetrics',
  774. 'picard_option':{},
  775. 'analysis_files_xcom_key':'picard_gc_summary',
  776. 'bam_files_xcom_key':None})
  777. ## PIPELINE
  778. copy_bam_for_parallel_runs >> run_picard_gc_summary
  779. ## TASK
  780. cleanup_picard_gc_summary_input = \
  781. PythonOperator(
  782. task_id='cleanup_picard_gc_summary_input',
  783. dag=dag,
  784. queue='hpc_4G',
  785. python_callable=clean_up_files,
  786. params={'xcom_pull_files_key':'run_picard_gc_summary',
  787. 'xcom_pull_task':'copy_bam_for_parallel_runs'})
  788. ## PIPELINE
  789. run_picard_gc_summary >> cleanup_picard_gc_summary_input
  790. ## TASK
  791. upload_picard_gc_summary_to_box = \
  792. PythonOperator(
  793. task_id='upload_picard_alignment_summary_to_box',
  794. dag=dag,
  795. queue='hpc_4G',
  796. python_callable=upload_analysis_file_to_box,
  797. params={'xcom_pull_task':'run_picard_gc_summary',
  798. 'xcom_pull_files_key':'picard_gc_summary',
  799. 'analysis_tag':'Picard-CollectGcBiasMetrics'})
  800. ## PIPELINE
  801. run_picard_gc_summary >> upload_picard_gc_summary_to_box
  802. ## TASK
  803. run_picard_base_dist_summary = \
  804. PythonOperator(
  805. task_id='run_picard_base_dist_summary',
  806. dag=dag,
  807. queue='hpc_4G',
  808. python_callable=run_picard_for_cellranger,
  809. params={'xcom_pull_files_key':'run_picard_base_dist_summary',
  810. 'xcom_pull_task':'copy_bam_for_parallel_runs',
  811. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  812. 'analysis_description_xcom_key':'analysis_description',
  813. 'use_ephemeral_space':True,
  814. 'load_metrics_to_cram':True,
  815. 'java_param':'-Xmx4g',
  816. 'picard_command':'CollectBaseDistributionByCycle',
  817. 'picard_option':{},
  818. 'analysis_files_xcom_key':'picard_base_summary',
  819. 'bam_files_xcom_key':None})
  820. ## PIPELINE
  821. copy_bam_for_parallel_runs >> run_picard_base_dist_summary
  822. ## TASK
  823. cleanup_picard_base_dist_summary_input = \
  824. PythonOperator(
  825. task_id='cleanup_picard_base_dist_summary_input',
  826. dag=dag,
  827. queue='hpc_4G',
  828. python_callable=clean_up_files,
  829. params={'xcom_pull_files_key':'run_picard_base_dist_summary',
  830. 'xcom_pull_task':'copy_bam_for_parallel_runs'})
  831. ## PIPELINE
  832. run_picard_base_dist_summary >> cleanup_picard_base_dist_summary_input
  833. ## TASK
  834. upload_picard_base_dist_summary_to_box = \
  835. PythonOperator(
  836. task_id='upload_picard_base_dist_summary_to_box',
  837. dag=dag,
  838. queue='hpc_4G',
  839. python_callable=upload_analysis_file_to_box,
  840. params={'xcom_pull_task':'run_picard_base_dist_summary',
  841. 'xcom_pull_files_key':'picard_base_summary',
  842. 'analysis_tag':'Picard-CollectBaseDistributionByCycle'})
  843. ## PIPELINE
  844. run_picard_base_dist_summary >> upload_picard_base_dist_summary_to_box
  845. ## TASK
  846. run_samtools_stats = \
  847. PythonOperator(
  848. task_id='run_samtools_stats',
  849. dag=dag,
  850. queue='hpc_4G4t',
  851. python_callable=run_samtools_for_cellranger,
  852. params={'xcom_pull_files_key':'run_samtools_stats',
  853. 'xcom_pull_task':'copy_bam_for_parallel_runs',
  854. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  855. 'analysis_description_xcom_key':'analysis_description',
  856. 'use_ephemeral_space':True,
  857. 'load_metrics_to_cram':True,
  858. 'samtools_command':'stats',
  859. 'threads':4,
  860. 'analysis_files_xcom_key':'samtools_stats'})
  861. ## PIPELINE
  862. copy_bam_for_parallel_runs >> run_samtools_stats
  863. ## TASK
  864. upload_samtools_stats_to_box = \
  865. PythonOperator(
  866. task_id='upload_samtools_stats_to_box',
  867. dag=dag,
  868. queue='hpc_4G',
  869. python_callable=upload_analysis_file_to_box,
  870. params={'xcom_pull_task':'run_samtools_stats',
  871. 'xcom_pull_files_key':'samtools_stats',
  872. 'analysis_tag':'Samtools-stats'})
  873. ## PIPELINE
  874. run_samtools_stats >> upload_samtools_stats_to_box
  875. ## TASK
  876. run_samtools_idxstats = \
  877. PythonOperator(
  878. task_id='run_samtools_idxstats',
  879. dag=dag,
  880. queue='hpc_4G4t',
  881. python_callable=run_samtools_for_cellranger,
  882. params={'xcom_pull_files_key':'run_samtools_stats',
  883. 'xcom_pull_task':'copy_bam_for_parallel_runs',
  884. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  885. 'analysis_description_xcom_key':'analysis_description',
  886. 'use_ephemeral_space':True,
  887. 'load_metrics_to_cram':True,
  888. 'samtools_command':'idxstats',
  889. 'threads':4,
  890. 'analysis_files_xcom_key':'samtools_idxstats'})
  891. ## PIPELINE
  892. run_samtools_stats >> run_samtools_idxstats
  893. ## TASK
  894. cleanup_samtools_stats_input = \
  895. PythonOperator(
  896. task_id='cleanup_samtools_stats_input',
  897. dag=dag,
  898. queue='hpc_4G',
  899. python_callable=clean_up_files,
  900. params={'xcom_pull_files_key':'run_samtools_stats',
  901. 'xcom_pull_task':'copy_bam_for_parallel_runs'})
  902. ## PIPELINE
  903. run_samtools_idxstats >> cleanup_samtools_stats_input
  904. ## TASK
  905. upload_samtools_idxstats_to_box = \
  906. PythonOperator(
  907. task_id='upload_samtools_idxstats_to_box',
  908. dag=dag,
  909. queue='hpc_4G',
  910. python_callable=upload_analysis_file_to_box,
  911. params={'xcom_pull_task':'run_samtools_idxstats',
  912. 'xcom_pull_files_key':'samtools_idxstats',
  913. 'analysis_tag':'Samtools-idxstats'})
  914. ## PIPELINE
  915. run_samtools_idxstats >> upload_samtools_idxstats_to_box
  916. ## TASK
  917. run_multiqc = \
  918. PythonOperator(
  919. task_id='run_multiqc',
  920. dag=dag,
  921. queue='hpc_4G',
  922. trigger_rule='none_failed_or_skipped',
  923. python_callable=run_multiqc_for_cellranger,
  924. params={
  925. 'list_of_analysis_xcoms_and_tasks':{
  926. 'run_cellranger':'cellranger_output',
  927. 'run_picard_alignment_summary':'picard_alignment_summary',
  928. 'run_picard_qual_summary':'picard_qual_summary',
  929. 'run_picard_rna_summary':'picard_rna_summary',
  930. 'run_picard_gc_summary':'picard_gc_summary',
  931. 'run_picard_base_dist_summary':'picard_base_summary',
  932. 'run_samtools_stats':'samtools_stats',
  933. 'run_samtools_idxstats':'samtools_idxstats'},
  934. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  935. 'analysis_description_xcom_key':'analysis_description',
  936. 'use_ephemeral_space':True,
  937. 'multiqc_html_file_xcom_key':'multiqc_html',
  938. 'multiqc_data_file_xcom_key':'multiqc_data',
  939. 'tool_order_list':['picad','samtools']})
  940. ## PIPELINE
  941. run_picard_alignment_summary >> run_multiqc
  942. run_picard_qual_summary >> run_multiqc
  943. run_picard_rna_summary >> run_multiqc
  944. run_picard_gc_summary >> run_multiqc
  945. run_picard_base_dist_summary >> run_multiqc
  946. run_samtools_idxstats >> run_multiqc
  947. ## TASK
  948. load_multiqc_html = \
  949. PythonOperator(
  950. task_id='load_multiqc_html',
  951. dag=dag,
  952. queue='hpc_4G',
  953. python_callable=load_analysis_files_func,
  954. params={'collection_name_task':'load_cellranger_result_to_db',
  955. 'collection_name_key':'sample_igf_id',
  956. 'file_name_task':'run_multiqc',
  957. 'file_name_key':'multiqc_html',
  958. 'analysis_name':'multiqc',
  959. 'collection_type':'MULTIQC_HTML',
  960. 'collection_table':'sample',
  961. 'output_files_key':'output_db_files'})
  962. ## PIPELINE
  963. run_multiqc >> load_multiqc_html
  964. ## TASK
  965. upload_multiqc_to_ftp = \
  966. PythonOperator(
  967. task_id='upload_multiqc_to_ftp',
  968. dag=dag,
  969. queue='hpc_4G',
  970. python_callable=ftp_files_upload_for_analysis,
  971. params={'xcom_pull_task':'load_multiqc_html',
  972. 'xcom_pull_files_key':'output_db_files',
  973. 'collection_name_task':'load_cellranger_result_to_db',
  974. 'collection_name_key':'sample_igf_id',
  975. 'collection_type':'FTP_MULTIQC_HTML',
  976. 'collection_table':'sample',
  977. 'collect_remote_file':True})
  978. ## PIPELINE
  979. load_multiqc_html >> upload_multiqc_to_ftp
  980. ## TASK
  981. upload_multiqc_to_box = \
  982. PythonOperator(
  983. task_id='upload_multiqc_to_box',
  984. dag=dag,
  985. queue='hpc_4G',
  986. python_callable=upload_analysis_file_to_box,
  987. params={'xcom_pull_task':'load_multiqc_html',
  988. 'xcom_pull_files_key':'output_db_files',
  989. 'analysis_tag':'multiqc_report'})
  990. ## PIPELINE
  991. load_multiqc_html >> upload_multiqc_to_box
  992. ## TASK
  993. update_analysis_and_status = \
  994. PythonOperator(
  995. task_id='update_analysis_and_status',
  996. dag=dag,
  997. queue='hpc_4G',
  998. python_callable=change_pipeline_status,
  999. trigger_rule='none_failed_or_skipped',
  1000. params={'new_status':'FINISHED',
  1001. 'no_change_status':'SEEDED'})
  1002. ## PIPELINE
  1003. upload_multiqc_to_ftp >> update_analysis_and_status
  1004. upload_scanpy_report_for_sc_5p_to_ftp >> update_analysis_and_status
  1005. upload_scanpy_report_for_sc_5p_to_box >> update_analysis_and_status
  1006. upload_cellbrowser_for_sc_5p_to_ftp >> update_analysis_and_status
  1007. upload_scirpy_report_for_vdj_to_ftp >> update_analysis_and_status
  1008. upload_scirpy_report_for_vdj_to_box >> update_analysis_and_status
  1009. upload_scirpy_report_for_vdj_b_to_ftp >> update_analysis_and_status
  1010. upload_scirpy_report_for_vdj_b_to_box >> update_analysis_and_status
  1011. upload_scirpy_report_for_vdj_t_to_ftp >> update_analysis_and_status
  1012. upload_scirpy_report_for_vdj_t_to_box >> update_analysis_and_status
  1013. upload_seurat_report_for_sc_5p_ftp >> update_analysis_and_status
  1014. upload_seurat_report_for_sc_5p_to_box >> update_analysis_and_status
  1015. upload_cellranger_results_to_irods >> update_analysis_and_status
  1016. upload_cellranger_report_to_ftp >> update_analysis_and_status
  1017. upload_cellranger_report_to_box >> update_analysis_and_status
  1018. upload_cram_to_irods >> update_analysis_and_status
  1019. ## TASK
  1020. update_qc_pages = \
  1021. PythonOperator(
  1022. task_id='update_qc_pages',
  1023. dag=dag,
  1024. queue='hpc_4G',
  1025. python_callable=create_and_update_qc_pages,
  1026. params={'use_ephemeral_space':True,
  1027. 'collection_type_list':[
  1028. 'FTP_MULTIQC_HTML',
  1029. 'FTP_SEURAT_HTML',
  1030. 'FTP_SCIRPY_VDJ_T_HTML',
  1031. 'FTP_SCIRPY_VDJ_B_HTML',
  1032. 'FTP_SCIRPY_VDJ_HTML',
  1033. 'FTP_CELLBROWSER',
  1034. 'FTP_SCANPY_HTML',
  1035. 'FTP_CELLRANGER_HTML']})
  1036. ## PIPELINE
  1037. update_analysis_and_status >> update_qc_pages