dag9_tenx_single_cell_immune_profiling.py 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045
  1. from datetime import timedelta
  2. import os,json,logging,subprocess
  3. from airflow.models import DAG,Variable
  4. from airflow.utils.dates import days_ago
  5. from airflow.operators.python_operator import PythonOperator
  6. from airflow.operators.python_operator import BranchPythonOperator
  7. from airflow.operators.dummy_operator import DummyOperator
  8. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import fetch_analysis_info_and_branch_func
  9. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import configure_cellranger_run_func
  10. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_sc_read_trimmming_func
  11. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_cellranger_tool
  12. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import decide_analysis_branch_func
  13. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_cellranger_result_to_db_func
  14. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import ftp_files_upload_for_analysis
  15. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import irods_files_upload_for_analysis
  16. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_singlecell_notebook_wrapper_func
  17. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_analysis_files_func
  18. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import task_branch_function
  19. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import upload_analysis_file_to_box
  20. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import convert_bam_to_cram_func
  21. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_picard_for_cellranger
  22. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_samtools_for_cellranger
  23. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_multiqc_for_cellranger
  24. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import index_and_copy_bam_for_parallel_analysis
  25. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import change_pipeline_status
  26. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import clean_up_files
  27. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import create_and_update_qc_pages
  28. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_cellranger_metrices_to_collection
  ## ARGS
  ## Defaults handed to the operators of this DAG through `default_args`.
  ## NOTE(review): `max_active_runs` and `catchup` look like DAG-level
  ## settings rather than operator arguments (the DAG below passes its own
  ## `max_active_runs`) - confirm they are meant to live here.
  default_args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=days_ago(2),
    email_on_failure=False,
    email_on_retry=False,
    retries=4,
    max_active_runs=10,
    catchup=True,
    retry_delay=timedelta(minutes=5),
    provide_context=True)
  ## Feature types handled by this DAG, read from an Airflow Variable.
  ## The value is used as a mapping further down (`.keys()` is called on it),
  ## so it has to be deserialised from JSON: without `deserialize_json=True`
  ## Variable.get hands back the raw string whenever the Variable is set.
  ## When the Variable is not defined the empty dict default is used.
  FEATURE_TYPE_LIST = \
    Variable.get(
      'tenx_single_cell_immune_profiling_feature_types',
      default_var={},
      deserialize_json=True)
  ## DAG
  ## DAG object for `dag9_tenx_single_cell_immune_profiling`; it has no
  ## schedule interval and its graph view is oriented left-to-right.
  dag = DAG(
    dag_id='dag9_tenx_single_cell_immune_profiling',
    default_args=default_args,
    schedule_interval=None,
    max_active_runs=20,
    concurrency=100,
    orientation='LR',
    tags=['hpc','analysis','tenx','sc'])
  54. with dag:
  ## TASK
  ## Branch operator `fetch_analysis_info`, running
  ## fetch_analysis_info_and_branch_func on the hpc_4G queue. `params`
  ## names the `no_analysis` task and the xcom keys for the analysis
  ## description and analysis info.
  fetch_analysis_info_and_branch = BranchPythonOperator(
    task_id='fetch_analysis_info',
    dag=dag,
    queue='hpc_4G',
    python_callable=fetch_analysis_info_and_branch_func,
    params={
      'no_analysis_task': 'no_analysis',
      'analysis_description_xcom_key': 'analysis_description',
      'analysis_info_xcom_key': 'analysis_info',
    })
  ## TASK
  ## Runs configure_cellranger_run_func. The `none_failed_or_skipped`
  ## trigger rule is set explicitly because this task sits downstream of
  ## the per-feature-type collector tasks.
  configure_cellranger_run = PythonOperator(
    task_id='configure_cellranger_run',
    dag=dag,
    queue='hpc_4G',
    trigger_rule='none_failed_or_skipped',
    python_callable=configure_cellranger_run_func,
    params={
      'xcom_pull_task_id': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'analysis_info_xcom_key': 'analysis_info',
      'library_csv_xcom_key': 'cellranger_library_csv',
    })
  ## For every key of FEATURE_TYPE_LIST build one sub-graph:
  ##   fetch_analysis_info -> <feature type branch> -> run_trim_*_0..9
  ##     -> collect_trimmed_files_* -> configure_cellranger_run
  for analysis_name in FEATURE_TYPE_LIST.keys():
    ## TASK
    ## Branch operator whose task_id is the feature type name itself;
    ## `task_prefix` is the prefix of the trimming task ids built below.
    task_branch = BranchPythonOperator(
      task_id=analysis_name,
      dag=dag,
      queue='hpc_4G',
      python_callable=task_branch_function,
      params={
        'xcom_pull_task_id': 'fetch_analysis_info',
        'analysis_info_xcom_key': 'analysis_info',
        'analysis_name': analysis_name,
        'task_prefix': 'run_trim',
      })
    ## TASK
    ## Fixed set of ten trimming tasks (run ids 0-9) per feature type
    run_trim_list = [
      PythonOperator(
        task_id='run_trim_{0}_{1}'.format(analysis_name,run_id),
        dag=dag,
        queue='hpc_4G',
        python_callable=run_sc_read_trimmming_func,
        params={
          'xcom_pull_task_id': 'fetch_analysis_info',
          'analysis_info_xcom_key': 'analysis_info',
          'analysis_description_xcom_key': 'analysis_description',
          'analysis_name': analysis_name,
          'run_id': run_id,
          'r1_length': 0,
          'r2_length': 0,
          'fastq_input_dir_tag': 'fastq_dir',
          'use_ephemeral_space': True,
          'fastq_output_dir_tag': 'output_path',
        })
      for run_id in range(10)]
    ## TASK
    ## Join point after the trimming tasks of this feature type
    collect_trimmed_files = DummyOperator(
      task_id='collect_trimmed_files_{0}'.format(analysis_name),
      dag=dag,
      trigger_rule='none_failed_or_skipped')
    ## PIPELINE
    fetch_analysis_info_and_branch >> task_branch >> run_trim_list
    run_trim_list >> collect_trimmed_files >> configure_cellranger_run
  ## TASK
  ## Placeholder task `no_analysis`; its id is the one passed to
  ## fetch_analysis_info as `no_analysis_task`.
  no_analysis = DummyOperator(
    dag=dag,
    task_id='no_analysis')
  ## PIPELINE
  fetch_analysis_info_and_branch >> no_analysis
  ## TASK
  ## Runs run_cellranger_tool on the hpc_64G16t24hr queue. The
  ## `cellranger_options` values (16 local cores, 64 local mem) line up
  ## with the resources named by that queue.
  run_cellranger = PythonOperator(
    task_id='run_cellranger',
    dag=dag,
    queue='hpc_64G16t24hr',
    python_callable=run_cellranger_tool,
    params={
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'library_csv_xcom_key': 'cellranger_library_csv',
      'library_csv_xcom_pull_task': 'configure_cellranger_run',
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_options': ['--localcores 16','--localmem 64'],
    })
  ## PIPELINE
  configure_cellranger_run >> run_cellranger
  ## TASK
  ## Branch operator that runs decide_analysis_branch_func after
  ## cellranger. `params` maps names to the ids of the downstream tasks.
  ## NOTE(review): `run_scirpy_for_vdj_task` points at `run_scirpy_for_vdj`,
  ## whose definition in this file sits inside a triple-quoted string and
  ## is therefore not part of the DAG - confirm the callable never
  ## selects it.
  ## NOTE(review): the key `run_scirpy_vdj_t_task` has no `_for_`, unlike
  ## its sibling `run_scirpy_for_vdj_b_task` - verify it matches what the
  ## callable looks up.
  decide_analysis_branch = BranchPythonOperator(
    task_id='decide_analysis_branch',
    dag=dag,
    queue='hpc_4G',
    python_callable=decide_analysis_branch_func,
    params={
      'load_cellranger_result_to_db_task': 'load_cellranger_result_to_db',
      'run_scanpy_for_sc_5p_task': 'run_scanpy_for_sc_5p',
      'run_scirpy_for_vdj_task': 'run_scirpy_for_vdj',
      'run_scirpy_for_vdj_b_task': 'run_scirpy_for_vdj_b',
      'run_scirpy_vdj_t_task': 'run_scirpy_for_vdj_t',
      'run_seurat_for_sc_5p_task': 'run_seurat_for_sc_5p',
      'convert_cellranger_bam_to_cram_task': 'convert_cellranger_bam_to_cram',
      'library_csv_xcom_key': 'cellranger_library_csv',
      'library_csv_xcom_pull_task': 'configure_cellranger_run',
    })
  ## PIPELINE
  run_cellranger >> decide_analysis_branch
  ## TASK
  ## Runs load_cellranger_result_to_db_func; `params` names the xcom keys
  ## `loaded_output_files`, `html_report_file` and `sample_igf_id`, which
  ## the three upload tasks below pull from this task.
  load_cellranger_result_to_db = PythonOperator(
    task_id='load_cellranger_result_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_cellranger_result_to_db_func,
    params={
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'CELLRANGER_MULTI',
      'html_collection_type': 'CELLRANGER_HTML',
      'collection_table': 'sample',
      'xcom_collection_name_key': 'sample_igf_id',
      'genome_column': 'genome_build',
      'analysis_name': 'cellranger_multi',
      'output_xcom_key': 'loaded_output_files',
      'html_xcom_key': 'html_report_file',
      'html_report_file_name': 'web_summary.html',
    })
  ## TASK
  ## FTP upload task fed from the `html_report_file` xcom key
  upload_cellranger_report_to_ftp = PythonOperator(
    task_id='upload_cellranger_report_to_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'html_report_file',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_CELLRANGER_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    })
  ## TASK
  ## Box upload task fed from the `html_report_file` xcom key
  upload_cellranger_report_to_box = PythonOperator(
    task_id='upload_cellranger_report_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'html_report_file',
      'analysis_tag': 'cellranger_multi',
    })
  ## TASK
  ## iRODS upload task fed from the `loaded_output_files` xcom key
  upload_cellranger_results_to_irods = PythonOperator(
    task_id='upload_cellranger_results_to_irods',
    dag=dag,
    queue='hpc_4G',
    python_callable=irods_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'loaded_output_files',
      'collection_name_key': 'sample_igf_id',
      'collection_name_task': 'load_cellranger_result_to_db',
      'analysis_name': 'cellranger_multi',
    })
  ## PIPELINE
  ## The three uploads all hang off the db loading task
  decide_analysis_branch >> load_cellranger_result_to_db >> [
    upload_cellranger_report_to_ftp,
    upload_cellranger_report_to_box,
    upload_cellranger_results_to_irods]
  ## TASK
  ## Notebook wrapper task configured with analysis_name `scanpy` and the
  ## `python3` kernel; its params name two output xcom keys,
  ## `scanpy_notebook` and `cellbrowser_dirs`.
  run_scanpy_for_sc_5p = PythonOperator(
    task_id='run_scanpy_for_sc_5p',
    dag=dag,
    queue='hpc_4G',
    python_callable=run_singlecell_notebook_wrapper_func,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'count_dir': 'count',
      'analysis_name': 'scanpy',
      'output_notebook_key': 'scanpy_notebook',
      'output_cellbrowser_key': 'cellbrowser_dirs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    })
  ## TASK
  ## Metrics loading task pointed at `count/metrics_summary.csv`
  load_cellranger_gex_matrics_to_db = PythonOperator(
    task_id='load_cellranger_gex_matrics_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_cellranger_metrices_to_collection,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'CELLRANGER_MULTI',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'metrics_summary_file': 'count/metrics_summary.csv',
      'attribute_prefix': 'CELLRANGER_COUNT',
    })
  ## TASK
  ## Loads the file named by the `scanpy_notebook` xcom key of the scanpy
  ## run; output xcom key is `output_db_files`
  load_scanpy_report_for_sc_5p_to_db = PythonOperator(
    task_id='load_scanpy_report_for_sc_5p_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_analysis_files_func,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scanpy_for_sc_5p',
      'file_name_key': 'scanpy_notebook',
      'analysis_name': 'scanpy_5p',
      'collection_type': 'SCANPY_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    })
  ## TASK
  ## FTP upload of the loaded scanpy report
  upload_scanpy_report_for_sc_5p_to_ftp = PythonOperator(
    task_id='upload_scanpy_report_for_sc_5p_to_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'load_scanpy_report_for_sc_5p_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCANPY_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    })
  ## TASK
  ## Box upload of the loaded scanpy report
  upload_scanpy_report_for_sc_5p_to_box = PythonOperator(
    task_id='upload_scanpy_report_for_sc_5p_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'load_scanpy_report_for_sc_5p_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'scanpy_single_sample_report',
    })
  ## TASK
  ## FTP upload fed from the `cellbrowser_dirs` xcom key of the scanpy run
  upload_cellbrowser_for_sc_5p_to_ftp = PythonOperator(
    task_id='upload_cellbrowser_for_sc_5p_to_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'run_scanpy_for_sc_5p',
      'xcom_pull_files_key': 'cellbrowser_dirs',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_CELLBROWSER',
      'collection_table': 'sample',
      'collect_remote_file': True,
    })
  ## PIPELINE
  ## scanpy run fans out to report loading, metrics loading and the
  ## cellbrowser upload; the loaded report then goes to ftp and box
  decide_analysis_branch >> run_scanpy_for_sc_5p >> [
    load_scanpy_report_for_sc_5p_to_db,
    load_cellranger_gex_matrics_to_db,
    upload_cellbrowser_for_sc_5p_to_ftp]
  load_scanpy_report_for_sc_5p_to_db >> [
    upload_scanpy_report_for_sc_5p_to_ftp,
    upload_scanpy_report_for_sc_5p_to_box]
  ## TASK
  ## NOTE: everything between the triple quotes below is a bare string
  ## literal, not executed code. The `run_scirpy_for_vdj` task and the
  ## load/upload tasks that hang off it are therefore NOT part of the DAG,
  ## even though decide_analysis_branch still lists `run_scirpy_for_vdj`
  ## in its params.
  """
  run_scirpy_for_vdj = \
    PythonOperator(
      task_id='run_scirpy_for_vdj',
      dag=dag,
      queue='hpc_4G',
      python_callable=run_singlecell_notebook_wrapper_func,
      params={'cellranger_xcom_key':'cellranger_output',
              'cellranger_xcom_pull_task':'run_cellranger',
              'scanpy_timeout':1200,
              'allow_errors':False,
              'kernel_name':'python3',
              'analysis_name':'scirpy',
              'vdj_dir':'vdj',
              'count_dir':'count',
              'output_notebook_key':'scirpy_notebook',
              'analysis_description_xcom_pull_task':'fetch_analysis_info',
              'analysis_description_xcom_key':'analysis_description'})
  load_cellranger_vdj_matrics_to_db = \
    PythonOperator(
      task_id='load_cellranger_vdj_matrics_to_db',
      dag=dag,
      queue='hpc_4G',
      python_callable=load_cellranger_metrices_to_collection,
      params={'cellranger_xcom_key':'cellranger_output',
              'cellranger_xcom_pull_task':'run_cellranger',
              'collection_type':'CELLRANGER_MULTI',
              'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'metrics_summary_file':'vdj/metrics_summary.csv',
              'attribute_prefix':'CELLRANGER_VDJ'})
  load_scirpy_report_for_vdj_to_db = \
    PythonOperator(
      task_id='load_scirpy_report_for_vdj_to_db',
      dag=dag,
      queue='hpc_4G',
      python_callable=load_analysis_files_func,
      params={'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'file_name_task':'run_scirpy_for_vdj',
              'file_name_key':'scirpy_notebook',
              'analysis_name':'scirpy_vdj',
              'collection_type':'SCIRPY_VDJ_HTML',
              'collection_table':'sample',
              'output_files_key':'output_db_files'})
  upload_scirpy_report_for_vdj_to_ftp = \
    PythonOperator(
      task_id='upload_scirpy_report_for_vdj_to_ftp',
      dag=dag,
      queue='hpc_4G',
      python_callable=ftp_files_upload_for_analysis,
      params={'xcom_pull_task':'load_scirpy_report_for_vdj_to_db',
              'xcom_pull_files_key':'output_db_files',
              'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'collection_type':'FTP_SCIRPY_VDJ_HTML',
              'collection_table':'sample',
              'collect_remote_file':True})
  upload_scirpy_report_for_vdj_to_box = \
    PythonOperator(
      task_id='upload_scirpy_report_for_vdj_to_box',
      dag=dag,
      queue='hpc_4G',
      python_callable=upload_analysis_file_to_box,
      params={'xcom_pull_task':'load_scirpy_report_for_vdj_to_db',
              'xcom_pull_files_key':'output_db_files',
              'analysis_tag':'scirpy_vdj_single_sample_report'})
  ## PIPELINE
  decide_analysis_branch >> run_scirpy_for_vdj
  run_scirpy_for_vdj >> load_scirpy_report_for_vdj_to_db
  run_scirpy_for_vdj >> load_cellranger_vdj_matrics_to_db
  load_scirpy_report_for_vdj_to_db >> upload_scirpy_report_for_vdj_to_ftp
  load_scirpy_report_for_vdj_to_db >> upload_scirpy_report_for_vdj_to_box
  """
  ## TASK
  ## Notebook wrapper task configured with analysis_name `scirpy`,
  ## the `python3` kernel and `vdj_b` as the vdj directory
  run_scirpy_for_vdj_b = PythonOperator(
    task_id='run_scirpy_for_vdj_b',
    dag=dag,
    queue='hpc_4G',
    python_callable=run_singlecell_notebook_wrapper_func,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'analysis_name': 'scirpy',
      'vdj_dir': 'vdj_b',
      'count_dir': 'count',
      'output_notebook_key': 'scirpy_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    })
  ## TASK
  ## Metrics loading task pointed at `vdj_b/metrics_summary.csv`
  load_cellranger_vdjB_matrics_to_db = PythonOperator(
    task_id='load_cellranger_vdjB_matrics_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_cellranger_metrices_to_collection,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'CELLRANGER_MULTI',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'metrics_summary_file': 'vdj_b/metrics_summary.csv',
      'attribute_prefix': 'CELLRANGER_VDJB',
    })
  ## TASK
  ## Loads the file named by the `scirpy_notebook` xcom key of the vdj_b
  ## run; output xcom key is `output_db_files`
  load_scirpy_report_for_vdj_b_to_db = PythonOperator(
    task_id='load_scirpy_report_for_vdj_b_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_analysis_files_func,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scirpy_for_vdj_b',
      'file_name_key': 'scirpy_notebook',
      'analysis_name': 'scirpy_vdj_b',
      'collection_type': 'SCIRPY_VDJ_B_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    })
  ## TASK
  ## FTP upload of the loaded vdj_b scirpy report
  upload_scirpy_report_for_vdj_b_to_ftp = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_b_to_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_b_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCIRPY_VDJ_B_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    })
  ## TASK
  ## Box upload task for the vdj_b scirpy report; `params` points it at the
  ## `output_db_files` xcom key of load_scirpy_report_for_vdj_b_to_db.
  ## The task_id carries the `scirpy` prefix so that it agrees with the
  ## variable name and with `upload_scirpy_report_for_vdj_t_to_box`.
  upload_scirpy_report_for_vdj_b_to_box = \
    PythonOperator(
      task_id='upload_scirpy_report_for_vdj_b_to_box',
      dag=dag,
      queue='hpc_4G',
      python_callable=upload_analysis_file_to_box,
      params={'xcom_pull_task':'load_scirpy_report_for_vdj_b_to_db',
              'xcom_pull_files_key':'output_db_files',
              'analysis_tag':'scirpy_vdj_b_single_sample_report'})
  ## PIPELINE
  ## vdj_b branch: the notebook run fans out to report loading and
  ## metrics loading; the loaded report is then uploaded to ftp and box
  decide_analysis_branch >> run_scirpy_for_vdj_b >> [
    load_scirpy_report_for_vdj_b_to_db,
    load_cellranger_vdjB_matrics_to_db]
  load_scirpy_report_for_vdj_b_to_db >> [
    upload_scirpy_report_for_vdj_b_to_ftp,
    upload_scirpy_report_for_vdj_b_to_box]
  ## TASK
  ## Notebook wrapper task configured with analysis_name `scirpy`,
  ## the `python3` kernel and `vdj_t` as the vdj directory
  run_scirpy_for_vdj_t = PythonOperator(
    task_id='run_scirpy_for_vdj_t',
    dag=dag,
    queue='hpc_4G',
    python_callable=run_singlecell_notebook_wrapper_func,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'analysis_name': 'scirpy',
      'vdj_dir': 'vdj_t',
      'count_dir': 'count',
      'output_notebook_key': 'scirpy_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    })
  ## TASK
  ## Metrics loading task pointed at `vdj_t/metrics_summary.csv`
  load_cellranger_vdjT_matrics_to_db = PythonOperator(
    task_id='load_cellranger_vdjT_matrics_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_cellranger_metrices_to_collection,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'CELLRANGER_MULTI',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'metrics_summary_file': 'vdj_t/metrics_summary.csv',
      'attribute_prefix': 'CELLRANGER_VDJT',
    })
  ## TASK
  ## Loads the file named by the `scirpy_notebook` xcom key of the vdj_t
  ## run; output xcom key is `output_db_files`
  load_scirpy_report_for_vdj_t_to_db = PythonOperator(
    task_id='load_scirpy_report_for_vdj_t_to_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_analysis_files_func,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scirpy_for_vdj_t',
      'file_name_key': 'scirpy_notebook',
      'analysis_name': 'scirpy_vdj_t',
      'collection_type': 'SCIRPY_VDJ_T_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    })
  ## TASK
  ## FTP upload of the loaded vdj_t scirpy report
  upload_scirpy_report_for_vdj_t_to_ftp = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_t_to_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_t_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCIRPY_VDJ_T_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    })
  ## TASK
  ## Box upload of the loaded vdj_t scirpy report
  upload_scirpy_report_for_vdj_t_to_box = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_t_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_t_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'scirpy_vdj_t_single_sample_report',
    })
  ## PIPELINE
  ## vdj_t branch: the notebook run fans out to report loading and
  ## metrics loading; the loaded report is then uploaded to ftp and box
  decide_analysis_branch >> run_scirpy_for_vdj_t >> [
    load_scirpy_report_for_vdj_t_to_db,
    load_cellranger_vdjT_matrics_to_db]
  load_scirpy_report_for_vdj_t_to_db >> [
    upload_scirpy_report_for_vdj_t_to_ftp,
    upload_scirpy_report_for_vdj_t_to_box]
  ## TASK
  ## Notebook wrapper task configured with analysis_name `seurat` and the
  ## `ir` kernel; the notebook xcom key is `seurat_notebook`
  run_seurat_for_sc_5p = PythonOperator(
    task_id='run_seurat_for_sc_5p',
    dag=dag,
    queue='hpc_4G',
    python_callable=run_singlecell_notebook_wrapper_func,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'ir',
      'analysis_name': 'seurat',
      'vdj_dir': 'vdj',
      'count_dir': 'count',
      'output_notebook_key': 'seurat_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    })
  ## TASK
  ## Loads the file named by the `seurat_notebook` xcom key of the seurat
  ## run; output xcom key is `output_db_files`
  load_seurat_report_for_sc_5p_db = PythonOperator(
    task_id='load_seurat_report_for_sc_5p_db',
    dag=dag,
    queue='hpc_4G',
    python_callable=load_analysis_files_func,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_seurat_for_sc_5p',
      'file_name_key': 'seurat_notebook',
      'analysis_name': 'seurat_5p',
      'collection_type': 'SEURAT_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    })
  ## TASK
  ## FTP upload of the loaded seurat report
  upload_seurat_report_for_sc_5p_ftp = PythonOperator(
    task_id='upload_seurat_report_for_sc_5p_ftp',
    dag=dag,
    queue='hpc_4G',
    python_callable=ftp_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'load_seurat_report_for_sc_5p_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SEURAT_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    })
  ## TASK
  ## Box upload of the loaded seurat report
  upload_seurat_report_for_sc_5p_to_box = PythonOperator(
    task_id='upload_seurat_report_for_sc_5p_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'load_seurat_report_for_sc_5p_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'seurat_single_sample_report',
    })
  ## PIPELINE
  ## seurat branch: run -> load report -> upload to ftp and box
  decide_analysis_branch >> run_seurat_for_sc_5p >> load_seurat_report_for_sc_5p_db
  load_seurat_report_for_sc_5p_db >> [
    upload_seurat_report_for_sc_5p_ftp,
    upload_seurat_report_for_sc_5p_to_box]
  ## TASK
  ## Runs convert_bam_to_cram_func on the hpc_4G4t queue with `threads` set
  ## to 4; the resulting xcom key is `cram_files`
  convert_cellranger_bam_to_cram = PythonOperator(
    task_id='convert_cellranger_bam_to_cram',
    dag=dag,
    queue='hpc_4G4t',
    python_callable=convert_bam_to_cram_func,
    params={
      'xcom_pull_files_key': 'cellranger_output',
      'xcom_pull_task': 'run_cellranger',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'threads': 4,
      'analysis_name': 'cellranger',
      'collection_type': 'ANALYSIS_CRAM',
      'collection_table': 'sample',
      'cram_files_xcom_key': 'cram_files',
    })
  ## TASK
  ## Branch operator running index_and_copy_bam_for_parallel_analysis;
  ## `list_of_tasks` holds the ids of the QC tasks it is given
  copy_bam_for_parallel_runs = BranchPythonOperator(
    task_id='copy_bam_for_parallel_runs',
    dag=dag,
    queue='hpc_4G',
    python_callable=index_and_copy_bam_for_parallel_analysis,
    params={
      'xcom_pull_files_key': 'cellranger_output',
      'xcom_pull_task': 'run_cellranger',
      'list_of_tasks': [
        'run_picard_alignment_summary',
        'run_picard_qual_summary',
        'run_picard_rna_summary',
        'run_picard_gc_summary',
        'run_picard_base_dist_summary',
        'run_samtools_stats'],
    })
  ## TASK
  ## iRODS upload fed from the `cram_files` xcom key of the cram conversion
  upload_cram_to_irods = PythonOperator(
    task_id='upload_cram_to_irods',
    dag=dag,
    queue='hpc_4G',
    python_callable=irods_files_upload_for_analysis,
    params={
      'xcom_pull_task': 'convert_cellranger_bam_to_cram',
      'xcom_pull_files_key': 'cram_files',
      'collection_name_key': 'sample_igf_id',
      'collection_name_task': 'load_cellranger_result_to_db',
      'analysis_name': 'cellranger_multi',
    })
  ## PIPELINE
  ## The bam copy waits for the cram conversion because we need to load
  ## metrics to cram
  decide_analysis_branch >> convert_cellranger_bam_to_cram >> [
    copy_bam_for_parallel_runs,
    upload_cram_to_irods]
  ## TASK
  ## Picard task for `CollectAlignmentSummaryMetrics`; its input is pulled
  ## from copy_bam_for_parallel_runs under the xcom key
  ## `run_picard_alignment_summary`
  run_picard_alignment_summary = PythonOperator(
    task_id='run_picard_alignment_summary',
    dag=dag,
    queue='hpc_4G',
    python_callable=run_picard_for_cellranger,
    params={
      'xcom_pull_files_key': 'run_picard_alignment_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'load_metrics_to_cram': True,
      'java_param': '-Xmx4g',
      'picard_command': 'CollectAlignmentSummaryMetrics',
      'picard_option': {},
      'analysis_files_xcom_key': 'picard_alignment_summary',
      'bam_files_xcom_key': None,
    })
  ## TASK
  ## clean_up_files is pointed at the same xcom entry the picard task
  ## reads its input from
  cleanup_picard_alignment_summary_input = PythonOperator(
    task_id='cleanup_picard_alignment_summary_input',
    dag=dag,
    queue='hpc_4G',
    python_callable=clean_up_files,
    params={
      'xcom_pull_files_key': 'run_picard_alignment_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
    })
  ## TASK
  ## Box upload fed from the `picard_alignment_summary` xcom key
  upload_picard_alignment_summary_to_box = PythonOperator(
    task_id='upload_picard_alignment_summary_to_box',
    dag=dag,
    queue='hpc_4G',
    python_callable=upload_analysis_file_to_box,
    params={
      'xcom_pull_task': 'run_picard_alignment_summary',
      'xcom_pull_files_key': 'picard_alignment_summary',
      'analysis_tag': 'Picard-CollectAlignmentSummaryMetrics',
    })
  ## PIPELINE
  ## Cleanup and box upload both run after the picard task
  copy_bam_for_parallel_runs >> run_picard_alignment_summary >> [
    cleanup_picard_alignment_summary_input,
    upload_picard_alignment_summary_to_box]
  677. ## TASK
  678. run_picard_qual_summary = \
  679. PythonOperator(
  680. task_id='run_picard_qual_summary',
  681. dag=dag,
  682. queue='hpc_4G',
  683. python_callable=run_picard_for_cellranger,
  684. params={'xcom_pull_files_key':'run_picard_qual_summary',
  685. 'xcom_pull_task':'copy_bam_for_parallel_runs',
  686. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  687. 'analysis_description_xcom_key':'analysis_description',
  688. 'use_ephemeral_space':True,
  689. 'load_metrics_to_cram':True,
  690. 'java_param':'-Xmx4g',
  691. 'picard_command':'QualityScoreDistribution',
  692. 'picard_option':{},
  693. 'analysis_files_xcom_key':'picard_qual_summary',
  694. 'bam_files_xcom_key':None})
  695. ## PIPELINE
  696. copy_bam_for_parallel_runs >> run_picard_qual_summary
  697. ## TASK
  698. cleanup_picard_qual_summary_input = \
  699. PythonOperator(
  700. task_id='cleanup_picard_qual_summary_input',
  701. dag=dag,
  702. queue='hpc_4G',
  703. python_callable=clean_up_files,
  704. params={'xcom_pull_files_key':'run_picard_qual_summary',
  705. 'xcom_pull_task':'copy_bam_for_parallel_runs'})
  706. ## PIPELINE
  707. run_picard_qual_summary >> cleanup_picard_qual_summary_input
  708. ## TASK
  709. upload_picard_qual_summary_to_box = \
  710. PythonOperator(
  711. task_id='upload_picard_qual_summary_to_box',
  712. dag=dag,
  713. queue='hpc_4G',
  714. python_callable=upload_analysis_file_to_box,
  715. params={'xcom_pull_task':'run_picard_qual_summary',
  716. 'xcom_pull_files_key':'picard_qual_summary',
  717. 'analysis_tag':'Picard-QualityScoreDistribution'})
  718. ## PIPELINE
  719. run_picard_qual_summary >> upload_picard_qual_summary_to_box
  720. ## TASK
  721. run_picard_rna_summary = \
  722. PythonOperator(
  723. task_id='run_picard_rna_summary',
  724. dag=dag,
  725. queue='hpc_8G',
  726. python_callable=run_picard_for_cellranger,
  727. params={'xcom_pull_files_key':'run_picard_rna_summary',
  728. 'xcom_pull_task':'copy_bam_for_parallel_runs',
  729. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  730. 'analysis_description_xcom_key':'analysis_description',
  731. 'use_ephemeral_space':True,
  732. 'load_metrics_to_cram':True,
  733. 'java_param':'-Xmx7g',
  734. 'picard_command':'CollectRnaSeqMetrics',
  735. 'picard_option':{},
  736. 'analysis_files_xcom_key':'picard_rna_summary',
  737. 'bam_files_xcom_key':None})
  738. ## PIPELINE
  739. copy_bam_for_parallel_runs >> run_picard_rna_summary
  740. ## TASK
  741. cleanup_picard_rna_summary_input = \
  742. PythonOperator(
  743. task_id='cleanup_picard_rna_summary_input',
  744. dag=dag,
  745. queue='hpc_4G',
  746. python_callable=clean_up_files,
  747. params={'xcom_pull_files_key':'run_picard_rna_summary',
  748. 'xcom_pull_task':'copy_bam_for_parallel_runs'})
  749. ## PIPELINE
  750. run_picard_rna_summary >> cleanup_picard_rna_summary_input
  751. ## TASK
  752. upload_picard_rna_summary_to_box = \
  753. PythonOperator(
  754. task_id='upload_picard_rna_summary_to_box',
  755. dag=dag,
  756. queue='hpc_4G',
  757. python_callable=upload_analysis_file_to_box,
  758. params={'xcom_pull_task':'run_picard_rna_summary',
  759. 'xcom_pull_files_key':'picard_rna_summary',
  760. 'analysis_tag':'Picard-CollectRnaSeqMetrics'})
  761. ## PIPELINE
  762. run_picard_rna_summary >> upload_picard_rna_summary_to_box
  763. ## TASK
  764. run_picard_gc_summary = \
  765. PythonOperator(
  766. task_id='run_picard_gc_summary',
  767. dag=dag,
  768. queue='hpc_4G',
  769. python_callable=run_picard_for_cellranger,
  770. params={'xcom_pull_files_key':'run_picard_gc_summary',
  771. 'xcom_pull_task':'copy_bam_for_parallel_runs',
  772. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  773. 'analysis_description_xcom_key':'analysis_description',
  774. 'use_ephemeral_space':True,
  775. 'load_metrics_to_cram':True,
  776. 'java_param':'-Xmx4g',
  777. 'picard_command':'CollectGcBiasMetrics',
  778. 'picard_option':{},
  779. 'analysis_files_xcom_key':'picard_gc_summary',
  780. 'bam_files_xcom_key':None})
  781. ## PIPELINE
  782. copy_bam_for_parallel_runs >> run_picard_gc_summary
  783. ## TASK
  784. cleanup_picard_gc_summary_input = \
  785. PythonOperator(
  786. task_id='cleanup_picard_gc_summary_input',
  787. dag=dag,
  788. queue='hpc_4G',
  789. python_callable=clean_up_files,
  790. params={'xcom_pull_files_key':'run_picard_gc_summary',
  791. 'xcom_pull_task':'copy_bam_for_parallel_runs'})
  792. ## PIPELINE
  793. run_picard_gc_summary >> cleanup_picard_gc_summary_input
  794. ## TASK
  795. upload_picard_gc_summary_to_box = \
  796. PythonOperator(
  797. task_id='upload_picard_gc_summary_to_box',
  798. dag=dag,
  799. queue='hpc_4G',
  800. python_callable=upload_analysis_file_to_box,
  801. params={'xcom_pull_task':'run_picard_gc_summary',
  802. 'xcom_pull_files_key':'picard_gc_summary',
  803. 'analysis_tag':'Picard-CollectGcBiasMetrics'})
  804. ## PIPELINE
  805. run_picard_gc_summary >> upload_picard_gc_summary_to_box
  806. ## TASK
  807. run_picard_base_dist_summary = \
  808. PythonOperator(
  809. task_id='run_picard_base_dist_summary',
  810. dag=dag,
  811. queue='hpc_4G',
  812. python_callable=run_picard_for_cellranger,
  813. params={'xcom_pull_files_key':'run_picard_base_dist_summary',
  814. 'xcom_pull_task':'copy_bam_for_parallel_runs',
  815. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  816. 'analysis_description_xcom_key':'analysis_description',
  817. 'use_ephemeral_space':True,
  818. 'load_metrics_to_cram':True,
  819. 'java_param':'-Xmx4g',
  820. 'picard_command':'CollectBaseDistributionByCycle',
  821. 'picard_option':{},
  822. 'analysis_files_xcom_key':'picard_base_summary',
  823. 'bam_files_xcom_key':None})
  824. ## PIPELINE
  825. copy_bam_for_parallel_runs >> run_picard_base_dist_summary
  826. ## TASK
  827. cleanup_picard_base_dist_summary_input = \
  828. PythonOperator(
  829. task_id='cleanup_picard_base_dist_summary_input',
  830. dag=dag,
  831. queue='hpc_4G',
  832. python_callable=clean_up_files,
  833. params={'xcom_pull_files_key':'run_picard_base_dist_summary',
  834. 'xcom_pull_task':'copy_bam_for_parallel_runs'})
  835. ## PIPELINE
  836. run_picard_base_dist_summary >> cleanup_picard_base_dist_summary_input
  837. ## TASK
  838. upload_picard_base_dist_summary_to_box = \
  839. PythonOperator(
  840. task_id='upload_picard_base_dist_summary_to_box',
  841. dag=dag,
  842. queue='hpc_4G',
  843. python_callable=upload_analysis_file_to_box,
  844. params={'xcom_pull_task':'run_picard_base_dist_summary',
  845. 'xcom_pull_files_key':'picard_base_summary',
  846. 'analysis_tag':'Picard-CollectBaseDistributionByCycle'})
  847. ## PIPELINE
  848. run_picard_base_dist_summary >> upload_picard_base_dist_summary_to_box
  849. ## TASK
  850. run_samtools_stats = \
  851. PythonOperator(
  852. task_id='run_samtools_stats',
  853. dag=dag,
  854. queue='hpc_4G4t',
  855. python_callable=run_samtools_for_cellranger,
  856. params={'xcom_pull_files_key':'run_samtools_stats',
  857. 'xcom_pull_task':'copy_bam_for_parallel_runs',
  858. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  859. 'analysis_description_xcom_key':'analysis_description',
  860. 'use_ephemeral_space':True,
  861. 'load_metrics_to_cram':True,
  862. 'samtools_command':'stats',
  863. 'threads':4,
  864. 'analysis_files_xcom_key':'samtools_stats'})
  865. ## PIPELINE
  866. copy_bam_for_parallel_runs >> run_samtools_stats
  867. ## TASK
  868. upload_samtools_stats_to_box = \
  869. PythonOperator(
  870. task_id='upload_samtools_stats_to_box',
  871. dag=dag,
  872. queue='hpc_4G',
  873. python_callable=upload_analysis_file_to_box,
  874. params={'xcom_pull_task':'run_samtools_stats',
  875. 'xcom_pull_files_key':'samtools_stats',
  876. 'analysis_tag':'Samtools-stats'})
  877. ## PIPELINE
  878. run_samtools_stats >> upload_samtools_stats_to_box
  879. ## TASK
  880. run_samtools_idxstats = \
  881. PythonOperator(
  882. task_id='run_samtools_idxstats',
  883. dag=dag,
  884. queue='hpc_4G4t',
  885. python_callable=run_samtools_for_cellranger,
  886. params={'xcom_pull_files_key':'run_samtools_stats',
  887. 'xcom_pull_task':'copy_bam_for_parallel_runs',
  888. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  889. 'analysis_description_xcom_key':'analysis_description',
  890. 'use_ephemeral_space':True,
  891. 'load_metrics_to_cram':True,
  892. 'samtools_command':'idxstats',
  893. 'threads':4,
  894. 'analysis_files_xcom_key':'samtools_idxstats'})
  895. ## PIPELINE
  896. run_samtools_stats >> run_samtools_idxstats
  897. ## TASK
  898. cleanup_samtools_stats_input = \
  899. PythonOperator(
  900. task_id='cleanup_samtools_stats_input',
  901. dag=dag,
  902. queue='hpc_4G',
  903. python_callable=clean_up_files,
  904. params={'xcom_pull_files_key':'run_samtools_stats',
  905. 'xcom_pull_task':'copy_bam_for_parallel_runs'})
  906. ## PIPELINE
  907. run_samtools_idxstats >> cleanup_samtools_stats_input
  908. ## TASK
  909. upload_samtools_idxstats_to_box = \
  910. PythonOperator(
  911. task_id='upload_samtools_idxstats_to_box',
  912. dag=dag,
  913. queue='hpc_4G',
  914. python_callable=upload_analysis_file_to_box,
  915. params={'xcom_pull_task':'run_samtools_idxstats',
  916. 'xcom_pull_files_key':'samtools_idxstats',
  917. 'analysis_tag':'Samtools-idxstats'})
  918. ## PIPELINE
  919. run_samtools_idxstats >> upload_samtools_idxstats_to_box
  920. ## TASK
  921. run_multiqc = \
  922. PythonOperator(
  923. task_id='run_multiqc',
  924. dag=dag,
  925. queue='hpc_4G',
  926. trigger_rule='none_failed_or_skipped',
  927. python_callable=run_multiqc_for_cellranger,
  928. params={
  929. 'list_of_analysis_xcoms_and_tasks':{
  930. 'run_cellranger':'cellranger_output',
  931. 'run_picard_alignment_summary':'picard_alignment_summary',
  932. 'run_picard_qual_summary':'picard_qual_summary',
  933. 'run_picard_rna_summary':'picard_rna_summary',
  934. 'run_picard_gc_summary':'picard_gc_summary',
  935. 'run_picard_base_dist_summary':'picard_base_summary',
  936. 'run_samtools_stats':'samtools_stats',
  937. 'run_samtools_idxstats':'samtools_idxstats'},
  938. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  939. 'analysis_description_xcom_key':'analysis_description',
  940. 'use_ephemeral_space':True,
  941. 'multiqc_html_file_xcom_key':'multiqc_html',
  942. 'multiqc_data_file_xcom_key':'multiqc_data',
  943. 'tool_order_list':['picad','samtools']})
  944. ## PIPELINE
  945. run_picard_alignment_summary >> run_multiqc
  946. run_picard_qual_summary >> run_multiqc
  947. run_picard_rna_summary >> run_multiqc
  948. run_picard_gc_summary >> run_multiqc
  949. run_picard_base_dist_summary >> run_multiqc
  950. run_samtools_idxstats >> run_multiqc
  951. ## TASK
  952. load_multiqc_html = \
  953. PythonOperator(
  954. task_id='load_multiqc_html',
  955. dag=dag,
  956. queue='hpc_4G',
  957. python_callable=load_analysis_files_func,
  958. params={'collection_name_task':'load_cellranger_result_to_db',
  959. 'collection_name_key':'sample_igf_id',
  960. 'file_name_task':'run_multiqc',
  961. 'file_name_key':'multiqc_html',
  962. 'analysis_name':'multiqc',
  963. 'collection_type':'MULTIQC_HTML',
  964. 'collection_table':'sample',
  965. 'output_files_key':'output_db_files'})
  966. ## PIPELINE
  967. run_multiqc >> load_multiqc_html
  968. ## TASK
  969. upload_multiqc_to_ftp = \
  970. PythonOperator(
  971. task_id='upload_multiqc_to_ftp',
  972. dag=dag,
  973. queue='hpc_4G',
  974. python_callable=ftp_files_upload_for_analysis,
  975. params={'xcom_pull_task':'load_multiqc_html',
  976. 'xcom_pull_files_key':'output_db_files',
  977. 'collection_name_task':'load_cellranger_result_to_db',
  978. 'collection_name_key':'sample_igf_id',
  979. 'collection_type':'FTP_MULTIQC_HTML',
  980. 'collection_table':'sample',
  981. 'collect_remote_file':True})
  982. ## PIPELINE
  983. load_multiqc_html >> upload_multiqc_to_ftp
  984. ## TASK
  985. upload_multiqc_to_box = \
  986. PythonOperator(
  987. task_id='upload_multiqc_to_box',
  988. dag=dag,
  989. queue='hpc_4G',
  990. python_callable=upload_analysis_file_to_box,
  991. params={'xcom_pull_task':'load_multiqc_html',
  992. 'xcom_pull_files_key':'output_db_files',
  993. 'analysis_tag':'multiqc_report'})
  994. ## PIPELINE
  995. load_multiqc_html >> upload_multiqc_to_box
  996. ## TASK
  997. update_analysis_and_status = \
  998. PythonOperator(
  999. task_id='update_analysis_and_status',
  1000. dag=dag,
  1001. queue='hpc_4G',
  1002. python_callable=change_pipeline_status,
  1003. trigger_rule='none_failed_or_skipped',
  1004. params={'new_status':'FINISHED',
  1005. 'no_change_status':'SEEDED'})
  1006. ## PIPELINE
  1007. upload_multiqc_to_ftp >> update_analysis_and_status
  1008. upload_scanpy_report_for_sc_5p_to_ftp >> update_analysis_and_status
  1009. upload_scanpy_report_for_sc_5p_to_box >> update_analysis_and_status
  1010. upload_cellbrowser_for_sc_5p_to_ftp >> update_analysis_and_status
  1011. #upload_scirpy_report_for_vdj_to_ftp >> update_analysis_and_status # no more ambiguous VDJ
  1012. #upload_scirpy_report_for_vdj_to_box >> update_analysis_and_status
  1013. upload_scirpy_report_for_vdj_b_to_ftp >> update_analysis_and_status
  1014. upload_scirpy_report_for_vdj_b_to_box >> update_analysis_and_status
  1015. upload_scirpy_report_for_vdj_t_to_ftp >> update_analysis_and_status
  1016. upload_scirpy_report_for_vdj_t_to_box >> update_analysis_and_status
  1017. upload_seurat_report_for_sc_5p_ftp >> update_analysis_and_status
  1018. upload_seurat_report_for_sc_5p_to_box >> update_analysis_and_status
  1019. upload_cellranger_results_to_irods >> update_analysis_and_status
  1020. upload_cellranger_report_to_ftp >> update_analysis_and_status
  1021. upload_cellranger_report_to_box >> update_analysis_and_status
  1022. upload_cram_to_irods >> update_analysis_and_status
  1023. ## TASK
  1024. update_qc_pages = \
  1025. PythonOperator(
  1026. task_id='update_qc_pages',
  1027. dag=dag,
  1028. queue='hpc_4G',
  1029. python_callable=create_and_update_qc_pages,
  1030. params={'use_ephemeral_space':True,
  1031. 'collection_type_list':[
  1032. 'FTP_MULTIQC_HTML',
  1033. 'FTP_SEURAT_HTML',
  1034. 'FTP_SCIRPY_VDJ_T_HTML',
  1035. 'FTP_SCIRPY_VDJ_B_HTML',
  1036. 'FTP_SCIRPY_VDJ_HTML',
  1037. 'FTP_CELLBROWSER',
  1038. 'FTP_SCANPY_HTML',
  1039. 'FTP_CELLRANGER_HTML']})
  1040. ## PIPELINE
  1041. update_analysis_and_status >> update_qc_pages