dag9_tenx_single_cell_immune_profiling.py 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043
  1. from datetime import timedelta
  2. import os,json,logging,subprocess
  3. from airflow.models import DAG,Variable
  4. from airflow.utils.dates import days_ago
  5. from airflow.operators.python_operator import PythonOperator
  6. from airflow.operators.python_operator import BranchPythonOperator
  7. from airflow.operators.dummy_operator import DummyOperator
  8. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import fetch_analysis_info_and_branch_func
  9. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import configure_cellranger_run_func
  10. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_sc_read_trimmming_func
  11. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_cellranger_tool
  12. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import decide_analysis_branch_func
  13. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_cellranger_result_to_db_func
  14. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import ftp_files_upload_for_analysis
  15. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import irods_files_upload_for_analysis
  16. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_singlecell_notebook_wrapper_func
  17. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_analysis_files_func
  18. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import task_branch_function
  19. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import upload_analysis_file_to_box
  20. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import convert_bam_to_cram_func
  21. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_picard_for_cellranger
  22. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_samtools_for_cellranger
  23. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_multiqc_for_cellranger
  24. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import index_and_copy_bam_for_parallel_analysis
  25. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import change_pipeline_status
  26. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import clean_up_files
  27. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import create_and_update_qc_pages
  28. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_cellranger_metrices_to_collection
  ## ARGS
  # Operator-level defaults only: Airflow applies these to every task created
  # for this DAG. DAG-level settings (max_active_runs, catchup) are NOT operator
  # arguments and are ignored when placed here, so they are passed to DAG()
  # below instead.
  default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(2),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 4,
    'retry_delay': timedelta(minutes=5),
    'provide_context': True,
  }

  # Mapping whose keys are the feature-type (analysis) names; one trimming
  # sub-graph is generated per key further down in this file.
  # Airflow stores Variables as text, so the value must be JSON-decoded to get
  # a dict back; without deserialize_json the stored value comes back as a str
  # and the later `.keys()` call fails. When the Variable is not defined the
  # empty-dict default is returned unchanged.
  FEATURE_TYPE_LIST = \
    Variable.get(
      'tenx_single_cell_immune_profiling_feature_types',
      default_var={},
      deserialize_json=True)

  ## DAG
  # Manually/externally triggered DAG (no schedule). max_active_runs and
  # catchup are set here because this is the only place Airflow honours them.
  dag = \
    DAG(
      dag_id='dag9_tenx_single_cell_immune_profiling',
      schedule_interval=None,
      max_active_runs=10,
      catchup=True,
      tags=['hpc','analysis','tenx','sc'],
      default_args=default_args,
      orientation='LR')
  52. with dag:
  ## TASK
  # First task of the DAG. It is a branching task: its callable decides which
  # of its downstream tasks run. The downstream tasks wired in this file are
  # the per-feature-type branch tasks and `no_analysis`.
  # The params name the fallback task id and the XCom keys handed to the
  # callable.
  fetch_analysis_info_and_branch = BranchPythonOperator(
    task_id='fetch_analysis_info',
    python_callable=fetch_analysis_info_and_branch_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'no_analysis_task': 'no_analysis',
      'analysis_description_xcom_key': 'analysis_description',
      'analysis_info_xcom_key': 'analysis_info',
    },
  )
  ## TASK
  # Join point after the per-feature-type trimming sub-graphs. The trigger rule
  # lets it run when upstream tasks were skipped by a branch, as long as none
  # failed. It reads the analysis XComs published by `fetch_analysis_info` and
  # is given the key under which the cellranger library csv is exchanged.
  configure_cellranger_run = PythonOperator(
    task_id='configure_cellranger_run',
    python_callable=configure_cellranger_run_func,
    trigger_rule='none_failed_or_skipped',
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task_id': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'analysis_info_xcom_key': 'analysis_info',
      'library_csv_xcom_key': 'cellranger_library_csv',
    },
  )
  # Build one trimming sub-graph per configured feature type:
  #   fetch_analysis_info >> <analysis_name> >> run_trim_<analysis_name>_0..9
  #     >> collect_trimmed_files_<analysis_name> >> configure_cellranger_run
  for analysis_name in FEATURE_TYPE_LIST.keys():
    ## TASK
    # Branching task named after the feature type; 'task_prefix' is the
    # prefix shared by the trimming task ids created below.
    task_branch = BranchPythonOperator(
      task_id=analysis_name,
      python_callable=task_branch_function,
      queue='hpc_4G',
      dag=dag,
      params={
        'xcom_pull_task_id': 'fetch_analysis_info',
        'analysis_info_xcom_key': 'analysis_info',
        'analysis_name': analysis_name,
        'task_prefix': 'run_trim',
      },
    )
    ## TASK
    # A fixed pool of ten trimming tasks (run ids 0-9) per feature type.
    run_trim_list = [
      PythonOperator(
        task_id='run_trim_{0}_{1}'.format(analysis_name, run_id),
        python_callable=run_sc_read_trimmming_func,
        queue='hpc_4G',
        dag=dag,
        params={
          'xcom_pull_task_id': 'fetch_analysis_info',
          'analysis_info_xcom_key': 'analysis_info',
          'analysis_description_xcom_key': 'analysis_description',
          'analysis_name': analysis_name,
          'run_id': run_id,
          'r1_length': 0,
          'r2_length': 0,
          'fastq_input_dir_tag': 'fastq_dir',
          'use_ephemeral_space': True,
          'fastq_output_dir_tag': 'output_path',
        },
      )
      for run_id in range(10)
    ]
    ## TASK
    # Fan-in marker for this feature type; it tolerates trimming tasks that
    # were skipped by the branch above.
    collect_trimmed_files = DummyOperator(
      task_id='collect_trimmed_files_{0}'.format(analysis_name),
      trigger_rule='none_failed_or_skipped',
      dag=dag,
    )
    ## PIPELINE
    fetch_analysis_info_and_branch.set_downstream(task_branch)
    task_branch.set_downstream(run_trim_list)
    collect_trimmed_files.set_upstream(run_trim_list)
    collect_trimmed_files.set_downstream(configure_cellranger_run)
  ## TASK
  # Do-nothing task placed directly after `fetch_analysis_info`; its id is the
  # value passed to that task as 'no_analysis_task'.
  no_analysis = DummyOperator(task_id='no_analysis', dag=dag)
  ## PIPELINE
  fetch_analysis_info_and_branch.set_downstream(no_analysis)
  ## TASK
  # Runs the cellranger step on the large HPC queue. The --localcores and
  # --localmem options mirror the resources in the queue name (16 threads,
  # 64G). It reads the analysis description from `fetch_analysis_info` and
  # the library csv from `configure_cellranger_run`, and is given
  # 'cellranger_output' as the XCom key for its result.
  run_cellranger = PythonOperator(
    task_id='run_cellranger',
    python_callable=run_cellranger_tool,
    queue='hpc_64G16t24hr',
    dag=dag,
    params={
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'library_csv_xcom_key': 'cellranger_library_csv',
      'library_csv_xcom_pull_task': 'configure_cellranger_run',
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_options': ['--localcores 16', '--localmem 64'],
    },
  )
  ## PIPELINE
  configure_cellranger_run.set_downstream(run_cellranger)
  ## TASK
  # Branching task that follows cellranger. The params hand the callable the
  # task ids of the candidate downstream analyses plus the location of the
  # library csv XCom.
  # NOTE(review): 'run_scirpy_for_vdj' is listed here, but no task with that id
  # is created while the generic VDJ section of this file stays disabled --
  # confirm the callable never selects it.
  decide_analysis_branch = BranchPythonOperator(
    task_id='decide_analysis_branch',
    python_callable=decide_analysis_branch_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'load_cellranger_result_to_db_task': 'load_cellranger_result_to_db',
      'run_scanpy_for_sc_5p_task': 'run_scanpy_for_sc_5p',
      'run_scirpy_for_vdj_task': 'run_scirpy_for_vdj',
      'run_scirpy_for_vdj_b_task': 'run_scirpy_for_vdj_b',
      'run_scirpy_vdj_t_task': 'run_scirpy_for_vdj_t',
      'run_seurat_for_sc_5p_task': 'run_seurat_for_sc_5p',
      'convert_cellranger_bam_to_cram_task': 'convert_cellranger_bam_to_cram',
      'library_csv_xcom_key': 'cellranger_library_csv',
      'library_csv_xcom_pull_task': 'configure_cellranger_run',
    },
  )
  ## PIPELINE
  run_cellranger.set_downstream(decide_analysis_branch)
  ## TASK
  # Registers the cellranger output produced by `run_cellranger`. The params
  # give it 'sample_igf_id' (collection name), 'loaded_output_files' and
  # 'html_report_file' as the XCom keys for its results; the upload tasks
  # below pull those same keys from this task.
  load_cellranger_result_to_db = PythonOperator(
    task_id='load_cellranger_result_to_db',
    python_callable=load_cellranger_result_to_db_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'CELLRANGER_MULTI',
      'html_collection_type': 'CELLRANGER_HTML',
      'collection_table': 'sample',
      'xcom_collection_name_key': 'sample_igf_id',
      'genome_column': 'genome_build',
      'analysis_name': 'cellranger_multi',
      'output_xcom_key': 'loaded_output_files',
      'html_xcom_key': 'html_report_file',
      'html_report_file_name': 'web_summary.html',
    },
  )
  # FTP copy of the cellranger html report.
  upload_cellranger_report_to_ftp = PythonOperator(
    task_id='upload_cellranger_report_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'html_report_file',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_CELLRANGER_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    },
  )
  # Box copy of the same html report.
  upload_cellranger_report_to_box = PythonOperator(
    task_id='upload_cellranger_report_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'html_report_file',
      'analysis_tag': 'cellranger_multi',
    },
  )
  # iRODS upload of the loaded output files.
  upload_cellranger_results_to_irods = PythonOperator(
    task_id='upload_cellranger_results_to_irods',
    python_callable=irods_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'loaded_output_files',
      'collection_name_key': 'sample_igf_id',
      'collection_name_task': 'load_cellranger_result_to_db',
      'analysis_name': 'cellranger_multi',
    },
  )
  ## PIPELINE
  decide_analysis_branch.set_downstream(load_cellranger_result_to_db)
  load_cellranger_result_to_db.set_downstream([
    upload_cellranger_report_to_ftp,
    upload_cellranger_report_to_box,
    upload_cellranger_results_to_irods,
  ])
  ## TASK
  # Scanpy notebook run on the 'count' directory of the cellranger output
  # (python3 kernel, timeout value 1200, errors not allowed). It is given the
  # XCom keys 'scanpy_notebook' and 'cellbrowser_dirs' for its outputs.
  run_scanpy_for_sc_5p = PythonOperator(
    task_id='run_scanpy_for_sc_5p',
    python_callable=run_singlecell_notebook_wrapper_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'count_dir': 'count',
      'analysis_name': 'scanpy',
      'output_notebook_key': 'scanpy_notebook',
      'output_cellbrowser_key': 'cellbrowser_dirs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    },
  )
  # Loads the gene-expression metrics summary (count/metrics_summary.csv) with
  # the CELLRANGER_COUNT attribute prefix.
  load_cellranger_gex_matrics_to_db = PythonOperator(
    task_id='load_cellranger_gex_matrics_to_db',
    python_callable=load_cellranger_metrices_to_collection,
    queue='hpc_4G',
    dag=dag,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'CELLRANGER_MULTI',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'metrics_summary_file': 'count/metrics_summary.csv',
      'attribute_prefix': 'CELLRANGER_COUNT',
    },
  )
  # Registers the scanpy notebook; its 'output_db_files' XCom feeds the two
  # report uploads below.
  load_scanpy_report_for_sc_5p_to_db = PythonOperator(
    task_id='load_scanpy_report_for_sc_5p_to_db',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scanpy_for_sc_5p',
      'file_name_key': 'scanpy_notebook',
      'analysis_name': 'scanpy_5p',
      'collection_type': 'SCANPY_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    },
  )
  upload_scanpy_report_for_sc_5p_to_ftp = PythonOperator(
    task_id='upload_scanpy_report_for_sc_5p_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_scanpy_report_for_sc_5p_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCANPY_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    },
  )
  upload_scanpy_report_for_sc_5p_to_box = PythonOperator(
    task_id='upload_scanpy_report_for_sc_5p_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_scanpy_report_for_sc_5p_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'scanpy_single_sample_report',
    },
  )
  # FTP upload of the cellbrowser directories; pulls straight from the scanpy
  # task rather than from the report-loading task.
  upload_cellbrowser_for_sc_5p_to_ftp = PythonOperator(
    task_id='upload_cellbrowser_for_sc_5p_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'run_scanpy_for_sc_5p',
      'xcom_pull_files_key': 'cellbrowser_dirs',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_CELLBROWSER',
      'collection_table': 'sample',
      'collect_remote_file': True,
    },
  )
  ## PIPELINE
  decide_analysis_branch.set_downstream(run_scanpy_for_sc_5p)
  run_scanpy_for_sc_5p.set_downstream([
    load_scanpy_report_for_sc_5p_to_db,
    load_cellranger_gex_matrics_to_db,
    upload_cellbrowser_for_sc_5p_to_ftp,
  ])
  load_scanpy_report_for_sc_5p_to_db.set_downstream([
    upload_scanpy_report_for_sc_5p_to_ftp,
    upload_scanpy_report_for_sc_5p_to_box,
  ])
  ## TASK
  # DISABLED: generic (non B/T specific) VDJ scirpy sub-pipeline. It was held
  # in a bare string literal, which executes nothing; it is kept here as
  # ordinary comments so the intent is explicit.
  # NOTE(review): decide_analysis_branch still receives 'run_scirpy_for_vdj'
  # in its params although the task is not created -- confirm before
  # re-enabling or removing.
  #
  # run_scirpy_for_vdj = \
  #   PythonOperator(
  #     task_id='run_scirpy_for_vdj',
  #     dag=dag,
  #     queue='hpc_4G',
  #     python_callable=run_singlecell_notebook_wrapper_func,
  #     params={'cellranger_xcom_key':'cellranger_output',
  #             'cellranger_xcom_pull_task':'run_cellranger',
  #             'scanpy_timeout':1200,
  #             'allow_errors':False,
  #             'kernel_name':'python3',
  #             'analysis_name':'scirpy',
  #             'vdj_dir':'vdj',
  #             'count_dir':'count',
  #             'output_notebook_key':'scirpy_notebook',
  #             'analysis_description_xcom_pull_task':'fetch_analysis_info',
  #             'analysis_description_xcom_key':'analysis_description'})
  # load_cellranger_vdj_matrics_to_db = \
  #   PythonOperator(
  #     task_id='load_cellranger_vdj_matrics_to_db',
  #     dag=dag,
  #     queue='hpc_4G',
  #     python_callable=load_cellranger_metrices_to_collection,
  #     params={'cellranger_xcom_key':'cellranger_output',
  #             'cellranger_xcom_pull_task':'run_cellranger',
  #             'collection_type':'CELLRANGER_MULTI',
  #             'collection_name_task':'load_cellranger_result_to_db',
  #             'collection_name_key':'sample_igf_id',
  #             'metrics_summary_file':'vdj/metrics_summary.csv',
  #             'attribute_prefix':'CELLRANGER_VDJ'})
  # load_scirpy_report_for_vdj_to_db = \
  #   PythonOperator(
  #     task_id='load_scirpy_report_for_vdj_to_db',
  #     dag=dag,
  #     queue='hpc_4G',
  #     python_callable=load_analysis_files_func,
  #     params={'collection_name_task':'load_cellranger_result_to_db',
  #             'collection_name_key':'sample_igf_id',
  #             'file_name_task':'run_scirpy_for_vdj',
  #             'file_name_key':'scirpy_notebook',
  #             'analysis_name':'scirpy_vdj',
  #             'collection_type':'SCIRPY_VDJ_HTML',
  #             'collection_table':'sample',
  #             'output_files_key':'output_db_files'})
  # upload_scirpy_report_for_vdj_to_ftp = \
  #   PythonOperator(
  #     task_id='upload_scirpy_report_for_vdj_to_ftp',
  #     dag=dag,
  #     queue='hpc_4G',
  #     python_callable=ftp_files_upload_for_analysis,
  #     params={'xcom_pull_task':'load_scirpy_report_for_vdj_to_db',
  #             'xcom_pull_files_key':'output_db_files',
  #             'collection_name_task':'load_cellranger_result_to_db',
  #             'collection_name_key':'sample_igf_id',
  #             'collection_type':'FTP_SCIRPY_VDJ_HTML',
  #             'collection_table':'sample',
  #             'collect_remote_file':True})
  # upload_scirpy_report_for_vdj_to_box = \
  #   PythonOperator(
  #     task_id='upload_scirpy_report_for_vdj_to_box',
  #     dag=dag,
  #     queue='hpc_4G',
  #     python_callable=upload_analysis_file_to_box,
  #     params={'xcom_pull_task':'load_scirpy_report_for_vdj_to_db',
  #             'xcom_pull_files_key':'output_db_files',
  #             'analysis_tag':'scirpy_vdj_single_sample_report'})
  # ## PIPELINE
  # decide_analysis_branch >> run_scirpy_for_vdj
  # run_scirpy_for_vdj >> load_scirpy_report_for_vdj_to_db
  # run_scirpy_for_vdj >> load_cellranger_vdj_matrics_to_db
  # load_scirpy_report_for_vdj_to_db >> upload_scirpy_report_for_vdj_to_ftp
  # load_scirpy_report_for_vdj_to_db >> upload_scirpy_report_for_vdj_to_box
  ## TASK
  # Scirpy notebook run for the B-cell VDJ output ('vdj_b' directory) together
  # with the 'count' directory of the cellranger output. It is given
  # 'scirpy_notebook' as the XCom key for its notebook.
  run_scirpy_for_vdj_b = \
    PythonOperator(
      task_id='run_scirpy_for_vdj_b',
      dag=dag,
      queue='hpc_4G',
      python_callable=run_singlecell_notebook_wrapper_func,
      params={'cellranger_xcom_key':'cellranger_output',
              'cellranger_xcom_pull_task':'run_cellranger',
              'scanpy_timeout':1200,
              'allow_errors':False,
              'kernel_name':'python3',
              'analysis_name':'scirpy',
              'vdj_dir':'vdj_b',
              'count_dir':'count',
              'output_notebook_key':'scirpy_notebook',
              'analysis_description_xcom_pull_task':'fetch_analysis_info',
              'analysis_description_xcom_key':'analysis_description'})
  # Loads the B-cell VDJ metrics summary (vdj_b/metrics_summary.csv) with the
  # CELLRANGER_VDJB attribute prefix.
  load_cellranger_vdjB_matrics_to_db = \
    PythonOperator(
      task_id='load_cellranger_vdjB_matrics_to_db',
      dag=dag,
      queue='hpc_4G',
      python_callable=load_cellranger_metrices_to_collection,
      params={'cellranger_xcom_key':'cellranger_output',
              'cellranger_xcom_pull_task':'run_cellranger',
              'collection_type':'CELLRANGER_MULTI',
              'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'metrics_summary_file':'vdj_b/metrics_summary.csv',
              'attribute_prefix':'CELLRANGER_VDJB'})
  # Registers the scirpy notebook; its 'output_db_files' XCom feeds both
  # report uploads below.
  load_scirpy_report_for_vdj_b_to_db = \
    PythonOperator(
      task_id='load_scirpy_report_for_vdj_b_to_db',
      dag=dag,
      queue='hpc_4G',
      python_callable=load_analysis_files_func,
      params={'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'file_name_task':'run_scirpy_for_vdj_b',
              'file_name_key':'scirpy_notebook',
              'analysis_name':'scirpy_vdj_b',
              'collection_type':'SCIRPY_VDJ_B_HTML',
              'collection_table':'sample',
              'output_files_key':'output_db_files'})
  upload_scirpy_report_for_vdj_b_to_ftp = \
    PythonOperator(
      task_id='upload_scirpy_report_for_vdj_b_to_ftp',
      dag=dag,
      queue='hpc_4G',
      python_callable=ftp_files_upload_for_analysis,
      params={'xcom_pull_task':'load_scirpy_report_for_vdj_b_to_db',
              'xcom_pull_files_key':'output_db_files',
              'collection_name_task':'load_cellranger_result_to_db',
              'collection_name_key':'sample_igf_id',
              'collection_type':'FTP_SCIRPY_VDJ_B_HTML',
              'collection_table':'sample',
              'collect_remote_file':True})
  # The task_id previously read 'upload_scanpy_report_for_vdj_b_to_box' (a
  # copy-paste slip). It now matches the variable name and the sibling VDJ-T
  # task, so the task shows up under the right name in the UI and logs.
  upload_scirpy_report_for_vdj_b_to_box = \
    PythonOperator(
      task_id='upload_scirpy_report_for_vdj_b_to_box',
      dag=dag,
      queue='hpc_4G',
      python_callable=upload_analysis_file_to_box,
      params={'xcom_pull_task':'load_scirpy_report_for_vdj_b_to_db',
              'xcom_pull_files_key':'output_db_files',
              'analysis_tag':'scirpy_vdj_b_single_sample_report'})
  ## PIPELINE
  decide_analysis_branch >> run_scirpy_for_vdj_b
  run_scirpy_for_vdj_b >> load_scirpy_report_for_vdj_b_to_db
  run_scirpy_for_vdj_b >> load_cellranger_vdjB_matrics_to_db
  load_scirpy_report_for_vdj_b_to_db >> upload_scirpy_report_for_vdj_b_to_ftp
  load_scirpy_report_for_vdj_b_to_db >> upload_scirpy_report_for_vdj_b_to_box
  ## TASK
  # Scirpy notebook run for the T-cell VDJ output ('vdj_t' directory); the
  # wiring mirrors the VDJ-B section of this file.
  run_scirpy_for_vdj_t = PythonOperator(
    task_id='run_scirpy_for_vdj_t',
    python_callable=run_singlecell_notebook_wrapper_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'analysis_name': 'scirpy',
      'vdj_dir': 'vdj_t',
      'count_dir': 'count',
      'output_notebook_key': 'scirpy_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    },
  )
  # Loads the T-cell VDJ metrics summary (vdj_t/metrics_summary.csv) with the
  # CELLRANGER_VDJT attribute prefix.
  load_cellranger_vdjT_matrics_to_db = PythonOperator(
    task_id='load_cellranger_vdjT_matrics_to_db',
    python_callable=load_cellranger_metrices_to_collection,
    queue='hpc_4G',
    dag=dag,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'CELLRANGER_MULTI',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'metrics_summary_file': 'vdj_t/metrics_summary.csv',
      'attribute_prefix': 'CELLRANGER_VDJT',
    },
  )
  # Registers the scirpy notebook; its 'output_db_files' XCom feeds the two
  # uploads below.
  load_scirpy_report_for_vdj_t_to_db = PythonOperator(
    task_id='load_scirpy_report_for_vdj_t_to_db',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scirpy_for_vdj_t',
      'file_name_key': 'scirpy_notebook',
      'analysis_name': 'scirpy_vdj_t',
      'collection_type': 'SCIRPY_VDJ_T_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    },
  )
  upload_scirpy_report_for_vdj_t_to_ftp = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_t_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_t_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCIRPY_VDJ_T_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    },
  )
  upload_scirpy_report_for_vdj_t_to_box = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_t_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_t_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'scirpy_vdj_t_single_sample_report',
    },
  )
  ## PIPELINE
  decide_analysis_branch.set_downstream(run_scirpy_for_vdj_t)
  run_scirpy_for_vdj_t.set_downstream([
    load_scirpy_report_for_vdj_t_to_db,
    load_cellranger_vdjT_matrics_to_db,
  ])
  load_scirpy_report_for_vdj_t_to_db.set_downstream([
    upload_scirpy_report_for_vdj_t_to_ftp,
    upload_scirpy_report_for_vdj_t_to_box,
  ])
  ## TASK
  # Seurat notebook run; uses the 'ir' kernel instead of 'python3' and is
  # given both the 'vdj' and 'count' directory names. It is given
  # 'seurat_notebook' as the XCom key for its notebook.
  run_seurat_for_sc_5p = PythonOperator(
    task_id='run_seurat_for_sc_5p',
    python_callable=run_singlecell_notebook_wrapper_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'ir',
      'analysis_name': 'seurat',
      'vdj_dir': 'vdj',
      'count_dir': 'count',
      'output_notebook_key': 'seurat_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    },
  )
  # Registers the seurat notebook; its 'output_db_files' XCom feeds the two
  # uploads below.
  load_seurat_report_for_sc_5p_db = PythonOperator(
    task_id='load_seurat_report_for_sc_5p_db',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_seurat_for_sc_5p',
      'file_name_key': 'seurat_notebook',
      'analysis_name': 'seurat_5p',
      'collection_type': 'SEURAT_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    },
  )
  upload_seurat_report_for_sc_5p_ftp = PythonOperator(
    task_id='upload_seurat_report_for_sc_5p_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_seurat_report_for_sc_5p_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SEURAT_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    },
  )
  upload_seurat_report_for_sc_5p_to_box = PythonOperator(
    task_id='upload_seurat_report_for_sc_5p_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_seurat_report_for_sc_5p_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'seurat_single_sample_report',
    },
  )
  ## PIPELINE
  decide_analysis_branch.set_downstream(run_seurat_for_sc_5p)
  run_seurat_for_sc_5p.set_downstream(load_seurat_report_for_sc_5p_db)
  load_seurat_report_for_sc_5p_db.set_downstream([
    upload_seurat_report_for_sc_5p_ftp,
    upload_seurat_report_for_sc_5p_to_box,
  ])
  ## TASK
  # BAM -> CRAM conversion of the cellranger output on the 4-thread queue; it
  # is given 'cram_files' as the XCom key for the result.
  convert_cellranger_bam_to_cram = PythonOperator(
    task_id='convert_cellranger_bam_to_cram',
    python_callable=convert_bam_to_cram_func,
    queue='hpc_4G4t',
    dag=dag,
    params={
      'xcom_pull_files_key': 'cellranger_output',
      'xcom_pull_task': 'run_cellranger',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'threads': 4,
      'analysis_name': 'cellranger',
      'collection_type': 'ANALYSIS_CRAM',
      'collection_table': 'sample',
      'cram_files_xcom_key': 'cram_files',
    },
  )
  ## TASK
  # Branching task ahead of the per-tool QC runs. 'list_of_tasks' names the
  # downstream QC task ids; the picard tasks in this file pull an XCom from
  # this task under a key equal to their own task id.
  copy_bam_for_parallel_runs = BranchPythonOperator(
    task_id='copy_bam_for_parallel_runs',
    python_callable=index_and_copy_bam_for_parallel_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_files_key': 'cellranger_output',
      'xcom_pull_task': 'run_cellranger',
      'list_of_tasks': [
        'run_picard_alignment_summary',
        'run_picard_qual_summary',
        'run_picard_rna_summary',
        'run_picard_gc_summary',
        'run_picard_base_dist_summary',
        'run_samtools_stats',
      ],
    },
  )
  ## TASK
  # iRODS upload of the CRAM files produced above.
  upload_cram_to_irods = PythonOperator(
    task_id='upload_cram_to_irods',
    python_callable=irods_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'convert_cellranger_bam_to_cram',
      'xcom_pull_files_key': 'cram_files',
      'collection_name_key': 'sample_igf_id',
      'collection_name_task': 'load_cellranger_result_to_db',
      'analysis_name': 'cellranger_multi',
    },
  )
  ## PIPELINE
  decide_analysis_branch.set_downstream(convert_cellranger_bam_to_cram)
  # The QC fan-out runs after the conversion: we need to load metrics to cram.
  convert_cellranger_bam_to_cram.set_downstream([
    copy_bam_for_parallel_runs,
    upload_cram_to_irods,
  ])
  632. ## TASK
  633. run_picard_alignment_summary = \
  634. PythonOperator(
  635. task_id='run_picard_alignment_summary',
  636. dag=dag,
  637. queue='hpc_4G',
  638. python_callable=run_picard_for_cellranger,
  639. params={'xcom_pull_files_key':'run_picard_alignment_summary',
  640. 'xcom_pull_task':'copy_bam_for_parallel_runs',
  641. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  642. 'analysis_description_xcom_key':'analysis_description',
  643. 'use_ephemeral_space':True,
  644. 'load_metrics_to_cram':True,
  645. 'java_param':'-Xmx4g',
  646. 'picard_command':'CollectAlignmentSummaryMetrics',
  647. 'picard_option':{},
  648. 'analysis_files_xcom_key':'picard_alignment_summary',
  649. 'bam_files_xcom_key':None})
  650. ## PIPELINE
  651. copy_bam_for_parallel_runs >> run_picard_alignment_summary
  ## TASK
  # cleanup_picard_alignment_summary_input: runs clean_up_files against the
  # same task/key pair ('copy_bam_for_parallel_runs' /
  # 'run_picard_alignment_summary') that the alignment-summary run was given
  # as input (presumably removing that BAM copy - confirm in clean_up_files).
  cleanup_picard_alignment_summary_input = PythonOperator(
    task_id='cleanup_picard_alignment_summary_input',
    python_callable=clean_up_files,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_files_key': 'run_picard_alignment_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
    },
  )
  ## PIPELINE
  # Clean up only once the metrics run that reads the input has finished.
  run_picard_alignment_summary >> cleanup_picard_alignment_summary_input
  ## TASK
  # upload_picard_alignment_summary_to_box: runs upload_analysis_file_to_box
  # for the files published by run_picard_alignment_summary under
  # 'picard_alignment_summary', tagged 'Picard-CollectAlignmentSummaryMetrics'.
  upload_picard_alignment_summary_to_box = PythonOperator(
    task_id='upload_picard_alignment_summary_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'run_picard_alignment_summary',
      'xcom_pull_files_key': 'picard_alignment_summary',
      'analysis_tag': 'Picard-CollectAlignmentSummaryMetrics',
    },
  )
  ## PIPELINE
  run_picard_alignment_summary >> upload_picard_alignment_summary_to_box
  ## TASK
  # run_picard_qual_summary: runs run_picard_for_cellranger with the Picard
  # command QualityScoreDistribution and a 4 GB max Java heap (-Xmx4g) on the
  # hpc_4G queue. Input files are referenced by the key
  # 'run_picard_qual_summary' of copy_bam_for_parallel_runs; results are
  # published under 'picard_qual_summary' (presumably via XCom - confirm in
  # the callable).
  run_picard_qual_summary = PythonOperator(
    task_id='run_picard_qual_summary',
    python_callable=run_picard_for_cellranger,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_files_key': 'run_picard_qual_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'load_metrics_to_cram': True,
      'java_param': '-Xmx4g',
      'picard_command': 'QualityScoreDistribution',
      'picard_option': {},  # no extra Picard options for this command
      'analysis_files_xcom_key': 'picard_qual_summary',
      'bam_files_xcom_key': None,
    },
  )
  ## PIPELINE
  # The metrics run starts only after the BAM copies exist.
  copy_bam_for_parallel_runs >> run_picard_qual_summary
  ## TASK
  # cleanup_picard_qual_summary_input: runs clean_up_files against the same
  # task/key pair ('copy_bam_for_parallel_runs' / 'run_picard_qual_summary')
  # that the quality-score run was given as input (presumably removing that
  # BAM copy - confirm in clean_up_files).
  cleanup_picard_qual_summary_input = PythonOperator(
    task_id='cleanup_picard_qual_summary_input',
    python_callable=clean_up_files,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_files_key': 'run_picard_qual_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
    },
  )
  ## PIPELINE
  # Clean up only once the metrics run that reads the input has finished.
  run_picard_qual_summary >> cleanup_picard_qual_summary_input
  ## TASK
  # upload_picard_qual_summary_to_box: runs upload_analysis_file_to_box for
  # the files published by run_picard_qual_summary under
  # 'picard_qual_summary', tagged 'Picard-QualityScoreDistribution'.
  upload_picard_qual_summary_to_box = PythonOperator(
    task_id='upload_picard_qual_summary_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'run_picard_qual_summary',
      'xcom_pull_files_key': 'picard_qual_summary',
      'analysis_tag': 'Picard-QualityScoreDistribution',
    },
  )
  ## PIPELINE
  run_picard_qual_summary >> upload_picard_qual_summary_to_box
  ## TASK
  # run_picard_rna_summary: runs run_picard_for_cellranger with the Picard
  # command CollectRnaSeqMetrics. Unlike the other Picard tasks in this DAG it
  # uses the hpc_8G queue with a 7 GB max Java heap (-Xmx7g). Input files are
  # referenced by the key 'run_picard_rna_summary' of
  # copy_bam_for_parallel_runs; results are published under
  # 'picard_rna_summary' (presumably via XCom - confirm in the callable).
  run_picard_rna_summary = PythonOperator(
    task_id='run_picard_rna_summary',
    python_callable=run_picard_for_cellranger,
    queue='hpc_8G',
    dag=dag,
    params={
      'xcom_pull_files_key': 'run_picard_rna_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'load_metrics_to_cram': True,
      'java_param': '-Xmx7g',
      'picard_command': 'CollectRnaSeqMetrics',
      'picard_option': {},  # no extra Picard options passed from the DAG
      'analysis_files_xcom_key': 'picard_rna_summary',
      'bam_files_xcom_key': None,
    },
  )
  ## PIPELINE
  # The metrics run starts only after the BAM copies exist.
  copy_bam_for_parallel_runs >> run_picard_rna_summary
  ## TASK
  # cleanup_picard_rna_summary_input: runs clean_up_files against the same
  # task/key pair ('copy_bam_for_parallel_runs' / 'run_picard_rna_summary')
  # that the RNA-seq metrics run was given as input (presumably removing that
  # BAM copy - confirm in clean_up_files).
  cleanup_picard_rna_summary_input = PythonOperator(
    task_id='cleanup_picard_rna_summary_input',
    python_callable=clean_up_files,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_files_key': 'run_picard_rna_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
    },
  )
  ## PIPELINE
  # Clean up only once the metrics run that reads the input has finished.
  run_picard_rna_summary >> cleanup_picard_rna_summary_input
  ## TASK
  # upload_picard_rna_summary_to_box: runs upload_analysis_file_to_box for the
  # files published by run_picard_rna_summary under 'picard_rna_summary',
  # tagged 'Picard-CollectRnaSeqMetrics'.
  upload_picard_rna_summary_to_box = PythonOperator(
    task_id='upload_picard_rna_summary_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'run_picard_rna_summary',
      'xcom_pull_files_key': 'picard_rna_summary',
      'analysis_tag': 'Picard-CollectRnaSeqMetrics',
    },
  )
  ## PIPELINE
  run_picard_rna_summary >> upload_picard_rna_summary_to_box
  ## TASK
  # run_picard_gc_summary: runs run_picard_for_cellranger with the Picard
  # command CollectGcBiasMetrics and a 4 GB max Java heap (-Xmx4g) on the
  # hpc_4G queue. Input files are referenced by the key
  # 'run_picard_gc_summary' of copy_bam_for_parallel_runs; results are
  # published under 'picard_gc_summary' (presumably via XCom - confirm in the
  # callable).
  run_picard_gc_summary = PythonOperator(
    task_id='run_picard_gc_summary',
    python_callable=run_picard_for_cellranger,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_files_key': 'run_picard_gc_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'load_metrics_to_cram': True,
      'java_param': '-Xmx4g',
      'picard_command': 'CollectGcBiasMetrics',
      'picard_option': {},  # no extra Picard options passed from the DAG
      'analysis_files_xcom_key': 'picard_gc_summary',
      'bam_files_xcom_key': None,
    },
  )
  ## PIPELINE
  # The metrics run starts only after the BAM copies exist.
  copy_bam_for_parallel_runs >> run_picard_gc_summary
  ## TASK
  # cleanup_picard_gc_summary_input: runs clean_up_files against the same
  # task/key pair ('copy_bam_for_parallel_runs' / 'run_picard_gc_summary')
  # that the GC-bias run was given as input (presumably removing that BAM
  # copy - confirm in clean_up_files).
  cleanup_picard_gc_summary_input = PythonOperator(
    task_id='cleanup_picard_gc_summary_input',
    python_callable=clean_up_files,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_files_key': 'run_picard_gc_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
    },
  )
  ## PIPELINE
  # Clean up only once the metrics run that reads the input has finished.
  run_picard_gc_summary >> cleanup_picard_gc_summary_input
  ## TASK
  # upload_picard_gc_summary_to_box: runs upload_analysis_file_to_box for the
  # files published by run_picard_gc_summary under 'picard_gc_summary',
  # tagged 'Picard-CollectGcBiasMetrics'.
  upload_picard_gc_summary_to_box = PythonOperator(
    task_id='upload_picard_gc_summary_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'run_picard_gc_summary',
      'xcom_pull_files_key': 'picard_gc_summary',
      'analysis_tag': 'Picard-CollectGcBiasMetrics',
    },
  )
  ## PIPELINE
  run_picard_gc_summary >> upload_picard_gc_summary_to_box
  ## TASK
  # run_picard_base_dist_summary: runs run_picard_for_cellranger with the
  # Picard command CollectBaseDistributionByCycle and a 4 GB max Java heap
  # (-Xmx4g) on the hpc_4G queue. Input files are referenced by the key
  # 'run_picard_base_dist_summary' of copy_bam_for_parallel_runs. Note the
  # output key is 'picard_base_summary' (no '_dist'), and downstream tasks
  # use that exact spelling.
  run_picard_base_dist_summary = PythonOperator(
    task_id='run_picard_base_dist_summary',
    python_callable=run_picard_for_cellranger,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_files_key': 'run_picard_base_dist_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'load_metrics_to_cram': True,
      'java_param': '-Xmx4g',
      'picard_command': 'CollectBaseDistributionByCycle',
      'picard_option': {},  # no extra Picard options for this command
      'analysis_files_xcom_key': 'picard_base_summary',
      'bam_files_xcom_key': None,
    },
  )
  ## PIPELINE
  # The metrics run starts only after the BAM copies exist.
  copy_bam_for_parallel_runs >> run_picard_base_dist_summary
  ## TASK
  # cleanup_picard_base_dist_summary_input: runs clean_up_files against the
  # same task/key pair ('copy_bam_for_parallel_runs' /
  # 'run_picard_base_dist_summary') that the base-distribution run was given
  # as input (presumably removing that BAM copy - confirm in clean_up_files).
  cleanup_picard_base_dist_summary_input = PythonOperator(
    task_id='cleanup_picard_base_dist_summary_input',
    python_callable=clean_up_files,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_files_key': 'run_picard_base_dist_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
    },
  )
  ## PIPELINE
  # Clean up only once the metrics run that reads the input has finished.
  run_picard_base_dist_summary >> cleanup_picard_base_dist_summary_input
  ## TASK
  # upload_picard_base_dist_summary_to_box: runs upload_analysis_file_to_box
  # for the files published by run_picard_base_dist_summary under
  # 'picard_base_summary', tagged 'Picard-CollectBaseDistributionByCycle'.
  upload_picard_base_dist_summary_to_box = PythonOperator(
    task_id='upload_picard_base_dist_summary_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'run_picard_base_dist_summary',
      'xcom_pull_files_key': 'picard_base_summary',
      'analysis_tag': 'Picard-CollectBaseDistributionByCycle',
    },
  )
  ## PIPELINE
  run_picard_base_dist_summary >> upload_picard_base_dist_summary_to_box
  ## TASK
  # run_samtools_stats: runs run_samtools_for_cellranger with the samtools
  # command 'stats' and 4 threads on the hpc_4G4t queue. Input files are
  # referenced by the key 'run_samtools_stats' of copy_bam_for_parallel_runs;
  # results are published under 'samtools_stats' (presumably via XCom -
  # confirm in the callable).
  run_samtools_stats = PythonOperator(
    task_id='run_samtools_stats',
    python_callable=run_samtools_for_cellranger,
    queue='hpc_4G4t',
    dag=dag,
    params={
      'xcom_pull_files_key': 'run_samtools_stats',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'load_metrics_to_cram': True,
      'samtools_command': 'stats',
      'threads': 4,
      'analysis_files_xcom_key': 'samtools_stats',
    },
  )
  ## PIPELINE
  # The stats run starts only after the BAM copies exist.
  copy_bam_for_parallel_runs >> run_samtools_stats
  ## TASK
  # upload_samtools_stats_to_box: runs upload_analysis_file_to_box for the
  # files published by run_samtools_stats under 'samtools_stats', tagged
  # 'Samtools-stats'.
  upload_samtools_stats_to_box = PythonOperator(
    task_id='upload_samtools_stats_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'run_samtools_stats',
      'xcom_pull_files_key': 'samtools_stats',
      'analysis_tag': 'Samtools-stats',
    },
  )
  ## PIPELINE
  run_samtools_stats >> upload_samtools_stats_to_box
  ## TASK
  # run_samtools_idxstats: runs run_samtools_for_cellranger with the samtools
  # command 'idxstats' and 4 threads on the hpc_4G4t queue. It deliberately
  # uses the same input key as run_samtools_stats ('run_samtools_stats' of
  # copy_bam_for_parallel_runs), so it is chained after the stats task rather
  # than run in parallel with it. Results are published under
  # 'samtools_idxstats'.
  run_samtools_idxstats = PythonOperator(
    task_id='run_samtools_idxstats',
    python_callable=run_samtools_for_cellranger,
    queue='hpc_4G4t',
    dag=dag,
    params={
      'xcom_pull_files_key': 'run_samtools_stats',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'load_metrics_to_cram': True,
      'samtools_command': 'idxstats',
      'threads': 4,
      'analysis_files_xcom_key': 'samtools_idxstats',
    },
  )
  ## PIPELINE
  # idxstats follows stats because both consume the same input entry.
  run_samtools_stats >> run_samtools_idxstats
  ## TASK
  # cleanup_samtools_stats_input: runs clean_up_files against the task/key
  # pair ('copy_bam_for_parallel_runs' / 'run_samtools_stats') that both
  # samtools tasks were given as input (presumably removing that BAM copy -
  # confirm in clean_up_files).
  cleanup_samtools_stats_input = PythonOperator(
    task_id='cleanup_samtools_stats_input',
    python_callable=clean_up_files,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_files_key': 'run_samtools_stats',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
    },
  )
  ## PIPELINE
  # Attached after idxstats (the last samtools consumer of this input), not
  # after stats.
  run_samtools_idxstats >> cleanup_samtools_stats_input
  ## TASK
  # upload_samtools_idxstats_to_box: runs upload_analysis_file_to_box for the
  # files published by run_samtools_idxstats under 'samtools_idxstats',
  # tagged 'Samtools-idxstats'.
  upload_samtools_idxstats_to_box = PythonOperator(
    task_id='upload_samtools_idxstats_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'run_samtools_idxstats',
      'xcom_pull_files_key': 'samtools_idxstats',
      'analysis_tag': 'Samtools-idxstats',
    },
  )
  ## PIPELINE
  run_samtools_idxstats >> upload_samtools_idxstats_to_box
  ## TASK
  # run_multiqc: runs run_multiqc_for_cellranger on the hpc_4G queue.
  # 'list_of_analysis_xcoms_and_tasks' maps each producing task id to the key
  # under which that task publishes its output files; the MultiQC outputs are
  # published under 'multiqc_html' and 'multiqc_data'.
  # trigger_rule='none_failed_or_skipped' lets the task run when no upstream
  # task has failed and at least one has succeeded.
  run_multiqc = PythonOperator(
    task_id='run_multiqc',
    python_callable=run_multiqc_for_cellranger,
    queue='hpc_4G',
    trigger_rule='none_failed_or_skipped',
    dag=dag,
    params={
      'list_of_analysis_xcoms_and_tasks': {
        'run_cellranger': 'cellranger_output',
        'run_picard_alignment_summary': 'picard_alignment_summary',
        'run_picard_qual_summary': 'picard_qual_summary',
        'run_picard_rna_summary': 'picard_rna_summary',
        'run_picard_gc_summary': 'picard_gc_summary',
        'run_picard_base_dist_summary': 'picard_base_summary',
        'run_samtools_stats': 'samtools_stats',
        'run_samtools_idxstats': 'samtools_idxstats',
      },
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'multiqc_html_file_xcom_key': 'multiqc_html',
      'multiqc_data_file_xcom_key': 'multiqc_data',
      # Tool names must be spelled exactly ('picard', not 'picad');
      # presumably they are matched against MultiQC module names to order the
      # report sections - confirm in run_multiqc_for_cellranger.
      'tool_order_list': ['picard', 'samtools'],
    },
  )
  ## PIPELINE
  # Every metrics producer feeds MultiQC. run_samtools_stats is covered
  # transitively through run_samtools_idxstats, which runs after it.
  run_picard_alignment_summary >> run_multiqc
  run_picard_qual_summary >> run_multiqc
  run_picard_rna_summary >> run_multiqc
  run_picard_gc_summary >> run_multiqc
  run_picard_base_dist_summary >> run_multiqc
  run_samtools_idxstats >> run_multiqc
  ## TASK
  # load_multiqc_html: runs load_analysis_files_func for the file published by
  # run_multiqc under 'multiqc_html'. The params request collection type
  # 'MULTIQC_HTML' in collection table 'sample', with the collection name
  # taken from 'sample_igf_id' of load_cellranger_result_to_db; the result is
  # published under 'output_db_files' (presumably the loaded file paths -
  # confirm in the callable).
  load_multiqc_html = PythonOperator(
    task_id='load_multiqc_html',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_multiqc',
      'file_name_key': 'multiqc_html',
      'analysis_name': 'multiqc',
      'collection_type': 'MULTIQC_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    },
  )
  ## PIPELINE
  run_multiqc >> load_multiqc_html
  ## TASK
  # upload_multiqc_to_ftp: runs ftp_files_upload_for_analysis for the files
  # published by load_multiqc_html under 'output_db_files'. The params request
  # collection type 'FTP_MULTIQC_HTML' in collection table 'sample', with the
  # collection name taken from 'sample_igf_id' of
  # load_cellranger_result_to_db, and set 'collect_remote_file' to True.
  upload_multiqc_to_ftp = PythonOperator(
    task_id='upload_multiqc_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_multiqc_html',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_MULTIQC_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    },
  )
  ## PIPELINE
  load_multiqc_html >> upload_multiqc_to_ftp
  ## TASK
  # upload_multiqc_to_box: runs upload_analysis_file_to_box for the files
  # published by load_multiqc_html under 'output_db_files', tagged
  # 'multiqc_report'.
  upload_multiqc_to_box = PythonOperator(
    task_id='upload_multiqc_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_multiqc_html',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'multiqc_report',
    },
  )
  ## PIPELINE
  load_multiqc_html >> upload_multiqc_to_box
  ## TASK
  # update_analysis_and_status: runs change_pipeline_status with
  # new_status='FINISHED' and no_change_status='SEEDED' (presumably a seed in
  # the 'SEEDED' state is left untouched - confirm in the callable).
  # trigger_rule='none_failed_or_skipped' lets it run when no upstream task
  # has failed and at least one has succeeded, so branches that were skipped
  # do not block it.
  update_analysis_and_status = PythonOperator(
    task_id='update_analysis_and_status',
    python_callable=change_pipeline_status,
    queue='hpc_4G',
    trigger_rule='none_failed_or_skipped',
    dag=dag,
    params={
      'new_status': 'FINISHED',
      'no_change_status': 'SEEDED',
    },
  )
  ## PIPELINE
  # Terminal upload tasks of each analysis branch gate the status update.
  upload_multiqc_to_ftp >> update_analysis_and_status
  upload_scanpy_report_for_sc_5p_to_ftp >> update_analysis_and_status
  upload_scanpy_report_for_sc_5p_to_box >> update_analysis_and_status
  upload_cellbrowser_for_sc_5p_to_ftp >> update_analysis_and_status
  #upload_scirpy_report_for_vdj_to_ftp >> update_analysis_and_status # no more ambiguous VDJ
  #upload_scirpy_report_for_vdj_to_box >> update_analysis_and_status
  upload_scirpy_report_for_vdj_b_to_ftp >> update_analysis_and_status
  upload_scirpy_report_for_vdj_b_to_box >> update_analysis_and_status
  upload_scirpy_report_for_vdj_t_to_ftp >> update_analysis_and_status
  upload_scirpy_report_for_vdj_t_to_box >> update_analysis_and_status
  upload_seurat_report_for_sc_5p_ftp >> update_analysis_and_status
  upload_seurat_report_for_sc_5p_to_box >> update_analysis_and_status
  upload_cellranger_results_to_irods >> update_analysis_and_status
  upload_cellranger_report_to_ftp >> update_analysis_and_status
  upload_cellranger_report_to_box >> update_analysis_and_status
  upload_cram_to_irods >> update_analysis_and_status
  ## TASK
  # update_qc_pages: final task of this chunk; runs create_and_update_qc_pages
  # with the list of FTP collection types to include. The list still contains
  # 'FTP_SCIRPY_VDJ_HTML' although the ambiguous-VDJ upload edges are
  # commented out in this DAG.
  update_qc_pages = PythonOperator(
    task_id='update_qc_pages',
    python_callable=create_and_update_qc_pages,
    queue='hpc_4G',
    dag=dag,
    params={
      'use_ephemeral_space': True,
      'collection_type_list': [
        'FTP_MULTIQC_HTML',
        'FTP_SEURAT_HTML',
        'FTP_SCIRPY_VDJ_T_HTML',
        'FTP_SCIRPY_VDJ_B_HTML',
        'FTP_SCIRPY_VDJ_HTML',
        'FTP_CELLBROWSER',
        'FTP_SCANPY_HTML',
        'FTP_CELLRANGER_HTML',
      ],
    },
  )
  ## PIPELINE
  # QC pages are refreshed only after the pipeline status has been updated.
  update_analysis_and_status >> update_qc_pages