dag9_tenx_single_cell_immune_profiling.py 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037
  1. from datetime import timedelta
  2. import os,json,logging,subprocess
  3. from airflow.models import DAG,Variable
  4. from airflow.utils.dates import days_ago
  5. from airflow.operators.python_operator import PythonOperator
  6. from airflow.operators.python_operator import BranchPythonOperator
  7. from airflow.operators.dummy_operator import DummyOperator
  8. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import fetch_analysis_info_and_branch_func
  9. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import configure_cellranger_run_func
  10. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_sc_read_trimmming_func
  11. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_cellranger_tool
  12. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import decide_analysis_branch_func
  13. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_cellranger_result_to_db_func
  14. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import ftp_files_upload_for_analysis
  15. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import irods_files_upload_for_analysis
  16. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_singlecell_notebook_wrapper_func
  17. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_analysis_files_func
  18. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import task_branch_function
  19. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import upload_analysis_file_to_box
  20. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import convert_bam_to_cram_func
  21. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_picard_for_cellranger
  22. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_samtools_for_cellranger
  23. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_multiqc_for_cellranger
  24. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import index_and_copy_bam_for_parallel_analysis
  25. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import change_pipeline_status
  26. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import clean_up_files
  27. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import create_and_update_qc_pages
  28. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_cellranger_metrices_to_collection
  29. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import generate_cell_sorted_bam_func
  30. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_velocyto_func
  31. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_scvelo_for_sc_5p_func
  32. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_loom_file_to_rds_func
  33. ## ARGS
  34. default_args = {
  35. 'owner': 'airflow',
  36. 'depends_on_past': False,
  37. 'start_date': days_ago(2),
  38. 'email_on_failure': False,
  39. 'email_on_retry': False,
  40. 'retries': 4,
  41. 'max_active_runs':10,
  42. 'catchup':True,
  43. 'retry_delay': timedelta(minutes=5),
  44. 'provide_context': True,
  45. }
  46. FEATURE_TYPE_LIST = \
  47. Variable.get('tenx_single_cell_immune_profiling_feature_types',default_var={})#.split(',')
  48. ## DAG
  49. dag = \
  50. DAG(
  51. dag_id='dag9_tenx_single_cell_immune_profiling',
  52. schedule_interval=None,
  53. tags=['hpc','analysis','tenx','sc'],
  54. default_args=default_args,
  55. concurrency=100,
  56. max_active_runs=20,
  57. orientation='LR')
  58. with dag:
  ## TASK
  # Entry branch task. Its callable receives the id of the 'no_analysis' task
  # and the XCom keys for the analysis description and analysis info.
  fetch_analysis_info_and_branch = BranchPythonOperator(
    task_id='fetch_analysis_info',
    python_callable=fetch_analysis_info_and_branch_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'no_analysis_task': 'no_analysis',
      'analysis_description_xcom_key': 'analysis_description',
      'analysis_info_xcom_key': 'analysis_info',
    },
  )
  ## TASK
  # Joins the per-feature-type trimming branches, hence the relaxed trigger
  # rule: upstream branches may have been skipped.
  configure_cellranger_run = PythonOperator(
    task_id='configure_cellranger_run',
    python_callable=configure_cellranger_run_func,
    queue='hpc_4G',
    trigger_rule='none_failed_or_skipped',
    dag=dag,
    params={
      'xcom_pull_task_id': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'analysis_info_xcom_key': 'analysis_info',
      'library_csv_xcom_key': 'cellranger_library_csv',
    },
  )
  # For every configured feature type build:
  #   branch task -> ten trimming tasks -> one collector -> configure_cellranger_run
  for analysis_name in FEATURE_TYPE_LIST.keys():
    ## TASK
    # Branch task named after the feature type; 'task_prefix' matches the
    # prefix of the trimming task ids created below.
    task_branch = BranchPythonOperator(
      task_id=analysis_name,
      python_callable=task_branch_function,
      queue='hpc_4G',
      dag=dag,
      params={
        'xcom_pull_task_id': 'fetch_analysis_info',
        'analysis_info_xcom_key': 'analysis_info',
        'analysis_name': analysis_name,
        'task_prefix': 'run_trim',
      },
    )
    run_trim_list = []
    # A fixed pool of ten trimming slots (run_id 0..9) per feature type.
    for run_id in range(10):
      ## TASK
      t = PythonOperator(
        task_id='run_trim_{0}_{1}'.format(analysis_name, run_id),
        python_callable=run_sc_read_trimmming_func,
        queue='hpc_4G',
        dag=dag,
        params={
          'xcom_pull_task_id': 'fetch_analysis_info',
          'analysis_info_xcom_key': 'analysis_info',
          'analysis_description_xcom_key': 'analysis_description',
          'analysis_name': analysis_name,
          'run_id': run_id,
          'r1-length': 0,
          'r2-length': 0,
          'fastq_input_dir_tag': 'fastq_dir',
          'use_ephemeral_space': True,
          'fastq_output_dir_tag': 'output_path',
        },
      )
      run_trim_list.append(t)
    ## TASK
    # Collector for the trimming slots; tolerates skipped slots.
    collect_trimmed_files = DummyOperator(
      task_id='collect_trimmed_files_{0}'.format(analysis_name),
      trigger_rule='none_failed_or_skipped',
      dag=dag,
    )
    ## PIPELINE
    fetch_analysis_info_and_branch >> task_branch >> run_trim_list
    run_trim_list >> collect_trimmed_files >> configure_cellranger_run
  ## TASK
  # Placeholder task; its id is the one handed to fetch_analysis_info as
  # 'no_analysis_task'.
  no_analysis = DummyOperator(task_id='no_analysis', dag=dag)
  ## PIPELINE
  fetch_analysis_info_and_branch >> no_analysis
  ## TASK
  # Cellranger run on the large queue; the core and memory options passed to
  # the tool mirror the queue name (16 cores, 64G).
  run_cellranger = PythonOperator(
    task_id='run_cellranger',
    python_callable=run_cellranger_tool,
    queue='hpc_64G16t24hr',
    dag=dag,
    params={
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'library_csv_xcom_key': 'cellranger_library_csv',
      'library_csv_xcom_pull_task': 'configure_cellranger_run',
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_options': ['--localcores 16', '--localmem 64'],
    },
  )
  ## PIPELINE
  configure_cellranger_run >> run_cellranger
  ## TASK
  # Branch task after cellranger. The params map role names to the ids of
  # candidate downstream tasks for the callable to choose from.
  # NOTE(review): 'run_scirpy_for_vdj' is not defined in the part of the file
  # visible here -- confirm it exists further down.
  decide_analysis_branch = BranchPythonOperator(
    task_id='decide_analysis_branch',
    python_callable=decide_analysis_branch_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'load_cellranger_result_to_db_task': 'load_cellranger_result_to_db',
      'run_scanpy_for_sc_5p_task': 'run_scanpy_for_sc_5p',
      'run_scirpy_for_vdj_task': 'run_scirpy_for_vdj',
      'run_scirpy_for_vdj_b_task': 'run_scirpy_for_vdj_b',
      'run_scirpy_vdj_t_task': 'run_scirpy_for_vdj_t',
      'run_seurat_for_sc_5p_task': 'run_seurat_for_sc_5p',
      'convert_cellranger_bam_to_cram_task': 'convert_cellranger_bam_to_cram',
      'library_csv_xcom_key': 'cellranger_library_csv',
      'library_csv_xcom_pull_task': 'configure_cellranger_run',
    },
  )
  ## PIPELINE
  run_cellranger >> decide_analysis_branch
  ## TASK
  # Loads the cellranger output; the three upload tasks below pull their
  # inputs from this task's XComs (see their 'xcom_pull_task' params).
  load_cellranger_result_to_db = PythonOperator(
    task_id='load_cellranger_result_to_db',
    python_callable=load_cellranger_result_to_db_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'CELLRANGER_MULTI',
      'html_collection_type': 'CELLRANGER_HTML',
      'collection_table': 'sample',
      'xcom_collection_name_key': 'sample_igf_id',
      'genome_column': 'genome_build',
      'analysis_name': 'cellranger_multi',
      'output_xcom_key': 'loaded_output_files',
      'html_xcom_key': 'html_report_file',
      'html_report_file_name': 'web_summary.html',
    },
  )
  # HTML report -> FTP
  upload_cellranger_report_to_ftp = PythonOperator(
    task_id='upload_cellranger_report_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'html_report_file',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_CELLRANGER_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    },
  )
  # HTML report -> Box
  upload_cellranger_report_to_box = PythonOperator(
    task_id='upload_cellranger_report_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'html_report_file',
      'analysis_tag': 'cellranger_multi',
    },
  )
  # Loaded output files -> iRODS
  upload_cellranger_results_to_irods = PythonOperator(
    task_id='upload_cellranger_results_to_irods',
    python_callable=irods_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'loaded_output_files',
      'collection_name_key': 'sample_igf_id',
      'collection_name_task': 'load_cellranger_result_to_db',
      'analysis_name': 'cellranger_multi',
    },
  )
  ## PIPELINE
  decide_analysis_branch >> load_cellranger_result_to_db
  load_cellranger_result_to_db >> [
    upload_cellranger_report_to_ftp,
    upload_cellranger_report_to_box,
    upload_cellranger_results_to_irods,
  ]
  ## TASK
  # Scanpy notebook run on the cellranger 'count' output. It publishes a
  # notebook and cellbrowser dirs under the XCom keys named in its params.
  run_scanpy_for_sc_5p = PythonOperator(
    task_id='run_scanpy_for_sc_5p',
    python_callable=run_singlecell_notebook_wrapper_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'count_dir': 'count',
      'analysis_name': 'scanpy',
      'output_notebook_key': 'scanpy_notebook',
      'output_cellbrowser_key': 'cellbrowser_dirs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    },
  )
  # Cellranger count metrics -> collection attributes
  load_cellranger_gex_matrics_to_db = PythonOperator(
    task_id='load_cellranger_gex_matrics_to_db',
    python_callable=load_cellranger_metrices_to_collection,
    queue='hpc_4G',
    dag=dag,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'CELLRANGER_MULTI',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'metrics_summary_file': 'count/metrics_summary.csv',
      'attribute_prefix': 'CELLRANGER_COUNT',
    },
  )
  # Scanpy notebook -> db
  load_scanpy_report_for_sc_5p_to_db = PythonOperator(
    task_id='load_scanpy_report_for_sc_5p_to_db',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scanpy_for_sc_5p',
      'file_name_key': 'scanpy_notebook',
      'analysis_name': 'scanpy_5p',
      'collection_type': 'SCANPY_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    },
  )
  # Loaded scanpy report -> FTP
  upload_scanpy_report_for_sc_5p_to_ftp = PythonOperator(
    task_id='upload_scanpy_report_for_sc_5p_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_scanpy_report_for_sc_5p_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCANPY_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    },
  )
  # Loaded scanpy report -> Box
  upload_scanpy_report_for_sc_5p_to_box = PythonOperator(
    task_id='upload_scanpy_report_for_sc_5p_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_scanpy_report_for_sc_5p_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'scanpy_single_sample_report',
    },
  )
  # Cellbrowser dirs (straight from the scanpy task) -> FTP
  upload_cellbrowser_for_sc_5p_to_ftp = PythonOperator(
    task_id='upload_cellbrowser_for_sc_5p_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'run_scanpy_for_sc_5p',
      'xcom_pull_files_key': 'cellbrowser_dirs',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_CELLBROWSER',
      'collection_table': 'sample',
      'collect_remote_file': True,
    },
  )
  ## PIPELINE
  decide_analysis_branch >> run_scanpy_for_sc_5p
  run_scanpy_for_sc_5p >> [
    load_scanpy_report_for_sc_5p_to_db,
    load_cellranger_gex_matrics_to_db,
    upload_cellbrowser_for_sc_5p_to_ftp,
  ]
  load_scanpy_report_for_sc_5p_to_db >> [
    upload_scanpy_report_for_sc_5p_to_ftp,
    upload_scanpy_report_for_sc_5p_to_box,
  ]
  ## TASK
  # Scirpy notebook run for the 'vdj_b' cellranger output directory.
  run_scirpy_for_vdj_b = PythonOperator(
    task_id='run_scirpy_for_vdj_b',
    python_callable=run_singlecell_notebook_wrapper_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'analysis_name': 'scirpy',
      'vdj_dir': 'vdj_b',
      'count_dir': 'count',
      'output_notebook_key': 'scirpy_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    },
  )
  # Cellranger vdj_b metrics -> collection attributes
  load_cellranger_vdjB_matrics_to_db = PythonOperator(
    task_id='load_cellranger_vdjB_matrics_to_db',
    python_callable=load_cellranger_metrices_to_collection,
    queue='hpc_4G',
    dag=dag,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'CELLRANGER_MULTI',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'metrics_summary_file': 'vdj_b/metrics_summary.csv',
      'attribute_prefix': 'CELLRANGER_VDJB',
    },
  )
  # Scirpy vdj_b notebook -> db
  load_scirpy_report_for_vdj_b_to_db = PythonOperator(
    task_id='load_scirpy_report_for_vdj_b_to_db',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scirpy_for_vdj_b',
      'file_name_key': 'scirpy_notebook',
      'analysis_name': 'scirpy_vdj_b',
      'collection_type': 'SCIRPY_VDJ_B_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    },
  )
  # Loaded scirpy vdj_b report -> FTP
  upload_scirpy_report_for_vdj_b_to_ftp = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_b_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_b_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCIRPY_VDJ_B_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    },
  )
  # Loaded scirpy vdj_b report -> Box
  # NOTE(review): the task_id says 'scanpy' while the variable name and the
  # vdj_t counterpart say 'scirpy'. Looks like a copy-paste slip; left as-is
  # because renaming a task_id changes the task's identity in Airflow.
  upload_scirpy_report_for_vdj_b_to_box = PythonOperator(
    task_id='upload_scanpy_report_for_vdj_b_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_b_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'scirpy_vdj_b_single_sample_report',
    },
  )
  ## PIPELINE
  decide_analysis_branch >> run_scirpy_for_vdj_b
  run_scirpy_for_vdj_b >> [
    load_scirpy_report_for_vdj_b_to_db,
    load_cellranger_vdjB_matrics_to_db,
  ]
  load_scirpy_report_for_vdj_b_to_db >> [
    upload_scirpy_report_for_vdj_b_to_ftp,
    upload_scirpy_report_for_vdj_b_to_box,
  ]
  ## TASK
  # Scirpy notebook run for the 'vdj_t' cellranger output directory.
  run_scirpy_for_vdj_t = PythonOperator(
    task_id='run_scirpy_for_vdj_t',
    python_callable=run_singlecell_notebook_wrapper_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'analysis_name': 'scirpy',
      'vdj_dir': 'vdj_t',
      'count_dir': 'count',
      'output_notebook_key': 'scirpy_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    },
  )
  # Cellranger vdj_t metrics -> collection attributes
  load_cellranger_vdjT_matrics_to_db = PythonOperator(
    task_id='load_cellranger_vdjT_matrics_to_db',
    python_callable=load_cellranger_metrices_to_collection,
    queue='hpc_4G',
    dag=dag,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'CELLRANGER_MULTI',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'metrics_summary_file': 'vdj_t/metrics_summary.csv',
      'attribute_prefix': 'CELLRANGER_VDJT',
    },
  )
  # Scirpy vdj_t notebook -> db
  load_scirpy_report_for_vdj_t_to_db = PythonOperator(
    task_id='load_scirpy_report_for_vdj_t_to_db',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scirpy_for_vdj_t',
      'file_name_key': 'scirpy_notebook',
      'analysis_name': 'scirpy_vdj_t',
      'collection_type': 'SCIRPY_VDJ_T_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    },
  )
  # Loaded scirpy vdj_t report -> FTP
  upload_scirpy_report_for_vdj_t_to_ftp = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_t_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_t_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCIRPY_VDJ_T_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    },
  )
  # Loaded scirpy vdj_t report -> Box
  upload_scirpy_report_for_vdj_t_to_box = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_t_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_t_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'scirpy_vdj_t_single_sample_report',
    },
  )
  ## PIPELINE
  decide_analysis_branch >> run_scirpy_for_vdj_t
  run_scirpy_for_vdj_t >> [
    load_scirpy_report_for_vdj_t_to_db,
    load_cellranger_vdjT_matrics_to_db,
  ]
  load_scirpy_report_for_vdj_t_to_db >> [
    upload_scirpy_report_for_vdj_t_to_ftp,
    upload_scirpy_report_for_vdj_t_to_box,
  ]
  ## TASK
  # Seurat notebook run; same wrapper callable as the scanpy/scirpy tasks
  # but with the 'ir' kernel.
  run_seurat_for_sc_5p = PythonOperator(
    task_id='run_seurat_for_sc_5p',
    python_callable=run_singlecell_notebook_wrapper_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'ir',
      'analysis_name': 'seurat',
      'vdj_dir': 'vdj',
      'count_dir': 'count',
      'output_notebook_key': 'seurat_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
    },
  )
  # Seurat notebook -> db
  load_seurat_report_for_sc_5p_db = PythonOperator(
    task_id='load_seurat_report_for_sc_5p_db',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_seurat_for_sc_5p',
      'file_name_key': 'seurat_notebook',
      'analysis_name': 'seurat_5p',
      'collection_type': 'SEURAT_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files',
    },
  )
  # Loaded seurat report -> FTP
  upload_seurat_report_for_sc_5p_ftp = PythonOperator(
    task_id='upload_seurat_report_for_sc_5p_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_seurat_report_for_sc_5p_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SEURAT_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True,
    },
  )
  # Loaded seurat report -> Box
  upload_seurat_report_for_sc_5p_to_box = PythonOperator(
    task_id='upload_seurat_report_for_sc_5p_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_seurat_report_for_sc_5p_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'seurat_single_sample_report',
    },
  )
  ## PIPELINE
  decide_analysis_branch >> run_seurat_for_sc_5p >> load_seurat_report_for_sc_5p_db
  load_seurat_report_for_sc_5p_db >> [
    upload_seurat_report_for_sc_5p_ftp,
    upload_seurat_report_for_sc_5p_to_box,
  ]
  ## TASK
  # BAM -> CRAM conversion of the cellranger output; runs on the 4-thread
  # queue with a matching 'threads' param.
  convert_cellranger_bam_to_cram = PythonOperator(
    task_id='convert_cellranger_bam_to_cram',
    python_callable=convert_bam_to_cram_func,
    queue='hpc_4G4t',
    dag=dag,
    params={
      'xcom_pull_files_key': 'cellranger_output',
      'xcom_pull_task': 'run_cellranger',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'threads': 4,
      'analysis_name': 'cellranger',
      'collection_type': 'ANALYSIS_CRAM',
      'collection_table': 'sample',
      'cram_files_xcom_key': 'cram_files',
    },
  )
  ## PIPELINE
  decide_analysis_branch >> convert_cellranger_bam_to_cram
  ## TASK
  # Cell-sorted BAM generation from the cellranger 'count' BAM; it is the
  # upstream of run_velocyto.
  generate_cell_sorted_bam = \
    PythonOperator(
      task_id='generate_cell_sorted_bam',
      dag=dag,
      queue='hpc_16G8t',
      python_callable=generate_cell_sorted_bam_func,
      params={'xcom_pull_task': 'run_cellranger',
              'xcom_pull_files_key': 'cellranger_output',
              'cellranger_bam_path': 'count/possorted_genome_bam.bam',
              'cellsorted_bam_path': 'count/cellsorted_possorted_genome_bam.bam',
              'samtools_mem': '2G',
              'threads': 7})
  # dag=dag is given explicitly, like every other task in this file, so the
  # task does not rely on implicit DAG assignment.
  run_velocyto = \
    PythonOperator(
      task_id='run_velocyto',
      dag=dag,
      queue='hpc_16G_long',
      python_callable=run_velocyto_func,
      params={'xcom_pull_task': 'run_cellranger',
              'xcom_pull_files_key': 'cellranger_output',
              'analysis_description_xcom_pull_task': 'fetch_analysis_info',
              'analysis_description_xcom_key': 'analysis_description'})
  run_scvelo_for_sc_5p = \
    PythonOperator(
      task_id='run_scvelo_for_sc_5p',
      dag=dag,
      queue='hpc_4G',
      python_callable=run_scvelo_for_sc_5p_func)
  load_loom_file_to_rds = \
    PythonOperator(
      task_id='load_loom_file_to_rds',
      dag=dag,
      queue='hpc_4G',
      python_callable=load_loom_file_to_rds_func)
  # The four tasks below are placeholders (DummyOperator) that only mark
  # their position in the graph.
  upload_loom_file_to_irods = \
    DummyOperator(
      task_id='upload_loom_file_to_irods',
      dag=dag)
  load_scvelo_report_to_rds = \
    DummyOperator(
      task_id='load_scvelo_report_to_rds',
      dag=dag)
  upload_scvelo_report_to_ftp = \
    DummyOperator(
      task_id='upload_scvelo_report_to_ftp',
      dag=dag)
  upload_scvelo_report_to_box = \
    DummyOperator(
      task_id='upload_scvelo_report_to_box',
      dag=dag)
  ## PIPELINE
  convert_cellranger_bam_to_cram >> generate_cell_sorted_bam
  generate_cell_sorted_bam >> run_velocyto
  run_velocyto >> run_scvelo_for_sc_5p
  run_velocyto >> load_loom_file_to_rds
  load_loom_file_to_rds >> upload_loom_file_to_irods
  run_scvelo_for_sc_5p >> load_scvelo_report_to_rds
  load_scvelo_report_to_rds >> upload_scvelo_report_to_ftp
  load_scvelo_report_to_rds >> upload_scvelo_report_to_box
  ## TASK
  # Branch task that is handed the ids of the QC tasks meant to run in
  # parallel on the BAM.
  copy_bam_for_parallel_runs = BranchPythonOperator(
    task_id='copy_bam_for_parallel_runs',
    python_callable=index_and_copy_bam_for_parallel_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_files_key': 'cellranger_output',
      'xcom_pull_task': 'run_cellranger',
      'list_of_tasks': [
        'run_picard_alignment_summary',
        'run_picard_qual_summary',
        'run_picard_rna_summary',
        'run_picard_gc_summary',
        'run_picard_base_dist_summary',
        'run_samtools_stats',
      ],
    },
  )
  ## TASK
  # CRAM files published by the conversion task -> iRODS
  upload_cram_to_irods = PythonOperator(
    task_id='upload_cram_to_irods',
    python_callable=irods_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'convert_cellranger_bam_to_cram',
      'xcom_pull_files_key': 'cram_files',
      'collection_name_key': 'sample_igf_id',
      'collection_name_task': 'load_cellranger_result_to_db',
      'analysis_name': 'cellranger_multi',
    },
  )
  ## PIPELINE
  # The BAM copy hangs off generate_cell_sorted_bam, not directly off the CRAM
  # conversion (original author's note: "we need to load metrics to cram").
  generate_cell_sorted_bam >> copy_bam_for_parallel_runs
  convert_cellranger_bam_to_cram >> upload_cram_to_irods
  ## TASK
  # Picard CollectAlignmentSummaryMetrics. The input is pulled from
  # copy_bam_for_parallel_runs under an XCom key equal to this task's id.
  run_picard_alignment_summary = PythonOperator(
    task_id='run_picard_alignment_summary',
    python_callable=run_picard_for_cellranger,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_files_key': 'run_picard_alignment_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'load_metrics_to_cram': True,
      'java_param': '-Xmx4g',
      'picard_command': 'CollectAlignmentSummaryMetrics',
      'picard_option': {},
      'analysis_files_xcom_key': 'picard_alignment_summary',
      'bam_files_xcom_key': None,
    },
  )
  ## TASK
  # Clean-up of the per-task input published by copy_bam_for_parallel_runs.
  cleanup_picard_alignment_summary_input = PythonOperator(
    task_id='cleanup_picard_alignment_summary_input',
    python_callable=clean_up_files,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_files_key': 'run_picard_alignment_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
    },
  )
  ## TASK
  # Picard output -> Box
  upload_picard_alignment_summary_to_box = PythonOperator(
    task_id='upload_picard_alignment_summary_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'run_picard_alignment_summary',
      'xcom_pull_files_key': 'picard_alignment_summary',
      'analysis_tag': 'Picard-CollectAlignmentSummaryMetrics',
    },
  )
  ## PIPELINE
  copy_bam_for_parallel_runs >> run_picard_alignment_summary
  run_picard_alignment_summary >> [
    cleanup_picard_alignment_summary_input,
    upload_picard_alignment_summary_to_box,
  ]
  ## TASK
# Picard QualityScoreDistribution step, executed by run_picard_for_cellranger
# on the 'hpc_4G' queue with a 4 GB Java heap.
# Input is referenced through the 'run_picard_qual_summary' XCom key of
# copy_bam_for_parallel_runs; results are keyed as 'picard_qual_summary'.
run_picard_qual_summary = PythonOperator(
    task_id="run_picard_qual_summary",
    python_callable=run_picard_for_cellranger,
    queue="hpc_4G",
    dag=dag,
    params={
        "xcom_pull_files_key": "run_picard_qual_summary",
        "xcom_pull_task": "copy_bam_for_parallel_runs",
        "analysis_description_xcom_pull_task": "fetch_analysis_info",
        "analysis_description_xcom_key": "analysis_description",
        "use_ephemeral_space": True,
        "load_metrics_to_cram": True,
        "java_param": "-Xmx4g",
        "picard_command": "QualityScoreDistribution",
        "picard_option": {},
        "analysis_files_xcom_key": "picard_qual_summary",
        "bam_files_xcom_key": None,
    },
)
## PIPELINE
copy_bam_for_parallel_runs >> run_picard_qual_summary
  ## TASK
# Calls clean_up_files with the XCom reference ('run_picard_qual_summary' on
# copy_bam_for_parallel_runs) used as input by run_picard_qual_summary;
# scheduled after that task.
cleanup_picard_qual_summary_input = PythonOperator(
    task_id="cleanup_picard_qual_summary_input",
    python_callable=clean_up_files,
    queue="hpc_4G",
    dag=dag,
    params={
        "xcom_pull_files_key": "run_picard_qual_summary",
        "xcom_pull_task": "copy_bam_for_parallel_runs",
    },
)
## PIPELINE
run_picard_qual_summary >> cleanup_picard_qual_summary_input
  ## TASK
# Hands the 'picard_qual_summary' XCom entry of run_picard_qual_summary to
# upload_analysis_file_to_box, tagged 'Picard-QualityScoreDistribution'.
upload_picard_qual_summary_to_box = PythonOperator(
    task_id="upload_picard_qual_summary_to_box",
    python_callable=upload_analysis_file_to_box,
    queue="hpc_4G",
    dag=dag,
    params={
        "xcom_pull_task": "run_picard_qual_summary",
        "xcom_pull_files_key": "picard_qual_summary",
        "analysis_tag": "Picard-QualityScoreDistribution",
    },
)
## PIPELINE
run_picard_qual_summary >> upload_picard_qual_summary_to_box
  ## TASK
# Picard CollectRnaSeqMetrics step, executed by run_picard_for_cellranger.
# Unlike the other Picard tasks in this section it is sent to the 'hpc_8G'
# queue and given a 7 GB Java heap.
# Input is referenced through the 'run_picard_rna_summary' XCom key of
# copy_bam_for_parallel_runs; results are keyed as 'picard_rna_summary'.
run_picard_rna_summary = PythonOperator(
    task_id="run_picard_rna_summary",
    python_callable=run_picard_for_cellranger,
    queue="hpc_8G",
    dag=dag,
    params={
        "xcom_pull_files_key": "run_picard_rna_summary",
        "xcom_pull_task": "copy_bam_for_parallel_runs",
        "analysis_description_xcom_pull_task": "fetch_analysis_info",
        "analysis_description_xcom_key": "analysis_description",
        "use_ephemeral_space": True,
        "load_metrics_to_cram": True,
        "java_param": "-Xmx7g",
        "picard_command": "CollectRnaSeqMetrics",
        "picard_option": {},
        "analysis_files_xcom_key": "picard_rna_summary",
        "bam_files_xcom_key": None,
    },
)
## PIPELINE
copy_bam_for_parallel_runs >> run_picard_rna_summary
  ## TASK
# Calls clean_up_files with the XCom reference ('run_picard_rna_summary' on
# copy_bam_for_parallel_runs) used as input by run_picard_rna_summary;
# scheduled after that task.
cleanup_picard_rna_summary_input = PythonOperator(
    task_id="cleanup_picard_rna_summary_input",
    python_callable=clean_up_files,
    queue="hpc_4G",
    dag=dag,
    params={
        "xcom_pull_files_key": "run_picard_rna_summary",
        "xcom_pull_task": "copy_bam_for_parallel_runs",
    },
)
## PIPELINE
run_picard_rna_summary >> cleanup_picard_rna_summary_input
  ## TASK
# Hands the 'picard_rna_summary' XCom entry of run_picard_rna_summary to
# upload_analysis_file_to_box, tagged 'Picard-CollectRnaSeqMetrics'.
upload_picard_rna_summary_to_box = PythonOperator(
    task_id="upload_picard_rna_summary_to_box",
    python_callable=upload_analysis_file_to_box,
    queue="hpc_4G",
    dag=dag,
    params={
        "xcom_pull_task": "run_picard_rna_summary",
        "xcom_pull_files_key": "picard_rna_summary",
        "analysis_tag": "Picard-CollectRnaSeqMetrics",
    },
)
## PIPELINE
run_picard_rna_summary >> upload_picard_rna_summary_to_box
  ## TASK
# Picard CollectGcBiasMetrics step, executed by run_picard_for_cellranger on
# the 'hpc_4G' queue with a 4 GB Java heap.
# Input is referenced through the 'run_picard_gc_summary' XCom key of
# copy_bam_for_parallel_runs; results are keyed as 'picard_gc_summary'.
run_picard_gc_summary = PythonOperator(
    task_id="run_picard_gc_summary",
    python_callable=run_picard_for_cellranger,
    queue="hpc_4G",
    dag=dag,
    params={
        "xcom_pull_files_key": "run_picard_gc_summary",
        "xcom_pull_task": "copy_bam_for_parallel_runs",
        "analysis_description_xcom_pull_task": "fetch_analysis_info",
        "analysis_description_xcom_key": "analysis_description",
        "use_ephemeral_space": True,
        "load_metrics_to_cram": True,
        "java_param": "-Xmx4g",
        "picard_command": "CollectGcBiasMetrics",
        "picard_option": {},
        "analysis_files_xcom_key": "picard_gc_summary",
        "bam_files_xcom_key": None,
    },
)
## PIPELINE
copy_bam_for_parallel_runs >> run_picard_gc_summary
  ## TASK
# Calls clean_up_files with the XCom reference ('run_picard_gc_summary' on
# copy_bam_for_parallel_runs) used as input by run_picard_gc_summary;
# scheduled after that task.
cleanup_picard_gc_summary_input = PythonOperator(
    task_id="cleanup_picard_gc_summary_input",
    python_callable=clean_up_files,
    queue="hpc_4G",
    dag=dag,
    params={
        "xcom_pull_files_key": "run_picard_gc_summary",
        "xcom_pull_task": "copy_bam_for_parallel_runs",
    },
)
## PIPELINE
run_picard_gc_summary >> cleanup_picard_gc_summary_input
  ## TASK
# Hands the 'picard_gc_summary' XCom entry of run_picard_gc_summary to
# upload_analysis_file_to_box, tagged 'Picard-CollectGcBiasMetrics'.
upload_picard_gc_summary_to_box = PythonOperator(
    task_id="upload_picard_gc_summary_to_box",
    python_callable=upload_analysis_file_to_box,
    queue="hpc_4G",
    dag=dag,
    params={
        "xcom_pull_task": "run_picard_gc_summary",
        "xcom_pull_files_key": "picard_gc_summary",
        "analysis_tag": "Picard-CollectGcBiasMetrics",
    },
)
## PIPELINE
run_picard_gc_summary >> upload_picard_gc_summary_to_box
  ## TASK
# Picard CollectBaseDistributionByCycle step, executed by
# run_picard_for_cellranger on the 'hpc_4G' queue with a 4 GB Java heap.
# Input is referenced through the 'run_picard_base_dist_summary' XCom key of
# copy_bam_for_parallel_runs. Note the output key is 'picard_base_summary'
# (no '_dist'); downstream consumers use that exact spelling.
run_picard_base_dist_summary = PythonOperator(
    task_id="run_picard_base_dist_summary",
    python_callable=run_picard_for_cellranger,
    queue="hpc_4G",
    dag=dag,
    params={
        "xcom_pull_files_key": "run_picard_base_dist_summary",
        "xcom_pull_task": "copy_bam_for_parallel_runs",
        "analysis_description_xcom_pull_task": "fetch_analysis_info",
        "analysis_description_xcom_key": "analysis_description",
        "use_ephemeral_space": True,
        "load_metrics_to_cram": True,
        "java_param": "-Xmx4g",
        "picard_command": "CollectBaseDistributionByCycle",
        "picard_option": {},
        "analysis_files_xcom_key": "picard_base_summary",
        "bam_files_xcom_key": None,
    },
)
## PIPELINE
copy_bam_for_parallel_runs >> run_picard_base_dist_summary
  ## TASK
# Calls clean_up_files with the XCom reference ('run_picard_base_dist_summary'
# on copy_bam_for_parallel_runs) used as input by
# run_picard_base_dist_summary; scheduled after that task.
cleanup_picard_base_dist_summary_input = PythonOperator(
    task_id="cleanup_picard_base_dist_summary_input",
    python_callable=clean_up_files,
    queue="hpc_4G",
    dag=dag,
    params={
        "xcom_pull_files_key": "run_picard_base_dist_summary",
        "xcom_pull_task": "copy_bam_for_parallel_runs",
    },
)
## PIPELINE
run_picard_base_dist_summary >> cleanup_picard_base_dist_summary_input
  ## TASK
# Hands the 'picard_base_summary' XCom entry of run_picard_base_dist_summary
# to upload_analysis_file_to_box, tagged
# 'Picard-CollectBaseDistributionByCycle'.
upload_picard_base_dist_summary_to_box = PythonOperator(
    task_id="upload_picard_base_dist_summary_to_box",
    python_callable=upload_analysis_file_to_box,
    queue="hpc_4G",
    dag=dag,
    params={
        "xcom_pull_task": "run_picard_base_dist_summary",
        "xcom_pull_files_key": "picard_base_summary",
        "analysis_tag": "Picard-CollectBaseDistributionByCycle",
    },
)
## PIPELINE
run_picard_base_dist_summary >> upload_picard_base_dist_summary_to_box
  ## TASK
# `samtools stats` step, executed by run_samtools_for_cellranger on the
# 'hpc_4G4t' queue with threads=4.
# Input is referenced through the 'run_samtools_stats' XCom key of
# copy_bam_for_parallel_runs; results are keyed as 'samtools_stats'.
run_samtools_stats = PythonOperator(
    task_id="run_samtools_stats",
    python_callable=run_samtools_for_cellranger,
    queue="hpc_4G4t",
    dag=dag,
    params={
        "xcom_pull_files_key": "run_samtools_stats",
        "xcom_pull_task": "copy_bam_for_parallel_runs",
        "analysis_description_xcom_pull_task": "fetch_analysis_info",
        "analysis_description_xcom_key": "analysis_description",
        "use_ephemeral_space": True,
        "load_metrics_to_cram": True,
        "samtools_command": "stats",
        "threads": 4,
        "analysis_files_xcom_key": "samtools_stats",
    },
)
## PIPELINE
copy_bam_for_parallel_runs >> run_samtools_stats
  ## TASK
# Hands the 'samtools_stats' XCom entry of run_samtools_stats to
# upload_analysis_file_to_box, tagged 'Samtools-stats'.
upload_samtools_stats_to_box = PythonOperator(
    task_id="upload_samtools_stats_to_box",
    python_callable=upload_analysis_file_to_box,
    queue="hpc_4G",
    dag=dag,
    params={
        "xcom_pull_task": "run_samtools_stats",
        "xcom_pull_files_key": "samtools_stats",
        "analysis_tag": "Samtools-stats",
    },
)
## PIPELINE
run_samtools_stats >> upload_samtools_stats_to_box
  ## TASK
# `samtools idxstats` step, executed by run_samtools_for_cellranger on the
# 'hpc_4G4t' queue with threads=4.
# It is configured with the same input reference as run_samtools_stats (the
# 'run_samtools_stats' XCom key of copy_bam_for_parallel_runs) and is chained
# after that task rather than run in parallel with it.
# Results are keyed as 'samtools_idxstats'.
run_samtools_idxstats = PythonOperator(
    task_id="run_samtools_idxstats",
    python_callable=run_samtools_for_cellranger,
    queue="hpc_4G4t",
    dag=dag,
    params={
        "xcom_pull_files_key": "run_samtools_stats",
        "xcom_pull_task": "copy_bam_for_parallel_runs",
        "analysis_description_xcom_pull_task": "fetch_analysis_info",
        "analysis_description_xcom_key": "analysis_description",
        "use_ephemeral_space": True,
        "load_metrics_to_cram": True,
        "samtools_command": "idxstats",
        "threads": 4,
        "analysis_files_xcom_key": "samtools_idxstats",
    },
)
## PIPELINE
run_samtools_stats >> run_samtools_idxstats
  ## TASK
# Calls clean_up_files with the 'run_samtools_stats' XCom reference on
# copy_bam_for_parallel_runs. Both samtools tasks are configured with that
# reference, so this is scheduled after the later one (run_samtools_idxstats).
cleanup_samtools_stats_input = PythonOperator(
    task_id="cleanup_samtools_stats_input",
    python_callable=clean_up_files,
    queue="hpc_4G",
    dag=dag,
    params={
        "xcom_pull_files_key": "run_samtools_stats",
        "xcom_pull_task": "copy_bam_for_parallel_runs",
    },
)
## PIPELINE
run_samtools_idxstats >> cleanup_samtools_stats_input
  ## TASK
# Hands the 'samtools_idxstats' XCom entry of run_samtools_idxstats to
# upload_analysis_file_to_box, tagged 'Samtools-idxstats'.
upload_samtools_idxstats_to_box = PythonOperator(
    task_id="upload_samtools_idxstats_to_box",
    python_callable=upload_analysis_file_to_box,
    queue="hpc_4G",
    dag=dag,
    params={
        "xcom_pull_task": "run_samtools_idxstats",
        "xcom_pull_files_key": "samtools_idxstats",
        "analysis_tag": "Samtools-idxstats",
    },
)
## PIPELINE
run_samtools_idxstats >> upload_samtools_idxstats_to_box
  ## TASK
# MultiQC aggregation step, executed by run_multiqc_for_cellranger.
# 'list_of_analysis_xcoms_and_tasks' maps each producing task id to the XCom
# key under which that task is configured to publish its output files.
# The report and data outputs are keyed as 'multiqc_html' and 'multiqc_data'.
# NOTE(review): 'none_failed_or_skipped' is deprecated in newer Airflow
# releases in favour of 'none_failed_min_one_success' - confirm the deployed
# Airflow version before renaming.
run_multiqc = PythonOperator(
    task_id="run_multiqc",
    python_callable=run_multiqc_for_cellranger,
    trigger_rule="none_failed_or_skipped",
    queue="hpc_4G",
    dag=dag,
    params={
        "list_of_analysis_xcoms_and_tasks": {
            "run_cellranger": "cellranger_output",
            "run_picard_alignment_summary": "picard_alignment_summary",
            "run_picard_qual_summary": "picard_qual_summary",
            "run_picard_rna_summary": "picard_rna_summary",
            "run_picard_gc_summary": "picard_gc_summary",
            "run_picard_base_dist_summary": "picard_base_summary",
            "run_samtools_stats": "samtools_stats",
            "run_samtools_idxstats": "samtools_idxstats",
        },
        "analysis_description_xcom_pull_task": "fetch_analysis_info",
        "analysis_description_xcom_key": "analysis_description",
        "use_ephemeral_space": True,
        "multiqc_html_file_xcom_key": "multiqc_html",
        "multiqc_data_file_xcom_key": "multiqc_data",
        # Tool names must match MultiQC's module names; the Picard module is
        # spelled 'picard'. Presumably consumed by the callable to order
        # report sections - verify against run_multiqc_for_cellranger.
        "tool_order_list": ["picard", "samtools"],
    },
)
## PIPELINE
# run_samtools_stats is covered transitively through run_samtools_idxstats.
[
    run_picard_alignment_summary,
    run_picard_qual_summary,
    run_picard_rna_summary,
    run_picard_gc_summary,
    run_picard_base_dist_summary,
    run_samtools_idxstats,
] >> run_multiqc
  ## TASK
# Passes the 'multiqc_html' XCom entry of run_multiqc to
# load_analysis_files_func with collection type 'MULTIQC_HTML' on the
# 'sample' table. The collection name is referenced through the
# 'sample_igf_id' XCom key of load_cellranger_result_to_db, and the output is
# keyed as 'output_db_files'.
load_multiqc_html = PythonOperator(
    task_id="load_multiqc_html",
    python_callable=load_analysis_files_func,
    queue="hpc_4G",
    dag=dag,
    params={
        "collection_name_task": "load_cellranger_result_to_db",
        "collection_name_key": "sample_igf_id",
        "file_name_task": "run_multiqc",
        "file_name_key": "multiqc_html",
        "analysis_name": "multiqc",
        "collection_type": "MULTIQC_HTML",
        "collection_table": "sample",
        "output_files_key": "output_db_files",
    },
)
## PIPELINE
run_multiqc >> load_multiqc_html
  ## TASK
# Passes the 'output_db_files' XCom entry of load_multiqc_html to
# ftp_files_upload_for_analysis with collection type 'FTP_MULTIQC_HTML' on
# the 'sample' table and collect_remote_file enabled.
upload_multiqc_to_ftp = PythonOperator(
    task_id="upload_multiqc_to_ftp",
    python_callable=ftp_files_upload_for_analysis,
    queue="hpc_4G",
    dag=dag,
    params={
        "xcom_pull_task": "load_multiqc_html",
        "xcom_pull_files_key": "output_db_files",
        "collection_name_task": "load_cellranger_result_to_db",
        "collection_name_key": "sample_igf_id",
        "collection_type": "FTP_MULTIQC_HTML",
        "collection_table": "sample",
        "collect_remote_file": True,
    },
)
## PIPELINE
load_multiqc_html >> upload_multiqc_to_ftp
  ## TASK
# Hands the 'output_db_files' XCom entry of load_multiqc_html to
# upload_analysis_file_to_box, tagged 'multiqc_report'.
upload_multiqc_to_box = PythonOperator(
    task_id="upload_multiqc_to_box",
    python_callable=upload_analysis_file_to_box,
    queue="hpc_4G",
    dag=dag,
    params={
        "xcom_pull_task": "load_multiqc_html",
        "xcom_pull_files_key": "output_db_files",
        "analysis_tag": "multiqc_report",
    },
)
## PIPELINE
load_multiqc_html >> upload_multiqc_to_box
  ## TASK
# Join point for the upload branches: runs change_pipeline_status with
# new_status 'FINISHED' and no_change_status 'SEEDED'.
# Uses the 'none_failed_or_skipped' trigger rule instead of the default, so
# the join is not governed by the default all-upstreams-succeeded rule.
update_analysis_and_status = PythonOperator(
    task_id="update_analysis_and_status",
    python_callable=change_pipeline_status,
    trigger_rule="none_failed_or_skipped",
    queue="hpc_4G",
    dag=dag,
    params={
        "new_status": "FINISHED",
        "no_change_status": "SEEDED",
    },
)
## PIPELINE
# MultiQC
upload_multiqc_to_ftp >> update_analysis_and_status
# Scanpy / cellbrowser
upload_scanpy_report_for_sc_5p_to_ftp >> update_analysis_and_status
upload_scanpy_report_for_sc_5p_to_box >> update_analysis_and_status
upload_cellbrowser_for_sc_5p_to_ftp >> update_analysis_and_status
# Scirpy (the generic VDJ edges stay disabled)
# upload_scirpy_report_for_vdj_to_ftp >> update_analysis_and_status  # no more ambiguous VDJ
# upload_scirpy_report_for_vdj_to_box >> update_analysis_and_status
upload_scirpy_report_for_vdj_b_to_ftp >> update_analysis_and_status
upload_scirpy_report_for_vdj_b_to_box >> update_analysis_and_status
upload_scirpy_report_for_vdj_t_to_ftp >> update_analysis_and_status
upload_scirpy_report_for_vdj_t_to_box >> update_analysis_and_status
# Seurat
upload_seurat_report_for_sc_5p_ftp >> update_analysis_and_status
upload_seurat_report_for_sc_5p_to_box >> update_analysis_and_status
# Cell Ranger results, reports and CRAM
upload_cellranger_results_to_irods >> update_analysis_and_status
upload_cellranger_report_to_ftp >> update_analysis_and_status
upload_cellranger_report_to_box >> update_analysis_and_status
upload_cram_to_irods >> update_analysis_and_status
# scVelo
upload_scvelo_report_to_box >> update_analysis_and_status
upload_scvelo_report_to_ftp >> update_analysis_and_status
  ## TASK
# Final task of this section: runs create_and_update_qc_pages with the list
# of FTP collection types below, after update_analysis_and_status.
# NOTE(review): 'FTP_SCIRPY_VDJ_HTML' is still listed although the generic
# VDJ upload edges are commented out in this DAG - confirm whether it is
# still needed.
update_qc_pages = PythonOperator(
    task_id="update_qc_pages",
    python_callable=create_and_update_qc_pages,
    queue="hpc_4G",
    dag=dag,
    params={
        "use_ephemeral_space": True,
        "collection_type_list": [
            "FTP_MULTIQC_HTML",
            "FTP_SEURAT_HTML",
            "FTP_SCIRPY_VDJ_T_HTML",
            "FTP_SCIRPY_VDJ_B_HTML",
            "FTP_SCIRPY_VDJ_HTML",
            "FTP_CELLBROWSER",
            "FTP_SCANPY_HTML",
            "FTP_CELLRANGER_HTML",
        ],
    },
)
## PIPELINE
update_analysis_and_status >> update_qc_pages