dag9_tenx_single_cell_immune_profiling.py 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031
  1. from datetime import timedelta
  2. import os,json,logging,subprocess
  3. from airflow.models import DAG,Variable
  4. from airflow.utils.dates import days_ago
  5. from airflow.operators.python_operator import PythonOperator
  6. from airflow.operators.python_operator import BranchPythonOperator
  7. from airflow.operators.dummy_operator import DummyOperator
  8. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import fetch_analysis_info_and_branch_func
  9. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import configure_cellranger_run_func
  10. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_sc_read_trimmming_func
  11. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_cellranger_tool
  12. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import decide_analysis_branch_func
  13. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_cellranger_result_to_db_func
  14. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import ftp_files_upload_for_analysis
  15. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import irods_files_upload_for_analysis
  16. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_singlecell_notebook_wrapper_func
  17. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_analysis_files_func
  18. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import task_branch_function
  19. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import upload_analysis_file_to_box
  20. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import convert_bam_to_cram_func
  21. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_picard_for_cellranger
  22. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_samtools_for_cellranger
  23. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_multiqc_for_cellranger
  24. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import index_and_copy_bam_for_parallel_analysis
  25. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import change_pipeline_status
  26. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import clean_up_files
  27. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import create_and_update_qc_pages
  28. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import load_cellranger_metrices_to_collection
  29. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import generate_cell_sorted_bam_func
  30. from igf_airflow.utils.dag9_tenx_single_cell_immune_profiling_utils import run_velocyto_func
  ## ARGS
  # Default arguments; passed to the DAG constructor below as `default_args`.
  # NOTE(review): 'max_active_runs' and 'catchup' are normally DAG-level
  # settings rather than operator arguments - confirm they have any effect
  # when supplied through default_args.
  default_args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=days_ago(2),
    email_on_failure=False,
    email_on_retry=False,
    retries=4,
    max_active_runs=10,
    catchup=True,
    retry_delay=timedelta(minutes=5),
    provide_context=True,
  )
  # Mapping whose keys name the analysis/feature types; the loop further down
  # creates one branch task and a set of trimming tasks per key.
  # Airflow Variables are stored as text, so the value has to be
  # JSON-deserialised to get a dict back. Without deserialize_json=True a
  # configured variable is returned as `str` and the `.keys()` call in the
  # task-generation loop fails. When the variable is not set, the empty-dict
  # default is used and no per-feature tasks are generated.
  FEATURE_TYPE_LIST = \
    Variable.get(
      'tenx_single_cell_immune_profiling_feature_types',
      default_var={},
      deserialize_json=True)
  ## DAG
  # DAG object for the 10x single cell immune profiling pipeline.
  # No schedule interval is set (schedule_interval=None).
  dag = DAG(
    dag_id='dag9_tenx_single_cell_immune_profiling',
    default_args=default_args,
    schedule_interval=None,
    max_active_runs=20,
    concurrency=100,
    orientation='LR',
    tags=['hpc', 'analysis', 'tenx', 'sc'])
  56. with dag:
  ## TASK
  # First task of the DAG: a branch operator running
  # fetch_analysis_info_and_branch_func. Its params carry the id of the
  # 'no_analysis' task and the two xcom key names that later tasks pull
  # from 'fetch_analysis_info'.
  fetch_analysis_info_and_branch = BranchPythonOperator(
    task_id='fetch_analysis_info',
    python_callable=fetch_analysis_info_and_branch_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'no_analysis_task': 'no_analysis',
      'analysis_description_xcom_key': 'analysis_description',
      'analysis_info_xcom_key': 'analysis_info'})
  ## TASK
  # Runs configure_cellranger_run_func. The task is the join point for all
  # per-feature trimming branches, hence the 'none_failed_or_skipped'
  # trigger rule. 'library_csv_xcom_key' names the xcom key that downstream
  # tasks read from this task.
  configure_cellranger_run = PythonOperator(
    task_id='configure_cellranger_run',
    python_callable=configure_cellranger_run_func,
    trigger_rule='none_failed_or_skipped',
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task_id': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'analysis_info_xcom_key': 'analysis_info',
      'library_csv_xcom_key': 'cellranger_library_csv'})
  # For every configured feature type build one branch task, ten trimming
  # tasks (run ids 0-9) and a collector, then wire them between
  # 'fetch_analysis_info' and 'configure_cellranger_run'.
  for analysis_name in FEATURE_TYPE_LIST.keys():
    ## TASK
    # Branch task named after the feature type; task_branch_function is
    # given the 'run_trim' prefix shared by the trimming task ids below.
    task_branch = BranchPythonOperator(
      task_id=analysis_name,
      python_callable=task_branch_function,
      queue='hpc_4G',
      dag=dag,
      params={
        'xcom_pull_task_id': 'fetch_analysis_info',
        'analysis_info_xcom_key': 'analysis_info',
        'analysis_name': analysis_name,
        'task_prefix': 'run_trim'})
    run_trim_list = []
    for run_id in range(10):
      ## TASK
      # One trimming task per (feature type, run id) pair.
      t = PythonOperator(
        task_id='run_trim_{0}_{1}'.format(analysis_name, run_id),
        python_callable=run_sc_read_trimmming_func,
        queue='hpc_4G',
        dag=dag,
        params={
          'xcom_pull_task_id': 'fetch_analysis_info',
          'analysis_info_xcom_key': 'analysis_info',
          'analysis_description_xcom_key': 'analysis_description',
          'analysis_name': analysis_name,
          'run_id': run_id,
          'r1-length': 0,
          'r2-length': 0,
          'fastq_input_dir_tag': 'fastq_dir',
          'use_ephemeral_space': True,
          'fastq_output_dir_tag': 'output_path'})
      run_trim_list.append(t)
    ## TASK
    # Join point for the trimming tasks of this feature type; the trigger
    # rule lets it run when some of the trimming tasks were skipped.
    collect_trimmed_files = DummyOperator(
      task_id='collect_trimmed_files_{0}'.format(analysis_name),
      trigger_rule='none_failed_or_skipped',
      dag=dag)
    ## PIPELINE
    # fetch -> branch -> [trim 0..9] -> collector -> configure
    fetch_analysis_info_and_branch >> task_branch
    task_branch >> run_trim_list
    run_trim_list >> collect_trimmed_files
    collect_trimmed_files >> configure_cellranger_run
  ## TASK
  # Do-nothing terminal task; its id is handed to the 'fetch_analysis_info'
  # branch task as 'no_analysis_task'.
  no_analysis = DummyOperator(task_id='no_analysis', dag=dag)
  ## PIPELINE
  fetch_analysis_info_and_branch >> no_analysis
  ## TASK
  # Runs run_cellranger_tool on the large-memory queue. The library csv is
  # pulled from 'configure_cellranger_run' and the analysis description from
  # 'fetch_analysis_info'; results are published under 'cellranger_output'.
  # NOTE(review): the --localcores/--localmem values presumably mirror the
  # resources of the 'hpc_64G16t24hr' queue - keep them in sync.
  run_cellranger = PythonOperator(
    task_id='run_cellranger',
    python_callable=run_cellranger_tool,
    queue='hpc_64G16t24hr',
    dag=dag,
    params={
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'library_csv_xcom_key': 'cellranger_library_csv',
      'library_csv_xcom_pull_task': 'configure_cellranger_run',
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_options': ['--localcores 16', '--localmem 64']})
  ## PIPELINE
  configure_cellranger_run >> run_cellranger
  ## TASK
  # Branch task executed after cellranger. decide_analysis_branch_func
  # receives the ids of the candidate downstream tasks through params and
  # the library csv location via xcom from 'configure_cellranger_run'.
  # NOTE(review): 'run_scirpy_for_vdj' is not defined in this part of the
  # file - confirm a task with that id exists further down.
  decide_analysis_branch = BranchPythonOperator(
    task_id='decide_analysis_branch',
    python_callable=decide_analysis_branch_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'load_cellranger_result_to_db_task': 'load_cellranger_result_to_db',
      'run_scanpy_for_sc_5p_task': 'run_scanpy_for_sc_5p',
      'run_scirpy_for_vdj_task': 'run_scirpy_for_vdj',
      'run_scirpy_for_vdj_b_task': 'run_scirpy_for_vdj_b',
      'run_scirpy_vdj_t_task': 'run_scirpy_for_vdj_t',
      'run_seurat_for_sc_5p_task': 'run_seurat_for_sc_5p',
      'convert_cellranger_bam_to_cram_task': 'convert_cellranger_bam_to_cram',
      'library_csv_xcom_key': 'cellranger_library_csv',
      'library_csv_xcom_pull_task': 'configure_cellranger_run'})
  ## PIPELINE
  run_cellranger >> decide_analysis_branch
  ## TASK
  # Runs load_cellranger_result_to_db_func on the 'run_cellranger' output.
  # The xcom keys named here ('sample_igf_id', 'loaded_output_files',
  # 'html_report_file') are the ones the upload tasks below pull from this
  # task.
  load_cellranger_result_to_db = PythonOperator(
    task_id='load_cellranger_result_to_db',
    python_callable=load_cellranger_result_to_db_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'CELLRANGER_MULTI',
      'html_collection_type': 'CELLRANGER_HTML',
      'collection_table': 'sample',
      'xcom_collection_name_key': 'sample_igf_id',
      'genome_column': 'genome_build',
      'analysis_name': 'cellranger_multi',
      'output_xcom_key': 'loaded_output_files',
      'html_xcom_key': 'html_report_file',
      'html_report_file_name': 'web_summary.html'})
  # FTP upload of the html report file(s) listed under 'html_report_file'.
  upload_cellranger_report_to_ftp = PythonOperator(
    task_id='upload_cellranger_report_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'html_report_file',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_CELLRANGER_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True})
  # Box upload of the same html report file(s).
  upload_cellranger_report_to_box = PythonOperator(
    task_id='upload_cellranger_report_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'html_report_file',
      'analysis_tag': 'cellranger_multi'})
  # iRODS upload of the files listed under 'loaded_output_files'.
  upload_cellranger_results_to_irods = PythonOperator(
    task_id='upload_cellranger_results_to_irods',
    python_callable=irods_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_cellranger_result_to_db',
      'xcom_pull_files_key': 'loaded_output_files',
      'collection_name_key': 'sample_igf_id',
      'collection_name_task': 'load_cellranger_result_to_db',
      'analysis_name': 'cellranger_multi'})
  ## PIPELINE
  # branch -> load -> three independent uploads
  decide_analysis_branch >> load_cellranger_result_to_db
  load_cellranger_result_to_db >> [
    upload_cellranger_report_to_ftp,
    upload_cellranger_report_to_box,
    upload_cellranger_results_to_irods]
  ## TASK
  # Scanpy branch: runs the notebook wrapper with analysis_name 'scanpy' on
  # the cellranger 'count' directory. The notebook is published under
  # 'scanpy_notebook' and the cellbrowser directories under
  # 'cellbrowser_dirs'.
  run_scanpy_for_sc_5p = PythonOperator(
    task_id='run_scanpy_for_sc_5p',
    python_callable=run_singlecell_notebook_wrapper_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'count_dir': 'count',
      'analysis_name': 'scanpy',
      'output_notebook_key': 'scanpy_notebook',
      'output_cellbrowser_key': 'cellbrowser_dirs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description'})
  # Loads count/metrics_summary.csv with the CELLRANGER_COUNT prefix.
  # NOTE(review): this and the tasks below pull 'sample_igf_id' from
  # 'load_cellranger_result_to_db', which is a sibling branch rather than
  # an upstream task - confirm that xcom is always available in time.
  load_cellranger_gex_matrics_to_db = PythonOperator(
    task_id='load_cellranger_gex_matrics_to_db',
    python_callable=load_cellranger_metrices_to_collection,
    queue='hpc_4G',
    dag=dag,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'CELLRANGER_MULTI',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'metrics_summary_file': 'count/metrics_summary.csv',
      'attribute_prefix': 'CELLRANGER_COUNT'})
  # Loads the scanpy notebook as a SCANPY_HTML collection; the loaded file
  # list is published under 'output_db_files'.
  load_scanpy_report_for_sc_5p_to_db = PythonOperator(
    task_id='load_scanpy_report_for_sc_5p_to_db',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scanpy_for_sc_5p',
      'file_name_key': 'scanpy_notebook',
      'analysis_name': 'scanpy_5p',
      'collection_type': 'SCANPY_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files'})
  # FTP upload of the loaded scanpy report.
  upload_scanpy_report_for_sc_5p_to_ftp = PythonOperator(
    task_id='upload_scanpy_report_for_sc_5p_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_scanpy_report_for_sc_5p_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCANPY_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True})
  # Box upload of the loaded scanpy report.
  upload_scanpy_report_for_sc_5p_to_box = PythonOperator(
    task_id='upload_scanpy_report_for_sc_5p_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_scanpy_report_for_sc_5p_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'scanpy_single_sample_report'})
  # FTP upload of the cellbrowser directories produced by the scanpy task.
  upload_cellbrowser_for_sc_5p_to_ftp = PythonOperator(
    task_id='upload_cellbrowser_for_sc_5p_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'run_scanpy_for_sc_5p',
      'xcom_pull_files_key': 'cellbrowser_dirs',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_CELLBROWSER',
      'collection_table': 'sample',
      'collect_remote_file': True})
  ## PIPELINE
  # branch -> scanpy -> {report load -> ftp/box, metrics load, cellbrowser}
  decide_analysis_branch >> run_scanpy_for_sc_5p
  run_scanpy_for_sc_5p >> [
    load_scanpy_report_for_sc_5p_to_db,
    load_cellranger_gex_matrics_to_db,
    upload_cellbrowser_for_sc_5p_to_ftp]
  load_scanpy_report_for_sc_5p_to_db >> [
    upload_scanpy_report_for_sc_5p_to_ftp,
    upload_scanpy_report_for_sc_5p_to_box]
  ## TASK
  # Scirpy branch for the 'vdj_b' output directory: runs the notebook
  # wrapper with analysis_name 'scirpy' and publishes the notebook under
  # 'scirpy_notebook'.
  run_scirpy_for_vdj_b = PythonOperator(
    task_id='run_scirpy_for_vdj_b',
    python_callable=run_singlecell_notebook_wrapper_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'analysis_name': 'scirpy',
      'vdj_dir': 'vdj_b',
      'count_dir': 'count',
      'output_notebook_key': 'scirpy_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description'})
  # Loads vdj_b/metrics_summary.csv with the CELLRANGER_VDJB prefix.
  load_cellranger_vdjB_matrics_to_db = PythonOperator(
    task_id='load_cellranger_vdjB_matrics_to_db',
    python_callable=load_cellranger_metrices_to_collection,
    queue='hpc_4G',
    dag=dag,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'CELLRANGER_MULTI',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'metrics_summary_file': 'vdj_b/metrics_summary.csv',
      'attribute_prefix': 'CELLRANGER_VDJB'})
  # Loads the scirpy notebook as a SCIRPY_VDJ_B_HTML collection; the loaded
  # file list is published under 'output_db_files'.
  load_scirpy_report_for_vdj_b_to_db = PythonOperator(
    task_id='load_scirpy_report_for_vdj_b_to_db',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scirpy_for_vdj_b',
      'file_name_key': 'scirpy_notebook',
      'analysis_name': 'scirpy_vdj_b',
      'collection_type': 'SCIRPY_VDJ_B_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files'})
  # FTP upload of the loaded vdj_b scirpy report.
  upload_scirpy_report_for_vdj_b_to_ftp = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_b_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_b_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCIRPY_VDJ_B_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True})
  # Box upload of the vdj_b scirpy report loaded by
  # 'load_scirpy_report_for_vdj_b_to_db'.
  # The task_id previously read 'upload_scanpy_report_for_vdj_b_to_box';
  # it now matches the variable name and the vdj_t counterpart
  # ('upload_scirpy_report_for_vdj_t_to_box'). Task history recorded under
  # the old id will not be associated with the renamed task.
  upload_scirpy_report_for_vdj_b_to_box = \
    PythonOperator(
      task_id='upload_scirpy_report_for_vdj_b_to_box',
      dag=dag,
      queue='hpc_4G',
      python_callable=upload_analysis_file_to_box,
      params={'xcom_pull_task':'load_scirpy_report_for_vdj_b_to_db',
              'xcom_pull_files_key':'output_db_files',
              'analysis_tag':'scirpy_vdj_b_single_sample_report'})
  ## PIPELINE
  # branch -> scirpy (vdj_b) -> {report load -> ftp/box, metrics load}
  decide_analysis_branch >> run_scirpy_for_vdj_b
  run_scirpy_for_vdj_b >> [
    load_scirpy_report_for_vdj_b_to_db,
    load_cellranger_vdjB_matrics_to_db]
  load_scirpy_report_for_vdj_b_to_db >> [
    upload_scirpy_report_for_vdj_b_to_ftp,
    upload_scirpy_report_for_vdj_b_to_box]
  ## TASK
  # Scirpy branch for the 'vdj_t' output directory; same layout as the
  # vdj_b branch, with the vdj_t directory, prefix and collection types.
  run_scirpy_for_vdj_t = PythonOperator(
    task_id='run_scirpy_for_vdj_t',
    python_callable=run_singlecell_notebook_wrapper_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'python3',
      'analysis_name': 'scirpy',
      'vdj_dir': 'vdj_t',
      'count_dir': 'count',
      'output_notebook_key': 'scirpy_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description'})
  # Loads vdj_t/metrics_summary.csv with the CELLRANGER_VDJT prefix.
  load_cellranger_vdjT_matrics_to_db = PythonOperator(
    task_id='load_cellranger_vdjT_matrics_to_db',
    python_callable=load_cellranger_metrices_to_collection,
    queue='hpc_4G',
    dag=dag,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'collection_type': 'CELLRANGER_MULTI',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'metrics_summary_file': 'vdj_t/metrics_summary.csv',
      'attribute_prefix': 'CELLRANGER_VDJT'})
  # Loads the scirpy notebook as a SCIRPY_VDJ_T_HTML collection; the loaded
  # file list is published under 'output_db_files'.
  load_scirpy_report_for_vdj_t_to_db = PythonOperator(
    task_id='load_scirpy_report_for_vdj_t_to_db',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_scirpy_for_vdj_t',
      'file_name_key': 'scirpy_notebook',
      'analysis_name': 'scirpy_vdj_t',
      'collection_type': 'SCIRPY_VDJ_T_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files'})
  # FTP upload of the loaded vdj_t scirpy report.
  upload_scirpy_report_for_vdj_t_to_ftp = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_t_to_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_t_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SCIRPY_VDJ_T_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True})
  # Box upload of the loaded vdj_t scirpy report.
  upload_scirpy_report_for_vdj_t_to_box = PythonOperator(
    task_id='upload_scirpy_report_for_vdj_t_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_scirpy_report_for_vdj_t_to_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'scirpy_vdj_t_single_sample_report'})
  ## PIPELINE
  # branch -> scirpy (vdj_t) -> {report load -> ftp/box, metrics load}
  decide_analysis_branch >> run_scirpy_for_vdj_t
  run_scirpy_for_vdj_t >> [
    load_scirpy_report_for_vdj_t_to_db,
    load_cellranger_vdjT_matrics_to_db]
  load_scirpy_report_for_vdj_t_to_db >> [
    upload_scirpy_report_for_vdj_t_to_ftp,
    upload_scirpy_report_for_vdj_t_to_box]
  ## TASK
  # Seurat branch: the notebook wrapper is run with the 'ir' kernel and
  # analysis_name 'seurat'; the notebook is published under
  # 'seurat_notebook'.
  run_seurat_for_sc_5p = PythonOperator(
    task_id='run_seurat_for_sc_5p',
    python_callable=run_singlecell_notebook_wrapper_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'cellranger_xcom_key': 'cellranger_output',
      'cellranger_xcom_pull_task': 'run_cellranger',
      'scanpy_timeout': 1200,
      'allow_errors': False,
      'kernel_name': 'ir',
      'analysis_name': 'seurat',
      'vdj_dir': 'vdj',
      'count_dir': 'count',
      'output_notebook_key': 'seurat_notebook',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description'})
  # Loads the seurat notebook as a SEURAT_HTML collection; the loaded file
  # list is published under 'output_db_files'.
  load_seurat_report_for_sc_5p_db = PythonOperator(
    task_id='load_seurat_report_for_sc_5p_db',
    python_callable=load_analysis_files_func,
    queue='hpc_4G',
    dag=dag,
    params={
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'file_name_task': 'run_seurat_for_sc_5p',
      'file_name_key': 'seurat_notebook',
      'analysis_name': 'seurat_5p',
      'collection_type': 'SEURAT_HTML',
      'collection_table': 'sample',
      'output_files_key': 'output_db_files'})
  # FTP upload of the loaded seurat report.
  upload_seurat_report_for_sc_5p_ftp = PythonOperator(
    task_id='upload_seurat_report_for_sc_5p_ftp',
    python_callable=ftp_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_seurat_report_for_sc_5p_db',
      'xcom_pull_files_key': 'output_db_files',
      'collection_name_task': 'load_cellranger_result_to_db',
      'collection_name_key': 'sample_igf_id',
      'collection_type': 'FTP_SEURAT_HTML',
      'collection_table': 'sample',
      'collect_remote_file': True})
  # Box upload of the loaded seurat report.
  upload_seurat_report_for_sc_5p_to_box = PythonOperator(
    task_id='upload_seurat_report_for_sc_5p_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'load_seurat_report_for_sc_5p_db',
      'xcom_pull_files_key': 'output_db_files',
      'analysis_tag': 'seurat_single_sample_report'})
  ## PIPELINE
  # branch -> seurat -> report load -> ftp/box
  decide_analysis_branch >> run_seurat_for_sc_5p
  run_seurat_for_sc_5p >> load_seurat_report_for_sc_5p_db
  load_seurat_report_for_sc_5p_db >> [
    upload_seurat_report_for_sc_5p_ftp,
    upload_seurat_report_for_sc_5p_to_box]
  ## TASK
  # Runs convert_bam_to_cram_func on the 'run_cellranger' output with four
  # threads; the resulting file list is published under 'cram_files'.
  convert_cellranger_bam_to_cram = PythonOperator(
    task_id='convert_cellranger_bam_to_cram',
    python_callable=convert_bam_to_cram_func,
    queue='hpc_4G4t',
    dag=dag,
    params={
      'xcom_pull_files_key': 'cellranger_output',
      'xcom_pull_task': 'run_cellranger',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'threads': 4,
      'analysis_name': 'cellranger',
      'collection_type': 'ANALYSIS_CRAM',
      'collection_table': 'sample',
      'cram_files_xcom_key': 'cram_files'})
  ## PIPELINE
  decide_analysis_branch >> convert_cellranger_bam_to_cram
  ## TASK
  # Runs generate_cell_sorted_bam_func, which is given the input bam path
  # 'count/possorted_genome_bam.bam' and the output path
  # 'count/cellsorted_possorted_genome_bam.bam'; both are presumably
  # relative to the cellranger output directory pulled from 'run_cellranger'.
  # NOTE(review): 7 threads x 2G samtools memory is presumably sized for
  # the 'hpc_16G8t' queue - confirm before changing either value.
  generate_cell_sorted_bam = PythonOperator(
    task_id='generate_cell_sorted_bam',
    python_callable=generate_cell_sorted_bam_func,
    queue='hpc_16G8t',
    dag=dag,
    params={
      'xcom_pull_task': 'run_cellranger',
      'xcom_pull_files_key': 'cellranger_output',
      'cellranger_bam_path': 'count/possorted_genome_bam.bam',
      'cellsorted_bam_path': 'count/cellsorted_possorted_genome_bam.bam',
      'samtools_mem': '2G',
      'threads': 7})
  # Runs run_velocyto_func on the 'run_cellranger' output using the
  # analysis description from 'fetch_analysis_info'.
  # dag=dag is passed explicitly, like every other operator in this file,
  # instead of relying solely on the enclosing `with dag:` context.
  run_velocyto = \
    PythonOperator(
      task_id='run_velocyto',
      dag=dag,
      queue='hpc_8G_long',
      python_callable=run_velocyto_func,
      params={'xcom_pull_task': 'run_cellranger',
              'xcom_pull_files_key': 'cellranger_output',
              'analysis_description_xcom_pull_task': 'fetch_analysis_info',
              'analysis_description_xcom_key': 'analysis_description' })
  # Placeholder (DummyOperator) tasks for the scvelo / loom part of the
  # pipeline; they perform no work and only reserve the graph structure.
  run_scvelo_for_sc_5p = DummyOperator(
    task_id='run_scvelo_for_sc_5p', dag=dag)
  load_loom_file_to_rds = DummyOperator(
    task_id='load_loom_file_to_rds', dag=dag)
  upload_loom_file_to_irods = DummyOperator(
    task_id='upload_loom_file_to_irods', dag=dag)
  load_scvelo_report_to_rds = DummyOperator(
    task_id='load_scvelo_report_to_rds', dag=dag)
  upload_scvelo_report_to_ftp = DummyOperator(
    task_id='upload_scvelo_report_to_ftp', dag=dag)
  upload_scvelo_report_to_box = DummyOperator(
    task_id='upload_scvelo_report_to_box', dag=dag)
  ## PIPELINE
  # cram conversion -> cell sorted bam -> velocyto, then two arms:
  #   velocyto -> scvelo -> report load -> ftp/box
  #   velocyto -> loom load -> irods
  convert_cellranger_bam_to_cram >> generate_cell_sorted_bam
  generate_cell_sorted_bam >> run_velocyto
  run_velocyto >> [run_scvelo_for_sc_5p, load_loom_file_to_rds]
  load_loom_file_to_rds >> upload_loom_file_to_irods
  run_scvelo_for_sc_5p >> load_scvelo_report_to_rds
  load_scvelo_report_to_rds >> [
    upload_scvelo_report_to_ftp,
    upload_scvelo_report_to_box]
  ## TASK
  # Branch task running index_and_copy_bam_for_parallel_analysis on the
  # 'run_cellranger' output. 'list_of_tasks' holds the ids of the QC tasks
  # that follow; the picard tasks pull an xcom keyed by their own task id
  # from this task.
  copy_bam_for_parallel_runs = BranchPythonOperator(
    task_id='copy_bam_for_parallel_runs',
    python_callable=index_and_copy_bam_for_parallel_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_files_key': 'cellranger_output',
      'xcom_pull_task': 'run_cellranger',
      'list_of_tasks': [
        'run_picard_alignment_summary',
        'run_picard_qual_summary',
        'run_picard_rna_summary',
        'run_picard_gc_summary',
        'run_picard_base_dist_summary',
        'run_samtools_stats']})
  ## TASK
  # iRODS upload of the files published under 'cram_files' by
  # 'convert_cellranger_bam_to_cram'.
  upload_cram_to_irods = PythonOperator(
    task_id='upload_cram_to_irods',
    python_callable=irods_files_upload_for_analysis,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'convert_cellranger_bam_to_cram',
      'xcom_pull_files_key': 'cram_files',
      'collection_name_key': 'sample_igf_id',
      'collection_name_task': 'load_cellranger_result_to_db',
      'analysis_name': 'cellranger_multi'})
  ## PIPELINE
  # copy_bam_for_parallel_runs hangs off generate_cell_sorted_bam; the
  # direct edge from convert_cellranger_bam_to_cram is deliberately left
  # disabled (original note: "we need to load metrics to cram").
  generate_cell_sorted_bam >> copy_bam_for_parallel_runs
  convert_cellranger_bam_to_cram >> upload_cram_to_irods
  ## TASK
  # Picard CollectAlignmentSummaryMetrics via run_picard_for_cellranger.
  # The input is pulled from 'copy_bam_for_parallel_runs' under the key
  # 'run_picard_alignment_summary' (this task's own id); output files are
  # published under 'picard_alignment_summary'.
  run_picard_alignment_summary = PythonOperator(
    task_id='run_picard_alignment_summary',
    python_callable=run_picard_for_cellranger,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_files_key': 'run_picard_alignment_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'load_metrics_to_cram': True,
      'java_param': '-Xmx4g',
      'picard_command': 'CollectAlignmentSummaryMetrics',
      'picard_option': {},
      'analysis_files_xcom_key': 'picard_alignment_summary',
      'bam_files_xcom_key': None})
  ## PIPELINE
  copy_bam_for_parallel_runs >> run_picard_alignment_summary
  ## TASK
  # Runs clean_up_files on the same xcom entry the picard task consumed.
  cleanup_picard_alignment_summary_input = PythonOperator(
    task_id='cleanup_picard_alignment_summary_input',
    python_callable=clean_up_files,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_files_key': 'run_picard_alignment_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs'})
  ## PIPELINE
  run_picard_alignment_summary >> cleanup_picard_alignment_summary_input
  ## TASK
  # Box upload of the picard alignment summary output.
  upload_picard_alignment_summary_to_box = PythonOperator(
    task_id='upload_picard_alignment_summary_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'run_picard_alignment_summary',
      'xcom_pull_files_key': 'picard_alignment_summary',
      'analysis_tag': 'Picard-CollectAlignmentSummaryMetrics'})
  ## PIPELINE
  run_picard_alignment_summary >> upload_picard_alignment_summary_to_box
  ## TASK
  # Picard QualityScoreDistribution via run_picard_for_cellranger.
  # The input is pulled from 'copy_bam_for_parallel_runs' under the key
  # 'run_picard_qual_summary' (this task's own id); output files are
  # published under 'picard_qual_summary'.
  run_picard_qual_summary = PythonOperator(
    task_id='run_picard_qual_summary',
    python_callable=run_picard_for_cellranger,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_files_key': 'run_picard_qual_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs',
      'analysis_description_xcom_pull_task': 'fetch_analysis_info',
      'analysis_description_xcom_key': 'analysis_description',
      'use_ephemeral_space': True,
      'load_metrics_to_cram': True,
      'java_param': '-Xmx4g',
      'picard_command': 'QualityScoreDistribution',
      'picard_option': {},
      'analysis_files_xcom_key': 'picard_qual_summary',
      'bam_files_xcom_key': None})
  ## PIPELINE
  copy_bam_for_parallel_runs >> run_picard_qual_summary
  ## TASK
  # Runs clean_up_files on the same xcom entry the picard task consumed.
  cleanup_picard_qual_summary_input = PythonOperator(
    task_id='cleanup_picard_qual_summary_input',
    python_callable=clean_up_files,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_files_key': 'run_picard_qual_summary',
      'xcom_pull_task': 'copy_bam_for_parallel_runs'})
  ## PIPELINE
  run_picard_qual_summary >> cleanup_picard_qual_summary_input
  ## TASK
  # Box upload of the picard quality score distribution output.
  upload_picard_qual_summary_to_box = PythonOperator(
    task_id='upload_picard_qual_summary_to_box',
    python_callable=upload_analysis_file_to_box,
    queue='hpc_4G',
    dag=dag,
    params={
      'xcom_pull_task': 'run_picard_qual_summary',
      'xcom_pull_files_key': 'picard_qual_summary',
      'analysis_tag': 'Picard-QualityScoreDistribution'})
  ## PIPELINE
  run_picard_qual_summary >> upload_picard_qual_summary_to_box
  704. ## TASK
  705. run_picard_rna_summary = \
  706. PythonOperator(
  707. task_id='run_picard_rna_summary',
  708. dag=dag,
  709. queue='hpc_8G',
  710. python_callable=run_picard_for_cellranger,
  711. params={'xcom_pull_files_key':'run_picard_rna_summary',
  712. 'xcom_pull_task':'copy_bam_for_parallel_runs',
  713. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  714. 'analysis_description_xcom_key':'analysis_description',
  715. 'use_ephemeral_space':True,
  716. 'load_metrics_to_cram':True,
  717. 'java_param':'-Xmx7g',
  718. 'picard_command':'CollectRnaSeqMetrics',
  719. 'picard_option':{},
  720. 'analysis_files_xcom_key':'picard_rna_summary',
  721. 'bam_files_xcom_key':None})
  722. ## PIPELINE
  723. copy_bam_for_parallel_runs >> run_picard_rna_summary
  724. ## TASK
  725. cleanup_picard_rna_summary_input = \
  726. PythonOperator(
  727. task_id='cleanup_picard_rna_summary_input',
  728. dag=dag,
  729. queue='hpc_4G',
  730. python_callable=clean_up_files,
  731. params={'xcom_pull_files_key':'run_picard_rna_summary',
  732. 'xcom_pull_task':'copy_bam_for_parallel_runs'})
  733. ## PIPELINE
  734. run_picard_rna_summary >> cleanup_picard_rna_summary_input
  735. ## TASK
  736. upload_picard_rna_summary_to_box = \
  737. PythonOperator(
  738. task_id='upload_picard_rna_summary_to_box',
  739. dag=dag,
  740. queue='hpc_4G',
  741. python_callable=upload_analysis_file_to_box,
  742. params={'xcom_pull_task':'run_picard_rna_summary',
  743. 'xcom_pull_files_key':'picard_rna_summary',
  744. 'analysis_tag':'Picard-CollectRnaSeqMetrics'})
  745. ## PIPELINE
  746. run_picard_rna_summary >> upload_picard_rna_summary_to_box
  747. ## TASK
  748. run_picard_gc_summary = \
  749. PythonOperator(
  750. task_id='run_picard_gc_summary',
  751. dag=dag,
  752. queue='hpc_4G',
  753. python_callable=run_picard_for_cellranger,
  754. params={'xcom_pull_files_key':'run_picard_gc_summary',
  755. 'xcom_pull_task':'copy_bam_for_parallel_runs',
  756. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  757. 'analysis_description_xcom_key':'analysis_description',
  758. 'use_ephemeral_space':True,
  759. 'load_metrics_to_cram':True,
  760. 'java_param':'-Xmx4g',
  761. 'picard_command':'CollectGcBiasMetrics',
  762. 'picard_option':{},
  763. 'analysis_files_xcom_key':'picard_gc_summary',
  764. 'bam_files_xcom_key':None})
  765. ## PIPELINE
  766. copy_bam_for_parallel_runs >> run_picard_gc_summary
  767. ## TASK
  768. cleanup_picard_gc_summary_input = \
  769. PythonOperator(
  770. task_id='cleanup_picard_gc_summary_input',
  771. dag=dag,
  772. queue='hpc_4G',
  773. python_callable=clean_up_files,
  774. params={'xcom_pull_files_key':'run_picard_gc_summary',
  775. 'xcom_pull_task':'copy_bam_for_parallel_runs'})
  776. ## PIPELINE
  777. run_picard_gc_summary >> cleanup_picard_gc_summary_input
  778. ## TASK
  779. upload_picard_gc_summary_to_box = \
  780. PythonOperator(
  781. task_id='upload_picard_gc_summary_to_box',
  782. dag=dag,
  783. queue='hpc_4G',
  784. python_callable=upload_analysis_file_to_box,
  785. params={'xcom_pull_task':'run_picard_gc_summary',
  786. 'xcom_pull_files_key':'picard_gc_summary',
  787. 'analysis_tag':'Picard-CollectGcBiasMetrics'})
  788. ## PIPELINE
  789. run_picard_gc_summary >> upload_picard_gc_summary_to_box
  790. ## TASK
  791. run_picard_base_dist_summary = \
  792. PythonOperator(
  793. task_id='run_picard_base_dist_summary',
  794. dag=dag,
  795. queue='hpc_4G',
  796. python_callable=run_picard_for_cellranger,
  797. params={'xcom_pull_files_key':'run_picard_base_dist_summary',
  798. 'xcom_pull_task':'copy_bam_for_parallel_runs',
  799. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  800. 'analysis_description_xcom_key':'analysis_description',
  801. 'use_ephemeral_space':True,
  802. 'load_metrics_to_cram':True,
  803. 'java_param':'-Xmx4g',
  804. 'picard_command':'CollectBaseDistributionByCycle',
  805. 'picard_option':{},
  806. 'analysis_files_xcom_key':'picard_base_summary',
  807. 'bam_files_xcom_key':None})
  808. ## PIPELINE
  809. copy_bam_for_parallel_runs >> run_picard_base_dist_summary
  810. ## TASK
  811. cleanup_picard_base_dist_summary_input = \
  812. PythonOperator(
  813. task_id='cleanup_picard_base_dist_summary_input',
  814. dag=dag,
  815. queue='hpc_4G',
  816. python_callable=clean_up_files,
  817. params={'xcom_pull_files_key':'run_picard_base_dist_summary',
  818. 'xcom_pull_task':'copy_bam_for_parallel_runs'})
  819. ## PIPELINE
  820. run_picard_base_dist_summary >> cleanup_picard_base_dist_summary_input
  821. ## TASK
  822. upload_picard_base_dist_summary_to_box = \
  823. PythonOperator(
  824. task_id='upload_picard_base_dist_summary_to_box',
  825. dag=dag,
  826. queue='hpc_4G',
  827. python_callable=upload_analysis_file_to_box,
  828. params={'xcom_pull_task':'run_picard_base_dist_summary',
  829. 'xcom_pull_files_key':'picard_base_summary',
  830. 'analysis_tag':'Picard-CollectBaseDistributionByCycle'})
  831. ## PIPELINE
  832. run_picard_base_dist_summary >> upload_picard_base_dist_summary_to_box
  833. ## TASK
  834. run_samtools_stats = \
  835. PythonOperator(
  836. task_id='run_samtools_stats',
  837. dag=dag,
  838. queue='hpc_4G4t',
  839. python_callable=run_samtools_for_cellranger,
  840. params={'xcom_pull_files_key':'run_samtools_stats',
  841. 'xcom_pull_task':'copy_bam_for_parallel_runs',
  842. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  843. 'analysis_description_xcom_key':'analysis_description',
  844. 'use_ephemeral_space':True,
  845. 'load_metrics_to_cram':True,
  846. 'samtools_command':'stats',
  847. 'threads':4,
  848. 'analysis_files_xcom_key':'samtools_stats'})
  849. ## PIPELINE
  850. copy_bam_for_parallel_runs >> run_samtools_stats
  851. ## TASK
  852. upload_samtools_stats_to_box = \
  853. PythonOperator(
  854. task_id='upload_samtools_stats_to_box',
  855. dag=dag,
  856. queue='hpc_4G',
  857. python_callable=upload_analysis_file_to_box,
  858. params={'xcom_pull_task':'run_samtools_stats',
  859. 'xcom_pull_files_key':'samtools_stats',
  860. 'analysis_tag':'Samtools-stats'})
  861. ## PIPELINE
  862. run_samtools_stats >> upload_samtools_stats_to_box
  863. ## TASK
  864. run_samtools_idxstats = \
  865. PythonOperator(
  866. task_id='run_samtools_idxstats',
  867. dag=dag,
  868. queue='hpc_4G4t',
  869. python_callable=run_samtools_for_cellranger,
  870. params={'xcom_pull_files_key':'run_samtools_stats',
  871. 'xcom_pull_task':'copy_bam_for_parallel_runs',
  872. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  873. 'analysis_description_xcom_key':'analysis_description',
  874. 'use_ephemeral_space':True,
  875. 'load_metrics_to_cram':True,
  876. 'samtools_command':'idxstats',
  877. 'threads':4,
  878. 'analysis_files_xcom_key':'samtools_idxstats'})
  879. ## PIPELINE
  880. run_samtools_stats >> run_samtools_idxstats
  881. ## TASK
  882. cleanup_samtools_stats_input = \
  883. PythonOperator(
  884. task_id='cleanup_samtools_stats_input',
  885. dag=dag,
  886. queue='hpc_4G',
  887. python_callable=clean_up_files,
  888. params={'xcom_pull_files_key':'run_samtools_stats',
  889. 'xcom_pull_task':'copy_bam_for_parallel_runs'})
  890. ## PIPELINE
  891. run_samtools_idxstats >> cleanup_samtools_stats_input
  892. ## TASK
  893. upload_samtools_idxstats_to_box = \
  894. PythonOperator(
  895. task_id='upload_samtools_idxstats_to_box',
  896. dag=dag,
  897. queue='hpc_4G',
  898. python_callable=upload_analysis_file_to_box,
  899. params={'xcom_pull_task':'run_samtools_idxstats',
  900. 'xcom_pull_files_key':'samtools_idxstats',
  901. 'analysis_tag':'Samtools-idxstats'})
  902. ## PIPELINE
  903. run_samtools_idxstats >> upload_samtools_idxstats_to_box
  904. ## TASK
  905. run_multiqc = \
  906. PythonOperator(
  907. task_id='run_multiqc',
  908. dag=dag,
  909. queue='hpc_4G',
  910. trigger_rule='none_failed_or_skipped',
  911. python_callable=run_multiqc_for_cellranger,
  912. params={
  913. 'list_of_analysis_xcoms_and_tasks':{
  914. 'run_cellranger':'cellranger_output',
  915. 'run_picard_alignment_summary':'picard_alignment_summary',
  916. 'run_picard_qual_summary':'picard_qual_summary',
  917. 'run_picard_rna_summary':'picard_rna_summary',
  918. 'run_picard_gc_summary':'picard_gc_summary',
  919. 'run_picard_base_dist_summary':'picard_base_summary',
  920. 'run_samtools_stats':'samtools_stats',
  921. 'run_samtools_idxstats':'samtools_idxstats'},
  922. 'analysis_description_xcom_pull_task':'fetch_analysis_info',
  923. 'analysis_description_xcom_key':'analysis_description',
  924. 'use_ephemeral_space':True,
  925. 'multiqc_html_file_xcom_key':'multiqc_html',
  926. 'multiqc_data_file_xcom_key':'multiqc_data',
  927. 'tool_order_list':['picad','samtools']})
  928. ## PIPELINE
  929. run_picard_alignment_summary >> run_multiqc
  930. run_picard_qual_summary >> run_multiqc
  931. run_picard_rna_summary >> run_multiqc
  932. run_picard_gc_summary >> run_multiqc
  933. run_picard_base_dist_summary >> run_multiqc
  934. run_samtools_idxstats >> run_multiqc
  935. ## TASK
  936. load_multiqc_html = \
  937. PythonOperator(
  938. task_id='load_multiqc_html',
  939. dag=dag,
  940. queue='hpc_4G',
  941. python_callable=load_analysis_files_func,
  942. params={'collection_name_task':'load_cellranger_result_to_db',
  943. 'collection_name_key':'sample_igf_id',
  944. 'file_name_task':'run_multiqc',
  945. 'file_name_key':'multiqc_html',
  946. 'analysis_name':'multiqc',
  947. 'collection_type':'MULTIQC_HTML',
  948. 'collection_table':'sample',
  949. 'output_files_key':'output_db_files'})
  950. ## PIPELINE
  951. run_multiqc >> load_multiqc_html
  952. ## TASK
  953. upload_multiqc_to_ftp = \
  954. PythonOperator(
  955. task_id='upload_multiqc_to_ftp',
  956. dag=dag,
  957. queue='hpc_4G',
  958. python_callable=ftp_files_upload_for_analysis,
  959. params={'xcom_pull_task':'load_multiqc_html',
  960. 'xcom_pull_files_key':'output_db_files',
  961. 'collection_name_task':'load_cellranger_result_to_db',
  962. 'collection_name_key':'sample_igf_id',
  963. 'collection_type':'FTP_MULTIQC_HTML',
  964. 'collection_table':'sample',
  965. 'collect_remote_file':True})
  966. ## PIPELINE
  967. load_multiqc_html >> upload_multiqc_to_ftp
  968. ## TASK
  969. upload_multiqc_to_box = \
  970. PythonOperator(
  971. task_id='upload_multiqc_to_box',
  972. dag=dag,
  973. queue='hpc_4G',
  974. python_callable=upload_analysis_file_to_box,
  975. params={'xcom_pull_task':'load_multiqc_html',
  976. 'xcom_pull_files_key':'output_db_files',
  977. 'analysis_tag':'multiqc_report'})
  978. ## PIPELINE
  979. load_multiqc_html >> upload_multiqc_to_box
  980. ## TASK
  981. update_analysis_and_status = \
  982. PythonOperator(
  983. task_id='update_analysis_and_status',
  984. dag=dag,
  985. queue='hpc_4G',
  986. python_callable=change_pipeline_status,
  987. trigger_rule='none_failed_or_skipped',
  988. params={'new_status':'FINISHED',
  989. 'no_change_status':'SEEDED'})
  990. ## PIPELINE
  991. upload_multiqc_to_ftp >> update_analysis_and_status
  992. upload_scanpy_report_for_sc_5p_to_ftp >> update_analysis_and_status
  993. upload_scanpy_report_for_sc_5p_to_box >> update_analysis_and_status
  994. upload_cellbrowser_for_sc_5p_to_ftp >> update_analysis_and_status
  995. #upload_scirpy_report_for_vdj_to_ftp >> update_analysis_and_status # no more ambiguous VDJ
  996. #upload_scirpy_report_for_vdj_to_box >> update_analysis_and_status
  997. upload_scirpy_report_for_vdj_b_to_ftp >> update_analysis_and_status
  998. upload_scirpy_report_for_vdj_b_to_box >> update_analysis_and_status
  999. upload_scirpy_report_for_vdj_t_to_ftp >> update_analysis_and_status
  1000. upload_scirpy_report_for_vdj_t_to_box >> update_analysis_and_status
  1001. upload_seurat_report_for_sc_5p_ftp >> update_analysis_and_status
  1002. upload_seurat_report_for_sc_5p_to_box >> update_analysis_and_status
  1003. upload_cellranger_results_to_irods >> update_analysis_and_status
  1004. upload_cellranger_report_to_ftp >> update_analysis_and_status
  1005. upload_cellranger_report_to_box >> update_analysis_and_status
  1006. upload_cram_to_irods >> update_analysis_and_status
  1007. upload_scvelo_report_to_box >> update_analysis_and_status
  1008. upload_scvelo_report_to_ftp >> update_analysis_and_status
  1009. ## TASK
  1010. update_qc_pages = \
  1011. PythonOperator(
  1012. task_id='update_qc_pages',
  1013. dag=dag,
  1014. queue='hpc_4G',
  1015. python_callable=create_and_update_qc_pages,
  1016. params={'use_ephemeral_space':True,
  1017. 'collection_type_list':[
  1018. 'FTP_MULTIQC_HTML',
  1019. 'FTP_SEURAT_HTML',
  1020. 'FTP_SCIRPY_VDJ_T_HTML',
  1021. 'FTP_SCIRPY_VDJ_B_HTML',
  1022. 'FTP_SCIRPY_VDJ_HTML',
  1023. 'FTP_CELLBROWSER',
  1024. 'FTP_SCANPY_HTML',
  1025. 'FTP_CELLRANGER_HTML']})
  1026. ## PIPELINE
  1027. update_analysis_and_status >> update_qc_pages