diff --git a/CITATIONS.md b/CITATIONS.md index 9d0ea67f..a2058a53 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -16,6 +16,14 @@ > Servant N, Varoquaux N, Lajoie BR, Viara E, Chen C, Vert JP, Dekker J, Heard E, Barillot E. HiC-Pro: An optimized and flexible pipeline for Hi-C processing. Genome Biology 2015, 16:259 doi: [10.1186/s13059-015-0831-x](https://dx.doi.org/10.1186/s13059-015-0831-x) +## [HiCExplorer](https://hicexplorer.readthedocs.io/en/latest/index.html) + +> Joachim Wolff, Leily Rabbani, Ralf Gilsbach, Gautier Richard, Thomas Manke, Rolf Backofen, Björn A Grüning. Galaxy HiCExplorer 3: a web server for reproducible Hi-C, capture Hi-C and single-cell Hi-C data analysis, quality control and visualization, Nucleic Acids Research, Nucleic Acids Research, Volume 48, Issue W1, 02 July 2020, Pages W177–W184, [https://doi.org/10.1093/nar/gkaa220](https://doi.org/10.1093/nar/gkaa220) + +> Joachim Wolff, Vivek Bhardwaj, Stephan Nothjunge, Gautier Richard, Gina Renschler, Ralf Gilsbach, Thomas Manke, Rolf Backofen, Fidel Ramírez, Björn A Grüning. Galaxy HiCExplorer: a web server for reproducible Hi-C data analysis, quality control and visualization, Nucleic Acids Research, Volume 46, Issue W1, 2 July 2018, Pages W11–W16, doi: [https://doi.org/10.1093/nar/gky504](https://doi.org/10.1093/nar/gky504) + +> Fidel Ramirez, Vivek Bhardwaj, Jose Villaveces, Laura Arrigoni, Bjoern A Gruening,Kin Chung Lam, Bianca Habermann, Asifa Akhtar, Thomas Manke. “High-resolution TADs reveal DNA sequences underlying genome organization in flies”. Nature Communications, Volume 9, Article number: 189 (2018), doi: [https://doi.org/10.1038/s41467-017-02525-w](https://doi.org/10.1038/s41467-017-02525-w) + ## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/) > Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031. diff --git a/conf/modules.config b/conf/modules.config index f9e5c6ec..e14578d6 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -426,4 +426,39 @@ process { ext.args = '--correctForMultipleTesting fdr' ext.prefix = { "${cool.baseName}" } } + + //******************************** + // LOOPS + + withName: 'HICDETECTLOOPS' { + publishDir = [ + path: { "${params.outdir}/loops/hicDetectLoops" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + mode: 'copy' + ] + ext.args = { [ + params.maxloopdistance ? "--maxLoopDistance ${params.maxloopdistance}" : '', + params.peakwidth ? "--peakWidth ${params.peakwidth}" : '', + params.windowsize ? "--windowSize ${params.windowsize}" : '', + params.pvaluepreselection ? "--pValuePreselection ${params.pvaluepreselection}" : '', + params.peakinteractionsthreshold ? "--peakInteractionsThreshold ${params.peakinteractionsthreshold}" : '', + params.obsexpthreshold ? "--obsExpThreshold ${params.obsexpthreshold}" : '', + params.pvalue ? "--pValue ${params.pvalue}" : '', + params.expected ? "--expected ${params.expected}" : '' + ].join(' ').trim() } + } + + withName: 'HICPLOTMATRIX_LOOPS' { + publishDir = [ + path: { "${params.outdir}/loops/hicDetectLoops" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + mode: 'copy' + ] + ext.args = { [ + "--dpi 300", + params.loop_plot_ytransform ? "--${params.loop_plot_ytransform}" : '', + params.loop_plot_region ? "--region ${params.loop_plot_region}" : '', + params.loop_plot_region2 ? "--region2 ${params.loop_plot_region2}" : '' + ].join(' ').trim() } + } } diff --git a/conf/test.config b/conf/test.config index 78fb706a..ea1203c2 100644 --- a/conf/test.config +++ b/conf/test.config @@ -31,9 +31,13 @@ params { min_mapq = 10 // Parameters - bin_size = '2000,1000' + bin_size = '2000,1000,10000' res_dist_decay = '1000' res_tads = '1000' + res_loops = '10000' tads_caller = 'insulation,hicexplorer' res_compartments = '2000' + peakinteractionsthreshold = 2 + pvaluepreselection = 0.5 + pvalue = 0.5 } diff --git a/docs/output.md b/docs/output.md index 88470cf2..cc0facf1 100644 --- a/docs/output.md +++ b/docs/output.md @@ -270,6 +270,16 @@ Currently, the pipeline proposes two approaches : Usually, TADs results are presented as simple BED files, or bigWig files, with the position of boundaries along the genome. +### Loop calling + +Currently, the pipeline can follow these approaches for loop calling: + +- Detection of enriched interaction regions using [`HiCExplorer hicDetectLoops`](https://hicexplorer.readthedocs.io/en/latest/content/tools/hicDetectLoops.html). This tool can detect enriched interaction regions (peaks / loops) based on a strict candidate selection, negative binomial distributions and Wilcoxon rank-sum tests. Results are available at **`results/loops/hicDetectLoops`**. + +The output bedGraph file contains the x and y position of each loop and its corresponding p-value of the statistical test. + +These results are then visualized using hicPlotMatrix. + ### MultiQC
diff --git a/docs/usage.md b/docs/usage.md index 08c55e34..f55fb63b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -643,6 +643,27 @@ Default: '40000,20000' --res_tads '[string]' ``` +### Loop calling + +#### `--loop_caller` + +Loop calling can be performed using different approaches (WIP). +Currently available options are `hicexplorer` (hicDetectLoops). Note that all options can be specified (comma-separated). +Default: 'hicexplorer' + +```bash +--loop_caller '[string]' +``` + +#### `--res_loops` + +Resolution to run the loop calling analysis (comma-separated). +Default: '20000' + +```bash +--res_loops '[string]' +``` + ## Inputs/Outputs ### `--split_fastq` diff --git a/modules/local/hicexplorer/hicdetectloops/main.nf b/modules/local/hicexplorer/hicdetectloops/main.nf new file mode 100644 index 00000000..0346696b --- /dev/null +++ b/modules/local/hicexplorer/hicdetectloops/main.nf @@ -0,0 +1,43 @@ +process HICDETECTLOOPS { + label 'process_medium' + tag "$meta.id" + + conda "bioconda::hicexplorer=3.7.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hicexplorer:3.7.2--pyhdfd78af_1' : + 'biocontainers/hicexplorer:3.7.2--pyhdfd78af_1' }" + + input: + tuple val(meta), path(matrix) + + output: + tuple val(meta), path('*.bedGraph') , emit: bedgraph + path("versions.yml") , emit:versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}.hicdetectloops" + """ + hicDetectLoops \\ + ${args} \\ + --threads ${task.cpus} \\ + --matrix ${matrix} \\ + --outFileName ${prefix}.bedGraph + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hicexplorer: \$(hicDetectLoops --version 2>&1 | sed 's/hicDetectLoops //') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}.hicdetectloops" + """ + touch ${prefix}.bedGraph + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hicexplorer: \$(hicDetectLoops --version 2>&1 | sed 's/hicDetectLoops //') + END_VERSIONS + """ +} diff --git a/modules/local/hicexplorer/hicplotmatrix/main.nf b/modules/local/hicexplorer/hicplotmatrix/main.nf new file mode 100644 index 00000000..3e368f7d --- /dev/null +++ b/modules/local/hicexplorer/hicplotmatrix/main.nf @@ -0,0 +1,48 @@ +process HICPLOTMATRIX { + label 'process_medium' + tag "$meta.id" + + conda "bioconda::hicexplorer=3.7.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hicexplorer:3.7.2--pyhdfd78af_1' : + 'biocontainers/hicexplorer:3.7.2--pyhdfd78af_1' }" + + input: + tuple val(meta), path(matrix), path(tads), path(loops), path(bigwig) + + output: + tuple val(meta), path('*.png') , emit: png + path("versions.yml") , emit:versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}.hicplotmatrix" + def tads_arg = tads ? "--tads ${tads}" : '' + def loops_arg = loops ? "--loops ${loops}" : '' + def bigwig_arg = bigwig ? "--bigwig ${bigwig}" : '' + """ + hicPlotMatrix \\ + ${args} \\ + --matrix ${matrix} \\ + ${tads_arg} \\ + ${loops_arg} \\ + ${bigwig_arg} \\ + --outFileName ${prefix}.png + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hicexplorer: \$(hicPlotMatrix --version 2>&1 | sed 's/hicPlotMatrix //') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}.hicplotmatrix" + """ + touch ${prefix}.png + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hicexplorer: \$(hicPlotMatrix --version 2>&1 | sed 's/hicPlotMatrix //') + END_VERSIONS + """ +} diff --git a/nextflow.config b/nextflow.config index f18a8c3c..833d17ad 100644 --- a/nextflow.config +++ b/nextflow.config @@ -96,12 +96,28 @@ params { compartments_caller = 'cooltools' res_compartments = '250000' + // Loop calling + res_loops = '20000' + loop_caller = 'hicexplorer' + maxloopdistance = 2000000 + peakwidth = 2 + windowsize = 5 + pvaluepreselection = 0.1 + peakinteractionsthreshold = 10 + obsexpthreshold = 1.5 + pvalue = 0.025 + expected = 'mean' + loop_plot_ytransform = 'log1p' + loop_plot_region = null + loop_plot_region2 = null + // Workflow skip_maps = false skip_balancing = false skip_mcool = false skip_dist_decay = false skip_compartments = false + skip_loops = false skip_tads = false skip_multiqc = false diff --git a/nextflow_schema.json b/nextflow_schema.json index d44fa876..24607891 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -341,6 +341,66 @@ } } }, + "loop_calling": { + "title": "Loop calling", + "type": "object", + "description": "Set up loop calling options.", + "default": "", + "properties": { + "loop_caller": { + "type": "string", + "default": "hicexplorer", + "description": "Define methods for loop calling" + }, + "res_loops": { + "type": "string", + "pattern": "^(\\d+)(,\\d+)*$", + "default": "20000", + "description": "Resolution to run loops callers (comma separated)" + }, + "maxloopdistance": { + "type": "integer", + "default": 2000000, + "description": "Maximum genomic distance of a loop, usually loops are within a distance of ~2MB" + }, + "peakwidth": { + "type": "integer", + "default": 2, + "description": "The width of the peak region in bins. The square around the peak will include (2 * peakWidth)^2 bins" + }, + "windowsize": { + "type": "integer", + "default": 5, + "description": "The window size for the neighborhood region the peak is located in. All values from this region (exclude the values from the peak region) are tested against the peak region for significant difference. The square will have the size of (2 * windowSize)^2 bins" + }, + "pvaluepreselection": { + "type": "number", + "default": 0.1, + "description": "Only candidates with p-values less the given threshold will be considered as candidates. For each genomic distance a negative binomial distribution is fitted and for each pixel a p-value given by the cumulative density function is given. This does NOT influence the p-value for the neighborhood testing. Can a single value or a threshold file created by hicCreateThresholdFile" + }, + "peakinteractionsthreshold": { + "type": "integer", + "default": 10, + "description": "The minimum number of interactions a detected peaks needs to have to be considered" + }, + "obsexpthreshold": { + "type": "number", + "default": 1.5, + "description": "The minimum number of obs/exp interactions a detected peaks needs to have to be considered" + }, + "pvalue": { + "type": "number", + "default": 0.025, + "description": "Rejection level for Anderson-Darling or Wilcoxon-rank sum test for H0. H0 is peak region and background have the same distribution" + }, + "expected": { + "type": "string", + "default": "mean", + "description": "Method to compute the expected value per distance: Either the mean (mean), the mean of non-zero values (mean_nonzero) or the mean of non-zero values with ligation factor correction (mean_nonzero_ligation)", + "enum": ["mean", "mean_nonzero", "mean_nonzero_ligation"] + } + } + }, "skip_options": { "title": "Skip options", "type": "object", @@ -563,6 +623,9 @@ { "$ref": "#/$defs/downstream_analysis" }, + { + "$ref": "#/$defs/loop_calling" + }, { "$ref": "#/$defs/skip_options" }, diff --git a/subworkflows/local/cooler.nf b/subworkflows/local/cooler.nf index 38570337..e582aed1 100644 --- a/subworkflows/local/cooler.nf +++ b/subworkflows/local/cooler.nf @@ -92,6 +92,7 @@ workflow COOLER { emit: versions = ch_versions - cool = COOLER_BALANCE.out.cool + cool = ch_cool.map{it -> [it[0], it[1]]} + cool_balanced = COOLER_BALANCE.out.cool mcool = COOLER_ZOOMIFY.out.mcool } diff --git a/subworkflows/local/loop_calling/main.nf b/subworkflows/local/loop_calling/main.nf new file mode 100644 index 00000000..8f984201 --- /dev/null +++ b/subworkflows/local/loop_calling/main.nf @@ -0,0 +1,41 @@ +include { HICDETECTLOOPS } from '../../../modules/local/hicexplorer/hicdetectloops' +include { HICPLOTMATRIX as HICPLOTMATRIX_LOOPS } from '../../../modules/local/hicexplorer/hicplotmatrix' + +workflow LOOP_CALLING { + + take: + ch_cool + loop_caller + + main: + ch_versions = channel.empty() + + if (loop_caller =~ 'hicexplorer'){ + + HICDETECTLOOPS ( + ch_cool + ) + ch_loops = HICDETECTLOOPS.out.bedgraph + ch_versions = ch_versions.mix(HICDETECTLOOPS.out.versions) + + // Create channel: [meta, matrix, tads, loops, bigwig] + ch_cool + .map { meta, matrix -> [ meta.id, meta, matrix ] } + .combine(ch_loops.map{ meta, loops -> [meta.id, loops] }, by: 0) + .map { id, meta, matrix, loops -> + [meta, matrix, [], loops, []] + } + .set{ ch_cool_plotmatrix } + + HICPLOTMATRIX_LOOPS ( + ch_cool_plotmatrix + ) + ch_versions = ch_versions.mix(HICPLOTMATRIX_LOOPS.out.versions) + + } + + emit: + loops = ch_loops + plots = HICPLOTMATRIX_LOOPS.out.png + versions = ch_versions +} diff --git a/workflows/hic.nf b/workflows/hic.nf index 6b3a5794..27760b9a 100644 --- a/workflows/hic.nf +++ b/workflows/hic.nf @@ -19,6 +19,7 @@ include { PAIRTOOLS } from '../subworkflows/local/pairtools' include { COOLER } from '../subworkflows/local/cooler' include { COMPARTMENTS } from '../subworkflows/local/compartments' include { TADS } from '../subworkflows/local/tads' +include { LOOP_CALLING } from '../subworkflows/local/loop_calling' //**************************************** // Combine all maps resolution for downstream analysis @@ -40,6 +41,16 @@ if (params.res_tads && !params.skip_tads){ } } +if (params.res_loops && !params.skip_loops){ + ch_loops_res = Channel.from( "${params.res_loops}" ).splitCsv().flatten().toInteger() + ch_map_res = ch_map_res.concat(ch_loops_res) +}else{ + ch_loops_res = Channel.empty() + if (!params.skip_loops){ + log.warn "[nf-core/hic] Hi-C resolution for loops calling not specified. See --res_loops" + } +} + if (params.res_dist_decay && !params.skip_dist_decay){ ch_ddecay_res = Channel.from( "${params.res_dist_decay}" ).splitCsv().flatten().toInteger() ch_map_res = ch_map_res.concat(ch_ddecay_res) @@ -135,7 +146,7 @@ workflow HIC { // MODULE: HICEXPLORER/HIC_PLOT_DIST_VS_COUNTS // if (!params.skip_dist_decay){ - COOLER.out.cool + COOLER.out.cool_balanced .combine(ch_ddecay_res) .filter{ it[0].resolution == it[2] } .map { it -> [it[0], it[1]]} @@ -151,7 +162,7 @@ workflow HIC { // SUB-WORKFLOW: COMPARTMENT CALLING // if (!params.skip_compartments){ - COOLER.out.cool + COOLER.out.cool_balanced .combine(ch_comp_res) .filter{ it[0].resolution == it[2] } .map { it -> [it[0], it[1], it[2]]} @@ -169,7 +180,7 @@ workflow HIC { // SUB-WORKFLOW : TADS CALLING // if (!params.skip_tads){ - COOLER.out.cool + COOLER.out.cool_balanced .combine(ch_tads_res) .filter{ it[0].resolution == it[2] } .map { it -> [it[0], it[1]]} @@ -181,6 +192,24 @@ workflow HIC { ch_versions = ch_versions.mix(TADS.out.versions) } + // + // SUB-WORKFLOW : LOOP CALLING + // + if (!params.skip_loops){ + + COOLER.out.cool + .combine(ch_loops_res) + .filter{ it[0].resolution == it[2] } + .map { it -> [it[0], it[1]]} + .set{ ch_cool_loops } + + LOOP_CALLING ( + ch_cool_loops, + params.loop_caller + ) + ch_versions = ch_versions.mix(LOOP_CALLING.out.versions) + } + // // Collate and save software versions //