Skip to content

Commit 1d577a0

Browse files
authored
Merge pull request #8 from Juke34/agat
Agat module for normalization of GFF3 files
2 parents 5a7f0ea + ab3c503 commit 1d577a0

File tree

4 files changed

+38
-8
lines changed

4 files changed

+38
-8
lines changed

modules/agat.nf

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,17 @@
1-
process convert_sp_gxf2gxf {
1+
process normalize_gxf {
22
label 'agat'
3-
tag "$sample_id"
4-
publishDir "${params.outdir}/bamutil_clipoverlap", mode: 'copy'
3+
publishDir "${params.outdir}/agat_gff3", mode: 'copy'
54

65
input:
76
path(gxf)
87

98
output:
10-
path ("*.gff"), emit: gff
9+
path ("*.gff3"), emit: gff
1110

1211
script:
13-
gff_file = genome_fasta.baseName.replaceAll(/\..+(\.gz)?$/, '')
12+
base_name = gxf.baseName.replaceAll(/\..+(\.gz)?$/, '')
1413
"""
15-
agat_convert_sp_gxf2gxf.pl --gxf ${gxf} -o ${gff_file}.gff3
14+
agat config --expose --tabix
15+
agat_convert_sp_gxf2gxf.pl --gxf ${gxf} -o ${base_name}_normalized.gff3
1616
"""
17-
1817
}

nextflow.config

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ profiles {
5454
params.aline_profiles = "${baseDir}/config/ressources/base_aline.config"
5555
params.aligner = "STAR"
5656
params.reads = "${baseDir}/data/chr21/chr21_small_R1.fastq.gz "
57-
params.genome = "${baseDir}/data/chr21/chr21_small.fasta.gz"
57+
params.genome = "${baseDir}/data/chr21/chr21_small.fasta.gz"
58+
params.annotation = "${baseDir}/data/chr21/chr21_small_filtered.gff3"
5859
params.library_type = "ISR"
5960
params.read_type = "short_single"
6061
}
@@ -63,6 +64,7 @@ profiles {
6364
params.aligner = "STAR"
6465
params.reads = "${baseDir}/data/chr21/"
6566
params.genome = "${baseDir}/data/chr21/chr21_small.fasta.gz"
67+
params.annotation = "${baseDir}/data/chr21/chr21_small_filtered.gff3"
6668
params.library_type = "ISR"
6769
params.read_type = "short_paired"
6870
}

rain.nf

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import java.nio.file.*
1212
// Input/output params
1313
params.reads = "/path/to/reads_{1,2}.fastq.gz/or/folder"
1414
params.genome = "/path/to/genome.fa"
15+
params.annotation = "/path/to/annotations.gff3"
1516
params.outdir = "result"
1617
params.reads_extension = ".fastq.gz" // Extension used to detect reads in folder
1718
params.paired_reads_pattern = "_{1,2}"
@@ -80,6 +81,9 @@ def helpMSG() {
8081
--reads path to the illumina read file (fastq or fastq.gz) (default: $params.reads)
8182
--genome path to the genome (default: $params.genome)
8283
84+
Annotation input:
85+
--annotation path to a GFF3 file with annotations of genomic features
86+
8387
Output:
8488
--output path to the output directory (default: $params.outdir)
8589
@@ -133,6 +137,7 @@ include {samtools_index; samtools_fasta_index} from './modules/samtools.nf'
133137
include {reditools2} from "./modules/reditools2.nf"
134138
include {jacusa2} from "./modules/jacusa2.nf"
135139
include {sapin} from "./modules/sapin.nf"
140+
include {normalize_gxf} from "./modules/agat.nf"
136141

137142
//*************************************************
138143
// STEP 3 - Deal with parameters
@@ -248,4 +253,5 @@ workflow rain {
248253
samtools_fasta_index(genome)
249254
jacusa2(samtools_index.out.tuple_sample_bam_bamindex, samtools_fasta_index.out.tuple_fasta_fastaindex)
250255
sapin(bamutil_clipoverlap.out.tuple_sample_clipoverbam, genome)
256+
normalize_gxf(params.annotation)
251257
}

src/stats/utils.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import Bio
2+
from typing import Generator
3+
4+
def group_by_overlap(
    record: "Bio.SeqRecord.SeqRecord",
) -> Generator[list["Bio.SeqFeature.SeqFeature"], None, None]:
    """Yield groups of features with overlapping genomic positions within a record.

    Features are visited in the order they appear in ``record.features``.
    A feature joins the current group when its start lies inside the span
    covered by the group so far (both bounds inclusive, so a feature that
    starts exactly where the group ends is still grouped). Otherwise the
    current group is yielded and the feature opens a new one.

    NOTE(review): the grouping is only meaningful if ``record.features`` is
    ordered by start position -- confirm that callers provide sorted input.

    Args:
        record: Object exposing a ``features`` iterable whose items have
            ``location.start`` and ``location.end``.

    Yields:
        Lists of features, one list per overlap group. Nothing is yielded
        when the record has no features.
    """
    # The annotations are strings so they are not evaluated when the function
    # is defined: a bare ``import Bio`` presumably does not load the
    # ``SeqRecord`` / ``SeqFeature`` submodules.
    features = iter(record.features)
    first = next(features, None)
    if first is None:
        # No features: nothing to group (avoids indexing an empty list).
        return

    start = first.location.start
    end = first.location.end
    group = [first]

    for feature in features:
        if start <= feature.location.start <= end:
            # Only ever extend the right boundary: a short feature nested in
            # a longer one must not shrink the span of the group.
            end = max(end, feature.location.end)
            group.append(feature)
        else:
            yield group
            start = feature.location.start
            end = feature.location.end
            group = [feature]

    yield group

0 commit comments

Comments
 (0)