-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathgff2tsv.pl
More file actions
executable file
·89 lines (70 loc) · 2.11 KB
/
gff2tsv.pl
File metadata and controls
executable file
·89 lines (70 loc) · 2.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/env perl
use strict;
use warnings;
use List::MoreUtils qw/all/;
use List::Util qw/max min/;
use Data::Dumper;
use feature 'say';
use Carp;
use autodie;
use Pod::Usage;
use Getopt::Long;
use FindBin;
use lib "$FindBin::Bin/lib";
#use GFF::Parser::Attributes;
use GFF::Parser::Locus;
# Command-line state. $output names the destination file; the default '-'
# means "leave STDOUT as it is". $help is set when --help is given.
my $output = q{-};
my $help;

my $result = GetOptions(
    'output|o=s' => \$output,
    'help'       => \$help,
);

# Print the usage text when option parsing failed, when help was requested,
# or when no input file names remain in @ARGV after option processing.
my $usage_needed = !$result || $help || !@ARGV;
pod2usage(-verbose => 1) if $usage_needed;
# complement(\@x, \@y)
#
# Returns the elements of @x that do not occur in @y. Both arguments are
# array references. The order of @x is kept, and an element that appears
# several times in @x (and not in @y) is returned that many times.
sub complement {
    my ($xx, $yy) = @_;
    my @candidates = @{$xx};
    my @excluded   = @{$yy};

    # Membership table for @y; only key existence matters, so the values
    # are left undefined.
    my %is_excluded;
    @is_excluded{@excluded} = ();

    return grep { !exists $is_excluded{$_} } @candidates;
}
# Send everything printed to STDOUT into the requested file. An output name
# of '-' leaves the existing STDOUT untouched.
if ($output ne '-') {
    close STDOUT;
    open STDOUT, '>', $output or die "can't open $output for writing";
}
# Record keys that are treated as core fields; every other key found on a
# parsed record is counted as an attribute name.
my @mains = qw/seqname source feature start end score strand frame attribute/;
my %seen;

# First pass: walk every input file and collect the set of attribute names
# (the record keys that are not listed in @mains).
for my $path (@ARGV) {
    my $reader = GFF::Parser::Locus->new(file => $path, locus => 'ID');
    while (my $record = $reader->next()) {
        my @record_keys = keys %{$record};
        $seen{$_} = 1 for complement(\@record_keys, \@mains);
    }
}

# Output layout: the eight fixed columns followed by the attribute names in
# sorted order.
my @attribute_names = sort keys %seen;
my @columns = (
    qw/seqname source feature start end score strand frame/,
    @attribute_names,
);

# Second pass: print the header line, then one tab-separated row per record.
# A value that is undefined for a record is written as '.'.
say join "\t", @columns;
for my $path (@ARGV) {
    my $reader = GFF::Parser::Locus->new(file => $path, locus => 'ID');
    while (my $record = $reader->next()) {
        my @cells = map { defined $_ ? $_ : '.' } @{$record}{@columns};
        say join "\t", @cells;
    }
}
=head1 NAME
gff2tsv.pl - convert GFF files to a TSV (tab-separated values) file
=head1 SYNOPSIS
gff2tsv.pl -o output-tsv.txt input1.gff input2.gff ...
=head1 DESCRIPTION
The first 8 columns are always seqname, source, feature, start, end, score, strand, frame. The
ninth and subsequent columns are the attributes, split up into individual columns. The first line
of output is the column names.
=head1 OPTIONS
--output name of output TSV file (default: '-', which writes to standard output)
-o
--help print this information
=cut