-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathcompile_gff.pl
More file actions
executable file
·91 lines (73 loc) · 2.08 KB
/
compile_gff.pl
File metadata and controls
executable file
·91 lines (73 loc) · 2.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper;
use feature 'say';
use FindBin;
use lib "$FindBin::Bin/DZLab-Tools/lib";
use DZLab::Tools::GFF qw/gff_to_string gff_make_iterator/;
use DZLab::Tools::GFFStore;
use Pod::Usage;
use Getopt::Long;
# Command-line state, filled in by GetOptions below.
my $help;            # --help:    print usage and exit
my $verbose = 0;     # --verbose: passed through to the GFF store
my $outfile;         # --outfile: output path, or '-' for standard out
my $memory  = 0;     # --memory:  passed through to the GFF store

# $result is false when GetOptions met an option it could not parse.
my $result = GetOptions(
    "verbose"     => \$verbose,
    "help"        => \$help,
    "outfile|o=s" => \$outfile,
    "memory|m"    => \$memory,
);

# An explicit, well-formed --help request is a successful run: show the
# usage text and exit with status 0 instead of a failure status.
pod2usage(-verbose => 1, -exitval => 0) if $result && $help;

# Bad options or a missing --outfile are usage errors: non-zero exit status.
# Test with defined/eq rather than plain truth so that an output file
# literally named "0" is accepted.
pod2usage(
    -message => "compile_gff.pl: --outfile is required",
    -verbose => 1,
    -exitval => 2,
) if $result && (!defined $outfile || $outfile eq '');
pod2usage(-verbose => 1, -exitval => 2) if !$result;
# Send everything printed from here on to --outfile, unless the user passed
# '-' to keep writing to standard out.
if ($outfile ne '-') {
    # open() on an already-open handle closes it first, so no separate
    # close is needed. Include $! so the failure reason (permissions,
    # missing directory, ...) reaches the user.
    open STDOUT, '>', $outfile
        or die "can't open $outfile for writing: $!";
}
# Build the GFF store. The 'c' and 't' attributes are declared numeric; the
# --verbose and --memory command-line flags are handed straight through.
my $gffstore = DZLab::Tools::GFFStore->new(
    {
        attributes => { c => 'numeric', t => 'numeric' },
        verbose    => $verbose,
        memory     => $memory,
        #debug => 1,
    }
);

# Load every GFF file named on the command line into the store.
for my $gff_file (@ARGV) {
    $gffstore->slurp({ filename => $gff_file });
}
# Aggregation query run against the store's 'gff' table:
#   - rows are grouped by (seqname, start, end);
#   - c and t are summed within each group;
#   - score is recomputed as sum(c) / (sum(t) + sum(c)), cast to real.
# source, feature, strand and frame are selected without aggregation, so
# which row of a group supplies them is up to the SQL backend
# (NOTE(review): backend not visible here -- confirm in GFFStore).
# The heredoc is single-quoted: the SQL contains nothing to interpolate.
my $compile_sql = <<'SELECT';
select
seqname,
source,
feature,
start,
end,
(cast(sum(c) as real)/((sum(t)+sum(c)))) as score,
strand,
frame,
sum(c) as c,
sum(t) as t
from
gff group by seqname, start, end
SELECT

# $iter is a code ref; each call yields the next result row.
my $iter = $gffstore->select_iter($compile_sql);
# Emit one GFF line per aggregated row until the iterator returns false.
while (my $record = $iter->()) {
    # The first eight GFF columns, taken from the row as a hash slice.
    my @columns
        = @{$record}{qw/seqname source feature start end score strand frame/};

    # Attribute column: "c=<sum>;t=<sum>".
    my $attributes = join q{;}, map { "$_=$record->{$_}" } qw/c t/;

    say gff_to_string([ @columns, $attributes ]);
}

=head1 NAME

compile_gff.pl - sum the 'c' and 't' fields of gff records.

=head1 SYNOPSIS

    compile_gff.pl [-h] [-v] [-m] -o outfile.gff gff_file1.gff gff_file2.gff ...

=head1 DESCRIPTION

Sum the 't' and 'c' attributes of gff records that share the same seqname,
start, and end coordinates. The score is recomputed as c/(t+c).

=head1 OPTIONS

    --verbose | -v         print verbose error messages
    --help    | -h         print this information
    --outfile | -o [file]  output file. use '-' to dump to screen (standard out).
    --memory  | -m         do everything in memory (not the default). this may be
                           faster, but be careful when the file sizes add up to
                           more than RAM.

=cut