-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathcolumnRearrange.pl
More file actions
executable file
·102 lines (91 loc) · 3.26 KB
/
columnRearrange.pl
File metadata and controls
executable file
·102 lines (91 loc) · 3.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
use strict;
use Data::Dumper;
use Getopt::Long;
my $file;
my $order;
my $prefix;
my $sep;
GetOptions (
"file|f=s" => \$file, #filename
"order|o=s" => \$order, #comma seperated indexes
"prefix|p=s" => \$prefix,
"sep|s=s" => \$sep,
"help|h" => sub{
print "usage: $0 rearrange columns according to your order or for maf rearrange\n\nOptions:\n\t--file\t\tthe filename to be reordered\n";
print "\t--order\t\tcomma seperated indexes\n";
print "\t--prefix\tthe prefix of samples' names\n";
print "\t--sep\tthe seperator to rearrange columns\n";
print "\t--help\t\tprint this help message\n";
print "\n";
exit 0;
},
);
my @order = split(/\,/, $order);
my @prefix = split(',', $prefix);
my $prefixReg = join('|', @prefix);
print STDERR "prefixReg is $prefixReg\n";
open IN, "$file";
while ( <IN> ) {
chomp;
my @cols = split /\t/;
if ($order ne ''){ #order
my $print;
foreach my $index (@order) {
$print .= $cols[$index]."\t";
}
$print =~ s/\t$//;
print "$print\n";
} else { #order not specified, for variants maf table
if (/^#/) {
my %samples;
my $secondstart = 0;
for (my $i = 0; $i <= $#cols; $i++){
if ($cols[$i] eq $sep) {
$secondstart = $i+1;
push(@order, $i);
last;
} else {
push(@order, $i);
if ($cols[$i] =~ /^TCGA-.+?$/ or $cols[$i] =~ /^($prefixReg)([A-Za-z0-9\-\_]+)?$/) {
$samples{$cols[$i]} = $i;
}
} #else
} #for
my %inserted;
for (my $i = $secondstart; $i <= $#cols; $i++) {
if ( exists($samples{$cols[$i]}) ) {
my $insertpos = $samples{$cols[$i]};
my $offset;
foreach my $sample ( sort {$a =~ /($prefixReg)(\d+)?([A-Za-z0-9\-\_]+)?/; my $pa = $1; my $ia = $2; my $ias = $3; $b =~ /($prefixReg)(\d+)?([A-Za-z0-9\-\_]+)?/; my $pb = $1; my $ib = $2; my $ibs = $3; $pa cmp $pb or $ia <=> $ib or $ias cmp $ibs} keys %samples ) {
my $rank = $samples{$sample};
last if ($insertpos == $rank);
if ( exists($inserted{$sample}) ) {
$offset += 2;
}
}
$insertpos += $offset+1;
splice(@order, $insertpos, 0, $i, $i+1);
$inserted{$cols[$i]} = '';
$cols[$i] .= 'maf';
} elsif ( $cols[$i] !~ /d$/ ) { #not depth
if ($cols[$i] =~ /^($prefixReg)([A-Za-z0-9\-\_]+)?/) {
$cols[$i] .= 'maf';
push(@order, $i);
if ($cols[$i] =~ /^TCGA/ or $cols[$i] =~ /^($prefixReg)([A-Za-z0-9\-\_]+)?/){
push(@order, $i+1) if $cols[$i+1] =~ /^($prefixReg)([A-Za-z0-9\-\_]+)?d$/; #depth info pushed
}
} else {
push(@order, $i);
}
}
}
} #if it is the header
my $print;
foreach my $index (@order) {
$print .= $cols[$index]."\t";
}
$print =~ s/\t$//;
print "$print\n";
} #column not specified
}
close IN;