1414
1515print " \n\n -----------------------------------------------" ;
1616print " \n Sim-it\n " ;
17- print " Version 1.3.1 \n " ;
17+ print " Version 1.3.2 \n " ;
1818print " Author: Nicolas Dierckxsens, (c) 2020-2022\n " ;
1919print " -----------------------------------------------\n\n " ;
2020
142142GetOptions (
143143 " c=s" => \$config ,
144144 " o=s" => \$output ,
145- ) or die " Incorrect usage!\n " ;
145+ ) or die " Incorrect usage!\n\n Usage: perl NOVOLoci1.0.pl -c config.txt -o output_path \n\n " ;
146146
147147open (CONFIG, $config ) or die " Error: Can't open the configuration file, please check the manual!\n\n Usage: perl Sim-it.pl -c config.txt -o output_path\n\n " ;
148148
151151{
152152 $output .= " /" ;
153153}
154+ if (-d $output )
155+ {}
156+ else
157+ {
158+ mkdir $output ;
159+ }
160+
154161while (my $line = <CONFIG>)
155162{
156163 chomp ($line );
17801787my $current_contig = " " ;
17811788my $size_current_contig = ' 0' ;
17821789
1790+ # -----------------------------------------------------------------------------------------------------------------------------------
17831791# Subroutines------------------------------------------------------------------------------------------------------------------------
17841792# -----------------------------------------------------------------------------------------------------------------------------------
17851793
@@ -1865,11 +1873,13 @@ sub insert_seq
18651873}
18661874# ------------------------------------------------------------------------------------------------------------------------------------
18671875# Read simulation parameters---------------------------------------------------------------------------------------------
1876+ # ------------------------------------------------------------------------------------------------------------------------------------
18681877
18691878my @NP_range = split /-/, $NP_range ;
18701879my $NP_range_low = $NP_range [0];
18711880my $NP_range_high = $NP_range [1];
1872- my $output_NP = $output ." Nanopore_" .$project ." .fasta" ;
1881+ my $output2 = $output ." _" ;
1882+ my $output_NP = $output2 ." Reads_" .$project ." .fasta" ;
18731883
18741884my %NP_seq_length ;
18751885undef %NP_seq_length ;
@@ -1878,7 +1888,7 @@ sub insert_seq
18781888my %NP_seq_length2 ;
18791889undef %NP_seq_length2 ;
18801890
1881- if ($NP_accuracy =~ m / ^(\d +)%.*$ / )
1891+ if ($NP_accuracy =~ m / ^(\d +)%* .*$ / )
18821892{
18831893 $NP_accuracy = $1 ;
18841894}
@@ -1973,7 +1983,7 @@ sub insert_seq
19731983undef %chromosomes ;
19741984my $NP_read_count_total = ' 0' ;
19751985
1976- my $maxProcs = 6 ;
1986+ my $maxProcs = 8 ;
19771987my $pm = new Parallel::ForkManager($maxProcs );
19781988
19791989my %ret_data ;
@@ -2221,7 +2231,7 @@ sub insert_seq
22212231 my $haplo_tmp2 = $haplo_tmp [0];
22222232 my $chr_tmp = $haplo_tmp [1];
22232233 $chr_tmp =~ tr / : / __/ ;
2224- my $output_NP_tmp = $output ." Nanopore_to_merge_ " .$project ." _" .$chr_tmp ." _" .$haplo_tmp2 ." .fasta" ;
2234+ my $output_NP_tmp = $output ." Long_reads_to_merge_ " .$project ." _" .$chr_tmp ." _" .$haplo_tmp2 ." .fasta" ;
22252235 if (exists ($merge_command {$haplo_tmp2 }))
22262236 {
22272237 my $tmp = $merge_command {$haplo_tmp2 }." " .$output_NP_tmp ;
@@ -2237,7 +2247,7 @@ sub insert_seq
22372247 foreach my $haps_tmp (sort keys %haps )
22382248 {
22392249 my $pid = $pm -> start and next ;
2240-
2250+ srand ();
22412251 my $NP_read_count = ' 0' ;
22422252 my $NP_total_length_tmp = ' 0' ;
22432253 my $NP_min_read_length_tmp = ' 10000000000000000' ;
@@ -2251,8 +2261,8 @@ sub insert_seq
22512261 $chr_tmp =~ tr / : / __/ ;
22522262
22532263 my $OUTPUT_NP ;
2254- my $output_NP_tmp = $output ." Nanopore_to_merge_ " .$project ." _" .$chr_tmp ." _" .$haplo_tmp2 ." .fasta" ;
2255- open ($OUTPUT_NP , " >" .$output_NP_tmp ) or die " Can't open Nanopore file $output_NP_tmp , $! \n " ;
2264+ my $output_NP_tmp = $output ." Long_reads_to_merge_ " .$project ." _" .$chr_tmp ." _" .$haplo_tmp2 ." .fasta" ;
2265+ open ($OUTPUT_NP , " >" .$output_NP_tmp ) or die " Can't open Long reads file $output_NP_tmp , $! \n " ;
22562266
22572267 my $NP_coverage_tmp = $NP_coverage ;
22582268
@@ -2292,7 +2302,7 @@ sub insert_seq
22922302 my $d = ' 0' ;
22932303 my $count_seqs = keys %NP_seq_length ;
22942304
2295- while ($o <= $NP_coverage_tmp -$count_seqs )
2305+ while ($o < $NP_coverage_tmp -$count_seqs )
22962306 {
22972307 $NP_read_count ++;
22982308 NP_LENGTH0:
@@ -2337,7 +2347,7 @@ sub insert_seq
23372347 my $seq_tmp2 = $seq_tmp ;
23382348 my $length_read = length ($seq_tmp2 );
23392349
2340- # input sequencing errors---------------------------------------------------------------------------------------------------------------------
2350+ # input sequencing errors---------------------------------------------------------------------------------------------------------------------
23412351
23422352 my $length_tmp = length ($seq_tmp );
23432353 my $j = 1.2;
@@ -2357,7 +2367,7 @@ sub insert_seq
23572367 $NP_mismatch_rate = ' 0' ;
23582368 }
23592369
2360- # mismatch-----------------------------------------------------------------------------------------------
2370+ # mismatch-----------------------------------------------------------------------------------------------
23612371 my $mismatch_nuc_count = int ($length_tmp *$NP_mismatch_rate );
23622372 my $f = ' 0' ;
23632373 my %mismatches ;
@@ -2497,8 +2507,8 @@ sub insert_seq
24972507 $f += length ($mismatch_nuc );
24982508 substr $seq_tmp , $pos , length ($mismatch_nuc ), $mismatch_nuc ;
24992509 }
2500- }
2501- # indel------------------------------------------------------------------------------------------------------
2510+ }
2511+ # indel------------------------------------------------------------------------------------------------------
25022512 my %indels ;
25032513 undef %indels ;
25042514 my $delete_nuc_count = int ($length_tmp *$NP_deletion_rate );
@@ -2792,6 +2802,7 @@ sub insert_seq
27922802 next REF;
27932803 }
27942804# --------
2805+ $line =~ tr / actgn/ ACTGN/ ;
27952806 if ($translocating eq " yes" )
27962807 {
27972808 if (eof )
@@ -3745,7 +3756,7 @@ sub insert_seq
37453756 my $haplo_tmp2 = $haplo_tmp [0];
37463757 my $chr_tmp = $haplo_tmp [1];
37473758 $chr_tmp =~ tr / : / __/ ;
3748- my $output_NP_tmp = $output ." Nanopore_to_merge_ " .$project ." _" .$chr_tmp ." _" .$haplo_tmp2 ." .fasta" ;
3759+ my $output_NP_tmp = $output ." Long_reads_to_merge_ " .$project ." _" .$chr_tmp ." _" .$haplo_tmp2 ." .fasta" ;
37493760 if (exists ($merge_command {$haplo_tmp2 }))
37503761 {
37513762 my $tmp = $merge_command {$haplo_tmp2 }." " .$output_NP_tmp ;
@@ -3760,7 +3771,7 @@ sub insert_seq
37603771 foreach my $haps_tmp (sort keys %haps )
37613772 {
37623773 my $pid = $pm -> start and next ;
3763-
3774+ srand ();
37643775 my $NP_read_count = ' 0' ;
37653776 my $NP_total_length_tmp = ' 0' ;
37663777 my $NP_min_read_length_tmp = ' 10000000000000000' ;
@@ -3774,8 +3785,8 @@ sub insert_seq
37743785 $chr_tmp =~ tr / : / __/ ;
37753786
37763787 my $OUTPUT_NP ;
3777- my $output_NP_tmp = $output ." Nanopore_to_merge_ " .$project ." _" .$chr_tmp ." _" .$haplo_tmp2 ." .fasta" ;
3778- open ($OUTPUT_NP , " >" .$output_NP_tmp ) or die " Can't open Nanopore file $output_NP_tmp , $! \n " ;
3788+ my $output_NP_tmp = $output ." Long_reads_to_merge_ " .$project ." _" .$chr_tmp ." _" .$haplo_tmp2 ." .fasta" ;
3789+ open ($OUTPUT_NP , " >" .$output_NP_tmp ) or die " Can't open Long reads file $output_NP_tmp , $! \n " ;
37793790
37803791 my $NP_coverage_tmp = $NP_coverage ;
37813792
@@ -3815,7 +3826,7 @@ sub insert_seq
38153826 my $d = ' 0' ;
38163827 my $count_seqs = keys %NP_seq_length ;
38173828
3818- while ($o <= $NP_coverage_tmp -$count_seqs )
3829+ while ($o < $NP_coverage_tmp -$count_seqs )
38193830 {
38203831 $NP_read_count ++;
38213832 NP_LENGTH:
@@ -4246,7 +4257,7 @@ sub insert_seq
42464257
42474258foreach my $merge_command_tmp (keys %merge_command )
42484259{
4249- $merge_command = " cat " .$merge_command {$merge_command_tmp }." > " .$output ." Nanopore_ " .$project ." _HAP" .$merge_command_tmp ." .fasta" ;
4260+ $merge_command = " cat " .$merge_command {$merge_command_tmp }." > " .$output ." Long_reads_ " .$project ." _HAP" .$merge_command_tmp ." .fasta" ;
42504261 system ($merge_command );
42514262 my @remove_files = split /\s /, $merge_command {$merge_command_tmp };
42524263 foreach my $remove_file_tmp (@remove_files )
@@ -4256,6 +4267,10 @@ sub insert_seq
42564267 }
42574268}
42584269
4270+ my $merge_command2 = " cat " .$output ." Long_reads_" .$project ." _HAP1.fasta" ." " .$output ." Long_reads_" .$project ." _HAP2.fasta" ." > " .$output ." Long_reads_" .$project ." _HAP12.fasta" ;
4271+ system ($merge_command2 );
4272+
4273+
42594274print " \n\n " ;
42604275# print STATS------------------------------------------------------------------------------------------------------------------------
42614276# print "TIME_SV = ".$time_sv."\n";
0 commit comments